Add support for BPE tokenization.

This commit is contained in:
2023-04-23 18:49:52 -07:00
parent 0fe35cd976
commit f61fe6a18f
9 changed files with 562 additions and 17 deletions

View File

@ -350,3 +350,43 @@ service ImageGenerationService {
*/
rpc GenerateImagesStreaming(GenerateImagesRequest) returns (stream GenerateImagesStreamUpdate);
}
/**
 * Request message for the Tokenize RPC: identifies the model to tokenize
 * with and carries the text to be tokenized.
 */
message TokenizeRequest {
  /**
   * Name of the loaded model that should be used to tokenize the input.
   */
  string model_name = 1;

  /**
   * The string that is to be tokenized.
   */
  string input = 2;
}
/**
 * Response message for the Tokenize RPC: the outcome of tokenizing the
 * request's input string.
 */
message TokenizeResponse {
  /**
   * The tokens found in the input string, as strings.
   */
  repeated string tokens = 1;

  /**
   * The token IDs found in the input string.
   *
   * NOTE(review): presumably token_ids[i] is the ID of tokens[i], making the
   * two lists parallel -- confirm against the server implementation.
   */
  repeated uint64 token_ids = 2;
}
/**
 * Tokenizer service, used to analyze the tokens a loaded model produces.
 */
service TokenizerService {
  /**
   * Unary call that analyzes the request's input using a loaded model and
   * returns the results in a single TokenizeResponse.
   */
  rpc Tokenize(TokenizeRequest) returns (TokenizeResponse);
}