Job management and preparation for multi-hosting.

2025-12-19 05:20:17 +00:00 · 2023-05-08 16:06:07 -07:00
parent a2d9e14f3a
commit ace2c07aa1
30 changed files with 3879 additions and 2307 deletions
--- a/Common/StableDiffusion.proto
+++ b/Common/StableDiffusion.proto
@@ -1,395 +0,0 @@
-/**
- * Stable Diffusion RPC service for Apple Platforms.
- */
-syntax = "proto3";
-package gay.pizza.stable.diffusion;
-
-/**
- * Utilize a prefix of 'Sd' for Swift.
- */
-option swift_prefix = "Sd";
-
-/**
- * Represents the model attention. Model attention has to do with how the model is encoded, and
- * can determine what compute units are able to support a particular model.
- */
-enum ModelAttention {
-    /**
-     * The model is an original attention type. It can be loaded only onto CPU & GPU compute units.
-     */
-    original = 0;
-
-    /**
-     * The model is a split-ein-sum attention type. It can be loaded onto all compute units,
-     * including the Apple Neural Engine.
-     */
-    split_ein_sum = 1;
-}
-
-/**
- * Represents the schedulers that are used to sample images.
- */
-enum Scheduler {
-    /**
-     * The PNDM (Pseudo numerical methods for diffusion models) scheduler.
-     */
-    pndm = 0;
-
-    /**
-     * The DPM-Solver++ scheduler.
-     */
-    dpm_solver_plus_plus = 1;
-}
-
-/**
- * Represents a specifier for what compute units are available for ML tasks.
- */
-enum ComputeUnits {
-    /**
-     * The CPU as a singular compute unit.
-     */
-    cpu = 0;
-
-    /**
-     * The CPU & GPU combined into a singular compute unit.
-     */
-    cpu_and_gpu = 1;
-
-    /**
-     * Allow the usage of all compute units. CoreML will decided where the model is loaded.
-     */
-    all = 2;
-
-    /**
-     * The CPU & Neural Engine combined into a singular compute unit.
-     */
-    cpu_and_neural_engine = 3;
-}
-
-/**
- * Represents information about an available model.
- * The primary key of a model is it's 'name' field.
- */
-message ModelInfo {
-    /**
-     * The name of the available model. Note that within the context of a single RPC server,
-     * the name of a model is a unique identifier. This may not be true when utilizing a cluster or
-     * load balanced server, so keep that in mind.
-     */
-    string name = 1;
-
-    /**
-     * The attention of the model. Model attention determines what compute units can be used to
-     * load the model and make predictions.
-     */
-    ModelAttention attention = 2;
-
-    /**
-     * Whether the model is currently loaded onto an available compute unit.
-     */
-    bool is_loaded = 3;
-
-    /**
-     * The compute unit that the model is currently loaded into, if it is loaded to one at all.
-     * When is_loaded is false, the value of this field should be null.
-     */
-    ComputeUnits loaded_compute_units = 4;
-    
-    /**
-     * The compute units that this model supports using.
-     */
-    repeated ComputeUnits supported_compute_units = 5;
-}
-
-/**
- * Represents the format of an image.
- */
-enum ImageFormat {
-    /**
-     * The PNG image format.
-     */
-    png = 0;
-}
-
-/**
- * Represents an image within the Stable Diffusion context.
- * This could be an input image for an image generation request, or it could be
- * a generated image from the Stable Diffusion model.
- */
-message Image {
-    /**
-     * The format of the image.
-     */
-    ImageFormat format = 1;
-
-    /**
-     * The raw data of the image, in the specified format.
-     */
-    bytes data = 2;
-}
-
-/**
- * Represents a request to list the models available on the host.
- */
-message ListModelsRequest {}
-
-/**
- * Represents a response to listing the models available on the host.
- */
-message ListModelsResponse {
-    /**
-     * The available models on the Stable Diffusion server.
-     */
-    repeated ModelInfo available_models = 1;
-}
-
-/**
- * Represents a request to load a model into a specified compute unit.
- */
-message LoadModelRequest {
-    /**
-     * The model name to load onto the compute unit.
-     */
-    string model_name = 1;
-
-    /**
-     * The compute units to load the model onto.
-     */
-    ComputeUnits compute_units = 2;
-}
-
-/**
- * Represents a response to loading a model.
- */
-message LoadModelResponse {}
-
-/**
- * The model service, for management and loading of models.
- */
-service ModelService {
-    /**
-     * Lists the available models on the host.
-     * This will return both models that are currently loaded, and models that are not yet loaded.
-     */
-    rpc ListModels(ListModelsRequest) returns (ListModelsResponse);
-
-    /**
-     * Loads a model onto a compute unit.
-     */
-    rpc LoadModel(LoadModelRequest) returns (LoadModelResponse);
-}
-
-/**
- * Represents a request to generate images using a loaded model.
- */
-message GenerateImagesRequest {
-    /**
-     * The model name to use for generation.
-     * The model must be already be loaded using ModelService.LoadModel RPC method.
-     */
-    string model_name = 1;
-
-    /**
-     * The output format for generated images.
-     */
-    ImageFormat output_image_format = 2;
-
-    /**
-     * The number of batches of images to generate.
-     */
-    uint32 batch_count = 3;
-
-    /**
-     * The number of images inside a single batch.
-     */
-    uint32 batch_size = 4;
-
-    /**
-     * The positive textual prompt for image generation.
-     */
-    string prompt = 5;
-
-    /**
-     * The negative prompt for image generation.
-     */
-    string negative_prompt = 6;
-    
-    /**
-     * The random seed to use.
-     * Zero indicates that the seed should be random.
-     */
-    uint32 seed = 7;
-
-    /**
-     * An optional starting image to use for generation.
-     */
-    Image starting_image = 8;
-
-    /**
-     * Indicates whether to enable the safety check network, if it is available.
-     */
-    bool enable_safety_check = 9;
-
-    /**
-     * The scheduler to use for generation.
-     * The default is PNDM, if not specified.
-     */
-    Scheduler scheduler = 10;
-
-    /**
-     * The guidance scale, which controls the influence the prompt has on the image.
-     * If not specified, a reasonable default value is used.
-     */
-    float guidance_scale = 11;
-
-    /**
-     * The strength of the image generation.
-     * If not specified, a reasonable default value is used.
-     */
-    float strength = 12;
-
-    /**
-     * The number of inference steps to perform.
-     * If not specified, a reasonable default value is used.
-     */
-    uint32 step_count = 13;
-
-    /**
-     * Indicates whether to send intermediate images
-     * while in streaming mode.
-     */
-    bool send_intermediates = 14;
-}
-
-/**
- * Represents the response from image generation.
- */
-message GenerateImagesResponse {
-    /**
-     * The set of generated images by the Stable Diffusion pipeline.
-     */
-    repeated Image images = 1;
-
-    /**
-     * The seeds that were used to generate the images.
-     */
-    repeated uint32 seeds = 2;
-}
-
-/**
- * Represents a progress update for an image generation batch.
- */
-message GenerateImagesBatchProgressUpdate {
-    /**
-     * The percentage of this batch that is complete.
-     */
-    float percentage_complete = 1;
-
-    /**
-     * The current state of the generated images from this batch.
-     * These are not usually completed images, but partial images.
-     * These are only available if the request's send_intermediates
-     * parameter is set to true.
-     */
-    repeated Image images = 2;
-}
-
-/**
- * Represents a completion of an image generation batch.
- */
-message GenerateImagesBatchCompletedUpdate {
-    /**
-     * The generated images from this batch.
-     */
-    repeated Image images = 1;
-
-    /**
-     * The seed for this batch.
-     */
-    uint32 seed = 2;
-}
-
-/**
- * Represents a continuous update from an image generation stream.
- */
-message GenerateImagesStreamUpdate {
-    /**
-     * The current batch number that is processing.
-     */
-    uint32 current_batch = 1;
-
-    /**
-     * An update to the image generation pipeline.
-     */
-    oneof update {
-        /**
-         * Batch progress update.
-         */
-        GenerateImagesBatchProgressUpdate batch_progress = 2;
-
-        /**
-         * Batch completion update.
-         */
-        GenerateImagesBatchCompletedUpdate batch_completed = 3;
-    }
-
-    /**
-     * The percentage of completion for the entire submitted job.
-     */
-    float overall_percentage_complete = 4;
-}
-
-/**
- * The image generation service, for generating images from loaded models.
- */
-service ImageGenerationService {
-    /**
-     * Generates images using a loaded model.
-     */
-    rpc GenerateImages(GenerateImagesRequest) returns (GenerateImagesResponse);
-
-    /**
-     * Generates images using a loaded model, providing updates along the way.
-     */
-    rpc GenerateImagesStreaming(GenerateImagesRequest) returns (stream GenerateImagesStreamUpdate);
-}
-
-/**
- * Represents a request to tokenize an input.
- */
-message TokenizeRequest {
-    /**
-     * The name of a loaded model to use for tokenization.
-     */
-    string model_name = 1;
-
-    /**
-     * The input string to tokenize.
-     */
-    string input = 2;
-}
-
-/**
- * Represents a response to tokenization.
- */
-message TokenizeResponse {
-    /**
-     * The tokens inside the input string.
-     */
-    repeated string tokens = 1;
-
-    /**
-     * The token IDs inside the input string.
-     */
-    repeated uint64 token_ids = 2;
-}
-
-/**
- * The tokenizer service, for analyzing tokens for a loaded model.
- */
-service TokenizerService {
-    /**
-     * Analyze the input using a loaded model and return the results.
-     */
-    rpc Tokenize(TokenizeRequest) returns (TokenizeResponse);
-}
--- a/Common/host.proto
+++ b/Common/host.proto
@@ -0,0 +1,63 @@
+/**
+ * Host management for the Stable Diffusion RPC service.
+ */
+syntax = "proto3";
+package gay.pizza.stable.diffusion;
+import "shared.proto";
+
+/**
+ * Utilize a prefix of 'Sd' for Swift.
+ */
+option swift_prefix = "Sd";
+option java_multiple_files = true;
+
+/**
+ * Represents a request to list the models available on the host.
+ */
+message ListModelsRequest {}
+
+/**
+ * Represents a response to listing the models available on the host.
+ */
+message ListModelsResponse {
+  /**
+   * The available models on the Stable Diffusion server.
+   */
+  repeated ModelInfo available_models = 1;
+}
+
+/**
+ * Represents a request to load a model into a specified compute unit.
+ */
+message LoadModelRequest {
+  /**
+   * The model name to load onto the compute unit.
+   */
+  string model_name = 1;
+
+  /**
+   * The compute units to load the model onto.
+   */
+  ComputeUnits compute_units = 2;
+}
+
+/**
+ * Represents a response to loading a model.
+ */
+message LoadModelResponse {}
+
+/**
+ * The model service, for management and loading of models.
+ */
+service ModelService {
+  /**
+   * Lists the available models on the host.
+   * This will return both models that are currently loaded, and models that are not yet loaded.
+   */
+  rpc ListModels(ListModelsRequest) returns (ListModelsResponse);
+
+  /**
+   * Loads a model onto a compute unit.
+   */
+  rpc LoadModel(LoadModelRequest) returns (LoadModelResponse);
+}
--- a/Common/image_generation.proto
+++ b/Common/image_generation.proto
@@ -0,0 +1,192 @@
+/**
+ * Image generation for the Stable Diffusion RPC service.
+ */
+syntax = "proto3";
+package gay.pizza.stable.diffusion;
+import "shared.proto";
+
+/**
+ * Utilize a prefix of 'Sd' for Swift.
+ */
+option swift_prefix = "Sd";
+option java_multiple_files = true;
+
+/**
+ * Represents a request to generate images using a loaded model.
+ */
+message GenerateImagesRequest {
+  /**
+   * The model name to use for generation.
+   * The model must already be loaded using the ModelService.LoadModel RPC method.
+   */
+  string model_name = 1;
+
+  /**
+   * The output format for generated images.
+   */
+  ImageFormat output_image_format = 2;
+
+  /**
+   * The number of batches of images to generate.
+   */
+  uint32 batch_count = 3;
+
+  /**
+   * The number of images inside a single batch.
+   */
+  uint32 batch_size = 4;
+
+  /**
+   * The positive textual prompt for image generation.
+   */
+  string prompt = 5;
+
+  /**
+   * The negative prompt for image generation.
+   */
+  string negative_prompt = 6;
+
+  /**
+   * The random seed to use.
+   * Zero indicates that the seed should be random.
+   */
+  uint32 seed = 7;
+
+  /**
+   * An optional starting image to use for generation.
+   */
+  Image starting_image = 8;
+
+  /**
+   * Indicates whether to enable the safety check network, if it is available.
+   */
+  bool enable_safety_check = 9;
+
+  /**
+   * The scheduler to use for generation.
+   * The default is PNDM, if not specified.
+   */
+  Scheduler scheduler = 10;
+
+  /**
+   * The guidance scale, which controls the influence the prompt has on the image.
+   * If not specified, a reasonable default value is used.
+   */
+  float guidance_scale = 11;
+
+  /**
+   * The strength of the image generation.
+   * If not specified, a reasonable default value is used.
+   */
+  float strength = 12;
+
+  /**
+   * The number of inference steps to perform.
+   * If not specified, a reasonable default value is used.
+   */
+  uint32 step_count = 13;
+
+  /**
+   * Indicates whether to send intermediate images
+   * while in streaming mode.
+   */
+  bool send_intermediates = 14;
+}
+
+/**
+ * Represents the response from image generation.
+ */
+message GenerateImagesResponse {
+  /**
+   * The set of generated images by the Stable Diffusion pipeline.
+   */
+  repeated Image images = 1;
+
+  /**
+   * The seeds that were used to generate the images.
+   */
+  repeated uint32 seeds = 2;
+}
+
+/**
+ * Represents a progress update for an image generation batch.
+ */
+message GenerateImagesBatchProgressUpdate {
+  /**
+   * The percentage of this batch that is complete.
+   */
+  float percentage_complete = 1;
+
+  /**
+   * The current state of the generated images from this batch.
+   * These are not usually completed images, but partial images.
+   * These are only available if the request's send_intermediates
+   * parameter is set to true.
+   */
+  repeated Image images = 2;
+}
+
+/**
+ * Represents a completion of an image generation batch.
+ */
+message GenerateImagesBatchCompletedUpdate {
+  /**
+   * The generated images from this batch.
+   */
+  repeated Image images = 1;
+
+  /**
+   * The seed for this batch.
+   */
+  uint32 seed = 2;
+}
+
+/**
+ * Represents a continuous update from an image generation stream.
+ */
+message GenerateImagesStreamUpdate {
+  /**
+   * The current batch number that is processing.
+   */
+  uint32 current_batch = 1;
+
+  /**
+   * An update to the image generation pipeline.
+   */
+  oneof update {
+    /**
+     * Batch progress update.
+     */
+    GenerateImagesBatchProgressUpdate batch_progress = 2;
+
+    /**
+     * Batch completion update.
+     */
+    GenerateImagesBatchCompletedUpdate batch_completed = 3;
+  }
+
+  /**
+   * The percentage of completion for the entire submitted job.
+   */
+  float overall_percentage_complete = 4;
+
+  /**
+   * The id of the spawned job.
+   */
+  uint64 job_id = 5;
+}
+
+/**
+ * The image generation service, for generating images from loaded models.
+ */
+service ImageGenerationService {
+  /**
+   * Generates images using a loaded model.
+   */
+  rpc GenerateImages(GenerateImagesRequest) returns (GenerateImagesResponse);
+
+  /**
+   * Generates images using a loaded model, providing updates along the way.
+   */
+  rpc GenerateImagesStreaming(GenerateImagesRequest) returns (stream GenerateImagesStreamUpdate);
+}
--- a/Common/jobs.proto
+++ b/Common/jobs.proto
@@ -0,0 +1,131 @@
+/**
+ * Job management for the Stable Diffusion RPC service.
+ */
+syntax = "proto3";
+package gay.pizza.stable.diffusion;
+
+/**
+ * Utilize a prefix of 'Sd' for Swift.
+ */
+option swift_prefix = "Sd";
+option java_multiple_files = true;
+
+/**
+ * Represents the current state of a job.
+ */
+enum JobState {
+  /**
+   * The job is in an unknown state.
+   */
+  unknown = 0;
+
+  /**
+   * The job is queued. It has not started the work.
+   */
+  queued = 1;
+
+  /**
+   * The job is running. The work has been started.
+   */
+  running = 2;
+
+  /**
+   * The job is completed. The work has been completed.
+   */
+  completed = 3;
+
+  /**
+   * The job is cancelled. An actor requested cancellation.
+   */
+  cancelled = 4;
+}
+
+/**
+ * Represents a job that is active.
+ */
+message Job {
+  /**
+   * Unique job identifier.
+   */
+  uint64 id = 1;
+
+  /**
+   * Job host identifier.
+   */
+  uint64 host = 2;
+
+  /**
+   * The current state of the job.
+   */
+  JobState state = 3;
+
+  /**
+   * The percentage of completion for the entire job.
+   */
+  float overall_percentage_complete = 4;
+}
+
+/**
+ * Represents a request to get the state of a job.
+ */
+message GetJobRequest {
+  /**
+   * The job id to retrieve the current state for.
+   */
+  uint64 id = 1;
+}
+
+/**
+ * Represents a response to getting the state of a job.
+ */
+message GetJobResponse {
+  /**
+   * The current state of the job.
+   */
+  Job job = 1;
+}
+
+/**
+ * Represents a request to cancel a job.
+ */
+message CancelJobRequest {
+  /**
+   * The job id to cancel.
+   */
+  uint64 id = 1;
+}
+
+/**
+ * Represents a response to cancelling a job.
+ */
+message CancelJobResponse {}
+
+/**
+ * Represents a request to stream job updates.
+ */
+message StreamJobUpdatesRequest {
+  /**
+   * The job id to stream updates for. If this is not set or is zero,
+   * all job updates will be sent.
+   */
+  uint64 id = 1;
+}
+
+/**
+ * Represents an update to a job.
+ */
+message JobUpdate {
+  /**
+   * The current state of the job.
+   */
+  Job job = 1;
+}
+
+/**
+ * The job service, for inspecting and monitoring the state of jobs executing on the service.
+ */
+service JobService {
+  rpc GetJob(GetJobRequest) returns (GetJobResponse);
+  rpc CancelJob(CancelJobRequest) returns (CancelJobResponse);
+  rpc StreamJobUpdates(StreamJobUpdatesRequest) returns (stream JobUpdate);
+}
--- a/Common/shared.proto
+++ b/Common/shared.proto
@@ -0,0 +1,130 @@
+/**
+ * Shared messages for the Stable Diffusion RPC service.
+ */
+syntax = "proto3";
+package gay.pizza.stable.diffusion;
+
+/**
+ * Utilize a prefix of 'Sd' for Swift.
+ */
+option swift_prefix = "Sd";
+option java_multiple_files = true;
+
+/**
+ * Represents the model attention. Model attention has to do with how the model is encoded, and
+ * can determine what compute units are able to support a particular model.
+ */
+enum ModelAttention {
+  /**
+   * The model is an original attention type. It can be loaded only onto CPU & GPU compute units.
+   */
+  original = 0;
+
+  /**
+   * The model is a split-ein-sum attention type. It can be loaded onto all compute units,
+   * including the Apple Neural Engine.
+   */
+  split_ein_sum = 1;
+}
+
+/**
+ * Represents the schedulers that are used to sample images.
+ */
+enum Scheduler {
+  /**
+   * The PNDM (Pseudo numerical methods for diffusion models) scheduler.
+   */
+  pndm = 0;
+
+  /**
+   * The DPM-Solver++ scheduler.
+   */
+  dpm_solver_plus_plus = 1;
+}
+
+/**
+ * Represents a specifier for what compute units are available for ML tasks.
+ */
+enum ComputeUnits {
+  /**
+   * The CPU as a singular compute unit.
+   */
+  cpu = 0;
+
+  /**
+   * The CPU & GPU combined into a singular compute unit.
+   */
+  cpu_and_gpu = 1;
+
+  /**
+   * Allow the usage of all compute units. CoreML will decide where the model is loaded.
+   */
+  all = 2;
+
+  /**
+   * The CPU & Neural Engine combined into a singular compute unit.
+   */
+  cpu_and_neural_engine = 3;
+}
+
+/**
+ * Represents information about an available model.
+ * The primary key of a model is its 'name' field.
+ */
+message ModelInfo {
+  /**
+   * The name of the available model. Note that within the context of a single RPC server,
+   * the name of a model is a unique identifier. This may not be true when utilizing a cluster or
+   * load balanced server, so keep that in mind.
+   */
+  string name = 1;
+
+  /**
+   * The attention of the model. Model attention determines what compute units can be used to
+   * load the model and make predictions.
+   */
+  ModelAttention attention = 2;
+
+  /**
+   * Whether the model is currently loaded onto an available compute unit.
+   */
+  bool is_loaded = 3;
+
+  /**
+   * The compute unit that the model is currently loaded into, if it is loaded to one at all.
+   * When is_loaded is false, this field is unset (defaults to cpu) and should be ignored.
+   */
+  ComputeUnits loaded_compute_units = 4;
+
+  /**
+   * The compute units that this model supports using.
+   */
+  repeated ComputeUnits supported_compute_units = 5;
+}
+
+/**
+ * Represents the format of an image.
+ */
+enum ImageFormat {
+  /**
+   * The PNG image format.
+   */
+  png = 0;
+}
+
+/**
+ * Represents an image within the Stable Diffusion context.
+ * This could be an input image for an image generation request, or it could be
+ * a generated image from the Stable Diffusion model.
+ */
+message Image {
+  /**
+   * The format of the image.
+   */
+  ImageFormat format = 1;
+
+  /**
+   * The raw data of the image, in the specified format.
+   */
+  bytes data = 2;
+}
--- a/Common/tokenizer.proto
+++ b/Common/tokenizer.proto
@@ -0,0 +1,51 @@
+/**
+ * Tokenization for the Stable Diffusion RPC service.
+ */
+syntax = "proto3";
+package gay.pizza.stable.diffusion;
+
+/**
+ * Utilize a prefix of 'Sd' for Swift.
+ */
+option swift_prefix = "Sd";
+option java_multiple_files = true;
+
+/**
+ * Represents a request to tokenize an input.
+ */
+message TokenizeRequest {
+  /**
+   * The name of a loaded model to use for tokenization.
+   */
+  string model_name = 1;
+
+  /**
+   * The input string to tokenize.
+   */
+  string input = 2;
+}
+
+/**
+ * Represents a response to tokenization.
+ */
+message TokenizeResponse {
+  /**
+   * The tokens inside the input string.
+   */
+  repeated string tokens = 1;
+
+  /**
+   * The token IDs inside the input string.
+   */
+  repeated uint64 token_ids = 2;
+}
+
+/**
+ * The tokenizer service, for analyzing tokens for a loaded model.
+ */
+service TokenizerService {
+  /**
+   * Analyze the input using a loaded model and return the results.
+   */
+  rpc Tokenize(TokenizeRequest) returns (TokenizeResponse);
+}