From 6f40cc1ec237c502e7923bcb95194e7c1f6dfb8a Mon Sep 17 00:00:00 2001 From: ea_superstar Date: Fri, 2 Aug 2024 16:52:48 +0100 Subject: [PATCH 1/4] Added descriptions to the parameters. All parameters needing descriptions across: A2T, I2I, I2V, T2I, and Upscale have had their descriptions added. --- runner/app/routes/audio_to_text.py | 4 ++-- runner/app/routes/image_to_image.py | 22 +++++++++++----------- runner/app/routes/image_to_video.py | 20 ++++++++++---------- runner/app/routes/text_to_image.py | 20 ++++++++++---------- runner/app/routes/upscale.py | 12 ++++++------ 5 files changed, 39 insertions(+), 39 deletions(-) diff --git a/runner/app/routes/audio_to_text.py b/runner/app/routes/audio_to_text.py index 06d3eaad..b8d2dbbb 100644 --- a/runner/app/routes/audio_to_text.py +++ b/runner/app/routes/audio_to_text.py @@ -56,8 +56,8 @@ def handle_pipeline_error(e: Exception) -> JSONResponse: include_in_schema=False, ) async def audio_to_text( - audio: Annotated[UploadFile, File()], - model_id: Annotated[str, Form()] = "", + audio: Annotated[UploadFile, File(description="List of denoised audio samples of a NumPy array of shape (batch_size, num_channels, sample_rate).")], + model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "", pipeline: Pipeline = Depends(get_pipeline), token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)), ): diff --git a/runner/app/routes/image_to_image.py b/runner/app/routes/image_to_image.py index efb23793..dce558d8 100644 --- a/runner/app/routes/image_to_image.py +++ b/runner/app/routes/image_to_image.py @@ -36,19 +36,19 @@ include_in_schema=False, ) async def image_to_image( - prompt: Annotated[str, Form()], - image: Annotated[UploadFile, File()], - model_id: Annotated[str, Form()] = "", - strength: Annotated[float, Form()] = 0.8, - guidance_scale: Annotated[float, Form()] = 7.5, - image_guidance_scale: Annotated[float, Form()] = 1.5, - negative_prompt: Annotated[str, Form()] = "", - safety_check: Annotated[bool, Form()] = True, - seed: Annotated[int, Form()] = None, + prompt: Annotated[str, Form(description="The prompt or prompts to guide image generation. If not defined, you need to pass prompt_embeds.")], + image: Annotated[UploadFile, File(description="Image, numpy array or tensor representing an image batch to be used as the starting point. For both numpy array and pytorch tensor, the expected value range is between [0, 1] If it’s a tensor or a list or tensors, the expected shape should be (B, C, H, W) or (C, H, W). If it is a numpy array or a list of arrays, the expected shape should be (B, H, W, C) or (H, W, C) It can also accept image latents as image, but if passing latents directly it is not encoded again.")], + model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "", + strength: Annotated[float, Form(description=" Indicates extent to transform the reference image. Must be between 0 and 1. image is used as a starting point and more noise is added the higher the strength. The number of denoising steps depends on the amount of noise initially added. When strength is 1, added noise is maximum and the denoising process runs for the full number of iterations specified in num_inference_steps. 
A value of 1 essentially ignores image.")] = 0.8, + guidance_scale: Annotated[float, Form(description="A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. Guidance scale is enabled when guidance_scale > 1.")] = 7.5, + image_guidance_scale: Annotated[float, Form(description="Push the generated image towards the initial image. Image guidance scale is enabled by setting image_guidance_scale > 1. Higher image guidance scale encourages generated images that are closely linked to the source image, usually at the expense of lower image quality. This pipeline requires a value of at least 1.")] = 1.5, + negative_prompt: Annotated[str, Form(description="The prompt or prompts to guide what to not include in image generation. If not defined, you need to pass negative_prompt_embeds instead. Ignored when not using guidance (guidance_scale < 1).")] = "", + safety_check: Annotated[bool, Form(description="Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the model card for more details about a model’s potential harms.")] = True, + seed: Annotated[int, Form(description="The seed to set.")] = None, num_inference_steps: Annotated[ - int, Form() + int, Form(description="The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. This parameter is modulated by strength.") ] = 100, # NOTE: Hardcoded due to varying pipeline values. - num_images_per_prompt: Annotated[int, Form()] = 1, + num_images_per_prompt: Annotated[int, Form(description="The number of images to generate per prompt.")] = 1, pipeline: Pipeline = Depends(get_pipeline), token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)), ): diff --git a/runner/app/routes/image_to_video.py b/runner/app/routes/image_to_video.py index d6b66ebe..03985e51 100644 --- a/runner/app/routes/image_to_video.py +++ b/runner/app/routes/image_to_video.py @@ -35,17 +35,17 @@ include_in_schema=False, ) async def image_to_video( - image: Annotated[UploadFile, File()], - model_id: Annotated[str, Form()] = "", - height: Annotated[int, Form()] = 576, - width: Annotated[int, Form()] = 1024, - fps: Annotated[int, Form()] = 6, - motion_bucket_id: Annotated[int, Form()] = 127, - noise_aug_strength: Annotated[float, Form()] = 0.02, - seed: Annotated[int, Form()] = None, - safety_check: Annotated[bool, Form()] = True, + image: Annotated[UploadFile, File(description="Image, numpy array or tensor representing an image batch to be used as the starting point. For both numpy array and pytorch tensor, the expected value range is between [0, 1] If it’s a tensor or a list or tensors, the expected shape should be (B, C, H, W) or (C, H, W). If it is a numpy array or a list of arrays, the expected shape should be (B, H, W, C) or (H, W, C) It can also accept image latents as image, but if passing latents directly it is not encoded again.")], + model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. 
SG161222/RealVisXL_V4.0_Lightning:)")] = "", + height: Annotated[int, Form(description="The height in pixels of the generated image.")] = 576, + width: Annotated[int, Form(description="The width in pixels of the generated image.")] = 1024, + fps: Annotated[int, Form(description="the frames per second of the generated video.")] = 6, + motion_bucket_id: Annotated[int, Form(description="the motion bucket id to use for the generated video. This can be used to control the motion of the generated video. Increasing the motion bucket id increases the motion of the generated video.")] = 127, + noise_aug_strength: Annotated[float, Form(description="the amount of noise added to the conditioning image. The higher the values the less the video resembles the conditioning image. Increasing this value also increases the motion of the generated video.")] = 0.02, + seed: Annotated[int, Form(description="The seed to set.")] = None, + safety_check: Annotated[bool, Form(description="Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the model card for more details about a model’s potential harms.")] = True, num_inference_steps: Annotated[ - int, Form() + int, Form(description="The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.") ] = 25, # NOTE: Hardcoded due to varying pipeline values. pipeline: Pipeline = Depends(get_pipeline), token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)), diff --git a/runner/app/routes/text_to_image.py b/runner/app/routes/text_to_image.py index 2ce794a0..f5e56f53 100644 --- a/runner/app/routes/text_to_image.py +++ b/runner/app/routes/text_to_image.py @@ -22,17 +22,17 @@ class TextToImageParams(BaseModel): # supports OAPI 3.1 https://github.com/deepmap/oapi-codegen/issues/373 model_id: Annotated[ str, - Field(default="", description=""), + Field(default="", description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)"), ] - prompt: Annotated[str, Field(description="")] - height: Annotated[int, Field(default=576, description="")] - width: Annotated[int, Field(default=1024, description="")] - guidance_scale: Annotated[float, Field(default=7.5, description="")] - negative_prompt: Annotated[str, Field(default="", description="")] - safety_check: Annotated[bool, Field(default=True, description="")] - seed: Annotated[int, Field(default=None, description="")] - num_inference_steps: Annotated[int, Field(default=50, description="")] - num_images_per_prompt: Annotated[int, Field(default=1, description="")] + prompt: Annotated[str, Field(description="The prompt or prompts to guide image generation. If not defined, you need to pass prompt_embeds.")] + height: Annotated[int, Field(default=576, description="The height in pixels of the generated image.")] + width: Annotated[int, Field(default=1024, description="The width in pixels of the generated image.")] + guidance_scale: Annotated[float, Field(default=7.5, description="A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. Guidance scale is enabled when guidance_scale > 1.")] + negative_prompt: Annotated[str, Field(default="", description="The prompt or prompts to guide what to not include in image generation. If not defined, you need to pass negative_prompt_embeds instead. 
Ignored when not using guidance (guidance_scale < 1).")] + safety_check: Annotated[bool, Field(default=True, description="Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the model card for more details about a model’s potential harms.")] + seed: Annotated[int, Field(default=None, description="The seed to set.")] + num_inference_steps: Annotated[int, Field(default=50, description="The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.")] + num_images_per_prompt: Annotated[int, Field(default=1, description="The number of images to generate per prompt.")] RESPONSES = { diff --git a/runner/app/routes/upscale.py b/runner/app/routes/upscale.py index 635a80b9..1d989b87 100644 --- a/runner/app/routes/upscale.py +++ b/runner/app/routes/upscale.py @@ -36,13 +36,13 @@ include_in_schema=False, ) async def upscale( - prompt: Annotated[str, Form()], - image: Annotated[UploadFile, File()], - model_id: Annotated[str, Form()] = "", - safety_check: Annotated[bool, Form()] = True, - seed: Annotated[int, Form()] = None, + prompt: Annotated[str, Form(description="The prompt or prompts to guide image generation. If not defined, you need to pass prompt_embeds.")], + image: Annotated[UploadFile, File(description="Image, numpy array or tensor representing an image batch to be used as the starting point. For both numpy array and pytorch tensor, the expected value range is between [0, 1] If it’s a tensor or a list or tensors, the expected shape should be (B, C, H, W) or (C, H, W). If it is a numpy array or a list of arrays, the expected shape should be (B, H, W, C) or (H, W, C) It can also accept image latents as image, but if passing latents directly it is not encoded again.")], + model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "", + safety_check: Annotated[bool, Form(description=" Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the model card for more details about a model’s potential harms.")] = True, + seed: Annotated[int, Form(description="The seed to set.")] = None, num_inference_steps: Annotated[ - int, Form() + int, Form(description="The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. This parameter is modulated by strength.") ] = 75, # NOTE: Hardcoded due to varying pipeline values. pipeline: Pipeline = Depends(get_pipeline), token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)), From 0b152070137d6c96e96f8764eca6694cfd52ebbb Mon Sep 17 00:00:00 2001 From: ea_superstar Date: Sat, 10 Aug 2024 13:23:49 +0100 Subject: [PATCH 2/4] The descriptions have been updated to better apply to the current implementation. 
--- runner/app/routes/audio_to_text.py | 4 ++-- runner/app/routes/image_to_image.py | 6 +++--- runner/app/routes/image_to_video.py | 4 ++-- runner/app/routes/text_to_image.py | 4 ++-- runner/app/routes/upscale.py | 6 +++--- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/runner/app/routes/audio_to_text.py b/runner/app/routes/audio_to_text.py index b8d2dbbb..358b8f85 100644 --- a/runner/app/routes/audio_to_text.py +++ b/runner/app/routes/audio_to_text.py @@ -56,8 +56,8 @@ def handle_pipeline_error(e: Exception) -> JSONResponse: include_in_schema=False, ) async def audio_to_text( - audio: Annotated[UploadFile, File(description="List of denoised audio samples of a NumPy array of shape (batch_size, num_channels, sample_rate).")], - model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "", + audio: Annotated[UploadFile, File(description="This is the path to the audio file to be transcribed.")], + model_id: Annotated[str, Form(description="This is the diffusion model for image generation.")] = "", pipeline: Pipeline = Depends(get_pipeline), token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)), ): diff --git a/runner/app/routes/image_to_image.py b/runner/app/routes/image_to_image.py index dce558d8..a23f281e 100644 --- a/runner/app/routes/image_to_image.py +++ b/runner/app/routes/image_to_image.py @@ -36,9 +36,9 @@ include_in_schema=False, ) async def image_to_image( - prompt: Annotated[str, Form(description="The prompt or prompts to guide image generation. If not defined, you need to pass prompt_embeds.")], - image: Annotated[UploadFile, File(description="Image, numpy array or tensor representing an image batch to be used as the starting point. For both numpy array and pytorch tensor, the expected value range is between [0, 1] If it’s a tensor or a list or tensors, the expected shape should be (B, C, H, W) or (C, H, W). If it is a numpy array or a list of arrays, the expected shape should be (B, H, W, C) or (H, W, C) It can also accept image latents as image, but if passing latents directly it is not encoded again.")], - model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "", + prompt: Annotated[str, Form(description="This is the text description for the image. When prompting use + or - after the word to increase the weight of the word in generation, you can add multiple ++ or -- to increase or decrease weight.")], + image: Annotated[UploadFile, File(description="This holds the absolute path to the image file to be transformed.")], + model_id: Annotated[str, Form(description="This is the diffusion model for image generation.")] = "", strength: Annotated[float, Form(description=" Indicates extent to transform the reference image. Must be between 0 and 1. image is used as a starting point and more noise is added the higher the strength. The number of denoising steps depends on the amount of noise initially added. When strength is 1, added noise is maximum and the denoising process runs for the full number of iterations specified in num_inference_steps. A value of 1 essentially ignores image.")] = 0.8, guidance_scale: Annotated[float, Form(description="A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. 
Guidance scale is enabled when guidance_scale > 1.")] = 7.5, image_guidance_scale: Annotated[float, Form(description="Push the generated image towards the initial image. Image guidance scale is enabled by setting image_guidance_scale > 1. Higher image guidance scale encourages generated images that are closely linked to the source image, usually at the expense of lower image quality. This pipeline requires a value of at least 1.")] = 1.5, diff --git a/runner/app/routes/image_to_video.py b/runner/app/routes/image_to_video.py index 03985e51..b38c68b2 100644 --- a/runner/app/routes/image_to_video.py +++ b/runner/app/routes/image_to_video.py @@ -35,8 +35,8 @@ include_in_schema=False, ) async def image_to_video( - image: Annotated[UploadFile, File(description="Image, numpy array or tensor representing an image batch to be used as the starting point. For both numpy array and pytorch tensor, the expected value range is between [0, 1] If it’s a tensor or a list or tensors, the expected shape should be (B, C, H, W) or (C, H, W). If it is a numpy array or a list of arrays, the expected shape should be (B, H, W, C) or (H, W, C) It can also accept image latents as image, but if passing latents directly it is not encoded again.")], - model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "", + image: Annotated[UploadFile, File(description="This field holds the absolute path to the image file to be transformed.")], + model_id: Annotated[str, Form(description="This is the diffusion model for image generation.")] = "", height: Annotated[int, Form(description="The height in pixels of the generated image.")] = 576, width: Annotated[int, Form(description="The width in pixels of the generated image.")] = 1024, fps: Annotated[int, Form(description="the frames per second of the generated video.")] = 6, diff --git a/runner/app/routes/text_to_image.py b/runner/app/routes/text_to_image.py index f5e56f53..a57180c7 100644 --- a/runner/app/routes/text_to_image.py +++ b/runner/app/routes/text_to_image.py @@ -22,9 +22,9 @@ class TextToImageParams(BaseModel): # supports OAPI 3.1 https://github.com/deepmap/oapi-codegen/issues/373 model_id: Annotated[ str, - Field(default="", description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)"), + Field(default="", description="This is the diffusion model for image generation."), ] - prompt: Annotated[str, Field(description="The prompt or prompts to guide image generation. If not defined, you need to pass prompt_embeds.")] + prompt: Annotated[str, Field(description="This is the text description for the image. When prompting use + or - after the word to increase the weight of the word in generation, you can add multiple ++ or -- to increase or decrease weight.")] height: Annotated[int, Field(default=576, description="The height in pixels of the generated image.")] width: Annotated[int, Field(default=1024, description="The width in pixels of the generated image.")] guidance_scale: Annotated[float, Field(default=7.5, description="A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. 
Guidance scale is enabled when guidance_scale > 1.")] diff --git a/runner/app/routes/upscale.py b/runner/app/routes/upscale.py index 1d989b87..888a8c1c 100644 --- a/runner/app/routes/upscale.py +++ b/runner/app/routes/upscale.py @@ -36,9 +36,9 @@ include_in_schema=False, ) async def upscale( - prompt: Annotated[str, Form(description="The prompt or prompts to guide image generation. If not defined, you need to pass prompt_embeds.")], - image: Annotated[UploadFile, File(description="Image, numpy array or tensor representing an image batch to be used as the starting point. For both numpy array and pytorch tensor, the expected value range is between [0, 1] If it’s a tensor or a list or tensors, the expected shape should be (B, C, H, W) or (C, H, W). If it is a numpy array or a list of arrays, the expected shape should be (B, H, W, C) or (H, W, C) It can also accept image latents as image, but if passing latents directly it is not encoded again.")], - model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "", + prompt: Annotated[str, Form(description="This is the text description for the image. When prompting use + or - after the word to increase the weight of the word in generation, you can add multiple ++ or -- to increase or decrease weight.")], + image: Annotated[UploadFile, File(description="This field holds the absolute path to the image file to be upscaled.")], + model_id: Annotated[str, Form(description="This is the diffusion model for image generation.")] = "", safety_check: Annotated[bool, Form(description=" Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the model card for more details about a model’s potential harms.")] = True, seed: Annotated[int, Form(description="The seed to set.")] = None, num_inference_steps: Annotated[ From 99404bb9b72ef9e91c8a73020514d8d58ba4debb Mon Sep 17 00:00:00 2001 From: Rick Staa Date: Tue, 13 Aug 2024 14:50:18 +0200 Subject: [PATCH 3/4] refactor: shorten parameter descriptions This commit shortens some of the parameter descriptions since the longer description is found on huggingface. 
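
As an illustrative example (the exact text is taken from the image_to_image
route in the diff below), a parameter now carries a one-line summary such as:

    seed: Annotated[int, Form(description="Seed for random number generation.")] = None,

while the longer behavioural notes remain with the model and pipeline
documentation on Hugging Face.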
--- runner/app/routes/audio_to_text.py | 12 ++++-- runner/app/routes/image_to_image.py | 65 +++++++++++++++++++++++------ runner/app/routes/image_to_video.py | 52 +++++++++++++++++------ runner/app/routes/text_to_image.py | 65 +++++++++++++++++++++++------ runner/app/routes/upscale.py | 32 ++++++++++---- 5 files changed, 177 insertions(+), 49 deletions(-) diff --git a/runner/app/routes/audio_to_text.py b/runner/app/routes/audio_to_text.py index 358b8f85..5e053567 100644 --- a/runner/app/routes/audio_to_text.py +++ b/runner/app/routes/audio_to_text.py @@ -5,8 +5,7 @@ from app.dependencies import get_pipeline from app.pipelines.base import Pipeline from app.pipelines.utils.audio import AudioConversionError -from app.routes.util import (HTTPError, TextResponse, file_exceeds_max_size, - http_error) +from app.routes.util import HTTPError, TextResponse, file_exceeds_max_size, http_error from fastapi import APIRouter, Depends, File, Form, UploadFile, status from fastapi.responses import JSONResponse from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer @@ -56,8 +55,13 @@ def handle_pipeline_error(e: Exception) -> JSONResponse: include_in_schema=False, ) async def audio_to_text( - audio: Annotated[UploadFile, File(description="This is the path to the audio file to be transcribed.")], - model_id: Annotated[str, Form(description="This is the diffusion model for image generation.")] = "", + audio: Annotated[ + UploadFile, File(description="Uploaded audio file to be transcribed.") + ], + model_id: Annotated[ + str, + Form(description="Hugging Face model ID used for transcription."), + ] = "", pipeline: Pipeline = Depends(get_pipeline), token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)), ): diff --git a/runner/app/routes/image_to_image.py b/runner/app/routes/image_to_image.py index a23f281e..1fe8d7a6 100644 --- a/runner/app/routes/image_to_image.py +++ b/runner/app/routes/image_to_image.py @@ -5,8 +5,7 @@ from app.dependencies import get_pipeline from app.pipelines.base import Pipeline -from app.routes.util import (HTTPError, ImageResponse, http_error, - image_to_data_url) +from app.routes.util import HTTPError, ImageResponse, http_error, image_to_data_url from fastapi import APIRouter, Depends, File, Form, UploadFile, status from fastapi.responses import JSONResponse from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer @@ -36,19 +35,59 @@ include_in_schema=False, ) async def image_to_image( - prompt: Annotated[str, Form(description="This is the text description for the image. When prompting use + or - after the word to increase the weight of the word in generation, you can add multiple ++ or -- to increase or decrease weight.")], - image: Annotated[UploadFile, File(description="This holds the absolute path to the image file to be transformed.")], - model_id: Annotated[str, Form(description="This is the diffusion model for image generation.")] = "", - strength: Annotated[float, Form(description=" Indicates extent to transform the reference image. Must be between 0 and 1. image is used as a starting point and more noise is added the higher the strength. The number of denoising steps depends on the amount of noise initially added. When strength is 1, added noise is maximum and the denoising process runs for the full number of iterations specified in num_inference_steps. 
A value of 1 essentially ignores image.")] = 0.8, - guidance_scale: Annotated[float, Form(description="A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. Guidance scale is enabled when guidance_scale > 1.")] = 7.5, - image_guidance_scale: Annotated[float, Form(description="Push the generated image towards the initial image. Image guidance scale is enabled by setting image_guidance_scale > 1. Higher image guidance scale encourages generated images that are closely linked to the source image, usually at the expense of lower image quality. This pipeline requires a value of at least 1.")] = 1.5, - negative_prompt: Annotated[str, Form(description="The prompt or prompts to guide what to not include in image generation. If not defined, you need to pass negative_prompt_embeds instead. Ignored when not using guidance (guidance_scale < 1).")] = "", - safety_check: Annotated[bool, Form(description="Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the model card for more details about a model’s potential harms.")] = True, - seed: Annotated[int, Form(description="The seed to set.")] = None, + prompt: Annotated[ + str, + Form(description="Text prompt(s) to guide image generation."), + ], + image: Annotated[ + UploadFile, + File(description="Uploaded image to modify with the pipeline."), + ], + model_id: Annotated[ + str, + Form(description="Hugging Face model ID used for image generation."), + ] = "", + strength: Annotated[ + float, + Form( + description="Degree of transformation applied to the reference image (0 to 1)." + ), + ] = 0.8, + guidance_scale: Annotated[ + float, + Form( + description="Encourages model to generate images closely linked to the text prompt (higher values may reduce image quality)." + ), + ] = 7.5, + image_guidance_scale: Annotated[ + float, + Form( + description="Degree to which the generated image is pushed towards the initial image." + ), + ] = 1.5, + negative_prompt: Annotated[ + str, + Form( + description="Text prompt(s) to guide what to exclude from image generation. Ignored if guidance_scale < 1." + ), + ] = "", + safety_check: Annotated[ + bool, + Form( + description="Perform a safety check to estimate if generated images could be offensive or harmful." + ), + ] = True, + seed: Annotated[int, Form(description="Seed for random number generation.")] = None, num_inference_steps: Annotated[ - int, Form(description="The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. This parameter is modulated by strength.") + int, + Form( + description="Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength." + ), ] = 100, # NOTE: Hardcoded due to varying pipeline values. 
- num_images_per_prompt: Annotated[int, Form(description="The number of images to generate per prompt.")] = 1, + num_images_per_prompt: Annotated[ + int, + Form(description="Number of images to generate per prompt."), + ] = 1, pipeline: Pipeline = Depends(get_pipeline), token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)), ): diff --git a/runner/app/routes/image_to_video.py b/runner/app/routes/image_to_video.py index b38c68b2..7f5bdaa2 100644 --- a/runner/app/routes/image_to_video.py +++ b/runner/app/routes/image_to_video.py @@ -5,8 +5,7 @@ from app.dependencies import get_pipeline from app.pipelines.base import Pipeline -from app.routes.util import (HTTPError, VideoResponse, http_error, - image_to_data_url) +from app.routes.util import HTTPError, VideoResponse, http_error, image_to_data_url from fastapi import APIRouter, Depends, File, Form, UploadFile, status from fastapi.responses import JSONResponse from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer @@ -35,17 +34,46 @@ include_in_schema=False, ) async def image_to_video( - image: Annotated[UploadFile, File(description="This field holds the absolute path to the image file to be transformed.")], - model_id: Annotated[str, Form(description="This is the diffusion model for image generation.")] = "", - height: Annotated[int, Form(description="The height in pixels of the generated image.")] = 576, - width: Annotated[int, Form(description="The width in pixels of the generated image.")] = 1024, - fps: Annotated[int, Form(description="the frames per second of the generated video.")] = 6, - motion_bucket_id: Annotated[int, Form(description="the motion bucket id to use for the generated video. This can be used to control the motion of the generated video. Increasing the motion bucket id increases the motion of the generated video.")] = 127, - noise_aug_strength: Annotated[float, Form(description="the amount of noise added to the conditioning image. The higher the values the less the video resembles the conditioning image. Increasing this value also increases the motion of the generated video.")] = 0.02, - seed: Annotated[int, Form(description="The seed to set.")] = None, - safety_check: Annotated[bool, Form(description="Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the model card for more details about a model’s potential harms.")] = True, + image: Annotated[ + UploadFile, + File(description="Uploaded image to generate a video from."), + ], + model_id: Annotated[ + str, Form(description="Hugging Face model ID used for video generation.") + ] = "", + height: Annotated[ + int, Form(description="The height in pixels of the generated video.") + ] = 576, + width: Annotated[ + int, Form(description="The width in pixels of the generated video.") + ] = 1024, + fps: Annotated[ + int, Form(description="The frames per second of the generated video.") + ] = 6, + motion_bucket_id: Annotated[ + int, + Form( + description="Used for conditioning the amount of motion for the generation. The higher the number the more motion will be in the video." + ), + ] = 127, + noise_aug_strength: Annotated[ + float, + Form( + description="Amount of noise added to the conditioning image. Higher values reduce resemblance to the conditioning image and increase motion." + ), + ] = 0.02, + safety_check: Annotated[ + bool, + Form( + description="Perform a safety check to estimate if generated images could be offensive or harmful." 
+ ), + ] = True, + seed: Annotated[int, Form(description="Seed for random number generation.")] = None, num_inference_steps: Annotated[ - int, Form(description="The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.") + int, + Form( + description="Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength." + ), ] = 25, # NOTE: Hardcoded due to varying pipeline values. pipeline: Pipeline = Depends(get_pipeline), token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)), diff --git a/runner/app/routes/text_to_image.py b/runner/app/routes/text_to_image.py index a57180c7..2d05c775 100644 --- a/runner/app/routes/text_to_image.py +++ b/runner/app/routes/text_to_image.py @@ -5,8 +5,7 @@ from app.dependencies import get_pipeline from app.pipelines.base import Pipeline -from app.routes.util import (HTTPError, ImageResponse, http_error, - image_to_data_url) +from app.routes.util import HTTPError, ImageResponse, http_error, image_to_data_url from fastapi import APIRouter, Depends, status from fastapi.responses import JSONResponse from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer @@ -22,17 +21,59 @@ class TextToImageParams(BaseModel): # supports OAPI 3.1 https://github.com/deepmap/oapi-codegen/issues/373 model_id: Annotated[ str, - Field(default="", description="This is the diffusion model for image generation."), + Field( + default="", description="Hugging Face model ID used for image generation." + ), + ] + prompt: Annotated[ + str, + Field( + description="Text prompt(s) to guide image generation. Separate multiple prompts with '|' if supported by the model." + ), + ] + height: Annotated[ + int, + Field(default=576, description="The height in pixels of the generated image."), + ] + width: Annotated[ + int, + Field(default=1024, description="The width in pixels of the generated image."), + ] + guidance_scale: Annotated[ + float, + Field( + default=7.5, + description="Encourages model to generate images closely linked to the text prompt (higher values may reduce image quality).", + ), + ] + negative_prompt: Annotated[ + str, + Field( + default="", + description="Text prompt(s) to guide what to exclude from image generation. Ignored if guidance_scale < 1.", + ), + ] + safety_check: Annotated[ + bool, + Field( + default=True, + description="Perform a safety check to estimate if generated images could be offensive or harmful.", + ), + ] + seed: Annotated[ + int, Field(default=None, description="Seed for random number generation.") + ] + num_inference_steps: Annotated[ + int, + Field( + default=50, + description="Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength.", + ), + ] + num_images_per_prompt: Annotated[ + int, + Field(default=1, description="Number of images to generate per prompt."), ] - prompt: Annotated[str, Field(description="This is the text description for the image. 
When prompting use + or - after the word to increase the weight of the word in generation, you can add multiple ++ or -- to increase or decrease weight.")] - height: Annotated[int, Field(default=576, description="The height in pixels of the generated image.")] - width: Annotated[int, Field(default=1024, description="The width in pixels of the generated image.")] - guidance_scale: Annotated[float, Field(default=7.5, description="A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. Guidance scale is enabled when guidance_scale > 1.")] - negative_prompt: Annotated[str, Field(default="", description="The prompt or prompts to guide what to not include in image generation. If not defined, you need to pass negative_prompt_embeds instead. Ignored when not using guidance (guidance_scale < 1).")] - safety_check: Annotated[bool, Field(default=True, description="Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the model card for more details about a model’s potential harms.")] - seed: Annotated[int, Field(default=None, description="The seed to set.")] - num_inference_steps: Annotated[int, Field(default=50, description="The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.")] - num_images_per_prompt: Annotated[int, Field(default=1, description="The number of images to generate per prompt.")] RESPONSES = { diff --git a/runner/app/routes/upscale.py b/runner/app/routes/upscale.py index 888a8c1c..ee5c9d77 100644 --- a/runner/app/routes/upscale.py +++ b/runner/app/routes/upscale.py @@ -5,8 +5,7 @@ from app.dependencies import get_pipeline from app.pipelines.base import Pipeline -from app.routes.util import (HTTPError, ImageResponse, http_error, - image_to_data_url) +from app.routes.util import HTTPError, ImageResponse, http_error, image_to_data_url from fastapi import APIRouter, Depends, File, Form, UploadFile, status from fastapi.responses import JSONResponse from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer @@ -36,13 +35,30 @@ include_in_schema=False, ) async def upscale( - prompt: Annotated[str, Form(description="This is the text description for the image. When prompting use + or - after the word to increase the weight of the word in generation, you can add multiple ++ or -- to increase or decrease weight.")], - image: Annotated[UploadFile, File(description="This field holds the absolute path to the image file to be upscaled.")], - model_id: Annotated[str, Form(description="This is the diffusion model for image generation.")] = "", - safety_check: Annotated[bool, Form(description=" Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the model card for more details about a model’s potential harms.")] = True, - seed: Annotated[int, Form(description="The seed to set.")] = None, + prompt: Annotated[ + str, + Form(description="Text prompt(s) to guide upscaled image generation."), + ], + image: Annotated[ + UploadFile, + File(description="Uploaded image to modify with the pipeline."), + ], + model_id: Annotated[ + str, + Form(description="Hugging Face model ID used for upscaled image generation."), + ] = "", + safety_check: Annotated[ + bool, + Form( + description="Perform a safety check to estimate if generated images could be offensive or harmful." 
+ ), + ] = True, + seed: Annotated[int, Form(description="Seed for random number generation.")] = None, num_inference_steps: Annotated[ - int, Form(description="The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. This parameter is modulated by strength.") + int, + Form( + description="Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength." + ), ] = 75, # NOTE: Hardcoded due to varying pipeline values. pipeline: Pipeline = Depends(get_pipeline), token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)), From 2c0e341108b4ff18faba2362dd5d6389d62694e4 Mon Sep 17 00:00:00 2001 From: Rick Staa Date: Tue, 13 Aug 2024 14:57:03 +0200 Subject: [PATCH 4/4] chore: update OpenAPI spec and golang bindings This commit ensures that the OpenAPI spec and golang bindings are updated with the new descriptions. --- runner/openapi.json | 73 ++++++++++----- worker/runner.gen.go | 212 ++++++++++++++++++++++++++++++------------- 2 files changed, 199 insertions(+), 86 deletions(-) diff --git a/runner/openapi.json b/runner/openapi.json index 7aa265ad..f412c768 100644 --- a/runner/openapi.json +++ b/runner/openapi.json @@ -426,11 +426,13 @@ "audio": { "type": "string", "format": "binary", - "title": "Audio" + "title": "Audio", + "description": "Uploaded audio file to be transcribed." }, "model_id": { "type": "string", "title": "Model Id", + "description": "Hugging Face model ID used for transcription.", "default": "" } }, @@ -444,55 +446,66 @@ "properties": { "prompt": { "type": "string", - "title": "Prompt" + "title": "Prompt", + "description": "Text prompt(s) to guide image generation." }, "image": { "type": "string", "format": "binary", - "title": "Image" + "title": "Image", + "description": "Uploaded image to modify with the pipeline." }, "model_id": { "type": "string", "title": "Model Id", + "description": "Hugging Face model ID used for image generation.", "default": "" }, "strength": { "type": "number", "title": "Strength", + "description": "Degree of transformation applied to the reference image (0 to 1).", "default": 0.8 }, "guidance_scale": { "type": "number", "title": "Guidance Scale", + "description": "Encourages model to generate images closely linked to the text prompt (higher values may reduce image quality).", "default": 7.5 }, "image_guidance_scale": { "type": "number", "title": "Image Guidance Scale", + "description": "Degree to which the generated image is pushed towards the initial image.", "default": 1.5 }, "negative_prompt": { "type": "string", "title": "Negative Prompt", + "description": "Text prompt(s) to guide what to exclude from image generation. Ignored if guidance_scale < 1.", "default": "" }, "safety_check": { "type": "boolean", "title": "Safety Check", + "description": "Perform a safety check to estimate if generated images could be offensive or harmful.", "default": true }, "seed": { "type": "integer", - "title": "Seed" + "title": "Seed", + "description": "Seed for random number generation." }, "num_inference_steps": { "type": "integer", "title": "Num Inference Steps", + "description": "Number of denoising steps. More steps usually lead to higher quality images but slower inference. 
Modulated by strength.", "default": 100 }, "num_images_per_prompt": { "type": "integer", "title": "Num Images Per Prompt", + "description": "Number of images to generate per prompt.", "default": 1 } }, @@ -508,50 +521,60 @@ "image": { "type": "string", "format": "binary", - "title": "Image" + "title": "Image", + "description": "Uploaded image to generate a video from." }, "model_id": { "type": "string", "title": "Model Id", + "description": "Hugging Face model ID used for video generation.", "default": "" }, "height": { "type": "integer", "title": "Height", + "description": "The height in pixels of the generated video.", "default": 576 }, "width": { "type": "integer", "title": "Width", + "description": "The width in pixels of the generated video.", "default": 1024 }, "fps": { "type": "integer", "title": "Fps", + "description": "The frames per second of the generated video.", "default": 6 }, "motion_bucket_id": { "type": "integer", "title": "Motion Bucket Id", + "description": "Used for conditioning the amount of motion for the generation. The higher the number the more motion will be in the video.", "default": 127 }, "noise_aug_strength": { "type": "number", "title": "Noise Aug Strength", + "description": "Amount of noise added to the conditioning image. Higher values reduce resemblance to the conditioning image and increase motion.", "default": 0.02 }, - "seed": { - "type": "integer", - "title": "Seed" - }, "safety_check": { "type": "boolean", "title": "Safety Check", + "description": "Perform a safety check to estimate if generated images could be offensive or harmful.", "default": true }, + "seed": { + "type": "integer", + "title": "Seed", + "description": "Seed for random number generation." + }, "num_inference_steps": { "type": "integer", "title": "Num Inference Steps", + "description": "Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength.", "default": 25 } }, @@ -565,30 +588,36 @@ "properties": { "prompt": { "type": "string", - "title": "Prompt" + "title": "Prompt", + "description": "Text prompt(s) to guide upscaled image generation." }, "image": { "type": "string", "format": "binary", - "title": "Image" + "title": "Image", + "description": "Uploaded image to modify with the pipeline." }, "model_id": { "type": "string", "title": "Model Id", + "description": "Hugging Face model ID used for upscaled image generation.", "default": "" }, "safety_check": { "type": "boolean", "title": "Safety Check", + "description": "Perform a safety check to estimate if generated images could be offensive or harmful.", "default": true }, "seed": { "type": "integer", - "title": "Seed" + "title": "Seed", + "description": "Seed for random number generation." }, "num_inference_steps": { "type": "integer", "title": "Num Inference Steps", + "description": "Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength.", "default": 75 } }, @@ -700,59 +729,59 @@ "model_id": { "type": "string", "title": "Model Id", - "description": "", + "description": "Hugging Face model ID used for image generation.", "default": "" }, "prompt": { "type": "string", "title": "Prompt", - "description": "" + "description": "Text prompt(s) to guide image generation. Separate multiple prompts with '|' if supported by the model." 
}, "height": { "type": "integer", "title": "Height", - "description": "", + "description": "The height in pixels of the generated image.", "default": 576 }, "width": { "type": "integer", "title": "Width", - "description": "", + "description": "The width in pixels of the generated image.", "default": 1024 }, "guidance_scale": { "type": "number", "title": "Guidance Scale", - "description": "", + "description": "Encourages model to generate images closely linked to the text prompt (higher values may reduce image quality).", "default": 7.5 }, "negative_prompt": { "type": "string", "title": "Negative Prompt", - "description": "", + "description": "Text prompt(s) to guide what to exclude from image generation. Ignored if guidance_scale < 1.", "default": "" }, "safety_check": { "type": "boolean", "title": "Safety Check", - "description": "", + "description": "Perform a safety check to estimate if generated images could be offensive or harmful.", "default": true }, "seed": { "type": "integer", "title": "Seed", - "description": "" + "description": "Seed for random number generation." }, "num_inference_steps": { "type": "integer", "title": "Num Inference Steps", - "description": "", + "description": "Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength.", "default": 50 }, "num_images_per_prompt": { "type": "integer", "title": "Num Images Per Prompt", - "description": "", + "description": "Number of images to generate per prompt.", "default": 1 } }, diff --git a/worker/runner.gen.go b/worker/runner.gen.go index 4d7e6cea..3156c11e 100644 --- a/worker/runner.gen.go +++ b/worker/runner.gen.go @@ -33,47 +33,101 @@ type APIError struct { // BodyAudioToTextAudioToTextPost defines model for Body_audio_to_text_audio_to_text_post. type BodyAudioToTextAudioToTextPost struct { - Audio openapi_types.File `json:"audio"` - ModelId *string `json:"model_id,omitempty"` + // Audio Uploaded audio file to be transcribed. + Audio openapi_types.File `json:"audio"` + + // ModelId Hugging Face model ID used for transcription. + ModelId *string `json:"model_id,omitempty"` } // BodyImageToImageImageToImagePost defines model for Body_image_to_image_image_to_image_post. type BodyImageToImageImageToImagePost struct { - GuidanceScale *float32 `json:"guidance_scale,omitempty"` - Image openapi_types.File `json:"image"` - ImageGuidanceScale *float32 `json:"image_guidance_scale,omitempty"` - ModelId *string `json:"model_id,omitempty"` - NegativePrompt *string `json:"negative_prompt,omitempty"` - NumImagesPerPrompt *int `json:"num_images_per_prompt,omitempty"` - NumInferenceSteps *int `json:"num_inference_steps,omitempty"` - Prompt string `json:"prompt"` - SafetyCheck *bool `json:"safety_check,omitempty"` - Seed *int `json:"seed,omitempty"` - Strength *float32 `json:"strength,omitempty"` + // GuidanceScale Encourages model to generate images closely linked to the text prompt (higher values may reduce image quality). + GuidanceScale *float32 `json:"guidance_scale,omitempty"` + + // Image Uploaded image to modify with the pipeline. + Image openapi_types.File `json:"image"` + + // ImageGuidanceScale Degree to which the generated image is pushed towards the initial image. + ImageGuidanceScale *float32 `json:"image_guidance_scale,omitempty"` + + // ModelId Hugging Face model ID used for image generation. + ModelId *string `json:"model_id,omitempty"` + + // NegativePrompt Text prompt(s) to guide what to exclude from image generation. Ignored if guidance_scale < 1. 
+ NegativePrompt *string `json:"negative_prompt,omitempty"` + + // NumImagesPerPrompt Number of images to generate per prompt. + NumImagesPerPrompt *int `json:"num_images_per_prompt,omitempty"` + + // NumInferenceSteps Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength. + NumInferenceSteps *int `json:"num_inference_steps,omitempty"` + + // Prompt Text prompt(s) to guide image generation. + Prompt string `json:"prompt"` + + // SafetyCheck Perform a safety check to estimate if generated images could be offensive or harmful. + SafetyCheck *bool `json:"safety_check,omitempty"` + + // Seed Seed for random number generation. + Seed *int `json:"seed,omitempty"` + + // Strength Degree of transformation applied to the reference image (0 to 1). + Strength *float32 `json:"strength,omitempty"` } // BodyImageToVideoImageToVideoPost defines model for Body_image_to_video_image_to_video_post. type BodyImageToVideoImageToVideoPost struct { - Fps *int `json:"fps,omitempty"` - Height *int `json:"height,omitempty"` - Image openapi_types.File `json:"image"` - ModelId *string `json:"model_id,omitempty"` - MotionBucketId *int `json:"motion_bucket_id,omitempty"` - NoiseAugStrength *float32 `json:"noise_aug_strength,omitempty"` - NumInferenceSteps *int `json:"num_inference_steps,omitempty"` - SafetyCheck *bool `json:"safety_check,omitempty"` - Seed *int `json:"seed,omitempty"` - Width *int `json:"width,omitempty"` + // Fps The frames per second of the generated video. + Fps *int `json:"fps,omitempty"` + + // Height The height in pixels of the generated video. + Height *int `json:"height,omitempty"` + + // Image Uploaded image to generate a video from. + Image openapi_types.File `json:"image"` + + // ModelId Hugging Face model ID used for video generation. + ModelId *string `json:"model_id,omitempty"` + + // MotionBucketId Used for conditioning the amount of motion for the generation. The higher the number the more motion will be in the video. + MotionBucketId *int `json:"motion_bucket_id,omitempty"` + + // NoiseAugStrength Amount of noise added to the conditioning image. Higher values reduce resemblance to the conditioning image and increase motion. + NoiseAugStrength *float32 `json:"noise_aug_strength,omitempty"` + + // NumInferenceSteps Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength. + NumInferenceSteps *int `json:"num_inference_steps,omitempty"` + + // SafetyCheck Perform a safety check to estimate if generated images could be offensive or harmful. + SafetyCheck *bool `json:"safety_check,omitempty"` + + // Seed Seed for random number generation. + Seed *int `json:"seed,omitempty"` + + // Width The width in pixels of the generated video. + Width *int `json:"width,omitempty"` } // BodyUpscaleUpscalePost defines model for Body_upscale_upscale_post. type BodyUpscaleUpscalePost struct { - Image openapi_types.File `json:"image"` - ModelId *string `json:"model_id,omitempty"` - NumInferenceSteps *int `json:"num_inference_steps,omitempty"` - Prompt string `json:"prompt"` - SafetyCheck *bool `json:"safety_check,omitempty"` - Seed *int `json:"seed,omitempty"` + // Image Uploaded image to modify with the pipeline. + Image openapi_types.File `json:"image"` + + // ModelId Hugging Face model ID used for upscaled image generation. + ModelId *string `json:"model_id,omitempty"` + + // NumInferenceSteps Number of denoising steps. 
More steps usually lead to higher quality images but slower inference. Modulated by strength. + NumInferenceSteps *int `json:"num_inference_steps,omitempty"` + + // Prompt Text prompt(s) to guide upscaled image generation. + Prompt string `json:"prompt"` + + // SafetyCheck Perform a safety check to estimate if generated images could be offensive or harmful. + SafetyCheck *bool `json:"safety_check,omitempty"` + + // Seed Seed for random number generation. + Seed *int `json:"seed,omitempty"` } // HTTPError defines model for HTTPError. @@ -111,16 +165,35 @@ type TextResponse struct { // TextToImageParams defines model for TextToImageParams. type TextToImageParams struct { - GuidanceScale *float32 `json:"guidance_scale,omitempty"` - Height *int `json:"height,omitempty"` - ModelId *string `json:"model_id,omitempty"` - NegativePrompt *string `json:"negative_prompt,omitempty"` - NumImagesPerPrompt *int `json:"num_images_per_prompt,omitempty"` - NumInferenceSteps *int `json:"num_inference_steps,omitempty"` - Prompt string `json:"prompt"` - SafetyCheck *bool `json:"safety_check,omitempty"` - Seed *int `json:"seed,omitempty"` - Width *int `json:"width,omitempty"` + // GuidanceScale Encourages model to generate images closely linked to the text prompt (higher values may reduce image quality). + GuidanceScale *float32 `json:"guidance_scale,omitempty"` + + // Height The height in pixels of the generated image. + Height *int `json:"height,omitempty"` + + // ModelId Hugging Face model ID used for image generation. + ModelId *string `json:"model_id,omitempty"` + + // NegativePrompt Text prompt(s) to guide what to exclude from image generation. Ignored if guidance_scale < 1. + NegativePrompt *string `json:"negative_prompt,omitempty"` + + // NumImagesPerPrompt Number of images to generate per prompt. + NumImagesPerPrompt *int `json:"num_images_per_prompt,omitempty"` + + // NumInferenceSteps Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength. + NumInferenceSteps *int `json:"num_inference_steps,omitempty"` + + // Prompt Text prompt(s) to guide image generation. Separate multiple prompts with '|' if supported by the model. + Prompt string `json:"prompt"` + + // SafetyCheck Perform a safety check to estimate if generated images could be offensive or harmful. + SafetyCheck *bool `json:"safety_check,omitempty"` + + // Seed Seed for random number generation. + Seed *int `json:"seed,omitempty"` + + // Width The width in pixels of the generated image. + Width *int `json:"width,omitempty"` } // ValidationError defines model for ValidationError. 
@@ -1481,31 +1554,42 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xZ227bOBN+FYL/f+nEhzabhe+SbLcNtoegdrsXRWAw0thmK5FaHtJ6A7/7gkNZomSp", - "cpDEC2R9Zcsaznxz+IZD+o5GMs2kAGE0Hd9RHS0hZfj17OrylVJSue+ZkhkowwHfpHrhPgw3CdAxfacX", - "tEfNKnMP2iguFnS97lEFf1muIKbjL7jkulcsKXQX6+TNV4gMXffouYxXM2ZjLmdGzgz8MLWnTGqzDQpl", - "3Je5VCkzdExvuGBqRQOrKLIFtUdTGUMy47FbHsOc2cStD1a+cwLkMu7006MIPN3Nm7Yw8JQtwIn6L7XH", - "5kAsLI+ZiGCmI+YgBC6dHp+UyF7ncmSCcgUEYdMbUA4CWvl5SC9RpCGkHuFPsAxDLKiGdCN6QKJ6VMCC", - "GX4Ls0zJNDOtOt7ncuTKyzWpsqnPgZ5loJoUDgN9NiXooCZXoLa0cmFg4d1DtWIOCjBmBjJdVToY1NRu", - "hMkEhZuUluA2K9v90mwOZjWLlhB9q1g2ykJpeoJi5ALFCjU3UibABOoBiEOLE/fcBE4bBWJhlhVjg+Nf", - "A1sbia1yqFEv23jly7bOwR2o1MnCWx6DrD82s3BeS90vJZzfWxK1BL5YVqvo5DRY98a/b1r6EKY+iFOp", - "NFyK2Y2NvoGpKxmOTkMtTpKco2RFW0gAyTXMmF3MWgpjMAoI4ITJmV2Q9hrp5tTo5P6U2jtNvvO4Forh", - "YPSytPQnvt9eWaNIBzPay7uNGTbDxl58NnPhX6vOrtyfnjyrdnq/htiYu4ZEv5lOr1oGwRgM44n79n8F", - "czqm/+uX42Q/nyX7xbBXB5gvD4CVtlqAfGYJj5nrJJ2QuIFUd2Gr61uXWH7zmgogTCm2Qh9CtHUFTbiB", - "JWZ5sSmCKl5tmLHVqqQf/qDh/ocCTYNnuTGUBhrsI7c+gs6k0NDCTr1zxN5BzFkYJz/aNMVpq/XoMNdV", - "WA24vaUtvELPv4dkeO+eH9RdrUpCuU8q6ZzzLcporxERBZ554A0eTeGHaU9EtLTi2+6JQPEwERd+fT0R", - "PerOGaGDDkanh8YL5aAC7ypOtDg5lZjdK6aYd+SpjigPmJn+42eJk+d2lHikGSn3sVbw1YJuqPrOjSmR", - "UYXaTKw+zOn4y91W6O62IF4HLH8rIzTTwPP6vQxo3TJV+R9KUcRMpu7Xrr7g/PCmcskgUjtshp/dUNne", - "A+eKpbXN6J67Ur33bQ5dXnHHLpWbD12q4G1wyLfhLUd267nOTgrasDQLXQ1wT4v3HdBNKOiMBU54jFvg", - "kV2RVdysJi6OHrmbas6BKVDFhSBS0v9UKFkak9G108HFXHre6UjxDItzTM8EYVmWcF+txEiirCBnlyTj", - "GSRc+GRsiprfQgag3PuPVgg0dAtKe12D4+HxwEVLZiBYxumYvsCfejRjZomw+3itdmTk0Sb0m8OISwuC", - "uIw3l4BTmefDRRC0cQMxbsFSGBC4KrWJ4RlTpu9OLUcxM6y8IO0qx91u/dbVHLrGiD/4YkOvRoNBDVcQ", - "1P5X7cKzK6jKxo22qxmb2CgCrec2IaVYj758RAjlfN9g/5zF5KPPh7c73I/dT4JZs5SK/w0xGh6+2I/h", - "3FnyShhuVmQqJXnL1MJHfTR6VBBbB51tOKUIKQ5DJ/tK/qUwoARLyATULShSnhg3LQr3yrA5fbleX/eo", - "tmnK1GrDbDKVBLntlvaXeDLCkRMaeoE/ONEn5Fx4NNuVcuvQqRwieoNToutwxYVKc4vDUSWfWJ64x+1w", - "q7rnLlc9Vh7aXHubO3SY+3YY/zfVVPojWI2UeF3aSUqcJ/dFyvYL3T2TsjpFH0h5IOUTkNJTC0npZuwd", - "NsrgZP9TSj5s5q7eHRy2wwPzngnzXHHXdsP8z6R2yn3KBZ52B2z8b+vAvAPzngnzNixa+1VOjcZFVUvF", - "tdpFIm1MLmSaWsHNirxmBr6zFc3/+8LLPD3u92MFLD1a+LfHSb78OHLL6fp6/U8AAAD//wVbg8EvKAAA", + "H4sIAAAAAAAC/+xabXPbuBH+Kxi2M3c3o8iyc246/uY4uYuncc4Ty9cPqUcDEUsSFxDg4cWOmuq/d7Ag", + "Jb6a8tXxTRN9EkUusM8+wL5gyc9RrPJCSZDWRCefIxNnkFO8PL08f6210v660KoAbTngk9yk/sdyKyA6", + "iS5MGk0iuyr8H2M1l2m0Xk8iDb87roFFJx9wyM1kM2Qz92acWv4GsY3Wk+ilYqsFdYyrhVULC59s61+h", + "jO2CQhl/wcDEmheWKxmdRNeFUJQBI/icJFwAsYosgVhNpZdcAptGkyhROqc2OomWXFK9impgceaOhZMo", + "VwzEgrOgNaFO+PHRpAXhjUtTLlPyE42B4Bhy/oo4A4wkSm9woPi0pvciiLJRcoPpNXp3o3CIe57TFLxo", + "uGj97Wc/dZxRGcPCxNRDqBHyYnrcZuS1jJXTNAVT8mEVSUGCphYIqjEkFsqAWBHB5UdgXsJmQDx6UmiV", + "F5Z8n/E0A01uqXB+JroiGpiLyynI744Kblc/1Dn9ucRJrhDnhgLp8iVoTwGOvWcfhbmt8sh5siJ33GYI", + "reAFCC7h/s10jtP3bKbA7j08HnZ5fAWpBgRzl/E4wKh4rJByQwpnMqTwjmpmUIpLbjkVQWbaxkfGaXqc", + "vR8glpB33v6TSEJKLb+FRdgKIyDm203zvfkBN5vjDMhdRq3/B59i4RiQRKu8C4mcp1Jpz2dCmstD/uVm", + "s+cxOazDfldCI5cBWh96lwdnMosCdJ8Nh20T3iHxRCWVe9Q9pgBdmtcA4nJyHoQvQXfgcGkhDWuJeGQC", + "GtA0C4VpopnNhvEwkIobv8Y4cEoulIZwTZxxVHgfBooeXDps6ZiVKUtniRHqDjTZoPDTMCdwHy9XxFgN", + "MrVZx75Knlwh6j7r6vTusivu25PDa2poAna1iDOIPzbIs9pBm71L0D5CEErCMILDcCsay3OMgknbkw2J", + "lRPMpy6VJCCN32RKk4zqPHGiDvMqzHqGYDZgl0oJoBLRArAuI1dQuqWmkqmcBG8foMIL9/JdrVWDhdn0", + "7wPBSyUhA4aQyZUktCgE34Z8DdUah5X5fuafHDbC+lWlsxOpWqmyqBYwhPl2ztwh9Y1mzVvOQLX/9mfN", + 
"pOVof+tErsxHJZqDQSc3ECvJkLJGpEcddT5+GvCFDHiaNUPN8YterUGScEkK/gmE2UHpmzB5n96dk+om", + "ptEwP8bkP5hRHydFBRgPT1G58tKLpYs/gm2jODx60YZxXSn0S8z9TQ/KU05z5aT1CxDmDFVj1kxSuGYh", + "vPpHpev6y9zH43LkHRfCBxAu8VFnCS+C2EsE3TCsni4UN7CgLl0MuPrsqG3c6cYEHEwoY1sHbxgcChLy", + "plHalWWdBgP5UmBhMjiWUMkIl7EGaiq7G2kDAZy6lAwHjfGUeHT8f5wR97mqYuKOs9buPZwd/dgXD1Hy", + "QeHwnzh3V2srI40kouFsMpSIXIHF6ea3P/X8WcecxwnKpW3sjx8gRvz7xfE3VPHuxOa+9B0LJw8rNXvd", + "tMen38znlwN9OAaWcuGv/qohiU6ivxxsu3kHZSvvYNNrawMsh9eAbXUNAPmVCs6Qj1FI3EJuxrC151tv", + "sbwKM22AUK3pCm2oo21P0IcbqLDZWbU7m3iNpdY1XT/65R+NkwUK9LXgtnXvVkGPfgyF78EUShoYCMRm", + "Z8YugHFa5ykc8ft46mQZU1/rJqwe3EFTB680yV29+/vO/7/P0UaTsNOiLnetxWjH06GMCTMiopplAXiP", + "RT7+DS9EnDn5cfeFQPH6QpyF8e2FmEQWPtm6gR7GqIU2CJWgatY1jBgwcq5wdS+ppsGQr7VZ+5jn2U4r", + "9J7z7L77+e10P4+/6eYnuYKCIs+5E5YXAsphJhwJvvvPd35rGFcUSpeAQ9eBgdjXj3/acbQTzXY8jpYb", + "ppVwmgmlJ+uMFoZCxY3USuXqlyQ6+fC5sx8+dyDe1LLsWxWjmp48234tDcYMHELDja0oYiZzf3csL3s7", + "gqpSssbUDsXor/78PlyDhF5vg6gHVoXt2qNqCIeJR6rEUn3dpAbeHoNCGdQxZLeax+vJwViaF3VTa7jn", + "m+cj0G1d0CurGREwdsCjM8dOc7u68jwG5P5U8RKoBr35HgIjQLi1mSSztojWfg4uk57vDk7L9xhht/r4", + "pJ0kp+eb/omp+eZbfgsFgPbP3zspUdEtaBPmmk0PpzPPlipA0oJHJ9FzvDWJCmozhH2AL/ifWfWsor7q", + "+/hlQRDnrPqYYa7K9fAMgrH+QIolsJIWJI4K0Z5qe+DD7TNGLd1+HzK2HXf7/mDdXEMf7fFG2Gxo1dFs", + "1sJVI/XgN+Pp2RVUo3BG3a0Q7uIYjEmcIFuxSfTjI0LYnq979L+kjLwP6xH0Hj6N3mtJnc2U5v8GhooP", + "nz+N4tJY8lpaXxDNlSJvqU4D60dHjwqi02jowtmKkE0z4vipFv9cWtCSCnIF+hY02XZsqhCFubIenD7c", + "rG8mkXF5TvWq8mwyVwR92w89yLAzgUc+6IkFoXERfUGfq7dGdnW5dd2oEiJagyWNj3Cb3nV/iMNSpaxY", + "vnCM2+F98RNHuWZbZx/mhsPcPsI8NMKEb8PmKhysW06Jb6ZGnRLryadyyuF3Z0/slM0qeu+Ue6f8Ak4Z", + "XAud0tfYOyTK2sn+Xpf832ruZu9gnw73nveVeB72UJvZsHyZO+xy16XAl82Ave+W956397yvxPMqL1qH", + "UX4ag4OamjZttTOhHCNnKs+d5HZFfqYW7ugqKt89YzPPnBwcMA00f5aGp1NRDp/Gfni0vln/NwAA//+4", + "NjpBLjUAAA==", } // GetSwagger returns the content of the embedded swagger specification file