From 3dc5ce959737d3a10722ad5e190b632489ee4969 Mon Sep 17 00:00:00 2001
From: ea_superstar
Date: Sat, 10 Aug 2024 13:23:49 +0100
Subject: [PATCH] Update parameter descriptions to match the current
 implementation.

---
 runner/app/routes/audio_to_text.py  | 4 ++--
 runner/app/routes/image_to_image.py | 6 +++---
 runner/app/routes/image_to_video.py | 4 ++--
 runner/app/routes/text_to_image.py  | 4 ++--
 runner/app/routes/upscale.py        | 6 +++---
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/runner/app/routes/audio_to_text.py b/runner/app/routes/audio_to_text.py
index b8d2dbbb3..358b8f853 100644
--- a/runner/app/routes/audio_to_text.py
+++ b/runner/app/routes/audio_to_text.py
@@ -56,8 +56,8 @@ def handle_pipeline_error(e: Exception) -> JSONResponse:
     include_in_schema=False,
 )
 async def audio_to_text(
-    audio: Annotated[UploadFile, File(description="List of denoised audio samples of a NumPy array of shape (batch_size, num_channels, sample_rate).")],
-    model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "",
+    audio: Annotated[UploadFile, File(description="The audio file to be transcribed.")],
+    model_id: Annotated[str, Form(description="The Hugging Face ID of the model to use for transcription.")] = "",
     pipeline: Pipeline = Depends(get_pipeline),
     token: HTTPAuthorizationCredentials = Depends(HTTPBearer(auto_error=False)),
 ):
diff --git a/runner/app/routes/image_to_image.py b/runner/app/routes/image_to_image.py
index dce558d81..a23f281e7 100644
--- a/runner/app/routes/image_to_image.py
+++ b/runner/app/routes/image_to_image.py
@@ -36,9 +36,9 @@
     include_in_schema=False,
 )
 async def image_to_image(
-    prompt: Annotated[str, Form(description="The prompt or prompts to guide image generation. If not defined, you need to pass prompt_embeds.")],
-    image: Annotated[UploadFile, File(description="Image, numpy array or tensor representing an image batch to be used as the starting point. For both numpy array and pytorch tensor, the expected value range is between [0, 1] If it’s a tensor or a list or tensors, the expected shape should be (B, C, H, W) or (C, H, W). If it is a numpy array or a list of arrays, the expected shape should be (B, H, W, C) or (H, W, C) It can also accept image latents as image, but if passing latents directly it is not encoded again.")],
-    model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "",
+    prompt: Annotated[str, Form(description="The text description to guide image generation. Append + or - to a word to increase or decrease its weight; repeat the symbol (e.g. ++ or --) to strengthen the effect.")],
+    image: Annotated[UploadFile, File(description="The image file to be transformed.")],
+    model_id: Annotated[str, Form(description="The Hugging Face ID of the diffusion model to use for image generation.")] = "",
     strength: Annotated[float, Form(description=" Indicates extent to transform the reference image. Must be between 0 and 1. image is used as a starting point and more noise is added the higher the strength. The number of denoising steps depends on the amount of noise initially added. When strength is 1, added noise is maximum and the denoising process runs for the full number of iterations specified in num_inference_steps. A value of 1 essentially ignores image.")] = 0.8,
     guidance_scale: Annotated[float, Form(description="A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. Guidance scale is enabled when guidance_scale > 1.")] = 7.5,
     image_guidance_scale: Annotated[float, Form(description="Push the generated image towards the initial image. Image guidance scale is enabled by setting image_guidance_scale > 1. Higher image guidance scale encourages generated images that are closely linked to the source image, usually at the expense of lower image quality. This pipeline requires a value of at least 1.")] = 1.5,
diff --git a/runner/app/routes/image_to_video.py b/runner/app/routes/image_to_video.py
index 03985e519..b38c68b20 100644
--- a/runner/app/routes/image_to_video.py
+++ b/runner/app/routes/image_to_video.py
@@ -35,8 +35,8 @@
     include_in_schema=False,
 )
 async def image_to_video(
-    image: Annotated[UploadFile, File(description="Image, numpy array or tensor representing an image batch to be used as the starting point. For both numpy array and pytorch tensor, the expected value range is between [0, 1] If it’s a tensor or a list or tensors, the expected shape should be (B, C, H, W) or (C, H, W). If it is a numpy array or a list of arrays, the expected shape should be (B, H, W, C) or (H, W, C) It can also accept image latents as image, but if passing latents directly it is not encoded again.")],
-    model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "",
+    image: Annotated[UploadFile, File(description="The image file from which the video will be generated.")],
+    model_id: Annotated[str, Form(description="The Hugging Face ID of the diffusion model to use for video generation.")] = "",
     height: Annotated[int, Form(description="The height in pixels of the generated image.")] = 576,
     width: Annotated[int, Form(description="The width in pixels of the generated image.")] = 1024,
     fps: Annotated[int, Form(description="the frames per second of the generated video.")] = 6,
diff --git a/runner/app/routes/text_to_image.py b/runner/app/routes/text_to_image.py
index f5e56f53d..a57180c7d 100644
--- a/runner/app/routes/text_to_image.py
+++ b/runner/app/routes/text_to_image.py
@@ -22,9 +22,9 @@ class TextToImageParams(BaseModel):
     # supports OAPI 3.1 https://github.com/deepmap/oapi-codegen/issues/373
     model_id: Annotated[
         str,
-        Field(default="", description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)"),
+        Field(default="", description="The Hugging Face ID of the diffusion model to use for image generation."),
     ]
-    prompt: Annotated[str, Field(description="The prompt or prompts to guide image generation. If not defined, you need to pass prompt_embeds.")]
+    prompt: Annotated[str, Field(description="The text description to guide image generation. Append + or - to a word to increase or decrease its weight; repeat the symbol (e.g. ++ or --) to strengthen the effect.")]
     height: Annotated[int, Field(default=576, description="The height in pixels of the generated image.")]
     width: Annotated[int, Field(default=1024, description="The width in pixels of the generated image.")]
     guidance_scale: Annotated[float, Field(default=7.5, description="A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. Guidance scale is enabled when guidance_scale > 1.")]
diff --git a/runner/app/routes/upscale.py b/runner/app/routes/upscale.py
index 1d989b87f..888a8c1c7 100644
--- a/runner/app/routes/upscale.py
+++ b/runner/app/routes/upscale.py
@@ -36,9 +36,9 @@
     include_in_schema=False,
 )
 async def upscale(
-    prompt: Annotated[str, Form(description="The prompt or prompts to guide image generation. If not defined, you need to pass prompt_embeds.")],
-    image: Annotated[UploadFile, File(description="Image, numpy array or tensor representing an image batch to be used as the starting point. For both numpy array and pytorch tensor, the expected value range is between [0, 1] If it’s a tensor or a list or tensors, the expected shape should be (B, C, H, W) or (C, H, W). If it is a numpy array or a list of arrays, the expected shape should be (B, H, W, C) or (H, W, C) It can also accept image latents as image, but if passing latents directly it is not encoded again.")],
-    model_id: Annotated[str, Form(description="The huggingface model ID to run the inference on (i.e. SG161222/RealVisXL_V4.0_Lightning:)")] = "",
+    prompt: Annotated[str, Form(description="The text description to guide the upscaled image. Append + or - to a word to increase or decrease its weight; repeat the symbol (e.g. ++ or --) to strengthen the effect.")],
+    image: Annotated[UploadFile, File(description="The image file to be upscaled.")],
+    model_id: Annotated[str, Form(description="The Hugging Face ID of the diffusion model to use for upscaling.")] = "",
     safety_check: Annotated[bool, Form(description=" Classification module that estimates whether generated images could be considered offensive or harmful. Please refer to the model card for more details about a model’s potential harms.")] = True,
     seed: Annotated[int, Form(description="The seed to set.")] = None,
     num_inference_steps: Annotated[
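
As a quick check of how the documented form fields fit together, here is a minimal client sketch for the image_to_image route. The base URL, route path (/image-to-image), bearer token, input file, and prompt/model values are assumptions for illustration only; the field names and defaults come from the image_to_image() signature in this patch.

    # Minimal sketch: POST multipart form data to the image-to-image route.
    # URL, path, token, and example values are assumptions; field names
    # mirror the image_to_image() parameters documented in this patch.
    import requests

    BASE_URL = "http://localhost:8000"  # assumed runner address

    with open("input.png", "rb") as f:
        resp = requests.post(
            f"{BASE_URL}/image-to-image",  # assumed route path
            headers={"Authorization": "Bearer <token>"},  # HTTPBearer credential
            files={"image": ("input.png", f, "image/png")},
            data={
                # Weighted prompt per the updated description:
                # '+' boosts a word, '-' suppresses it; repeating the
                # symbol strengthens the effect.
                "prompt": "a misty forest++ at dawn, blurry--",
                "model_id": "",            # default from the route signature
                "strength": 0.8,           # defaults shown in the signatures above
                "guidance_scale": 7.5,
                "image_guidance_scale": 1.5,
            },
        )

    resp.raise_for_status()
    print(resp.json())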