-
Notifications
You must be signed in to change notification settings - Fork 0
/
types.py
187 lines (164 loc) · 7.49 KB
/
types.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
from typing import Any, Callable, Dict, ForwardRef, Iterable, List, Optional, Union
from typing_extensions import TypedDict, Literal, NotRequired
class CompletionLogprobs(TypedDict):
"""
Contains log probability information for a completion.
Attributes:
text_offset (List[int]): List of token offsets in the text.
token_logprobs (List[Optional[float]]): Log probabilities for each token.
tokens (List[str]): List of generated tokens.
top_logprobs (List[Optional[Dict[str, float]]]): Top N tokens and their log probabilities for each position.
"""
text_offset: List[int]
token_logprobs: List[Optional[float]]
tokens: List[str]
top_logprobs: List[Optional[Dict[str, float]]]
class ToolCallFunction(TypedDict):
"""
Represents the details of a function call made by a tool during the completion process.
Attributes:
name (str): The name of the function that was called.
arguments (str): A JSON-encoded string containing the arguments passed to the function.
"""
name: str
arguments: str
class ToolCalls(TypedDict):
"""
Represents a tool call made during completion.
Attributes:
id (str): Unique identifier for the tool call.
type (Literal["function"]): Type of the tool call, currently only "function" is supported.
function (ToolCallFunction): Details of the function call.
"""
id: str
type: Literal["function"]
function: ToolCallFunction
class CompletionChoice(TypedDict):
"""
Represents a single completion choice.
Attributes:
text (str): The generated text.
index (int): Index of this choice in the list of choices.
logprobs (Optional[CompletionLogprobs]): Log probability information, if requested.
finish_reason (Optional[str]): Reason why the generation stopped.
tool_calls (NotRequired[List[ToolCalls]]): List of tool calls made during generation, if any.
"""
text: str
index: int
logprobs: Optional[CompletionLogprobs]
finish_reason: Optional[str]
tool_calls: NotRequired[List[ToolCalls]]
class CompletionUsage(TypedDict):
"""
Contains token usage information for a completion.
Attributes:
prompt_tokens (int): Number of tokens in the prompt.
completion_tokens (int): Number of tokens in the completion.
total_tokens (int): Total number of tokens used (prompt + completion).
"""
prompt_tokens: int
completion_tokens: int
total_tokens: int
class CompletionChunk(TypedDict):
"""
Represents a chunk of a completion response.
Attributes:
id (str): Unique identifier for this completion chunk.
object (Literal["text_completion"]): Object type, always "text_completion".
created (int): Unix timestamp of when this chunk was created.
model (str): Name of the model used for completion.
choices (List[CompletionChoice]): List of completion choices in this chunk.
usage (NotRequired[CompletionUsage]): Token usage statistics, if available.
"""
id: str
object: Literal["text_completion"]
created: int
model: str
choices: List[CompletionChoice]
usage: NotRequired[CompletionUsage]
class Completion(TypedDict):
"""
Represents a complete response from a text completion API.
Attributes:
id (str): A unique identifier for this completion.
object (Literal["text_completion"]): The object type, always "text_completion".
created (int): The Unix timestamp (in seconds) of when the completion was created.
model (str): The name of the model used to generate the completion.
choices (List[CompletionChoice]): A list of generated completion choices.
usage (CompletionUsage): Statistics about the token usage for this completion.
"""
id: str
object: Literal["text_completion"]
created: int
model: str
choices: List[CompletionChoice]
usage: CompletionUsage
class PytorchGenerateConfig(TypedDict, total=False):
"""
Configuration options for text generation using PyTorch models.
This TypedDict allows for flexible configuration with optional fields.
The 'total=False' parameter indicates that all fields are optional.
Attributes:
temperature (float): Controls randomness in generation. Higher values (e.g., 1.0) make output more random,
lower values (e.g., 0.1) make it more deterministic.
repetition_penalty (float): Penalizes repetition in generated text. Values > 1.0 discourage repetition.
top_p (float): Nucleus sampling parameter. Only consider tokens with cumulative probability < top_p.
top_k (int): Only consider the top k tokens for generation at each step.
stream (bool): If True, enable streaming of generated text.
max_tokens (int): Maximum number of tokens to generate.
echo (bool): If True, include the prompt in the generated output.
stop (Optional[Union[str, List[str]]]): Stop sequence(s) to end generation.
stop_token_ids (Optional[Union[int, List[int]]]): Token ID(s) that signal to stop generation.
stream_interval (int): Interval for yielding tokens when streaming.
model (Optional[str]): Name or path of the specific model to use.
tools (Optional[List[Dict]]): List of tool configurations for function calling.
lora_name (Optional[str]): Name of the LoRA adapter to use, if applicable.
stream_options (Optional[Union[dict, None]]): Additional options for streaming.
request_id (Optional[str]): Unique identifier for the generation request.
"""
temperature: float
repetition_penalty: float
top_p: float
top_k: int
stream: bool
max_tokens: int
echo: bool
stop: Optional[Union[str, List[str]]]
stop_token_ids: Optional[Union[int, List[int]]]
stream_interval: int
model: Optional[str]
tools: Optional[List[Dict]]
lora_name: Optional[str]
stream_options: Optional[Union[dict, None]]
request_id: Optional[str]
class PytorchModelConfig(TypedDict, total=False):
"""
Configuration options for PyTorch models.
This TypedDict allows for flexible configuration with optional fields.
The 'total=False' parameter indicates that all fields are optional.
Attributes:
revision (Optional[str]): The specific model revision to use, if applicable.
device (str): The device to run the model on (e.g., 'cpu', 'cuda').
gpus (Optional[str]): Specific GPUs to use, typically a comma-separated list of device ids.
num_gpus (int): Number of GPUs to use for model parallelism.
max_gpu_memory (str): Maximum GPU memory to use per device (e.g., '16GiB').
gptq_ckpt (Optional[str]): Path to the GPTQ checkpoint file, if using GPTQ quantization.
gptq_wbits (int): Number of bits to use in GPTQ quantization.
gptq_groupsize (int): Group size for GPTQ quantization.
gptq_act_order (bool): Whether to use activation order in GPTQ.
trust_remote_code (bool): Whether to trust and execute code from remote sources.
max_num_seqs (int): Maximum number of sequences to process in parallel.
enable_tensorizer (Optional[bool]): Whether to enable tensorizer optimizations.
"""
revision: Optional[str]
device: str
gpus: Optional[str]
num_gpus: int
max_gpu_memory: str
gptq_ckpt: Optional[str]
gptq_wbits: int
gptq_groupsize: int
gptq_act_order: bool
trust_remote_code: bool
max_num_seqs: int
enable_tensorizer: Optional[bool]