-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Now only for completions
- Loading branch information
Showing
52 changed files
with
2,289 additions
and
84 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/usr/bin/env python3 | ||
|
||
from __future__ import annotations | ||
|
||
import asyncio | ||
import pathlib | ||
import uuid | ||
|
||
from yandex_cloud_ml_sdk import AsyncYCloudML | ||
|
||
|
||
def local_path(path: str) -> pathlib.Path:
    """Return *path* joined onto the directory that contains this script.

    Anchoring on ``__file__`` makes the result independent of the current
    working directory. The returned path is not checked for existence.
    """
    return pathlib.Path(__file__).parent / path
|
||
|
||
async def get_datasets(sdk):
    """
    Get an existing dataset or create a new one to tune on.

    The first dataset listed with status "READY" is reused. If nothing is
    listed, a new completions dataset is uploaded from the ``example_dataset``
    file located next to this script, in jsonlines format.

    In real life you could just use dataset ids, for example:
    ```
    dataset = await sdk.datasets.get("some_id")
    tuning_task = await base_model.tune_deferred(
        "dataset_id",
        validation_datasets=dataset
    )
    ```

    Returns a ``(train_dataset, validation_dataset)`` pair; in this example
    both elements are the same dataset object.
    """

    async for dataset in sdk.datasets.list(status="READY"):
        print(f'using old dataset {dataset=}')
        break
    else:
        # Reached only when the loop ended without `break`,
        # i.e. no READY dataset was listed.
        print('no old datasets found, creating new one')
        dataset_draft = sdk.datasets.completions.from_path_deferred(
            path=local_path('example_dataset'),
            upload_format='jsonlines',
            name='foo',
        )

        # `upload()` gives back an operation; awaiting it yields the dataset.
        operation = await dataset_draft.upload()
        dataset = await operation
        print(f'created new dataset {dataset=}')

    return dataset, dataset
|
||
|
||
async def main() -> None:
    """Start a tuning task, re-obtain it in two different ways, then cancel it."""
    sdk = AsyncYCloudML(folder_id='b1ghsjum2v37c2un8h64')
    train_dataset, validation_dataset = await get_datasets(sdk)
    base_model = sdk.models.completions('yandexgpt-lite')

    tuning_task = await base_model.tune_deferred(
        train_dataset,
        validation_datasets=validation_dataset,
        name=str(uuid.uuid4())
    )
    print(f'new {tuning_task=}')

    try:
        # First way: attach to the task through the model, by task id.
        same_task = await base_model.attach_tune_deferred(tuning_task.id)
        print(f'{same_task=}')

        # IMPORTANT
        # .get will raise NOT_FOUND for the first few seconds, until the
        # Yandex Cloud "Operation" creates a "TuningTask" at the backend.
        await asyncio.sleep(5)

        # Second way: fetch the task through the `sdk.tuning` domain.
        same_task2 = await sdk.tuning.get(tuning_task.id)
        print(f'{same_task2=}')
    finally:
        # Always cancel, so the example does not leave a tuning task running.
        await tuning_task.cancel()


if __name__ == '__main__':
    asyncio.run(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#!/usr/bin/env python3 | ||
|
||
from __future__ import annotations | ||
|
||
import asyncio | ||
import pathlib | ||
import uuid | ||
|
||
from yandex_cloud_ml_sdk import AsyncYCloudML | ||
|
||
|
||
def local_path(path: str) -> pathlib.Path:
    """Return *path* joined onto the directory that contains this script.

    Anchoring on ``__file__`` makes the result independent of the current
    working directory. The returned path is not checked for existence.
    """
    return pathlib.Path(__file__).parent / path
|
||
|
||
async def get_datasets(sdk):
    """
    Get an existing dataset or create a new one to tune on.

    The first dataset listed with status "READY" is reused. If nothing is
    listed, a new completions dataset is uploaded from the ``example_dataset``
    file located next to this script, in jsonlines format.

    In real life you could just use dataset ids, for example:
    ```
    dataset = await sdk.datasets.get("some_id")
    tuning_task = await base_model.tune_deferred(
        "dataset_id",
        validation_datasets=dataset
    )
    ```

    Returns a ``(train_dataset, validation_dataset)`` pair; in this example
    both elements are the same dataset object.
    """

    async for dataset in sdk.datasets.list(status="READY"):
        print(f'using old dataset {dataset=}')
        break
    else:
        # Reached only when the loop ended without `break`,
        # i.e. no READY dataset was listed.
        print('no old datasets found, creating new one')
        dataset_draft = sdk.datasets.completions.from_path_deferred(
            path=local_path('example_dataset'),
            upload_format='jsonlines',
            name='foo',
        )

        # `upload()` gives back an operation; awaiting it yields the dataset.
        operation = await dataset_draft.upload()
        dataset = await operation
        print(f'created new dataset {dataset=}')

    return dataset, dataset
|
||
|
||
async def main() -> None:
    """Tune a model with the blocking `.tune(...)` shortcut and run the result."""
    sdk = AsyncYCloudML(folder_id='b1ghsjum2v37c2un8h64')
    train_dataset, validation_dataset = await get_datasets(sdk)
    base_model = sdk.models.completions('yandexgpt-lite')

    # `.tune(...)` is a shortcut for:
    #     tuning_task = await base_model.tune_deferred(...)
    #     new_model = await tuning_task.wait(...)
    # But it gives you less control over tune canceling and
    # reporting.
    new_model = await base_model.tune(
        train_dataset,
        validation_datasets=validation_dataset,
        name=str(uuid.uuid4())
    )
    print(f'resulting {new_model}')

    completion_result = await new_model.run("hey!")
    print(f'{completion_result=}')

    # or save model.uri somewhere and reuse it later
    tuned_uri = new_model.uri
    model = sdk.models.completions(tuned_uri)

    completion_result = await model.run("hey!")
    print(f'{completion_result=}')


if __name__ == '__main__':
    asyncio.run(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#!/usr/bin/env python3 | ||
|
||
from __future__ import annotations | ||
|
||
import asyncio | ||
import pathlib | ||
import uuid | ||
|
||
from yandex_cloud_ml_sdk import AsyncYCloudML | ||
|
||
|
||
def local_path(path: str) -> pathlib.Path:
    """Return *path* joined onto the directory that contains this script.

    Anchoring on ``__file__`` makes the result independent of the current
    working directory. The returned path is not checked for existence.
    """
    return pathlib.Path(__file__).parent / path
|
||
|
||
async def get_datasets(sdk):
    """
    Get an existing dataset or create a new one to tune on.

    The first dataset listed with status "READY" is reused. If nothing is
    listed, a new completions dataset is uploaded from the ``example_dataset``
    file located next to this script, in jsonlines format.

    In real life you could just use dataset ids, for example:
    ```
    dataset = await sdk.datasets.get("some_id")
    tuning_task = await base_model.tune_deferred(
        "dataset_id",
        validation_datasets=dataset
    )
    ```

    Returns a ``(train_dataset, validation_dataset)`` pair; in this example
    both elements are the same dataset object.
    """

    async for dataset in sdk.datasets.list(status="READY"):
        print(f'using old dataset {dataset=}')
        break
    else:
        # Reached only when the loop ended without `break`,
        # i.e. no READY dataset was listed.
        print('no old datasets found, creating new one')
        dataset_draft = sdk.datasets.completions.from_path_deferred(
            path=local_path('example_dataset'),
            upload_format='jsonlines',
            name='foo',
        )

        # `upload()` gives back an operation; awaiting it yields the dataset.
        operation = await dataset_draft.upload()
        dataset = await operation
        print(f'created new dataset {dataset=}')

    return dataset, dataset
|
||
|
||
async def main() -> None:
    """Start a tuning task, poll its status and task info, then cancel it."""
    sdk = AsyncYCloudML(folder_id='b1ghsjum2v37c2un8h64')
    train_dataset, validation_dataset = await get_datasets(sdk)
    base_model = sdk.models.completions('yandexgpt-lite')

    tuning_task = await base_model.tune_deferred(
        train_dataset,
        validation_datasets=validation_dataset,
        name=str(uuid.uuid4())
    )
    print(f'new {tuning_task=}')

    try:
        # Poll three times, five seconds apart.
        for _ in range(3):
            status = await tuning_task.get_status()
            print(f'{status=}')

            task_info = await tuning_task.get_task_info()
            print(f'{task_info=}')

            await asyncio.sleep(5)
    finally:
        # Always cancel, so the example does not leave a tuning task running.
        await tuning_task.cancel()

    # Show what the same two calls report once the task has been cancelled.
    status = await tuning_task.get_status()
    print(f'{status=} after cancel')

    task_info = await tuning_task.get_task_info()
    print(f'{task_info=} after cancel')


if __name__ == '__main__':
    asyncio.run(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#!/usr/bin/env python3 | ||
|
||
from __future__ import annotations | ||
|
||
import asyncio | ||
import pathlib | ||
import uuid | ||
|
||
from yandex_cloud_ml_sdk import AsyncYCloudML | ||
|
||
|
||
def local_path(path: str) -> pathlib.Path:
    """Return *path* joined onto the directory that contains this script.

    Anchoring on ``__file__`` makes the result independent of the current
    working directory. The returned path is not checked for existence.
    """
    return pathlib.Path(__file__).parent / path
|
||
|
||
async def get_datasets(sdk):
    """
    Get an existing dataset or create a new one to tune on.

    The first dataset listed with status "READY" is reused. If nothing is
    listed, a new completions dataset is uploaded from the ``example_dataset``
    file located next to this script, in jsonlines format.

    In real life you could just use dataset ids, for example:
    ```
    dataset = await sdk.datasets.get("some_id")
    tuning_task = await base_model.tune_deferred(
        "dataset_id",
        validation_datasets=dataset
    )
    ```

    Returns a ``(train_dataset, validation_dataset)`` pair; in this example
    both elements are the same dataset object.
    """

    async for dataset in sdk.datasets.list(status="READY"):
        print(f'using old dataset {dataset=}')
        break
    else:
        # Reached only when the loop ended without `break`,
        # i.e. no READY dataset was listed.
        print('no old datasets found, creating new one')
        dataset_draft = sdk.datasets.completions.from_path_deferred(
            path=local_path('example_dataset'),
            upload_format='jsonlines',
            name='foo',
        )

        # `upload()` gives back an operation; awaiting it yields the dataset.
        operation = await dataset_draft.upload()
        dataset = await operation
        print(f'created new dataset {dataset=}')

    return dataset, dataset
|
||
|
||
async def main() -> None:
    """Start tuning task(s), find them via `sdk.tuning.list()`, and cancel them.

    Only tasks started by this run are cancelled; other tuning tasks that
    the listing returns are left untouched.
    """
    sdk = AsyncYCloudML(folder_id='b1ghsjum2v37c2un8h64')
    train_dataset, validation_dataset = await get_datasets(sdk)
    base_model = sdk.models.completions('yandexgpt-lite')

    # Remember the ids of the tasks we start, to recognise them in the listing.
    task_ids = set()
    for _ in range(1):
        tuning_task = await base_model.tune_deferred(
            train_dataset,
            validation_datasets=validation_dataset,
            name=str(uuid.uuid4())
        )
        task_ids.add(tuning_task.id)

    # NB: tuning tasks have a time gap before they become
    # available at the backend as `TuningTasks`
    await asyncio.sleep(5)

    async for tuning_task in sdk.tuning.list():
        # The listing may contain tasks this script did not create;
        # cancelling those would disrupt unrelated work, so skip them.
        if tuning_task.id not in task_ids:
            continue

        # or you could wait for tasks, instead of canceling
        print(f'found task {tuning_task=}, canceling')
        await tuning_task.cancel()


if __name__ == '__main__':
    asyncio.run(main())
Oops, something went wrong.