-
Notifications
You must be signed in to change notification settings - Fork 0
Training models on AWS SageMaker with ML_DSL
Anna Safonova edited this page Jun 29, 2020
·
1 revision
There are two ways to fit a model on AWS SageMaker: using the API or Jupyter magic functions.
from com.griddynamics.dsl.ml.executors.executors import SageMakerExecutor
from com.griddynamics.dsl.ml.settings.profiles import SageMakerProfile
from com.griddynamics.dsl.ml.jobs.builder import JobBuilder
from com.griddynamics.dsl.ml.sessions import SessionFactory
from com.griddynamics.dsl.ml.settings.description import Platform
from com.griddynamics.dsl.ml.settings.arguments import Arguments
from sagemaker.pytorch import PyTorch
Define a Profile for the job:
# SageMakerProfile describes where and how the training job runs: the S3
# bucket for artifacts, the cluster and job-name prefix, the AWS region,
# the estimator container class (sagemaker.pytorch.PyTorch) with its
# framework version, and the EC2 instance type used for training.
profile = SageMakerProfile(bucket='test-bucket', cluster='test-cluster',
region='us-east-1', job_prefix='mldsl_test',
container=PyTorch, framework_version='1.4.0',
instance_type='ml.m4.xlarge')
Set the Python training script and its arguments:
# Entry-point script for the training job, plus the arguments passed to it;
# 'training' points at the S3 prefix holding the training data.
script_name = 'train_script.py'
args_dct = dict(training='s3://test-bucket/data')
Create a Session instance for the SageMaker client:
# Build a Session for the SageMaker client from the profile settings.
# NOTE(review): the wiki rendering has lost the line continuation between
# these two lines — as shown, `.build_session(` on its own line is not
# valid Python; the original is presumably one chained expression.
session = SessionFactory(platform=Platform.AWS)
.build_session(job_bucket=profile.bucket,
job_region=profile.region,
cluster=profile.cluster,
# NOTE(review): job_project_id is set to profile.region — this looks like
# a copy-paste slip (a project identifier is expected here); confirm
# against the SessionFactory API.
job_project_id=profile.region,
# NOTE(review): profile.ai_region is never set by the SageMakerProfile
# constructor above — verify this attribute has a default value.
ml_region=profile.ai_region)
Create an Executor instance for submitting the training job to SageMaker:
# Create an Executor in 'train' mode, wiring the session and profile to the
# entry-point script and its arguments.
executor = SageMakerExecutor(session, profile, mode='train',
py_script_name=script_name, args=args_dct)
# Kicks off the remote SageMaker training job.
executor.submit_train_job()
After training completes, `executor.executor` can be used for deployment on SageMaker.
from com.griddynamics.dsl.ml.settings.profiles import SageMakerProfile
from com.griddynamics.dsl.ml.settings.description import Platform
Define a Profile for the job:
# Same SageMakerProfile settings as in the API example above.
# NOTE(review): this snippet references PyTorch but its imports bring in
# only SageMakerProfile and Platform — add
# `from sagemaker.pytorch import PyTorch` when copying this example.
profile = SageMakerProfile(bucket='test-bucket', cluster='test-cluster',
region='us-east-1', job_prefix='mldsl_test',
container=PyTorch, framework_version='1.4.0',
instance_type='ml.m4.xlarge')
Register the profile:
# Register the profile under the name 'AIProfile' so the %py_train magic can
# reference it via -p, and select the AWS platform for -pm.
# NOTE(review): Profile is not imported in this snippet (only
# SageMakerProfile is) — confirm the intended import for Profile.set.
Profile.set('AIProfile', profile)
platform = Platform.AWS
Open or load the task script using one of the magic functions %py_script, %py_script_open, or %py_load:
%%py_script_open --name train_script.py --path scripts -o output
Train using the magic function %py_train:
%py_train -n train_script.py -s model -p AIProfile -pm $platform -o s3://test-bucket/models --training s3://test-bucket/data