Fixing logging in register_model (#1615)
* Fixing register model logging

* Adding task_name to register_model

* Passing str instead of bool for pipeline parameters
skanakamedal authored Nov 1, 2023
1 parent 90cf0ce commit ebf9748
Showing 3 changed files with 28 additions and 10 deletions.
File 1 of 3: fine-tune command component spec (YAML)
@@ -320,4 +320,4 @@ outputs:
type: uri_folder
description: Output dir to save the finetuned lora weights

-command: python model_selector/model_selector.py --task_name TextGeneration $[[--mlflow_model_path '${{inputs.mlflow_model_path}}']] --output_dir model_selector_output && python preprocess/preprocess.py --task_name TextGeneration --text_key '${{inputs.text_key}}' $[[--ground_truth_key '${{inputs.ground_truth_key}}']] $[[--batch_size '${{inputs.batch_size}}']] $[[--pad_to_max_length '${{inputs.pad_to_max_length}}']] $[[--max_seq_length '${{inputs.max_seq_length}}']] --train_file_path '${{inputs.dataset_input}}/train_input.jsonl' --validation_file_path '${{inputs.dataset_input}}/validation_input.jsonl' --test_file_path '${{inputs.dataset_input}}/train_input.jsonl' --model_selector_output model_selector_output --output_dir preprocess_output && python -m torch.distributed.launch --nproc_per_node=${{inputs.number_of_gpu_to_use_finetuning}} finetune/finetune.py $[[--apply_lora '${{inputs.apply_lora}}']] $[[--merge_lora_weights '${{inputs.merge_lora_weights}}']] $[[--lora_alpha '${{inputs.lora_alpha}}']] $[[--lora_r '${{inputs.lora_r}}']] $[[--lora_dropout '${{inputs.lora_dropout}}']] $[[--num_train_epochs '${{inputs.num_train_epochs}}']] $[[--max_steps '${{inputs.max_steps}}']] $[[--per_device_train_batch_size '${{inputs.per_device_train_batch_size}}']] $[[--per_device_eval_batch_size '${{inputs.per_device_eval_batch_size}}']] $[[--auto_find_batch_size '${{inputs.auto_find_batch_size}}']] $[[--optim '${{inputs.optim}}']] $[[--learning_rate '${{inputs.learning_rate}}']] $[[--warmup_steps '${{inputs.warmup_steps}}']] $[[--weight_decay '${{inputs.weight_decay}}']] $[[--adam_beta1 '${{inputs.adam_beta1}}']] $[[--adam_beta2 '${{inputs.adam_beta2}}']] $[[--adam_epsilon '${{inputs.adam_epsilon}}']] $[[--gradient_accumulation_steps '${{inputs.gradient_accumulation_steps}}']] $[[--eval_accumulation_steps '${{inputs.eval_accumulation_steps}}']] $[[--lr_scheduler_type '${{inputs.lr_scheduler_type}}']] $[[--precision '${{inputs.precision}}']] $[[--seed '${{inputs.seed}}']] $[[--enable_full_determinism '${{inputs.enable_full_determinism}}']] $[[--dataloader_num_workers '${{inputs.dataloader_num_workers}}']] $[[--ignore_mismatched_sizes '${{inputs.ignore_mismatched_sizes}}']] $[[--max_grad_norm '${{inputs.max_grad_norm}}']] $[[--evaluation_strategy '${{inputs.evaluation_strategy}}']] $[[--evaluation_steps_interval '${{inputs.evaluation_steps_interval}}']] $[[--eval_steps '${{inputs.eval_steps}}']] $[[--logging_strategy '${{inputs.logging_strategy}}']] $[[--logging_steps '${{inputs.logging_steps}}']] $[[--metric_for_best_model '${{inputs.metric_for_best_model}}']] $[[--resume_from_checkpoint '${{inputs.resume_from_checkpoint}}']] $[[--save_total_limit '${{inputs.save_total_limit}}']] $[[--apply_early_stopping '${{inputs.apply_early_stopping}}']] $[[--early_stopping_patience '${{inputs.early_stopping_patience}}']] $[[--early_stopping_threshold '${{inputs.early_stopping_threshold}}']] $[[--apply_ort '${{inputs.apply_ort}}']] $[[--apply_deepspeed '${{inputs.apply_deepspeed}}']] $[[--deepspeed_stage '${{inputs.deepspeed_stage}}']] --model_selector_output model_selector_output --preprocess_output preprocess_output $[[--system_properties '${{inputs.system_properties}}']] --pytorch_model_folder pytorch_model_folder --mlflow_model_folder mlflow_model_folder --output_model '${{outputs.output_model}}' && python register_model/register_model.py $[[--model_name ${{inputs.registered_model_name}}]] --finetune_args_path pytorch_model_folder/finetune_args.json --registration_details_folder '${{outputs.output_model}}' --model_path pytorch_model_folder/peft_adapter_weights --convert_to_safetensors true --copy_model_to_output true
+command: python model_selector/model_selector.py --task_name TextGeneration $[[--mlflow_model_path '${{inputs.mlflow_model_path}}']] --output_dir model_selector_output && python preprocess/preprocess.py --task_name TextGeneration --text_key '${{inputs.text_key}}' $[[--ground_truth_key '${{inputs.ground_truth_key}}']] $[[--batch_size '${{inputs.batch_size}}']] $[[--pad_to_max_length '${{inputs.pad_to_max_length}}']] $[[--max_seq_length '${{inputs.max_seq_length}}']] --train_file_path '${{inputs.dataset_input}}/train_input.jsonl' --validation_file_path '${{inputs.dataset_input}}/validation_input.jsonl' --test_file_path '${{inputs.dataset_input}}/train_input.jsonl' --model_selector_output model_selector_output --output_dir preprocess_output && python -m torch.distributed.launch --nproc_per_node=${{inputs.number_of_gpu_to_use_finetuning}} finetune/finetune.py $[[--apply_lora '${{inputs.apply_lora}}']] $[[--merge_lora_weights '${{inputs.merge_lora_weights}}']] $[[--lora_alpha '${{inputs.lora_alpha}}']] $[[--lora_r '${{inputs.lora_r}}']] $[[--lora_dropout '${{inputs.lora_dropout}}']] $[[--num_train_epochs '${{inputs.num_train_epochs}}']] $[[--max_steps '${{inputs.max_steps}}']] $[[--per_device_train_batch_size '${{inputs.per_device_train_batch_size}}']] $[[--per_device_eval_batch_size '${{inputs.per_device_eval_batch_size}}']] $[[--auto_find_batch_size '${{inputs.auto_find_batch_size}}']] $[[--optim '${{inputs.optim}}']] $[[--learning_rate '${{inputs.learning_rate}}']] $[[--warmup_steps '${{inputs.warmup_steps}}']] $[[--weight_decay '${{inputs.weight_decay}}']] $[[--adam_beta1 '${{inputs.adam_beta1}}']] $[[--adam_beta2 '${{inputs.adam_beta2}}']] $[[--adam_epsilon '${{inputs.adam_epsilon}}']] $[[--gradient_accumulation_steps '${{inputs.gradient_accumulation_steps}}']] $[[--eval_accumulation_steps '${{inputs.eval_accumulation_steps}}']] $[[--lr_scheduler_type '${{inputs.lr_scheduler_type}}']] $[[--precision '${{inputs.precision}}']] $[[--seed '${{inputs.seed}}']] $[[--enable_full_determinism '${{inputs.enable_full_determinism}}']] $[[--dataloader_num_workers '${{inputs.dataloader_num_workers}}']] $[[--ignore_mismatched_sizes '${{inputs.ignore_mismatched_sizes}}']] $[[--max_grad_norm '${{inputs.max_grad_norm}}']] $[[--evaluation_strategy '${{inputs.evaluation_strategy}}']] $[[--evaluation_steps_interval '${{inputs.evaluation_steps_interval}}']] $[[--eval_steps '${{inputs.eval_steps}}']] $[[--logging_strategy '${{inputs.logging_strategy}}']] $[[--logging_steps '${{inputs.logging_steps}}']] $[[--metric_for_best_model '${{inputs.metric_for_best_model}}']] $[[--resume_from_checkpoint '${{inputs.resume_from_checkpoint}}']] $[[--save_total_limit '${{inputs.save_total_limit}}']] $[[--apply_early_stopping '${{inputs.apply_early_stopping}}']] $[[--early_stopping_patience '${{inputs.early_stopping_patience}}']] $[[--early_stopping_threshold '${{inputs.early_stopping_threshold}}']] $[[--apply_ort '${{inputs.apply_ort}}']] $[[--apply_deepspeed '${{inputs.apply_deepspeed}}']] $[[--deepspeed_stage '${{inputs.deepspeed_stage}}']] --model_selector_output model_selector_output --preprocess_output preprocess_output $[[--system_properties '${{inputs.system_properties}}']] --pytorch_model_folder pytorch_model_folder --mlflow_model_folder mlflow_model_folder --output_model '${{outputs.output_model}}' && python register_model/register_model.py --task_name TextGeneration $[[--model_name ${{inputs.registered_model_name}}]] --finetune_args_path pytorch_model_folder/finetune_args.json --registration_details_folder '${{outputs.output_model}}' --model_path pytorch_model_folder/peft_adapter_weights --convert_to_safetensors true --copy_model_to_output true
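The only change in this file: the register_model.py step now also receives --task_name TextGeneration, matching the flag the model_selector and preprocess steps already pass, and feeding the logging setup added in file 3 below. (The $[[...]] segments are AzureML command-component syntax for optional arguments; each segment is dropped from the command when its input is not supplied.)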
File 2 of 3: pipeline spec (YAML)
@@ -156,7 +156,7 @@ jobs:
text_key: '${{parent.inputs.text_key}}'
ground_truth_key: '${{parent.inputs.ground_truth_key}}'
batch_size: 1000
-pad_to_max_length: false
+pad_to_max_length: "false"
max_seq_length: 4096
number_of_gpu_to_use_finetuning: '${{parent.inputs.number_of_gpu_to_use_finetuning}}'
apply_lora: "true"
@@ -167,7 +167,7 @@ jobs:
max_steps: -1
per_device_train_batch_size: '${{parent.inputs.per_device_train_batch_size}}'
per_device_eval_batch_size: '${{parent.inputs.per_device_train_batch_size}}'
-auto_find_batch_size: false
+auto_find_batch_size: "false"
optim: adamw_hf
learning_rate: '${{parent.inputs.learning_rate}}'
warmup_steps: 0
@@ -179,23 +179,23 @@ jobs:
lr_scheduler_type: linear
precision: 16
seed: 42
-enable_full_determinism: false
+enable_full_determinism: "false"
dataloader_num_workers: 0
-ignore_mismatched_sizes: true
+ignore_mismatched_sizes: "true"
max_grad_norm: 1.0
evaluation_strategy: epoch
evaluation_steps_interval: 0.0
eval_steps: 500
logging_strategy: epoch
logging_steps: 500
metric_for_best_model: loss
-resume_from_checkpoint: false
+resume_from_checkpoint: "false"
save_total_limit: 1
-apply_early_stopping: false
+apply_early_stopping: "false"
early_stopping_patience: 0
-apply_deepspeed: true
+apply_deepspeed: "true"
deepspeed_stage: 3
-apply_ort: false
+apply_ort: "false"
system_properties: '${{parent.inputs.system_properties}}'
registered_model_name: '${{parent.inputs.registered_model_name}}'
outputs:
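Why the quoting change above matters: argparse's plain type=bool converts via bool(str), and any non-empty string, including "false", is truthy, so a boolean pipeline parameter would silently arrive as True. The str2bool converter used by these flags parses the text explicitly, which is why the pipeline now passes "true"/"false" as strings. A minimal sketch of the idea (the repository's actual str2bool helper is not shown in this diff, so its exact behavior is an assumption):

```python
from argparse import ArgumentParser, ArgumentTypeError


def str2bool(value: str) -> bool:
    """Parse a 'true'/'false' string into a real bool (sketch of the assumed helper)."""
    lowered = value.lower()
    if lowered in ("true", "1", "yes"):
        return True
    if lowered in ("false", "0", "no"):
        return False
    raise ArgumentTypeError(f"expected a boolean string, got {value!r}")


parser = ArgumentParser()
parser.add_argument("--apply_deepspeed", type=str2bool)
parser.add_argument("--naive_flag", type=bool)  # the pitfall: bool("false") is True

args = parser.parse_args(["--apply_deepspeed", "false", "--naive_flag", "false"])
print(args.apply_deepspeed)  # False: parsed correctly
print(args.naive_flag)       # True: why plain type=bool is avoided
```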
File 3 of 3: register_model/register_model.py
@@ -16,12 +16,15 @@
from azureml.core import Workspace
from azureml.core.run import Run, _OfflineRun

-from azureml.acft.common_components import get_logger_app
+from azureml.acft.common_components import get_logger_app, set_logging_parameters, LoggingLiterals
+from azureml.acft.contrib.hf import VERSION, PROJECT_NAME
+from azureml.acft.contrib.hf.nlp.constants.constants import LOGS_TO_BE_FILTERED_IN_APPINSIGHTS


logger = get_logger_app("azureml.acft.contrib.hf.scripts.components.scripts.register_model.register_model")


+COMPONENT_NAME = "ACFT-Register_Model"
SUPPORTED_MODEL_ASSET_TYPES = [Model.Framework.CUSTOM, "PRESETS"]
# omitting underscores which is supported in model name for consistency
VALID_MODEL_NAME_PATTERN = r"^[a-zA-Z0-9-]+$"
@@ -54,6 +57,11 @@ def parse_args():
        choices=[True, False],
        help="convert pytorch model to safetensors format"
    )
+    parser.add_argument(
+        "--task_name",
+        type=str,
+        help="Finetuning task name",
+    )
    parser.add_argument(
        "--copy_model_to_output",
        type=str2bool,
@@ -242,6 +250,16 @@ def register_model(args: Namespace):
if __name__ == "__main__":
    args = parse_args()

+    set_logging_parameters(
+        task_type=args.task_name,
+        acft_custom_dimensions={
+            LoggingLiterals.PROJECT_NAME: PROJECT_NAME,
+            LoggingLiterals.PROJECT_VERSION_NUMBER: VERSION,
+            LoggingLiterals.COMPONENT_NAME: COMPONENT_NAME
+        },
+        azureml_pkg_denylist_logging_patterns=LOGS_TO_BE_FILTERED_IN_APPINSIGHTS,
+    )

    # convert to safe tensors
    if args.convert_to_safetensors:
        convert_lora_weights_to_safetensors(args.model_path)
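With this change, set_logging_parameters runs at the top of the __main__ block, so the safetensors conversion and registration steps that follow emit telemetry tagged with the project, version, and component custom dimensions, with denylisted AzureML log patterns filtered from Application Insights. A quick local parse check, sketched under assumptions: the script is importable as register_model, and the flags mirror the component command in file 1.

```python
# Sketch of a local smoke test for the new --task_name flag (hypothetical
# import path; flag values mirror the component command above).
import sys

from register_model import parse_args

sys.argv = [
    "register_model.py",
    "--task_name", "TextGeneration",
    "--finetune_args_path", "pytorch_model_folder/finetune_args.json",
    "--registration_details_folder", "./registration_details",  # any local folder for a dry parse
    "--model_path", "pytorch_model_folder/peft_adapter_weights",
    "--convert_to_safetensors", "true",
    "--copy_model_to_output", "true",
]

args = parse_args()
assert args.task_name == "TextGeneration"
assert args.convert_to_safetensors is True  # str2bool turned the "true" string into a bool
```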
