diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bc578035c..f5ea59641 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -79,10 +79,58 @@ Installation: pip install -U datasets pip install -e "." ``` +
+### Linting -### Unit tests +To lint your code: +```shell +tox -e lint +``` + +We use Pylint to check your Python code for errors, coding standards, code conventions, and refactoring suggestions. + +Pylint emits [messages](https://pylint.pycqa.org/en/latest/user_guide/messages/index.html) that provide explanations of the failed checks. + +You should fix all messages in the following order: +1. Fix each message provided. Follow the message [description](https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html#messages-overview) for guidance on how to fix it. +2. Disable a message (e.g. `unbalanced-tuple-unpacking`) caused by a particular line of code: + ```python + a, b = ... # pylint: disable=unbalanced-tuple-unpacking + ``` + Please see [here](https://pylint.pycqa.org/en/latest/user_guide/messages/message_control.html#block-disables) for the pragma syntax; a block-level disable is sketched after this section. + +3. Disable a checker globally. Please extend the `disable=` list in the [pylintrc](.pylintrc) file. + > Note: Disable checkers only if there is a good reason. +
+ +
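For illustration, a block-level pragma can disable a checker over a span of code and re-enable it afterwards. The snippet below is a hypothetical example, not code from this repository:

```python
# pylint: disable=invalid-name
# The capitalized module-level names below would normally trigger the
# invalid-name checker, so it is silenced for this span only.
X_train = [[0.1, 0.2], [0.3, 0.4]]
y_train = [0, 1]
# pylint: enable=invalid-name

print(len(X_train), len(y_train))
```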
+### Formatting + +To format your code: +```shell +tox -e fmt +``` +We use the [black](https://github.com/psf/black) formatter to format the code. + +You can optionally install git pre-commit hooks if you would like the code formatted automatically on each commit: +``` +brew install pre-commit +pre-commit install +``` +
+ +
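To give a rough sense of what the formatter does, here is a toy before/after pair (illustrative only, not code from this repository):

```python
# Before running black
def make_config(lr,epochs = 3,name='run'):
    return {'lr':lr,'epochs':epochs,'name':name}


# After running black: normalized spacing and double quotes
def make_config(lr, epochs=3, name="run"):
    return {"lr": lr, "epochs": epochs, "name": name}
```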
+### Unit tests + +To run unit tests: +```shell +tox -e py +``` +Running unit tests ensures your contributions do not break existing code. +We use the [pytest](https://docs.pytest.org/) framework to run unit tests. The framework is set up to run all `test_*.py` or `*_test.py` files in the [tests](./tests) directory. -Work in process, to be completed soon. +> Optionally, run the `make test` command to do formatting, linting, and testing at once. +
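As a minimal sketch of that discovery convention, a hypothetical test module placed in the tests directory only needs a matching filename and `test_`-prefixed functions to be collected:

```python
# tests/test_example.py (hypothetical) -- pytest collects this module because
# the filename matches test_*.py, and runs every function named test_*.


def add(a, b):
    return a + b


def test_add():
    assert add(2, 3) == 5
```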
## Your First Code Contribution diff --git a/build/launch_training.py b/build/launch_training.py index 57ede1f6b..5592d5888 100644 --- a/build/launch_training.py +++ b/build/launch_training.py @@ -39,8 +39,12 @@ def txt_to_obj(txt): base64_bytes = txt.encode("ascii") message_bytes = base64.b64decode(base64_bytes) - obj = pickle.loads(message_bytes) - return obj + try: + # If the bytes represent JSON string + return json.loads(message_bytes) + except UnicodeDecodeError: + # Otherwise the bytes are a pickled python dictionary + return pickle.loads(message_bytes) def get_highest_checkpoint(dir_path): diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 59943a750..29c5fd299 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -44,17 +44,6 @@ from tuning.utils.data_type_utils import get_torch_dtype -class PeftSavingCallback(TrainerCallback): - def on_save(self, args, state, control, **kwargs): - checkpoint_path = os.path.join( - args.output_dir, f"checkpoint-{state.global_step}" - ) - kwargs["model"].save_pretrained(checkpoint_path) - - if "pytorch_model.bin" in os.listdir(checkpoint_path): - os.remove(os.path.join(checkpoint_path, "pytorch_model.bin")) - - class FileLoggingCallback(TrainerCallback): """Exports metrics, e.g., training loss to a file in the checkpoint directory.""" @@ -118,7 +107,6 @@ def train( None for fine tuning The peft configuration to pass to trainer """ - run_distributed = int(os.environ.get("WORLD_SIZE", "1")) > 1 logger = logging.get_logger("sft_trainer") @@ -132,11 +120,6 @@ def train( ): raise ValueError("gradient_accumulation_steps has to be an integer >= 1") - # make sure to unset FSDP args when running on single gpu - if not run_distributed: - train_args.fsdp = "" - train_args.fsdp_config = {"xla": False} - task_type = "CAUSAL_LM" model = AutoModelForCausalLM.from_pretrained( model_args.model_name_or_path, @@ -147,8 +130,6 @@ def train( peft_config = get_hf_peft_config(task_type, peft_config) - model.gradient_checkpointing_enable() - # TODO: Move these to a config as well tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, cache_dir=train_args.cache_dir, use_fast=True @@ -239,8 +220,7 @@ def train( aim_callback = get_aimstack_callback() file_logger_callback = FileLoggingCallback(logger) - peft_saving_callback = PeftSavingCallback() - callbacks = [aim_callback, peft_saving_callback, file_logger_callback] + callbacks = [aim_callback, file_logger_callback] if train_args.packing: logger.info("Packing is set to True") @@ -281,7 +261,7 @@ def train( peft_config=peft_config, ) - if run_distributed and peft_config is not None: + if trainer.is_fsdp_enabled and peft_config is not None: trainer.accelerator.state.fsdp_plugin.auto_wrap_policy = fsdp_auto_wrap_policy( model )
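For context on the `launch_training.py` change above, the sketch below shows the JSON-first, pickle-fallback decoding pattern in isolation. The function and variable names are illustrative rather than the repository's API, and the `except` clause is broadened to also catch malformed JSON:

```python
import base64
import json
import pickle


def decode_payload(txt: str):
    """Decode a base64 payload holding either a JSON string or a pickled object."""
    message_bytes = base64.b64decode(txt.encode("ascii"))
    try:
        # JSON payloads decode cleanly as UTF-8 text.
        return json.loads(message_bytes)
    except (UnicodeDecodeError, json.JSONDecodeError):
        # Binary pickle payloads are not valid UTF-8/JSON, so fall back to unpickling.
        return pickle.loads(message_bytes)


# Round trip with a JSON payload.
encoded = base64.b64encode(json.dumps({"learning_rate": 1e-5}).encode()).decode("ascii")
assert decode_payload(encoded) == {"learning_rate": 1e-5}
```

Trying JSON first keeps the common, human-readable path cheap while preserving backward compatibility with pickled payloads, which matches the intent of the comments in the diff.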