
Commit

Merge branch 'main' into unit_tests2
tharapalanivel authored Mar 11, 2024
2 parents 851d0ec + 0729820 commit e23ab15
Showing 3 changed files with 58 additions and 26 deletions.
52 changes: 50 additions & 2 deletions CONTRIBUTING.md
@@ -80,10 +80,58 @@ pip install -r requirements.txt
pip install -U datasets
pip install -e .
```
<details>
<summary>Linting</summary>

### Unit tests
To lint your code:
```shell
tox -e lint
```

We use Pylint to check your Python code for errors and adherence to coding standards and conventions, and to provide refactoring suggestions.

Pylint emits [messages](https://pylint.pycqa.org/en/latest/user_guide/messages/index.html) that provide explanations of the failed checks.

You should address all messages, in the following order:
1. Fix each reported message. See the message's [description](https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html#messages-overview) for guidance on how to resolve it.
2. Disable a message (e.g. `unbalanced-tuple-unpacking`) caused by a particular line of code:
```python
a, b = ... # pylint: disable=unbalanced-tuple-unpacking
```
Please see [here](https://pylint.pycqa.org/en/latest/user_guide/messages/message_control.html#block-disables) for the pragma syntax.

3. Disable a checker globally. Please extend the `disable=` list in the [pylintrc](.pylintrc) file.
> Note: Disable checkers globally only if there is a good reason.
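
As a rough illustration (the repository's actual `.pylintrc` contents may differ), a globally disabled checker is appended to that list like so:
```ini
[MESSAGES CONTROL]
# Hypothetical example: extend the existing list rather than replacing it.
disable=duplicate-code,
        unbalanced-tuple-unpacking
```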
</details>

<details>
<summary>Formatting</summary>

To format your code:
```shell
tox -e fmt
```
We use the [black](https://github.com/psf/black) formatter to format the code.

Optionally, you can install the git pre-commit hooks to format the code automatically on each commit:
```shell
brew install pre-commit
pre-commit install
```
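
The hooks themselves are declared in a `.pre-commit-config.yaml` at the repository root. As a minimal sketch (the repository's actual configuration and pinned revision may differ), a black hook entry looks roughly like:
```yaml
repos:
  - repo: https://github.com/psf/black
    # Illustrative pin; use whichever release the project standardizes on.
    rev: 24.2.0
    hooks:
      - id: black
```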
</details>

<details>
<summary>Unit tests</summary>

To run unit tests:
```shell
tox -e py
```
Running unit tests ensures your contributions do not break existing code.
We use the [pytest](https://docs.pytest.org/) framework to run unit tests. The framework is set up to run all `test_*.py` or `*_test.py` files in the [tests](./tests) directory.
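
For instance, a minimal test that pytest would discover under that convention (hypothetical file name and contents) looks like:
```python
# tests/test_example.py -- hypothetical placement and name
def test_sanity():
    # pytest collects functions prefixed with `test_` automatically.
    assert 1 + 1 == 2
```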

Work in progress, to be completed soon.
> Optionally, run the `make test` command to do formatting, linting, and testing at once.
</details>

## Your First Code Contribution

8 changes: 6 additions & 2 deletions build/launch_training.py
@@ -39,8 +39,12 @@
def txt_to_obj(txt):
    base64_bytes = txt.encode("ascii")
    message_bytes = base64.b64decode(base64_bytes)
-    obj = pickle.loads(message_bytes)
-    return obj
+    try:
+        # If the bytes represent JSON string
+        return json.loads(message_bytes)
+    except UnicodeDecodeError:
+        # Otherwise the bytes are a pickled python dictionary
+        return pickle.loads(message_bytes)
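
As a quick illustration of the new decode path, the self-contained sketch below (hypothetical payload values) round-trips a JSON payload through the same base64 encoding the launcher expects:
```python
import base64
import json
import pickle

def txt_to_obj(txt):
    """Mirror of the helper above: base64-decode, then try JSON before pickle."""
    message_bytes = base64.b64decode(txt.encode("ascii"))
    try:
        return json.loads(message_bytes)
    except UnicodeDecodeError:
        return pickle.loads(message_bytes)

payload = {"model_name_or_path": "some-model", "num_train_epochs": 1}
encoded = base64.b64encode(json.dumps(payload).encode("ascii")).decode("ascii")
assert txt_to_obj(encoded) == payload
```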


def get_highest_checkpoint(dir_path):
24 changes: 2 additions & 22 deletions tuning/sft_trainer.py
@@ -44,17 +44,6 @@
from tuning.utils.data_type_utils import get_torch_dtype


-class PeftSavingCallback(TrainerCallback):
-    def on_save(self, args, state, control, **kwargs):
-        checkpoint_path = os.path.join(
-            args.output_dir, f"checkpoint-{state.global_step}"
-        )
-        kwargs["model"].save_pretrained(checkpoint_path)
-
-        if "pytorch_model.bin" in os.listdir(checkpoint_path):
-            os.remove(os.path.join(checkpoint_path, "pytorch_model.bin"))


class FileLoggingCallback(TrainerCallback):
"""Exports metrics, e.g., training loss to a file in the checkpoint directory."""

@@ -118,7 +107,6 @@ def train(
        None for fine tuning
        The peft configuration to pass to trainer
    """
-    run_distributed = int(os.environ.get("WORLD_SIZE", "1")) > 1

    logger = logging.get_logger("sft_trainer")

@@ -132,11 +120,6 @@
    ):
        raise ValueError("gradient_accumulation_steps has to be an integer >= 1")

-    # make sure to unset FSDP args when running on single gpu
-    if not run_distributed:
-        train_args.fsdp = ""
-        train_args.fsdp_config = {"xla": False}

    task_type = "CAUSAL_LM"
    model = AutoModelForCausalLM.from_pretrained(
        model_args.model_name_or_path,
@@ -147,8 +130,6 @@

    peft_config = get_hf_peft_config(task_type, peft_config)

-    model.gradient_checkpointing_enable()

    # TODO: Move these to a config as well
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.model_name_or_path, cache_dir=train_args.cache_dir, use_fast=True
@@ -239,8 +220,7 @@ def train(

    aim_callback = get_aimstack_callback()
    file_logger_callback = FileLoggingCallback(logger)
-    peft_saving_callback = PeftSavingCallback()
-    callbacks = [aim_callback, peft_saving_callback, file_logger_callback]
+    callbacks = [aim_callback, file_logger_callback]

    if train_args.packing:
        logger.info("Packing is set to True")
@@ -281,7 +261,7 @@ def train(
        peft_config=peft_config,
    )

-    if run_distributed and peft_config is not None:
+    if trainer.is_fsdp_enabled and peft_config is not None:
        trainer.accelerator.state.fsdp_plugin.auto_wrap_policy = fsdp_auto_wrap_policy(
            model
        )
