Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate INT8 mixed-precision from torchao 0.7 #1552

Open
wants to merge 49 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
cf3355e
add int8mp
gau-nernst Sep 12, 2024
d3bbaeb
Merge branch 'pytorch:main' into int8mp
gau-nernst Sep 13, 2024
5a61d3e
add a flag
gau-nernst Sep 13, 2024
560039d
create a quantizer
gau-nernst Sep 13, 2024
2b6e066
add notes on when speedup can be expected
gau-nernst Sep 13, 2024
d32f5b8
clarify doc message
gau-nernst Sep 13, 2024
60dad97
update docs
gau-nernst Sep 13, 2024
8395070
add tiny log
gau-nernst Sep 13, 2024
b7b8a7d
update comment
gau-nernst Sep 13, 2024
2829b03
add guard on torch version and CUDA sm
gau-nernst Sep 13, 2024
688a1c8
add integration test
gau-nernst Sep 13, 2024
21391ad
update test
gau-nernst Sep 13, 2024
f885d56
use dummy alpaca
gau-nernst Sep 13, 2024
7db782c
fix typo
gau-nernst Sep 14, 2024
8306f9a
Merge branch 'pytorch:main' into int8mp
gau-nernst Sep 14, 2024
25a2451
convert speed test to smoke test
gau-nernst Sep 14, 2024
86d5f04
Merge branch 'int8mp' of github.com:gau-nernst/torchtune into int8mp
gau-nernst Sep 14, 2024
6094cdb
fix test
gau-nernst Sep 14, 2024
19a2d3e
add ao version guard
gau-nernst Sep 14, 2024
faec18d
fix
gau-nernst Sep 14, 2024
f4f1945
Merge branch 'pytorch:main' into int8mp
gau-nernst Sep 14, 2024
8fc2826
attempt LoRA
gau-nernst Sep 14, 2024
911df57
fix lora
gau-nernst Sep 15, 2024
51bbeac
skip LoRA
gau-nernst Sep 15, 2024
1e5ae92
skip NF4
gau-nernst Sep 15, 2024
1e4eaf6
Merge branch 'pytorch:main' into int8mp
gau-nernst Sep 15, 2024
30585c2
Merge branch 'main' into int8mp
felipemello1 Oct 3, 2024
45b4365
typo
felipemello1 Oct 3, 2024
3e5b040
Merge branch 'main' into int8mp
gau-nernst Nov 3, 2024
1ac836a
remove unwanted chnages
gau-nernst Nov 3, 2024
5d94cb3
use module swap
gau-nernst Nov 3, 2024
06abd88
remove unused import
gau-nernst Nov 3, 2024
0ff702e
update docs. change to mixed_precision
gau-nernst Nov 3, 2024
05563f2
add test. small fixes
gau-nernst Nov 3, 2024
3050c32
add config entries
gau-nernst Nov 3, 2024
864c6fb
remove extra compile
gau-nernst Nov 3, 2024
1fed859
fix lora finetune
gau-nernst Nov 3, 2024
66e8cdd
Merge branch 'main' into int8mp
gau-nernst Nov 8, 2024
207308b
Merge branch 'main' into int8mp
gau-nernst Nov 12, 2024
0fecc26
fix version check
gau-nernst Nov 12, 2024
39e1fc1
dont set inductor config
gau-nernst Nov 12, 2024
b2bc5ef
Merge branch 'main' into int8mp
gau-nernst Dec 5, 2024
a334986
remove LoRA
gau-nernst Dec 5, 2024
d149801
remove PyTorch version check
gau-nernst Dec 5, 2024
03a1978
add checks in init. add entries to all applicable configs
gau-nernst Dec 5, 2024
35ca06a
Merge branch 'main' into int8mp
gau-nernst Dec 10, 2024
0699aa3
add space
gau-nernst Dec 10, 2024
be9c0fb
consolidate checks
gau-nernst Dec 10, 2024
ca29866
Merge branch 'pytorch:main' into int8mp
gau-nernst Dec 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions tests/recipes/test_full_finetune_single_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
CKPT_MODEL_PATHS,
gen_log_file_name,
get_loss_values_from_metric_logger,
get_tps_values_from_metric_logger,
TOKENIZER_PATHS,
)

Expand Down Expand Up @@ -263,3 +264,89 @@ def test_gradient_accumulation(self, tmpdir, monkeypatch):

accum_loss = np.mean(get_loss_values_from_metric_logger(grad_accum_log_file))
torch.testing.assert_close(no_accum_loss, accum_loss, atol=1e-5, rtol=1e-5)


class TestFullFinetuneInt8MixedPrecisionTraining:
    """
    Integration test for INT8 mixed-precision training in the
    ``full_finetune_single_device`` recipe.

    The recipe is run twice with identical settings, once without a quantizer
    (baseline) and once with ``Int8MixedPrecisionTrainingQuantizer``, and the
    logged tokens-per-second values of the two runs are compared.
    """

    # Component path passed to the recipe through ``quantizer._component_``.
    _QUANTIZER = "torchtune.training.quantization.Int8MixedPrecisionTrainingQuantizer"

    # Both runs must share the same sequence length, otherwise the throughput
    # numbers describe different workloads and the speedup ratio is meaningless.
    # NOTE(review): 4096 is the value the INT8 run used previously (the baseline
    # used 256); confirm it fits the CI time/memory budget.
    _MAX_SEQ_LEN = 4096

    def _get_test_config_overrides(self):
        """Return the config overrides shared by every run in this test class."""
        return [
            "seed=9",
            "epochs=1",
            "max_steps_per_epoch=5",
            "optimizer=torch.optim.AdamW",
            "optimizer_in_bwd=False",
            "compile=True",
        ] + dummy_alpaca_dataset_config()

    def _build_cmd(self, tmpdir, log_file, extra_overrides=()):
        """
        Build the ``tune run`` argv for one recipe invocation.

        Args:
            tmpdir: directory used for recipe and checkpointer outputs.
            log_file: file the metric logger writes to.
            extra_overrides: additional ``key=value`` overrides for this run
                (e.g. the quantizer component).

        Returns:
            The argv list to install as ``sys.argv``.
        """
        model_type = "llama3"
        ckpt_type = "tune"
        ckpt_component = CKPT_COMPONENT_MAP["tune"]
        ckpt = model_type + "_" + ckpt_type
        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
        tokenizer_path = Path(TOKENIZER_PATHS[model_type])
        ckpt_dir = ckpt_path.parent
        model_config = MODEL_TEST_CONFIGS[model_type]

        # The argv is assembled as a list (instead of splitting one big string)
        # so that paths containing whitespace stay a single argument.
        cmd = [
            "tune",
            "run",
            "full_finetune_single_device",
            "--config",
            "llama3/8B_full_single_device",
            f"output_dir={tmpdir}",
            f"checkpointer._component_={ckpt_component}",
            f"checkpointer.checkpoint_dir='{ckpt_dir}'",
            f"checkpointer.checkpoint_files=[{ckpt_path}]",
            f"checkpointer.output_dir={tmpdir}",
            f"checkpointer.model_type={model_type.upper()}",
            f"tokenizer.path='{tokenizer_path}'",
            "tokenizer.prompt_template=null",
            f"tokenizer.max_seq_len={self._MAX_SEQ_LEN}",
            # set dataset.packed=True to have fixed input seq len
            "dataset.packed=True",
            f"metric_logger.filename={log_file}",
        ]
        # ``compile=True`` comes from the shared overrides.
        return cmd + list(extra_overrides) + self._get_test_config_overrides() + model_config

    def _run_recipe(self, cmd, monkeypatch):
        """Run the recipe in-process with ``cmd`` installed as ``sys.argv``."""
        # Make sure to clear compile state in between runs
        torch._dynamo.reset()
        monkeypatch.setattr(sys, "argv", cmd)
        with pytest.raises(SystemExit, match=""):
            runpy.run_path(TUNE_PATH, run_name="__main__")

    # TODO(review): felipemello1 asked for this test to be duplicated for the
    # LoRA recipe as well, since there may be issues around LoRA.
    @pytest.mark.integration_test
    def test_speed(self, tmpdir, monkeypatch):
        """Check that the INT8 mixed-precision run is at least 20% faster than baseline."""
        log_file_baseline = gen_log_file_name(tmpdir, suffix="baseline")
        log_file_int8mp = gen_log_file_name(tmpdir, suffix="int8mp")

        self._run_recipe(self._build_cmd(tmpdir, log_file_baseline), monkeypatch)
        self._run_recipe(
            self._build_cmd(
                tmpdir,
                log_file_int8mp,
                extra_overrides=[f"quantizer._component_={self._QUANTIZER}"],
            ),
            monkeypatch,
        )

        # skip the first iteration since it includes compile time
        tps_baseline_values = get_tps_values_from_metric_logger(log_file_baseline)[1:]
        tps_int8mp_values = get_tps_values_from_metric_logger(log_file_int8mp)[1:]
        # Fail with a clear message instead of comparing against a NaN mean.
        assert tps_baseline_values, "no tokens-per-second values logged for baseline run"
        assert tps_int8mp_values, "no tokens-per-second values logged for int8mp run"
        tps_baseline = np.mean(tps_baseline_values)
        tps_int8mp = np.mean(tps_int8mp_values)

        # check that it is at least 20% faster
        speedup = tps_int8mp / tps_baseline
        assert speedup > 1.2, speedup
18 changes: 16 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from functools import partial
from io import StringIO
from pathlib import Path
from typing import Any, Dict, Generator, List, Mapping, Optional, TextIO, Tuple, Union
from typing import Any, Generator, List, Mapping, Optional, TextIO, Tuple, Union

import pytest

Expand Down Expand Up @@ -332,7 +332,7 @@ def gpu_test(gpu_count: int = 1):
return pytest.mark.skipif(local_gpu_count < gpu_count, reason=message)


def get_loss_values_from_metric_logger(log_file_path: str) -> Dict[str, float]:
def get_loss_values_from_metric_logger(log_file_path: str) -> list[float]:
"""
Given an output directory containing metric logger .txt file,
parse the .txt and return a list of losses from each logged iteration.
Expand All @@ -343,6 +343,20 @@ def get_loss_values_from_metric_logger(log_file_path: str) -> Dict[str, float]:
return losses


def get_tps_values_from_metric_logger(log_file_path: str) -> list[float]:
    """
    Given the path to a metric logger .txt file, parse the file and return
    the list of tokens per second (tps) values, one per logged iteration,
    in the order they appear.

    Args:
        log_file_path (str): path to the metric logger .txt file.

    Returns:
        list[float]: every value logged as ``tokens_per_second_per_gpu:<number>``;
        empty if the file contains no such entry.
    """
    # Accept plain decimals ("123.45"), integers ("200") and scientific
    # notation ("1.5e+03"). Requiring "digits.digits" would drop the integer
    # form and silently truncate the exponent form to its mantissa.
    number = r"\d+(?:\.\d*)?(?:[eE][-+]?\d+)?"
    with open(log_file_path, "r") as f:
        logs = f.read()
    return [
        float(x) for x in re.findall(rf"tokens_per_second_per_gpu:({number})", logs)
    ]


def gen_log_file_name(tmpdir, suffix: Optional[str] = None) -> str:
"""
Take the tmpdir and just append a non-path version of it as the
Expand Down