From f06a5b952148394e659137a5f5723e792a491fc0 Mon Sep 17 00:00:00 2001 From: Savelyev Date: Wed, 23 Feb 2022 23:03:08 +0300 Subject: [PATCH 1/4] Add fair comparison of INT8 and FP32 models --- ...-pytorch-quantization-aware-training.ipynb | 29 ++++++++++--- ...nsorflow-quantization-aware-training.ipynb | 42 ++++++++++++++++--- 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb b/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb index 457bc37725f..849678f7f98 100644 --- a/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb +++ b/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb @@ -98,6 +98,7 @@ "import zipfile\n", "from pathlib import Path\n", "import logging\n", + "import copy\n", "\n", "import torch\n", "import nncf # Important - should be imported directly after torch\n", @@ -510,6 +511,9 @@ " if is_best:\n", " checkpoint = {\"state_dict\": model.state_dict(), \"acc1\": acc1}\n", " torch.save(checkpoint, fp32_pth_path)\n", + " \n", + "# clone fp32 model for later\n", + "model_fp32 = copy.deepcopy(model)\n", "\n", "print(f\"Accuracy of FP32 model: {best_acc1:.3f}\")" ] @@ -690,16 +694,31 @@ }, "outputs": [], "source": [ - "compression_lr = init_lr / 10\n", - "optimizer = torch.optim.Adam(model.parameters(), lr=compression_lr)\n", + "fine_tune_lr = init_lr / 10\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=fine_tune_lr)\n", "\n", "# train for one epoch with NNCF\n", "train(train_loader, model, criterion, optimizer, epoch=0)\n", "\n", "# evaluate on validation set after Quantization-Aware Training (QAT case)\n", - "acc1 = validate(val_loader, model, criterion)\n", + "acc1_int8 = validate(val_loader, model, criterion)\n", + "\n", + "print(f\"Accuracy of tuned INT8 model: {acc1_int8:.3f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fine-tune the original fp32 model and compare it to the fine-tuned int8 model\n", + "optimizer = torch.optim.Adam(model_fp32.parameters(), lr=fine_tune_lr)\n", + "train(train_loader, model_fp32, criterion, optimizer, epoch=0)\n", + "acc1_fp32 = validate(val_loader, model_fp32, criterion)\n", "\n", - "print(f\"Accuracy of tuned INT8 model: {acc1:.3f}\")" + "print(f\"Accuracy of tuned FP32 model: {acc1_fp32:.3f}\")\n", + "print(f\"Accuracy gain/drop of INT8 model over FP32 model: {acc1_int8 - acc1_fp32:.3f}\")" ] }, { @@ -872,4 +891,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb b/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb index 6f626bb9927..e3aa266fac1 100644 --- a/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb +++ b/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb @@ -221,7 +221,7 @@ "outputs": [], "source": [ "IMG_SHAPE = IMG_SIZE + (3,)\n", - "model = ResNet18(input_shape=IMG_SHAPE)" + "model = model_fp32 = ResNet18(input_shape=IMG_SHAPE)" ] }, { @@ -245,6 +245,7 @@ "source": [ "# Load the floating-point weights\n", "model.load_weights(fp32_h5_path)\n", + "model_fp32.load_weights(fp32_h5_path)\n", "\n", "# Compile the 
floating-point model\n", "model.compile(loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),\n", @@ -402,9 +403,40 @@ " epochs=1)\n", "\n", "# Validate the int8 model\n", - "test_loss, test_acc = model.evaluate(validation_dataset,\n", - " callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))\n", - "print(f\"\\nAccuracy of INT8 model after fine-tuning: {test_acc:.3f}\")" + "test_loss_int8, test_acc_int8 = model.evaluate(validation_dataset,\n", + " callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))\n", + "print(f\"\\nAccuracy of INT8 model after fine-tuning: {test_acc_int8:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "8858ebf4", + "metadata": {}, + "source": [ + "Fine-tune the original FP32 model and comapre it to the fine-tuned INT8 model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7c98dcf", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile the fp32 model\n", + "model_fp32.compile(optimizer=tf.keras.optimizers.Adam(lr=LR),\n", + " loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),\n", + " metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc@1')])\n", + "\n", + "# Train the fp32 model\n", + "model_fp32.fit(train_dataset,\n", + " epochs=1)\n", + "\n", + "# Validate the fp32 model\n", + "test_loss_fp32, test_acc_fp32 = model_fp32.evaluate(validation_dataset,\n", + " callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))\n", + "print(f\"\\nAccuracy of FP32 model after fine-tuning: {test_acc_fp32:.3f}\")\n", + "print(f\"\\nAccuracy gain/drop of INT8 model over FP32 model: {test_acc_int8 - test_acc_fp32:.3f}\")" ] }, { @@ -557,4 +589,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file From ffd947e7bcbde05083a7828644311f426f14fae9 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 24 Feb 2022 17:45:04 +0300 Subject: [PATCH 2/4] Update 305-tensorflow-quantization-aware-training.ipynb Typo fix --- .../305-tensorflow-quantization-aware-training.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb b/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb index e3aa266fac1..08f637503bb 100644 --- a/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb +++ b/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb @@ -413,7 +413,7 @@ "id": "8858ebf4", "metadata": {}, "source": [ - "Fine-tune the original FP32 model and comapre it to the fine-tuned INT8 model" + "Fine-tune the original FP32 model and compare it to the fine-tuned INT8 model" ] }, { @@ -589,4 +589,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} From 9fc195a0d52e6c23c19e0b4cbc381d933daf4e14 Mon Sep 17 00:00:00 2001 From: Savelyev Date: Wed, 2 Mar 2022 16:58:10 +0300 Subject: [PATCH 3/4] Revert addition of FP32 model finetuning. Clear up description of INT8 model finetuning. Fix a bug with validation dataset for notebook 302. 
--- ...-pytorch-quantization-aware-training.ipynb | 74 ++++++++++--------- ...nsorflow-quantization-aware-training.ipynb | 47 ++---------- 2 files changed, 49 insertions(+), 72 deletions(-) diff --git a/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb b/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb index 849678f7f98..227bb5b6eb9 100644 --- a/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb +++ b/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb @@ -98,7 +98,6 @@ "import zipfile\n", "from pathlib import Path\n", "import logging\n", - "import copy\n", "\n", "import torch\n", "import nncf # Important - should be imported directly after torch\n", @@ -187,12 +186,30 @@ " zip_ref = zipfile.ZipFile(archive_path, \"r\")\n", " zip_ref.extractall(path=data_dir)\n", " zip_ref.close()\n", - " print(f\"Successfully downloaded and extracted dataset to: {data_dir}\")\n", "\n", + "def prepare_tiny_imagenet_200(dataset_dir: Path):\n", + " # format validation set the same way as train set is formatted\n", + " val_data_dir = dataset_dir / 'val'\n", + " val_annotations_file = val_data_dir / 'val_annotations.txt'\n", + " with open(val_annotations_file, 'r') as f:\n", + " val_annotation_data = map(lambda line: line.split('\\t')[:2], f.readlines())\n", + " val_images_dir = val_data_dir / 'images'\n", + " for image_filename, image_label in val_annotation_data:\n", + " from_image_filepath = val_images_dir / image_filename\n", + " to_image_dir = val_data_dir / image_label\n", + " if not to_image_dir.exists():\n", + " to_image_dir.mkdir()\n", + " to_image_filepath = to_image_dir / image_filename\n", + " from_image_filepath.rename(to_image_filepath)\n", + " val_annotations_file.unlink()\n", + " val_images_dir.rmdir()\n", + " \n", "\n", "DATASET_DIR = DATA_DIR / \"tiny-imagenet-200\"\n", "if not DATASET_DIR.exists():\n", - " download_tiny_imagenet_200(DATA_DIR)" + " download_tiny_imagenet_200(DATA_DIR)\n", + " prepare_tiny_imagenet_200(DATASET_DIR)\n", + " print(f\"Successfully downloaded and prepared dataset at: {DATASET_DIR}\")" ] }, { @@ -444,9 +461,10 @@ "\n", "# Data loading code\n", "train_dir = DATASET_DIR / \"train\"\n", + "val_dir = DATASET_DIR / \"val\"\n", "normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n", "\n", - "dataset = datasets.ImageFolder(\n", + "train_dataset = datasets.ImageFolder(\n", " train_dir,\n", " transforms.Compose(\n", " [\n", @@ -457,8 +475,15 @@ " ]\n", " ),\n", ")\n", - "train_dataset, val_dataset = torch.utils.data.random_split(\n", - " dataset, [80000, 20000], generator=torch.Generator().manual_seed(0)\n", + "val_dataset = datasets.ImageFolder(\n", + " val_dir,\n", + " transforms.Compose(\n", + " [\n", + " transforms.Resize(image_size),\n", + " transforms.ToTensor(),\n", + " normalize,\n", + " ]\n", + " ),\n", ")\n", "\n", "train_loader = torch.utils.data.DataLoader(\n", @@ -485,8 +510,6 @@ }, "outputs": [], "source": [ - "acc1 = 0\n", - "best_acc1 = 0\n", "if pretrained_on_tiny_imagenet:\n", " #\n", " # ** WARNING: torch.load functionality uses Python's pickling module that\n", @@ -495,8 +518,9 @@ " #\n", " checkpoint = torch.load(str(fp32_pth_path), map_location=\"cpu\")\n", " model.load_state_dict(checkpoint[\"state_dict\"], strict=True)\n", - " best_acc1 = checkpoint[\"acc1\"]\n", + " acc1_fp32 = checkpoint[\"acc1\"]\n", "else:\n", + " best_acc1 = 0\n", 
" # Training loop\n", " for epoch in range(0, epochs):\n", " # run a single training epoch\n", @@ -511,11 +535,9 @@ " if is_best:\n", " checkpoint = {\"state_dict\": model.state_dict(), \"acc1\": acc1}\n", " torch.save(checkpoint, fp32_pth_path)\n", - " \n", - "# clone fp32 model for later\n", - "model_fp32 = copy.deepcopy(model)\n", - "\n", - "print(f\"Accuracy of FP32 model: {best_acc1:.3f}\")" + " acc1_fp32 = best_acc1\n", + " \n", + "print(f\"Accuracy of FP32 model: {acc1_fp32:.3f}\")" ] }, { @@ -681,7 +703,7 @@ "source": [ "## Fine-tune the Compressed Model\n", "\n", - "At this step, a regular fine-tuning process is applied to restore accuracy drop. Normally, several epochs of tuning are required with a small learning rate, the same that is usually used at the end of the training of the original model. No other changes in the training pipeline are required. Here is a simple example." + "At this step, a regular fine-tuning process is applied to further improve quantized model accuracy. Normally, several epochs of tuning are required with a small learning rate, the same that is usually used at the end of the training of the original model. No other changes in the training pipeline are required. Here is a simple example." ] }, { @@ -694,8 +716,8 @@ }, "outputs": [], "source": [ - "fine_tune_lr = init_lr / 10\n", - "optimizer = torch.optim.Adam(model.parameters(), lr=fine_tune_lr)\n", + "compression_lr = init_lr / 10\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=compression_lr)\n", "\n", "# train for one epoch with NNCF\n", "train(train_loader, model, criterion, optimizer, epoch=0)\n", @@ -703,22 +725,8 @@ "# evaluate on validation set after Quantization-Aware Training (QAT case)\n", "acc1_int8 = validate(val_loader, model, criterion)\n", "\n", - "print(f\"Accuracy of tuned INT8 model: {acc1_int8:.3f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# fine-tune the original fp32 model and compare it to the fine-tuned int8 model\n", - "optimizer = torch.optim.Adam(model_fp32.parameters(), lr=fine_tune_lr)\n", - "train(train_loader, model_fp32, criterion, optimizer, epoch=0)\n", - "acc1_fp32 = validate(val_loader, model_fp32, criterion)\n", - "\n", - "print(f\"Accuracy of tuned FP32 model: {acc1_fp32:.3f}\")\n", - "print(f\"Accuracy gain/drop of INT8 model over FP32 model: {acc1_int8 - acc1_fp32:.3f}\")" + "print(f\"Accuracy of tuned INT8 model: {acc1_int8:.3f}\")\n", + "print(f\"Accuracy gain/drop of tuned INT8 model over pre-trained FP32 model: {acc1_int8 - acc1_fp32:.3f}\")" ] }, { diff --git a/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb b/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb index e3aa266fac1..ed75475211c 100644 --- a/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb +++ b/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb @@ -221,7 +221,7 @@ "outputs": [], "source": [ "IMG_SHAPE = IMG_SIZE + (3,)\n", - "model = model_fp32 = ResNet18(input_shape=IMG_SHAPE)" + "model = ResNet18(input_shape=IMG_SHAPE)" ] }, { @@ -245,16 +245,15 @@ "source": [ "# Load the floating-point weights\n", "model.load_weights(fp32_h5_path)\n", - "model_fp32.load_weights(fp32_h5_path)\n", "\n", "# Compile the floating-point model\n", 
"model.compile(loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),\n", " metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc@1')])\n", "\n", "# Validate the floating-point model\n", - "test_loss, test_acc = model.evaluate(validation_dataset,\n", + "test_loss, acc_fp32 = model.evaluate(validation_dataset,\n", " callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))\n", - "print(f\"\\nAccuracy of FP32 model: {test_acc:.3f}\")" + "print(f\"\\nAccuracy of FP32 model: {acc_fp32:.3f}\")" ] }, { @@ -382,7 +381,7 @@ "source": [ "## Fine-tune the Compressed Model\n", "\n", - "At this step, a regular fine-tuning process is applied to restore accuracy drop. Normally, several epochs of tuning are required with a small learning rate, the same that is usually used at the end of the training of the original model. No other changes in the training pipeline are required. Here is a simple example." + "At this step, a regular fine-tuning process is applied to further improve quantized model accuracy. Normally, several epochs of tuning are required with a small learning rate, the same that is usually used at the end of the training of the original model. No other changes in the training pipeline are required. Here is a simple example." ] }, { @@ -403,40 +402,10 @@ " epochs=1)\n", "\n", "# Validate the int8 model\n", - "test_loss_int8, test_acc_int8 = model.evaluate(validation_dataset,\n", - " callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))\n", - "print(f\"\\nAccuracy of INT8 model after fine-tuning: {test_acc_int8:.3f}\")" - ] - }, - { - "cell_type": "markdown", - "id": "8858ebf4", - "metadata": {}, - "source": [ - "Fine-tune the original FP32 model and comapre it to the fine-tuned INT8 model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7c98dcf", - "metadata": {}, - "outputs": [], - "source": [ - "# Compile the fp32 model\n", - "model_fp32.compile(optimizer=tf.keras.optimizers.Adam(lr=LR),\n", - " loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),\n", - " metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc@1')])\n", - "\n", - "# Train the fp32 model\n", - "model_fp32.fit(train_dataset,\n", - " epochs=1)\n", - "\n", - "# Validate the fp32 model\n", - "test_loss_fp32, test_acc_fp32 = model_fp32.evaluate(validation_dataset,\n", - " callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))\n", - "print(f\"\\nAccuracy of FP32 model after fine-tuning: {test_acc_fp32:.3f}\")\n", - "print(f\"\\nAccuracy gain/drop of INT8 model over FP32 model: {test_acc_int8 - test_acc_fp32:.3f}\")" + "test_loss, acc_int8 = model.evaluate(validation_dataset,\n", + " callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))\n", + "print(f\"\\nAccuracy of INT8 model after fine-tuning: {test_acc:.3f}\")\n", + "print(f\"\\nAccuracy gain/drop of tuned INT8 model over pre-trained FP32 model: {acc_int8 - acc_fp32:.3f}\")" ] }, { From 1998728dd002b87d636c4024269c493b98c2bbc2 Mon Sep 17 00:00:00 2001 From: Savelyev Date: Fri, 4 Mar 2022 10:58:31 +0300 Subject: [PATCH 4/4] Tweaked CI patching instructions --- .../302-pytorch-quantization-aware-training.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb b/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb index 227bb5b6eb9..ac52dafb3ac 100644 --- 
a/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb +++ b/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb @@ -443,8 +443,8 @@ "outputId": "183bdbb6-4016-463c-8d76-636a6b3a9778", "tags": [], "test_replace": { - "80000, 20000": "300, 100", - "dataset, [": "torch.utils.data.Subset(dataset, torch.arange(400)), [" + "train_dataset, ": "torch.utils.data.Subset(train_dataset, torch.arange(300)), ", + "val_dataset, ": "torch.utils.data.Subset(val_dataset, torch.arange(100)), " } }, "outputs": [],
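
Note for reviewers: the "test_replace" keys updated in PATCH 4/4 are per-cell notebook metadata used by the notebooks' CI to shrink the training workload before execution — each key is a source substring that is swapped for its value, so the full Tiny ImageNet train/validation splits are replaced by 300- and 100-image subsets. PATCH 3/4 renamed `dataset` to `train_dataset`/`val_dataset`, which is why the old keys ("80000, 20000" and "dataset, [") no longer match the cell source and had to be retargeted. Below is a minimal sketch of how a runner could apply such replacements; the real CI utility is not part of this series, so the helper name and JSON handling are illustrative assumptions, not the actual implementation.

    import json
    from pathlib import Path

    def apply_test_replacements(notebook_path: Path) -> None:
        # Hypothetical helper: rewrite each cell's source in place using the
        # substring -> replacement pairs stored in its "test_replace" metadata.
        notebook = json.loads(notebook_path.read_text(encoding="utf-8"))
        for cell in notebook["cells"]:
            replacements = cell.get("metadata", {}).get("test_replace", {})
            if not replacements:
                continue
            # nbformat stores cell source as either one string or a list of lines
            source = cell["source"]
            if not isinstance(source, str):
                source = "".join(source)
            for old, new in replacements.items():
                source = source.replace(old, new)
            cell["source"] = source.splitlines(keepends=True)
        notebook_path.write_text(json.dumps(notebook, indent=1), encoding="utf-8")

Under these assumptions, CI would call apply_test_replacements(Path("notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb")) before executing the notebook; after patching, the torch.utils.data.Subset wrappers limit training to 300 images and validation to 100, keeping CI runs short while exercising the full pipeline.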