diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index 13566b8..0000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-# Editor-based HTTP Client requests
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
deleted file mode 100644
index fa9b8a7..0000000
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
deleted file mode 100644
index 03d9549..0000000
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2d..0000000
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index 4b2f238..0000000
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index 9c63b12..0000000
diff --git a/.idea/pytorch-classifier.iml b/.idea/pytorch-classifier.iml
deleted file mode 100644
index ddd9297..0000000
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 94a25f7..0000000
diff --git a/README.md b/README.md
index 87a44e2..537a2e1 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,16 @@

 image classifier implemented in PyTorch.

+# Directory
+1. **[Introduction](#Introduction)**
+2. **[How to use](#Howtouse)**
+3. **[Argument Explanation](#ArgumentExplanation)**
+4. **[Model Zoo](#ModelZoo)**
+5. **[Some explanation](#Someexplanation)**
+6. **[TODO](#TODO)**
+7. **[Reference](#Reference)**
+
+
 ## Introduction

 Why should you use this code?
@@ -15,7 +25,7 @@ image classifier implemented in PyTorch.
   7. Overall-accuracy visualization. (kappa, precision, recall, f1, accuracy, mpa)

 - **Rich model zoo**
-  1. A model zoo curated by the author. Essentially all mainstream models are supported, more than 50 in total, and all of them come with ImageNet pretrained weights. [See Model Zoo for details. (Transformer series coming in a later update.)](#3)
+  1. A model zoo curated by the author. Essentially all mainstream models are supported, more than 50 in total, and all of them come with ImageNet pretrained weights. [See Model Zoo for details. (Transformer series coming in a later update.)](#ModelZoo)
   2. The supported models were integrated by the author from GitHub and torchvision, so they can be modified and improved for experiments; they are not created by simply calling a library.

 - **Rich training strategies**
@@ -33,6 +43,9 @@ image classifier implemented in PyTorch.
 - **Rich learning-rate scheduling strategies**
   This program supports learning-rate warm-up and custom schedules after warm-up. [See point 5 of Some explanation for details.](#1)

+- **Export to common inference frameworks**
+  Exporting torchscript, onnx and tensorrt inference models is currently supported.
+
 - **Simple installation**

@@ -44,11 +57,15 @@ image classifier implemented in PyTorch.
   1. Most visualization data (confusion matrix, t-SNE, per-class metrics) is saved locally in csv or log format, which makes polishing figures later easy.
   2. Most program output is formatted with PrettyTable, which greatly improves readability.

+
 ## How to use

 1. Install the required [environment](#6).
 2. Prepare the dataset according to [point 3 of Some explanation](#5).

+
 ## Argument Explanation

 - **main.py**
   The main program for training.
   Argument explanation:
@@ -66,6 +83,9 @@ image classifier implemented in PyTorch.
   - **config**
     type: string, default: config/config.py
     Path to the configuration file.
+  - **device**
+    type: string, default: ''
+    Device to use. (cuda device, i.e. 0 or 0,1,2,3 or cpu)
   - **train_path**
     type: string, default: dataset/train
     Path to the training set.
@@ -165,6 +185,9 @@ image classifier implemented in PyTorch.
   - **rdrop**
     default: False
     Whether to use R-Drop. (not supported together with knowledge distillation)
+  - **ema**
+    default: False
+    Whether to use EMA. (not supported together with knowledge distillation)

 - **metrice.py**
   The main program for computing metrics.
   Argument explanation:
@@ -179,7 +202,10 @@ image classifier implemented in PyTorch.
   - **test_path**
     type: string, default: dataset/test
     Path to the test set.
   - **label_path**
     type: string, default: dataset/label.txt
     Path to the label file.
+  - **device**
+    type: string, default: ''
+    Device to use. (cuda device, i.e. 0 or 0,1,2,3 or cpu)
   - **task**
     type: string, default: test, choices: ['train', 'val', 'test', 'fps']
     Task type. fps computes the FPS metric on its own; train, val and test compute the metrics on the corresponding split.
@@ -222,7 +248,9 @@ image classifier implemented in PyTorch.
   - **cam_type**
     type: string, default: GradCAMPlusPlus, choices: ['GradCAM', 'HiResCAM', 'ScoreCAM', 'GradCAMPlusPlus', 'AblationCAM', 'XGradCAM', 'EigenCAM', 'FullGrad']
     Type of heat-map (CAM) visualization.
+  - **device**
+    type: string, default: ''
+    Device to use. (cuda device, i.e. 0 or 0,1,2,3 or cpu)
 - **processing.py**
   The main program for preprocessing the dataset.
   Argument explanation:
@@ -238,7 +266,6 @@ image classifier implemented in PyTorch.
   - **test_size**
     type: float, default: 0.2
     Proportion of the test set.
-
 - **config/config.py**
   A configuration file for some extra parameters.
   Argument explanation:
   - **lr_scheduler**
     default: None
     Example: lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR
     Custom learning-rate scheduler.
   - **lr_scheduler_params**
     default: {'T_max': 10,'eta_min': 1e-6}
     Example: lr_scheduler_params = {'step_size': 1,'gamma': 0.95} (this default corresponds to lr_scheduler = torch.optim.lr_scheduler.StepLR)
     Parameters of the custom learning-rate scheduler; they must match lr_scheduler.
   - **random_seed**
     default: 0
     Random seed value.
   - **plot_train_batch_count**
     default: 5
     Number of training-batch visualizations to generate.
   - **custom_augment**
     default: transforms.Compose([])
     Example: transforms.Compose([transforms.RandomHorizontalFlip(p=0.5),transforms.RandomRotation(degrees=20),])
     Custom data augmentation.
+- **export.py**
+  The model export script. torchscript, onnx and tensorrt are currently supported; a minimal load-and-run sketch follows the argument list.
+  Argument explanation:
+  - **save_path**
+    type: string, default: runs/exp
+    Path of the saved model, which is also where the converted model is written.
+  - **image_size**
+    type: int, default: 224
+    Input image size of the model.
+  - **image_channel**
+    type: int, default: 3
+    Number of input image channels. (only three channels are supported at the moment)
+  - **batch_size**
+    type: int, default: 1
+    Number of samples per inference batch.
+  - **dynamic**
+    default: False
+    The dynamic parameter of onnx.
+  - **simplify**
+    default: False
+    The simplify parameter of onnx.
+  - **half**
+    default: False
+    Export an FP16 model. (only supported when exporting on GPU)
+  - **verbose**
+    default: False
+    Whether to print the log when exporting tensorrt.
+  - **export**
+    type: string, default: torchscript, choices: ['onnx', 'torchscript', 'tensorrt']
+    Which format to export.
+  - **device**
+    type: string, default: 0
+    Device to use. (cuda device, i.e. 0 or 0,1,2,3 or cpu)
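Once export.py has written best.ts, the TorchScript model can be loaded without any of this repository's code. A minimal sketch (paths and shapes assume the defaults above):

    import torch

    model = torch.jit.load('runs/exp/best.ts')  # file produced by: python export.py --export torchscript
    model.eval()
    x = torch.rand(1, 3, 224, 224)  # must match --batch_size/--image_channel/--image_size used at export
    with torch.no_grad():
        out = model(x)  # classification logits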


## Model Zoo

@@ -288,6 +344,8 @@ image classifier implemented in PyTorch.
 | cspnet | cspresnet50,cspresnext50,cspdarknet53,cs3darknet_m,cs3darknet_l,cs3darknet_x,cs3darknet_focus_m,cs3darknet_focus_l
 cs3sedarknet_l,cs3sedarknet_x,cs3edgenet_x,cs3se_edgenet_x |
 | dpn | dpn68,dpn68b,dpn92,dpn98,dpn107,dpn131 |

 ## Some explanation

  1. About CPU and GPU usage.
@@ -526,13 +584,20 @@ image classifier implemented in PyTorch.

  17. How to use data augmentation from albumentations.
      You can find the name of the augmentation you need on the [albumentations GitHub](https://github.com/albumentations-team/albumentations) or in the [albumentations official documentation](https://albumentations.ai/docs/api_reference/augmentations/), for example [RandomGridShuffle](https://github.com/albumentations-team/albumentations#:~:text=%E2%9C%93-,RandomGridShuffle,-%E2%9C%93), and then create it in config/config.py:
          Create_Albumentations_From_Name('RandomGridShuffle')
      Some users may also need to change the default parameters. These can be found in the API documentation, and our function supports overriding them; for example, RandomGridShuffle has a grid parameter:
          Create_Albumentations_From_Name('RandomGridShuffle', grid=(3, 3))
      More than one parameter can simply be appended in the same way, but the parameter names must be given as keywords.

+ 18. Some notes on export.py.
+     1. tensorrt is recommended on Ubuntu, and tensorrt only supports export and inference on GPU.
+     2. FP16 only supports export and inference on GPU.
+     3. FP16 mode cannot be used together with dynamic mode.
+     4. For detailed GPU and CPU inference speed experiments, see the [v1.2 update log](v1.2-update_log.md).

 ## TODO

 - [x] Knowledge Distillation
 - [x] EMA
 - [x] R-Drop
 - [ ] SWA
 - [ ] DDP Mode
-- [ ] Export Model(onnx, tensorrt, torchscript)
+- [x] Export Model(onnx, torchscript, tensorrt)
 - [ ] C++ Inference Code
 - [ ] Accumulation Gradient
 - [ ] Model Ensembling
 - [ ] Freeze Training
+- [ ] Support Fuse Conv and Bn
 - [x] Early Stop

 ## Reference

 https://github.com/BIGBALLON/CIFAR-ZOO

diff --git a/config/__pycache__/config.cpython-38.pyc b/config/__pycache__/config.cpython-38.pyc
index 0f17312..bcc2bcf 100644
Binary files a/config/__pycache__/config.cpython-38.pyc and b/config/__pycache__/config.cpython-38.pyc differ
diff --git a/export.py b/export.py
new file mode 100644
index 0000000..38339be
--- /dev/null
+++ b/export.py
@@ -0,0 +1,119 @@
import os, argparse
import numpy as np
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import torch
import torch.nn as nn
from utils.utils import select_device

def export_torchscript(opt, model, img, prefix='TorchScript'):
    print('Starting TorchScript export with pytorch %s...' % torch.__version__)
    f = os.path.join(opt.save_path, 'best.ts')
    ts = torch.jit.trace(model, img, strict=False)
    ts.save(f)
    print(f'Export TorchScript Model Successfully.\nSave as {f}')

def export_onnx(opt, model, img, prefix='ONNX'):
    import onnx
    f = os.path.join(opt.save_path, 'best.onnx')
    print('Starting ONNX export with onnx %s...' % onnx.__version__)
    if opt.dynamic:
        dynamic_axes = {'images': {0: 'batch', 2: 'height', 3: 'width'}, 'output': {0: 'batch'}}
    else:
        dynamic_axes = None

    torch.onnx.export(
        (model.to('cpu') if opt.dynamic else model),
        (img.to('cpu') if opt.dynamic else img),
        f, verbose=False, opset_version=13, input_names=['images'], output_names=['output'], dynamic_axes=dynamic_axes)

    onnx_model = onnx.load(f)  # load onnx model
    onnx.checker.check_model(onnx_model)  # check onnx model

    if opt.simplify:
        try:
            import onnxsim
            print('\nStarting to simplify ONNX...')
            onnx_model, check = onnxsim.simplify(onnx_model)
            assert check, 'assert check failed'
        except Exception as e:
            print(f'Simplifier failure: {e}')
    onnx.save(onnx_model, f)

    print(f'Export Onnx Model Successfully.\nSave as {f}')

def export_engine(opt, model, img, workspace=4, prefix='TensorRT'):
    export_onnx(opt, model, img)
    onnx_file = os.path.join(opt.save_path, 'best.onnx')
    assert img.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`'
    import tensorrt as trt
    print('Starting TensorRT export with TensorRT %s...' % trt.__version__)
    f = os.path.join(opt.save_path, 'best.engine')

    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if opt.verbose else trt.Logger()
    builder = trt.Builder(TRT_LOGGER)
    config = builder.create_builder_config()
    config.max_workspace_size = workspace * 1 << 30

    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    parser = trt.OnnxParser(network, TRT_LOGGER)
    if not parser.parse_from_file(str(onnx_file)):
        raise RuntimeError(f'failed to load ONNX file: {onnx_file}')

    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]
    for inp in inputs:
        print(f'input {inp.name} with shape {inp.shape} and dtype {inp.dtype}')
    for out in outputs:
        print(f'output {out.name} with shape {out.shape} and dtype {out.dtype}')

    if opt.dynamic:
        if img.shape[0] <= 1:
            print(f'{prefix} WARNING: --dynamic model requires maximum --batch-size argument')
        profile = builder.create_optimization_profile()
        for inp in inputs:
            profile.set_shape(inp.name, (1, *img.shape[1:]), (max(1, img.shape[0] // 2), *img.shape[1:]), img.shape)
        config.add_optimization_profile(profile)

    print(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and opt.half else 32} engine in {f}')
    if builder.platform_has_fast_fp16 and opt.half:
        config.set_flag(trt.BuilderFlag.FP16)
    with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
        t.write(engine.serialize())
    print(f'Export TensorRT Model Successfully.\nSave as {f}')

def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save_path', type=str, default=r'runs/exp', help='save path for model and log')
    parser.add_argument('--image_size', type=int, default=224, help='image size')
    parser.add_argument('--image_channel', type=int, default=3, help='image channel')
    parser.add_argument('--batch_size', type=int, default=1, help='batch size')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX batchsize')
    parser.add_argument('--simplify', action='store_true', help='simplify onnx model')
    parser.add_argument('--half', action='store_true', help='FP32 to FP16')
    parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose export log')
    parser.add_argument('--export', default='torchscript', type=str, choices=['onnx', 'torchscript', 'tensorrt'], help='export type')
    parser.add_argument('--device', type=str, default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

    opt = parser.parse_known_args()[0]
    if not os.path.exists(os.path.join(opt.save_path, 'best.pt')):
        raise Exception('best.pt not found. please check your --save_path folder')
    DEVICE = select_device(opt.device)
    if opt.half:
        assert DEVICE.type != 'cpu', '--half only supported with GPU export'
        assert not opt.dynamic, '--half not compatible with --dynamic'
    ckpt = torch.load(os.path.join(opt.save_path, 'best.pt'))
    model = ckpt['model'].float().to(DEVICE)
    img = torch.rand((opt.batch_size, opt.image_channel, opt.image_size, opt.image_size)).to(DEVICE)

    return opt, (model.half() if opt.half else model), (img.half() if opt.half else img), DEVICE

if __name__ == '__main__':
    opt, model, img, DEVICE = parse_opt()

    if opt.export == 'onnx':
        export_onnx(opt, model, img)
    elif opt.export == 'torchscript':
        export_torchscript(opt, model, img)
    elif opt.export == 'tensorrt':
        export_engine(opt, model, img)
\ No newline at end of file
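As a quick sanity check, the exported best.onnx can be run directly with onnxruntime, independently of metrice.py. A minimal sketch, assuming an FP32 static-shape export at the default save path:

    import numpy as np
    import onnxruntime

    session = onnxruntime.InferenceSession('runs/exp/best.onnx', providers=['CPUExecutionProvider'])
    x = np.random.rand(1, 3, 224, 224).astype(np.float32)  # export-time batch/channel/size
    out = session.run([session.get_outputs()[0].name], {'images': x})[0]  # input name 'images' is set by export_onnx
    print(out.shape)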
diff --git a/main.py b/main.py
index d8bf934..27a01b0 100644
--- a/main.py
+++ b/main.py
@@ -12,7 +12,7 @@
 from utils.utils_model import select_model
 from utils import utils_aug
 from utils.utils import save_model, plot_train_batch, WarmUpLR, show_config, setting_optimizer, check_batch_size, \
-    plot_log, update_opt, load_weights, get_channels, dict_to_PrettyTable, ModelEMA
+    plot_log, update_opt, load_weights, get_channels, dict_to_PrettyTable, ModelEMA, select_device

 from utils.utils_distill import *
 from utils.utils_loss import *
@@ -29,6 +29,7 @@
     parser.add_argument('--pretrained', action="store_true", help='using pretrain weight')
     parser.add_argument('--weight', type=str, default='', help='loading weight path')
     parser.add_argument('--config', type=str, default='config/config.py', help='config path')
+    parser.add_argument('--device', type=str, default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--train_path', type=str, default=r'dataset/train', help='train data path')
     parser.add_argument('--val_path', type=str, default=r'dataset/val', help='val data path')
@@ -76,7 +77,7 @@
     # Tricks parameters
     parser.add_argument('--rdrop', action="store_true", help='using R-Drop')
-    parser.add_argument('--ema', action="store_true", help='using EMA(Exponential Moving Average)')
+    parser.add_argument('--ema', action="store_true", help='using EMA (Exponential Moving Average), reference: YOLOv5')

     opt = parser.parse_known_args()[0]
     if opt.resume:
@@ -100,7 +101,7 @@
     show_config(deepcopy(opt))

     CLASS_NUM = len(os.listdir(opt.train_path))
-    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    DEVICE = select_device(opt.device, opt.batch_size)

     train_transform, test_transform = utils_aug.get_dataprocessing(torchvision.datasets.ImageFolder(opt.train_path), opt)
@@ -126,9 +127,7 @@
     test_dataset = torch.utils.data.DataLoader(test_dataset, max(batch_size // (10 if opt.test_tta else 1), 1), shuffle=False, num_workers=(0 if opt.test_tta else opt.workers))
     scaler = torch.cuda.amp.GradScaler(enabled=(opt.amp if torch.cuda.is_available() else False))
-    ema = None
-    if opt.ema:
-        ema = ModelEMA(model)
+    ema = ModelEMA(model) if opt.ema else None
     optimizer = setting_optimizer(opt, model)
     lr_scheduler = WarmUpLR(optimizer, opt)
     if opt.resume:
@@ -181,7 +180,7 @@
     elif opt.kd_method == 'AT':
         kd_loss = AT().to(DEVICE)

-    print('{} begin train on {}!'.format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), DEVICE))
+    print('{} begin train!'.format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
     for epoch in range(begin_epoch, opt.epoch):
         if epoch > (save_epoch + opt.patience) and opt.patience != 0:
             print('No Improve from {} to {}, EarlyStopping.'.format(save_epoch + 1, epoch))
diff --git a/metrice.py b/metrice.py
index 5314325..457091a 100644
--- a/metrice.py
+++ b/metrice.py
@@ -6,7 +6,7 @@
 os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
 import numpy as np
 from utils import utils_aug
-from utils.utils import classification_metrice, Metrice_Dataset, visual_predictions, visual_tsne, dict_to_PrettyTable
+from utils.utils import classification_metrice, Metrice_Dataset, visual_predictions, visual_tsne, dict_to_PrettyTable, Model_Inference, select_device

 torch.backends.cudnn.deterministic = True

 def set_seed(seed):
@@ -21,26 +21,28 @@
     parser.add_argument('--val_path', type=str, default=r'dataset/val', help='val data path')
     parser.add_argument('--test_path', type=str, default=r'dataset/test', help='test data path')
     parser.add_argument('--label_path', type=str, default=r'dataset/label.txt', help='label path')
-    parser.add_argument('--task', type=str, choices=['train', 'val', 'test', 'fps'], default='val', help='train, val, test, fps')
+    parser.add_argument('--device', type=str, default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--task', type=str, choices=['train', 'val', 'test', 'fps'], default='test', help='train, val, test, fps')
     parser.add_argument('--workers', type=int, default=4, help='dataloader workers')
     parser.add_argument('--batch_size', type=int, default=64, help='batch size')
-    parser.add_argument('--save_path', type=str, default=r'runs/mobilenetv2_ST', help='save path for model and log')
+    parser.add_argument('--save_path', type=str, default=r'runs/exp', help='save path for model and log')
     parser.add_argument('--test_tta', action="store_true", help='using TTA Tricks')
     parser.add_argument('--visual', action="store_true", help='visual dataset identification')
     parser.add_argument('--tsne', action="store_true", help='visual tsne')
     parser.add_argument('--half', action="store_true", help='use FP16 half-precision inference')
+    parser.add_argument('--model_type', type=str, choices=['torch', 'torchscript', 'onnx', 'tensorrt'], default='torch', help='model type (default: torch)')

     opt = parser.parse_known_args()[0]
+    DEVICE = select_device(opt.device, opt.batch_size)
+    if opt.half and DEVICE.type == 'cpu':
+        raise Exception('half inference is only supported on GPU.')
     if not os.path.exists(os.path.join(opt.save_path, 'best.pt')):
         raise Exception('best.pt not found. please check your --save_path folder')
     ckpt = torch.load(os.path.join(opt.save_path, 'best.pt'))
-    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model = (ckpt['model'] if opt.half else ckpt['model'].float())
-    model.to(DEVICE)
-    model.eval()
     train_opt = ckpt['opt']
     set_seed(train_opt.random_seed)
+    model = Model_Inference(DEVICE, opt)

     print('found checkpoint from {}, model type:{}\n{}'.format(opt.save_path, ckpt['model'].name, dict_to_PrettyTable(ckpt['best_metrice'], 'Best Metrice')))
@@ -48,7 +50,7 @@
     if opt.task == 'fps':
         inputs = torch.rand((opt.batch_size, train_opt.image_channel, train_opt.image_size, train_opt.image_size)).to(DEVICE)
-        if opt.half:
+        if opt.half and torch.cuda.is_available():
             inputs = inputs.half()
         warm_up, test_time = 100, 300
         fps_arr = []
@@ -83,7 +85,6 @@
 if __name__ == '__main__':
     opt, model, test_dataset, DEVICE, CLASS_NUM, label, save_path = parse_opt()
     y_true, y_pred, y_score, y_feature, img_path = [], [], [], [], []
-    model.eval()
     with torch.no_grad():
         for x, y, path in tqdm.tqdm(test_dataset, desc='Test Stage'):
             x = (x.half().to(DEVICE) if opt.half else x.to(DEVICE))
@@ -100,7 +101,11 @@
             if opt.tsne:
                 pred_feature = model.forward_features(x)

-            pred = torch.softmax(pred, 1)
+            try:
+                pred = torch.softmax(pred, 1)
+            except:
+                pred = torch.softmax(torch.from_numpy(pred), 1)  # pred may be numpy (onnx backend); torch.softmax is faster than numpy
+
             y_true.extend(list(y.cpu().detach().numpy()))
             y_pred.extend(list(pred.argmax(-1).cpu().detach().numpy()))
             y_score.extend(list(pred.max(-1)[0].cpu().detach().numpy()))
diff --git a/model/__pycache__/__init__.cpython-38.pyc b/model/__pycache__/__init__.cpython-38.pyc
index abd0181..242e622 100644
Binary files a/model/__pycache__/__init__.cpython-38.pyc and b/model/__pycache__/__init__.cpython-38.pyc differ
diff --git a/model/__pycache__/convnext.cpython-38.pyc b/model/__pycache__/convnext.cpython-38.pyc
index df5cc41..ffc943e 100644
Binary files a/model/__pycache__/convnext.cpython-38.pyc and b/model/__pycache__/convnext.cpython-38.pyc differ
diff --git a/model/__pycache__/cspnet.cpython-38.pyc b/model/__pycache__/cspnet.cpython-38.pyc
index 734cc88..88d3bcc 100644
Binary files a/model/__pycache__/cspnet.cpython-38.pyc and b/model/__pycache__/cspnet.cpython-38.pyc differ
diff --git a/model/__pycache__/densenet.cpython-38.pyc b/model/__pycache__/densenet.cpython-38.pyc
index 908a023..ee1bd52 100644
Binary files a/model/__pycache__/densenet.cpython-38.pyc and b/model/__pycache__/densenet.cpython-38.pyc differ
diff --git a/model/__pycache__/dpn.cpython-38.pyc b/model/__pycache__/dpn.cpython-38.pyc
index d8ad239..eb068ec 100644
Binary files a/model/__pycache__/dpn.cpython-38.pyc and b/model/__pycache__/dpn.cpython-38.pyc differ
diff --git a/model/__pycache__/efficientnetv2.cpython-38.pyc b/model/__pycache__/efficientnetv2.cpython-38.pyc
index 90a15f1..eabe70a 100644
Binary files a/model/__pycache__/efficientnetv2.cpython-38.pyc and b/model/__pycache__/efficientnetv2.cpython-38.pyc differ
diff --git a/model/__pycache__/ghostnet.cpython-38.pyc b/model/__pycache__/ghostnet.cpython-38.pyc
index f6a888e..9d5452d 100644
Binary files a/model/__pycache__/ghostnet.cpython-38.pyc and b/model/__pycache__/ghostnet.cpython-38.pyc differ
diff --git a/model/__pycache__/mnasnet.cpython-38.pyc b/model/__pycache__/mnasnet.cpython-38.pyc
index 62bba1b..4919f15 100644
Binary files a/model/__pycache__/mnasnet.cpython-38.pyc and b/model/__pycache__/mnasnet.cpython-38.pyc differ
diff --git a/model/__pycache__/mobilenetv2.cpython-38.pyc b/model/__pycache__/mobilenetv2.cpython-38.pyc
index d01776f..1ae2b42 100644
Binary files a/model/__pycache__/mobilenetv2.cpython-38.pyc and b/model/__pycache__/mobilenetv2.cpython-38.pyc differ
diff --git a/model/__pycache__/mobilenetv3.cpython-38.pyc b/model/__pycache__/mobilenetv3.cpython-38.pyc
index e45f541..8eb4b5e 100644
Binary files a/model/__pycache__/mobilenetv3.cpython-38.pyc and b/model/__pycache__/mobilenetv3.cpython-38.pyc differ
diff --git a/model/__pycache__/repvgg.cpython-38.pyc b/model/__pycache__/repvgg.cpython-38.pyc
index 603bcad..69592bb 100644
Binary files a/model/__pycache__/repvgg.cpython-38.pyc and b/model/__pycache__/repvgg.cpython-38.pyc differ
diff --git a/model/__pycache__/resnest.cpython-38.pyc b/model/__pycache__/resnest.cpython-38.pyc
index 24bc0a1..cc468eb 100644
Binary files a/model/__pycache__/resnest.cpython-38.pyc and b/model/__pycache__/resnest.cpython-38.pyc differ
diff --git a/model/__pycache__/resnet.cpython-38.pyc b/model/__pycache__/resnet.cpython-38.pyc
index 786b87c..20bdaea 100644
Binary files a/model/__pycache__/resnet.cpython-38.pyc and b/model/__pycache__/resnet.cpython-38.pyc differ
diff --git a/model/__pycache__/sequencer.cpython-38.pyc b/model/__pycache__/sequencer.cpython-38.pyc
index b2b713e..7f37a30 100644
Binary files a/model/__pycache__/sequencer.cpython-38.pyc and b/model/__pycache__/sequencer.cpython-38.pyc differ
diff --git a/model/__pycache__/shufflenetv2.cpython-38.pyc b/model/__pycache__/shufflenetv2.cpython-38.pyc
index c9fe966..9033cb9 100644
Binary files a/model/__pycache__/shufflenetv2.cpython-38.pyc and b/model/__pycache__/shufflenetv2.cpython-38.pyc differ
diff --git a/model/__pycache__/vgg.cpython-38.pyc b/model/__pycache__/vgg.cpython-38.pyc
index a9de89a..e40147a 100644
Binary files a/model/__pycache__/vgg.cpython-38.pyc and b/model/__pycache__/vgg.cpython-38.pyc differ
diff --git a/model/__pycache__/vovnet.cpython-38.pyc b/model/__pycache__/vovnet.cpython-38.pyc
index 2093e86..e788fe7 100644
Binary files a/model/__pycache__/vovnet.cpython-38.pyc and b/model/__pycache__/vovnet.cpython-38.pyc differ
diff --git a/model/cspnet.py b/model/cspnet.py
index dcd9709..084ae5f 100644
--- a/model/cspnet.py
+++ b/model/cspnet.py
@@ -847,7 +847,7 @@
     def forward(self, x, need_fea=False):
         if need_fea:
             features, features_fc = self.forward_features(x, need_fea=need_fea)
             x = self.forward_head(features_fc)
-            return features, features_fc, x 
+            return features, features_fc, x
         else:
             x = self.forward_features(x)
             x = self.forward_head(x)
diff --git a/predict.py b/predict.py
index f39120a..920c34f 100644
--- a/predict.py
+++ b/predict.py
@@ -7,7 +7,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 from utils import utils_aug
-from utils.utils import predict_single_image, cam_visual, dict_to_PrettyTable
+from utils.utils import predict_single_image, cam_visual, dict_to_PrettyTable, select_device

 def set_seed(seed):
     random.seed(seed)
@@ -24,13 +24,17 @@
     parser.add_argument('--cam_visual', action="store_true", help='visual cam')
     parser.add_argument('--cam_type', type=str, choices=['GradCAM', 'HiResCAM', 'ScoreCAM', 'GradCAMPlusPlus', 'AblationCAM', 'XGradCAM', 'EigenCAM', 'FullGrad'], default='FullGrad', help='cam type')
     parser.add_argument('--half', action="store_true", help='use FP16 half-precision inference')
+    parser.add_argument('--device', type=str, default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     opt = parser.parse_known_args()[0]
-
     if not os.path.exists(os.path.join(opt.save_path, 'best.pt')):
         raise Exception('best.pt not found. please check your --save_path folder')
     ckpt = torch.load(os.path.join(opt.save_path, 'best.pt'))
-    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    DEVICE = select_device(opt.device)
+    if opt.half and DEVICE.type == 'cpu':
+        raise Exception('half inference is only supported on GPU.')
+    if opt.half and opt.cam_visual:
+        raise Exception('cam visualization only supports FP32.')
     model = (ckpt['model'] if opt.half else ckpt['model'].float())
     model.to(DEVICE)
     model.eval()
diff --git a/requirements.txt b/requirements.txt
index 53a9071..4b64ad1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,7 @@
+# Pytorch-Classifier requirements
+# Usage: pip install -r requirements.txt
+
+# Base ------------------------------------------------------------------------
 opencv-python
 grad-cam
 timm
@@ -8,4 +12,14 @@
 pillow
 thop
 rfconv
 albumentations
-pycm
\ No newline at end of file
+pycm
+
+# Export ----------------------------------------------------------------------
+# onnx  # ONNX export
+# onnx-simplifier  # ONNX simplifier
+# nvidia-pyindex  # TensorRT export
+# nvidia-tensorrt  # TensorRT export
+
+# Export Inference --------------------------------------------------------------
+# onnxruntime  # ONNX CPU Inference
+# onnxruntime-gpu  # ONNX GPU Inference
\ No newline at end of file
diff --git a/utils/__pycache__/utils.cpython-38.pyc b/utils/__pycache__/utils.cpython-38.pyc
index d6793a3..96ced2e 100644
Binary files a/utils/__pycache__/utils.cpython-38.pyc and b/utils/__pycache__/utils.cpython-38.pyc differ
diff --git a/utils/__pycache__/utils_aug.cpython-38.pyc b/utils/__pycache__/utils_aug.cpython-38.pyc
index 466a95f..49b4b3f 100644
Binary files a/utils/__pycache__/utils_aug.cpython-38.pyc and b/utils/__pycache__/utils_aug.cpython-38.pyc differ
diff --git a/utils/__pycache__/utils_distill.cpython-38.pyc b/utils/__pycache__/utils_distill.cpython-38.pyc
index e205253..e48df5b 100644
Binary files a/utils/__pycache__/utils_distill.cpython-38.pyc and b/utils/__pycache__/utils_distill.cpython-38.pyc differ
diff --git a/utils/__pycache__/utils_fit.cpython-38.pyc b/utils/__pycache__/utils_fit.cpython-38.pyc
index c00582e..e811dc8 100644
Binary files a/utils/__pycache__/utils_fit.cpython-38.pyc and b/utils/__pycache__/utils_fit.cpython-38.pyc differ
diff --git a/utils/__pycache__/utils_loss.cpython-38.pyc b/utils/__pycache__/utils_loss.cpython-38.pyc
index ffa1b61..4184354 100644
Binary files a/utils/__pycache__/utils_loss.cpython-38.pyc and b/utils/__pycache__/utils_loss.cpython-38.pyc differ
diff --git a/utils/__pycache__/utils_model.cpython-38.pyc b/utils/__pycache__/utils_model.cpython-38.pyc
index 3110170..078909a 100644
Binary files a/utils/__pycache__/utils_model.cpython-38.pyc and b/utils/__pycache__/utils_model.cpython-38.pyc differ
diff --git a/utils/utils.py b/utils/utils.py
index 5ed5ee0..eb0d71b 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -1,5 +1,5 @@
 from sklearn import utils
-import torch, itertools, os, time, thop, json, cv2, math
+import torch, itertools, os, time, thop, json, cv2, math, platform, yaml
 import torch.nn as nn
 import torchvision.transforms as transforms
 import numpy as np
@@ -20,6 +20,7 @@
 from collections import OrderedDict
 from .utils_aug import rand_bbox
 from pycm import ConfusionMatrix
+from collections import namedtuple

 cnames = {
     'aliceblue': '#F0F8FF',
@@ -315,8 +316,9 @@ def show_config(opt):
         else:
             opt[keys] = opt[keys].replace('\n', '')

-    with open(os.path.join(opt['save_path'], 'param.json'), 'w+') as f:
-        f.write(json.dumps(opt, indent=4, separators={':', ','}))
+    with open(os.path.join(opt['save_path'], 'param.yaml'), 'w+') as f:
+        # f.write(json.dumps(opt, indent=4, separators={':', ','}))
+        yaml.dump(opt, f)

 def plot_confusion_matrix(cm, classes, save_path, normalize=True, title='Confusion matrix', cmap=plt.cm.Blues, name='test'):
     plt.figure(figsize=(min(len(classes), 30), min(len(classes), 30)))
@@ -636,20 +638,26 @@ def visual_tsne(feature, y_true, path, labels, save_path):
 def predict_single_image(path, model, test_transform, DEVICE, half=False):
     pil_img = Image.open(path)
     tensor_img = test_transform(pil_img).unsqueeze(0).to(DEVICE)
-    tensor_img = (tensor_img.half() if half else tensor_img)
+    tensor_img = (tensor_img.half() if (half and torch.cuda.is_available()) else tensor_img)
     if len(tensor_img.shape) == 5:
         tensor_img = tensor_img.reshape((tensor_img.size(0) * tensor_img.size(1), tensor_img.size(2), tensor_img.size(3), tensor_img.size(4)))
-        pred_result = torch.softmax(model(tensor_img).mean(0), 0)
+        output = model(tensor_img).mean(0)
     else:
-        pred_result = torch.softmax(model(tensor_img)[0], 0)
+        output = model(tensor_img)[0]
+
+    try:
+        pred_result = torch.softmax(output, 0)
+    except:
+        pred_result = torch.softmax(torch.from_numpy(output), 0)  # output may be numpy (onnx backend); torch.softmax is faster than numpy
     return int(pred_result.argmax()), pred_result

 class cam_visual:
     def __init__(self, model, test_transform, DEVICE, target_layers, opt):
         self.test_transform = test_transform
         self.DEVICE = DEVICE
+        self.opt = opt
-        self.cam_model = eval(opt.cam_type)(model=deepcopy(model).float(), target_layers=[target_layers], use_cuda=torch.cuda.is_available())
+        self.cam_model = eval(opt.cam_type)(model=deepcopy(model), target_layers=[target_layers], use_cuda=torch.cuda.is_available())

     def __call__(self, path, label):
         pil_img = Image.open(path)
@@ -749,4 +757,117 @@ def update(self, model):
             if v.dtype.is_floating_point:  # true for FP16 and FP32
                 v *= d
                 v += (1 - d) * msd[k].detach()
-        # assert v.dtype == msd[k].dtype == torch.float32, f'{k}: EMA {v.dtype} and model {msd[k].dtype} must be FP32'
\ No newline at end of file
+        # assert v.dtype == msd[k].dtype == torch.float32, f'{k}: EMA {v.dtype} and model {msd[k].dtype} must be FP32'
+
+class Model_Inference:
+    def __init__(self, device, opt):
+        self.opt = opt
+        self.device = device
+
+        if self.opt.model_type == 'torch':
+            ckpt = torch.load(os.path.join(opt.save_path, 'best.pt'))
+            self.model = (ckpt['model'] if opt.half else ckpt['model'].float())
+            self.model.to(self.device)
+            self.model.eval()
+        elif self.opt.model_type == 'onnx':
+            import onnx, onnxruntime
+            providers = ['CUDAExecutionProvider'] if torch.cuda.is_available() else ['CPUExecutionProvider']
+            self.model = onnxruntime.InferenceSession(os.path.join(opt.save_path, 'best.onnx'), providers=providers)
+        elif self.opt.model_type == 'torchscript':
+            self.model = torch.jit.load(os.path.join(opt.save_path, 'best.ts'))
+            self.model = (self.model.half() if opt.half else self.model)
+            self.model.to(self.device)
+            self.model.eval()
+        elif self.opt.model_type == 'tensorrt':
+            import tensorrt as trt
+            if device.type == 'cpu':
+                raise RuntimeError('TensorRT does not support CPU inference.')
+            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
+            logger = trt.Logger()
+            with open(os.path.join(opt.save_path, 'best.engine'), 'rb') as f, trt.Runtime(logger) as runtime:
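+                # Deserialize the serialized engine once; the loop below then walks
+                # every binding (inputs and outputs), records its name/dtype/shape,
+                # flags FP16 and dynamic-shape engines, and pre-allocates a device
+                # tensor per binding, so __call__ only swaps pointers into execute_v2.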
+                model = runtime.deserialize_cuda_engine(f.read())
+            context = model.create_execution_context()
+            bindings = OrderedDict()
+            fp16 = False  # default updated below
+            dynamic = False
+            for index in range(model.num_bindings):
+                name = model.get_binding_name(index)
+                dtype = trt.nptype(model.get_binding_dtype(index))
+                if model.binding_is_input(index):
+                    if -1 in tuple(model.get_binding_shape(index)):  # dynamic
+                        dynamic = True
+                        context.set_binding_shape(index, tuple(model.get_profile_shape(0, index)[2]))
+                    if dtype == np.float16:
+                        fp16 = True
+                shape = tuple(context.get_binding_shape(index))
+                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
+                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
+            self.bindings = bindings
+            self.binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
+            self.batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
+            self.model = model
+            self.fp16 = fp16
+            self.dynamic = dynamic
+            self.context = context
+
+    def __call__(self, inputs):
+        if self.opt.model_type == 'torch':
+            return self.model(inputs)
+        elif self.opt.model_type == 'onnx':
+            inputs = inputs.cpu().numpy().astype(np.float16 if '16' in self.model.get_inputs()[0].type else np.float32)
+            return self.model.run([self.model.get_outputs()[0].name], {self.model.get_inputs()[0].name: inputs})[0]
+        elif self.opt.model_type == 'torchscript':
+            return self.model(inputs)
+        elif self.opt.model_type == 'tensorrt':
+            if self.fp16:
+                inputs = inputs.half()
+            if self.dynamic and inputs.shape != self.bindings['images'].shape:
+                i_in, i_out = (self.model.get_binding_index(x) for x in ('images', 'output'))
+                self.context.set_binding_shape(i_in, inputs.shape)  # reshape if dynamic
+                self.bindings['images'] = self.bindings['images']._replace(shape=inputs.shape)
+                self.bindings['output'].data.resize_(tuple(self.context.get_binding_shape(i_out)))
+            s = self.bindings['images'].shape
+            assert inputs.shape == s, f"input size {inputs.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
+            self.binding_addrs['images'] = int(inputs.data_ptr())
+            self.context.execute_v2(list(self.binding_addrs.values()))
+            y = self.bindings['output'].data
+            return y
+
+    def forward_features(self, inputs):
+        try:
+            return self.model.forward_features(inputs)
+        except:
+            raise TypeError('this model is not a torch model.')
+
+    def cam_layer(self):
+        try:
+            return self.model.cam_layer()
+        except:
+            raise TypeError('this model is not a torch model.')
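+# Usage as in metrice.py: build the wrapper once from the parsed options, then call
+# it like a regular torch model regardless of the backend:
+#     model = Model_Inference(DEVICE, opt)  # opt.model_type: torch/torchscript/onnx/tensorrt
+#     pred = model(x)                       # note: the onnx backend returns a numpy array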
+
+def select_device(device='', batch_size=0):
+    device = str(device).strip().lower().replace('cuda:', '').replace('none', '')  # to string, 'cuda:0' to '0'
+    cpu = device == 'cpu'
+    if cpu:
+        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
+    elif device:
+        os.environ['CUDA_VISIBLE_DEVICES'] = device
+        assert torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(',', '')), \
+            f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)"
+
+    print_str = f'Image-Classifier Python-{platform.python_version()} Torch-{torch.__version__} '
+    if not cpu and torch.cuda.is_available():
+        devices = device.split(',') if device else '0'
+        n = len(devices)  # device count
+        if n > 1 and batch_size > 0:  # check batch_size is divisible by device_count
+            assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
+        space = ' ' * len(print_str)
+        for i, d in enumerate(devices):
+            p = torch.cuda.get_device_properties(i)
+            print_str += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n"
+        arg = 'cuda:0'
+    else:
+        print_str += 'CPU'
+        arg = 'cpu'
+    print(print_str)
+    return torch.device(arg)
diff --git a/v1.2-update_log.md b/v1.2-update_log.md
new file mode 100644
index 0000000..2375652
--- /dev/null
+++ b/v1.2-update_log.md
@@ -0,0 +1,82 @@
# pytorch-classifier v1.2 update log

1. Added export.py, which supports exporting (onnx, torchscript, tensorrt) models.
2. metrice.py now supports onnx, torchscript and tensorrt inference.

   predict.py does not support onnx, torchscript or tensorrt inference for now, because its heat-map visualization cannot be implemented on those backends; a separate inference script will be written later.
   In metrice.py, onnx, torchscript and tensorrt inference likewise does not support the t-SNE visualization; the point of adding these backends to metrice.py is to measure FPS and accuracy (see the timing sketch after this list).
   In short, with metrice.py it is still best to use the torch model directly; a dedicated inference script for torchscript, onnx and tensorrt models will follow.
3. Added a --device argument to main.py, metrice.py, predict.py and export.py so the device can be specified.
4. General optimizations and bug fixes.
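For reference, the FPS numbers below are produced by metrice.py's fps task, which runs 100 warm-up batches and then times 300 further batches (the constants visible in the metrice.py diff above). A minimal sketch of that measurement loop, assuming model and inputs have already been built as in metrice.py; the exact bookkeeping in metrice.py may differ slightly:

    import time
    import numpy as np
    import torch

    warm_up, test_time = 100, 300  # same constants as metrice.py
    fps_arr = []
    with torch.no_grad():
        for i in range(warm_up + test_time):
            since = time.time()
            model(inputs)
            if i >= warm_up:  # discard warm-up iterations
                fps_arr.append(time.time() - since)
    print(f'{1 / np.mean(fps_arr):.2f} FPS')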

---
#### Training command:
    python main.py --model_name efficientnet_v2_s --config config/config.py --batch_size 128 --Augment AutoAugment --save_path runs/efficientnet_v2_s --device 0 \
    --pretrained --amp --warmup --ema --imagenet_meanstd

#### GPU inference speed test (sh script):
    batch_size=1 # 1 2 4 8 16 32 64
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --half --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --model_type torchscript --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --half --model_type torchscript --batch_size $batch_size
    python export.py --save_path runs/efficientnet_v2_s --export onnx --simplify --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --model_type onnx --batch_size $batch_size
    python export.py --save_path runs/efficientnet_v2_s --export onnx --simplify --half --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --model_type onnx --batch_size $batch_size
    python export.py --save_path runs/efficientnet_v2_s --export tensorrt --simplify --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --model_type tensorrt --batch_size $batch_size
    python export.py --save_path runs/efficientnet_v2_s --export tensorrt --simplify --half --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --model_type tensorrt --half --batch_size $batch_size

#### CPU inference speed test (sh script):
    python export.py --save_path runs/efficientnet_v2_s --export onnx --simplify --dynamic --device cpu
    batch_size=1
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --model_type torchscript --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --model_type onnx --batch_size $batch_size
    batch_size=2
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --model_type torchscript --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --model_type onnx --batch_size $batch_size
    batch_size=4
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --model_type torchscript --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --model_type onnx --batch_size $batch_size
    batch_size=8
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --model_type torchscript --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --model_type onnx --batch_size $batch_size
    batch_size=16
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --model_type torchscript --batch_size $batch_size
    python metrice.py --task fps --save_path runs/efficientnet_v2_s --device cpu --model_type onnx --batch_size $batch_size

### FPS experiments for the exported models on CPU and GPU:

Experimental environment:

| System | CPU | GPU | RAM | Model |
| :----: | :----: | :----: | :----: | :----: |
| Ubuntu 20.04 | i7-12700KF | RTX-3090 | 32G DDR5 6400 | efficientnet_v2_s |

#### GPU
| batch size | Torch FP32 FPS | Torch FP16 FPS | TorchScript FP32 FPS | TorchScript FP16 FPS | ONNX FP32 FPS | ONNX FP16 FPS | TensorRT FP32 FPS | TensorRT FP16 FPS |
| :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: |
| batch-size 1 | 93.77 | 105.65 | 233.21 | 260.07 | 177.41 | 308.52 | 311.60 | 789.19 |
| batch-size 2 | 94.32 | 108.35 | 208.53 | 253.83 | 166.23 | 258.98 | 275.93 | 713.71 |
| batch-size 4 | 95.98 | 108.31 | 171.99 | 255.05 | 130.43 | 190.03 | 212.75 | 573.88 |
| batch-size 8 | 94.03 | 85.76 | 118.79 | 210.58 | 87.65 | 122.31 | 147.36 | 416.71 |
| batch-size 16 | 61.93 | 76.25 | 75.45 | 125.05 | 50.33 | 69.01 | 87.25 | 260.94 |
| batch-size 32 | 34.56 | 58.11 | 41.93 | 72.29 | 26.91 | 34.46 | 48.54 | 151.35 |
| batch-size 64 | 18.64 | 31.57 | 23.15 | 38.90 | 12.67 | 15.90 | 26.19 | 85.47 |

#### CPU
| batch size | Torch FP32 FPS | Torch FP16 FPS | TorchScript FP32 FPS | TorchScript FP16 FPS | ONNX FP32 FPS | ONNX FP16 FPS | TensorRT FP32 FPS | TensorRT FP16 FPS |
| :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: |
| batch-size 1 | 27.91 | Not Supported | 46.10 | Not Supported | 79.27 | Not Supported | Not Supported | Not Supported |
| batch-size 2 | 25.26 | Not Supported | 24.98 | Not Supported | 45.62 | Not Supported | Not Supported | Not Supported |
| batch-size 4 | 14.02 | Not Supported | 13.84 | Not Supported | 23.90 | Not Supported | Not Supported | Not Supported |
| batch-size 8 | 7.53 | Not Supported | 7.35 | Not Supported | 12.01 | Not Supported | Not Supported | Not Supported |
| batch-size 16 | 3.07 | Not Supported | 3.64 | Not Supported | 5.72 | Not Supported | Not Supported | Not Supported |