# detect_plate/torch2trt/speed.py

from models.experimental import attempt_load
from torch2trt.trt_model import TrtModel
import argparse
import torch
import time
from tqdm import tqdm


def run(model,img,warmup_iter,iter):
    """Benchmark ``model(img)`` and return the mean latency per call.

    Args:
        model: Callable invoked as ``model(img)``.
        img: Input handed unchanged to every call.
        warmup_iter: Number of untimed calls made before measuring.
        iter: Number of timed calls; the elapsed time is divided by it.

    Returns:
        Average wall-clock time of one timed call, in milliseconds.

    Raises:
        ZeroDivisionError: If ``iter`` is 0.
    """
    # Inference-only measurement: outside no_grad a torch module may record
    # autograd state on every forward pass, which distorts the timing.
    with torch.no_grad():
        print('start warm up...')
        for _ in tqdm(range(warmup_iter)):
            model(img)

        print('start calculate...')
        # Wait for any GPU work still queued by the warm-up so it is not
        # charged to the timed loop.
        torch.cuda.synchronize()
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        for _ in tqdm(range(iter)):
            model(img)
            # Block until this call's GPU work has finished before the next
            # iteration, so the total reflects completed inferences.
            torch.cuda.synchronize()
        end = time.perf_counter()
    return ((end - start) * 1000)/float(iter)
    
if __name__ == '__main__':
    # Time the same input through the PyTorch weights and the TensorRT engine
    # and print the average latency of each.
    parser = argparse.ArgumentParser()
    parser.add_argument('--torch_path', type=str,required=True, help='torch weights path')
    parser.add_argument('--trt_path', type=str,required=True, help='tensorrt weights path')

    parser.add_argument('--device', type=int,default=0, help='cuda device')
    # type=list would split the raw argument string into single characters;
    # parse four integers instead, e.g. ``--img_shape 1 3 640 640``.
    parser.add_argument('--img_shape', type=int, nargs=4, default=[1,3,640,640],
                        metavar=('N', 'C', 'H', 'W'), help='input tensor shape')
    parser.add_argument('--warmup_iter', type=int, default=100,help='warm up iter')
    parser.add_argument('--iter', type=int, default=300,help='average elapsed time of iterations')
    opt = parser.parse_args()


    # -----------------------torch-----------------------------------------
    img = torch.zeros(opt.img_shape)
    model = attempt_load(opt.torch_path, map_location=torch.device('cpu'))  # load FP32 model
    model.eval()
    total_time=run(model.to(opt.device),img.to(opt.device),opt.warmup_iter,opt.iter)
    print('Pytorch is  %.2f ms/img'%total_time)

    # -----------------------tensorrt-----------------------------------------
    model=TrtModel(opt.trt_path)
    try:
        # img itself was never moved (img.to() returned a copy), so it can be
        # converted to a numpy array directly.
        total_time=run(model,img.numpy(),opt.warmup_iter,opt.iter)
    finally:
        # Call destroy() even when the benchmark raises, so the TrtModel
        # cleanup is never skipped.
        model.destroy()
    print('TensorRT is  %.2f ms/img'%total_time)
添加注释 2024-08-07 09:32:38 +08:00			`from models.experimental import attempt_load`
			`from torch2trt.trt_model import TrtModel`
			`import argparse`
			`import torch`
			`import time`
			`from tqdm import tqdm`


			`def run(model,img,warmup_iter,iter):`


			`print('start warm up...')`
			`for _ in tqdm(range(warmup_iter)):`
			`model(img)`


			`print('start calculate...')`
			`torch.cuda.synchronize()`
			`start = time.time()`
			`for __ in tqdm(range(iter)):`
			`model(img)`
			`torch.cuda.synchronize()`
			`end = time.time()`
			`return ((end - start) * 1000)/float(iter)`

			`if __name__ == '__main__':`
			`parser = argparse.ArgumentParser()`
			`parser.add_argument('--torch_path', type=str,required=True, help='torch weights path')`
			`parser.add_argument('--trt_path', type=str,required=True, help='tensorrt weights path')`

			`parser.add_argument('--device', type=int,default=0, help='cuda device')`
			`parser.add_argument('--img_shape', type=list,default=[1,3,640,640], help='tensorrt weights path')`
			`parser.add_argument('--warmup_iter', type=int, default=100,help='warm up iter')`
			`parser.add_argument('--iter', type=int, default=300,help='average elapsed time of iterations')`
			`opt = parser.parse_args()`


			`# -----------------------torch-----------------------------------------`
			`img = torch.zeros(opt.img_shape)`
			`model = attempt_load(opt.torch_path, map_location=torch.device('cpu')) # load FP32 model`
			`model.eval()`
			`total_time=run(model.to(opt.device),img.to(opt.device),opt.warmup_iter,opt.iter)`
			`print('Pytorch is %.2f ms/img'%total_time)`

			`# -----------------------tensorrt-----------------------------------------`
			`model=TrtModel(opt.trt_path)`
			`total_time=run(model,img.numpy(),opt.warmup_iter,opt.iter)`
			`model.destroy()`
			`print('TensorRT is %.2f ms/img'%total_time)`