#encoding=gbk
import tensorrt as trt
import numpy as np
import os
import cv2
import pycuda.driver as cuda
import pycuda.autoinit
from imutils import paths
from tqdm import tqdm
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()

class TrtModel:
    def __init__(self, engine_path, max_batch_size=1, dtype=np.float32):
        self.engine_path = engine_path
        self.dtype = dtype
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.runtime = trt.Runtime(self.logger)
        self.engine = self.load_engine(self.runtime, self.engine_path)
        self.max_batch_size = max_batch_size
        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers()
        self.context = self.engine.create_execution_context()

    @staticmethod
    def load_engine(trt_runtime, engine_path):
        trt.init_libnvinfer_plugins(None, "")
        with open(engine_path, 'rb') as f:
            engine_data = f.read()
        engine = trt_runtime.deserialize_cuda_engine(engine_data)
        return engine

    def allocate_buffers(self):
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        for binding in self.engine:
            # Size host/device buffers for the largest batch we intend to run:
            # replace the (possibly dynamic) first dimension with max_batch_size.
            shape = self.engine.get_binding_shape(binding)
            shape[0] = self.max_batch_size
            size = trt.volume(shape)
            host_mem = cuda.pagelocked_empty(size, self.dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings.append(int(device_mem))
            if self.engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings, stream

    def __call__(self, x: np.ndarray, batch_size=1):
        x = x.astype(self.dtype)
        np.copyto(self.inputs[0].host, x.ravel())
        for inp in self.inputs:
            cuda.memcpy_htod_async(inp.device, inp.host, self.stream)
        # Tell the context the actual input shape for this run (dynamic batch dimension).
        origin_inputshape = self.engine.get_binding_shape(0)
        origin_inputshape[0] = batch_size
        self.context.set_binding_shape(0, origin_inputshape)
        # The engine uses explicit batch / dynamic shapes, so execute_async_v2
        # (which takes no batch_size argument) is the appropriate call.
        self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle)
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, self.stream)
        self.stream.synchronize()
        return [out.host.reshape(batch_size, -1) for out in self.outputs]

if __name__ == "__main__":
    # Precision mode to validate: fp32, fp16 or int8
    val_type = 'fp16'
    # ---------------------------------
    path = r'./imgs/'
    trt_engine_path = r'./model/{}.engine'.format(val_type)
    out_path = r'./out/{}'.format(val_type)
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    # Per-channel mean and std used for input normalization
    mean = (120, 114, 104)
    std = (70, 69, 73)
    # Build the model from the serialized engine
    model = TrtModel(trt_engine_path)
    pic_paths = list(paths.list_images(path))
    for pic_path in tqdm(pic_paths):
        name = os.path.splitext(os.path.basename(pic_path))[0]
        # Preprocess the input image
        img = cv2.imread(pic_path)
        imgbak = img.copy()
        img = img[:, :, ::-1]  # BGR -> RGB
        img = np.array(img).astype(np.float32)  # the network input must be np.float32
        img -= mean  # subtract the mean
        img /= std   # divide by the std
        img = np.array([np.transpose(img, (2, 0, 1))])  # HWC -> NCHW
        # Run inference
        result = model(img, 1)
        # Save the segmentation result
        img_out = np.reshape(result[0][0], (512, 512))
        img_out = img_out.astype('uint8')
        # img_out = img_out * 25
        img_out[img_out > 0] = 255  # binarize the mask for visualization
        cv2.imwrite(os.path.join(out_path, '{}_{}.png'.format(val_type, name)), img_out)
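
# ---------------------------------------------------------------------------
# The script above assumes the ./model/{fp32,fp16,int8}.engine files already
# exist. As a rough sketch (not part of the original validation script), the
# helper below shows one way an fp32 or fp16 engine could be built from an
# ONNX export with the TensorRT 8.x Python builder API. The ONNX path, the
# input tensor name "input" and the 1x3x512x512 input shape are illustrative
# assumptions; adjust them to match the actual model.
def build_engine(onnx_path, engine_path, input_name="input", fp16=True, max_batch=1):
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    # Explicit-batch network, required for ONNX parsing and dynamic shapes
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)
    with open(onnx_path, 'rb') as f:
        if not parser.parse(f.read()):
            raise RuntimeError(parser.get_error(0))
    config = builder.create_builder_config()
    if fp16:
        config.set_flag(trt.BuilderFlag.FP16)
    # Optimization profile covering batch sizes 1..max_batch
    profile = builder.create_optimization_profile()
    profile.set_shape(input_name, (1, 3, 512, 512), (1, 3, 512, 512), (max_batch, 3, 512, 512))
    config.add_optimization_profile(profile)
    serialized = builder.build_serialized_network(network, config)
    with open(engine_path, 'wb') as f:
        f.write(serialized)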