YOU ONLY LOOK ONCE
数据集准备
使用labelimg工具标记数据(voc格式)
把标记好的xml文件转成txt,转化脚本如下(python2.7)
1, 获取所有图像名
```
import os
# Write the name of every entry in the training-image directory to
# ImageID.txt, one name per line.
dirlist = os.listdir("/home/room/mxj_workspace/data/voc_clock/train_img/")
# ``with`` closes the file even if a write fails part-way through.
with open("ImageID.txt", "w") as fp:
    for name in dirlist:
        fp.write(name + "\n")
```
2,提取xml中的坐标和label转化为txt,注意修改label名字和路径,新建好对应的ImageID文件夹,最后把生成的txt拷贝到train_img.
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
#sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
#classes = ["black_watch","box_watch","white_watch","light_watch","square_watch","sign","IO_State"]
# Label names to export. convert_annotation() skips objects whose name is not
# listed here and writes a label's index in this list as its class id.
classes = ["s_box"]
def convert(size, box):
    """Turn a pixel box into a centre/size box scaled by the image size.

    size is ``(width, height)`` and box is ``(xmin, xmax, ymin, ymax)``.
    Returns ``(x, y, w, h)``: the box centre (shifted by -1 pixel before
    scaling) and the box extent, each multiplied by 1/width or 1/height.
    """
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]
    centre_x = (xmin + xmax) / 2.0 - 1
    centre_y = (ymin + ymax) / 2.0 - 1
    return (centre_x * inv_w,
            centre_y * inv_h,
            (xmax - xmin) * inv_w,
            (ymax - ymin) * inv_h)
def convert_annotation(image_id,
                       xml_dir='/home/mahxn0/darknet/box/train_xml',
                       out_dir='/home/mahxn0/darknet/box/ImageID'):
    """Convert one VOC-style XML annotation into a darknet label file.

    Reads ``<xml_dir>/<image_id>.xml`` and writes ``<out_dir>/<image_id>.txt``
    with one line per object whose name is in ``classes``:
    ``<class_id> <x> <y> <w> <h>`` (the values returned by ``convert``).

    Args:
        image_id: annotation file name without extension.
        xml_dir: directory holding the XML files (defaults to the original
            hard-coded location).
        out_dir: directory receiving the .txt files; it must already exist.
    """
    xml_path = os.path.join(xml_dir, '%s.xml' % (image_id))
    txt_path = os.path.join(out_dir, '%s.txt' % (image_id))

    # Parse before opening the output so a broken XML does not leave an
    # empty label file behind; ET.parse() closes the file it opens itself.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # ``with`` guarantees the label file is flushed and closed.
    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the order (xmin, xmax, ymin, ymax).
            b = tuple(float(xmlbox.find(tag).text)
                      for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
#wd = getcwd()
#for year, image_set in sets:
# Make sure the working directories exist. 'ImageID' is where
# convert_annotation() writes the generated .txt label files.
for needed_dir in ('/home/mahxn0/darknet/box/img_file/',
                   '/home/mahxn0/darknet/box/ImageID/'):
    if not os.path.exists(needed_dir):
        os.makedirs(needed_dir)

# One image file name per line; split on lines (not on any whitespace) so
# names containing spaces survive, and skip blank lines.
with open('/home/mahxn0/darknet/box/ImageID.txt') as id_file:
    image_ids = [line.strip() for line in id_file.read().splitlines() if line.strip()]

# Every 10th image goes to the validation list, the rest to the training
# list; ``with`` closes both list files even if a conversion fails.
with open('/home/mahxn0/darknet/box/train.list', 'w') as listtr_file, \
        open('/home/mahxn0/darknet/box/val.list', 'w') as listte_file:
    for i, image_id in enumerate(image_ids, 1):
        # splitext drops only the final extension, so dots inside the
        # file name are preserved.
        stem = os.path.splitext(image_id)[0]
        target = listte_file if i % 10 == 0 else listtr_file
        target.write('/home/mahxn0/darknet/box/train_img/%s.jpg\n' % (stem))
        convert_annotation(stem)
#os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt")
#os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt")
## yolov3
- 修改cfg文件 关键:3*(classes+5)
- 找到cfg文件的三处classes位置,classes改成你的检测类别数,每处classes上一层卷积的filters修改为:3*(classes+5)(例如只有1个类别时,filters=3*(1+5)=18)
- 修改cfg/coco.data的类别数为你自己检测的类别数目,train.list和val.list改为你上面用label.py生成的,coco.names里面修改为你自己的label名字,backup是模型保存的位置
训练
./darknet detector train cfg/coco.data cfg/yolov3.cfg darknet53.conv.74 -gpus 0,1,2,3
Region 23 Avg IOU: 0.331109, Class: 0.552714, Obj: 0.017880, No Obj: 0.021078, .5R: 0.129032, .75R: 0.000000, count: 62
219: 5.798628, 26.150927 avg loss, 0.000007 rate, 1.180564 seconds, 42048 images
Loaded: 12.885740 seconds
Region 16 Avg IOU: 0.210043, Class: 0.500716, Obj: 0.037469, No Obj: 0.031145, .5R: 0.000000, .75R: 0.000000, count: 3
Region 16 Avg IOU: 0.302149, Class: 0.318319, Obj: 0.086097, No Obj: 0.030979, .5R: 0.000000, .75R: 0.000000, count: 1
Region 16 Avg IOU: 0.203812, Class: 0.335673, Obj: 0.063994, No Obj: 0.031331, .5R: 0.000000, .75R: 0.000000, count: 1
Region 23 Avg IOU: 0.312156, Class: 0.556277, Obj: 0.012325, No Obj: 0.019171, .5R: 0.120000, .75R: 0.000000, count: 50
Region 23 Avg IOU: 0.373455, Class: 0.508114, Obj: 0.015595, No Obj: 0.019038, .5R: 0.203390, .75R: 0.000000, count: 59
Region 23 Avg IOU: 0.344760, Class: 0.490172, Obj: 0.013907, No Obj: 0.019223, .5R: 0.187500, .75R: 0.000000, count: 48
Region 16 Avg IOU: 0.454259, Class: 0.426787, Obj: 0.027839, No Obj: 0.031548, .5R: 0.000000, .75R: 0.000000, count: 1
Region 16 Avg IOU: 0.366378, Class: 0.445379, Obj: 0.043471, No Obj: 0.030944, .5R: 0.000000, .75R: 0.000000, count: 2
Region 16 Avg IOU: -nan, Class: -nan, Obj: -nan, No Obj: 0.030927, .5R: -nan, .75R: -nan, count: 0
Region 23 Avg IOU: 0.362018, Class: 0.513913, Obj: 0.014860, No Obj: 0.019196, .5R: 0.224138, .75R: 0.000000, count: 58
Region 23 Avg IOU: 0.278272, Class: 0.531918, Obj: 0.013913, No Obj: 0.019277, .5R: 0.065217, .75R: 0.000000, count: 46
Region 23 Avg IOU: 0.322512, Class: 0.549836, Obj: 0.016681, No Obj: 0.019718, .5R: 0.102564, .75R: 0.000000, count: 39
tiny-yolov3
获取预训练模型
- 从官方地址下载yolov3-tiny.weights
- 提取预训练权重:理论上并没有说提取多少层的特征合适,这里我们提取前15层当作预训练模型
./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15
---
layer filters size input output
0 conv 16 3 x 3 / 1 416 x 416 x 3 -> 416 x 416 x 16 0.150 BF
1 max 2 x 2 / 2 416 x 416 x 16 -> 208 x 208 x 16 0.003 BF
2 conv 32 3 x 3 / 1 208 x 208 x 16 -> 208 x 208 x 32 0.399 BF
3 max 2 x 2 / 2 208 x 208 x 32 -> 104 x 104 x 32 0.001 BF
4 conv 64 3 x 3 / 1 104 x 104 x 32 -> 104 x 104 x 64 0.399 BF
5 max 2 x 2 / 2 104 x 104 x 64 -> 52 x 52 x 64 0.001 BF
6 conv 128 3 x 3 / 1 52 x 52 x 64 -> 52 x 52 x 128 0.399 BF
7 max 2 x 2 / 2 52 x 52 x 128 -> 26 x 26 x 128 0.000 BF
8 conv 256 3 x 3 / 1 26 x 26 x 128 -> 26 x 26 x 256 0.399 BF
9 max 2 x 2 / 2 26 x 26 x 256 -> 13 x 13 x 256 0.000 BF
10 conv 512 3 x 3 / 1 13 x 13 x 256 -> 13 x 13 x 512 0.399 BF
11 max 2 x 2 / 1 13 x 13 x 512 -> 13 x 13 x 512 0.000 BF
12 conv 1024 3 x 3 / 1 13 x 13 x 512 -> 13 x 13 x1024 1.595 BF
13 conv 256 1 x 1 / 1 13 x 13 x1024 -> 13 x 13 x 256 0.089 BF
14 conv 512 3 x 3 / 1 13 x 13 x 256 -> 13 x 13 x 512 0.399 BF
15 conv 255 1 x 1 / 1 13 x 13 x 512 -> 13 x 13 x 255 0.044 BF
16 yolo
17 route 13
18 conv 128 1 x 1 / 1 13 x 13 x 256 -> 13 x 13 x 128 0.011 BF
19 upsample 2x 13 x 13 x 128 -> 26 x 26 x 128
20 route 19 8
21 conv 256 3 x 3 / 1 26 x 26 x 384 -> 26 x 26 x 256 1.196 BF
22 conv 255 1 x 1 / 1 26 x 26 x 256 -> 26 x 26 x 255 0.088 BF
23 yolo
Total BFLOPS 5.571
Loading weights from backup/yolov3-tiny.weights...
seen 64
Done!
Saving weights to yolov3-tiny.conv.15
训练:
./darknet detector train cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.conv.15
问题汇总:
1> 多个模型报错out of memory时,把cfg中的batch和subdivisions设置为1
2>海康相机的解码:
"rtspsrc location=rtsp://admin:123qweasd@192.168.0.222:554/h264/ch1/main/av_stream latency=200 ! rtph264depay ! h264parse ! omxh264dec ! videoconvert ! appsink sync=false"
3>刚开始出现nan正常现象,如果全部是nan,是xml生成的txt错误或者label名字错误,查看coco.names,生成的txt确定文件都正确
4>2000张样本迭代2000-5000次最佳,10000样本迭代20000次左右(主要看学习率的下降和数据复杂度)
测试:
-c index 按照索引打开摄像头
-out_filename *.avi 保存结果到视频文件
-thresh 设置检测置信度
-ext_output < /media/mahxn0/DATA/tool/state3.list > result1.txt
批量测试图片准确度并且显示
python v2调用(已封装成python库)
- 注意:get_network_boxes函数最后一个c_int参数是调节框的准确度的
- 测试的时候把cfg中的batch和subdivisions改成1,否则检测结果会不准确,目前正在查找原因
- free image必须打开释放内存
#-*- coding=utf-8 -*-
from ctypes import *
import math
import random
import time
import cv2
import numpy as np
import re
import os
import sys
def sample(probs):
    """Pick a random index, weighted by the values in ``probs``.

    The weights are normalised by their sum, a number is drawn from
    ``random.uniform(0, 1)`` and the first index whose cumulative weight
    reaches it is returned. Falls back to ``len(probs) - 1``.

    Raises:
        ZeroDivisionError: if ``probs`` is non-empty and sums to zero.
    """
    # float() keeps integer weights from being floor-divided to 0 under
    # Python 2, which made the function always return the last index.
    total = float(sum(probs))
    weights = [a / total for a in probs]
    r = random.uniform(0, 1)
    for i, weight in enumerate(weights):
        r = r - weight
        if r <= 0:
            return i
    return len(weights) - 1
# def c_array(ctype, values):
# arr = (ctype*len(values))()
# arr[:] = values
# return arr
def c_array(ctype, values):
    """Build a ctypes array of ``ctype`` initialised from ``values``."""
    array_type = ctype * len(values)
    return array_type(*values)
class BOX(Structure):
    """ctypes structure with four float fields: x, y, w, h.

    The detection code in this file treats (x, y) as the box centre and
    (w, h) as its size.
    """

    _fields_ = [
        ("x", c_float),
        ("y", c_float),
        ("w", c_float),
        ("h", c_float),
    ]
class DETECTION(Structure):
    """ctypes structure for one detection returned by get_network_boxes.

    Field order must match the native struct of the loaded library.
    """

    _fields_ = [
        ("bbox", BOX),                    # box of the detection
        ("classes", c_int),
        ("prob", POINTER(c_float)),       # indexed by class id in detect()
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]
class IMAGE(Structure):
    """ctypes structure for an image: width, height, channels, float data."""

    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]
class METADATA(Structure):
    """ctypes structure returned by get_metadata: class count and names."""

    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]
# ---------------------------------------------------------------------------
# ctypes bindings for the darknet shared library.
# Each native function gets its argument types (and, where it returns
# something other than the ctypes default int, its return type) declared
# before it is called. Networks are passed around as opaque c_void_p handles.
# ---------------------------------------------------------------------------
#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
lib = CDLL("/home/mahxn0/darknet/darknet.so", RTLD_GLOBAL)
# Network input dimensions.
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int
# Raw forward pass on a float buffer.
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)
set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE
# Declared with nine parameters here (the last one is an extra c_int), so
# every call must pass all nine or ctypes raises TypeError.
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int),c_int]
get_network_boxes.restype = POINTER(DETECTION)
make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)
# Release helpers for buffers allocated by the library.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]
free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]
# Second name for the same native function that `predict` is bound to.
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]
reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]
# load_net(cfg_path, weights_path, int) returns the opaque network handle.
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p
# Non-maximum suppression variants: (detections, count, classes, threshold).
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
free_image = lib.free_image
free_image.argtypes = [IMAGE]
letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE
# load_meta(data_file_path) returns a METADATA (class count + names).
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA
load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE
rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]
# Forward pass on an IMAGE struct.
predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)
# Called below with a numpy array's data pointer, shape and strides.
# NOTE(review): presumably a custom export added to this darknet build --
# confirm that the loaded .so provides ndarray_to_image.
ndarray_image = lib.ndarray_to_image
ndarray_image.argtypes = [POINTER(c_ubyte), POINTER(c_long), POINTER(c_long)]
ndarray_image.restype = IMAGE
# The network and its class metadata are loaded once, at import time, and
# shared as module-level globals by the detector class below.
#net = load_net("/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot.cfg", "/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot_final.weights", 0)
#meta = load_meta("/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot.data")
net = load_net("/home/mahxn0/darknet/yolo_box/box.cfg", "/home/mahxn0/darknet/yolo_box/backup/box_final.weights", 0)
meta = load_meta("/home/mahxn0/darknet/yolo_box/box.data")
#net = load_net("/home/mahxn0/ROS_workspace/darknet/cfg/yolov3-tiny.cfg", "/home/mahxn0/ROS_workspace/darknet/yolov3-tiny.weights", 0)
#meta = load_meta("/home/mahxn0/ROS_workspace/darknet/cfg/coco.data")
#video =cv2.VideoCapture(0)
class yolo_helmet(object):
    """Run the module-level darknet network (``net`` / ``meta``) on images.

    ``detect_pic`` and ``detect`` both take a numpy image array and return a
    list of ``(name, prob, left, top, right, bot)`` tuples sorted by
    descending probability, with the box edges clamped to the image.
    """

    def __init__(self):
        pass

    def _detect_image(self, im, thresh, hier_thresh, nms):
        """Run the network on ``im`` and collect the detections.

        Takes ownership of ``im``: the image and the native detection
        buffer are released even when an error occurs part-way through.
        """
        dets = None
        num = 0
        try:
            pnum = pointer(c_int(0))
            predict_image(net, im)
            # The binding declares nine parameters, so the trailing 0 is
            # required; omitting it makes ctypes raise TypeError.
            dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh,
                                     None, 0, pnum, 0)
            num = pnum[0]
            if nms:
                do_nms_obj(dets, num, meta.classes, nms)
            res = []
            for j in range(num):
                for i in range(meta.classes):
                    if dets[j].prob[i] > 0:
                        b = dets[j].bbox
                        # Centre/size -> corner coordinates.
                        left = b.x - b.w / 2
                        right = b.x + b.w / 2
                        top = b.y - b.h / 2
                        bot = b.y + b.h / 2
                        # Clamp the box to the image.
                        if left < 0:
                            left = 0
                        if right > im.w - 1:
                            right = im.w - 1
                        if top < 0:
                            top = 0
                        if bot > im.h - 1:
                            bot = im.h - 1
                        res.append((meta.names[i], dets[j].prob[i],
                                    left, top, right, bot))
            return sorted(res, key=lambda x: -x[1])
        finally:
            free_image(im)
            if dets is not None:
                free_detections(dets, num)

    def detect_pic(self, image, thresh=0.3, hier_thresh=.5, nms=.45):
        """Detect objects in a numpy image array (no timing output)."""
        im = self.nparray_to_image(image)
        return self._detect_image(im, thresh, hier_thresh, nms)

    def detect(self, image, thresh=.3, hier_thresh=.5, nms=.45):
        """Detect objects in a numpy image array and print how long it took."""
        t0 = time.time()
        im = self.nparray_to_image(image)
        t1 = time.time()
        res = self._detect_image(im, thresh, hier_thresh, nms)
        t2 = time.time()
        # Apply the format strings (the original printed them unformatted).
        print("detect take %.3f s" % (t2 - t0))
        print("array_to_image take %.3f s" % (t1 - t0))
        return res

    def array_to_image(self, arr):
        """Build an IMAGE from an (h, w, c) array by copying it to floats.

        The array is transposed to channel-first order and scaled by 1/255.
        """
        arr = arr.transpose(2, 0, 1)
        c = arr.shape[0]
        h = arr.shape[1]
        w = arr.shape[2]
        arr = (arr / 255.0).flatten()
        data = c_array(c_float, arr)
        im = IMAGE(w, h, c, data)
        return im

    def nparray_to_image(self, img):
        """Convert a numpy array to an IMAGE via the native ndarray_to_image.

        Raises:
            ValueError: if ``img`` is None (e.g. cv2.imread could not read
                the file).
        """
        if img is None:
            raise ValueError("image is None (could the file not be read?)")
        data = img.ctypes.data_as(POINTER(c_ubyte))
        image = ndarray_image(data, img.ctypes.shape, img.ctypes.strides)
        return image

    def getXY(self, i):
        """Return ``i`` converted with int()."""
        return int(i)
if __name__ == "__main__":
    # Run the detector over every file in picDir and save a padded crop of
    # each detection whose top-left corner lies beyond (300, 300).
    picDir = '/home/mahxn0/Downloads/20181226/'
    print("path: %s" % picDir)
    filenames = os.listdir(picDir)
    i = 0
    r = yolo_helmet()
    for name in filenames:
        filename = os.path.join(picDir, name)
        print(filename)
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread returns None for files it cannot decode.
            print("skip unreadable file: %s" % filename)
            continue
        out = r.detect(image)
        print(out)
        for res in out:
            x1 = r.getXY(res[2])
            y1 = r.getXY(res[3])
            x2 = r.getXY(res[4])
            y2 = r.getXY(res[5])
            # The > 300 guard also keeps the 80 px padding from producing
            # negative slice starts.
            if x1 > 300 and y1 > 300:
                frame_rect = image[(y1-80):(y2+80), (x1-80):(x2+80)]
                cv2.imwrite('/home/mahxn0/darknet/image/box_rect3/'+str(i)+'.jpg', frame_rect)
                i += 1
# f_img=None
# cap=cv2.VideoCapture()
# cap.open("/media/mahxn0/Mahxn0/M_DataSets/jinan_data/Video/2018-07-07/192.168.0.222_01_20180707150311306.mp4")
# # cap.set(3,1280)
# # cap.set(4,720)
# # cap.set(5,60)
# # cap.set(10,-4)
# # cap.set(11,40)
# # cap.set(12,20)
# # cap.set(15,-2)
# #cap.open('rtsp://admin:123qweasd@192.168.0.222:554/h264/ch1/main/av_stream')
# cv2.namedWindow('YOLOV3')
# r = yolo_helmet()
# result = None
# fileindex=0
# font=cv2.FONT_HERSHEY_SCRIPT_COMPLEX
# #font = cv2.CAP_PVAPI_PIXELFORMAT_MONO8 # 使用默认字体
# while(cap.isOpened()):
# rect,frame=cap.read()
# frame_res=frame
# if True:
# out = r.detect(frame)
# # cv2.imshow("YOLOV3",frame)
# print(out)
# for res in out:
# x1=r.getXY(res[2])
# y1=r.getXY(res[3])
# x2=r.getXY(res[4])
# y2=r.getXY(res[5])
# frame_res=cv2.rectangle(frame, (x1,y1), (x2,y2), (87,255,123),4)
# cv2.putText(frame_res, res[0] + ' ' + str(res[1]), (x1,y1), font,1,(0,0,255),2)
# frame_rect=frame[x2:y2,x1:y1]
# cv2.imwrite("test.jpg",frame_rect)
# if frame_res is None:
# print("frame_res is empty")
# else:
# cv2.imshow("YOLOV3",frame)
# cv2.waitKey(1)
pythonv3调用:
from ctypes import *
import math
import random
import cv2
import time
import numpy as np
def sample(probs):
    """Return a random index drawn with the weights given in ``probs``.

    Weights are divided by their sum; a uniform draw in [0, 1] is then
    reduced by each weight in turn and the first index at which it drops
    to zero or below is returned (the last index otherwise).
    """
    total = sum(probs)
    normalised = [weight / total for weight in probs]
    remaining = random.uniform(0, 1)
    for index, weight in enumerate(normalised):
        remaining -= weight
        if remaining <= 0:
            return index
    return len(normalised) - 1
def c_array(ctype, values):
    """Allocate a ctypes array of ``ctype`` and copy ``values`` into it."""
    buffer_type = ctype * len(values)
    buf = buffer_type()
    buf[:] = values
    return buf
class BOX(Structure):
    """Four-float ctypes structure (x, y, w, h) used as a detection's bbox."""

    # All four members share the same C type, in this exact order.
    _fields_ = [(member, c_float) for member in ("x", "y", "w", "h")]
class DETECTION(Structure):
    """One entry of the array returned by get_network_boxes.

    The member order has to match the native struct of the loaded library.
    """

    _fields_ = [
        ("bbox", BOX),
        ("classes", c_int),
        ("prob", POINTER(c_float)),   # read per class id by detect()
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]
class IMAGE(Structure):
    """Image passed to the library: w, h, c and a pointer to float data."""

    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]
class METADATA(Structure):
    """Result of get_metadata: number of classes and their names."""

    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]
# ---------------------------------------------------------------------------
# ctypes bindings for the darknet shared library.
# Argument types (and non-int return types) are declared for every native
# function before use; networks are handled as opaque c_void_p pointers.
# ---------------------------------------------------------------------------
#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
lib = CDLL("/home/mahxn0/darknet/darknet.so", RTLD_GLOBAL)
# Network input dimensions.
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int
# Raw forward pass on a float buffer.
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)
set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE
# Declared with eight parameters in this script.
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)]
get_network_boxes.restype = POINTER(DETECTION)
make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)
# Release helpers for buffers allocated by the library.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]
free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]
# Second name for the same native function that `predict` is bound to.
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]
reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]
# load_net(cfg_path, weights_path, int) returns the opaque network handle.
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p
# Non-maximum suppression variants: (detections, count, classes, threshold).
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
free_image = lib.free_image
free_image.argtypes = [IMAGE]
letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE
# load_meta(data_file_path) returns a METADATA (class count + names).
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA
load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE
rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]
# Forward pass on an IMAGE struct.
predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)
# The network and class metadata are loaded once at import time from paths
# relative to the working directory; c_char_p arguments are passed as bytes.
net = load_net(b"model/yolo_box/box.cfg", b"model/yolo_box/box.weights", 0)
meta = load_meta(b"model/yolo_box/box.data")
class yolo(object):
    """Detector front-end around the module-level ``net`` and ``meta``."""

    def __init__(self):
        pass

    def convertBack(self, x, y, w, h):
        """Convert a centre/size box to rounded integer corners.

        Returns ``(xmin, ymin, xmax, ymax)``.
        """
        half_w = w / 2
        half_h = h / 2
        return (int(round(x - half_w)),
                int(round(y - half_h)),
                int(round(x + half_w)),
                int(round(y + half_h)))

    def array_to_image(self, arr):
        """Wrap an (h, w, c) array as an IMAGE backed by a float32 buffer.

        Returns ``(im, buffer)``. The buffer is returned too so the caller
        keeps it referenced while ``im`` points at its memory.
        """
        chw = arr.transpose(2, 0, 1)
        c, h, w = chw.shape[0:3]
        flat = np.ascontiguousarray(chw.flat, dtype=np.float32) / 255.0
        data = flat.ctypes.data_as(POINTER(c_float))
        return IMAGE(w, h, c, data), flat

    def detect(self, image, thresh=.5, hier_thresh=.5, nms=.45):
        """Run the network on a numpy image array.

        Returns ``(name, prob, left, top, right, bot)`` tuples sorted by
        descending probability; box edges are clamped to the image.
        """
        # ``keepalive`` owns the pixel memory that ``im.data`` points to.
        im, keepalive = self.array_to_image(image)
        rgbgr_image(im)
        count = c_int(0)
        count_ptr = pointer(count)
        predict_image(net, im)
        dets = get_network_boxes(net, im.w, im.h, thresh,
                                 hier_thresh, None, 0, count_ptr)
        num = count_ptr[0]
        if nms:
            do_nms_obj(dets, num, meta.classes, nms)

        res = []
        for j in range(num):
            class_probs = dets[j].prob[0:meta.classes]
            if not any(class_probs):
                continue
            # Indices of the classes with a non-zero probability.
            for i in np.array(class_probs).nonzero()[0]:
                b = dets[j].bbox
                left = (b.x - b.w / 2)
                right = (b.x + b.w / 2)
                top = (b.y - b.h / 2)
                bot = (b.y + b.h / 2)
                left = 0 if left < 0 else left
                right = im.w - 1 if right > im.w - 1 else right
                top = 0 if top < 0 else top
                bot = im.h - 1 if bot > im.h - 1 else bot
                res.append((meta.names[i], dets[j].prob[i], left, top, right, bot))
        res.sort(key=lambda entry: -entry[1])

        if isinstance(keepalive, bytes):
            free_image(im)
        free_detections(dets, num)
        return res
if __name__ == "__main__":
    # Play a video, draw every detection on each frame and show it until the
    # video ends or 'q' is pressed.
    # load video here
    cap = cv2.VideoCapture("board0.mp4")
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Named ``detector`` so the ``yolo`` class is not shadowed by an instance.
    detector = yolo()
    print("Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(fps))
    cv2.namedWindow("img", cv2.WINDOW_NORMAL)
    while True:
        ret, img = cap.read()
        if not ret:
            # End of stream (or read error): stop instead of spinning forever.
            break
        # detect() returns (name, prob, left, top, right, bot) tuples, i.e.
        # corner coordinates -- no centre/size conversion is needed.
        for name, prob, left, top, right, bot in detector.detect(img):
            pt1 = (int(round(left)), int(round(top)))
            pt2 = (int(round(right)), int(round(bot)))
            cv2.rectangle(img, pt1, pt2, (0, 255, 0), 2)
            cv2.putText(img, name.decode() + " [" + str(round(prob * 100, 2)) + "]", (pt1[0], pt1[1] + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, [0, 255, 0], 4)
        cv2.imshow("img", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
yolov4
自动给xml打标签
最近新出yolov4,发现之前的python版本会报错,发现是函数的参数变了,修改以后可以了,放上代码
调用yolov4训练的模型 对车辆行人非机动车自动打标签:
#-*- coding=utf-8 -*-
from ctypes import *
import math
import random
import time
import cv2
import numpy as np
import re
import os
import sys
def sample(probs):
    """Return a random index chosen with the weights in ``probs``.

    The weights are normalised by their sum; a draw from
    ``random.uniform(0, 1)`` is reduced by each normalised weight in turn
    and the first index at which it reaches zero or below is returned.
    Falls back to ``len(probs) - 1``.

    Raises:
        ZeroDivisionError: if ``probs`` is non-empty and sums to zero.
    """
    # Divide by a float: under Python 2, int / int floor-divides, which
    # turned every integer weight into 0.
    s = float(sum(probs))
    scaled = [a / s for a in probs]
    r = random.uniform(0, 1)
    for i, share in enumerate(scaled):
        r = r - share
        if r <= 0:
            return i
    return len(scaled) - 1
# def c_array(ctype, values):
# arr = (ctype*len(values))()
# arr[:] = values
# return arr
def c_array(ctype, values):
    """Return a new ctypes array of ``ctype`` holding ``values``."""
    length = len(values)
    make_array = ctype * length
    return make_array(*values)
class BOX(Structure):
    """Box structure with float members x, y, w, h (in that order).

    The detection code reads (x, y) as the centre and (w, h) as the size.
    """

    _fields_ = [
        ("x", c_float),
        ("y", c_float),
        ("w", c_float),
        ("h", c_float),
    ]
class DETECTION(Structure):
    """Detection record returned by get_network_boxes.

    Compared with the other scripts in this file it carries two extra
    trailing members, ``uc`` and ``points``. The member order has to match
    the native struct of the loaded library.
    """

    _fields_ = [
        ("bbox", BOX),
        ("classes", c_int),
        ("prob", POINTER(c_float)),   # read per class id by the detector
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
        ("uc", POINTER(c_float)),
        ("points", c_int),
    ]
class IMAGE(Structure):
    """Image structure: int w, h, c followed by a pointer to float data."""

    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]
class METADATA(Structure):
    """Class metadata from get_metadata: a count and an array of names."""

    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]
# ---------------------------------------------------------------------------
# ctypes bindings for the darknet shared library.
# Argument types (and non-int return types) are declared for every native
# function before use; networks are handled as opaque c_void_p pointers.
# ---------------------------------------------------------------------------
#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
lib = CDLL("/home/training/mxj/darknet/darknet.so", RTLD_GLOBAL)
# Network input dimensions.
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int
# Raw forward pass on a float buffer.
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)
set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE
# Declared with nine parameters (the last one is an extra c_int); every
# call must pass all nine or ctypes raises TypeError.
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int),c_int]
get_network_boxes.restype = POINTER(DETECTION)
make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)
# Release helpers for buffers allocated by the library.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]
free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]
# Second name for the same native function that `predict` is bound to.
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]
reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]
# load_net(cfg_path, weights_path, int) returns the opaque network handle.
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p
# Non-maximum suppression variants: (detections, count, classes, threshold).
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
free_image = lib.free_image
free_image.argtypes = [IMAGE]
letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE
# load_meta(data_file_path) returns a METADATA (class count + names).
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA
load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE
rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]
# Forward pass on an IMAGE struct.
predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)
# Called below with a numpy array's data pointer, shape and strides.
# NOTE(review): presumably a custom export added to this darknet build --
# confirm that the loaded .so provides ndarray_to_image.
ndarray_image = lib.ndarray_to_image
ndarray_image.argtypes = [POINTER(c_ubyte), POINTER(c_long), POINTER(c_long)]
ndarray_image.restype = IMAGE
# The network and its class metadata are loaded once, at import time, and
# shared as module-level globals by the detector class below. The commented
# pairs are alternative models kept for reference.
#net = load_net("/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot.cfg", "/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot_final.weights", 0)
#meta = load_meta("/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot.data")
net = load_net("/home/training/mxj/darknet/cfg/yolov4.cfg", "/home/training/mxj/darknet/yolov4.weights", 0)
meta = load_meta("/home/training/mxj/darknet/cfg/coco.data")
#net = load_net("/home/mahxn0/darknet/watch_shunhua/watch.cfg", "/home/mahxn0/darknet/watch_shunhua/watch_final.weights", 0)
#meta = load_meta("/home/mahxn0/darknet/watch_shunhua/watch.data")
#net = load_net('/home/mahxn0/darknet/cfg/yolov3.cfg', '/home/mahxn0/darknet/yolov3.weights', 0)
#meta = load_meta("/home/mahxn0/darknet/cfg/coco.data")
#net = load_net("/home/mahxn0/ROS_workspace/darknet/cfg/yolov3-tiny.cfg", "/home/mahxn0/ROS_workspace/darknet/yolov3-tiny.weights", 0)
#meta = load_meta("/home/mahxn0/ROS_workspace/darknet/cfg/coco.data")
#video =cv2.VideoCapture(0)
class yolo_helmet(object):
    """Run the module-level darknet network (``net`` / ``meta``) on images.

    ``detect_pic`` takes a numpy image array and ``detect`` takes an image
    path. Both return ``(name, prob, left, top, right, bot)`` tuples sorted
    by descending probability, restricted to the class names in
    ``_KEPT_CLASSES`` and clamped to the image.
    """

    # Only detections with one of these class names are returned.
    _KEPT_CLASSES = ('bicycle', 'motorbike', 'car', 'bus', 'truck', 'person')

    def __init__(self):
        pass

    def _detect_image(self, im, thresh, hier_thresh, nms, margin):
        """Run the network on ``im`` and collect the kept detections.

        ``margin`` pixels are added on every side of a box before it is
        clamped. Takes ownership of ``im``: the image and the native
        detection buffer are always released, so repeated calls do not leak.
        """
        dets = None
        num = 0
        try:
            pnum = pointer(c_int(0))
            predict_image(net, im)
            dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh,
                                     None, 0, pnum, 0)
            num = pnum[0]
            if nms:
                do_nms_obj(dets, num, meta.classes, nms)
            res = []
            for j in range(num):
                for i in range(meta.classes):
                    if dets[j].prob[i] > 0:
                        b = dets[j].bbox
                        # Centre/size -> corners, grown by the margin.
                        left = (b.x - b.w / 2) - margin
                        right = (b.x + b.w / 2) + margin
                        top = (b.y - b.h / 2) - margin
                        bot = (b.y + b.h / 2) + margin
                        # Clamp the box to the image.
                        if left < 0:
                            left = 0
                        if right > im.w - 1:
                            right = im.w - 1
                        if top < 0:
                            top = 0
                        if bot > im.h - 1:
                            bot = im.h - 1
                        if meta.names[i] in self._KEPT_CLASSES:
                            res.append((meta.names[i], dets[j].prob[i],
                                        left, top, right, bot))
            return sorted(res, key=lambda x: -x[1])
        finally:
            free_image(im)
            if dets is not None:
                free_detections(dets, num)

    def detect_pic(self, image, thresh=0.25, hier_thresh=.5, nms=.45):
        """Detect objects in a numpy image array (boxes are not padded)."""
        im = self.nparray_to_image(image)
        return self._detect_image(im, thresh, hier_thresh, nms, 0)

    def detect(self, imagename, thresh=.25, hier_thresh=.5, nms=.45):
        """Detect objects in the image file ``imagename``.

        Each box is grown by 5 pixels on every side before clamping. Prints
        the result list and the elapsed time.

        Raises:
            IOError: if the image file cannot be read.
        """
        t0 = time.time()
        image = cv2.imread(imagename)
        if image is None:
            # cv2.imread signals failure by returning None.
            raise IOError("cannot read image: %s" % imagename)
        im = self.nparray_to_image(image)
        t1 = time.time()
        res = self._detect_image(im, thresh, hier_thresh, nms, 5)
        print(res)
        t2 = time.time()
        # Apply the format strings (the original printed them unformatted).
        print("detect take %.3f s" % (t2 - t0))
        print("array_to_image take %.3f s" % (t1 - t0))
        return res

    #def merge_iou(self,result_lists):
    def compute_iou(self, rect1, rect2):
        """Return the intersection-over-union of two boxes.

        Boxes are ``(x1, y1, x2, y2)``. Returns 0 when they do not overlap.
        """
        x1 = max(rect1[0], rect2[0])
        x2 = min(rect1[2], rect2[2])
        y1 = max(rect1[1], rect2[1])
        y2 = min(rect1[3], rect2[3])
        if x2 <= x1 or y2 <= y1:
            return 0
        intersect = (x2 - x1) * (y2 - y1)
        area1 = (rect1[2] - rect1[0]) * (rect1[3] - rect1[1])
        area2 = (rect2[2] - rect2[0]) * (rect2[3] - rect2[1])
        # float() avoids Python 2 floor division, which returned 0 for
        # integer coordinates.
        return float(intersect) / (area1 + area2 - intersect)

    def array_to_image(self, arr):
        """Build an IMAGE from an (h, w, c) array by copying it to floats.

        The array is transposed to channel-first order and scaled by 1/255.
        """
        arr = arr.transpose(2, 0, 1)
        c = arr.shape[0]
        h = arr.shape[1]
        w = arr.shape[2]
        arr = (arr / 255.0).flatten()
        data = c_array(c_float, arr)
        im = IMAGE(w, h, c, data)
        return im

    def nparray_to_image(self, img):
        """Convert a numpy array to an IMAGE via the native ndarray_to_image."""
        data = img.ctypes.data_as(POINTER(c_ubyte))
        image = ndarray_image(data, img.ctypes.shape, img.ctypes.strides)
        return image

    def getXY(self, i):
        """Return ``i`` converted with int()."""
        return int(i)
if __name__ == "__main__":
    # Auto-label every .png in imgDir: run the detector and, when it finds
    # something, write a VOC-style XML annotation next to the image.
    imgDir = '/home/training/mxj/1/'
    # Detector class name -> label written to the XML. Classes that are not
    # listed are skipped. (The original test contained a bare 'car' operand,
    # which made it true for every class.)
    labelMap = {
        'bicycle': 'nonMoto',
        'motorbike': 'nonMoto',
        'car': 'car',
        'bus': 'car',
        'truck': 'car',
        'person': 'person',
    }
    filenames = os.listdir(imgDir)
    r = yolo_helmet()
    for name in filenames:
        # Match the extension itself rather than any name containing 'png'.
        if not name.lower().endswith('.png'):
            continue
        imgName = os.path.join(imgDir, name)
        print(imgName)
        img = cv2.imread(imgName)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print("skip unreadable image: %s" % imgName)
            continue
        stem = os.path.splitext(name)[0]
        xmlName = os.path.join(imgDir, stem + '.xml')
        rets = r.detect(imgName)
        if len(rets) == 0:
            continue

        lines = [
            '<annotation verified="no">\n',
            '<folder>yf</folder>\n',
            '<filename>' + stem + '</filename>\n',
            '<path>' + imgName + '</path>\n',
            '<source>\n',
            '<database>Unknown</database>\n',
            '</source>\n',
            '<size>\n',
            '<width>' + str(img.shape[1]) + '</width>\n',
            '<height>' + str(img.shape[0]) + '</height>\n',
            '<depth>' + str(img.shape[2]) + '</depth>\n',
            '</size>\n',
            '<segmented>0</segmented>\n',
        ]
        for obj in rets:
            retClass = labelMap.get(obj[0])
            if retClass is None:
                continue
            left = int(obj[2])
            top = int(obj[3])
            right = int(obj[4])
            bottom = int(obj[5])
            lines.extend([
                '<object>\n',
                '<name>' + retClass + '</name>\n',
                '<pose>Unspecified</pose>\n',
                '<truncated>0</truncated>\n',
                # VOC uses the lowercase tag name 'difficult'.
                '<difficult>0</difficult>\n',
                '<bndbox>\n',
                '<xmin>' + str(left) + '</xmin>\n',
                '<ymin>' + str(top) + '</ymin>\n',
                '<xmax>' + str(right) + '</xmax>\n',
                '<ymax>' + str(bottom) + '</ymax>\n',
                '</bndbox>\n',
                '</object>\n',
            ])
        lines.append('</annotation>\n')
        # ``with`` closes the file; no explicit close() is needed.
        with open(xmlName, 'w') as f:
            f.write(''.join(lines))