Why did I pick a topic like this...
Overview
Only "vehicles" need to be detected. When processing the datasets, BDD100K's ['bike','car','motor','bus','truck','train'] and KITTI's ['Car','Truck','Van','Tram'] are all grouped under a single 'car' class.
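To make the grouping concrete, it boils down to a small lookup like the sketch below (illustration only; the actual remapping happens inside the conversion scripts later in this post):

# class_merge_sketch.py -- illustration only, not one of the scripts used below
BDD_VEHICLES = {"bike", "car", "motor", "bus", "truck", "train"}
KITTI_VEHICLES = {"Car", "Truck", "Van", "Tram"}

def remap(category):
    # map every source-dataset vehicle label onto the single 'car' class
    if category in BDD_VEHICLES or category in KITTI_VEHICLES:
        return "car"
    return category  # anything else keeps its original name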
As for traffic cones, finding a ready-made dataset would of course be best; the thought of labeling images by hand all day, like before, is terrifying.
Dataset processing
For the datasets, the most important step is the stitching: first convert every ready-made dataset to VOC format, then a bit of copy and paste takes care of the rest.
BDD100K to VOC
1. Download the dataset and labels
Official site: https://bdd-data.berkeley.edu/
Out of laziness, I only downloaded the 10K images and their labels, which means a few extra steps later.
2. Dataset conversion
I took the code from here as a reference and modified it a bit: the vehicle classes are merged into car, and the person and traffic light classes are dropped.
# bdd100k2voc.py
import os
import os.path as osp
import json
from xml.etree.ElementTree import Element, SubElement
from xml.etree import ElementTree
from xml.dom import minidom
from PIL import Image
from tqdm import tqdm

DEBUG = False
BDD_FOLDER = "E:/bdd100k_images_10k_3/bdd100k/images"
if DEBUG:
    XML_PATH = "./xml"
else:
    XML_PATH = BDD_FOLDER + "/xml"

def bdd_to_voc(bdd_folder, xml_folder):
    image_path = bdd_folder + "/10k/%s"
    label_path = bdd_folder + "/labels/bdd100k_labels_images_%s.json"
    classes = set()
    for trainval in ['train', 'val']:
        image_folder = image_path % trainval
        json_path = label_path % trainval
        xml_folder_ = osp.join(xml_folder, trainval)
        if not os.path.exists(xml_folder_):
            os.makedirs(xml_folder_)
        with open(json_path) as f:
            j = f.read()
        data = json.loads(j)
        for datum in tqdm(data):
            if not os.path.exists(osp.join(image_folder, datum['name'])):  # skip labels whose image file does not exist
                continue
            tmp_list = []
            annotation = Element('annotation')
            SubElement(annotation, 'folder').text = 'VOC2007'
            SubElement(annotation, 'filename').text = datum['name']
            source = get_source()
            owner = get_owner()
            annotation.append(source)
            annotation.append(owner)
            size = get_size(osp.join(image_folder, datum['name']))
            annotation.append(size)
            SubElement(annotation, 'segmented').text = '0'
            for label in datum['labels']:
                if label['category'] not in ["bike", "car", "motor", "bus", "truck", "rider", "train", "traffic sign"]:  # drop unwanted categories (person, traffic light, ...)
                    continue
                else:
                    tmp_list.append(1)
                    if label['category'] in ["bike", "car", "motor", "bus", "truck", "rider", "train"]:
                        label['category'] = "car"  # merge all vehicle classes into 'car'
                    else:
                        label['category'] = "sign"  # rename 'traffic sign' to 'sign'
                try:
                    box2d = label['box2d']
                except KeyError:
                    continue
                else:
                    bndbox = get_bbox(box2d)
                object_ = Element('object')
                SubElement(object_, 'name').text = label['category']
                SubElement(object_, 'pose').text = "Unspecified"
                SubElement(object_, 'truncated').text = '0'
                SubElement(object_, 'difficult').text = '0'
                classes.add(label['category'])
                object_.append(bndbox)
                annotation.append(object_)
            if len(tmp_list) == 0:  # no relevant objects in this image, skip writing an XML
                continue
            xml_filename = osp.splitext(datum['name'])[0] + '.xml'
            with open(osp.join(xml_folder_, xml_filename), 'w') as f:
                f.write(prettify(annotation))
    print(classes)

def get_owner():
    owner = Element('owner')
    SubElement(owner, 'flickrid').text = 'NULL'
    SubElement(owner, 'name').text = 'lijing'
    return owner

def get_source():
    source = Element('source')
    SubElement(source, 'database').text = 'voc_bdd'
    SubElement(source, 'annotation').text = 'VOC2007'
    SubElement(source, 'image').text = 'flickr'
    SubElement(source, 'flickrid').text = 'NULL'
    return source

def get_size(image_path):
    i = Image.open(image_path)
    sz = Element('size')
    SubElement(sz, 'width').text = str(i.width)
    SubElement(sz, 'height').text = str(i.height)
    SubElement(sz, 'depth').text = str(3)
    return sz

def get_bbox(box2d):
    bndbox = Element('bndbox')
    SubElement(bndbox, 'xmin').text = str(int(round(box2d['x1'])))
    SubElement(bndbox, 'ymin').text = str(int(round(box2d['y1'])))
    SubElement(bndbox, 'xmax').text = str(int(round(box2d['x2'])))
    SubElement(bndbox, 'ymax').text = str(int(round(box2d['y2'])))
    return bndbox

def prettify(elem):
    rough_string = ElementTree.tostring(elem, 'utf-8')
    reparsed = minidom.parseString(rough_string)
    return reparsed.toprettyxml(indent="\t")

if __name__ == "__main__":
    bdd_to_voc(BDD_FOLDER, XML_PATH)
3. Generate the txt file used for splitting
Generate trainval.txt:
# Run this in PowerShell; it's the 2020s, after all
Get-ChildItem -Path ./xml/train | ForEach-Object { $_.BaseName } > ./trainval.txt
A trainval.txt will appear in the current directory; remember to convert it to UTF-8.
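If you would rather not re-save the file by hand, a minimal Python sketch like this can do the re-encoding, assuming the redirect produced UTF-16 (the default for > in Windows PowerShell; PowerShell 7 already writes UTF-8, in which case this step is unnecessary):

# reencode_trainval.py -- hypothetical helper, not part of the original toolchain
with open("trainval.txt", "r", encoding="utf-16") as f:
    content = f.read()
with open("trainval.txt", "w", encoding="utf-8") as f:
    f.write(content)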
Time to split
Because the val JSON in the labels does not match the val images inside the 10K set (in fact, not a single one lines up), the val folder ends up empty, so the files under train have to be split into a training set and a validation set.
If you use the 100K dataset... everyone should have one of those voc2yolo4.py scripts lying around to do the split directly, right? Worst case, just run the PowerShell command above a couple more times.
# random_split_train_val.py
import numpy as np

filename = "trainval.txt"
train_file = filename.split('.')[0] + '_train.txt'
val_file = filename.split('.')[0] + '_val.txt'
test_file = filename.split('.')[0] + '_test.txt'

idx = []
with open(filename, 'r') as f:
    for line in f:
        idx.append(line.strip())

idx = np.random.permutation(idx)
train_idx = sorted(idx[:int(len(idx) * 0.8)])  # 80/20 split between training and validation sets
val_idx = sorted(idx[int(len(idx) * 0.8):])

with open(train_file, 'w') as f:
    for i in train_idx:
        f.write('{}\n'.format(i))

with open(val_file, 'w') as f:
    for i in val_idx:
        f.write('{}\n'.format(i))

with open(test_file, 'w') as f:  # the test split simply reuses the validation indices
    for i in val_idx:
        f.write('{}\n'.format(i))

print('Training set is saved to ' + train_file)
print('Validation set is saved to ' + val_file)
print('Testing set is saved to ' + test_file)
At this point, the BDD100K part of the stitching is done; its Annotations, ImageSets and JPEGImages folders can be left alone for now.
The traffic-cone VOC dataset
Just when I was at a loss, I found a traffic-cone dataset that someone had already put together; the issues mentioned that jbox could not be reached…
…but I still managed to download the files from jbox.
The files on jbox already include a trainval.txt, so, as before, just use the random_split_train_val.py above to split it directly.
Once split, merge the traffic-cone and BDD100K train, val and test text files.
Create a new folder ./merge, then put the Python script and the txt files to be merged into it (and run the script from the directory that contains ./merge).
# merge_sets.py
import os

merge_dir = "./merge"
files = os.listdir(merge_dir)
sets_merged = open("merged.txt", "w")
for file in files:
    if file.split(".")[-1] != "txt":  # only merge .txt files
        continue
    with open(os.path.join(merge_dir, file), "r", encoding="utf-8") as f:
        for line in f:
            print(line)  # echo for a quick sanity check
            sets_merged.write(line)
sets_merged.close()
After merging, put the contents of the Annotations, ImageSets and JPEGImages directories together, and the job is done.
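For reference, here is a minimal sketch of that copy step, assuming the two converted datasets sit in ./bdd_voc and ./cone_voc (both hypothetical paths) and the target is the usual VOCdevkit/VOC2007 layout; the merged txt files from merge_sets.py then go under ImageSets/Main:

# merge_folders.py -- hypothetical helper; the source paths are assumptions
import os
import shutil

sources = ["./bdd_voc", "./cone_voc"]   # assumed locations of the converted datasets
target = "./VOCdevkit/VOC2007"          # the standard VOC layout used later for training

for sub in ["Annotations", "JPEGImages"]:
    os.makedirs(os.path.join(target, sub), exist_ok=True)
    for src in sources:
        src_dir = os.path.join(src, sub)
        if not os.path.isdir(src_dir):
            continue
        for name in os.listdir(src_dir):
            # copy2 keeps file metadata; a file with the same name would be overwritten
            shutil.copy2(os.path.join(src_dir, name), os.path.join(target, sub, name))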
KITTI to VOC
This part was finished quite a while ago, so there is no step-by-step walkthrough here; it mainly followed the method described here, one step at a time.
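For completeness, here is a rough sketch of the idea rather than the exact script I used: each KITTI label file has one object per line ("type truncated occluded alpha left top right bottom ..."), so the conversion just reads those fields and writes a VOC-style XML, merging ['Car','Truck','Van','Tram'] into 'car' as described above. The paths and the minimal XML layout below are assumptions.

# kitti2voc_sketch.py -- simplified sketch, not the original conversion script
import os
from xml.etree.ElementTree import Element, SubElement, tostring
from PIL import Image

KITTI_CLASSES = {"Car": "car", "Truck": "car", "Van": "car", "Tram": "car"}  # merge into 'car'
LABEL_DIR = "./kitti/label_2"   # assumed paths
IMAGE_DIR = "./kitti/image_2"
XML_DIR = "./kitti/xml"
os.makedirs(XML_DIR, exist_ok=True)

for txt in os.listdir(LABEL_DIR):
    stem = os.path.splitext(txt)[0]
    img = Image.open(os.path.join(IMAGE_DIR, stem + ".png"))
    ann = Element('annotation')
    SubElement(ann, 'filename').text = stem + ".png"
    size = SubElement(ann, 'size')
    SubElement(size, 'width').text = str(img.width)
    SubElement(size, 'height').text = str(img.height)
    SubElement(size, 'depth').text = '3'
    with open(os.path.join(LABEL_DIR, txt)) as f:
        for line in f:
            fields = line.split()
            if fields[0] not in KITTI_CLASSES:  # skip Pedestrian, Cyclist, DontCare, ...
                continue
            left, top, right, bottom = (str(int(float(v))) for v in fields[4:8])
            obj = SubElement(ann, 'object')
            SubElement(obj, 'name').text = KITTI_CLASSES[fields[0]]
            SubElement(obj, 'difficult').text = '0'
            box = SubElement(obj, 'bndbox')
            SubElement(box, 'xmin').text = left
            SubElement(box, 'ymin').text = top
            SubElement(box, 'xmax').text = right
            SubElement(box, 'ymax').text = bottom
    with open(os.path.join(XML_DIR, stem + ".xml"), 'wb') as f:
        f.write(tostring(ann))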
Once done, merge KITTI's text files with the same method as above, and put its images and xml files in with the rest.
With that, the great stitching job is complete.
Start training
Note that when generating the path-bearing text files used for training, the image formats differ between the datasets, so the code needs a small adjustment.
# voc_annotation.py
import xml.etree.ElementTree as ET
from os import getcwd

sets = [('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
classes = ["car", "sign", "trafficcone"]

def convert_annotation(year, image_id, list_file):
    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id), encoding='utf-8')
    tree = ET.parse(in_file)
    root = tree.getroot()
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text  # read but not used here
        cls = obj.find('name').text
        if cls not in classes:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)),
             int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

wd = getcwd()
for year, image_set in sets:
    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set), encoding='utf-8').read().strip().split()
    list_file = open('%s_%s.txt' % (year, image_set), 'w', encoding='utf-8')
    for image_id in image_ids:
        if image_id.startswith("00") and image_id.isdigit():  # KITTI images have purely numeric names, so write the path with a .png extension
            list_file.write('%s\\VOCdevkit\\VOC%s\\JPEGImages\\%s.png' % (wd, year, image_id))
            convert_annotation(year, image_id, list_file)
        else:  # otherwise (BDD100K / traffic cones) write the path with a .jpg extension
            list_file.write('%s\\VOCdevkit\\VOC%s\\JPEGImages\\%s.jpg' % (wd, year, image_id))
            convert_annotation(year, image_id, list_file)
        list_file.write('\n')
    list_file.close()
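Each line of the resulting 2007_train.txt (and the val/test files) is the image path followed by space-separated boxes in the form xmin,ymin,xmax,ymax,class_id, where class_id indexes into ["car","sign","trafficcone"]; for example (values made up), a line might look like E:\work\VOCdevkit\VOC2007\JPEGImages\000123.png 587,173,614,200,0.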
Now training can finally begin.
Counting classes and instances
After merging, it is worth checking how many instances there are of each class.
# type&cat.py
import os
import xml.dom.minidom
import matplotlib.pyplot as plt

xml_path = './Annotations/'
files = os.listdir(xml_path)
gt_dict = {}

for xm in files:
    xmlfile = os.path.join(xml_path, xm)
    dom = xml.dom.minidom.parse(xmlfile)
    root = dom.documentElement
    filenamelist = root.getElementsByTagName("filename")
    filename = filenamelist[0].childNodes[0].data
    objectlist = root.getElementsByTagName("object")
    for objects in objectlist:
        namelist = objects.getElementsByTagName("name")
        objectname = namelist[0].childNodes[0].data
        if objectname == '-':  # flag annotations with a broken class name
            print(filename)
        if objectname in gt_dict:
            gt_dict[objectname] += 1
        else:
            gt_dict[objectname] = 1

dic = sorted(gt_dict.items(), key=lambda d: d[1], reverse=True)
print(dic)

x, y = zip(*dic)
plt.bar(x, y)
for xi, yi in zip(x, y):
    plt.text(xi, yi, yi, ha='center', va='bottom')
plt.savefig('categories.png')