在KITTI、BDD100K数据集上训练YOLOv4——车辆、路牌、路锥检测

为什么,我要选这种课题

情况概述

只需要对"车辆"进行识别。在对数据集的处理过程中,会把BDD100K中的['bike','car','motor','bus','truck','train']、KITTI数据集中的['Car','Truck','Van','Tram']统一归类为'car'。
对于交通桩,当然是能找到现成的数据集最好,想到以前天天打标签就可怕。

数据集处理

对于数据集,最重要的一步就是缝合,先把所有现成的数据集转成VOC格式,再进行一些复制黏贴就可以了。

BDD100K 转 VOC

1. 下载数据集和标签

官网:https://bdd-data.berkeley.edu/
这里因为懒,只下载了10K的图片和Label,但后面就要多点步骤。

2.数据集转换

参考了这里的代码,对于代码进行了一部分处理,对car进行了合并,并且删去了person和traffic light类。

#bdd100k2voc.py
import os
import os.path as osp

import json

from xml.etree.ElementTree import Element, SubElement
from xml.etree import ElementTree
from xml.dom import minidom

from PIL import Image

from tqdm import tqdm

# When True, XMLs are written to a local ./xml folder for quick inspection
# instead of into the dataset tree.
DEBUG = False

# Root of the extracted BDD100K image/label download.
BDD_FOLDER = "E:/bdd100k_images_10k_3/bdd100k/images"

# Destination root for the generated VOC-style XML annotations.
XML_PATH = "./xml" if DEBUG else BDD_FOLDER + "/xml"

def bdd_to_voc(bdd_folder, xml_folder):
    """Convert BDD100K JSON labels to Pascal-VOC XML annotation files.

    All vehicle-like categories are merged into a single 'car' class and
    'traffic sign' is renamed 'sign'; every other category is dropped.
    One XML file per image is written under <xml_folder>/<train|val>.

    :param bdd_folder: BDD100K root containing '10k/<split>' images and
                       'labels/bdd100k_labels_images_<split>.json'
    :param xml_folder: output root for the generated XML files
    """
    image_path = bdd_folder + "/10k/%s"
    label_path = bdd_folder + "/labels/bdd100k_labels_images_%s.json"

    # Categories that are kept; members of VEHICLES collapse into 'car'.
    keep = {"bike", "car", "motor", "bus", "truck", "rider", "train", "traffic sign"}
    vehicles = {"bike", "car", "motor", "bus", "truck", "rider", "train"}

    classes = set()  # every class name actually emitted, printed at the end

    for trainval in ['train', 'val']:
        image_folder = image_path % trainval
        json_path = label_path % trainval
        xml_folder_ = osp.join(xml_folder, trainval)

        if not os.path.exists(xml_folder_):
            os.makedirs(xml_folder_)

        with open(json_path) as f:
            data = json.load(f)

        for datum in tqdm(data):
            # Skip label entries whose image file is missing (the 10k image
            # subset does not cover every entry in the 100k label JSON).
            if not os.path.exists(osp.join(image_folder, datum['name'])):
                continue

            annotation = Element('annotation')
            SubElement(annotation, 'folder').text = 'VOC2007'
            SubElement(annotation, 'filename').text = datum['name']
            annotation.append(get_source())
            annotation.append(get_owner())
            annotation.append(get_size(osp.join(image_folder, datum['name'])))
            SubElement(annotation, 'segmented').text = '0'

            num_objects = 0
            for label in datum['labels']:
                if label['category'] not in keep:
                    continue
                label['category'] = "car" if label['category'] in vehicles else "sign"

                # Some labels (e.g. drivable area / lanes) carry no 2D box.
                try:
                    box2d = label['box2d']
                except KeyError:
                    continue

                object_ = Element('object')
                SubElement(object_, 'name').text = label['category']
                SubElement(object_, 'pose').text = "Unspecified"
                SubElement(object_, 'truncated').text = '0'
                SubElement(object_, 'difficult').text = '0'
                classes.add(label['category'])
                object_.append(get_bbox(box2d))
                annotation.append(object_)
                num_objects += 1  # count only labels that produced a real box

            # BUGFIX: the original counted box-less labels as objects, so an
            # image whose kept labels all lacked 'box2d' still produced an
            # XML with zero <object> entries. Only write non-empty files.
            if num_objects == 0:
                continue
            xml_filename = osp.splitext(datum['name'])[0] + '.xml'
            with open(osp.join(xml_folder_, xml_filename), 'w') as f:
                f.write(prettify(annotation))
    print(classes)

def get_owner():
    """Build the VOC <owner> element with fixed placeholder values."""
    node = Element('owner')
    for tag, text in (('flickrid', 'NULL'), ('name', 'lijing')):
        SubElement(node, tag).text = text
    return node

def get_source():
    """Build the VOC <source> element with fixed placeholder values."""
    node = Element('source')
    fields = (('database', 'voc_bdd'),
              ('annotation', 'VOC2007'),
              ('image', 'flickr'),
              ('flickrid', 'NULL'))
    for tag, text in fields:
        SubElement(node, tag).text = text
    return node




def get_size(image_path):
    """Return a VOC <size> element read from the image at *image_path*.

    Depth is hard-coded to 3 (color images are assumed — TODO confirm
    every frame in the dataset is 3-channel).
    """
    img = Image.open(image_path)
    node = Element('size')
    for tag, value in (('width', img.width), ('height', img.height), ('depth', 3)):
        SubElement(node, tag).text = str(value)
    return node


def get_bbox(box2d):
    """Build a VOC <bndbox> from a BDD 'box2d' dict, rounding floats to ints."""
    node = Element('bndbox')
    mapping = (('xmin', 'x1'), ('ymin', 'y1'), ('xmax', 'x2'), ('ymax', 'y2'))
    for tag, key in mapping:
        SubElement(node, tag).text = str(int(round(box2d[key])))
    return node


def prettify(elem):
    """Serialize an ElementTree element as a tab-indented XML string."""
    raw = ElementTree.tostring(elem, 'utf-8')
    return minidom.parseString(raw).toprettyxml(indent="\t")


if __name__ == "__main__":
    # Run the conversion using the module-level configuration constants.
    bdd_to_voc(BDD_FOLDER, XML_PATH)

3.生成用于划分的txt

生成trainval.txt

# 在PowerShell运行,这是20年代
 Get-ChildItem -Path ./xml/train | ForEach-Object { $_.BaseName } > ./trainval.txt

会在当前目录出现一个trainval.txt,记得改成UTF-8。

开划

因为标签里val的json与10K内val的图片不匹配,实际上是一张都对不上,因此val文件夹内是空的。需要将train内的文件划分成训练集与验证集。
如果使用100K的数据集...大家都应该有那种voc2yolo4.py用来直接划的吧?实在不行上面的PowerShell多跑两遍。

# random_split_train_val.py
import numpy as np
import sys


filename = "trainval.txt"


def split_dataset(list_file, train_ratio=0.8):
    """Randomly split the ids in *list_file* into train / val / test lists.

    Writes '<stem>_train.txt', '<stem>_val.txt' and '<stem>_test.txt'
    next to the input. The test file intentionally duplicates the val
    file (the original script did the same).

    :param list_file: text file with one image id per line
    :param train_ratio: fraction of ids that go to the train split
                        (default 0.8, i.e. a 4:1 train/val split)
    :return: (train_file, val_file, test_file) output paths
    """
    stem = list_file.split('.')[0]
    train_file = stem + '_train.txt'
    val_file = stem + '_val.txt'
    test_file = stem + '_test.txt'

    with open(list_file, 'r') as f:
        idx = [line.strip() for line in f]

    idx = np.random.permutation(idx)
    cut = int(len(idx) * train_ratio)
    train_idx = sorted(idx[:cut])
    val_idx = sorted(idx[cut:])

    # test intentionally reuses the val split.
    for path, ids in ((train_file, train_idx),
                      (val_file, val_idx),
                      (test_file, val_idx)):
        with open(path, 'w') as f:
            for i in ids:
                f.write('{}\n'.format(i))

    print('Training set is saved to ' + train_file)
    print('Validation set is saved to ' + val_file)
    print('Testing set is saved to ' + test_file)
    return train_file, val_file, test_file


if __name__ == "__main__":
    split_dataset(filename)

就此,BDD100K 的缝合部分完成。BDD100K现在的Annotations、ImageSets以及JPEGImages文件夹可以不管了。

路锥的 VOC 数据集

在迷惘之际,发现有位前人做完的交通锥数据集,看到issues里说jbox上不了…
于是就从jbox上下载到了文件。
jbox内的文件已经包含了trainval.txt,因此同样的,使用上面的random_split_train_val.py直接开划。
划完后对路锥与BDD100K的train、val、test的文本文件进行合并。
新建一个文件夹./merge,随后将python文件与需要合并的txt放进去。

# merge_sets.py
import os

def merge_txt_sets(folder="./merge", out_path="merged.txt"):
    """Concatenate every .txt file under *folder* into *out_path*.

    BUGFIX: the original listed './merge' but opened the bare file names
    relative to the current directory, so it crashed unless the txts also
    sat in the CWD — the names must be joined with the folder path.
    Files are processed in sorted order for deterministic output, and a
    newline is appended when a file lacks a trailing one so ids from
    adjacent files cannot be glued together.
    """
    with open(out_path, "w") as merged:
        for name in sorted(os.listdir(folder)):
            if not name.endswith(".txt"):
                continue
            with open(os.path.join(folder, name), "r", encoding="utf-8") as f:
                for line in f:
                    if not line.endswith("\n"):
                        line += "\n"
                    merged.write(line)


if __name__ == "__main__":
    merge_txt_sets()

合并完之后,将Annotations、ImageSets以及JPEGImages目录的图片放在一起,大功告成。

KITTI 转 VOC

这里是很早之前完成的,因此没有逐步的教程了,主要是根据这里的方法一步步来。
完成后使用上面的方法,将KITTI的文本文件也进行合并,图片与xml文件放在一起。

至此,伟大的缝合就完成了。


开始训练

需要注意,生成带路径、用于训练的文本文件时,由于数据集内图片格式不一样,需要对代码进行调整。

#voc_annoation.py
import xml.etree.ElementTree as ET
from os import getcwd

sets = [('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
# Final merged class list: vehicles, traffic signs, traffic cones.
classes = ["car", "sign", "trafficcone"]


def convert_annotation(year, image_id, list_file):
    """Append ' xmin,ymin,xmax,ymax,cls_id' for each known object to *list_file*.

    Reads 'VOCdevkit/VOC<year>/Annotations/<image_id>.xml'; objects whose
    class name is not in `classes` are skipped.

    :param year: VOC year string, e.g. '2007'
    :param image_id: annotation file stem (no extension)
    :param list_file: already-open writable file object
    """
    xml_path = 'VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)
    # BUGFIX: the original opened the file and never closed it.
    with open(xml_path, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            continue
        cls_id = classes.index(cls)
        box = obj.find('bndbox')
        b = tuple(int(float(box.find(k).text))
                  for k in ('xmin', 'ymin', 'xmax', 'ymax'))
        list_file.write(" " + ",".join(str(a) for a in b) + ',' + str(cls_id))

wd = getcwd()


def write_train_lists():
    """Write one '<year>_<set>.txt' training-list file per image set.

    Each line is an absolute image path followed by the object boxes
    appended by convert_annotation(). KITTI frames (all-digit names
    starting with '00') are stored as .png; everything else (BDD100K,
    traffic cones) as .jpg.
    """
    for year, image_set in sets:
        ids_path = 'VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)
        with open(ids_path, encoding='utf-8') as f:
            image_ids = f.read().strip().split()
        with open('%s_%s.txt' % (year, image_set), 'w', encoding='utf-8') as list_file:
            for image_id in image_ids:
                ext = 'png' if image_id.startswith("00") and image_id.isdigit() else 'jpg'
                # BUGFIX: the original literals ('%s\VOCdevkit\...') relied on
                # invalid escape sequences (\V, \J — SyntaxWarning on modern
                # Python) and hard-coded Windows backslashes. Forward slashes
                # are portable and also valid on Windows.
                list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.%s'
                                % (wd, year, image_id, ext))
                convert_annotation(year, image_id, list_file)
                list_file.write('\n')


if __name__ == "__main__":
    write_train_lists()

现在就可以开始训练辣。


统计类别与数目

合并完需要看看多不多

#type&cat.py
import os
import xml.dom.minidom
import matplotlib.pyplot as plt

# Count how many objects each class has across all VOC XMLs and plot a bar chart.
xml_path = './Annotations/'
files = os.listdir(xml_path)

gt_dict = {}  # class name -> number of ground-truth boxes


for xm in files:
    dom = xml.dom.minidom.parse(os.path.join(xml_path, xm))
    root = dom.documentElement
    filename = root.getElementsByTagName("filename")[0].childNodes[0].data
    for obj in root.getElementsByTagName("object"):
        label = obj.getElementsByTagName("name")[0].childNodes[0].data
        # Flag annotations with a placeholder '-' class so they can be fixed.
        if label == '-':
            print(filename)
        gt_dict[label] = gt_dict.get(label, 0) + 1

dic = sorted(gt_dict.items(), key=lambda kv: kv[1], reverse=True)
print(dic)
x, y = zip(*dic)
plt.bar(x, y)
# Label each bar with its count.
for x, y in zip(x, y):
    plt.text(x, y, y, ha='center', va='bottom',)
plt.savefig('categories.png')
Edit with Markdown