偷同组YOLO格式的数据集转VOC格式

乞讨,读书人的偷,怎么能叫偷呢

情况概述

给导师看了看前文的缝合妙计后,要求再多四五类进行训练,不然论文不给过。同时规定论文起码要到20000字。
文书的缝合工作不成问题,可数据集咋办呢?
看到同组 YOLOv5 进行人物识别的数据集不错,花了三天乞求过来。需要转成VOC数据集,对数据进行筛选后继续缝合。

将 YOLO 格式转换成 VOC 格式

参考了这个项目的代码,但对于逻辑进行了几处改动:对空文件进行跳过处理,忽视"people"标签,并且对全是"people"标签的文件进行省略。

# yolo2voc.py
import os
import cv2
from xml.dom.minidom import parseString
from lxml.etree import Element, SubElement, tostring
import numpy as np
from os.path import join

## Root folder of the dataset; must contain 'labels/' and 'images/' sub-folders
## ('outputs/' is created by xml_transform).
ROOT = 'coco'
## Class names indexed by YOLO class id; the order must match the ids used in
## the label .txt files.
YOLO_CLASSES = ('speedhump', 'trafficcone', 'firehydrant', 'guardrail', 'manhole', 'people', 'sign')


def unconvert(class_id, width, height, x, y, w, h):
    """Convert one normalized YOLO box into absolute pixel corner coordinates.

    (x, y) is the box center and (w, h) its size, all normalized to [0, 1];
    width/height are the image dimensions in pixels. Corners are truncated
    to int (matching the original behavior).

    Returns a tuple (class_id, xmin, xmax, ymin, ymax) with class_id as int.
    """
    center_x = x * width
    center_y = y * height
    half_w = (w * width) / 2.0
    half_h = (h * height) / 2.0
    xmin = int(center_x - half_w)
    xmax = int(center_x + half_w)
    ymin = int(center_y - half_h)
    ymax = int(center_y + half_h)
    return (int(class_id), xmin, xmax, ymin, ymax)


def xml_transform(root, classes):
    """Convert YOLO .txt labels under <root>/labels into VOC XML in <root>/outputs.

    For every label file:
      * malformed label files (ValueError while parsing) are skipped,
      * objects whose class name is "people" are dropped,
      * files whose objects are ALL "people" (or that are empty) produce no XML.

    Parameters
    ----------
    root : str
        Dataset root containing 'labels/' and 'images/' sub-folders.
    classes : sequence of str
        Class names indexed by the YOLO class id found in the label files.
    """
    class_path = join(root, 'labels')
    label_files = os.listdir(class_path)
    # macOS Finder drops a .DS_Store metadata file into folders; it is not a label.
    if '.DS_Store' in label_files:
        label_files.remove('.DS_Store')
    ids = [os.path.splitext(name)[0] for name in label_files]

    annopath = join(root, 'labels', '%s.txt')
    imgpath = join(root, 'images', '%s.jpg')

    os.makedirs(join(root, 'outputs'), exist_ok=True)
    outpath = join(root, 'outputs', '%s.xml')

    for img_id in ids:
        print(imgpath % img_id)
        img = cv2.imread(imgpath % img_id)
        if img is None:
            # cv2.imread returns None for a missing/unreadable image; the
            # original code crashed with AttributeError on img.shape here.
            print((imgpath % img_id) + ": image missing or unreadable, skipped")
            continue
        height, width, channels = img.shape

        target = annopath % img_id
        if not os.path.exists(target):
            continue
        try:
            label_norm = np.loadtxt(target).reshape(-1, 5)
        except ValueError:
            print(target + ":ValueError")
            continue

        node_root = Element('annotation')
        node_folder = SubElement(node_root, 'folder')
        node_folder.text = 'VOC2007'

        node_filename = SubElement(node_root, 'filename')
        node_filename.text = img_id + '.jpg'

        node_source = SubElement(node_root, 'source')
        node_database = SubElement(node_source, 'database')
        node_database.text = 'Coco database'

        node_size = SubElement(node_root, 'size')
        node_width = SubElement(node_size, 'width')
        node_width.text = str(width)
        node_height = SubElement(node_size, 'height')
        node_height.text = str(height)
        node_depth = SubElement(node_size, 'depth')
        node_depth.text = str(channels)

        node_segmented = SubElement(node_root, 'segmented')
        node_segmented.text = '0'

        print("\n\n\n" + target)
        print(label_norm)

        people_object = 0
        for row in label_norm:
            new_label = unconvert(row[0], width, height, row[1], row[2], row[3], row[4])
            # "people" boxes are deliberately dropped (see the surrounding text).
            if classes[new_label[0]] == "people":
                people_object = people_object + 1
                continue
            node_object = SubElement(node_root, 'object')
            node_name = SubElement(node_object, 'name')
            node_name.text = classes[new_label[0]]
            node_pose = SubElement(node_object, 'pose')
            node_pose.text = 'Unspecified'
            node_truncated = SubElement(node_object, 'truncated')
            node_truncated.text = '0'
            node_difficult = SubElement(node_object, 'difficult')
            node_difficult.text = '0'
            node_bndbox = SubElement(node_object, 'bndbox')
            node_xmin = SubElement(node_bndbox, 'xmin')
            node_xmin.text = str(new_label[1])
            node_ymin = SubElement(node_bndbox, 'ymin')
            node_ymin.text = str(new_label[3])
            node_xmax = SubElement(node_bndbox, 'xmax')
            node_xmax.text = str(new_label[2])
            node_ymax = SubElement(node_bndbox, 'ymax')
            node_ymax.text = str(new_label[4])

        # Empty files (len == 0) and files containing only "people" boxes
        # both satisfy this check and are skipped without writing an XML.
        if people_object == len(label_norm):
            print(target + " full of ppl,skipped!")
            continue

        # Serialize once per file; the original re-serialized inside the object
        # loop and built an unused minidom tree (`dom`) on every iteration.
        xml = tostring(node_root, pretty_print=True)
        with open(outpath % img_id, "wb") as f:
            f.write(xml)
       

xml_transform(ROOT, YOLO_CLASSES)

生成xml标签后,需要删除啥都没有的图片:都是人而未生成xml文件的、同学手抖按了两下"下一张"没标记的。

删除空图片

没啥好说的,删除没有xml的图片。

#removeUnusedPic.py
import os

def getFileList(dir, extract):
    """Return the names of files in *dir* whose extension equals *extract*.

    The comparison is exact (e.g. '.jpg' does not match '.JPG'), only the
    top level of *dir* is scanned, and the order follows os.listdir.
    """
    return [name for name in os.listdir(dir)
            if os.path.splitext(name)[-1] == extract]


if __name__ == '__main__':

    dataset_root = "./coco/"

    image_dir = os.path.join(dataset_root, 'images')
    label_dir = os.path.join(dataset_root, 'outputs')

    # An image is "orphaned" when no matching XML annotation exists for it:
    # map each annotation name back to the image name it corresponds to.
    image_names = getFileList(image_dir, '.jpg')
    annotated = {name.replace(".xml", ".jpg")
                 for name in getFileList(label_dir, '.xml')}

    # Delete every orphaned image, echoing each removed path.
    for orphan in set(image_names) - annotated:
        orphan_path = os.path.join(image_dir, orphan)
        os.remove(orphan_path)
        print(orphan_path)

完成后即为VOC格式的数据集,随后与前文大同小异:生成索引文件,划分训练、验证、测试集、合并。

生成索引文件

powershell内运行 Get-ChildItem -Path ./outputs | ForEach-Object { $_.BaseName } > ./trainval.txt

划分训练集

记得把上面的trainval.txt改成UTF-8,不然报错。
前文内的random_split_train_val.py划分。

合并索引文件

将划完的训练、验证、测试集与先前完成的训练、验证、测试集进行合并。
使用前文内的merge_sets.py合并。

Edit with Markdown