YOLO Data Processing Tool Function Collection

ChangeTheLabel.py

# Rewrite the class id (the first field of every line) in each YOLO label file.
#
import os
# Folder holding the YOLO label files (*.txt) to rewrite.
# NOTE(review): the original literal had lost its path separators
# ('C:UsersUserDesktop...'); they are restored here on the assumption that it
# was a Windows path -- confirm before running.
folder_path = r'C:\Users\User\Desktop\coco\transfer\cell_phone\label'
# Class id written into the first field of every label line.
new_class_id = '1'

# Visit every .txt label file in the folder.
for filename in os.listdir(folder_path):
    if not filename.endswith('.txt'):
        continue
    file_path = os.path.join(folder_path, filename)
    # Read the whole label file.
    with open(file_path, 'r') as file:
        lines = file.readlines()
    # Replace the first field of each line with the new class id.
    modified_lines = []
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # A blank line has no class field; rewriting it would create a
            # bogus label consisting only of the class id, so drop it.
            continue
        split_line = stripped.split(' ')
        split_line[0] = new_class_id
        # Terminate with a real newline (the original appended the letter 'n').
        modified_lines.append(' '.join(split_line) + '\n')
    # Write the modified content back to the same file.
    with open(file_path, 'w') as file:
        file.writelines(modified_lines)
print("modification completed !")

Coco_Extract.py

# Extract the images and annotations of selected categories from the COCO dataset.
#
from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw
#reference link 
# https://blog.csdn.net/Accelerating/article/details/126855883
# https://blog.csdn.net/Dongjiuqing/article/details/127949190
# Category to extract; it is also used as the name of the output sub-folder.
pathset = 'wine glass'

# Reference notes carried over from the original file: COCO category names by
# group, followed by the numbers the author listed under them, in order
# (presumably per-category counts -- the original does not say).
#   traffic:      person 257249, bicycle 7056, car 43532, motorcycle 8654,
#                 bus 6061, train 4570, truck 9970
#   goods:        bottle 24070, wine glass 7839, cup 20574, bowl 14323,
#                 fork, knife, spoon
#   mobile phone: cell phone

# Output locations (edit to suit your machine).
savepath = f"C:/Users/User/Desktop/coco/transfer/{pathset}/"
# print(savepath)
img_dir = f"{savepath}images/"
anno_dir = f"{savepath}annotations/"

# Dataset splits to process.
datasets_list = ['train2017', 'val2017']
# datasets_list = ['val2017']

# COCO has 80 categories; list here the names of the ones to extract
# (for example 'person').
classes_names = [pathset]

# Root of the original COCO dataset that contains all categories.
# Expected directory layout:
#   $COCO_PATH
#   ----|annotations
#   ----|train2017
#   ----|val2017
#   ----|test2017
dataDir = 'C:/Users/User/Desktop/coco/'
# Opening part of a VOC-style XML annotation. Placeholders, in order:
# %s file name, %d width, %d height, %d depth.
headstr = """
<annotation>
<folder>VOC</folder>
<filename>%s</filename>
<source>
  <database>My Database</database>
  <annotation>COCO</annotation>
  <image>flickr</image>
  <flickrid>NULL</flickrid>
</source>
<owner>
  <flickrid>NULL</flickrid>
  <name>company</name>
</owner>
<size>
  <width>%d</width>
  <height>%d</height>
  <depth>%d</depth>
</size>
<segmented>0</segmented>
"""
# One <object> element per bounding box. Placeholders, in order:
# %s class name, %d xmin, %d ymin, %d xmax, %d ymax.
objstr = """
<object>
  <name>%s</name>
  <pose>Unspecified</pose>
  <truncated>0</truncated>
  <difficult>0</difficult>
  <bndbox>
      <xmin>%d</xmin>
      <ymin>%d</ymin>
      <xmax>%d</xmax>
      <ymax>%d</ymax>
  </bndbox>
</object>
"""
# Closing tag that ends the annotation document.
tailstr = '''
</annotation>
'''
def mkr(path):
    """Create directory *path*, including missing parents, if it is absent.

    An existing directory is left untouched; nothing is ever deleted.

    Raises:
        FileExistsError: if *path* exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(path, exist_ok=True)
def id2name(coco):
    """Return a dict mapping each category id to its category name.

    Args:
        coco: object whose ``dataset['categories']`` is an iterable of dicts
            carrying ``'id'`` and ``'name'`` keys.
    """
    return {cls['id']: cls['name'] for cls in coco.dataset['categories']}
def write_xml(anno_path, head, objs, tail):
    """Write one XML annotation file: *head*, one object entry per box, *tail*.

    Args:
        anno_path: destination file path (overwritten if it exists).
        head: text written first.
        objs: iterable of sequences whose first five items are substituted,
            in order, into the module-level ``objstr`` template.
        tail: text written last.
    """
    # The context manager guarantees the file is flushed and closed, which the
    # original bare open() never did.
    with open(anno_path, "w") as f:
        f.write(head)
        for obj in objs:
            f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
        f.write(tail)
def save_annotations_and_imgs(coco, dataset, filename, objs):
    """Copy one source image to the output tree and write its XML annotation.

    The image is read from ``dataDir/<dataset>/<filename>``, copied to
    ``img_dir/<dataset>/`` and described by an XML file with the same stem in
    ``anno_dir/<dataset>/`` (e.g. ``000000196610.jpg`` -> ``000000196610.xml``).

    Args:
        coco: unused; kept so existing callers keep working.
        dataset: split name, used as sub-folder on both input and output side.
        filename: image file name inside the split folder.
        objs: boxes passed on to ``write_xml``.

    An image that OpenCV cannot read is skipped with a message: nothing is
    copied and no annotation is written for it.
    """
    dst_anno_dir = os.path.join(anno_dir, dataset)
    mkr(dst_anno_dir)
    # splitext works for any extension length, unlike slicing off 3 characters.
    anno_path = dst_anno_dir + '/' + os.path.splitext(filename)[0] + '.xml'
    img_path = dataDir + dataset + '/' + filename

    dst_img_dir = os.path.join(img_dir, dataset)
    mkr(dst_img_dir)
    dst_imgpath = dst_img_dir + '/' + filename

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(img_path)
    if img is None:
        print(f"Skipping {filename}: could not read image {img_path}")
        return

    shutil.copy(img_path, dst_imgpath)
    # img.shape is (height, width, channels); the header wants width first.
    head = headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    write_xml(anno_path, head, objs, tailstr)
def showimg(coco, dataset, img, classes, cls_id, show=True):
    """Collect the boxes of the wanted classes for one image; optionally plot.

    Args:
        coco: object providing ``getAnnIds`` and ``loadAnns``.
        dataset: split name (sub-folder of ``dataDir`` holding the image).
        img: image record with at least ``'id'`` and ``'file_name'``.
        classes: mapping from category id to category name.
        cls_id: category id(s) used to filter the annotations.
        show: when true, draw the boxes on the image and display it.

    Returns:
        A list of ``[class_name, xmin, ymin, xmax, ymax]`` entries (ints) for
        every annotation whose class name is in ``classes_names``.

    The image file is opened only when *show* is true, and it is closed again
    afterwards; the original opened it for every call and never closed it.
    """
    # Look up the annotations of this image by id.
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(annIds)

    objs = []
    for ann in anns:
        class_name = classes[ann['category_id']]
        if class_name not in classes_names or 'bbox' not in ann:
            continue
        bbox = ann['bbox']
        # bbox is handled as [x, y, width, height]: the far corner is the
        # origin plus the size.
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([class_name, xmin, ymin, xmax, ymax])

    if show:
        with Image.open('%s/%s/%s' % (dataDir, dataset, img['file_name'])) as I:
            # One drawing context is enough for all rectangles.
            draw = ImageDraw.Draw(I)
            for _, xmin, ymin, xmax, ymax in objs:
                draw.rectangle([xmin, ymin, xmax, ymax])
            plt.figure()
            plt.axis('off')
            plt.imshow(I)
            plt.show()
    return objs
# Driver: for every split, export each image that contains a wanted class.
for dataset in datasets_list:
    # e.g. <dataDir>/annotations/instances_train2017.json
    annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataset)
    # Load the annotation data through the COCO API.
    coco = COCO(annFile)
    # Mapping of every category id in the dataset to its name.
    classes = id2name(coco)
    # Ids of all wanted classes; used to filter annotations per image.
    classes_ids = coco.getCatIds(catNms=classes_names)
    for cls in classes_names:
        # Ids of the images that contain this particular class.
        cls_id = coco.getCatIds(catNms=[cls])
        img_ids = coco.getImgIds(catIds=cls_id)
        for imgId in tqdm(img_ids):
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            objs = showimg(coco, dataset, img, classes, classes_ids, show=False)
            save_annotations_and_imgs(coco, dataset, filename, objs)

Count.py

# Count the files in a folder, including those in its sub-folders.
#
import os
def count_files_in_folder(folder_path):
    """Count the regular files under *folder_path*, descending into sub-folders.

    Args:
        folder_path: directory to scan.

    Returns:
        The number of files found, or ``None`` if this folder or any folder
        below it could not be listed (the OS error is printed once).
    """
    try:
        items = os.listdir(folder_path)
    except OSError as e:
        print(f"Error counting files: {e}")
        return None

    file_count = 0
    for item in items:
        item_path = os.path.join(folder_path, item)
        if os.path.isfile(item_path):
            file_count += 1
        elif os.path.isdir(item_path):
            sub_count = count_files_in_folder(item_path)
            if sub_count is None:
                # Propagate the failure explicitly; adding None to the running
                # total used to raise a second, misleading TypeError.
                return None
            file_count += sub_count
    return file_count
# Usage example: count the files of one dataset folder and report the total.
folder_path = "/home/ws/CoodWorkRun/Database/smoDB_phoDB_glaDB_faceDB/JPEGImages"
result = count_files_in_folder(folder_path)
if result is not None:
    print(f"Number of files in {folder_path}: {result}")

Delete.py

# Find files that exist in only one of the two folders (images vs. labels),
# compared by file name without extension, so the unmatched ones can be deleted.
import os
# NOTE(review): both literals had lost their path separators
# ('C:UsersUserDesktop...'); restored on the assumption that they were Windows
# paths -- confirm before running.
image_folder = r'C:\Users\User\Desktop\coco\transfer\bottle\JPEGImages'
label_folder = r'C:\Users\User\Desktop\coco\transfer\bottle\annotations'
# Safety switch: leave False to only report the mismatches (what the original
# did, its deletion code being commented out); set True to really delete.
delete_mismatched = False

# File names of both folders, without their extensions.
image_files = set(os.path.splitext(filename)[0] for filename in os.listdir(image_folder))
label_files = set(os.path.splitext(filename)[0] for filename in os.listdir(label_folder))
print(image_files)
print(len(image_files))
print(label_files)
print(len(label_files))
# Names present in exactly one of the two folders.
files_to_delete = image_files.symmetric_difference(label_files)
print('remove folders :')
print(files_to_delete)
print('number of deletions :')
print(len(files_to_delete))
# Delete the unmatched files. The real file names are looked up in the folders
# instead of assuming '.jpg' / '.txt' extensions.
if delete_mismatched:
    for folder in (image_folder, label_folder):
        for filename in os.listdir(folder):
            file_path = os.path.join(folder, filename)
            if os.path.splitext(filename)[0] in files_to_delete and os.path.isfile(file_path):
                os.remove(file_path)

Divide.py

#dataset partitioning 
#
import os
import random
import shutil
# Root folder of the original data.
# NOTE(review): this literal appears to have lost its path separators during
# extraction and cannot be reconstructed reliably; set it to your dataset root.
data_dir = r"D:DatabaseDatabase metro smoDB_phoDB_glaDB_faceDB_v2/"
images_dir = os.path.join(data_dir, "JPEGImages")
labels_dir = os.path.join(data_dir, "label")
# Output folders of the split, derived from data_dir so they cannot drift apart.
train_dir = os.path.join(data_dir, "train")
train_images_dir = os.path.join(train_dir, "images")
train_labels_dir = os.path.join(train_dir, "labels")
val_dir = os.path.join(data_dir, "val")
val_images_dir = os.path.join(val_dir, "images")
val_labels_dir = os.path.join(val_dir, "labels")
# Create the training and validation folders.
for out_dir in (train_images_dir, train_labels_dir, val_images_dir, val_labels_dir):
    os.makedirs(out_dir, exist_ok=True)
# List all image files and shuffle them randomly.
image_files = os.listdir(images_dir)
random.shuffle(image_files)
# Number of images that go into the training set.
total_images = len(image_files)
train_ratio = 0.9
num_train = int(total_images * train_ratio)
# Copy every image together with its label into the training or validation set.
train_file_list = []
val_file_list = []
for i, image_file in enumerate(image_files):
    # The label shares the image's stem; splitext works for any image
    # extension, where replace(".jpg", ".txt") only handled '.jpg'.
    label_file = os.path.splitext(image_file)[0] + ".txt"
    if i < num_train:
        split_name, dst_images, dst_labels, file_list = (
            "train", train_images_dir, train_labels_dir, train_file_list)
    else:
        split_name, dst_images, dst_labels, file_list = (
            "val", val_images_dir, val_labels_dir, val_file_list)
    shutil.copy(os.path.join(images_dir, image_file), os.path.join(dst_images, image_file))
    shutil.copy(os.path.join(labels_dir, label_file), os.path.join(dst_labels, label_file))
    file_list.append(os.path.join(split_name, "images", image_file))
# Write train.txt and val.txt, one relative image path per line (the original
# joined the entries with the letter 'n' instead of a newline).
with open(os.path.join(data_dir, "train.txt"), "w") as train_txt_file:
    train_txt_file.write("\n".join(train_file_list))
with open(os.path.join(data_dir, "val.txt"), "w") as val_txt_file:
    val_txt_file.write("\n".join(val_file_list))
print(f"partition completed, training set includes{ num_train }samples, validation set contains{ total_images - num_train }samples.")

Examine.py

# Check whether any .txt file in the folder is empty.
import os
# Folder whose .txt label files are checked.
folder_path = "/home/ws/CoodWorkRun/Database/smoDB_phoDB_glaDB/label"
# Names of the .txt files whose size is zero bytes.
empty_files = []
for file_name in os.listdir(folder_path):
    if not file_name.endswith(".txt"):
        continue
    file_path = os.path.join(folder_path, file_name)
    if os.path.getsize(file_path) == 0:
        empty_files.append(file_name)
        print("Empty file:", file_name)
num_empty_files = len(empty_files)
print("Number of empty files:", num_empty_files)

Label_Make.py

# Generate YOLO label files for the face dataset (CelebA).
from PIL import Image,ImageDraw
import os

# Bounding-box list, output label folder and image folder.
anno_box_path = r"/home/ws/CoodWorkRun/Database/ facial dataset /CelebA/Anno/list_bbox_celeba.txt"
label_dir = "/home/ws/CoodWorkRun/Database/ facial dataset /CelebA/label"
img_dir = "/home/ws/CoodWorkRun/Database/ facial dataset /CelebA/Img/img_celeba.7z/img_celeba"

# 'with' closes the annotation list when done; the original never closed it.
with open(anno_box_path, "r") as box_file:
    for i, line in enumerate(box_file):
        # The first two lines are skipped, as before (presumably a header).
        if i < 2:
            continue
        print(line)
        img_strs = line.split()
        if not img_strs:
            # Blank line: nothing to convert.
            continue
        # Label file stem = image file name without extension. The original
        # took the first 6 characters of the line, which presumably matches
        # only because the names are 6 digits long -- splitext has no such limit.
        imgname = os.path.splitext(img_strs[0])[0]
        # Columns 1-4 are read as x, y, width, height of the box in pixels.
        x1, y1, w, h = int(img_strs[1]), int(img_strs[2]), int(img_strs[3]), int(img_strs[4])
        x2, y2 = x1 + w, y1 + h
        # Only the image size is needed, so close the image right away.
        with Image.open(f"{img_dir}/{img_strs[0]}") as img:
            img_w, img_h = img.size
        # Normalise to the centre/size fractions of the YOLO label format.
        dw = 1. / (int(img_w))
        dh = 1. / (int(img_h))
        # The '- 1' on the centre is kept exactly as in the original.
        x = ((x1 + x2) / 2.0 - 1) * dw
        y = ((y1 + y2) / 2.0 - 1) * dh
        w = (x2 - x1) * dw
        h = (y2 - y1) * dh
        # One label file per image, class id 0, ended by a real newline (the
        # original wrote the letter 'n').
        with open(f"{label_dir}/{imgname}.txt", "w") as label_txt:
            label_txt.write(f"0 {x} {y} {w} {h}\n")

Rename.py

# Rename image files and their label files with a common prefix.
import os
def rename_files(folder_path_images, folder_path_labels, prefix="phoneB_"):
    """Prefix every image file and its label file with *prefix*.

    Images and labels are paired by position after sorting both folders by
    ``(stem, extension)``. Sorting by the full file name, as the original did,
    mispairs names such as ``a.jpg``/``a.k.jpg`` against ``a.txt``/``a.k.txt``
    because the extension takes part in the comparison.

    Args:
        folder_path_images: folder containing the image files.
        folder_path_labels: folder containing the label files.
        prefix: text put in front of each image stem; the label receives the
            same new stem and keeps its own extension.

    Nothing is renamed when the two folders hold a different number of
    entries. OS errors are printed rather than raised, as before.
    """
    try:
        # List both folders in stem order so that equal stems line up.
        image_files = sorted(os.listdir(folder_path_images), key=os.path.splitext)
        label_files = sorted(os.listdir(folder_path_labels), key=os.path.splitext)
        # Both folders must hold the same number of files.
        if len(image_files) != len(label_files):
            print("Error: The number of files in the two folders does not match.")
            return
        for image_file, label_file in zip(image_files, label_files):
            image_name, image_ext = os.path.splitext(image_file)
            label_ext = os.path.splitext(label_file)[1]
            new_name = f"{prefix}{image_name}"
            os.rename(os.path.join(folder_path_images, image_file),
                      os.path.join(folder_path_images, f"{new_name}{image_ext}"))
            os.rename(os.path.join(folder_path_labels, label_file),
                      os.path.join(folder_path_labels, f"{new_name}{label_ext}"))
        print("Files renamed successfully.")
    except OSError as e:
        print(f"Error: {e}")
# Usage example.
# NOTE(review): both literals had lost their path separators
# ('C:UsersUserDesktop...'); restored on the assumption that they were Windows
# paths -- confirm before running.
folder_path_images = r'C:\Users\User\Desktop\coco\transfer\cell_phone\JPEGImages'
folder_path_labels = r'C:\Users\User\Desktop\coco\transfer\cell_phone\label'
rename_files(folder_path_images, folder_path_labels)

XmlToTxt.py

# Convert XML annotation files to YOLO txt label files.
#
import xml.etree.ElementTree as ET
from os import listdir, getcwd
import glob
import cv2
# folder_path_images = r'C:UsersUserDesktopcocotransferbottleJPEGImages'
# folder_path_labels = r'C:UsersUserDesktopcocotransferbottleannotations'
# Class names to convert; a name's position in this list is the class id
# written to the txt label.
classes = ["cell phone"]  # the text found between <name> and </name> in the XML; separate multiple names with commas
def convert(size, box):
    """Convert a pixel box to normalised centre/size values.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` of the box in pixels.

    Returns:
        A tuple ``(x, y, w, h)`` of strings with six decimals: box centre and
        box size as fractions of the image width and height. A dimension of 0
        yields 0 for the values scaled by it, instead of dividing by zero.
    """
    # Guard against a zero divisor. The original assigned the zero-height
    # fallback to dw, leaving dh undefined in that case.
    dw = 1.0 / size[0] if size[0] != 0 else 0
    dh = 1.0 / size[1] if size[1] != 0 else 0
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return ('%.6f' % x, '%.6f' % y, '%.6f' % w, '%.6f' % h)
def convert_annotation(image_name, image_path,
                       xml_dir=r'C:\Users\User\Desktop\coco\transfer\cell_phone\annotations/',
                       label_dir=r'C:\Users\User\Desktop\coco\transfer\cell_phone/label/'):
    """Convert the XML annotation of one image into a YOLO txt label file.

    Args:
        image_name: file name of the image; its stem selects ``<stem>.xml`` in
            *xml_dir* and names the ``<stem>.txt`` written to *label_dir*.
        image_path: path of the image file; the image is read to obtain its
            width and height instead of trusting the XML ``<size>`` element.
        xml_dir: folder holding the XML files.
        label_dir: folder receiving the txt files; it must already exist.

    NOTE(review): the default folders are the original hard-coded paths with
    their lost separators restored by assumption -- confirm before running.

    Objects whose name is not in ``classes`` are ignored. An image that OpenCV
    cannot read is skipped with a message and no label file is written.
    """
    import os  # local import: this script only imports names *from* os

    print(f"Processing {image_name}")
    # splitext works for any extension length, unlike slicing off 3 characters.
    stem = os.path.splitext(image_name)[0]

    with open(os.path.join(xml_dir, stem + '.xml'), encoding="utf8") as f:
        root = ET.fromstring(f.read())

    # Take width and height from the image itself, since the XML may lack them.
    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(image_path)
    if img is None:
        print(f"Skipping {image_name}: could not read image {image_path}")
        return
    h, w = int(img.shape[0]), int(img.shape[1])

    # 'with' closes the label file; the original never closed it.
    with open(os.path.join(label_dir, stem + '.txt'), 'w') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the box as (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            # End each label with a real newline (the original wrote 'n').
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
wd = getcwd()
if __name__ == '__main__':
    import os  # local import: this script only imports names *from* os

    # Folder holding the pictures; change the pattern to match your image type.
    # NOTE(review): the original literal had lost its path separators; restored
    # on the assumption that it was a Windows path -- confirm before running.
    for image_path in glob.glob(r"C:\Users\User\Desktop\coco\transfer\cell_phone/JPEGImages/*.jpg"):
        # basename handles both '/' and '\\' separators; the original
        # split('\') was not even valid Python once its backslash was lost.
        image_name = os.path.basename(image_path)
        convert_annotation(image_name, image_path)
    print('complete !')

YOLO Data Processing Tool Function Collection

ChangeTheLabel.py

Coco_Extract.py

Count.py

Delete.py

Divide.py

Examine.py

Label_Make.py

Rename.py

XmlToTxt.py

Related articles

Latest articles

Hot tags：