ChangeTheLabel.py
#rewrite the class number at the start of every line in each label file
#
import os
# Folder holding the label .txt files that are rewritten in place.
# NOTE(review): the path separators were missing from this literal; restored as
# C:\Users\User\Desktop\coco\transfer\cell_phone\label -- confirm.
folder_path = r'C:\Users\User\Desktop\coco\transfer\cell_phone\label'

# Class id written into the first column of every label row.
# NOTE(review): the old comment said "2" while the code wrote '1'; the code's
# value is kept here -- confirm which one is intended.
NEW_CLASS_ID = '1'


def relabel_lines(lines, class_id=NEW_CLASS_ID):
    """Return *lines* with the first space-separated field replaced by *class_id*.

    Blank lines are dropped (they previously turned into a row holding only
    the class id), and every returned row ends with a real newline.
    """
    relabeled = []
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        fields = stripped.split(' ')
        fields[0] = class_id
        relabeled.append(' '.join(fields) + '\n')
    return relabeled


def relabel_folder(folder, class_id=NEW_CLASS_ID):
    """Rewrite every ``.txt`` file directly inside *folder* via relabel_lines."""
    for filename in os.listdir(folder):
        if not filename.endswith('.txt'):
            continue
        file_path = os.path.join(folder, filename)
        # Read everything first: the same file is truncated and rewritten below.
        with open(file_path, 'r') as file:
            lines = file.readlines()
        with open(file_path, 'w') as file:
            file.writelines(relabel_lines(lines, class_id))


if __name__ == '__main__':
    relabel_folder(folder_path)
    print("modification completed !")
Coco_Extract.py
#extract corresponding category datasets
#
from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw
#reference link
# https://blog.csdn.net/Accelerating/article/details/126855883
# https://blog.csdn.net/Dongjiuqing/article/details/127949190
# Name of the single COCO category to extract; also used as the output sub-folder name.
pathset = 'wine glass'
'''
traffic'person', 'bicycle', 'car', 'motorcycle', 'bus', 'train', 'truck'
'people', 'bicycle', 'automobile', 'motorcycle', 'public buses', 'train', 'truck'
257249 7056 43532 8654 6061 4570 9970
goods'bottle', 'wine glass', 'cup', 'bowl', 'fork', 'knife', 'spoon',
'bottle', 'wine glass', 'glass', 'bowl', 'fork', 'knife', 'spoon',
24070 7839 20574 14323
mobile phone'cell phone'
'''
# Output root for the extracted category: <transfer dir>/<pathset>/
savepath = r"C:/Users/User/Desktop/coco/transfer/"+pathset+'/'
# print(savepath)
# Copied images go under img_dir, generated XML files under anno_dir
# (each gets one sub-folder per dataset split).
img_dir = savepath + 'images/'
anno_dir = savepath + 'annotations/'
# Dataset splits to process; each name is used both as the image folder name
# and inside the annotation file name instances_<split>.json.
datasets_list = ['train2017', 'val2017']
# datasets_list = ['val2017']
# COCO has 80 categories; list here the names of the categories to extract.
classes_names = [pathset]
# Root of the original COCO dataset that contains all categories.
'''
the directory format is as follows :
$COCO_PATH
----|annotations
----|train2017
----|val2017
----|test2017
'''
dataDir = 'C:/Users/User/Desktop/coco/'
# Header of the generated XML annotation file. The %-placeholders are filled,
# in order, with: file name (%s), image width, image height, channel count (%d).
headstr = """
<annotation>
<folder>VOC</folder>
<filename>%s</filename>
<source>
<database>My Database</database>
<annotation>COCO</annotation>
<image>flickr</image>
<flickrid>NULL</flickrid>
</source>
<owner>
<flickrid>NULL</flickrid>
<name>company</name>
</owner>
<size>
<width>%d</width>
<height>%d</height>
<depth>%d</depth>
</size>
<segmented>0</segmented>
"""
# One <object> entry per bounding box. The %-placeholders are filled, in order,
# with: class name (%s), xmin, ymin, xmax, ymax (%d).
objstr = """
<object>
<name>%s</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>%d</xmin>
<ymin>%d</ymin>
<xmax>%d</xmax>
<ymax>%d</ymax>
</bndbox>
</object>
"""
# Closing tag written after all <object> entries.
tailstr = '''
</annotation>
'''
def mkr(path):
    """Create directory *path*, including any missing parents.

    An existing directory is left untouched; nothing is ever deleted.
    ``exist_ok=True`` replaces the former exists-then-create check, so the
    call cannot fail when the directory appears between check and creation.

    Raises:
        FileExistsError: if *path* exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
def id2name(coco):
    """Return a dict mapping each category id to its category name.

    Args:
        coco: Object whose ``dataset['categories']`` is an iterable of dicts
            carrying ``'id'`` and ``'name'`` keys.
    """
    return {category['id']: category['name'] for category in coco.dataset['categories']}
def write_xml(anno_path, head, objs, tail):
    """Write one XML annotation file.

    Args:
        anno_path: Destination path of the XML file (overwritten if present).
        head: Already-formatted header text.
        objs: Iterable of rows ``[class_name, xmin, ymin, xmax, ymax]``; each
            row is rendered through the module-level ``objstr`` template.
        tail: Closing text written after all objects.
    """
    # The context manager closes (and therefore flushes) the file even if
    # formatting one of the rows fails; the handle used to be left open.
    with open(anno_path, "w") as f:
        f.write(head)
        for obj in objs:
            f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
        f.write(tail)
def save_annotations_and_imgs(coco, dataset, filename, objs):
    """Copy one image into the output tree and write its XML annotation.

    Example: ``000000196610.jpg`` produces ``000000196610.xml``.

    Args:
        coco: Not used by this function; kept so existing callers keep working.
        dataset: Split name (e.g. ``'train2017'``); used as the sub-folder of
            the source images and of both output directories.
        filename: Image file name inside ``dataDir + dataset``.
        objs: Rows ``[class_name, xmin, ymin, xmax, ymax]`` for this image.
    """
    dst_anno_dir = os.path.join(anno_dir, dataset)
    mkr(dst_anno_dir)
    # splitext instead of filename[:-3]: also correct for extensions that are
    # not exactly three characters long (e.g. ".jpeg").
    anno_path = dst_anno_dir + '/' + os.path.splitext(filename)[0] + '.xml'

    img_path = dataDir + dataset + '/' + filename
    dst_img_dir = os.path.join(img_dir, dataset)
    mkr(dst_img_dir)
    dst_imgpath = dst_img_dir + '/' + filename

    # cv2.imread signals a missing or undecodable file by returning None
    # rather than raising, so check before touching .shape.
    img = cv2.imread(img_path)
    if img is None:
        print(filename + " could not be read, skipped")
        return

    shutil.copy(img_path, dst_imgpath)
    height, width, depth = img.shape[0], img.shape[1], img.shape[2]
    head = headstr % (filename, width, height, depth)
    write_xml(anno_path, head, objs, tailstr)
def showimg(coco, dataset, img, classes, cls_id, show=True):
    """Collect the bounding boxes of the wanted classes for one image.

    Args:
        coco: COCO API object used to look up the annotations.
        dataset: Split name; only needed to locate the image when *show* is set.
        img: COCO image record (its ``'id'`` and ``'file_name'`` are read).
        classes: Mapping of category id to category name.
        cls_id: Category id(s) used to filter the annotations.
        show: When true, draw the boxes on the image and display it.

    Returns:
        List of ``[class_name, xmin, ymin, xmax, ymax]`` rows with integer
        pixel coordinates, restricted to names in ``classes_names``.
    """
    ann_ids = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)

    objs = []
    for ann in coco.loadAnns(ann_ids):
        class_name = classes[ann['category_id']]
        if class_name not in classes_names or 'bbox' not in ann:
            continue
        # COCO stores boxes as [x, y, width, height]; convert to corner form.
        bbox = ann['bbox']
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([class_name, xmin, ymin, xmax, ymax])

    # The image is only needed for display, so it is opened (and decoded for
    # drawing) only when requested. Previously every image was opened and
    # drawn on even with show=False, and the handle was never closed.
    if show:
        with Image.open('%s/%s/%s' % (dataDir, dataset, img['file_name'])) as picture:
            draw = ImageDraw.Draw(picture)
            for _, xmin, ymin, xmax, ymax in objs:
                draw.rectangle([xmin, ymin, xmax, ymax])
            plt.figure()
            plt.axis('off')
            plt.imshow(picture)
            plt.show()
    return objs
# Driver: for every dataset split, export the images that contain at least one
# of the requested classes together with an XML annotation per image.
for dataset in datasets_list:
    # e.g. <dataDir>/annotations/instances_train2017.json
    annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataset)
    # Initialise the COCO API on this split's annotation file.
    coco = COCO(annFile)
    # category id -> category name for all categories in the dataset
    classes = id2name(coco)
    # ids of all requested classes; used to filter each image's annotations
    classes_ids = coco.getCatIds(catNms=classes_names)

    # Gather the images of every requested class into one set, so an image
    # holding several requested classes is exported once instead of per class.
    wanted_img_ids = set()
    for cls in classes_names:
        cls_id = coco.getCatIds(catNms=[cls])
        if not cls_id:
            # An empty category filter makes getImgIds return EVERY image,
            # which would silently export the whole dataset.
            print("category '{}' not found in {}, skipped".format(cls, annFile))
            continue
        wanted_img_ids.update(coco.getImgIds(catIds=cls_id))

    for imgId in tqdm(sorted(wanted_img_ids)):
        img = coco.loadImgs(imgId)[0]
        filename = img['file_name']
        objs = showimg(coco, dataset, img, classes, classes_ids, show=False)
        save_annotations_and_imgs(coco, dataset, filename, objs)
Count.py
#folder file count
#
import os
def _count_files_recursive(folder_path):
    """Count regular files below *folder_path*; OS errors propagate to the caller."""
    file_count = 0
    for item in os.listdir(folder_path):
        item_path = os.path.join(folder_path, item)
        if os.path.isfile(item_path):
            file_count += 1
        elif os.path.isdir(item_path):
            file_count += _count_files_recursive(item_path)
    return file_count


def count_files_in_folder(folder_path):
    """Return the number of files in *folder_path* and all of its sub-folders.

    Returns:
        The file count, or ``None`` if the folder (or any sub-folder) could
        not be listed; in that case the error is printed once.

    The recursion lives in a helper that lets errors propagate. Previously a
    failing sub-folder returned ``None`` to its parent, which then failed on
    ``int += None`` and reported a misleading second error.
    """
    try:
        return _count_files_recursive(folder_path)
    except OSError as e:
        print(f"Error counting files: {e}")
        return None
# Example run: count every file below the JPEGImages tree and report the total.
folder_path = "/home/ws/CoodWorkRun/Database/smoDB_phoDB_glaDB_faceDB/JPEGImages"
result = count_files_in_folder(folder_path)
if result is None:
    # count_files_in_folder has already printed why the count failed.
    pass
else:
    print(f"Number of files in {folder_path}: {result}")
Delete.py
#find files whose names (without extension) exist in only one of the two folders
#these unmatched files are the candidates for deletion
import os
# NOTE(review): the path separators were missing from these literals; restored
# as C:\Users\User\Desktop\coco\transfer\bottle\... -- confirm.
image_folder = r'C:\Users\User\Desktop\coco\transfer\bottle\JPEGImages'
label_folder = r'C:\Users\User\Desktop\coco\transfer\bottle\annotations'


def file_stems(folder):
    """Return the set of entry names in *folder* with their extensions removed."""
    return {os.path.splitext(filename)[0] for filename in os.listdir(folder)}


if __name__ == '__main__':
    # File names (without suffix) present in the image and the label folder.
    image_files = file_stems(image_folder)
    label_files = file_stems(label_folder)
    print(image_files)
    print(len(image_files))
    print(label_files)
    print(len(label_files))
    # Names that occur in exactly one of the two folders have no partner file.
    files_to_delete = image_files.symmetric_difference(label_files)
    print('remove folders :')
    print(files_to_delete)
    print('number of deletions :')
    print(len(files_to_delete))
    # Deletion is intentionally disabled (dry run). NOTE(review): the disabled
    # code assumes '.jpg' images and '.txt' labels -- verify the extensions
    # actually used in label_folder before enabling it.
    # #delete mismatched files
    # for filename in files_to_delete:
    #     # print(filename)
    #     if filename in image_files:
    #         os.remove(os.path.join(image_folder, filename+'.jpg'))
    #     if filename in label_files:
    #         os.remove(os.path.join(label_folder, filename+'.txt'))
Divide.py
#dataset partitioning
#
import os
import random
import shutil
#original data storage path
# NOTE(review): this literal appears to have lost its path separators; it is
# left untouched because the original folder names cannot be recovered from
# here -- fix before running.
data_dir = r"D:DatabaseDatabase metro smoDB_phoDB_glaDB_faceDB_v2/"
images_dir = os.path.join(data_dir, "JPEGImages")
labels_dir = os.path.join(data_dir, "label")
# Training and validation output folders, derived from data_dir so the root
# is spelled only once.
train_dir = os.path.join(data_dir, "train")
train_images_dir = os.path.join(train_dir, "images")
train_labels_dir = os.path.join(train_dir, "labels")
val_dir = os.path.join(data_dir, "val")
val_images_dir = os.path.join(val_dir, "images")
val_labels_dir = os.path.join(val_dir, "labels")
# Fraction of the images assigned to the training set.
train_ratio = 0.9


def split_train_val(file_names, ratio):
    """Randomly split *file_names* into ``(train, val)`` lists.

    A shuffled copy is used, so the input is not modified. The training part
    holds ``int(len(file_names) * ratio)`` entries, the validation part the rest.
    """
    shuffled = list(file_names)
    random.shuffle(shuffled)
    num_train = int(len(shuffled) * ratio)
    return shuffled[:num_train], shuffled[num_train:]


def label_name_for(image_file):
    """Return the label file name for *image_file*: same stem, ``.txt`` suffix.

    Uses splitext rather than ``replace(".jpg", ".txt")`` so other image
    extensions work and a ".jpg" inside the stem is left alone.
    """
    return os.path.splitext(image_file)[0] + ".txt"


def copy_split(image_files, dst_images_dir, dst_labels_dir, split_name):
    """Copy images and their labels into one split; return the list-file entries."""
    listed = []
    for image_file in image_files:
        label_file = label_name_for(image_file)
        shutil.copy(os.path.join(images_dir, image_file), os.path.join(dst_images_dir, image_file))
        shutil.copy(os.path.join(labels_dir, label_file), os.path.join(dst_labels_dir, label_file))
        listed.append(os.path.join(split_name, "images", image_file))
    return listed


if __name__ == "__main__":
    # Create the training and validation directories.
    for directory in (train_images_dir, train_labels_dir, val_images_dir, val_labels_dir):
        os.makedirs(directory, exist_ok=True)

    train_images, val_images = split_train_val(os.listdir(images_dir), train_ratio)
    num_train = len(train_images)
    total_images = num_train + len(val_images)

    train_file_list = copy_split(train_images, train_images_dir, train_labels_dir, "train")
    val_file_list = copy_split(val_images, val_images_dir, val_labels_dir, "val")

    # One relative image path per line (entries used to be joined with the
    # letter "n" instead of a newline).
    with open(os.path.join(data_dir, "train.txt"), "w") as train_txt_file:
        train_txt_file.write("\n".join(train_file_list))
    with open(os.path.join(data_dir, "val.txt"), "w") as val_txt_file:
        val_txt_file.write("\n".join(val_file_list))
    print(f"partition completed, training set includes{ num_train }samples, validation set contains{ total_images - num_train }samples.")
Examine.py
#check whether any txt file in the folder is empty
import os
# Folder whose .txt files are checked for zero size.
folder_path = "/home/ws/CoodWorkRun/Database/smoDB_phoDB_glaDB/label"
empty_files = []
for entry in os.listdir(folder_path):
    # Guard clauses: skip anything that is not a .txt file or is not empty.
    if not entry.endswith(".txt"):
        continue
    if os.path.getsize(os.path.join(folder_path, entry)) != 0:
        continue
    empty_files.append(entry)
    print("Empty file:", entry)
# Despite its name, this counts empty *files*.
num_empty_folders = len(empty_files)
print("Number of empty files:", num_empty_folders)
Label_Make.py
#label production for facial dataset
from PIL import Image,ImageDraw
# Bounding-box list, output label folder and image folder of the dataset.
anno_box_path = r"/home/ws/CoodWorkRun/Database/ facial dataset /CelebA/Anno/list_bbox_celeba.txt"
label_dir = "/home/ws/CoodWorkRun/Database/ facial dataset /CelebA/label"
img_dir = "/home/ws/CoodWorkRun/Database/ facial dataset /CelebA/Img/img_celeba.7z/img_celeba"

# The first two lines of the box file are skipped, as before.
# NOTE(review): presumably an entry count and a column header -- confirm.
HEADER_LINES = 2


def celeba_box_to_yolo(x1, y1, w, h, img_w, img_h):
    """Convert a pixel box (left, top, width, height) to normalised (cx, cy, w, h).

    The centre is shifted by one pixel before normalising, exactly as the
    original inline arithmetic did.
    """
    dw = 1. / (int(img_w))
    dh = 1. / (int(img_h))
    x2, y2 = x1 + w, y1 + h
    x = ((x1 + x2) / 2.0 - 1) * dw
    y = ((y1 + y2) / 2.0 - 1) * dh
    return x, y, (x2 - x1) * dw, (y2 - y1) * dh


def make_celeba_labels():
    """Write one label file (class id 0) per entry of the bounding-box list.

    Returns:
        Number of label files written.
    """
    # Function-scope import: this script's visible imports only bring in PIL.
    import os

    written = 0
    # Context managers close the annotation file, each image and each label
    # file; the annotation file and the images used to be left open.
    with open(anno_box_path, "r") as box_file:
        for line_no, line in enumerate(box_file):
            if line_no < HEADER_LINES:
                continue
            img_strs = line.split()
            if len(img_strs) < 5:
                # Blank or truncated row: nothing to convert.
                continue
            x1, y1, w, h = int(img_strs[1]), int(img_strs[2]), int(img_strs[3]), int(img_strs[4])
            with Image.open(f"{img_dir}/{img_strs[0]}") as img:
                img_w, img_h = img.size
            x, y, w, h = celeba_box_to_yolo(x1, y1, w, h, img_w, img_h)
            # Stem of the image name; replaces the former line[0:6] slice,
            # which only worked for six-character stems.
            imgname = os.path.splitext(img_strs[0])[0]
            with open(f"{label_dir}/{imgname}.txt", "w") as label_txt:
                # Rows used to end with the letter "n" instead of a newline.
                label_txt.write(f"0 {x} {y} {w} {h}\n")
            written += 1
    return written


if __name__ == "__main__":
    # A single summary replaces the former print of every input line.
    print(f"{make_celeba_labels()} label files written")
Rename.py
#label file renaming
import os
def rename_files(folder_path_images, folder_path_labels, prefix="phoneB_"):
    """Rename images and labels, pairing them by sorted position, to ``<prefix><image stem>``.

    The i-th label (sorted order) takes the new name of the i-th image and a
    ``.txt`` suffix; the image keeps its own extension.

    Args:
        folder_path_images: Folder containing the image files.
        folder_path_labels: Folder containing the label files.
        prefix: Text placed in front of every image stem.

    Nothing is renamed when the two folders hold a different number of files
    or when any destination name already exists. Errors are printed, not raised.
    """
    try:
        image_files = sorted(os.listdir(folder_path_images))
        label_files = sorted(os.listdir(folder_path_labels))
        # Positional pairing only makes sense for equally long listings.
        if len(image_files) != len(label_files):
            print("Error: The number of files in the two folders does not match.")
            return

        # Plan every rename first so all destinations can be checked up front.
        plan = []
        for image_file, label_file in zip(image_files, label_files):
            image_name, image_ext = os.path.splitext(image_file)
            new_name = f"{prefix}{image_name}"
            plan.append((os.path.join(folder_path_images, image_file),
                         os.path.join(folder_path_images, f"{new_name}{image_ext}")))
            plan.append((os.path.join(folder_path_labels, label_file),
                         os.path.join(folder_path_labels, f"{new_name}.txt")))

        # os.rename silently replaces an existing destination on POSIX, so
        # refuse to start if any target is already taken.
        collisions = [dst for _, dst in plan if os.path.exists(dst)]
        if collisions:
            print(f"Error: {len(collisions)} destination file(s) already exist, e.g. {collisions[0]}")
            return

        for src, dst in plan:
            os.rename(src, dst)
        print("Files renamed successfully.")
    except OSError as e:
        print(f"Error: {e}")
#usage example
# NOTE(review): the path separators were missing from these literals; restored
# as C:\Users\User\Desktop\coco\transfer\cell_phone\... -- confirm.
folder_path_images = r'C:\Users\User\Desktop\coco\transfer\cell_phone\JPEGImages'
folder_path_labels = r'C:\Users\User\Desktop\coco\transfer\cell_phone\label'
rename_files(folder_path_images, folder_path_labels)
XmlToTxt.py
# convert xml annotation files into txt label files
#
import xml.etree.ElementTree as ET
from os import listdir, getcwd
import glob
import cv2
#folder_path_images = r'C:UsersUserDesktopcocotransferbottleJPEGImages'
# folder_path_labels = r'C:UsersUserDesktopcocotransferbottleannotations'
# Class names to export, matched against each <name>...</name> element of the
# XML; a name's position in this list becomes its numeric class id.
classes = ["cell phone"] # list the text found inside <name>...</name>; separate multiple names with commas
def convert(size, box):
    """Convert a pixel box to normalised centre/size values.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        Tuple of four strings ``(x_center, y_center, width, height)``, each
        formatted with six decimals and scaled by the image size. An image
        dimension of 0 yields 0 for the values on that axis instead of a
        division by zero.
    """
    # The zero-height branch used to assign dw instead of dh, which clobbered
    # dw and left dh undefined (NameError further down).
    dw = 1.0 / size[0] if size[0] != 0 else 0
    dh = 1.0 / size[1] if size[1] != 0 else 0
    x = (box[0] + box[1]) / 2.0 * dw
    y = (box[2] + box[3]) / 2.0 * dh
    w = (box[1] - box[0]) * dw
    h = (box[3] - box[2]) * dh
    return ('%.6f' % x, '%.6f' % y, '%.6f' % w, '%.6f' % h)
def convert_annotation(image_name, image_path,
                       xml_dir=r'C:\Users\User\Desktop\coco\transfer\cell_phone\annotations',
                       label_dir=r'C:\Users\User\Desktop\coco\transfer\cell_phone\label'):
    """Convert the XML annotation of one image into a txt label file.

    Args:
        image_name: File name of the image; its stem selects ``<stem>.xml`` in
            *xml_dir* and names the output ``<stem>.txt`` in *label_dir*.
        image_path: Path of the image; read only to obtain width and height.
        xml_dir: Folder holding the XML annotations.
        label_dir: Folder receiving the txt labels (must already exist).
            NOTE(review): both defaults were restored from literals that had
            lost their path separators -- confirm.

    One line ``<class id> <x> <y> <w> <h>`` is written per object whose name
    is listed in ``classes``. An unreadable image is reported and skipped.
    """
    # Function-scope import: this script only does `from os import listdir, getcwd`.
    import os

    print(f"Processing {image_name}")
    # splitext instead of image_name[0:-3]: also correct for extensions that
    # are not exactly three characters long.
    stem = os.path.splitext(image_name)[0]

    # The size comes from the image itself, not from the XML <size> element.
    # Read it before any output file is opened so an unreadable image (imread
    # returns None) does not leave an empty label file behind.
    img = cv2.imread(image_path)
    if img is None:
        print(f"Skipping {image_name}: image could not be read")
        return
    w = int(img.shape[1])
    h = int(img.shape[0])

    with open(os.path.join(xml_dir, stem + '.xml'), encoding="utf8") as f:
        root = ET.fromstring(f.read())

    rows = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        # Rows used to end with the letter "n" instead of a newline.
        rows.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

    # The context manager guarantees the label file is flushed and closed;
    # the handle was previously never closed.
    with open(os.path.join(label_dir, stem + '.txt'), 'w') as out_file:
        out_file.writelines(rows)
# Current working directory; not used by the code visible in this script.
wd = getcwd()
if __name__ == '__main__':
    # Local import: this script only does `from os import listdir, getcwd`.
    import os

    # Folder holding the pictures; change the pattern to match your image type.
    # NOTE(review): the path separators were missing from this literal;
    # restored as C:\Users\User\Desktop\coco\transfer\cell_phone\JPEGImages -- confirm.
    for image_path in glob.glob(r"C:\Users\User\Desktop\coco\transfer\cell_phone\JPEGImages\*.jpg"):
        # basename replaces the former split on a backslash literal, which was
        # an unterminated string as written and Windows-only by design.
        image_name = os.path.basename(image_path)
        convert_annotation(image_name, image_path)
    print('complete !')