"""Split an annotated image dataset into train / val / test folders.

Source images are expected under a path containing ``ori/images`` with their
JSON labels under the sibling ``ori/labels``; copies are written to
``trained/images/<split>`` and ``trained/labels/<split>``.
"""
import json
import os
import shutil
import sys
from math import ceil
from typing import Iterable, List, Optional, Set, Union

#from ai_platform.common.config import settings
# from ai_platform.model.crud import image_label_crud as ilc, project_list_crud as plc, \
#     image_dataset_curd as idc
#from ai_platform.common.logger import logger
# from ai_platform.model.database import session
from app.core.common_utils import logger
from app.json_util import write_info

# root_path = settings.root_path
# root_path = '/home/wd/server/ai_platform/data_set/'
#db = session

# Sub-folder names created under ``trained/images`` and ``trained/labels``.
_SPLITS = ('train', 'val', 'test')


def delete_file(files: List[str]) -> None:
    """
    Remove every path listed in ``files``.

    :param files: paths of the files to delete; missing paths are ignored
    :return: None
    """
    for file in files:
        try:
            os.remove(file)
        except FileNotFoundError:
            # Already gone (possibly removed concurrently): nothing to do.
            continue


def get_file_then_delete_file(path: str) -> str:
    """
    Delete every file found (recursively) under the directory containing ``path``.

    Only files are removed; the directory tree itself is left in place.

    :param path: a file path; only its directory part is used
    :return: the directory part of ``path`` (returned even if it does not exist)
    """
    filedir = os.path.dirname(path)
    if os.path.exists(filedir):
        del_files = [
            os.path.join(dirpath, name)
            for dirpath, _dirnames, filenames in os.walk(filedir)
            for name in filenames
        ]
        delete_file(files=del_files)
    return filedir


def _trained_path(path: str, kind: str, split: str) -> str:
    """Map an ``ori/<kind>`` path to ``trained/<kind>/<split>``; reject other paths."""
    marker = 'ori/' + kind
    if marker not in path:
        # Without the marker ``replace`` would be a no-op and the caller would
        # end up deleting the *source* directory instead of the target one.
        raise ValueError('expected %r in path %r' % (marker, path))
    return path.replace(marker, 'trained/' + kind + '/' + split)


def delete_dir_file(files: List[str], json_files: List[str]) -> List[str]:
    """
    Empty the train / val / test output folders before a new split is written.

    The folders are derived from the first entry of each list by replacing
    ``ori/images`` (``ori/labels``) with ``trained/images/<split>``
    (``trained/labels/<split>``).

    :param files: source image paths; only ``files[0]`` is inspected
    :param json_files: source label paths; only ``json_files[0]`` is inspected,
        and label folders are left untouched when the list is empty
    :return: ``[train_dir, val_dir, test_dir]`` image folders, each ending in ``/``
    :raises IndexError: if ``files`` is empty
    :raises ValueError: if a source path does not contain the expected
        ``ori/images`` / ``ori/labels`` segment
    """
    # Resolve (and validate) every target before deleting anything.
    image_targets = [_trained_path(files[0], 'images', split) for split in _SPLITS]
    label_targets = []
    if json_files:
        label_targets = [
            _trained_path(json_files[0], 'labels', split) for split in _SPLITS
        ]

    logger.info('删除图片数据')
    image_dirs = [get_file_then_delete_file(target) for target in image_targets]

    if not label_targets:
        logger.info('无json数据')
    else:
        logger.info('删除json数据')
        for target in label_targets:
            get_file_then_delete_file(target)

    return [image_dir + '/' for image_dir in image_dirs]


def _copy_into(src: str, dst: str) -> None:
    """Copy ``src`` to ``dst``, creating the destination directory if needed."""
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    shutil.copyfile(src, dst)


def _copy_split(images: Iterable[str], split: str, labels: Set[str]) -> None:
    """Copy existing ``images`` into ``split`` together with any label found in ``labels``."""
    for image in images:
        if not os.path.exists(image):
            continue
        _copy_into(image, image.replace('ori/images', 'trained/images/' + split))
        # Swap only the ``ori/images`` segment so that an "images" substring in
        # a parent directory or in the file name does not corrupt the label path.
        label = os.path.splitext(image)[0].replace('ori/images', 'ori/labels') + '.json'
        if label in labels:
            _copy_into(label, label.replace('ori/labels', 'trained/labels/' + split))


def mv_file(train_files: List[str], test_files: List[str],
            r_v_rate: Optional[float] = 0.9,
            t_t_rate: Optional[float] = 0.9) -> List[str]:
    """
    Copy images and their JSON labels into the train / val / test folders.

    Entries ending in ``.json`` are treated as labels, everything else as
    images. Neither input list is modified.

    :param train_files: image and label paths of the training pool, which is
        further divided into train and val
    :param test_files: image and label paths of the test set
    :param r_v_rate: fraction of the training pool kept for training; the
        remainder becomes the validation set
    :param t_t_rate: threshold used to rebalance the training pool against the
        test set; ``None`` disables rebalancing
    :return: ``[train_dir, val_dir, test_dir]`` image folders, each ending in ``/``
    :raises IndexError: if ``train_files`` contains no image
    :raises ValueError: if the first training path lacks the ``ori/images`` /
        ``ori/labels`` segment
    """
    train_img_files = [f for f in train_files if not f.endswith('.json')]
    train_json_files = [f for f in train_files if f.endswith('.json')]
    test_img_files = [f for f in test_files if not f.endswith('.json')]
    test_json_files = [f for f in test_files if f.endswith('.json')]

    train_len_all = len(train_img_files)
    len_all = train_len_all + len(test_img_files)
    overflow_files = []  # training images re-assigned to the test split
    if t_t_rate is not None and len_all:
        # NOTE(review): this compares the *test* share against t_t_rate, which
        # looks inverted for a train/test ratio -- kept as-is, confirm intent.
        if len(test_img_files) / len_all > t_t_rate:
            train_len_all = ceil(len_all * t_t_rate)
            overflow_files = train_img_files[train_len_all:]

    train_len = ceil(train_len_all * r_v_rate)
    t_files = train_img_files[:train_len]
    val_files = train_img_files[train_len:train_len_all]

    # Clear the previous split before writing the new one.
    target_path = delete_dir_file(files=train_img_files, json_files=train_json_files)

    train_labels = set(train_json_files)
    _copy_split(t_files, 'train', train_labels)
    _copy_split(val_files, 'val', train_labels)
    _copy_split(test_img_files, 'test', set(test_json_files))
    # Overflow images come from the training pool, so their labels do too.
    _copy_split(overflow_files, 'test', train_labels)
    return target_path


def get_file(ori_path: str, type_list: Union[object, str]) -> Union[List[str], bool]:
    """
    Build the train / val / test split for the dataset rooted at ``ori_path``.

    The first image (in sorted name order) forms the test set and all other
    images the training pool, which ``mv_file`` then divides into train and
    val. The class list is written next to the data via ``write_info``.

    :param ori_path: directory holding the ``images`` and ``labels`` folders;
        a trailing ``/`` is optional
    :param type_list: class names stored under the ``classes`` key; must be
        JSON-serialisable
    :return: ``[train_dir, val_dir, test_dir, class_file]``, or ``False`` when
        there are not enough images to fill both the training pool and the
        test set
    """
    root = ori_path if ori_path.endswith('/') else ori_path + '/'
    image_dir = root + 'images'
    # Sorted so the split is reproducible; sub-directories are not images.
    imgs = sorted(
        name for name in os.listdir(image_dir)
        if os.path.isfile(image_dir + '/' + name)
    )

    train_files = []
    test_files = []
    for index, img in enumerate(imgs):
        bucket = test_files if index == 0 else train_files
        bucket.append(image_dir + '/' + img)
        label = root + 'labels/' + os.path.splitext(img)[0] + '.json'
        # Unlabelled images are allowed; only list labels that really exist so
        # the later copy does not fail on a missing file.
        if os.path.exists(label):
            bucket.append(label)

    if not train_files or not test_files:
        print(len(train_files))
        print(len(test_files))
        return False

    target_path = mv_file(train_files=train_files, test_files=test_files)

    # Round-trip through json so only JSON-compatible data reaches write_info.
    type_dict = {'classes': type_list}
    str_json = json.dumps(type_dict)
    path = os.path.dirname(root) + '/img_label_type'
    # presumably write_info appends the '.json' suffix itself -- TODO confirm
    write_info(file_name=path, file_info=json.loads(str_json))
    target_path.append(path + '.json')
    return target_path


# Legacy implementation kept for reference.
# def get_file_path(proj_no: str):
#     """
#     Recognition algorithm: pass the image path to the algorithm
#     :param proj_no:
#     :return:
#     """
#     path = root_path + '/' + proj_no
#     img_path = path
#     # create the folders they need
#     vgg_path = path + '/vgg'
#     if not os.path.exists(vgg_path):
#         # vgg does not exist, create it
#         train_path = vgg_path + '/train'
#         test_path = vgg_path + '/test'
#         os.makedirs(train_path)
#         os.makedirs(test_path)
#     # generate labels
#     img_types = ilc.get_label_by_proj_no(proj_no=proj_no, db=db)
#     type_list = []
#     for img_type in img_types:
#         type_list.append(img_type.lebel_type)
#     type_dict = {'classes': type_list}
#     str_json = json.dumps(type_dict)
#     path = root_path + proj_no + '/img_label_type'
#     write_info(file_name=path, file_info=json.loads(str_json))
#     return img_path, path + '.json'


if __name__ == '__main__':
    # Usage: python <this file> <ori_path> [class_name ...]
    # e.g.   python <this file> D:/pythonProject/DeepLearnAiPlatform/data_set/3148803620347904/ori/ cat dog
    if len(sys.argv) < 2:
        sys.exit('usage: <ori_path> [class_name ...]')
    s = get_file(ori_path=sys.argv[1], type_list=sys.argv[2:])
    print(s)