# Multiprocessing: filter AIS ship records down to the Bohai Sea area,
# one worker process per source CSV file.
import multiprocessing,os,csv,re
def read_all_csv_file(csv_file_root_path):
    """Return the names of all CSV files directly under *csv_file_root_path*.

    The original implementation returned every directory entry; callers open
    each returned name as a CSV file, so sub-directories and non-CSV entries
    would crash downstream — they are filtered out here (case-insensitive
    ``.csv`` match).
    """
    return [name for name in os.listdir(csv_file_root_path)
            if name.lower().endswith('.csv')]
def write_csv(new_file_name, Dictfiles):
    """Write *Dictfiles* (a list of per-ship row dicts) to *new_file_name*.

    Emits a fixed 11-column header first, then one CSV row per dict,
    printing a progress line for every row written.
    """
    print("new", new_file_name, "csv file")
    with open(new_file_name, 'w', newline='') as out:
        print("start writing data >>>>>>>>>>>>>>>>>>>>")
        print("write header fields >>>>>>>>>>>>>>>>>>>>")
        field_names = ['ShipName', 'IMO', 'ShipTypeEN', 'Speed', 'Lon', 'Lat',
                       'Dest', 'ETA', 'UnixTime', 'Lon_d', 'Lat_d']
        csv_writer = csv.DictWriter(out, fieldnames=field_names)
        csv_writer.writeheader()
        print("finished writing header fields----------------")
        print("start writing table body data >>>>>>>>>>>>>>>>>>>>")
        total = len(Dictfiles)
        print("total table body data", total, "strip")
        for row_number, row in enumerate(Dictfiles, start=1):
            print("currently writing the number", row_number, "/", total, "strip")
            csv_writer.writerow(row)
def OrderDictRow_To_Dict(item):
    """Copy the eleven exported ship fields out of *item* into a plain dict.

    Any extra keys present on *item* are dropped; field order matches the
    CSV header written by write_csv (with UnixTime before ETA, as in the
    original).
    """
    wanted = ("ShipName", "IMO", "ShipTypeEN", "Speed", "Lon", "Lat",
              "Dest", "UnixTime", "ETA", "Lon_d", "Lat_d")
    return {field: item[field] for field in wanted}
def range_dect(lat, lon):
    """Return True iff (lat, lon) lies strictly inside the Bohai Sea box.

    Bounds (decimal degrees): 37.117 < lat < 40.933, 117.55 < lon < 122.133.
    Points exactly on a boundary are rejected, as in the original.
    """
    north, south = 40.933, 37.117
    east, west = 122.133, 117.55
    return south < lat < north and west < lon < east
def GeoCoord_convert_du2decimal(location):
    """Convert a degrees/minutes string (e.g. "39°30′") to decimal degrees.

    Plain decimal strings ("39.5") pass through unchanged. A trailing
    seconds field, if any, is ignored (matching the original behaviour).

    Fix over the original: for negative coordinates the minutes are now
    applied away from zero, so "-39°30′" yields -39.5 instead of -38.5.
    """
    text = str(location)
    parts = re.split('[° ′]', text)
    degrees = float(parts[0])
    # A '°'-terminated string splits into 2 parts ("39°" -> ['39', '']);
    # only 3+ parts carry a real minutes field, as in the original check.
    if len(parts) > 2:
        minutes = float(parts[1]) / 60
    else:
        minutes = 0.0
    # Apply minutes in the direction of the degrees' sign.
    return degrees - minutes if degrees < 0 else degrees + minutes
def Read_Judge_Write_CSV(csv_file_path, target_filepath):
    """Read one AIS CSV file, keep only rows inside the Bohai Sea box, and
    write the surviving rows to *target_filepath*.

    Fixes over the original:
      * the broken string literal in the "READING CSV FILE NOW" print
        (a syntax error) is repaired;
      * the stray ``next(csv_Dictreader)`` is removed — csv.DictReader
        already consumes the header line, so the extra next() silently
        discarded the first data row of every file;
      * the bare "n"/"t" fragments in the progress messages are restored
        to real "\\n"/"\\t" escapes.
    """
    print(">>>>> the current child process number is :", os.getpid())
    print(">>>>> the current parent process number is :", os.getppid())
    print("Starting function>>>>>Read_Judge_Write_CSV")
    print("READING CSV FILE NOW:\n", csv_file_path)
    result_CSV_DictRow = []
    with open(csv_file_path, 'r', encoding='gbk', errors='ignore') as f:
        # NOTE(review): replace() strips spaces from every line, not only the
        # header — it also removes spaces inside data fields such as ship
        # names. Kept as-is to preserve the original behaviour; confirm this
        # is intentional.
        csv_Dictreader = csv.DictReader(x.replace(' ', '') for x in f)
        Not_In_BoHai_count = 0
        In_BoHai_count = 0
        print(">>>>>>>>>>Judging whether it is in BoHai area>>>>>>>>>>")
        for OrderDictrow in csv_Dictreader:
            lat = GeoCoord_convert_du2decimal(OrderDictrow["Lat"])
            lon = GeoCoord_convert_du2decimal(OrderDictrow["Lon"])
            if range_dect(lat, lon):
                In_BoHai_count += 1
                result_CSV_DictRow.append(OrderDictRow_To_Dict(OrderDictrow))
            else:
                Not_In_BoHai_count += 1
    print("--------------------------------Judgement finished--------------------------------")
    print(">>>>>>>>>>>>>>>> current CSV the data within the bohai sea range in the form is as follows( DictType)>>>>>>>>>>>>>>>>\n", result_CSV_DictRow)
    print(">>>>>>>>>>>>>>>> now start writing to the specified folder", target_filepath)
    write_csv(target_filepath, result_CSV_DictRow)
    print("\t total", Not_In_BoHai_count, "the data is located outside the bohai sea \t", In_BoHai_count, "the data is within the bohai sea range", "\n")
def MP(functionxx, c1, c2):
    """Spawn a child process that runs *functionxx* with the two arguments.

    The worker is started but deliberately not joined, so the caller can
    fan out many processes in a loop without blocking.
    """
    worker = multiprocessing.Process(target=functionxx, args=(c1, c2))
    worker.start()
if __name__ == '__main__':
    # Windows-style paths: the backslashes must be escaped — the original
    # literals ("E:\201907-2\") were syntax errors because the trailing
    # backslash escaped the closing quote.
    source_CSV_root_path = "E:\\201907-2\\"
    target_CSV_root_Path = "D:\\CSV_stage01\\"
    CSV_fileNAME_list = read_all_csv_file(source_CSV_root_path)
    CSV_file_count = len(CSV_fileNAME_list)
    print("**********table of contents", source_CSV_root_path, "lower co ownership", CSV_file_count, "individual CSV files**********")
    # counter for the number of files that have been read
    count = 0
    for CSV_fileNAME in CSV_fileNAME_list:
        # One worker process per source file; output files are prefixed
        # with "new_CSV_" in the target directory.
        MP(Read_Judge_Write_CSV,
           source_CSV_root_path + CSV_fileNAME,
           target_CSV_root_Path + "new_CSV_" + CSV_fileNAME)
        count += 1
        print("---------------opened", count, "/", CSV_file_count, "processes used for read and write processing CSV file \n")
# Multi-threading (example, for reference — note the original heading said
# "Multi process" but the snippet below uses threading):
# import threading
# thread_obj = threading.Thread(target=task, args=(arg_one, arg_two,))
# thread_obj.setDaemon(True)  # daemon thread: exits together with the main thread
# thread_obj.start()
# Multi-process example code (the same pattern applies to multi-threaded code):
# import multiprocessing
# process_obj = multiprocessing.Process(target=task, args=(arg_one, arg_two,))
# process_obj.start()