-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdicMerge.py
More file actions
63 lines (53 loc) · 1.87 KB
/
dicMerge.py
File metadata and controls
63 lines (53 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#encoding:utf-8
import os, sys, time
# Merge every ".txt" word list found in a directory into one de-duplicated
# word list written to that same directory.
#
# Usage: python dicMerge.py <dictionary_directory> <output_file_name>


def _list_dictionaries(directory, output_path):
    """Return the sorted names of the ``.txt`` files inside *directory*.

    The file at *output_path* is left out so that the result of an earlier
    run is never merged back in as if it were a source dictionary.  Names
    are sorted because ``os.listdir`` returns them in arbitrary order and
    the merge result should not depend on that order.
    """
    skip = os.path.abspath(output_path)
    return sorted(
        name
        for name in os.listdir(directory)
        if name.endswith(".txt")
        and os.path.abspath(os.path.join(directory, name)) != skip
    )


def _merge_words(paths):
    """Return the unique words of all files in *paths*, in first-seen order.

    Each line is right-stripped before comparison.  A word is dropped when
    it contains ``..`` or when an identical word was already collected; the
    same rule is applied to every file.  The path of each file and the
    number of lines dropped from it are printed as progress output.
    """
    merged = []
    seen = set()  # set membership keeps the duplicate check O(1) per word
    for path in paths:
        print(path)
        dropped = 0
        with open(path, "r") as source:
            for line in source:
                word = line.rstrip()
                if ".." in word or word in seen:
                    dropped += 1
                    continue
                seen.add(word)
                merged.append(word)
        print("Number of duplicates: ", dropped, "\n")
    return merged


def main(argv=None):
    """Run the merge using command-line style arguments.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the directory holding
    the dictionaries and ``argv[2]`` is the name of the output file, which
    is created inside that directory.  Exits with a usage message when
    either argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        program = argv[0] if argv else "dicMerge.py"
        sys.exit(
            "usage: %s <dictionary_directory> <output_file_name>" % program
        )

    start_time = time.time()
    directory = argv[1]
    output_name = argv[2]
    # os.path.join works whether or not the directory ends with a separator.
    output_path = os.path.join(directory, output_name)

    names = _list_dictionaries(directory, output_path)
    paths = [os.path.join(directory, name) for name in names]

    print("\nProcessing:")
    merged = _merge_words(paths)

    # Words are joined with newlines; no trailing newline is written.
    with open(output_path, "w") as result:
        result.write("\n".join(merged))

    # Summary of the run.
    print(
        "\n-->", len(names), " dictionary:\n ", " ".join(names),
        "\n\n---> joined into: ", output_name,
        "\n---> Dictionary Size: ", len(merged),
        " words\n---> %s seconds" % (time.time() - start_time), "\n"
    )


if __name__ == "__main__":
    main()