Skip to content

Commit 18c89c7

Browse files
v1.1
1 parent 2d748a7 commit 18c89c7

File tree

1 file changed

+119
-0
lines changed

1 file changed

+119
-0
lines changed
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
"""
2+
The Cerebrum library and engine
3+
Copyright (c) 2025, by David Carteau. All rights reserved.
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
22+
"""
23+
24+
##############################################################################
25+
## NAME: shuffle.py ##
26+
## AUTHOR: David Carteau, France, February 2025 ##
27+
## LICENSE: MIT (see above and "license.txt" file content) ##
28+
##############################################################################
29+
30+
##############################################################################
31+
## PURPOSE: ##
32+
## Shuffle positions.txt file (result stored in positions-shuffled.txt) ##
33+
##############################################################################
34+
35+
import os
36+
import shutil
37+
import random
38+
39+
from tqdm import tqdm
40+
41+
# how many temporary files the dataset is spread over (adjust if needed)
N_CHUNCKS = 256

# absolute path of the temporary folder, resolved from "./tmp" against the
# current working directory (adjust if needed)
TMP_FOLDER = os.path.abspath("./tmp")

# seed of the random generator (adjust if needed)
# 21.05.2014 = date of Orion's first public release :-)
SEED = 21052014
50+
51+
52+
def main():
    """
    Shuffle the lines of './positions.txt' into './positions-shuffled.txt'.

    The shuffle is done in two passes :

    1. every line of the source file is appended to one of the N_CHUNCKS
       temporary files of TMP_FOLDER, picked at random ;
    2. each temporary file is loaded, shuffled in memory, and appended to
       the target file (only one temporary file is held in memory at a time).

    The random generator is seeded with SEED before anything else. TMP_FOLDER
    is wiped and re-created at start, and removed at exit, even when an error
    occurs.

    Raises OSError (e.g. FileNotFoundError) if the source file cannot be read
    or if the temporary/target files cannot be written.
    """
    random.seed(SEED)

    source = './positions.txt'
    target = './positions-shuffled.txt'

    # temporary folder creation (leftovers of a previous run are dropped first)

    if os.path.exists(TMP_FOLDER):
        shutil.rmtree(TMP_FOLDER)
    #end if

    os.mkdir(TMP_FOLDER)

    try:
        # 1st step : read source file and spread its content over temporary files

        print("Reading dataset...")

        chuncks = []

        try:
            for i in range(N_CHUNCKS):
                chunck = open(f'{TMP_FOLDER}/chunck-{i}.txt', 'wt')
                chuncks.append(chunck)
            #end for

            with open(source, 'rt') as file:
                for line in tqdm(file):
                    # the last line of the source may lack its line ending :
                    # restore it, otherwise the shuffle could move that line
                    # in front of another one and glue both together
                    if not line.endswith('\n'):
                        line += '\n'
                    #end if

                    i = random.randrange(N_CHUNCKS)
                    chuncks[i].write(line)
                #end for
            #end with
        finally:
            # always release the file handles, even if reading failed
            for chunck in chuncks:
                chunck.close()
            #end for
        #end try

        print()

        # 2nd step : read each chunk, shuffle its content, and write to target file

        print("Shuffling samples...")

        with open(target, 'wt') as o_file:
            for i in tqdm(range(N_CHUNCKS)):
                with open(f'{TMP_FOLDER}/chunck-{i}.txt', 'rt') as i_file:
                    lines = i_file.readlines()
                #end with

                random.shuffle(lines)

                o_file.writelines(lines)
            #end for
        #end with

        print()
    finally:
        # temporary folder deletion (also done on failure, so that no
        # half-written chunk is left behind)
        shutil.rmtree(TMP_FOLDER)
    #end try

    print("Done !")
#end def
116+
117+
# run the shuffle only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()
#end if

0 commit comments

Comments
 (0)