Skip to content

Commit 3992498

Browse files
committed
better compression of answers
1 parent c696b2f commit 3992498

File tree

1 file changed

+17
-10
lines changed

1 file changed

+17
-10
lines changed

compress/compress5.py

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,11 @@
11
outfile = open('../encoded.h', 'w')
22

3+
def toBitmap(length, decider):
    """Pack the results of ``decider(0) .. decider(length - 1)`` into a bitmap.

    Position ``p`` is stored in byte ``p // 8`` at bit ``p % 8``, with bit 0
    being the least-significant bit of the byte. A bit is set exactly when
    ``decider(p)`` is truthy.

    Args:
        length: Number of positions to encode.
        decider: Callable that takes a position index and returns a truthy
            value when the corresponding bit should be set.

    Returns:
        A ``bytes`` object holding one byte per group of eight positions
        (the last byte is zero-padded); empty when ``length`` is not positive.
    """
    def encodeByte(offset):
        # Positions at or beyond ``length`` are never handed to ``decider``;
        # the padding bits of the final byte therefore stay 0.
        return sum(
            1 << i
            for i in range(8)
            if offset + i < length and decider(offset + i)
        )

    return bytes(encodeByte(offset) for offset in range(0, length, 8))
8+
39
def dumpBlob(name, blob):
410
n = len(blob)
511
outfile.write("const uint8_t %s[%u] = {\n" % (name, n))
@@ -41,15 +47,18 @@ def encodeList(ww):
4147

4248
return out
4349

44-
words = [[] for i in range(26)]
4550
allwords = []
4651

4752
with open("full.txt") as f:
4853
for w in f:
4954
w = w.strip()
5055
if len(w) == 5:
5156
allwords.append(w)
52-
words[ord(w[0])-ord('a')].append(w[1:])
57+
58+
allwords.sort()
59+
words = [[] for i in range(26)]
60+
for w in allwords:
61+
words[ord(w[0])-ord('a')].append(w[1:])
5362

5463
encoded = tuple(map(encodeList, words))
5564
offsets = []
@@ -63,17 +72,18 @@ def encodeList(ww):
6372

6473
special = b''
6574
prev = 0
66-
deltas = []
75+
answers = set()
6776
with open("answers.txt") as f:
6877
for w in f:
6978
w = w.strip()
7079
if len(w) == 5:
7180
i = allwords.index(w)
72-
deltas.append(i - prev)
73-
prev = i
81+
answers.add(i)
82+
83+
answerBlob = toBitmap(len(allwords), lambda x : x in answers)
7484

7585
dumpBlob("wordBlob", wordBlob)
76-
dumpBlob("specialDeltas", deltas)
86+
dumpBlob("answers", answerBlob)
7787

7888
outfile.write("""typedef struct {
7989
uint16_t wordNumber;
@@ -91,11 +101,8 @@ def encodeList(ww):
91101

92102
with open("../sizes.h", "w") as sizes:
93103
sizes.write("#define NUM_WORDS %u\n" % len(allwords))
94-
sizes.write("#define NUM_ANSWERS %u\n" % len(deltas))
104+
sizes.write("#define NUM_ANSWERS %u\n" % len(answers))
95105

96106
#print(sum(map(len, encoded)))
97107
#print(max(map(len, encoded)))
98108

99-
#print(max(deltas))
100-
assert(max(deltas)<256)
101-
#print(len(deltas))

0 commit comments

Comments
 (0)