-
-
Notifications
You must be signed in to change notification settings - Fork 185
Expand file tree
/
Copy pathPackedBitVector.py
More file actions
143 lines (117 loc) · 4.02 KB
/
PackedBitVector.py
File metadata and controls
143 lines (117 loc) · 4.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
if TYPE_CHECKING:
from ..classes.generated import PackedBitVector
def _nest(data: list, offset: int, dims: Tuple[int, ...]) -> List[Any]:
    """Slice one block of ``data`` beginning at ``offset`` into nested lists shaped by ``dims``."""
    if len(dims) == 1:
        # Innermost level: a plain slice (may be short or empty past the end of data).
        return data[offset : offset + dims[0]]
    inner = dims[1:]
    # Number of flat items covered by one entry of the current dimension.
    stride = 1
    for dim in inner:
        stride *= dim
    return [_nest(data, offset + k * stride, inner) for k in range(dims[0])]


def reshape(data: list, shape: Optional[Tuple[int, ...]] = None) -> List[Any]:
    """Group a flat sequence into nested lists of the given per-item shape.

    The leading (outermost) dimension is implied: the result is a list of
    blocks, each block holding ``shape[0] * shape[1] * ...`` consecutive items
    arranged according to ``shape``.

    Args:
        data: Flat sequence of values.
        shape: Shape of a single block. ``None`` returns ``data`` unchanged.
            ``(m,)`` yields chunks of ``m`` items, ``(m, n)`` yields blocks of
            ``m`` rows with ``n`` items each, and longer shapes nest further.

    Returns:
        ``data`` itself when ``shape`` is ``None``, otherwise a list of blocks.
        If ``len(data)`` is not a multiple of the block size, the final block
        contains short or empty innermost slices.

    Raises:
        ValueError: If ``shape`` is empty ("Invalid shape"), or if the block
            size is zero (raised by ``range`` for a zero step).
    """
    if shape is None:
        return data
    if len(shape) == 0:
        raise ValueError("Invalid shape")

    # Flat item count of one block.
    block = 1
    for dim in shape:
        block *= dim

    return [_nest(data, begin, shape) for begin in range(0, len(data), block)]
def unpack_ints(
    packed: "PackedBitVector",
    start: int = 0,
    count: Optional[int] = None,
    shape: Optional[Tuple[int, ...]] = None,
) -> List[Any]:
    """Decode fixed-width unsigned integers from a packed bit vector.

    Each value occupies ``packed.m_BitSize`` bits of ``packed.m_Data``. Bits
    are consumed from the low end of each byte and assembled into the value
    from its low end upwards.

    Args:
        packed: Object providing ``m_BitSize``, ``m_Data`` and ``m_NumItems``.
        start: Index of the first value to decode (in items, not bytes).
        count: Number of values to decode. When omitted, ``packed.m_NumItems``
            values are read, regardless of ``start``.
        shape: Optional per-item shape forwarded to ``reshape``.

    Returns:
        The decoded integers, passed through ``reshape(values, shape)``.
    """
    assert packed.m_BitSize is not None

    bit_size = packed.m_BitSize
    raw = packed.m_Data

    # Locate the byte and the bit within that byte where item `start` begins.
    byte_index, bit_offset = divmod(bit_size * start, 8)

    if count is None:
        count = packed.m_NumItems

    values = []
    for _ in range(count):
        acc = 0
        filled = 0
        while filled < bit_size:
            # Take what remains of the current byte; surplus high bits are
            # removed by the mask applied once the value is complete.
            acc |= (raw[byte_index] >> bit_offset) << filled
            taken = min(bit_size - filled, 8 - bit_offset)
            filled += taken
            bit_offset += taken
            if bit_offset == 8:
                # Current byte exhausted: continue with the next one.
                byte_index += 1
                bit_offset = 0
        values.append(acc & ((1 << bit_size) - 1))

    return reshape(values, shape)
def unpack_floats(
    packed: "PackedBitVector",
    start: int = 0,
    count: Optional[int] = None,
    shape: Optional[Tuple[int, ...]] = None,
) -> List[Any]:
    """Decode quantized floats from a packed bit vector.

    Each stored integer ``q`` is mapped to
    ``q * m_Range / (2 ** m_BitSize - 1) + m_Start``.

    Args:
        packed: Object providing ``m_BitSize``, ``m_Range``, ``m_Start``,
            ``m_NumItems`` and the data read by ``unpack_ints``.
        start: Index of the first value to decode.
        count: Number of values to decode; defaults to ``packed.m_NumItems``.
        shape: Optional per-item shape forwarded to ``reshape``.

    Returns:
        The decoded values, passed through ``reshape(values, shape)``.
    """
    assert packed.m_BitSize is not None
    assert packed.m_Range is not None
    assert packed.m_Start is not None

    if packed.m_BitSize != 0:
        # Decode the raw integers first, then rescale them; Python's true
        # division keeps the arithmetic in double precision.
        raw_values = unpack_ints(packed, start, count)
        step = packed.m_Range / ((1 << packed.m_BitSize) - 1)
        decoded = [raw * step + packed.m_Start for raw in raw_values]
    else:
        # A zero bit size would make the scale divisor zero; every item is
        # simply the start value.
        constant = [packed.m_Start]
        length = count if count is not None else packed.m_NumItems
        decoded = constant * length

    return reshape(decoded, shape)
# def pack_ints(
# data: npt.NDArray[np.uint], bitsize: Optional[int] = 0
# ) -> PackedBitVector:
# # ensure that the data type is unsigned
# assert "uint" in data.dtype.name
# m_NumItems = data.size
# maxi = data.max()
# # Prevent overflow
# if bitsize:
# m_BitSize = bitsize
# else:
# m_BitSize = (32 if maxi == 0xFFFFFFFF else np.ceil(np.log2(maxi + 1))) % 256
# m_Data = np.zeros((m_NumItems * m_BitSize + 7) // 8, dtype=np.uint8)
# indexPos = 0
# bitPos = 0
# for x in data:
# bits = 0
# while bits < m_BitSize:
# m_Data[indexPos] |= (x >> bits) << bitPos
# num = min(m_BitSize - bits, 8 - bitPos)
# bitPos += num
# bits += num
# if bitPos == 8:
# indexPos += 1
# bitPos = 0
# return PackedBitVector(m_NumItems=m_NumItems, m_BitSize=m_BitSize, m_Data=m_Data)
# def pack_floats(
# data: npt.NDArray[np.floating[Any]],
# bitsize: Optional[int] = None,
# ) -> PackedBitVector:
# min = data.min()
# max = data.max()
# range = max - min
# data_f64 = data.astype(np.float64)
# # rebase to 0
# data_f64 -= min
# # scale to [0, 1]
# data_f64 /= range
# # quantize to [0, 2^bit_size - 1]
# bitsize = bitsize or max(data.itemsize, 32)
# assert bitsize is not None
# data_f64 *= (1 << bitsize) - 1
# # pack the data
# packed = pack_ints(data_f64.astype(np.uint32), bitsize)
# packed.m_Start = min
# packed.m_Range = range
# return packed
# Names exported by `from <module> import *`: only the two unpack helpers;
# `reshape` is not listed.
__all__ = ("unpack_ints", "unpack_floats")