1+ #include "git-compat-util.h"
2+ #include "bloom.h"
3+ #include "diff.h"
4+ #include "diffcore.h"
5+ #include "revision.h"
6+ #include "hashmap.h"
7+ #include "commit-graph.h"
8+ #include "commit.h"
9+
10+ define_commit_slab (bloom_filter_slab , struct bloom_filter );
11+
12+ struct bloom_filter_slab bloom_filters ;
13+
14+ struct pathmap_hash_entry {
15+ struct hashmap_entry entry ;
16+ const char path [FLEX_ARRAY ];
17+ };
18+
/*
 * Rotate the 32-bit 'value' to the left by 'count' bit positions.
 *
 * Only the low five bits of 'count' are used, so a count of 0 or 32
 * returns 'value' unchanged and a negative count rotates to the right.
 * Both shift amounts are masked into the range 0..31, so neither shift
 * can reach the width of the type.
 */
static uint32_t rotate_left(uint32_t value, int32_t count)
{
	const uint32_t width_mask = 8 * sizeof(uint32_t) - 1;
	const uint32_t up = (uint32_t)count & width_mask;
	const uint32_t down = (0u - up) & width_mask;

	return (value << up) | (value >> down);
}
25+
26+ static inline unsigned char get_bitmask (uint32_t pos )
27+ {
28+ return ((unsigned char )1 ) << (pos & (BITS_PER_WORD - 1 ));
29+ }
30+
31+ static int load_bloom_filter_from_graph (struct commit_graph * g ,
32+ struct bloom_filter * filter ,
33+ struct commit * c )
34+ {
35+ uint32_t lex_pos , start_index , end_index ;
36+
37+ while (c -> graph_pos < g -> num_commits_in_base )
38+ g = g -> base_graph ;
39+
40+ /* The commit graph commit 'c' lives in doesn't carry bloom filters. */
41+ if (!g -> chunk_bloom_indexes )
42+ return 0 ;
43+
44+ lex_pos = c -> graph_pos - g -> num_commits_in_base ;
45+
46+ end_index = get_be32 (g -> chunk_bloom_indexes + 4 * lex_pos );
47+
48+ if (lex_pos > 0 )
49+ start_index = get_be32 (g -> chunk_bloom_indexes + 4 * (lex_pos - 1 ));
50+ else
51+ start_index = 0 ;
52+
53+ filter -> len = end_index - start_index ;
54+ filter -> data = (unsigned char * )(g -> chunk_bloom_data +
55+ sizeof (unsigned char ) * start_index +
56+ BLOOMDATA_CHUNK_HEADER_SIZE );
57+
58+ return 1 ;
59+ }
60+
/*
 * Calculate the murmur3 32-bit hash value for the given data
 * using the given seed.
 * Produces a uniformly distributed hash value.
 * Not considered to be cryptographically secure.
 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
 *
 * 'data' points at 'len' bytes of input (it need not be NUL-terminated)
 * and 'seed' is the initial hash state. The return value is the
 * finalized 32-bit hash.
 *
 * The input is deliberately read through an 'unsigned char' pointer.
 * Whether plain 'char' is signed is implementation-defined; reading a
 * byte >= 0x80 through a signed 'char' and widening it to uint32_t
 * sign-extends it, which sets the upper bits of the block and yields a
 * value that is neither murmur3 nor the same on every platform.
 *
 * NOTE(review): for inputs containing bytes >= 0x80 this returns a
 * different value than the sign-extending variant did on signed-char
 * platforms. Confirm that no data hashed with that variant has to
 * remain readable.
 */
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len)
{
	const uint32_t c1 = 0xcc9e2d51;
	const uint32_t c2 = 0x1b873593;
	const uint32_t r1 = 15;
	const uint32_t r2 = 13;
	const uint32_t m = 5;
	const uint32_t n = 0xe6546b64;
	const unsigned char *bytes = (const unsigned char *)data;
	/* Number of complete 4-byte blocks; size_t so long inputs are not truncated. */
	const size_t len4 = len / sizeof(uint32_t);
	const unsigned char *tail = bytes + len4 * sizeof(uint32_t);
	uint32_t k1 = 0;
	size_t i;

	for (i = 0; i < len4; i++) {
		/* Assemble each block in little-endian byte order. */
		uint32_t k = (uint32_t)bytes[4 * i] |
			     ((uint32_t)bytes[4 * i + 1] << 8) |
			     ((uint32_t)bytes[4 * i + 2] << 16) |
			     ((uint32_t)bytes[4 * i + 3] << 24);

		k *= c1;
		k = rotate_left(k, r1);
		k *= c2;

		seed ^= k;
		seed = rotate_left(seed, r2) * m + n;
	}

	/* Mix in the 1-3 bytes left over after the last full block, if any. */
	switch (len & (sizeof(uint32_t) - 1)) {
	case 3:
		k1 ^= ((uint32_t)tail[2]) << 16;
		/* fallthrough */
	case 2:
		k1 ^= ((uint32_t)tail[1]) << 8;
		/* fallthrough */
	case 1:
		k1 ^= ((uint32_t)tail[0]) << 0;
		k1 *= c1;
		k1 = rotate_left(k1, r1);
		k1 *= c2;
		seed ^= k1;
		break;
	}

	/* Finalization: fold in the length, then avalanche the bits. */
	seed ^= (uint32_t)len;
	seed ^= (seed >> 16);
	seed *= 0x85ebca6b;
	seed ^= (seed >> 13);
	seed *= 0xc2b2ae35;
	seed ^= (seed >> 16);

	return seed;
}
124+
125+ void fill_bloom_key (const char * data ,
126+ size_t len ,
127+ struct bloom_key * key ,
128+ const struct bloom_filter_settings * settings )
129+ {
130+ int i ;
131+ const uint32_t seed0 = 0x293ae76f ;
132+ const uint32_t seed1 = 0x7e646e2c ;
133+ const uint32_t hash0 = murmur3_seeded (seed0 , data , len );
134+ const uint32_t hash1 = murmur3_seeded (seed1 , data , len );
135+
136+ key -> hashes = (uint32_t * )xcalloc (settings -> num_hashes , sizeof (uint32_t ));
137+ for (i = 0 ; i < settings -> num_hashes ; i ++ )
138+ key -> hashes [i ] = hash0 + i * hash1 ;
139+ }
140+
141+ void add_key_to_filter (const struct bloom_key * key ,
142+ struct bloom_filter * filter ,
143+ const struct bloom_filter_settings * settings )
144+ {
145+ int i ;
146+ uint64_t mod = filter -> len * BITS_PER_WORD ;
147+
148+ for (i = 0 ; i < settings -> num_hashes ; i ++ ) {
149+ uint64_t hash_mod = key -> hashes [i ] % mod ;
150+ uint64_t block_pos = hash_mod / BITS_PER_WORD ;
151+
152+ filter -> data [block_pos ] |= get_bitmask (hash_mod );
153+ }
154+ }
155+
156+ void init_bloom_filters (void )
157+ {
158+ init_bloom_filter_slab (& bloom_filters );
159+ }
160+
161+ struct bloom_filter * get_bloom_filter (struct repository * r ,
162+ struct commit * c ,
163+ int compute_if_not_present )
164+ {
165+ struct bloom_filter * filter ;
166+ struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS ;
167+ int i ;
168+ struct diff_options diffopt ;
169+ int max_changes = 512 ;
170+
171+ if (bloom_filters .slab_size == 0 )
172+ return NULL ;
173+
174+ filter = bloom_filter_slab_at (& bloom_filters , c );
175+
176+ if (!filter -> data ) {
177+ load_commit_graph_info (r , c );
178+ if (c -> graph_pos != COMMIT_NOT_FROM_GRAPH &&
179+ r -> objects -> commit_graph -> chunk_bloom_indexes ) {
180+ if (load_bloom_filter_from_graph (r -> objects -> commit_graph , filter , c ))
181+ return filter ;
182+ else
183+ return NULL ;
184+ }
185+ }
186+
187+ if (filter -> data || !compute_if_not_present )
188+ return filter ;
189+
190+ repo_diff_setup (r , & diffopt );
191+ diffopt .flags .recursive = 1 ;
192+ diffopt .detect_rename = 0 ;
193+ diffopt .max_changes = max_changes ;
194+ diff_setup_done (& diffopt );
195+
196+ if (c -> parents )
197+ diff_tree_oid (& c -> parents -> item -> object .oid , & c -> object .oid , "" , & diffopt );
198+ else
199+ diff_tree_oid (NULL , & c -> object .oid , "" , & diffopt );
200+ diffcore_std (& diffopt );
201+
202+ if (diff_queued_diff .nr <= max_changes ) {
203+ struct hashmap pathmap ;
204+ struct pathmap_hash_entry * e ;
205+ struct hashmap_iter iter ;
206+ hashmap_init (& pathmap , NULL , NULL , 0 );
207+
208+ for (i = 0 ; i < diff_queued_diff .nr ; i ++ ) {
209+ const char * path = diff_queued_diff .queue [i ]-> two -> path ;
210+
211+ /*
212+ * Add each leading directory of the changed file, i.e. for
213+ * 'dir/subdir/file' add 'dir' and 'dir/subdir' as well, so
214+ * the Bloom filter could be used to speed up commands like
215+ * 'git log dir/subdir', too.
216+ *
217+ * Note that directories are added without the trailing '/'.
218+ */
219+ do {
220+ char * last_slash = strrchr (path , '/' );
221+
222+ FLEX_ALLOC_STR (e , path , path );
223+ hashmap_entry_init (& e -> entry , strhash (path ));
224+ hashmap_add (& pathmap , & e -> entry );
225+
226+ if (!last_slash )
227+ last_slash = (char * )path ;
228+ * last_slash = '\0' ;
229+
230+ } while (* path );
231+
232+ diff_free_filepair (diff_queued_diff .queue [i ]);
233+ }
234+
235+ filter -> len = (hashmap_get_size (& pathmap ) * settings .bits_per_entry + BITS_PER_WORD - 1 ) / BITS_PER_WORD ;
236+ filter -> data = xcalloc (filter -> len , sizeof (unsigned char ));
237+
238+ hashmap_for_each_entry (& pathmap , & iter , e , entry ) {
239+ struct bloom_key key ;
240+ fill_bloom_key (e -> path , strlen (e -> path ), & key , & settings );
241+ add_key_to_filter (& key , filter , & settings );
242+ }
243+
244+ hashmap_free_entries (& pathmap , struct pathmap_hash_entry , entry );
245+ } else {
246+ for (i = 0 ; i < diff_queued_diff .nr ; i ++ )
247+ diff_free_filepair (diff_queued_diff .queue [i ]);
248+ filter -> data = NULL ;
249+ filter -> len = 0 ;
250+ }
251+
252+ free (diff_queued_diff .queue );
253+ DIFF_QUEUE_CLEAR (& diff_queued_diff );
254+
255+ return filter ;
256+ }
257+
258+ int bloom_filter_contains (const struct bloom_filter * filter ,
259+ const struct bloom_key * key ,
260+ const struct bloom_filter_settings * settings )
261+ {
262+ int i ;
263+ uint64_t mod = filter -> len * BITS_PER_WORD ;
264+
265+ if (!mod )
266+ return -1 ;
267+
268+ for (i = 0 ; i < settings -> num_hashes ; i ++ ) {
269+ uint64_t hash_mod = key -> hashes [i ] % mod ;
270+ uint64_t block_pos = hash_mod / BITS_PER_WORD ;
271+ if (!(filter -> data [block_pos ] & get_bitmask (hash_mod )))
272+ return 0 ;
273+ }
274+
275+ return 1 ;
276+ }
0 commit comments