44#include "lockfile.h"
55#include "packfile.h"
66#include "object-store.h"
7+ #include "packfile.h"
78#include "midx.h"
89
910#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
@@ -182,12 +183,21 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
182183 packs -> list [packs -> nr ] = add_packed_git (full_path ,
183184 full_path_len ,
184185 0 );
186+
185187 if (!packs -> list [packs -> nr ]) {
186188 warning (_ ("failed to add packfile '%s'" ),
187189 full_path );
188190 return ;
189191 }
190192
193+ if (open_pack_index (packs -> list [packs -> nr ])) {
194+ warning (_ ("failed to open pack-index '%s'" ),
195+ full_path );
196+ close_pack (packs -> list [packs -> nr ]);
197+ FREE_AND_NULL (packs -> list [packs -> nr ]);
198+ return ;
199+ }
200+
191201 packs -> names [packs -> nr ] = xstrdup (file_name );
192202 packs -> pack_name_concat_len += strlen (file_name ) + 1 ;
193203 packs -> nr ++ ;
@@ -228,6 +238,119 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p
228238 free (pairs );
229239}
230240
241+ struct pack_midx_entry {
242+ struct object_id oid ;
243+ uint32_t pack_int_id ;
244+ time_t pack_mtime ;
245+ uint64_t offset ;
246+ };
247+
248+ static int midx_oid_compare (const void * _a , const void * _b )
249+ {
250+ const struct pack_midx_entry * a = (const struct pack_midx_entry * )_a ;
251+ const struct pack_midx_entry * b = (const struct pack_midx_entry * )_b ;
252+ int cmp = oidcmp (& a -> oid , & b -> oid );
253+
254+ if (cmp )
255+ return cmp ;
256+
257+ if (a -> pack_mtime > b -> pack_mtime )
258+ return -1 ;
259+ else if (a -> pack_mtime < b -> pack_mtime )
260+ return 1 ;
261+
262+ return a -> pack_int_id - b -> pack_int_id ;
263+ }
264+
265+ static void fill_pack_entry (uint32_t pack_int_id ,
266+ struct packed_git * p ,
267+ uint32_t cur_object ,
268+ struct pack_midx_entry * entry )
269+ {
270+ if (!nth_packed_object_oid (& entry -> oid , p , cur_object ))
271+ die (_ ("failed to locate object %d in packfile" ), cur_object );
272+
273+ entry -> pack_int_id = pack_int_id ;
274+ entry -> pack_mtime = p -> mtime ;
275+
276+ entry -> offset = nth_packed_object_offset (p , cur_object );
277+ }
278+
279+ /*
280+ * It is possible to artificially get into a state where there are many
281+ * duplicate copies of objects. That can create high memory pressure if
282+ * we are to create a list of all objects before de-duplication. To reduce
283+ * this memory pressure without a significant performance drop, automatically
284+ * group objects by the first byte of their object id. Use the IDX fanout
285+ * tables to group the data, copy to a local array, then sort.
286+ *
287+ * Copy only the de-duplicated entries (selected by most-recent modified time
288+ * of a packfile containing the object).
289+ */
290+ static struct pack_midx_entry * get_sorted_entries (struct packed_git * * p ,
291+ uint32_t * perm ,
292+ uint32_t nr_packs ,
293+ uint32_t * nr_objects )
294+ {
295+ uint32_t cur_fanout , cur_pack , cur_object ;
296+ uint32_t alloc_fanout , alloc_objects , total_objects = 0 ;
297+ struct pack_midx_entry * entries_by_fanout = NULL ;
298+ struct pack_midx_entry * deduplicated_entries = NULL ;
299+
300+ for (cur_pack = 0 ; cur_pack < nr_packs ; cur_pack ++ )
301+ total_objects += p [cur_pack ]-> num_objects ;
302+
303+ /*
304+ * As we de-duplicate by fanout value, we expect the fanout
305+ * slices to be evenly distributed, with some noise. Hence,
306+ * allocate slightly more than one 256th.
307+ */
308+ alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16 ;
309+
310+ ALLOC_ARRAY (entries_by_fanout , alloc_fanout );
311+ ALLOC_ARRAY (deduplicated_entries , alloc_objects );
312+ * nr_objects = 0 ;
313+
314+ for (cur_fanout = 0 ; cur_fanout < 256 ; cur_fanout ++ ) {
315+ uint32_t nr_fanout = 0 ;
316+
317+ for (cur_pack = 0 ; cur_pack < nr_packs ; cur_pack ++ ) {
318+ uint32_t start = 0 , end ;
319+
320+ if (cur_fanout )
321+ start = get_pack_fanout (p [cur_pack ], cur_fanout - 1 );
322+ end = get_pack_fanout (p [cur_pack ], cur_fanout );
323+
324+ for (cur_object = start ; cur_object < end ; cur_object ++ ) {
325+ ALLOC_GROW (entries_by_fanout , nr_fanout + 1 , alloc_fanout );
326+ fill_pack_entry (perm [cur_pack ], p [cur_pack ], cur_object , & entries_by_fanout [nr_fanout ]);
327+ nr_fanout ++ ;
328+ }
329+ }
330+
331+ QSORT (entries_by_fanout , nr_fanout , midx_oid_compare );
332+
333+ /*
334+ * The batch is now sorted by OID and then mtime (descending).
335+ * Take only the first duplicate.
336+ */
337+ for (cur_object = 0 ; cur_object < nr_fanout ; cur_object ++ ) {
338+ if (cur_object && !oidcmp (& entries_by_fanout [cur_object - 1 ].oid ,
339+ & entries_by_fanout [cur_object ].oid ))
340+ continue ;
341+
342+ ALLOC_GROW (deduplicated_entries , * nr_objects + 1 , alloc_objects );
343+ memcpy (& deduplicated_entries [* nr_objects ],
344+ & entries_by_fanout [cur_object ],
345+ sizeof (struct pack_midx_entry ));
346+ (* nr_objects )++ ;
347+ }
348+ }
349+
350+ free (entries_by_fanout );
351+ return deduplicated_entries ;
352+ }
353+
231354static size_t write_midx_pack_names (struct hashfile * f ,
232355 char * * pack_names ,
233356 uint32_t num_packs )
@@ -271,6 +394,8 @@ int write_midx_file(const char *object_dir)
271394 uint64_t written = 0 ;
272395 uint32_t chunk_ids [MIDX_MAX_CHUNKS + 1 ];
273396 uint64_t chunk_offsets [MIDX_MAX_CHUNKS + 1 ];
397+ uint32_t nr_entries ;
398+ struct pack_midx_entry * entries = NULL ;
274399
275400 midx_name = get_midx_filename (object_dir );
276401 if (safe_create_leading_directories (midx_name )) {
@@ -296,6 +421,8 @@ int write_midx_file(const char *object_dir)
296421 ALLOC_ARRAY (pack_perm , packs .nr );
297422 sort_packs_by_name (packs .names , packs .nr , pack_perm );
298423
424+ entries = get_sorted_entries (packs .list , pack_perm , packs .nr , & nr_entries );
425+
299426 hold_lock_file_for_update (& lk , midx_name , LOCK_DIE_ON_ERROR );
300427 f = hashfd (lk .tempfile -> fd , lk .tempfile -> filename .buf );
301428 FREE_AND_NULL (midx_name );
@@ -365,5 +492,6 @@ int write_midx_file(const char *object_dir)
365492
366493 free (packs .list );
367494 free (packs .names );
495+ free (entries );
368496 return 0 ;
369497}
0 commit comments