99#include "blame.h"
1010#include "alloc.h"
1111#include "commit-slab.h"
12+ #include "bloom.h"
13+ #include "commit-graph.h"
1214
1315define_commit_slab (blame_suspects , struct blame_origin * );
1416static struct blame_suspects blame_suspects ;
@@ -1246,13 +1248,74 @@ static int fill_blob_sha1_and_mode(struct repository *r,
12461248 return -1 ;
12471249}
12481250
/*
 * Changed-path Bloom filter state carried along during a blame.
 *
 * The keys can help avoid computing diffs against first parents, but
 * the list has to be extended as code is moved or files are renamed.
 */
struct blame_bloom_data {
	/* Filter settings used when building and querying the keys. */
	struct bloom_filter_settings *settings;

	/* Array of individually allocated keys, one per tracked path. */
	struct bloom_key **keys;

	/* Number of entries of `keys` currently in use. */
	int nr;

	/* Number of entries `keys` has room for. */
	int alloc;
};
1262+
1263+ static int bloom_count_queries = 0 ;
1264+ static int bloom_count_no = 0 ;
1265+ static int maybe_changed_path (struct repository * r ,
1266+ struct blame_origin * origin ,
1267+ struct blame_bloom_data * bd )
1268+ {
1269+ int i ;
1270+ struct bloom_filter * filter ;
1271+
1272+ if (!bd )
1273+ return 1 ;
1274+
1275+ if (origin -> commit -> generation == GENERATION_NUMBER_INFINITY )
1276+ return 1 ;
1277+
1278+ filter = get_bloom_filter (r , origin -> commit , 0 );
1279+
1280+ if (!filter )
1281+ return 1 ;
1282+
1283+ bloom_count_queries ++ ;
1284+ for (i = 0 ; i < bd -> nr ; i ++ ) {
1285+ if (bloom_filter_contains (filter ,
1286+ bd -> keys [i ],
1287+ bd -> settings ))
1288+ return 1 ;
1289+ }
1290+
1291+ bloom_count_no ++ ;
1292+ return 0 ;
1293+ }
1294+
1295+ static void add_bloom_key (struct blame_bloom_data * bd ,
1296+ const char * path )
1297+ {
1298+ if (!bd )
1299+ return ;
1300+
1301+ if (bd -> nr >= bd -> alloc ) {
1302+ bd -> alloc *= 2 ;
1303+ REALLOC_ARRAY (bd -> keys , bd -> alloc );
1304+ }
1305+
1306+ bd -> keys [bd -> nr ] = xmalloc (sizeof (struct bloom_key ));
1307+ fill_bloom_key (path , strlen (path ), bd -> keys [bd -> nr ], bd -> settings );
1308+ bd -> nr ++ ;
1309+ }
1310+
12491311/*
12501312 * We have an origin -- check if the same path exists in the
12511313 * parent and return an origin structure to represent it.
12521314 */
12531315static struct blame_origin * find_origin (struct repository * r ,
12541316 struct commit * parent ,
1255- struct blame_origin * origin )
1317+ struct blame_origin * origin ,
1318+ struct blame_bloom_data * bd )
12561319{
12571320 struct blame_origin * porigin ;
12581321 struct diff_options diff_opts ;
@@ -1286,10 +1349,18 @@ static struct blame_origin *find_origin(struct repository *r,
12861349
12871350 if (is_null_oid (& origin -> commit -> object .oid ))
12881351 do_diff_cache (get_commit_tree_oid (parent ), & diff_opts );
1289- else
1290- diff_tree_oid (get_commit_tree_oid (parent ),
1291- get_commit_tree_oid (origin -> commit ),
1292- "" , & diff_opts );
1352+ else {
1353+ int compute_diff = 1 ;
1354+ if (origin -> commit -> parents &&
1355+ !oidcmp (& parent -> object .oid ,
1356+ & origin -> commit -> parents -> item -> object .oid ))
1357+ compute_diff = maybe_changed_path (r , origin , bd );
1358+
1359+ if (compute_diff )
1360+ diff_tree_oid (get_commit_tree_oid (parent ),
1361+ get_commit_tree_oid (origin -> commit ),
1362+ "" , & diff_opts );
1363+ }
12931364 diffcore_std (& diff_opts );
12941365
12951366 if (!diff_queued_diff .nr ) {
@@ -1341,7 +1412,8 @@ static struct blame_origin *find_origin(struct repository *r,
13411412 */
13421413static struct blame_origin * find_rename (struct repository * r ,
13431414 struct commit * parent ,
1344- struct blame_origin * origin )
1415+ struct blame_origin * origin ,
1416+ struct blame_bloom_data * bd )
13451417{
13461418 struct blame_origin * porigin = NULL ;
13471419 struct diff_options diff_opts ;
@@ -1366,6 +1438,7 @@ static struct blame_origin *find_rename(struct repository *r,
13661438 struct diff_filepair * p = diff_queued_diff .queue [i ];
13671439 if ((p -> status == 'R' || p -> status == 'C' ) &&
13681440 !strcmp (p -> two -> path , origin -> path )) {
1441+ add_bloom_key (bd , p -> one -> path );
13691442 porigin = get_origin (parent , p -> one -> path );
13701443 oidcpy (& porigin -> blob_oid , & p -> one -> oid );
13711444 porigin -> mode = p -> one -> mode ;
@@ -2332,6 +2405,11 @@ static void distribute_blame(struct blame_scoreboard *sb, struct blame_entry *bl
23322405
23332406#define MAXSG 16
23342407
/*
 * Signature shared by find_origin() and find_rename(), so that
 * pass_blame() can select one of them per pass.
 */
typedef struct blame_origin *(*blame_find_alg)(struct repository *r,
					       struct commit *parent,
					       struct blame_origin *origin,
					       struct blame_bloom_data *bd);
2412+
23352413static void pass_blame (struct blame_scoreboard * sb , struct blame_origin * origin , int opt )
23362414{
23372415 struct rev_info * revs = sb -> revs ;
@@ -2356,8 +2434,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
23562434 * common cases, then we look for renames in the second pass.
23572435 */
23582436 for (pass = 0 ; pass < 2 - sb -> no_whole_file_rename ; pass ++ ) {
2359- struct blame_origin * (* find )(struct repository * , struct commit * , struct blame_origin * );
2360- find = pass ? find_rename : find_origin ;
2437+ blame_find_alg find = pass ? find_rename : find_origin ;
23612438
23622439 for (i = 0 , sg = first_scapegoat (revs , commit , sb -> reverse );
23632440 i < num_sg && sg ;
@@ -2369,7 +2446,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
23692446 continue ;
23702447 if (parse_commit (p ))
23712448 continue ;
2372- porigin = find (sb -> repo , p , origin );
2449+ porigin = find (sb -> repo , p , origin , sb -> bloom_data );
23732450 if (!porigin )
23742451 continue ;
23752452 if (oideq (& porigin -> blob_oid , & origin -> blob_oid )) {
@@ -2809,3 +2886,45 @@ struct blame_entry *blame_entry_prepend(struct blame_entry *head,
28092886 blame_origin_incref (o );
28102887 return new_head ;
28112888}
2889+
2890+ void setup_blame_bloom_data (struct blame_scoreboard * sb ,
2891+ const char * path )
2892+ {
2893+ struct blame_bloom_data * bd ;
2894+
2895+ if (!sb -> repo -> objects -> commit_graph )
2896+ return ;
2897+
2898+ if (!sb -> repo -> objects -> commit_graph -> bloom_filter_settings )
2899+ return ;
2900+
2901+ bd = xmalloc (sizeof (struct blame_bloom_data ));
2902+
2903+ bd -> settings = sb -> repo -> objects -> commit_graph -> bloom_filter_settings ;
2904+
2905+ bd -> alloc = 4 ;
2906+ bd -> nr = 0 ;
2907+ ALLOC_ARRAY (bd -> keys , bd -> alloc );
2908+
2909+ add_bloom_key (bd , path );
2910+
2911+ sb -> bloom_data = bd ;
2912+ }
2913+
2914+ void cleanup_scoreboard (struct blame_scoreboard * sb )
2915+ {
2916+ if (sb -> bloom_data ) {
2917+ int i ;
2918+ for (i = 0 ; i < sb -> bloom_data -> nr ; i ++ ) {
2919+ free (sb -> bloom_data -> keys [i ]-> hashes );
2920+ free (sb -> bloom_data -> keys [i ]);
2921+ }
2922+ free (sb -> bloom_data -> keys );
2923+ FREE_AND_NULL (sb -> bloom_data );
2924+
2925+ trace2_data_intmax ("blame" , sb -> repo ,
2926+ "bloom/queries" , bloom_count_queries );
2927+ trace2_data_intmax ("blame" , sb -> repo ,
2928+ "bloom/response-no" , bloom_count_no );
2929+ }
2930+ }
0 commit comments