@@ -45,6 +45,7 @@ static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
4545static struct string_list tag_refs = STRING_LIST_INIT_NODUP ;
4646static struct refspec refspecs = REFSPEC_INIT_FETCH ;
4747static int anonymize ;
/*
 * Token mappings supplied by the user via --anonymize-map. This map is
 * passed as the option's value pointer and is consulted by anonymize_str()
 * before any per-context map, but only once it has been initialized
 * (i.e. its cmpfn is set).
 */
48+ static struct hashmap anonymized_seeds ;
4849static struct revision_sources revision_sources ;
4950
5051static int parse_opt_signed_tag_mode (const struct option * opt ,
@@ -119,57 +120,74 @@ static int has_unshown_parent(struct commit *commit)
119120}
120121
/*
 * An entry stored in an anonymization hashmap. "orig" is the original
 * string, kept inline as a flexible array and compared as a NUL-terminated
 * string by anonymized_entry_cmp(); "anon" is the replacement to emit.
 */
121122struct anonymized_entry {
123+ struct hashmap_entry hash ;
124+ const char * anon ;
125+ const char orig [FLEX_ARRAY ];
126+ };
127+
/*
 * A lookup-only key: a pointer/length view of the string being searched
 * for. The string need not be NUL-terminated (callers pass substrings such
 * as a single path component), which is why the length is carried
 * explicitly.
 */
128+ struct anonymized_entry_key {
122129 struct hashmap_entry hash ;
123130 const char * orig ;
124131 size_t orig_len ;
125- const char * anon ;
126- size_t anon_len ;
127132};
128133
/*
 * Comparison callback for the anonymization hashmaps. Returns 0 when the
 * two sides refer to the same original string and non-zero otherwise.
 *
 * "eptr" is always a stored struct anonymized_entry. If "keydata" is
 * non-NULL it is treated as a struct anonymized_entry_key and the stored
 * string must match the key's first orig_len bytes and end exactly there.
 * If "keydata" is NULL, "entry_or_key" is taken to be another stored entry
 * and the two NUL-terminated originals are compared with strcmp().
 */
129134static int anonymized_entry_cmp (const void * unused_cmp_data ,
130135 const struct hashmap_entry * eptr ,
131136 const struct hashmap_entry * entry_or_key ,
132- const void * unused_keydata )
137+ const void * keydata )
133138{
134139 const struct anonymized_entry * a , * b ;
135140
136141 a = container_of (eptr , const struct anonymized_entry , hash );
137- b = container_of (entry_or_key , const struct anonymized_entry , hash );
142+ if (keydata ) {
143+ const struct anonymized_entry_key * key = keydata ;
/* prefix match plus "entry ends here" == exact match of a length-delimited key */
144+ int equal = !strncmp (a -> orig , key -> orig , key -> orig_len ) &&
145+ !a -> orig [key -> orig_len ];
146+ return !equal ;
147+ }
138148
139- return a -> orig_len != b -> orig_len ||
140- memcmp (a -> orig , b -> orig , a -> orig_len );
149+ b = container_of ( entry_or_key , const struct anonymized_entry , hash );
150+ return strcmp (a -> orig , b -> orig );
141151}
142152
143153/*
144154 * Basically keep a cache of X->Y so that we can repeatedly replace
145155 * the same anonymized string with another. The actual generation
146156 * is farmed out to the generate function.
147157 */
/*
 * anonymize_str() parameters:
 *   map      - cache for this context; initialized here on first use.
 *   generate - called with "data" only when no mapping exists yet; its
 *              return value is stored as the entry's "anon" string.
 *   orig/len - the string to anonymize, given as pointer plus length (it
 *              need not be NUL-terminated).
 *   data     - opaque argument forwarded to generate().
 *
 * Lookup order: anonymized_seeds (if initialized), then "map". A newly
 * generated entry is always inserted into "map". The return value is the
 * "anon" pointer stored in the matching entry.
 */
148- static const void * anonymize_mem (struct hashmap * map ,
149- void * (* generate )(const void * , size_t * ),
150- const void * orig , size_t * len )
158+ static const char * anonymize_str (struct hashmap * map ,
159+ char * (* generate )(void * ),
160+ const char * orig , size_t len ,
161+ void * data )
151162{
152- struct anonymized_entry key , * ret ;
163+ struct anonymized_entry_key key ;
164+ struct anonymized_entry * ret ;
153165
154166 if (!map -> cmpfn )
155167 hashmap_init (map , anonymized_entry_cmp , NULL , 0 );
156168
157- hashmap_entry_init (& key .hash , memhash (orig , * len ));
169+ hashmap_entry_init (& key .hash , memhash (orig , len ));
158170 key .orig = orig ;
159- key .orig_len = * len ;
160- ret = hashmap_get_entry (map , & key , hash , NULL );
171+ key .orig_len = len ;
172+
173+ /* First check if it's a token the user configured manually... */
174+ if (anonymized_seeds .cmpfn )
175+ ret = hashmap_get_entry (& anonymized_seeds , & key , hash , & key );
176+ else
177+ ret = NULL ;
178+
179+ /* ...otherwise check if we've already seen it in this context... */
180+ if (!ret )
181+ ret = hashmap_get_entry (map , & key , hash , & key );
161182
183+ /* ...and finally generate a new mapping if necessary */
162184 if (!ret ) {
163- ret = xmalloc ( sizeof ( * ret ) );
185+ FLEX_ALLOC_MEM ( ret , orig , orig , len );
/* reuse the hash already computed for the lookup key */
164186 hashmap_entry_init (& ret -> hash , key .hash .hash );
165- ret -> orig = xstrdup (orig );
166- ret -> orig_len = * len ;
167- ret -> anon = generate (orig , len );
168- ret -> anon_len = * len ;
187+ ret -> anon = generate (data );
169188 hashmap_put (map , & ret -> hash );
170189 }
171190
172- * len = ret -> anon_len ;
173191 return ret -> anon ;
174192}
175193
@@ -181,13 +199,13 @@ static const void *anonymize_mem(struct hashmap *map,
181199 */
182200static void anonymize_path (struct strbuf * out , const char * path ,
183201 struct hashmap * map ,
184- void * (* generate )(const void * , size_t * ))
202+ char * (* generate )(void * ))
185203{
186204 while (* path ) {
187205 const char * end_of_component = strchrnul (path , '/' );
188206 size_t len = end_of_component - path ;
189- const char * c = anonymize_mem (map , generate , path , & len );
190- strbuf_add (out , c , len );
207+ const char * c = anonymize_str (map , generate , path , len , NULL );
208+ strbuf_addstr (out , c );
191209 path = end_of_component ;
192210 if (* path )
193211 strbuf_addch (out , * path ++ );
@@ -361,12 +379,12 @@ static void print_path_1(const char *path)
361379 printf ("%s" , path );
362380}
363381
/*
 * Generator callback for path components: returns the detached strbuf
 * contents "path<N>", where N comes from a function-static counter that
 * increments on every call. The "data" argument is not used.
 */
364- static void * anonymize_path_component (const void * path , size_t * len )
382+ static char * anonymize_path_component (void * data )
365383{
366384 static int counter ;
367385 struct strbuf out = STRBUF_INIT ;
368386 strbuf_addf (& out , "path%d" , counter ++ );
369- return strbuf_detach (& out , len );
387+ return strbuf_detach (& out , NULL );
370388}
371389
372390static void print_path (const char * path )
@@ -383,20 +401,23 @@ static void print_path(const char *path)
383401 }
384402}
385403
/*
 * Generator callback for object ids: builds a raw hash whose last four
 * bytes hold a big-endian, function-static counter (starting at 1) and
 * returns the result of hash_to_hex_algop_r() on a buffer allocated with
 * xmallocz(GIT_MAX_HEXSZ). The "data" argument is not used.
 *
 * NOTE(review): relies on hashclr() zeroing the leading bytes of "out" and
 * on hash_to_hex_algop_r() returning the "hex" buffer -- confirm against
 * their declarations.
 */
386- static void * generate_fake_oid (const void * old , size_t * len )
404+ static char * generate_fake_oid (void * data )
387405{
388406 static uint32_t counter = 1 ; /* avoid null oid */
389407 const unsigned hashsz = the_hash_algo -> rawsz ;
390- unsigned char * out = xcalloc (hashsz , 1 );
408+ unsigned char out [GIT_MAX_RAWSZ ];
409+ char * hex = xmallocz (GIT_MAX_HEXSZ );
410+
411+ hashclr (out );
/* the counter occupies the final 4 bytes of the active hash size */
391412 put_be32 (out + hashsz - 4 , counter ++ );
392- return out ;
413+ return hash_to_hex_algop_r ( hex , out , the_hash_algo ) ;
393414}
394415
/*
 * Map a hex object id string to its anonymized hex replacement. Mappings
 * are cached in the function-static "objs" map via anonymize_str(), with
 * generate_fake_oid() producing new values; "oid_hex" must be
 * NUL-terminated because its length is taken with strlen().
 */
395- static const struct object_id * anonymize_oid (const struct object_id * oid )
416+ static const char * anonymize_oid (const char * oid_hex )
396417{
397418 static struct hashmap objs ;
398- size_t len = the_hash_algo -> rawsz ;
399- return anonymize_mem (& objs , generate_fake_oid , oid , & len );
419+ size_t len = strlen ( oid_hex ) ;
420+ return anonymize_str (& objs , generate_fake_oid , oid_hex , len , NULL );
400421}
401422
402423static void show_filemodify (struct diff_queue_struct * q ,
@@ -455,9 +476,9 @@ static void show_filemodify(struct diff_queue_struct *q,
455476 */
456477 if (no_data || S_ISGITLINK (spec -> mode ))
457478 printf ("M %06o %s " , spec -> mode ,
458- oid_to_hex ( anonymize ?
459- anonymize_oid (& spec -> oid ) :
460- & spec -> oid ));
479+ anonymize ?
480+ anonymize_oid (oid_to_hex ( & spec -> oid ) ) :
481+ oid_to_hex ( & spec -> oid ));
461482 else {
462483 struct object * object = lookup_object (the_repository ,
463484 & spec -> oid );
@@ -493,12 +514,12 @@ static const char *find_encoding(const char *begin, const char *end)
493514 return bol ;
494515}
495516
/*
 * Generator callback for refname components: returns the detached strbuf
 * contents "ref<N>", where N comes from a function-static counter that
 * increments on every call. The "data" argument is not used.
 */
496- static void * anonymize_ref_component (const void * old , size_t * len )
517+ static char * anonymize_ref_component (void * data )
497518{
498519 static int counter ;
499520 struct strbuf out = STRBUF_INIT ;
500521 strbuf_addf (& out , "ref%d" , counter ++ );
501- return strbuf_detach (& out , len );
522+ return strbuf_detach (& out , NULL );
502523}
503524
504525static const char * anonymize_refname (const char * refname )
@@ -517,13 +538,6 @@ static const char *anonymize_refname(const char *refname)
517538 static struct strbuf anon = STRBUF_INIT ;
518539 int i ;
519540
520- /*
521- * We also leave "master" as a special case, since it does not reveal
522- * anything interesting.
523- */
524- if (!strcmp (refname , "refs/heads/master" ))
525- return refname ;
526-
527541 strbuf_reset (& anon );
528542 for (i = 0 ; i < ARRAY_SIZE (prefixes ); i ++ ) {
529543 if (skip_prefix (refname , prefixes [i ], & refname )) {
@@ -546,14 +560,13 @@ static char *anonymize_commit_message(const char *old)
546560 return xstrfmt ("subject %d\n\nbody\n" , counter ++ );
547561}
548562
/*
 * Generator callback for identities: returns the detached strbuf contents
 * "User <N> <user<N>@example.com>", using the same function-static counter
 * value for both the name and the address, then increments the counter.
 * The "data" argument is not used.
 */
549- static struct hashmap idents ;
550- static void * anonymize_ident (const void * old , size_t * len )
563+ static char * anonymize_ident (void * data )
551564{
552565 static int counter ;
553566 struct strbuf out = STRBUF_INIT ;
554567 strbuf_addf (& out , "User %d <user%d@example.com>" , counter , counter );
555568 counter ++ ;
556- return strbuf_detach (& out , len );
569+ return strbuf_detach (& out , NULL );
557570}
558571
559572/*
@@ -563,6 +576,7 @@ static void *anonymize_ident(const void *old, size_t *len)
563576 */
564577static void anonymize_ident_line (const char * * beg , const char * * end )
565578{
579+ static struct hashmap idents ;
566580 static struct strbuf buffers [] = { STRBUF_INIT , STRBUF_INIT };
567581 static unsigned which_buffer ;
568582
@@ -588,9 +602,9 @@ static void anonymize_ident_line(const char **beg, const char **end)
588602 size_t len ;
589603
590604 len = split .mail_end - split .name_begin ;
591- ident = anonymize_mem (& idents , anonymize_ident ,
592- split .name_begin , & len );
593- strbuf_add (out , ident , len );
605+ ident = anonymize_str (& idents , anonymize_ident ,
606+ split .name_begin , len , NULL );
607+ strbuf_addstr (out , ident );
594608 strbuf_addch (out , ' ' );
595609 strbuf_add (out , split .date_begin , split .tz_end - split .date_begin );
596610 } else {
@@ -712,9 +726,10 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
712726 if (mark )
713727 printf (":%d\n" , mark );
714728 else
715- printf ("%s\n" , oid_to_hex (anonymize ?
716- anonymize_oid (& obj -> oid ) :
717- & obj -> oid ));
729+ printf ("%s\n" ,
730+ anonymize ?
731+ anonymize_oid (oid_to_hex (& obj -> oid )) :
732+ oid_to_hex (& obj -> oid ));
718733 i ++ ;
719734 }
720735
@@ -729,12 +744,12 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
729744 show_progress ();
730745}
731746
/*
 * Generator callback for tag messages: returns the detached strbuf
 * contents "tag message <N>", where N comes from a function-static counter
 * that increments on every call. The "data" argument is not used.
 */
732- static void * anonymize_tag (const void * old , size_t * len )
747+ static char * anonymize_tag (void * data )
733748{
734749 static int counter ;
735750 struct strbuf out = STRBUF_INIT ;
736751 strbuf_addf (& out , "tag message %d" , counter ++ );
737- return strbuf_detach (& out , len );
752+ return strbuf_detach (& out , NULL );
738753}
739754
740755static void handle_tail (struct object_array * commits , struct rev_info * revs ,
@@ -804,8 +819,8 @@ static void handle_tag(const char *name, struct tag *tag)
804819 name = anonymize_refname (name );
805820 if (message ) {
806821 static struct hashmap tags ;
807- message = anonymize_mem (& tags , anonymize_tag ,
808- message , & message_size );
822+ message = anonymize_str (& tags , anonymize_tag ,
823+ message , message_size , NULL );
809824 }
810825 }
811826
@@ -1136,6 +1151,37 @@ static void handle_deletes(void)
11361151 }
11371152}
11381153
/*
 * Generator callback used when seeding the map from --anonymize-map:
 * "data" is the user-chosen replacement string, and a copy made with
 * xstrdup() is returned as the anonymized value.
 */
1154+ static char * anonymize_seed (void * data )
1155+ {
1156+ return xstrdup (data );
1157+ }
1158+
/*
 * Option callback for --anonymize-map. "arg" is either "<from>:<to>" or a
 * bare token; a bare token maps to itself. The text before the first ':'
 * is the key and everything after it is the replacement.
 *
 * Returns the result of error() if the key or the replacement is empty,
 * otherwise records the mapping in opt->value (a struct hashmap) through
 * anonymize_str() and returns 0. Because anonymize_str() returns an
 * existing entry when one matches, a repeated <from> keeps its first
 * mapping.
 */
1159+ static int parse_opt_anonymize_map (const struct option * opt ,
1160+ const char * arg , int unset )
1161+ {
1162+ struct hashmap * map = opt -> value ;
1163+ const char * delim , * value ;
1164+ size_t keylen ;
1165+
1166+ BUG_ON_OPT_NEG (unset );
1167+
1168+ delim = strchr (arg , ':' );
1169+ if (delim ) {
1170+ keylen = delim - arg ;
1171+ value = delim + 1 ;
1172+ } else {
/* no ':' given: the whole argument is both key and replacement */
1173+ keylen = strlen (arg );
1174+ value = arg ;
1175+ }
1176+
1177+ if (!keylen || !* value )
1178+ return error (_ ("--anonymize-map token cannot be empty" ));
1179+
/* anonymize_seed() copies "value", so the entry does not alias argv */
1180+ anonymize_str (map , anonymize_seed , arg , keylen , (void * )value );
1181+
1182+ return 0 ;
1183+ }
1184+
11391185int cmd_fast_export (int argc , const char * * argv , const char * prefix )
11401186{
11411187 struct rev_info revs ;
@@ -1177,6 +1223,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
11771223 OPT_STRING_LIST (0 , "refspec" , & refspecs_list , N_ ("refspec" ),
11781224 N_ ("Apply refspec to exported refs" )),
11791225 OPT_BOOL (0 , "anonymize" , & anonymize , N_ ("anonymize output" )),
1226+ OPT_CALLBACK_F (0 , "anonymize-map" , & anonymized_seeds , N_ ("from:to" ),
1227+ N_ ("convert <from> to <to> in anonymized output" ),
1228+ PARSE_OPT_NONEG , parse_opt_anonymize_map ),
11801229 OPT_BOOL (0 , "reference-excluded-parents" ,
11811230 & reference_excluded_commits , N_ ("Reference parents which are not in fast-export stream by object id" )),
11821231 OPT_BOOL (0 , "show-original-ids" , & show_original_ids ,
@@ -1204,6 +1253,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
12041253 if (argc > 1 )
12051254 usage_with_options (fast_export_usage , options );
12061255
1256+ if (anonymized_seeds .cmpfn && !anonymize )
1257+ die (_ ("--anonymize-map without --anonymize does not make sense" ));
1258+
12071259 if (refspecs_list .nr ) {
12081260 int i ;
12091261
0 commit comments