@@ -887,9 +887,32 @@ static int has_file_name(struct index_state *istate,
887887 return retval ;
888888}
889889
890+
/*
 * Compare two NUL-terminated strings the way strcmp() does and, when
 * the caller asks for it, also report where they first differ.
 *
 * If "first_change" is NULL this is a plain strcmp() call.  Otherwise
 * "*first_change" receives the index of the first byte at which s1
 * and s2 differ; for equal strings that index is their length.
 *
 * Returns zero when the strings are equal, otherwise the difference
 * between the first pair of differing bytes, each taken as an
 * unsigned char (negative if s1 sorts first, positive if s2 does).
 */
int strcmp_offset(const char *s1, const char *s2, size_t *first_change)
{
	const unsigned char *lhs = (const unsigned char *)s1;
	const unsigned char *rhs = (const unsigned char *)s2;
	size_t common = 0;

	if (!first_change)
		return strcmp(s1, s2);

	/* Walk the shared prefix; stop at a mismatch or at the terminator. */
	while (lhs[common] == rhs[common] && lhs[common] != '\0')
		common++;

	*first_change = common;
	return lhs[common] - rhs[common];
}
909+
890910/*
891911 * Do we have another file with a pathname that is a proper
892912 * subset of the name we're trying to add?
913+ *
914+ * That is, is there another file in the index with a path
915+ * that matches a sub-directory in the given entry?
893916 */
894917static int has_dir_name (struct index_state * istate ,
895918 const struct cache_entry * ce , int pos , int ok_to_replace )
@@ -898,9 +921,51 @@ static int has_dir_name(struct index_state *istate,
898921 int stage = ce_stage (ce );
899922 const char * name = ce -> name ;
900923 const char * slash = name + ce_namelen (ce );
924+ size_t len_eq_last ;
925+ int cmp_last = 0 ;
926+
927+ /*
928+ * We are frequently called during an iteration on a sorted
929+ * list of pathnames and while building a new index. Therefore,
930+ * there is a high probability that this entry will eventually
931+ * be appended to the index, rather than inserted in the middle.
932+ * If we can confirm that, we can avoid binary searches on the
933+ * components of the pathname.
934+ *
935+ * Compare the entry's full path with the last path in the index.
936+ */
937+ if (istate -> cache_nr > 0 ) {
938+ cmp_last = strcmp_offset (name ,
939+ istate -> cache [istate -> cache_nr - 1 ]-> name ,
940+ & len_eq_last );
941+ if (cmp_last > 0 ) {
942+ if (len_eq_last == 0 ) {
943+ /*
944+ * The entry sorts AFTER the last one in the
945+ * index and their paths have no common prefix,
946+ * so there cannot be a F/D conflict.
947+ */
948+ return retval ;
949+ } else {
950+ /*
951+ * The entry sorts AFTER the last one in the
952+ * index, but has a common prefix. Fall through
953+ * to the loop below to dissect the entry's path
954+ * and see where the difference is.
955+ */
956+ }
957+ } else if (cmp_last == 0 ) {
958+ /*
959+ * The entry exactly matches the last one in the
960+ * index, but because of multiple stage and CE_REMOVE
961+ * items, we fall through and let the regular search
962+ * code handle it.
963+ */
964+ }
965+ }
901966
902967 for (;;) {
903- int len ;
968+ size_t len ;
904969
905970 for (;;) {
906971 if (* -- slash == '/' )
@@ -910,6 +975,67 @@ static int has_dir_name(struct index_state *istate,
910975 }
911976 len = slash - name ;
912977
978+ if (cmp_last > 0 ) {
979+ /*
980+ * (len + 1) is a directory boundary (including
981+ * the trailing slash). And since the loop is
982+ * decrementing "slash", the first iteration is
983+ * the longest directory prefix; subsequent
984+ * iterations consider parent directories.
985+ */
986+
987+ if (len + 1 <= len_eq_last ) {
988+ /*
989+ * The directory prefix (including the trailing
990+ * slash) also appears as a prefix in the last
991+ * entry, so the remainder cannot collide (because
992+ * strcmp said the whole path was greater).
993+ *
994+ * EQ: last: xxx/A
995+ * this: xxx/B
996+ *
997+ * LT: last: xxx/file_A
998+ * this: xxx/file_B
999+ */
1000+ return retval ;
1001+ }
1002+
1003+ if (len > len_eq_last ) {
1004+ /*
1005+ * This part of the directory prefix (excluding
1006+ * the trailing slash) is longer than the known
1007+ * equal portions, so this sub-directory cannot
1008+ * collide with a file.
1009+ *
1010+ * GT: last: xxxA
1011+ * this: xxxB/file
1012+ */
1013+ return retval ;
1014+ }
1015+
1016+ if (istate -> cache_nr > 0 &&
1017+ ce_namelen (istate -> cache [istate -> cache_nr - 1 ]) > len ) {
1018+ /*
1019+ * The directory prefix lines up with part of
1020+ * a longer file or directory name, but sorts
1021+ * after it, so this sub-directory cannot
1022+ * collide with a file.
1023+ *
1024+ * last: xxx/yy-file (because '-' sorts before '/')
1025+ * this: xxx/yy/abc
1026+ */
1027+ return retval ;
1028+ }
1029+
1030+ /*
1031+ * This is a possible collision. Fall through and
1032+ * let the regular search code handle it.
1033+ *
1034+ * last: xxx
1035+ * this: xxx/file
1036+ */
1037+ }
1038+
9131039 pos = index_name_stage_pos (istate , name , len , stage );
9141040 if (pos >= 0 ) {
9151041 /*
@@ -1001,7 +1127,16 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
10011127
10021128 if (!(option & ADD_CACHE_KEEP_CACHE_TREE ))
10031129 cache_tree_invalidate_path (istate , ce -> name );
1004- pos = index_name_stage_pos (istate , ce -> name , ce_namelen (ce ), ce_stage (ce ));
1130+
1131+ /*
1132+ * If this entry's path sorts after the last entry in the index,
1133+ * we can avoid searching for it.
1134+ */
1135+ if (istate -> cache_nr > 0 &&
1136+ strcmp (ce -> name , istate -> cache [istate -> cache_nr - 1 ]-> name ) > 0 )
1137+ pos = - istate -> cache_nr - 1 ;
1138+ else
1139+ pos = index_name_stage_pos (istate , ce -> name , ce_namelen (ce ), ce_stage (ce ));
10051140
10061141 /* existing match? Just replace it. */
10071142 if (pos >= 0 ) {
0 commit comments