77
88/*
99 * How to handle various characters in refnames:
10- * This table is used by both the SIMD and non-SIMD code. It has
11- * some cases that are only useful for the SIMD; these are handled
12- * equivalently to the listed disposition in the non-SIMD code.
1310 * 0: An acceptable character for refs
14- * 1: @, look for a following { to reject @{ in refs (SIMD or = 0)
15- * 2: \0: End-of-component and string
16- * 3: /: End-of-component (SIMD or = 2)
17- * 4: ., look for a preceding . to reject .. in refs
18- * 5: {, look for a preceding @ to reject @{ in refs
19- * 6: *, usually a bad character except, once as a wildcard (SIMD or = 7)
20- * 7: A bad character except * (see check_refname_component below)
11+ * 1: End-of-component
12+ * 2: ., look for a preceding . to reject .. in refs
13+ * 3: {, look for a preceding @ to reject @{ in refs
14+ * 4: A bad character: ASCII control characters, "~", "^", ":", SP, "*", "?", "[" or "\"
2115 */
2216static unsigned char refname_disposition [256 ] = {
23- 2 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 ,
24- 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 ,
25- 7 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 6 , 0 , 0 , 0 , 4 , 3 ,
26- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 7 , 0 , 0 , 0 , 0 , 7 ,
27- 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
28- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 7 , 7 , 0 , 7 , 0 ,
17+ 1 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 ,
18+ 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 ,
19+ 4 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 4 , 0 , 0 , 0 , 2 , 1 ,
20+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 4 , 0 , 0 , 0 , 0 , 4 ,
2921 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
30- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 5 , 0 , 0 , 7 , 7
22+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 4 , 4 , 0 , 4 , 0 ,
23+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
24+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 3 , 0 , 0 , 4 , 4
3125};
3226
3327/*
@@ -39,9 +33,8 @@ static unsigned char refname_disposition[256] = {
3933 * - any path component of it begins with ".", or
4034 * - it has double dots "..", or
4135 * - it has ASCII control character, "~", "^", ":" or SP, anywhere, or
42- * - it has pattern-matching notation "*", "?", "[", anywhere, or
43- * - it ends with a "/", or
44- * - it ends with ".lock", or
36+ * - it ends with a "/", or
37+ * - it ends with ".lock", or
4538 * - it contains a "\" (backslash)
4639 */
4740static int check_refname_component (const char * refname , int flags )
@@ -53,19 +46,17 @@ static int check_refname_component(const char *refname, int flags)
5346 int ch = * cp & 255 ;
5447 unsigned char disp = refname_disposition [ch ];
5548 switch (disp ) {
56- case 2 : /* fall-through */
57- case 3 :
49+ case 1 :
5850 goto out ;
59- case 4 :
51+ case 2 :
6052 if (last == '.' )
6153 return -1 ; /* Refname contains "..". */
6254 break ;
63- case 5 :
55+ case 3 :
6456 if (last == '@' )
6557 return -1 ; /* Refname contains "@{". */
6658 break ;
67- case 6 : /* fall-through */
68- case 7 :
59+ case 4 :
6960 return -1 ;
7061 }
7162 last = ch ;
@@ -88,7 +79,7 @@ static int check_refname_component(const char *refname, int flags)
8879 return cp - refname ;
8980}
9081
91- static int check_refname_format_bytewise (const char * refname , int flags )
82+ int check_refname_format (const char * refname , int flags )
9283{
9384 int component_len , component_count = 0 ;
9485
@@ -124,195 +115,6 @@ static int check_refname_format_bytewise(const char *refname, int flags)
124115 return 0 ;
125116}
126117
127- #if defined(__GNUC__ ) && defined(__x86_64__ )
128- #define SSE_VECTOR_BYTES 16
129-
130- /* Vectorized version of check_refname_format. */
131- int check_refname_format (const char * refname , int flags )
132- {
133- const char * cp = refname ;
134-
135- const __m128i dot = _mm_set1_epi8 ('.' );
136- const __m128i at = _mm_set1_epi8 ('@' );
137- const __m128i curly = _mm_set1_epi8 ('{' );
138- const __m128i slash = _mm_set1_epi8 ('/' );
139- const __m128i zero = _mm_set1_epi8 ('\000' );
140- const __m128i el = _mm_set1_epi8 ('l' );
141-
142- /* below '*', all characters are forbidden or rare */
143- const __m128i star_ub = _mm_set1_epi8 ('*' + 1 );
144-
145- const __m128i colon = _mm_set1_epi8 (':' );
146- const __m128i question = _mm_set1_epi8 ('?' );
147-
148- /* '['..'^' contains 4 characters: 3 forbidden and 1 rare */
149- const __m128i bracket_lb = _mm_set1_epi8 ('[' - 1 );
150- const __m128i caret_ub = _mm_set1_epi8 ('^' + 1 );
151-
152- /* '~' and above are forbidden */
153- const __m128i tilde_lb = _mm_set1_epi8 ('~' - 1 );
154-
155- int component_count = 0 ;
156-
157- if (refname [0 ] == 0 || refname [0 ] == '/' ) {
158- /* entirely empty ref or initial ref component */
159- return -1 ;
160- }
161-
162- /*
163- * Initial ref component of '.'; below we look for /. so we'll
164- * miss this.
165- */
166- if (refname [0 ] == '.' ) {
167- if (refname [1 ] == '/' || refname [1 ] == '\0' )
168- return -1 ;
169- if (!(flags & REFNAME_DOT_COMPONENT ))
170- return -1 ;
171- }
172- while (1 ) {
173- __m128i tmp , tmp1 , result ;
174- uint64_t mask ;
175-
176- if ((uintptr_t ) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES - 1 )
177- /*
178- * End-of-page; fall back to slow method for
179- * this entire ref.
180- */
181- return check_refname_format_bytewise (refname , flags );
182-
183- tmp = _mm_loadu_si128 ((__m128i * )cp );
184- tmp1 = _mm_loadu_si128 ((__m128i * )(cp + 1 ));
185-
186- /*
187- * This range (note the lt) contains some
188- * permissible-but-rare characters (including all
189- * characters >= 128), which we handle later. It also
190- * includes \000.
191- */
192- result = _mm_cmplt_epi8 (tmp , star_ub );
193-
194- result = _mm_or_si128 (result , _mm_cmpeq_epi8 (tmp , question ));
195- result = _mm_or_si128 (result , _mm_cmpeq_epi8 (tmp , colon ));
196-
197- /* This range contains the permissible ] as bycatch */
198- result = _mm_or_si128 (result , _mm_and_si128 (
199- _mm_cmpgt_epi8 (tmp , bracket_lb ),
200- _mm_cmplt_epi8 (tmp , caret_ub )));
201-
202- result = _mm_or_si128 (result , _mm_cmpgt_epi8 (tmp , tilde_lb ));
203-
204- /* .. */
205- result = _mm_or_si128 (result , _mm_and_si128 (
206- _mm_cmpeq_epi8 (tmp , dot ),
207- _mm_cmpeq_epi8 (tmp1 , dot )));
208- /* @{ */
209- result = _mm_or_si128 (result , _mm_and_si128 (
210- _mm_cmpeq_epi8 (tmp , at ),
211- _mm_cmpeq_epi8 (tmp1 , curly )));
212- /* // */
213- result = _mm_or_si128 (result , _mm_and_si128 (
214- _mm_cmpeq_epi8 (tmp , slash ),
215- _mm_cmpeq_epi8 (tmp1 , slash )));
216- /* trailing / */
217- result = _mm_or_si128 (result , _mm_and_si128 (
218- _mm_cmpeq_epi8 (tmp , slash ),
219- _mm_cmpeq_epi8 (tmp1 , zero )));
220- /* .l, beginning of .lock */
221- result = _mm_or_si128 (result , _mm_and_si128 (
222- _mm_cmpeq_epi8 (tmp , dot ),
223- _mm_cmpeq_epi8 (tmp1 , el )));
224- /*
225- * Even though /. is not necessarily an error, we flag
226- * it anyway. If we find it, we'll check if it's valid
227- * and if so we'll advance just past it.
228- */
229- result = _mm_or_si128 (result , _mm_and_si128 (
230- _mm_cmpeq_epi8 (tmp , slash ),
231- _mm_cmpeq_epi8 (tmp1 , dot )));
232-
233- mask = _mm_movemask_epi8 (result );
234- if (mask ) {
235- /*
236- * We've found either end-of-string, or some
237- * probably-bad character or substring.
238- */
239- int i = __builtin_ctz (mask );
240- switch (refname_disposition [cp [i ] & 255 ]) {
241- case 0 : /* fall-through */
242- case 5 :
243- /*
244- * bycatch: a good character that's in
245- * one of the ranges of mostly-forbidden
246- * characters
247- */
248- cp += i + 1 ;
249- break ;
250- case 1 :
251- if (cp [i + 1 ] == '{' )
252- return -1 ;
253- cp += i + 1 ;
254- break ;
255- case 2 :
256- if (!(flags & REFNAME_ALLOW_ONELEVEL )
257- && !component_count && !strchr (refname , '/' ))
258- /* Refname has only one component. */
259- return -1 ;
260- return 0 ;
261- case 3 :
262- component_count ++ ;
263- /*
264- * Even if leading dots are allowed, don't
265- * allow "." as a component (".." is
266- * prevented by case 4 below).
267- */
268- if (cp [i + 1 ] == '.' ) {
269- if (cp [i + 2 ] == '\0' )
270- return -1 ;
271- if (flags & REFNAME_DOT_COMPONENT ) {
272- /* skip to just after the /. */
273- cp += i + 2 ;
274- break ;
275- }
276- return -1 ;
277- } else if (cp [i + 1 ] == '/' || cp [i + 1 ] == '\0' )
278- return -1 ;
279- break ;
280- case 4 :
281- if (cp [i + 1 ] == '.' || cp [i + 1 ] == '\0' )
282- return -1 ;
283- /* .lock as end-of-component or end-of-string */
284- if ((!strncmp (cp + i , ".lock" , 5 ))
285- && (cp [i + 5 ] == '/' || cp [i + 5 ] == 0 ))
286- return -1 ;
287- cp += 1 ;
288- break ;
289- case 6 :
290- if (((cp == refname + i ) || cp [i - 1 ] == '/' )
291- && (cp [i + 1 ] == '/' || cp [i + 1 ] == 0 ))
292- if (flags & REFNAME_REFSPEC_PATTERN ) {
293- flags &= ~REFNAME_REFSPEC_PATTERN ;
294- /* restart after the * */
295- cp += i + 1 ;
296- continue ;
297- }
298- /* fall-through */
299- case 7 :
300- return -1 ;
301- }
302- } else
303- cp += SSE_VECTOR_BYTES ;
304- }
305- }
306-
307- #else
308-
309- int check_refname_format (const char * refname , int flags )
310- {
311- return check_refname_format_bytewise (refname , flags );
312- }
313-
314- #endif
315-
316118struct ref_entry ;
317119
318120/*
0 commit comments