Skip to content

Commit 5a38104

Browse files
committed
Control ctype behavior internally with a method table.
Previously, pattern matching and case mapping behavior branched based on the provider. Refactor to use a method table, which is less error-prone.

This is also a step toward multiple provider versions, which we may want to support in the future.

Reviewed-by: Andreas Karlsson <andreas@proxel.se>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/2830211e1b6e6a2e26d845780b03e125281ea17b.camel%40j-davis.com
1 parent d81dcc8 commit 5a38104

File tree

9 files changed

+686
-508
lines changed

9 files changed

+686
-508
lines changed

src/backend/regex/regc_pg_locale.c

Lines changed: 77 additions & 352 deletions
Large diffs are not rendered by default.

src/backend/utils/adt/like.c

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
9898
else if (locale->is_default)
9999
return pg_tolower(c);
100100
else
101-
return tolower_l(c, locale->info.lt);
101+
return char_tolower(c, locale);
102102
}
103103

104104

@@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
209209
* way.
210210
*/
211211

212-
if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
212+
if (locale->ctype_is_c ||
213+
(char_tolower_enabled(locale) &&
214+
pg_database_encoding_max_length() == 1))
215+
{
216+
p = VARDATA_ANY(pat);
217+
plen = VARSIZE_ANY_EXHDR(pat);
218+
s = VARDATA_ANY(str);
219+
slen = VARSIZE_ANY_EXHDR(str);
220+
return SB_IMatchText(s, slen, p, plen, locale);
221+
}
222+
else
213223
{
214224
pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
215225
PointerGetDatum(pat)));
@@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
224234
else
225235
return MB_MatchText(s, slen, p, plen, 0);
226236
}
227-
else
228-
{
229-
p = VARDATA_ANY(pat);
230-
plen = VARSIZE_ANY_EXHDR(pat);
231-
s = VARDATA_ANY(str);
232-
slen = VARSIZE_ANY_EXHDR(str);
233-
return SB_IMatchText(s, slen, p, plen, locale);
234-
}
235237
}
236238

237239
/*

src/backend/utils/adt/like_support.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
14951495
{
14961496
if (locale->ctype_is_c)
14971497
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1498-
else if (is_multibyte && IS_HIGHBIT_SET(c))
1499-
return true;
1500-
else if (locale->provider != COLLPROVIDER_LIBC)
1501-
return IS_HIGHBIT_SET(c) ||
1502-
(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
15031498
else
1504-
return isalpha_l((unsigned char) c, locale->info.lt);
1499+
return char_is_cased(c, locale);
15051500
}
15061501

15071502

src/backend/utils/adt/pg_locale.c

Lines changed: 44 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -79,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
7979
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
8080
extern char *get_collation_actual_version_libc(const char *collcollate);
8181

82-
extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
83-
ssize_t srclen, pg_locale_t locale);
84-
extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
85-
ssize_t srclen, pg_locale_t locale);
86-
extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
87-
ssize_t srclen, pg_locale_t locale);
88-
extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
89-
ssize_t srclen, pg_locale_t locale);
90-
91-
extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
92-
ssize_t srclen, pg_locale_t locale);
93-
extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
94-
ssize_t srclen, pg_locale_t locale);
95-
extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
96-
ssize_t srclen, pg_locale_t locale);
97-
extern size_t strfold_icu(char *dst, size_t dstsize, const char *src,
98-
ssize_t srclen, pg_locale_t locale);
99-
100-
extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
101-
ssize_t srclen, pg_locale_t locale);
102-
extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
103-
ssize_t srclen, pg_locale_t locale);
104-
extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
105-
ssize_t srclen, pg_locale_t locale);
106-
10782
/* GUC settings */
10883
char *locale_messages;
10984
char *locale_monetary;
@@ -1092,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context)
10921067
Assert((result->collate_is_c && result->collate == NULL) ||
10931068
(!result->collate_is_c && result->collate != NULL));
10941069

1070+
Assert((result->ctype_is_c && result->ctype == NULL) ||
1071+
(!result->ctype_is_c && result->ctype != NULL));
1072+
10951073
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
10961074
&isnull);
10971075
if (!isnull)
@@ -1256,77 +1234,31 @@ size_t
12561234
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
12571235
pg_locale_t locale)
12581236
{
1259-
if (locale->provider == COLLPROVIDER_BUILTIN)
1260-
return strlower_builtin(dst, dstsize, src, srclen, locale);
1261-
#ifdef USE_ICU
1262-
else if (locale->provider == COLLPROVIDER_ICU)
1263-
return strlower_icu(dst, dstsize, src, srclen, locale);
1264-
#endif
1265-
else if (locale->provider == COLLPROVIDER_LIBC)
1266-
return strlower_libc(dst, dstsize, src, srclen, locale);
1267-
else
1268-
/* shouldn't happen */
1269-
PGLOCALE_SUPPORT_ERROR(locale->provider);
1270-
1271-
return 0; /* keep compiler quiet */
1237+
return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
12721238
}
12731239

12741240
size_t
12751241
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
12761242
pg_locale_t locale)
12771243
{
1278-
if (locale->provider == COLLPROVIDER_BUILTIN)
1279-
return strtitle_builtin(dst, dstsize, src, srclen, locale);
1280-
#ifdef USE_ICU
1281-
else if (locale->provider == COLLPROVIDER_ICU)
1282-
return strtitle_icu(dst, dstsize, src, srclen, locale);
1283-
#endif
1284-
else if (locale->provider == COLLPROVIDER_LIBC)
1285-
return strtitle_libc(dst, dstsize, src, srclen, locale);
1286-
else
1287-
/* shouldn't happen */
1288-
PGLOCALE_SUPPORT_ERROR(locale->provider);
1289-
1290-
return 0; /* keep compiler quiet */
1244+
return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
12911245
}
12921246

12931247
size_t
12941248
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
12951249
pg_locale_t locale)
12961250
{
1297-
if (locale->provider == COLLPROVIDER_BUILTIN)
1298-
return strupper_builtin(dst, dstsize, src, srclen, locale);
1299-
#ifdef USE_ICU
1300-
else if (locale->provider == COLLPROVIDER_ICU)
1301-
return strupper_icu(dst, dstsize, src, srclen, locale);
1302-
#endif
1303-
else if (locale->provider == COLLPROVIDER_LIBC)
1304-
return strupper_libc(dst, dstsize, src, srclen, locale);
1305-
else
1306-
/* shouldn't happen */
1307-
PGLOCALE_SUPPORT_ERROR(locale->provider);
1308-
1309-
return 0; /* keep compiler quiet */
1251+
return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
13101252
}
13111253

13121254
size_t
13131255
pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
13141256
pg_locale_t locale)
13151257
{
1316-
if (locale->provider == COLLPROVIDER_BUILTIN)
1317-
return strfold_builtin(dst, dstsize, src, srclen, locale);
1318-
#ifdef USE_ICU
1319-
else if (locale->provider == COLLPROVIDER_ICU)
1320-
return strfold_icu(dst, dstsize, src, srclen, locale);
1321-
#endif
1322-
/* for libc, just use strlower */
1323-
else if (locale->provider == COLLPROVIDER_LIBC)
1324-
return strlower_libc(dst, dstsize, src, srclen, locale);
1258+
if (locale->ctype->strfold)
1259+
return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
13251260
else
1326-
/* shouldn't happen */
1327-
PGLOCALE_SUPPORT_ERROR(locale->provider);
1328-
1329-
return 0; /* keep compiler quiet */
1261+
return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
13301262
}
13311263

13321264
/*
@@ -1463,6 +1395,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
14631395
return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
14641396
}
14651397

1398+
/*
1399+
* char_is_cased()
1400+
*
1401+
* Fuzzy test of whether the given char is case-varying or not. The argument
1402+
* is a single byte, so in a multibyte encoding, just assume any non-ASCII
1403+
* char is case-varying.
*
* This is a thin wrapper that dispatches to the char_is_cased entry of the
* locale's ctype method table. locale->ctype is dereferenced unconditionally;
* create_pg_locale() asserts that ctype is NULL when ctype_is_c is set, so
* callers must handle ctype_is_c locales themselves before calling this
* (as pattern_char_isalpha() does).
1404+
*/
1405+
bool
1406+
char_is_cased(char ch, pg_locale_t locale)
1407+
{
1408+
return locale->ctype->char_is_cased(ch, locale);
1409+
}
1410+
1411+
/*
1412+
* char_tolower_enabled()
1413+
*
1414+
* Does the provider support char_tolower()?
*
* Returns true when the locale's ctype method table has a non-NULL
* char_tolower entry. locale->ctype itself is dereferenced without a NULL
* check, so this must not be called for a locale whose ctype is NULL.
1415+
*/
1416+
bool
1417+
char_tolower_enabled(pg_locale_t locale)
1418+
{
1419+
return (locale->ctype->char_tolower != NULL);
1420+
}
1421+
1422+
/*
1423+
* char_tolower()
1424+
*
1425+
* Convert char (single-byte encoding) to lowercase.
*
* Forwards to the char_tolower entry of the locale's ctype method table.
* The entry is called without checking it for NULL, so callers should test
* char_tolower_enabled() first. Note that the argument is an unsigned char
* while the result is returned as plain char.
1426+
*/
1427+
char
1428+
char_tolower(unsigned char ch, pg_locale_t locale)
1429+
{
1430+
return locale->ctype->char_tolower(ch, locale);
1431+
}
1432+
14661433
/*
14671434
* Return required encoding ID for the given locale, or -1 if any encoding is
14681435
* valid for the locale.

src/backend/utils/adt/pg_locale_builtin.c

Lines changed: 98 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,6 @@
2424
extern pg_locale_t create_pg_locale_builtin(Oid collid,
2525
MemoryContext context);
2626
extern char *get_collation_actual_version_builtin(const char *collcollate);
27-
extern size_t strlower_builtin(char *dest, size_t destsize, const char *src,
28-
ssize_t srclen, pg_locale_t locale);
29-
extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src,
30-
ssize_t srclen, pg_locale_t locale);
31-
extern size_t strupper_builtin(char *dest, size_t destsize, const char *src,
32-
ssize_t srclen, pg_locale_t locale);
33-
extern size_t strfold_builtin(char *dest, size_t destsize, const char *src,
34-
ssize_t srclen, pg_locale_t locale);
35-
3627

3728
struct WordBoundaryState
3829
{
@@ -76,15 +67,15 @@ initcap_wbnext(void *state)
7667
return wbstate->len;
7768
}
7869

79-
size_t
70+
static size_t
8071
strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
8172
pg_locale_t locale)
8273
{
8374
return unicode_strlower(dest, destsize, src, srclen,
8475
locale->info.builtin.casemap_full);
8576
}
8677

87-
size_t
78+
static size_t
8879
strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
8980
pg_locale_t locale)
9081
{
@@ -102,22 +93,114 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
10293
initcap_wbnext, &wbstate);
10394
}
10495

105-
size_t
96+
static size_t
10697
strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
10798
pg_locale_t locale)
10899
{
109100
return unicode_strupper(dest, destsize, src, srclen,
110101
locale->info.builtin.casemap_full);
111102
}
112103

113-
size_t
104+
static size_t
114105
strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
115106
pg_locale_t locale)
116107
{
117108
return unicode_strfold(dest, destsize, src, srclen,
118109
locale->info.builtin.casemap_full);
119110
}
120111

112+
/*
 * Builtin-provider digit test: delegates to pg_u_isdigit(), passing the
 * negation of the locale's casemap_full flag as the second argument
 * (presumably the "posix" compatibility switch -- TODO confirm against the
 * pg_u_isdigit() declaration).
 */
static bool
113+
wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
114+
{
115+
return pg_u_isdigit(wc, !locale->info.builtin.casemap_full);
116+
}
117+
118+
/*
 * Builtin-provider alphabetic test: returns pg_u_isalpha(wc). The locale
 * argument is not consulted.
 */
static bool
119+
wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
120+
{
121+
return pg_u_isalpha(wc);
122+
}
123+
124+
/*
 * Builtin-provider alphanumeric test: delegates to pg_u_isalnum(), passing
 * the negation of the locale's casemap_full flag as the second argument
 * (presumably the "posix" compatibility switch -- TODO confirm against the
 * pg_u_isalnum() declaration).
 */
static bool
125+
wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
126+
{
127+
return pg_u_isalnum(wc, !locale->info.builtin.casemap_full);
128+
}
129+
130+
/*
 * Builtin-provider uppercase test: returns pg_u_isupper(wc). The locale
 * argument is not consulted.
 */
static bool
131+
wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
132+
{
133+
return pg_u_isupper(wc);
134+
}
135+
136+
/*
 * Builtin-provider lowercase test: returns pg_u_islower(wc). The locale
 * argument is not consulted.
 */
static bool
137+
wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
138+
{
139+
return pg_u_islower(wc);
140+
}
141+
142+
/*
 * Builtin-provider graphic-character test: returns pg_u_isgraph(wc). The
 * locale argument is not consulted.
 */
static bool
143+
wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
144+
{
145+
return pg_u_isgraph(wc);
146+
}
147+
148+
/*
 * Builtin-provider printable-character test: returns pg_u_isprint(wc). The
 * locale argument is not consulted.
 */
static bool
149+
wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
150+
{
151+
return pg_u_isprint(wc);
152+
}
153+
154+
/*
 * Builtin-provider punctuation test: delegates to pg_u_ispunct(), passing
 * the negation of the locale's casemap_full flag as the second argument
 * (presumably the "posix" compatibility switch -- TODO confirm against the
 * pg_u_ispunct() declaration).
 */
static bool
155+
wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
156+
{
157+
return pg_u_ispunct(wc, !locale->info.builtin.casemap_full);
158+
}
159+
160+
/*
 * Builtin-provider whitespace test: returns pg_u_isspace(wc). The locale
 * argument is not consulted.
 */
static bool
161+
wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
162+
{
163+
return pg_u_isspace(wc);
164+
}
165+
166+
/*
 * Builtin-provider "is this byte possibly case-varying?" test.
 *
 * Returns true for any byte with the high bit set and for the ASCII letters
 * A-Z and a-z; false otherwise. The locale argument is not consulted.
 */
static bool
167+
char_is_cased_builtin(char ch, pg_locale_t locale)
168+
{
169+
return IS_HIGHBIT_SET(ch) ||
170+
(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
171+
}
172+
173+
/*
 * Builtin-provider single-code-point uppercase mapping: returns
 * unicode_uppercase_simple(wc). The locale argument is not consulted.
 */
static pg_wchar
174+
wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
175+
{
176+
return unicode_uppercase_simple(wc);
177+
}
178+
179+
/*
 * Builtin-provider single-code-point lowercase mapping: returns
 * unicode_lowercase_simple(wc). The locale argument is not consulted.
 */
static pg_wchar
180+
wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
181+
{
182+
return unicode_lowercase_simple(wc);
183+
}
184+
185+
/*
 * ctype method table for the builtin provider.
 *
 * Wires the string case-mapping functions and the per-code-point
 * classification / case-conversion functions defined in this file into a
 * struct ctype_methods. No char_tolower entry is listed, so that member is
 * left zero-initialized (NULL) by the designated initializer.
 */
static const struct ctype_methods ctype_methods_builtin = {
185+
.strlower = strlower_builtin,
186+
.strtitle = strtitle_builtin,
187+
.strupper = strupper_builtin,
188+
.strfold = strfold_builtin,
189+
.wc_isdigit = wc_isdigit_builtin,
190+
.wc_isalpha = wc_isalpha_builtin,
191+
.wc_isalnum = wc_isalnum_builtin,
192+
.wc_isupper = wc_isupper_builtin,
193+
.wc_islower = wc_islower_builtin,
194+
.wc_isgraph = wc_isgraph_builtin,
195+
.wc_isprint = wc_isprint_builtin,
196+
.wc_ispunct = wc_ispunct_builtin,
197+
.wc_isspace = wc_isspace_builtin,
198+
.char_is_cased = char_is_cased_builtin,
199+
.wc_tolower = wc_tolower_builtin,
200+
.wc_toupper = wc_toupper_builtin,
201+
};
203+
121204
pg_locale_t
122205
create_pg_locale_builtin(Oid collid, MemoryContext context)
123206
{
@@ -161,6 +244,8 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
161244
result->deterministic = true;
162245
result->collate_is_c = true;
163246
result->ctype_is_c = (strcmp(locstr, "C") == 0);
247+
if (!result->ctype_is_c)
248+
result->ctype = &ctype_methods_builtin;
164249

165250
return result;
166251
}

0 commit comments

Comments
 (0)