Skip to content

Commit 0a90df5

Browse files
committed
Avoid global LC_CTYPE dependency in pg_locale_icu.c.
ICU still depends on libc for compatibility with certain historical behavior for single-byte encodings. Make the dependency explicit by holding a locale_t object when required. We should consider a better solution in the future, such as decoding the text to UTF-32 and using u_tolower(). However, that would be a behavior change and would require additional infrastructure, so for now just avoid the global LC_CTYPE dependency. Reviewed-by: Chao Li <li.evan.chao@gmail.com> Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
1 parent 87b2968 commit 0a90df5

File tree

2 files changed

+44
-4
lines changed

2 files changed

+44
-4
lines changed

src/backend/utils/adt/pg_locale_icu.c

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,29 @@ static const struct ctype_methods ctype_methods_icu = {
244244
.wc_toupper = toupper_icu,
245245
.wc_tolower = tolower_icu,
246246
};
247+
248+
/*
249+
* ICU still depends on libc for compatibility with certain historical
250+
* behavior for single-byte encodings. See downcase_ident_icu().
251+
*
252+
* XXX: consider fixing by decoding the single byte into a code point, and
253+
* using u_tolower().
254+
*/
255+
static locale_t
256+
make_libc_ctype_locale(const char *ctype)
257+
{
258+
locale_t loc;
259+
260+
#ifndef WIN32
261+
loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
262+
#else
263+
loc = _create_locale(LC_ALL, ctype);
264+
#endif
265+
if (!loc)
266+
report_newlocale_failure(ctype);
267+
268+
return loc;
269+
}
247270
#endif
248271

249272
pg_locale_t
@@ -254,6 +277,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
254277
const char *iculocstr;
255278
const char *icurules = NULL;
256279
UCollator *collator;
280+
locale_t loc = (locale_t) 0;
257281
pg_locale_t result;
258282

259283
if (collid == DEFAULT_COLLATION_OID)
@@ -276,6 +300,18 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
276300
if (!isnull)
277301
icurules = TextDatumGetCString(datum);
278302

303+
/* libc only needed for default locale and single-byte encoding */
304+
if (pg_database_encoding_max_length() == 1)
305+
{
306+
const char *ctype;
307+
308+
datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
309+
Anum_pg_database_datctype);
310+
ctype = TextDatumGetCString(datum);
311+
312+
loc = make_libc_ctype_locale(ctype);
313+
}
314+
279315
ReleaseSysCache(tp);
280316
}
281317
else
@@ -306,6 +342,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
306342
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
307343
result->icu.locale = MemoryContextStrdup(context, iculocstr);
308344
result->icu.ucol = collator;
345+
result->icu.lt = loc;
309346
result->deterministic = deterministic;
310347
result->collate_is_c = false;
311348
result->ctype_is_c = false;
@@ -578,17 +615,19 @@ downcase_ident_icu(char *dst, size_t dstsize, const char *src,
578615
ssize_t srclen, pg_locale_t locale)
579616
{
580617
int i;
581-
bool enc_is_single_byte;
618+
bool libc_lower;
619+
locale_t lt = locale->icu.lt;
620+
621+
libc_lower = lt && (pg_database_encoding_max_length() == 1);
582622

583-
enc_is_single_byte = pg_database_encoding_max_length() == 1;
584623
for (i = 0; i < srclen && i < dstsize; i++)
585624
{
586625
unsigned char ch = (unsigned char) src[i];
587626

588627
if (ch >= 'A' && ch <= 'Z')
589628
ch = pg_ascii_tolower(ch);
590-
else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
591-
ch = tolower(ch);
629+
else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt))
630+
ch = tolower_l(ch, lt);
592631
dst[i] = (char) ch;
593632
}
594633

src/include/utils/pg_locale.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ struct pg_locale_struct
167167
{
168168
const char *locale;
169169
UCollator *ucol;
170+
locale_t lt;
170171
} icu;
171172
#endif
172173
};

0 commit comments

Comments
 (0)