Skip to content

Commit 7f007e4

Browse files
committed
ltree: fix case-insensitive matching.
Previously, ltree_prefix_eq_ci() used lowercasing with the default collation, while ltree_crc32_sz() used tolower() directly. These were equivalent only if the default collation provider was libc and the encoding was single-byte.

Change both to use casefolding with the default collation.

Backpatch through 18, where the casefolding APIs were introduced. The bug exists in earlier versions, but would require some adaptation.

A REINDEX is required for ltree indexes where the database default collation is not libc.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Backpatch-through: 18
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
Discussion: https://postgr.es/m/01fc00fd66f641b9693d4f9f1af0ccf44cbdfbdf.camel@j-davis.com
1 parent 24bf379 commit 7f007e4

File tree

2 files changed

+74
-11
lines changed

2 files changed

+74
-11
lines changed

contrib/ltree/crc32.c

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,31 +10,61 @@
1010
#include "postgres.h"
1111
#include "ltree.h"
1212

13+
#include "crc32.h"
14+
#include "utils/pg_crc.h"
1315
#ifdef LOWER_NODE
14-
#include <ctype.h>
15-
#define TOLOWER(x) tolower((unsigned char) (x))
16-
#else
17-
#define TOLOWER(x) (x)
16+
#include "utils/pg_locale.h"
1817
#endif
1918

20-
#include "crc32.h"
21-
#include "utils/pg_crc.h"
19+
#ifdef LOWER_NODE
2220

2321
/*
 * ltree_crc32_sz -- LOWER_NODE variant (this definition sits between
 * "#ifdef LOWER_NODE" and "#else").
 *
 * Computes a traditional CRC-32 over the case-folded form of the "size"
 * bytes starting at "buf".  The input is walked one multibyte character at
 * a time; each character is folded with pg_strfold() using the database
 * locale, and the folded bytes (not the original ones) are fed to the CRC.
 *
 * Returns the finished CRC cast to unsigned int.
 */
unsigned int
2422
ltree_crc32_sz(const char *buf, int size)
2523
{
2624
pg_crc32 crc;
2725
const char *p = buf;
26+
/* static: the locale is looked up on the first call and reused afterwards */
static pg_locale_t locale = NULL;
27+
28+
if (!locale)
29+
locale = pg_database_locale();
2830

2931
INIT_TRADITIONAL_CRC32(crc);
3032
while (size > 0)
3133
{
/*
 * The next line carries a "-" gutter: it is the pre-commit code, which
 * lower-cased a single byte via the (also removed) TOLOWER macro.
 */
32-
char c = (char) TOLOWER(*p);
34+
/* scratch buffer that receives the folded form of one character */
char foldstr[UNICODE_CASEMAP_BUFSZ];
35+
/* byte length of the character at p, as reported by pg_mblen() */
int srclen = pg_mblen(p);
36+
size_t foldlen;
37+
38+
/* fold one codepoint at a time */
39+
foldlen = pg_strfold(foldstr, UNICODE_CASEMAP_BUFSZ, p, srclen,
40+
locale);
41+
42+
/* accumulate foldlen bytes of folded output into the CRC */
COMP_TRADITIONAL_CRC32(crc, foldstr, foldlen);
43+
44+
/* advance by the source character's length, not the folded length */
size -= srclen;
45+
p += srclen;
46+
}
47+
FIN_TRADITIONAL_CRC32(crc);
48+
return (unsigned int) crc;
49+
}
50+
51+
#else
3352

34-
COMP_TRADITIONAL_CRC32(crc, &c, 1);
53+
/*
 * ltree_crc32_sz -- variant compiled when LOWER_NODE is not defined (this
 * definition sits between "#else" and "#endif").
 *
 * Computes a traditional CRC-32 over the "size" raw bytes starting at
 * "buf", one byte per iteration, with no case folding applied.
 *
 * Returns the finished CRC cast to unsigned int.
 */
unsigned int
54+
ltree_crc32_sz(const char *buf, int size)
55+
{
56+
pg_crc32 crc;
57+
const char *p = buf;
58+
59+
INIT_TRADITIONAL_CRC32(crc);
60+
while (size > 0)
61+
{
62+
/* feed exactly one input byte to the CRC, unchanged */
COMP_TRADITIONAL_CRC32(crc, p, 1);
3563
size--;
3664
p++;
3765
}
3866
FIN_TRADITIONAL_CRC32(crc);
3967
return (unsigned int) crc;
4068
}
69+
70+
#endif /* !LOWER_NODE */

contrib/ltree/lquery_op.c

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,44 @@ ltree_prefix_eq(const char *a, size_t a_sz, const char *b, size_t b_sz)
9393
bool
9494
ltree_prefix_eq_ci(const char *a, size_t a_sz, const char *b, size_t b_sz)
9595
{
96-
char *al = str_tolower(a, a_sz, DEFAULT_COLLATION_OID);
97-
char *bl = str_tolower(b, b_sz, DEFAULT_COLLATION_OID);
96+
static pg_locale_t locale = NULL;
97+
size_t al_sz = a_sz + 1;
98+
size_t al_len;
99+
char *al = palloc(al_sz);
100+
size_t bl_sz = b_sz + 1;
101+
size_t bl_len;
102+
char *bl = palloc(bl_sz);
98103
bool res;
99104

100-
res = (strncmp(al, bl, a_sz) == 0);
105+
if (!locale)
106+
locale = pg_database_locale();
107+
108+
/* casefold both a and b */
109+
110+
al_len = pg_strfold(al, al_sz, a, a_sz, locale);
111+
if (al_len + 1 > al_sz)
112+
{
113+
/* grow buffer if needed and retry */
114+
al_sz = al_len + 1;
115+
al = repalloc(al, al_sz);
116+
al_len = pg_strfold(al, al_sz, a, a_sz, locale);
117+
Assert(al_len + 1 <= al_sz);
118+
}
119+
120+
bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale);
121+
if (bl_len + 1 > bl_sz)
122+
{
123+
/* grow buffer if needed and retry */
124+
bl_sz = bl_len + 1;
125+
bl = repalloc(bl, bl_sz);
126+
bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale);
127+
Assert(bl_len + 1 <= bl_sz);
128+
}
129+
130+
if (al_len > bl_len)
131+
res = false;
132+
else
133+
res = (strncmp(al, bl, al_len) == 0);
101134

102135
pfree(al);
103136
pfree(bl);

0 commit comments

Comments
 (0)