Skip to content

Commit b7fc90a

Browse files
committed
json: add concept of normalization
Let's add a concept of normalization: as preparation for signing JSON records, let's add a mechanism to bring JSON records into a well-defined order so that we can safely validate them. This adds two booleans to each JsonVariant object: "sorted" and "normalized". The latter indicates whether a variant is fully sorted (i.e. all keys of objects listed in alphabetical order) recursively down the tree. The former is a weaker property: it only checks whether the keys of the object itself are sorted. All variants which are "normalized" are also "sorted", but not vice versa. The knowledge of the "sorted" property is then used to optimize searching for keys in the variant by using bisection. Both properties are determined at the moment the variants are allocated. Since our objects are immutable this is safe.
1 parent ca409a5 commit b7fc90a

File tree

3 files changed

+314
-20
lines changed

3 files changed

+314
-20
lines changed

src/shared/json.c

Lines changed: 219 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,15 @@ struct JsonVariant {
7171
/* While comparing two arrays, we use this for marking what we already have seen */
7272
bool is_marked:1;
7373

74-
/* Ersase from memory when freeing */
74+
/* Erase from memory when freeing */
7575
bool sensitive:1;
7676

77+
/* If this is an object the fields are strictly ordered by name */
78+
bool sorted:1;
79+
80+
/* If in addition to this object all objects referenced by it are also ordered strictly by name */
81+
bool normalized:1;
82+
7783
/* The current 'depth' of the JsonVariant, i.e. how many levels of member variants this has */
7884
uint16_t depth;
7985

@@ -215,10 +221,10 @@ static uint16_t json_variant_depth(JsonVariant *v) {
215221
return v->depth;
216222
}
217223

218-
static JsonVariant *json_variant_normalize(JsonVariant *v) {
224+
static JsonVariant *json_variant_formalize(JsonVariant *v) {
219225

220-
/* Converts json variants to their normalized form, i.e. fully dereferenced and wherever possible converted to
221-
* the "magic" version if there is one */
226+
/* Converts json variant pointers to their normalized form, i.e. fully dereferenced and wherever
227+
* possible converted to the "magic" version if there is one */
222228

223229
if (!v)
224230
return NULL;
@@ -259,9 +265,9 @@ static JsonVariant *json_variant_normalize(JsonVariant *v) {
259265
}
260266
}
261267

262-
static JsonVariant *json_variant_conservative_normalize(JsonVariant *v) {
268+
static JsonVariant *json_variant_conservative_formalize(JsonVariant *v) {
263269

264-
/* Much like json_variant_normalize(), but won't simplify if the variant has a source/line location attached to
270+
/* Much like json_variant_formalize(), but won't simplify if the variant has a source/line location attached to
265271
* it, in order not to lose context */
266272

267273
if (!v)
@@ -273,7 +279,7 @@ static JsonVariant *json_variant_conservative_normalize(JsonVariant *v) {
273279
if (v->source || v->line > 0 || v->column > 0)
274280
return v;
275281

276-
return json_variant_normalize(v);
282+
return json_variant_formalize(v);
277283
}
278284

279285
static int json_variant_new(JsonVariant **ret, JsonVariantType type, size_t space) {
@@ -451,7 +457,7 @@ static void json_variant_set(JsonVariant *a, JsonVariant *b) {
451457
case JSON_VARIANT_ARRAY:
452458
case JSON_VARIANT_OBJECT:
453459
a->is_reference = true;
454-
a->reference = json_variant_ref(json_variant_conservative_normalize(b));
460+
a->reference = json_variant_ref(json_variant_conservative_formalize(b));
455461
break;
456462

457463
case JSON_VARIANT_NULL:
@@ -476,6 +482,7 @@ static void json_variant_copy_source(JsonVariant *v, JsonVariant *from) {
476482

477483
int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n) {
478484
_cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
485+
bool normalized = true;
479486

480487
assert_return(ret, -EINVAL);
481488
if (n == 0) {
@@ -511,8 +518,13 @@ int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n) {
511518

512519
json_variant_set(w, c);
513520
json_variant_copy_source(w, c);
521+
522+
if (!json_variant_is_normalized(c))
523+
normalized = false;
514524
}
515525

526+
v->normalized = normalized;
527+
516528
*ret = TAKE_PTR(v);
517529
return 0;
518530
}
@@ -550,6 +562,8 @@ int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n) {
550562
};
551563
}
552564

565+
v->normalized = true;
566+
553567
*ret = v;
554568
return 0;
555569
}
@@ -601,12 +615,16 @@ int json_variant_new_array_strv(JsonVariant **ret, char **l) {
601615
memcpy(w->string, l[v->n_elements], k+1);
602616
}
603617

618+
v->normalized = true;
619+
604620
*ret = TAKE_PTR(v);
605621
return 0;
606622
}
607623

608624
int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n) {
609625
_cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
626+
const char *prev = NULL;
627+
bool sorted = true, normalized = true;
610628

611629
assert_return(ret, -EINVAL);
612630
if (n == 0) {
@@ -630,9 +648,20 @@ int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n) {
630648
*c = array[v->n_elements];
631649
uint16_t d;
632650

633-
if ((v->n_elements & 1) == 0 &&
634-
!json_variant_is_string(c))
635-
return -EINVAL; /* Every second one needs to be a string, as it is the key name */
651+
if ((v->n_elements & 1) == 0) {
652+
const char *k;
653+
654+
if (!json_variant_is_string(c))
655+
return -EINVAL; /* Every second one needs to be a string, as it is the key name */
656+
657+
assert_se(k = json_variant_string(c));
658+
659+
if (prev && strcmp(k, prev) <= 0)
660+
sorted = normalized = false;
661+
662+
prev = k;
663+
} else if (!json_variant_is_normalized(c))
664+
normalized = false;
636665

637666
d = json_variant_depth(c);
638667
if (d >= DEPTH_MAX) /* Refuse too deep nesting */
@@ -649,6 +678,9 @@ int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n) {
649678
json_variant_copy_source(w, c);
650679
}
651680

681+
v->normalized = normalized;
682+
v->sorted = sorted;
683+
652684
*ret = TAKE_PTR(v);
653685
return 0;
654686
}
@@ -1131,7 +1163,7 @@ JsonVariant *json_variant_by_index(JsonVariant *v, size_t idx) {
11311163
if (idx >= v->n_elements)
11321164
return NULL;
11331165

1134-
return json_variant_conservative_normalize(v + 1 + idx);
1166+
return json_variant_conservative_formalize(v + 1 + idx);
11351167

11361168
mismatch:
11371169
log_debug("Element in non-array/non-object JSON variant requested by index, returning NULL.");
@@ -1154,6 +1186,37 @@ JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVaria
11541186
if (v->is_reference)
11551187
return json_variant_by_key(v->reference, key);
11561188

1189+
if (v->sorted) {
1190+
size_t a = 0, b = v->n_elements/2;
1191+
1192+
/* If the variant is sorted we can use bisection to find the entry we need in O(log(n)) time */
1193+
1194+
while (b > a) {
1195+
JsonVariant *p;
1196+
const char *f;
1197+
int c;
1198+
1199+
i = (a + b) / 2;
1200+
p = json_variant_dereference(v + 1 + i*2);
1201+
1202+
assert_se(f = json_variant_string(p));
1203+
1204+
c = strcmp(key, f);
1205+
if (c == 0) {
1206+
if (ret_key)
1207+
*ret_key = json_variant_conservative_formalize(v + 1 + i*2);
1208+
1209+
return json_variant_conservative_formalize(v + 1 + i*2 + 1);
1210+
} else if (c < 0)
1211+
b = i;
1212+
else
1213+
a = i + 1;
1214+
}
1215+
1216+
goto not_found;
1217+
}
1218+
1219+
/* The variant is not sorted, hence search for the field linearly */
11571220
for (i = 0; i < v->n_elements; i += 2) {
11581221
JsonVariant *p;
11591222

@@ -1165,9 +1228,9 @@ JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVaria
11651228
if (streq(json_variant_string(p), key)) {
11661229

11671230
if (ret_key)
1168-
*ret_key = json_variant_conservative_normalize(v + 1 + i);
1231+
*ret_key = json_variant_conservative_formalize(v + 1 + i);
11691232

1170-
return json_variant_conservative_normalize(v + 1 + i + 1);
1233+
return json_variant_conservative_formalize(v + 1 + i + 1);
11711234
}
11721235
}
11731236

@@ -1192,8 +1255,8 @@ JsonVariant *json_variant_by_key(JsonVariant *v, const char *key) {
11921255
bool json_variant_equal(JsonVariant *a, JsonVariant *b) {
11931256
JsonVariantType t;
11941257

1195-
a = json_variant_normalize(a);
1196-
b = json_variant_normalize(b);
1258+
a = json_variant_formalize(a);
1259+
b = json_variant_formalize(b);
11971260

11981261
if (a == b)
11991262
return true;
@@ -1305,7 +1368,7 @@ void json_variant_sensitive(JsonVariant *v) {
13051368
* flag to all contained variants. And if those are then destroyed this is propagated further down,
13061369
* and so on. */
13071370

1308-
v = json_variant_normalize(v);
1371+
v = json_variant_formalize(v);
13091372
if (!json_variant_is_regular(v))
13101373
return;
13111374

@@ -2022,7 +2085,7 @@ static int json_variant_copy(JsonVariant **nv, JsonVariant *v) {
20222085
c->n_ref = 1;
20232086
c->type = t;
20242087
c->is_reference = true;
2025-
c->reference = json_variant_ref(json_variant_normalize(v));
2088+
c->reference = json_variant_ref(json_variant_formalize(v));
20262089

20272090
*nv = c;
20282091
return 0;
@@ -3824,6 +3887,144 @@ int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFl
38243887
return 0;
38253888
}
38263889

3890+
static int json_cmp_strings(const void *x, const void *y) {
3891+
JsonVariant *const *a = x, *const *b = y;
3892+
3893+
if (!json_variant_is_string(*a) || !json_variant_is_string(*b))
3894+
return CMP(*a, *b);
3895+
3896+
return strcmp(json_variant_string(*a), json_variant_string(*b));
3897+
}
3898+
3899+
/* Sorts the key/value pairs of the object variant *v by key name.
 *
 * Returns 0 if json_variant_is_sorted() already reports *v as sorted (nothing is changed), and 1 if *v
 * was replaced by a newly allocated, sorted object (the reference on the previous variant is dropped).
 * On failure a negative errno-style value is returned and *v is left untouched:
 *
 *   -EMEDIUMTYPE  *v is not an object
 *   -ENOMEM       the temporary pointer array could not be allocated
 *   -ENOTUNIQ     the keys cannot be put into strict order, i.e. a key is used more than once
 *   (other)       whatever json_variant_new_object() failed with
 */
int json_variant_sort(JsonVariant **v) {
        _cleanup_free_ JsonVariant **a = NULL;
        /* Released automatically on every error path, so a rejected result is not leaked */
        _cleanup_(json_variant_unrefp) JsonVariant *n = NULL;
        size_t i, m;
        int r;

        assert(v);

        if (json_variant_is_sorted(*v))
                return 0;

        /* Safety net: json_variant_is_sorted() reports non-objects as sorted, so those normally take the
         * early return above. */
        if (!json_variant_is_object(*v))
                return -EMEDIUMTYPE;

        /* Sorts the key/value pairs in an object variant */

        m = json_variant_elements(*v);
        a = new(JsonVariant*, m);
        if (!a)
                return -ENOMEM;

        /* Collect the elements (key, value, key, value, ...). No references are taken here. */
        for (i = 0; i < m; i++)
                a[i] = json_variant_by_index(*v, i);

        /* Sort in units of two pointers, so that each value travels along with its key */
        qsort(a, m/2, sizeof(JsonVariant*)*2, json_cmp_strings);

        r = json_variant_new_object(&n, a, m);
        if (r < 0)
                return r;
        if (!n->sorted) /* Check if this worked. This will fail if there are multiple identical keys used. */
                return -ENOTUNIQ;

        json_variant_unref(*v);
        *v = TAKE_PTR(n);

        return 1;
}
3936+
3937+
/* Brings *v into normalized form: the key/value pairs of every object anywhere down the tree are sorted
 * by key name. The order of array elements is never changed.
 *
 * Returns 0 if json_variant_is_normalized() already reports *v as normalized (nothing is changed), and
 * 1 if *v was replaced by a newly allocated, normalized variant (the reference on the previous variant
 * is dropped). On failure a negative errno-style value is returned and *v is left untouched:
 *
 *   -EMEDIUMTYPE  *v (or a variant below it) is not normalized and is neither an object nor an array
 *   -ENOMEM       the temporary pointer array could not be allocated
 *   -ENOTUNIQ     the rebuilt variant still is not normalized, e.g. because a key is used more than
 *                 once in the same object
 *   (other)       whatever json_variant_new_object()/json_variant_new_array() failed with
 */
int json_variant_normalize(JsonVariant **v) {
        _cleanup_free_ JsonVariant **a = NULL;
        /* Released automatically on every error path, so a rejected result is not leaked */
        _cleanup_(json_variant_unrefp) JsonVariant *n = NULL;
        size_t i, j, m;
        int r;

        assert(v);

        if (json_variant_is_normalized(*v))
                return 0;

        if (!json_variant_is_object(*v) && !json_variant_is_array(*v))
                return -EMEDIUMTYPE;

        /* Sorts the key/value pairs in an object variant anywhere down the tree in the specified variant */

        m = json_variant_elements(*v);
        a = new(JsonVariant*, m);
        if (!a)
                return -ENOMEM;

        for (i = 0; i < m; ) {
                a[i] = json_variant_ref(json_variant_by_index(*v, i));
                i++; /* Count the reference right away, so that the cleanup loop below also drops it if
                      * normalizing this very element fails */

                r = json_variant_normalize(a + i - 1);
                if (r < 0)
                        goto finish;
        }

        if (json_variant_is_object(*v)) {
                /* Only objects are reordered: sort in units of two pointers, so that each value travels
                 * along with its key. */
                qsort(a, m/2, sizeof(JsonVariant*)*2, json_cmp_strings);
                r = json_variant_new_object(&n, a, m);
        } else {
                /* Arrays are ordered sequences, hence keep the elements exactly where they were */
                assert(json_variant_is_array(*v));
                r = json_variant_new_array(&n, a, m);
        }
        if (r < 0)
                goto finish;
        if (!n->normalized) { /* Let's see if normalization worked. It will fail if there are multiple
                               * identical keys used in the same object anywhere, or if there are floating
                               * point numbers used (see json_variant_is_normalized()) */
                r = -ENOTUNIQ;
                goto finish;
        }

        json_variant_unref(*v);
        *v = TAKE_PTR(n);

        r = 1;

finish:
        /* Drop the references we took on the (possibly replaced) elements */
        for (j = 0; j < i; j++)
                json_variant_unref(a[j]);

        return r;
}
3994+
3995+
/* Reports whether the variant counts as normalized. */
bool json_variant_is_normalized(JsonVariant *v) {

        /* Numbers that can only be expressed as reals (i.e. neither as signed nor as unsigned integers)
         * are never considered normalized for now: we cannot sensibly compare them due to accuracy
         * issues, nor even store them if they are too large. */
        if (json_variant_is_real(v) && !(json_variant_is_integer(v) || json_variant_is_unsigned(v)))
                return false;

        /* Objects and arrays are the only variants that include other variants. Empty ones include
         * nothing and hence are always normalized; for all others we maintain an explicit boolean. */
        if (json_variant_is_object(v) || json_variant_is_array(v))
                return json_variant_elements(v) == 0 || v->normalized;

        /* The concept does not apply to anything else, which hence is normalized anyway */
        return true;
}
4014+
4015+
/* Reports whether the key/value pairs of an object are sorted. The property applies to objects only,
 * not to arrays: anything that is not an object, as well as any object with fewer than two elements,
 * is reported as sorted. */
bool json_variant_is_sorted(JsonVariant *v) {

        /* Only for objects with more than one element do we have to consult the explicit flag */
        if (json_variant_is_object(v) && json_variant_elements(v) > 1)
                return v->sorted;

        return true;
}
4027+
38274028
static const char* const json_variant_type_table[_JSON_VARIANT_TYPE_MAX] = {
38284029
[JSON_VARIANT_STRING] = "string",
38294030
[JSON_VARIANT_INTEGER] = "integer",

src/shared/json.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ static inline bool json_variant_is_null(JsonVariant *v) {
123123
bool json_variant_is_negative(JsonVariant *v);
124124
bool json_variant_is_blank_object(JsonVariant *v);
125125
bool json_variant_is_blank_array(JsonVariant *v);
126+
bool json_variant_is_normalized(JsonVariant *v);
127+
bool json_variant_is_sorted(JsonVariant *v);
126128

127129
size_t json_variant_elements(JsonVariant *v);
128130
JsonVariant *json_variant_by_index(JsonVariant *v, size_t index);
@@ -180,6 +182,9 @@ int json_variant_merge(JsonVariant **v, JsonVariant *m);
180182

181183
int json_variant_strv(JsonVariant *v, char ***ret);
182184

185+
int json_variant_sort(JsonVariant **v);
186+
int json_variant_normalize(JsonVariant **v);
187+
183188
typedef enum JsonParseFlags {
184189
JSON_PARSE_SENSITIVE = 1 << 0, /* mark variant as "sensitive", i.e. something containing secret key material or such */
185190
} JsonParseFlags;

0 commit comments

Comments
 (0)