Skip to content

Commit c30a49b

Browse files
committed
string-util: tweak ellipsation a bit
This primarily changes two things: 1. Ellipsation to 0, 1 or 2 characters is now supported. Previously we'd hit an assert if the new length was < 3; this is now permitted. The resulting strings still won't show much information, of course, but the code becomes a bit more generic and robust to use. 2. If UTF-8 mode is disabled and the input string is pure ASCII, then "..." is used for ellipsation, otherwise (as before) "…". This means on a pure-ASCII system we should remain pure-ASCII, matching behaviour otherwise exposed with special_glyph() and friends. Note that we'll use "…" for ellipsation as soon as either the locale settings indicate a UTF-8 mode or the input string already contains non-ASCII Unicode characters. Testing for these special cases is improved.
1 parent adea407 commit c30a49b

File tree

2 files changed

+92
-25
lines changed

2 files changed

+92
-25
lines changed

src/basic/string-util.c

Lines changed: 68 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "alloc-util.h"
1717
#include "gunicode.h"
18+
#include "locale-util.h"
1819
#include "macro.h"
1920
#include "string-util.h"
2021
#include "terminal-util.h"
@@ -452,62 +453,104 @@ bool string_has_cc(const char *p, const char *ok) {
452453
}
453454

454455
static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
455-
size_t x;
456+
size_t x, need_space;
456457
char *r;
457458

458459
assert(s);
459460
assert(percent <= 100);
460-
assert(new_length >= 3);
461+
assert(new_length != (size_t) -1);
461462

462-
if (old_length <= 3 || old_length <= new_length)
463+
if (old_length <= new_length)
463464
return strndup(s, old_length);
464465

465-
r = new0(char, new_length+3);
466+
/* Special case short ellipsations */
467+
switch (new_length) {
468+
469+
case 0:
470+
return strdup("");
471+
472+
case 1:
473+
if (is_locale_utf8())
474+
return strdup("…");
475+
else
476+
return strdup(".");
477+
478+
case 2:
479+
if (!is_locale_utf8())
480+
return strdup("..");
481+
482+
break;
483+
484+
default:
485+
break;
486+
}
487+
488+
/* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
489+
* character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
490+
* either for the UTF-8 encoded character or for three ASCII characters. */
491+
need_space = is_locale_utf8() ? 1 : 3;
492+
493+
r = new(char, new_length+3);
466494
if (!r)
467495
return NULL;
468496

469-
x = (new_length * percent) / 100;
497+
assert(new_length >= need_space);
470498

471-
if (x > new_length - 3)
472-
x = new_length - 3;
499+
x = ((new_length - need_space) * percent + 50) / 100;
500+
assert(x <= new_length - need_space);
473501

474502
memcpy(r, s, x);
475-
r[x] = 0xe2; /* tri-dot ellipsis: … */
476-
r[x+1] = 0x80;
477-
r[x+2] = 0xa6;
503+
504+
if (is_locale_utf8()) {
505+
r[x+0] = 0xe2; /* tri-dot ellipsis: … */
506+
r[x+1] = 0x80;
507+
r[x+2] = 0xa6;
508+
} else {
509+
r[x+0] = '.';
510+
r[x+1] = '.';
511+
r[x+2] = '.';
512+
}
513+
478514
memcpy(r + x + 3,
479-
s + old_length - (new_length - x - 1),
480-
new_length - x - 1);
515+
s + old_length - (new_length - x - need_space),
516+
new_length - x - need_space + 1);
481517

482518
return r;
483519
}
484520

485521
char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
486-
size_t x;
487-
char *e;
522+
size_t x, k, len, len2;
488523
const char *i, *j;
489-
unsigned k, len, len2;
524+
char *e;
490525
int r;
491526

527+
/* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
528+
* on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
529+
* strings.
530+
*
531+
* Ellipsation is done in a locale-dependent way:
532+
* 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
533+
* 2. Otherwise, a unicode ellipsis is used ("…")
534+
*
535+
* In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
536+
* the current locale is UTF-8.
537+
*/
538+
492539
assert(s);
493540
assert(percent <= 100);
494541

495542
if (new_length == (size_t) -1)
496543
return strndup(s, old_length);
497544

498-
assert(new_length >= 3);
545+
if (new_length == 0)
546+
return strdup("");
499547

500-
/* if no multibyte characters use ascii_ellipsize_mem for speed */
548+
/* If no multibyte characters use ascii_ellipsize_mem for speed */
501549
if (ascii_is_valid(s))
502550
return ascii_ellipsize_mem(s, old_length, new_length, percent);
503551

504-
if (old_length <= 3 || old_length <= new_length)
505-
return strndup(s, old_length);
506-
507-
x = (new_length * percent) / 100;
508-
509-
if (x > new_length - 3)
510-
x = new_length - 3;
552+
x = ((new_length - 1) * percent) / 100;
553+
assert(x <= new_length - 1);
511554

512555
k = 0;
513556
for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
@@ -552,7 +595,7 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
552595
*/
553596

554597
memcpy(e, s, len);
555-
e[len] = 0xe2; /* tri-dot ellipsis: … */
598+
e[len + 0] = 0xe2; /* tri-dot ellipsis: … */
556599
e[len + 1] = 0x80;
557600
e[len + 2] = 0xa6;
558601

src/test/test-ellipsize.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,30 @@ static void test_one(const char *p) {
1717
_cleanup_free_ char *t;
1818
t = ellipsize(p, columns(), 70);
1919
puts(t);
20+
free(t);
21+
t = ellipsize(p, columns(), 0);
22+
puts(t);
23+
free(t);
24+
t = ellipsize(p, columns(), 100);
25+
puts(t);
26+
free(t);
27+
t = ellipsize(p, 0, 50);
28+
puts(t);
29+
free(t);
30+
t = ellipsize(p, 1, 50);
31+
puts(t);
32+
free(t);
33+
t = ellipsize(p, 2, 50);
34+
puts(t);
35+
free(t);
36+
t = ellipsize(p, 3, 50);
37+
puts(t);
38+
free(t);
39+
t = ellipsize(p, 4, 50);
40+
puts(t);
41+
free(t);
42+
t = ellipsize(p, 5, 50);
43+
puts(t);
2044
}
2145

2246
int main(int argc, char *argv[]) {

0 commit comments

Comments
 (0)