Skip to content

Commit 1d73b52

Browse files
barrbrain authored and gitster committed
Add string-specific memory pool
Intern strings so they can be compared by address and stored without wasting space. This library uses the macros in obj_pool.h and trp.h to create a memory pool for strings and expose an API for handling them. [rr: added API docs] [jn: with some API simplifications, new documentation and tests] Signed-off-by: David Barr <david.barr@cordelta.com> Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 951f316 commit 1d73b52

File tree

7 files changed

+210
-3
lines changed

7 files changed

+210
-3
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@
173173
/test-run-command
174174
/test-sha1
175175
/test-sigchain
176+
/test-string-pool
176177
/test-treap
177178
/common-cmds.h
178179
*.tar.gz

Makefile

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils
415415
TEST_PROGRAMS_NEED_X += test-run-command
416416
TEST_PROGRAMS_NEED_X += test-sha1
417417
TEST_PROGRAMS_NEED_X += test-sigchain
418+
TEST_PROGRAMS_NEED_X += test-string-pool
418419
TEST_PROGRAMS_NEED_X += test-treap
419420
TEST_PROGRAMS_NEED_X += test-index-version
420421

@@ -1742,7 +1743,7 @@ ifndef NO_CURL
17421743
endif
17431744
XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
17441745
xdiff/xmerge.o xdiff/xpatience.o
1745-
VCSSVN_OBJS =
1746+
VCSSVN_OBJS = vcs-svn/string_pool.o
17461747
OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
17471748

17481749
dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
@@ -1867,7 +1868,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
18671868
xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
18681869

18691870
$(VCSSVN_OBJS): \
1870-
vcs-svn/obj_pool.h vcs-svn/trp.h
1871+
vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h
18711872
endif
18721873

18731874
exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
@@ -2018,10 +2019,12 @@ test-delta$X: diff-delta.o patch-delta.o
20182019

20192020
test-parse-options$X: parse-options.o
20202021

2022+
test-string-pool$X: vcs-svn/lib.a
2023+
20212024
.PRECIOUS: $(TEST_OBJS)
20222025

20232026
test-%$X: test-%.o $(GITLIBS)
2024-
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
2027+
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)
20252028

20262029
check-sha1:: test-sha1$X
20272030
./test-sha1.sh

t/t0080-vcs-svn.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,22 @@ test_expect_success 'obj pool: high-water mark' '
7676
test_cmp expected actual
7777
'
7878

79+
# Drive the test-string-pool helper with inputs that split into two tokens
# (the helper tokenizes on "," and "-") and compare its one-line report with
# the expected text.  Inputs that yield three tokens or a single token must
# make the helper exit with a failure.
test_expect_success 'string pool' '
80+
echo a does not equal b >expected.differ &&
81+
echo a equals a >expected.match &&
82+
echo equals equals equals >expected.matchmore &&
83+
84+
test-string-pool "a,--b" >actual.differ &&
85+
test-string-pool "a,a" >actual.match &&
86+
test-string-pool "equals-equals" >actual.matchmore &&
87+
test_must_fail test-string-pool a,a,a &&
88+
test_must_fail test-string-pool a &&
89+
90+
test_cmp expected.differ actual.differ &&
91+
test_cmp expected.match actual.match &&
92+
test_cmp expected.matchmore actual.matchmore
93+
'
94+
7995
test_expect_success 'treap sort' '
8096
cat <<-\EOF >unsorted &&
8197
68

test-string-pool.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* test-string-pool.c: code to exercise the svn importer's string pool
3+
*/
4+
5+
#include "git-compat-util.h"
6+
#include "vcs-svn/string_pool.h"
7+
8+
/*
 * Split argv[1] on ',' and '-' into exactly two tokens and print
 * "<first> equals <second>" or "<first> does not equal <second>",
 * deciding by comparing the interned keys rather than the strings.
 */
int main(int argc, char *argv[])
{
	/* Intern the two verdict phrases up front, in this order. */
	const uint32_t key_differ = pool_intern("does not equal");
	const uint32_t key_same = pool_intern("equals");
	uint32_t words[3];
	uint32_t count;

	if (argc != 2)
		usage("test-string-pool <string>,<string>");

	/* Room for two tokens plus the ~0 terminator. */
	count = pool_tok_seq(3, words, ",-", argv[1]);
	if (count >= 3)
		die("too many strings");
	if (count <= 1)
		die("too few strings");

	/* Rearrange the buffer into <first> <verdict> <second>. */
	words[2] = words[1];
	if (words[0] == words[2])
		words[1] = key_same;
	else
		words[1] = key_differ;

	pool_print_seq(3, words, ' ', stdout);
	fputc('\n', stdout);

	pool_reset();
	return 0;
}

vcs-svn/string_pool.c

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Licensed under a two-clause BSD-style license.
3+
* See LICENSE for details.
4+
*/
5+
6+
#include "git-compat-util.h"
7+
#include "trp.h"
8+
#include "obj_pool.h"
9+
#include "string_pool.h"
10+
11+
/* Root of the tree that pool_intern() searches and inserts into. */
/* NOTE(review): ~0 presumably denotes "no root yet" -- confirm against trp.h. */
static struct trp_root tree = { ~0 };
12+
13+
/* One tree entry per interned string. */
struct node {
14+
/* Offset handed to string_pointer() to locate this entry's characters. */
uint32_t offset;
15+
/* Tree linkage; trp_gen() below is told to use this field by name. */
struct trp_node children;
16+
};
17+
18+
/* Two memory pools: one for struct node, and another for strings */
/*
 * NOTE(review): the node_* and string_* helpers called in this file
 * (alloc, free, pointer, offset, reset) presumably come from these
 * expansions; the meaning of 4096 is defined in obj_pool.h -- confirm there.
 */
19+
obj_pool_gen(node, struct node, 4096)
20+
obj_pool_gen(string, char, 4096)
21+
22+
static char *node_value(struct node *node)
23+
{
24+
return node ? string_pointer(node->offset) : NULL;
25+
}
26+
27+
/*
 * Comparison callback passed to trp_gen(): two nodes compare the way
 * strcmp() compares the strings they refer to.
 */
static int node_cmp(struct node *a, struct node *b)
{
	const char *lhs = node_value(a);
	const char *rhs = node_value(b);

	return strcmp(lhs, rhs);
}
31+
32+
/* Build a Treap from the node structure (a trp_node w/ offset) */
/*
 * NOTE(review): presumably this expansion supplies the tree_search() and
 * tree_insert() functions used by pool_intern(), ordered by node_cmp() --
 * confirm against trp.h.
 */
33+
trp_gen(static, tree_, struct node, children, node, node_cmp);
34+
35+
/*
 * Look up the string behind a key returned by pool_intern().
 * The key is resolved to a node with node_pointer(); if that yields
 * NULL, so does this function.
 */
const char *pool_fetch(uint32_t entry)
{
	struct node *found = node_pointer(entry);

	return node_value(found);
}
39+
40+
uint32_t pool_intern(const char *key)
41+
{
42+
/* Canonicalize key */
43+
struct node *match = NULL, *node;
44+
uint32_t key_len;
45+
if (key == NULL)
46+
return ~0;
47+
key_len = strlen(key) + 1;
48+
node = node_pointer(node_alloc(1));
49+
node->offset = string_alloc(key_len);
50+
strcpy(node_value(node), key);
51+
match = tree_search(&tree, node);
52+
if (!match) {
53+
tree_insert(&tree, node);
54+
} else {
55+
node_free(1);
56+
string_free(key_len);
57+
node = match;
58+
}
59+
return node_offset(node);
60+
}
61+
62+
/*
 * strtok_r() counterpart that hands back interned keys instead of
 * pointers: the next token of str (or of the saved position when str
 * is NULL) is interned and its key returned.  Returns ~0 once
 * strtok_r() reports no further token.
 */
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
{
	char *tok = strtok_r(str, delim, saveptr);

	if (!tok)
		return ~0;
	return pool_intern(tok);
}
67+
68+
/*
 * Write the strings named by seq[0..len-1] to stream, putting delim
 * between consecutive strings.  Output stops early at the first ~0
 * entry; no delimiter follows the last string written.
 */
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
{
	uint32_t pos;

	for (pos = 0; pos < len; pos++) {
		/* ~0 marks the end of the sequence. */
		if (!~seq[pos])
			break;
		/* Separator goes in front of every string but the first. */
		if (pos)
			fputc(delim, stream);
		fputs(pool_fetch(seq[pos]), stream);
	}
}
77+
78+
/*
 * Tokenize str on the characters in delim and store the interned key
 * of each token into seq, which has room for sz entries.
 *
 * Returns ~0 without touching seq when sz is 0.  Otherwise seq is
 * always terminated with ~0: if every token fits ahead of the
 * terminator, the return value is the number of tokens stored; if not,
 * the last slot is overwritten with ~0 and sz is returned.
 * A NULL str is treated as having no tokens.
 */
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
{
	char *state = NULL;
	uint32_t key;
	uint32_t count = 0;

	if (!sz)
		return ~0;

	key = str ? pool_tok_r(str, delim, &state) : ~0;
	while (count < sz) {
		seq[count] = key;
		if (key == ~0)
			return count;
		key = pool_tok_r(NULL, delim, &state);
		count++;
	}

	/* Ran out of room: sacrifice the last slot for the terminator. */
	seq[sz - 1] = ~0;
	return sz;
}
97+
98+
void pool_reset(void)
99+
{
100+
node_reset();
101+
string_reset();
102+
}

vcs-svn/string_pool.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/*
 * Interface of the string pool: strings are replaced by uint32_t keys,
 * with ~0 serving as the "no string" key.
 * This header includes nothing itself, so uint32_t and FILE must already
 * be declared by the including file.
 */
#ifndef STRING_POOL_H_
2+
#define STRING_POOL_H_
3+
4+
/* Return the key for key's string, adding it to the pool if needed; NULL gives ~0. */
uint32_t pool_intern(const char *key);
5+
/* Return the string stored under the given key. */
const char *pool_fetch(uint32_t entry);
6+
/* Like strtok_r(), but returns the interned key of the token, or ~0 when none is left. */
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
7+
/* Write up to len strings named by seq to stream, separated by delim; stops at a ~0 entry. */
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
8+
/* Split str on delim into at most sz entries of seq (~0-terminated); returns ~0 if sz is 0. */
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
9+
/* Reset the node pool and the string pool. */
void pool_reset(void);
10+
11+
#endif

vcs-svn/string_pool.txt

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
string_pool API
2+
===============
3+
4+
The string_pool API provides facilities for replacing strings
5+
with integer keys that can be more easily compared and stored.
6+
The facilities are designed so that one could teach Git without
7+
too much trouble to store the information needed for these keys to
8+
remain valid over multiple executions.
9+
10+
Functions
11+
---------
12+
13+
pool_intern::
14+
Include a string in the string pool and get its key.
15+
If that string is already in the pool, retrieve its
16+
existing key.
17+
18+
pool_fetch::
19+
Retrieve the string associated with a given key.
20+
21+
pool_tok_r::
22+
Extract the key of the next token from a string.
23+
Interface mimics strtok_r.
24+
25+
pool_print_seq::
26+
Print a sequence of strings named by key to a file, using the
27+
specified delimiter to separate them.
28+
29+
If NULL (key ~0) appears in the sequence, the sequence ends
30+
early.
31+
32+
pool_tok_seq::
33+
Split a string into tokens, storing the keys of segments
34+
into a caller-provided array.
35+
36+
Unless sz is 0, the array will always be ~0-terminated.
37+
If there is not enough room for all the tokens, the
38+
array holds as many tokens as fit in the entries before
39+
the terminating ~0. Return value is the index after the
40+
last token, or sz if the tokens did not fit.
41+
42+
pool_reset::
43+
Deallocate storage for the string pool.

0 commit comments

Comments
 (0)