Skip to content

Commit 039dc71

Browse files
kblees authored and gitster committed
hashmap: factor out getting a hash code from a SHA1
Copying the first bytes of a SHA1 is duplicated in six places; however, the implications (the actual value would depend on the endianness of the platform) are documented only once. Add a properly documented API for this. Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 6f92e5f commit 039dc71

File tree

8 files changed

+29
-40
lines changed

8 files changed

+29
-40
lines changed

Documentation/technical/api-hashmap.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,15 @@ Functions
5858
+
5959
`strihash` and `memihash` are case insensitive versions.
6060

61+
`unsigned int sha1hash(const unsigned char *sha1)`::
62+
63+
Converts a cryptographic hash (e.g. SHA-1) into an int-sized hash code
64+
for use in hash tables. Cryptographic hashes are supposed to have
65+
uniform distribution, so in contrast to `memhash()`, this just copies
66+
the first `sizeof(int)` bytes without shuffling any bits. Note that
67+
the results will be different on big-endian and little-endian
68+
platforms, so they should not be stored or transferred over the net.
69+
6170
`void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size)`::
6271

6372
Initializes a hashmap structure.

builtin/describe.c

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,10 @@ static int commit_name_cmp(const struct commit_name *cn1,
5656
return hashcmp(cn1->peeled, peeled ? peeled : cn2->peeled);
5757
}
5858

59-
static inline unsigned int hash_sha1(const unsigned char *sha1)
60-
{
61-
unsigned int hash;
62-
memcpy(&hash, sha1, sizeof(hash));
63-
return hash;
64-
}
65-
6659
static inline struct commit_name *find_commit_name(const unsigned char *peeled)
6760
{
6861
struct commit_name key;
69-
hashmap_entry_init(&key, hash_sha1(peeled));
62+
hashmap_entry_init(&key, sha1hash(peeled));
7063
return hashmap_get(&names, &key, peeled);
7164
}
7265

@@ -114,7 +107,7 @@ static void add_to_known_names(const char *path,
114107
if (!e) {
115108
e = xmalloc(sizeof(struct commit_name));
116109
hashcpy(e->peeled, peeled);
117-
hashmap_entry_init(e, hash_sha1(peeled));
110+
hashmap_entry_init(e, sha1hash(peeled));
118111
hashmap_add(&names, e);
119112
e->path = NULL;
120113
}

decorate.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@
88

99
static unsigned int hash_obj(const struct object *obj, unsigned int n)
1010
{
11-
unsigned int hash;
12-
13-
memcpy(&hash, obj->sha1, sizeof(unsigned int));
14-
return hash % n;
11+
return sha1hash(obj->sha1) % n;
1512
}
1613

1714
static void *insert_decoration(struct decoration *n, const struct object *base, void *decoration)

diffcore-rename.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,14 +242,12 @@ struct file_similarity {
242242

243243
static unsigned int hash_filespec(struct diff_filespec *filespec)
244244
{
245-
unsigned int hash;
246245
if (!filespec->sha1_valid) {
247246
if (diff_populate_filespec(filespec, 0))
248247
return 0;
249248
hash_sha1_file(filespec->data, filespec->size, "blob", filespec->sha1);
250249
}
251-
memcpy(&hash, filespec->sha1, sizeof(hash));
252-
return hash;
250+
return sha1hash(filespec->sha1);
253251
}
254252

255253
static int find_identical_files(struct hashmap *srcs,

hashmap.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,17 @@ extern unsigned int strihash(const char *buf);
1313
extern unsigned int memhash(const void *buf, size_t len);
1414
extern unsigned int memihash(const void *buf, size_t len);
1515

16+
static inline unsigned int sha1hash(const unsigned char *sha1)
17+
{
18+
/*
19+
* Equivalent to 'return *(unsigned int *)sha1;', but safe on
20+
* platforms that don't support unaligned reads.
21+
*/
22+
unsigned int hash;
23+
memcpy(&hash, sha1, sizeof(hash));
24+
return hash;
25+
}
26+
1627
/* data structures */
1728

1829
struct hashmap_entry {

khash.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -320,19 +320,12 @@ static const double __ac_HASH_UPPER = 0.77;
320320
code; \
321321
} }
322322

323-
static inline khint_t __kh_oid_hash(const unsigned char *oid)
324-
{
325-
khint_t hash;
326-
memcpy(&hash, oid, sizeof(hash));
327-
return hash;
328-
}
329-
330323
#define __kh_oid_cmp(a, b) (hashcmp(a, b) == 0)
331324

332-
KHASH_INIT(sha1, const unsigned char *, void *, 1, __kh_oid_hash, __kh_oid_cmp)
325+
KHASH_INIT(sha1, const unsigned char *, void *, 1, sha1hash, __kh_oid_cmp)
333326
typedef kh_sha1_t khash_sha1;
334327

335-
KHASH_INIT(sha1_pos, const unsigned char *, int, 1, __kh_oid_hash, __kh_oid_cmp)
328+
KHASH_INIT(sha1_pos, const unsigned char *, int, 1, sha1hash, __kh_oid_cmp)
336329
typedef kh_sha1_pos_t khash_sha1_pos;
337330

338331
#endif /* __AC_KHASH_H */

object.c

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,7 @@ int type_from_string(const char *str)
5050
*/
5151
static unsigned int hash_obj(const unsigned char *sha1, unsigned int n)
5252
{
53-
unsigned int hash;
54-
55-
/*
56-
* Since the sha1 is essentially random, we just take the
57-
* required number of bits directly from the first
58-
* sizeof(unsigned int) bytes of sha1. First we have to copy
59-
* the bytes into a properly aligned integer. If we cared
60-
* about getting consistent results across architectures, we
61-
* would have to call ntohl() here, too.
62-
*/
63-
memcpy(&hash, sha1, sizeof(unsigned int));
64-
return hash & (n - 1);
53+
return sha1hash(sha1) & (n - 1);
6554
}
6655

6756
/*

pack-objects.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,9 @@ static uint32_t locate_object_entry_hash(struct packing_data *pdata,
77
const unsigned char *sha1,
88
int *found)
99
{
10-
uint32_t i, hash, mask = (pdata->index_size - 1);
10+
uint32_t i, mask = (pdata->index_size - 1);
1111

12-
memcpy(&hash, sha1, sizeof(uint32_t));
13-
i = hash & mask;
12+
i = sha1hash(sha1) & mask;
1413

1514
while (pdata->index[i] > 0) {
1615
uint32_t pos = pdata->index[i] - 1;

0 commit comments

Comments
 (0)