blob: 42ca2347edd2e13c5e3132a077ac96777bbefa11 [file] [log] [blame]
Junio C Hamano5b3533d2014-02-27 23:07:151hashmap API
2===========
3
4The hashmap API is a generic implementation of hash-based key-value mappings.
5
6Data Structures
7---------------
8
9`struct hashmap`::
10
11The hash table structure.
12+
13The `size` member keeps track of the total number of entries. The `cmpfn`
14member is a function used to compare two entries for equality. The `table` and
15`tablesize` members store the hash table and its size, respectively.
16
17`struct hashmap_entry`::
18
19An opaque structure representing an entry in the hash table, which must
20be used as first member of user data structures. Ideally it should be
21followed by an int-sized member to prevent unused memory on 64-bit
22systems due to alignment.
23+
24The `hash` member is the entry's hash code and the `next` member points to the
25next entry in case of collisions (i.e. if multiple entries map to the same
26bucket).
27
28`struct hashmap_iter`::
29
30An iterator structure, to be used with hashmap_iter_* functions.
31
32Types
33-----
34
35`int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key, const void *keydata)`::
36
37User-supplied function to test two hashmap entries for equality. Shall
38return 0 if the entries are equal.
39+
40This function is always called with non-NULL `entry` / `entry_or_key`
41parameters that have the same hash code. When looking up an entry, the `key`
42and `keydata` parameters to hashmap_get and hashmap_remove are always passed
43as second and third argument, respectively. Otherwise, `keydata` is NULL.
44
45Functions
46---------
47
48`unsigned int strhash(const char *buf)`::
49`unsigned int strihash(const char *buf)`::
50`unsigned int memhash(const void *buf, size_t len)`::
51`unsigned int memihash(const void *buf, size_t len)`::
52
53Ready-to-use hash functions for strings, using the FNV-1 algorithm (see
54http://www.isthe.com/chongo/tech/comp/fnv).
55+
56`strhash` and `strihash` take 0-terminated strings, while `memhash` and
57`memihash` operate on arbitrary-length memory.
58+
59`strihash` and `memihash` are case insensitive versions.
60
61`void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size)`::
62
63Initializes a hashmap structure.
64+
65`map` is the hashmap to initialize.
66+
67The `equals_function` can be specified to compare two entries for equality.
68If NULL, entries are considered equal if their hash codes are equal.
69+
70If the total number of entries is known in advance, the `initial_size`
71parameter may be used to preallocate a sufficiently large table and thus
72prevent expensive resizing. If 0, the table is dynamically resized.
73
74`void hashmap_free(struct hashmap *map, int free_entries)`::
75
76Frees a hashmap structure and allocated memory.
77+
78`map` is the hashmap to free.
79+
80If `free_entries` is true, each hashmap_entry in the map is freed as well
81(using stdlib's free()).
82
83`void hashmap_entry_init(void *entry, unsigned int hash)`::
84
85Initializes a hashmap_entry structure.
86+
87`entry` points to the entry to initialize.
88+
89`hash` is the hash code of the entry.
90
91`void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)`::
92
93Returns the hashmap entry for the specified key, or NULL if not found.
94+
95`map` is the hashmap structure.
96+
97`key` is a hashmap_entry structure (or user data structure that starts with
98hashmap_entry) that has at least been initialized with the proper hash code
99(via `hashmap_entry_init`).
100+
101If an entry with matching hash code is found, `key` and `keydata` are passed
102to `hashmap_cmp_fn` to decide whether the entry matches the key.
103
104`void *hashmap_get_next(const struct hashmap *map, const void *entry)`::
105
106Returns the next equal hashmap entry, or NULL if not found. This can be
107used to iterate over duplicate entries (see `hashmap_add`).
108+
109`map` is the hashmap structure.
110+
111`entry` is the hashmap_entry to start the search from, obtained via a previous
112call to `hashmap_get` or `hashmap_get_next`.
113
114`void hashmap_add(struct hashmap *map, void *entry)`::
115
116Adds a hashmap entry. This allows to add duplicate entries (i.e.
117separate values with the same key according to hashmap_cmp_fn).
118+
119`map` is the hashmap structure.
120+
121`entry` is the entry to add.
122
123`void *hashmap_put(struct hashmap *map, void *entry)`::
124
125Adds or replaces a hashmap entry. If the hashmap contains duplicate
126entries equal to the specified entry, only one of them will be replaced.
127+
128`map` is the hashmap structure.
129+
130`entry` is the entry to add or replace.
131+
132Returns the replaced entry, or NULL if not found (i.e. the entry was added).
133
134`void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)`::
135
136Removes a hashmap entry matching the specified key. If the hashmap
137contains duplicate entries equal to the specified key, only one of
138them will be removed.
139+
140`map` is the hashmap structure.
141+
142`key` is a hashmap_entry structure (or user data structure that starts with
143hashmap_entry) that has at least been initialized with the proper hash code
144(via `hashmap_entry_init`).
145+
146If an entry with matching hash code is found, `key` and `keydata` are
147passed to `hashmap_cmp_fn` to decide whether the entry matches the key.
148+
149Returns the removed entry, or NULL if not found.
150
151`void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)`::
152`void *hashmap_iter_next(struct hashmap_iter *iter)`::
153`void *hashmap_iter_first(struct hashmap *map, struct hashmap_iter *iter)`::
154
155Used to iterate over all entries of a hashmap.
156+
157`hashmap_iter_init` initializes a `hashmap_iter` structure.
158+
159`hashmap_iter_next` returns the next hashmap_entry, or NULL if there are no
160more entries.
161+
162`hashmap_iter_first` is a combination of both (i.e. initializes the iterator
163and returns the first entry, if any).
164
165Usage example
166-------------
167
168Here's a simple usage example that maps long keys to double values.
169[source,c]
170------------
171struct hashmap map;
172
173struct long2double {
174struct hashmap_entry ent; /* must be the first member! */
175long key;
176double value;
177};
178
179static int long2double_cmp(const struct long2double *e1, const struct long2double *e2, const void *unused)
180{
181return !(e1->key == e2->key);
182}
183
184void long2double_init(void)
185{
186hashmap_init(&map, (hashmap_cmp_fn) long2double_cmp, 0);
187}
188
189void long2double_free(void)
190{
191hashmap_free(&map, 1);
192}
193
194static struct long2double *find_entry(long key)
195{
196struct long2double k;
197hashmap_entry_init(&k, memhash(&key, sizeof(long)));
198k.key = key;
199return hashmap_get(&map, &k, NULL);
200}
201
202double get_value(long key)
203{
204struct long2double *e = find_entry(key);
205return e ? e->value : 0;
206}
207
208void set_value(long key, double value)
209{
210struct long2double *e = find_entry(key);
211if (!e) {
212e = malloc(sizeof(struct long2double));
213hashmap_entry_init(e, memhash(&key, sizeof(long)));
214e->key = key;
215hashmap_add(&map, e);
216}
217e->value = value;
218}
219------------
220
221Using variable-sized keys
222-------------------------
223
224The `hashmap_entry_get` and `hashmap_entry_remove` functions expect an ordinary
225`hashmap_entry` structure as key to find the correct entry. If the key data is
226variable-sized (e.g. a FLEX_ARRAY string) or quite large, it is undesirable
227to create a full-fledged entry structure on the heap and copy all the key data
228into the structure.
229
230In this case, the `keydata` parameter can be used to pass
231variable-sized key data directly to the comparison function, and the `key`
232parameter can be a stripped-down, fixed size entry structure allocated on the
233stack.
234
235See test-hashmap.c for an example using arbitrary-length strings as keys.