| 1 | // Copyright (c) 2005, Google Inc. |
|---|
| 2 | // All rights reserved. |
|---|
| 3 | // |
|---|
| 4 | // Redistribution and use in source and binary forms, with or without |
|---|
| 5 | // modification, are permitted provided that the following conditions are |
|---|
| 6 | // met: |
|---|
| 7 | // |
|---|
| 8 | // * Redistributions of source code must retain the above copyright |
|---|
| 9 | // notice, this list of conditions and the following disclaimer. |
|---|
| 10 | // * Redistributions in binary form must reproduce the above |
|---|
| 11 | // copyright notice, this list of conditions and the following disclaimer |
|---|
| 12 | // in the documentation and/or other materials provided with the |
|---|
| 13 | // distribution. |
|---|
| 14 | // * Neither the name of Google Inc. nor the names of its |
|---|
| 15 | // contributors may be used to endorse or promote products derived from |
|---|
| 16 | // this software without specific prior written permission. |
|---|
| 17 | // |
|---|
| 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|---|
| 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|---|
| 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|---|
| 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|---|
| 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|---|
| 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|---|
| 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|---|
| 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|---|
| 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|---|
| 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|---|
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 29 | |
|---|
| 30 | // --- |
|---|
| 31 | // Author: Craig Silverstein |
|---|
| 32 | // |
|---|
| 33 | // This is just a very thin wrapper over sparsehashtable.h, just |
|---|
| 34 | // like sgi stl's stl_hash_map is a very thin wrapper over |
|---|
| 35 | // stl_hashtable. The major thing we define is operator[], because |
|---|
| 36 | // we have a concept of a data_type which stl_hashtable doesn't |
|---|
| 37 | // (it only has a key and a value). |
|---|
| 38 | // |
|---|
| 39 | // We adhere mostly to the STL semantics for hash-map. One important |
|---|
| 40 | // exception is that insert() may invalidate iterators entirely -- STL |
|---|
| 41 | // semantics are that insert() may reorder iterators, but they all |
|---|
| 42 | // still refer to something valid in the hashtable. Not so for us. |
|---|
| 43 | // Likewise, insert() may invalidate pointers into the hashtable. |
|---|
| 44 | // (Whether insert invalidates iterators and pointers depends on |
|---|
| 45 | // whether it results in a hashtable resize). On the plus side, |
|---|
| 46 | // delete() doesn't invalidate iterators or pointers at all, or even |
|---|
| 47 | // change the ordering of elements. |
|---|
| 48 | // |
|---|
| 49 | // Here are a few "power user" tips: |
|---|
| 50 | // |
|---|
| 51 | // 1) set_deleted_key(): |
|---|
| 52 | // Unlike STL's hash_map, if you want to use erase() you |
|---|
| 53 | // *must* call set_deleted_key() after construction. |
|---|
| 54 | // |
|---|
| 55 | // 2) resize(0): |
|---|
| 56 | // When an item is deleted, its memory isn't freed right |
|---|
| 57 | // away. This is what allows you to iterate over a hashtable |
|---|
| 58 | // and call erase() without invalidating the iterator. |
|---|
| 59 | // To force the memory to be freed, call resize(0). |
|---|
| 60 | // For tr1 compatibility, this can also be called as rehash(0). |
|---|
| 61 | // |
|---|
| 62 | // 3) min_load_factor(0.0) |
|---|
| 63 | // Setting the minimum load factor to 0.0 guarantees that |
|---|
| 64 | // the hash table will never shrink. |
|---|
| 65 | // |
|---|
| 66 | // Roughly speaking: |
|---|
| 67 | // (1) dense_hash_map: fastest, uses the most memory unless entries are small |
|---|
| 68 | // (2) sparse_hash_map: slowest, uses the least memory |
|---|
| 69 | // (3) hash_map / unordered_map (STL): in the middle |
|---|
| 70 | // |
|---|
| 71 | // Typically I use sparse_hash_map when I care about space and/or when |
|---|
| 72 | // I need to save the hashtable on disk. I use hash_map otherwise. I |
|---|
| 73 | // don't personally use dense_hash_map ever; some people use it for |
|---|
| 74 | // small maps with lots of lookups. |
|---|
| 75 | // |
|---|
| 76 | // - dense_hash_map has, typically, about 78% memory overhead (if your |
|---|
| 77 | // data takes up X bytes, the hash_map uses .78X more bytes in overhead). |
|---|
| 78 | // - sparse_hash_map has about 4 bits overhead per entry. |
|---|
| 79 | // - sparse_hash_map can be 3-7 times slower than the others for lookup and, |
|---|
| 80 | // especially, inserts. See time_hash_map.cc for details. |
|---|
| 81 | // |
|---|
| 82 | // See /usr/(local/)?doc/sparsehash-*/sparse_hash_map.html |
|---|
| 83 | // for information about how to use this class. |
|---|
| 84 | |
|---|
| 85 | #ifndef _SPARSE_HASH_MAP_H_ |
|---|
| 86 | #define _SPARSE_HASH_MAP_H_ |
|---|
| 87 | |
|---|
| 88 | #include <google/sparsehash/sparseconfig.h> |
|---|
| 89 | #include <stdio.h> // for FILE * in read()/write() |
|---|
| 90 | #include <algorithm> // for the default template args |
|---|
| 91 | #include <functional> // for equal_to |
|---|
| 92 | #include <memory> // for alloc<> |
|---|
| 93 | #include <utility> // for pair<> |
|---|
| 94 | #include HASH_FUN_H // defined in config.h |
|---|
| 95 | #include <google/sparsehash/sparsehashtable.h> |
|---|
| 96 | |
|---|
| 97 | |
|---|
| 98 | _START_GOOGLE_NAMESPACE_ |
|---|
| 99 | |
|---|
| 100 | using STL_NAMESPACE::pair; |
|---|
| 101 | |
|---|
| 102 | template <class Key, class T, |
|---|
| 103 | class HashFcn = SPARSEHASH_HASH<Key>, // defined in sparseconfig.h |
|---|
| 104 | class EqualKey = STL_NAMESPACE::equal_to<Key>, |
|---|
| 105 | class Alloc = STL_NAMESPACE::allocator<T> > |
|---|
| 106 | class sparse_hash_map { |
|---|
| 107 | private: |
|---|
| 108 | // Apparently select1st is not stl-standard, so we define our own |
|---|
| 109 | struct SelectKey { |
|---|
| 110 | const Key& operator()(const pair<const Key, T>& p) const { |
|---|
| 111 | return p.first; |
|---|
| 112 | } |
|---|
| 113 | }; |
|---|
| 114 | struct SetKey { |
|---|
| 115 | void operator()(pair<const Key, T>* value, const Key& new_key) const { |
|---|
| 116 | *const_cast<Key*>(&value->first) = new_key; |
|---|
| 117 | // It would be nice to clear the rest of value here as well, in |
|---|
| 118 | // case it's taking up a lot of memory. We do this by clearing |
|---|
| 119 | // the value. This assumes T has a zero-arg constructor! |
|---|
| 120 | value->second = T(); |
|---|
| 121 | } |
|---|
| 122 | }; |
|---|
| 123 | |
|---|
| 124 | // The actual data |
|---|
| 125 | typedef sparse_hashtable<pair<const Key, T>, Key, HashFcn, |
|---|
| 126 | SelectKey, SetKey, EqualKey, Alloc> ht; |
|---|
| 127 | ht rep; |
|---|
| 128 | |
|---|
| 129 | public: |
|---|
| 130 | typedef typename ht::key_type key_type; |
|---|
| 131 | typedef T data_type; |
|---|
| 132 | typedef T mapped_type; |
|---|
| 133 | typedef typename ht::value_type value_type; |
|---|
| 134 | typedef typename ht::hasher hasher; |
|---|
| 135 | typedef typename ht::key_equal key_equal; |
|---|
| 136 | typedef Alloc allocator_type; |
|---|
| 137 | |
|---|
| 138 | typedef typename ht::size_type size_type; |
|---|
| 139 | typedef typename ht::difference_type difference_type; |
|---|
| 140 | typedef typename ht::pointer pointer; |
|---|
| 141 | typedef typename ht::const_pointer const_pointer; |
|---|
| 142 | typedef typename ht::reference reference; |
|---|
| 143 | typedef typename ht::const_reference const_reference; |
|---|
| 144 | |
|---|
| 145 | typedef typename ht::iterator iterator; |
|---|
| 146 | typedef typename ht::const_iterator const_iterator; |
|---|
| 147 | typedef typename ht::local_iterator local_iterator; |
|---|
| 148 | typedef typename ht::const_local_iterator const_local_iterator; |
|---|
| 149 | |
|---|
| 150 | // Iterator functions |
|---|
| 151 | iterator begin() { return rep.begin(); } |
|---|
| 152 | iterator end() { return rep.end(); } |
|---|
| 153 | const_iterator begin() const { return rep.begin(); } |
|---|
| 154 | const_iterator end() const { return rep.end(); } |
|---|
| 155 | |
|---|
| 156 | // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. |
|---|
| 157 | local_iterator begin(size_type i) { return rep.begin(i); } |
|---|
| 158 | local_iterator end(size_type i) { return rep.end(i); } |
|---|
| 159 | const_local_iterator begin(size_type i) const { return rep.begin(i); } |
|---|
| 160 | const_local_iterator end(size_type i) const { return rep.end(i); } |
|---|
| 161 | |
|---|
| 162 | // Accessor functions |
|---|
| 163 | // TODO(csilvers): implement Alloc get_allocator() const; |
|---|
| 164 | hasher hash_funct() const { return rep.hash_funct(); } |
|---|
| 165 | hasher hash_function() const { return hash_funct(); } |
|---|
| 166 | key_equal key_eq() const { return rep.key_eq(); } |
|---|
| 167 | |
|---|
| 168 | |
|---|
| 169 | // Constructors |
|---|
| 170 | explicit sparse_hash_map(size_type expected_max_items_in_table = 0, |
|---|
| 171 | const hasher& hf = hasher(), |
|---|
| 172 | const key_equal& eql = key_equal()) |
|---|
| 173 | : rep(expected_max_items_in_table, hf, eql) { } |
|---|
| 174 | |
|---|
| 175 | template <class InputIterator> |
|---|
| 176 | sparse_hash_map(InputIterator f, InputIterator l, |
|---|
| 177 | size_type expected_max_items_in_table = 0, |
|---|
| 178 | const hasher& hf = hasher(), |
|---|
| 179 | const key_equal& eql = key_equal()) |
|---|
| 180 | : rep(expected_max_items_in_table, hf, eql) { |
|---|
| 181 | rep.insert(f, l); |
|---|
| 182 | } |
|---|
| 183 | // We use the default copy constructor |
|---|
| 184 | // We use the default operator=() |
|---|
| 185 | // We use the default destructor |
|---|
| 186 | |
|---|
| 187 | void clear() { rep.clear(); } |
|---|
| 188 | void swap(sparse_hash_map& hs) { rep.swap(hs.rep); } |
|---|
| 189 | |
|---|
| 190 | |
|---|
| 191 | // Functions concerning size |
|---|
| 192 | size_type size() const { return rep.size(); } |
|---|
| 193 | size_type max_size() const { return rep.max_size(); } |
|---|
| 194 | bool empty() const { return rep.empty(); } |
|---|
| 195 | size_type bucket_count() const { return rep.bucket_count(); } |
|---|
| 196 | size_type max_bucket_count() const { return rep.max_bucket_count(); } |
|---|
| 197 | |
|---|
| 198 | // These are tr1 methods. bucket() is the bucket the key is or would be in. |
|---|
| 199 | size_type bucket_size(size_type i) const { return rep.bucket_size(i); } |
|---|
| 200 | size_type bucket(const key_type& key) const { return rep.bucket(key); } |
|---|
| 201 | float load_factor() const { |
|---|
| 202 | return size() * 1.0f / bucket_count(); |
|---|
| 203 | } |
|---|
| 204 | float max_load_factor() const { |
|---|
| 205 | float shrink, grow; |
|---|
| 206 | rep.get_resizing_parameters(&shrink, &grow); |
|---|
| 207 | return grow; |
|---|
| 208 | } |
|---|
| 209 | void max_load_factor(float new_grow) { |
|---|
| 210 | float shrink, grow; |
|---|
| 211 | rep.get_resizing_parameters(&shrink, &grow); |
|---|
| 212 | rep.set_resizing_parameters(shrink, new_grow); |
|---|
| 213 | } |
|---|
| 214 | // These aren't tr1 methods but perhaps ought to be. |
|---|
| 215 | float min_load_factor() const { |
|---|
| 216 | float shrink, grow; |
|---|
| 217 | rep.get_resizing_parameters(&shrink, &grow); |
|---|
| 218 | return shrink; |
|---|
| 219 | } |
|---|
| 220 | void min_load_factor(float new_shrink) { |
|---|
| 221 | float shrink, grow; |
|---|
| 222 | rep.get_resizing_parameters(&shrink, &grow); |
|---|
| 223 | rep.set_resizing_parameters(new_shrink, grow); |
|---|
| 224 | } |
|---|
| 225 | // Deprecated; use min_load_factor() or max_load_factor() instead. |
|---|
| 226 | void set_resizing_parameters(float shrink, float grow) { |
|---|
| 227 | return rep.set_resizing_parameters(shrink, grow); |
|---|
| 228 | } |
|---|
| 229 | |
|---|
| 230 | void resize(size_type hint) { rep.resize(hint); } |
|---|
| 231 | void rehash(size_type hint) { resize(hint); } // the tr1 name |
|---|
| 232 | |
|---|
| 233 | // Lookup routines |
|---|
| 234 | iterator find(const key_type& key) { return rep.find(key); } |
|---|
| 235 | const_iterator find(const key_type& key) const { return rep.find(key); } |
|---|
| 236 | |
|---|
| 237 | data_type& operator[](const key_type& key) { // This is our value-add! |
|---|
| 238 | iterator it = find(key); |
|---|
| 239 | if (it != end()) { |
|---|
| 240 | return it->second; |
|---|
| 241 | } else { |
|---|
| 242 | return insert(value_type(key, data_type())).first->second; |
|---|
| 243 | } |
|---|
| 244 | } |
|---|
| 245 | |
|---|
| 246 | size_type count(const key_type& key) const { return rep.count(key); } |
|---|
| 247 | |
|---|
| 248 | pair<iterator, iterator> equal_range(const key_type& key) { |
|---|
| 249 | return rep.equal_range(key); |
|---|
| 250 | } |
|---|
| 251 | pair<const_iterator, const_iterator> equal_range(const key_type& key) const { |
|---|
| 252 | return rep.equal_range(key); |
|---|
| 253 | } |
|---|
| 254 | |
|---|
| 255 | // Insertion routines |
|---|
| 256 | pair<iterator, bool> insert(const value_type& obj) { return rep.insert(obj); } |
|---|
| 257 | template <class InputIterator> |
|---|
| 258 | void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } |
|---|
| 259 | void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } |
|---|
| 260 | // required for std::insert_iterator; the passed-in iterator is ignored |
|---|
| 261 | iterator insert(iterator, const value_type& obj) { return insert(obj).first; } |
|---|
| 262 | |
|---|
| 263 | |
|---|
| 264 | // Deletion routines |
|---|
| 265 | // THESE ARE NON-STANDARD! I make you specify an "impossible" key |
|---|
| 266 | // value to identify deleted buckets. You can change the key as |
|---|
| 267 | // time goes on, or get rid of it entirely to be insert-only. |
|---|
| 268 | void set_deleted_key(const key_type& key) { |
|---|
| 269 | rep.set_deleted_key(key); |
|---|
| 270 | } |
|---|
| 271 | void clear_deleted_key() { rep.clear_deleted_key(); } |
|---|
| 272 | key_type deleted_key() const { return rep.deleted_key(); } |
|---|
| 273 | |
|---|
| 274 | // These are standard |
|---|
| 275 | size_type erase(const key_type& key) { return rep.erase(key); } |
|---|
| 276 | void erase(iterator it) { rep.erase(it); } |
|---|
| 277 | void erase(iterator f, iterator l) { rep.erase(f, l); } |
|---|
| 278 | |
|---|
| 279 | |
|---|
| 280 | // Comparison |
|---|
| 281 | bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; } |
|---|
| 282 | bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; } |
|---|
| 283 | |
|---|
| 284 | |
|---|
| 285 | // I/O -- this is an add-on for writing metainformation to disk |
|---|
| 286 | bool write_metadata(FILE *fp) { return rep.write_metadata(fp); } |
|---|
| 287 | bool read_metadata(FILE *fp) { return rep.read_metadata(fp); } |
|---|
| 288 | bool write_nopointer_data(FILE *fp) { return rep.write_nopointer_data(fp); } |
|---|
| 289 | bool read_nopointer_data(FILE *fp) { return rep.read_nopointer_data(fp); } |
|---|
| 290 | }; |
|---|
| 291 | |
|---|
| 292 | // We need a global swap as well |
|---|
| 293 | template <class Key, class T, class HashFcn, class EqualKey, class Alloc> |
|---|
| 294 | inline void swap(sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1, |
|---|
| 295 | sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2) { |
|---|
| 296 | hm1.swap(hm2); |
|---|
| 297 | } |
|---|
| 298 | |
|---|
| 299 | _END_GOOGLE_NAMESPACE_ |
|---|
| 300 | |
|---|
| 301 | #endif /* _SPARSE_HASH_MAP_H_ */ |
|---|