LevelDB Code Reading Notes 03: Cache

Overview

The Cache Interface

There isn't much to say here; just note the use of void* to represent a value of arbitrary type. In C++17 and later, std::any could be used instead; see the std::any notes.
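As a rough illustration of the difference (hypothetical entry types, not LevelDB code): with void* the cache must carry a separate deleter that knows the value's real type, while std::any owns the value and destroys it itself.

#include <any>
#include <string>

// Hypothetical sketch: void* needs an external deleter matching the real type;
// std::any carries the type and runs the destructor automatically.
struct VoidPtrEntry {
  void* value;
  void (*deleter)(void* value);  // must match the real type of value
};

struct AnyEntry {
  std::any value;  // std::any_cast recovers the type, checked at runtime
};

int main() {
  VoidPtrEntry e1{new std::string("block"),
                  [](void* v) { delete static_cast<std::string*>(v); }};
  e1.deleter(e1.value);

  AnyEntry e2{std::string("block")};
  std::string s = std::any_cast<std::string>(e2.value);
  (void)s;
}

The full interface (from LevelDB's cache.h):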

namespace leveldb {

class LEVELDB_EXPORT Cache;

// Create a new cache with a fixed size capacity.  This implementation
// of Cache uses a least-recently-used eviction policy.
LEVELDB_EXPORT Cache* NewLRUCache(size_t capacity);

class LEVELDB_EXPORT Cache {
 public:
  Cache() = default;

  Cache(const Cache&) = delete;
  Cache& operator=(const Cache&) = delete;

  // Destroys all existing entries by calling the "deleter"
  // function that was passed to the constructor.
  virtual ~Cache();

  // Opaque handle to an entry stored in the cache.
  struct Handle {};

  // Insert a mapping from key->value into the cache and assign it
  // the specified charge against the total cache capacity.
  //
  // Returns a handle that corresponds to the mapping.  The caller
  // must call this->Release(handle) when the returned mapping is no
  // longer needed.
  //
  // When the inserted entry is no longer needed, the key and
  // value will be passed to "deleter".
  virtual Handle* Insert(const Slice& key, void* value, size_t charge,
                         void (*deleter)(const Slice& key, void* value)) = 0;

  // If the cache has no mapping for "key", returns nullptr.
  //
  // Else return a handle that corresponds to the mapping.  The caller
  // must call this->Release(handle) when the returned mapping is no
  // longer needed.
  virtual Handle* Lookup(const Slice& key) = 0;

  // Release a mapping returned by a previous Lookup().
  // REQUIRES: handle must not have been released yet.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual void Release(Handle* handle) = 0;

  // Return the value encapsulated in a handle returned by a
  // successful Lookup().
  // REQUIRES: handle must not have been released yet.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual void* Value(Handle* handle) = 0;

  // If the cache contains entry for key, erase it.  Note that the
  // underlying entry will be kept around until all existing handles
  // to it have been released.
  virtual void Erase(const Slice& key) = 0;

  // Return a new numeric id.  May be used by multiple clients who are
  // sharing the same cache to partition the key space.  Typically the
  // client will allocate a new id at startup and prepend the id to
  // its cache keys.
  virtual uint64_t NewId() = 0;

  // Remove all cache entries that are not actively in use.  Memory-constrained
  // applications may wish to call this method to reduce memory usage.
  // Default implementation of Prune() does nothing.  Subclasses are strongly
  // encouraged to override the default implementation.  A future release of
  // leveldb may change Prune() to a pure abstract method.
  virtual void Prune() {}

  // Return an estimate of the combined charges of all elements stored in the
  // cache.
  virtual size_t TotalCharge() const = 0;
};

}  // namespace leveldb

#endif  // STORAGE_LEVELDB_INCLUDE_CACHE_H_

LRUHandle

Although this struct appears early in the file, its exact meaning only becomes clear after reading the full implementation.

struct LRUHandle {
  void* value;  // the cached value
  void (*deleter)(const Slice&, void* value);
  LRUHandle* next_hash;  // used by HandleTable: next entry in the same bucket
  LRUHandle* next;   // doubly-linked list
  LRUHandle* prev;   // doubly-linked list
  size_t charge;  // TODO(opt): Only allow uint32_t?
  size_t key_length;
  bool in_cache;     // Whether entry is in the cache.
  uint32_t refs;     // References, including cache reference, if present.
  uint32_t hash;     // Hash of key(); used for fast sharding and comparisons
  char key_data[1];  // Beginning of key
                     // kk: why char[1]? (see the sketch below)

  Slice key() const {
    // next is only equal to this if the LRU handle is the list head of an
    // empty list. List heads never have meaningful keys.
    assert(next != this);

    return Slice(key_data, key_length);
  }
};
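As for the "why char[1]?" question: the answer shows up later in LRUCache::Insert, which allocates the handle with malloc(sizeof(LRUHandle) - 1 + key.size()), so key_data is just the first byte of a larger inline buffer (the classic "struct hack"). A minimal standalone sketch, using a hypothetical Node type:

#include <cassert>
#include <cstdlib>
#include <cstring>

// Hypothetical Node mirroring the key_data[1] trick.
struct Node {
  size_t key_length;
  char key_data[1];  // placeholder: the real key bytes live past the struct's end
};

Node* MakeNode(const char* key, size_t len) {
  // sizeof(Node) already includes 1 byte of key_data, hence the "- 1".
  Node* n = static_cast<Node*>(malloc(sizeof(Node) - 1 + len));
  n->key_length = len;
  memcpy(n->key_data, key, len);  // writes into the extra malloc'd bytes
  return n;
}

int main() {
  Node* n = MakeNode("hello", 5);
  assert(memcmp(n->key_data, "hello", 5) == 0);
  free(n);
}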

HandleTable

A simple closed-addressing (separate chaining) hash table. I previously read through the much more complex ska_flat_hash_map source, but that one uses open addressing. I assumed the two would be broadly similar; it turns out there are plenty of details, so let's go through them one by one. The relevant comments have been added to the code.

FindPointer

This function returns a pointer to the slot holding the entry for the given key, or a pointer to the trailing slot at the end of the corresponding bucket's linked list. Judging from the bitmask used in place of a modulo, the number of buckets must be a power of two.
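A quick sanity check of that claim (plain asserts, not LevelDB code): for a power-of-two length, hash & (length - 1) matches hash % length, and distinct hashes can still land in the same bucket.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t length = 4;                           // must be a power of two
  assert((13u & (length - 1)) == 13u % length);  // bitmask == modulo
  assert((5u & (length - 1)) == 1);              // hash 5 -> bucket 1
  assert((9u & (length - 1)) == 1);              // hash 9 -> bucket 1 as well
}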

A detail worth noting: the while condition actually covers four possible cases.

  • hash values equal, keys equal: this slot holds exactly the element we are looking for.

  • hash values equal, keys not equal: the hash function collided, producing the same hash value for different keys.

  • hash values not equal, keys not equal: the ordinary no-collision case. Note: why do entries with different hash values end up on the same linked list at all? Because different hash values can still map to the same bucket after hash & (length_ - 1).

  • hash values not equal, keys equal: impossible. For the same key, as long as the hash function is unchanged, the resulting hash value must be the same.

The loop condition actually combines cases 1 through 3:

((*ptr)->hash != hash || key != (*ptr)->key())
  // Return a pointer to slot that points to a cache entry that
  // matches key/hash.  If there is no such cache entry, return a
  // pointer to the trailing slot in the corresponding linked list.
  LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
    // kk: length_ must be a power of 2
    LRUHandle** ptr = &list_[hash & (length_ - 1)];
    // kk: ptr is the bucket position
    // kk: four cases in the loop condition:
    // 1: hash == ptr->hash, key == ptr->key. The exact element we want; stop.
    // 2: hash == ptr->hash, key != ptr->key. Hash function collision; try the next node.
    // 3: hash != ptr->hash, key != ptr->key. General case for different keys; try the next node.
    //    Note this case is possible: different hash values can give the same
    //    hash & (length_ - 1), so handles with different hashes may share a bucket.
    // 4: hash != ptr->hash, key == ptr->key. Impossible.
    // The condition below combines cases 1-3.
    while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
      ptr = &(*ptr)->next_hash;
    }
    // kk: returns either the last node's next_hash slot (holding nullptr),
    // or the slot pointing at the exact element we found
    return ptr;
  }

Insert

Inserts an element. Note that when the key already exists, old comes back non-nullptr: the existing element is replaced, and the new element's next_hash is updated to take over its chain. Returns nullptr if the key was not present before; otherwise returns the element that was replaced.

  // return nullptr if h's key was not in the table
  // return the replaced element if it was already in the table

  LRUHandle* Insert(LRUHandle* h) {
    LRUHandle** ptr = FindPointer(h->key(), h->hash);
    LRUHandle* old = *ptr;
    // kk: two cases here:
    // 1. old is nullptr: the key was not found.
    // 2. old is not nullptr: the key exists; h takes over old's next_hash.
    h->next_hash = (old == nullptr ? nullptr : old->next_hash);
    // kk: assign the new element h to the slot, possibly replacing the old value
    *ptr = h;
    if (old == nullptr) {
      ++elems_;
      if (elems_ > length_) {
        // Since each cache entry is fairly large, we aim for a small
        // average linked list length (<= 1).
        Resize();
      }
    }
    return old;
  }
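The interesting part is the *ptr = h assignment: because FindPointer returns a pointer to a slot, overwriting the slot splices h into the chain in place of old. A minimal sketch of the idiom, with a hypothetical Node type:

#include <cassert>

// Sketch of the *ptr = h replacement idiom, not LevelDB code: overwriting the
// slot splices the new node in place of the old one.
struct Node { int key; Node* next; };

int main() {
  Node c{3, nullptr}, b{2, &c}, a{1, &b};
  Node* head = &a;

  Node** ptr = &head;                 // slot pointing at the current node
  while (*ptr && (*ptr)->key != 2) ptr = &(*ptr)->next;

  Node b2{2, nullptr};                // replacement node with the same key
  Node* old = *ptr;                   // old == &b
  b2.next = old->next;                // inherit the tail (like h->next_hash = old->next_hash)
  *ptr = &b2;                         // the slot now points at the new node

  assert(head->next == &b2 && b2.next == &c && old == &b);
}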

Remove

Find the slot pointing at the element to delete, then splice the following node forward.

Returns nullptr if no such element exists; otherwise returns the removed element.

  // return nullptr if no such key
  // otherwise return the element just removed

  LRUHandle* Remove(const Slice& key, uint32_t hash) {
    LRUHandle** ptr = FindPointer(key, hash);
    LRUHandle* result = *ptr;
    // kk: if result is nullptr, there is no such key in the table; do nothing.
    if (result != nullptr) {
      // kk: link in the next node so the linked list does not break
      *ptr = result->next_hash;
      --elems_;
    }
    return result;
  }

Resize

Generally called a rehash. As predicted above, length_ is indeed always a power of two. When the hash table gets fairly full (and at construction time), this operation copies the existing elements into their new buckets.

  // kk: rehash operation
  // also invoked from the constructor
  void Resize() {
    uint32_t new_length = 4;
    // kk: keep doubling until the average linked list length is <= 1
    while (new_length < elems_) {
      new_length *= 2;
    }

    LRUHandle** new_list = new LRUHandle*[new_length];
    memset(new_list, 0, sizeof(new_list[0]) * new_length);
    // kk: count is the number of elements we have copied
    // copy all elems in list_ to new_list
    uint32_t count = 0;
    for (uint32_t i = 0; i < length_; i++) {
      LRUHandle* h = list_[i];
      // process each linked list
      while (h != nullptr) {
        // kk: save h->next_hash so we can keep walking the old chain
        LRUHandle* next = h->next_hash;
        uint32_t hash = h->hash;
        // ptr is a fixed slot for a given hash & (new_length - 1)
        LRUHandle** ptr = &new_list[hash & (new_length - 1)];
        // head insertion: insert h in front of *ptr, then point *ptr at h
        h->next_hash = *ptr;
        // kk: assign element h to the slot pointed to by ptr
        *ptr = h;
        // kk: continue with the next node of the old chain
        h = next;
        count++;
      }
    }
    assert(elems_ == count);
    delete[] list_;
    list_ = new_list;
    length_ = new_length;
  }
};

Two things are worth noting here.

  1. The list-copying part baffled me for quite a while... it turns out to be the classic head-insertion technique. See 单链表的头插法和尾插法 and Linked List | Set 2 (Inserting a node):
/* Given a reference (pointer to pointer)
to the head of a list and an int,
inserts a new node on the front of the list. */
void push(Node** head_ref, int new_data)
{
    /* 1. allocate node */
    Node* new_node = new Node();

    /* 2. put in the data */
    new_node->data = new_data;

    /* 3. Make next of new node as head */
    new_node->next = (*head_ref);

    /* 4. move the head to point to the new node */
    (*head_ref) = new_node;
}

I was rather shocked that I couldn't follow it at first. Thinking it over, it's because I kept assuming h was the old node and the freshly allocated new_list held the new nodes (the name does contain "new", after all).

In fact it's the other way around: new_list holds the lists that already exist, and h is the node being newly inserted.

Each iteration inserts h in front of *ptr and then updates *ptr to be the new head of the list. Seen this way, it becomes much easier to follow.

        h->next_hash = *ptr;
        // kk: assign element h to the slot pointed to by ptr
        *ptr = h;
  2. Since the bucket count changes, two elements that used to share a bucket may now be assigned to different buckets; see the quick check below.
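A worked check of point 2 (plain asserts): hashes 1 and 5 share bucket 1 while length_ is 4, but separate once length_ doubles to 8.

#include <cassert>

int main() {
  // Before resize: length_ == 4, hashes 1 and 5 share bucket 1.
  assert((1u & (4 - 1)) == 1 && (5u & (4 - 1)) == 1);
  // After resize: length_ == 8, they land in buckets 1 and 5.
  assert((1u & (8 - 1)) == 1 && (5u & (8 - 1)) == 5);
}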

LRUCache

A bare-bones LRUCache

LRUCache is a frequent guest in coding interviews. The usual implementation is a hashmap plus a doubly-linked list: the hashmap's key is the cached key, and its value is a node of the list. On every get or put of a key, the corresponding node is moved to one end of the list (marking it as most recently accessed); once the cache exceeds the given capacity, nodes are evicted from the other end (least recently accessed).

Since both ends of the list need to be manipulated, implementations generally use a doubly-linked list.

A classic C++ implementation follows; the code is from LeetCode.

#include <list>
#include <unordered_map>
using namespace std;

/*
    hash + doubly-linked list: the classic implementation with O(1) put and get.
    (Since C++11, list::size() is guaranteed to be O(1).)
    list::splice transfers elements from one list to another;
    cache.splice(cache.begin(), cache, iter) moves iter's node to the list head.
*/
class LRUCache {
public:
    unordered_map<int, list<pair<int,int>>::iterator> hash;
    // The list holds at most capacity elements; new entries go at the front,
    // so once over capacity we simply drop the back.
    list<pair<int,int>> cache;  // (key, value)
    int limit;
    LRUCache(int capacity) {
        limit = capacity;
        hash.reserve(capacity);
    }
    int get(int key) {
        if (hash.find(key) == hash.end()) {
            return -1;
        } else {
            auto iter = hash[key];
            cache.splice(cache.begin(), cache, iter);  // move to the list head
            return cache.begin()->second;
        }
    }

    void put(int key, int value) {
        if (hash.find(key) == hash.end()) {
            // at capacity: evict the back element before adding the new node
            if (cache.size() == limit) {
                hash.erase(cache.back().first);
                cache.pop_back();
            }
            cache.emplace_front(key, value);
            hash[key] = cache.begin();
        } else {
            // key already present: update the value and move the node to the front
            auto iter = hash[key];
            iter->second = value;
            cache.splice(cache.begin(), cache, iter);  // move to the list head
            hash[key] = cache.begin();  // refresh the iterator stored in the hashmap
        }
    }
};

So what does a real-world LRUCache look like?

Declaration

// A single shard of sharded cache.
class LRUCache {
 public:
  LRUCache();
  ~LRUCache();

  // Separate from constructor so caller can easily make an array of LRUCache
  void SetCapacity(size_t capacity) { capacity_ = capacity; }

  // Like Cache methods, but with an extra "hash" parameter.
  Cache::Handle* Insert(const Slice& key, uint32_t hash, void* value,
                        size_t charge,
                        void (*deleter)(const Slice& key, void* value));
  Cache::Handle* Lookup(const Slice& key, uint32_t hash);
  void Release(Cache::Handle* handle);
  void Erase(const Slice& key, uint32_t hash);
  void Prune();
  size_t TotalCharge() const {
    MutexLock l(&mutex_);
    return usage_;
  }

 private:
  void LRU_Remove(LRUHandle* e);
  void LRU_Append(LRUHandle* list, LRUHandle* e);
  void Ref(LRUHandle* e);
  void Unref(LRUHandle* e);
  bool FinishErase(LRUHandle* e) EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Initialized before use.
  size_t capacity_;

  // mutex_ protects the following state.
  mutable port::Mutex mutex_;
  size_t usage_ GUARDED_BY(mutex_);

  // Dummy head of LRU list.
  // lru.prev is newest entry, lru.next is oldest entry.
  // Entries have refs==1 and in_cache==true.
  LRUHandle lru_ GUARDED_BY(mutex_);

  // Dummy head of in-use list.
  // Entries are in use by clients, and have refs >= 2 and in_cache==true.
  LRUHandle in_use_ GUARDED_BY(mutex_);

  HandleTable table_ GUARDED_BY(mutex_);
};

List structure

The implementation contains two linked lists, in_use_ and lru_.

Notably, both are circular doubly-linked lists.

lru_ is structured as follows:

dummy node -> node1(oldest) -> node2 -> ... -> nodeN(newest) -> dummy node

in_use_ has a similar structure, but its nodes are in no particular order.

The dummy node at the head and the one at the tail are actually the same node.

LRUCache::LRUCache() : capacity_(0), usage_(0) {
  // Make empty circular linked lists.
  lru_.next = &lru_;
  lru_.prev = &lru_;
  in_use_.next = &in_use_;
  in_use_.prev = &in_use_;
}

void LRUCache::LRU_Remove(LRUHandle* e) {
  e->next->prev = e->prev;
  e->prev->next = e->next;
}

// note: the linked list is a circular list
// dummy node -> node1(oldest) -> node2 -> ... -> nodeN(newest) -> dummy node
void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) {
  // Make "e" newest entry by inserting just before *list
  e->next = list;
  e->prev = list->prev;
  e->prev->next = e;
  e->next->prev = e;
}
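A minimal standalone trace of these invariants, using a stripped-down Node in place of LRUHandle (the Append/Remove bodies copy the logic above):

#include <cassert>

struct Node { Node* next; Node* prev; };

void Append(Node* list, Node* e) {  // same logic as LRU_Append
  e->next = list;
  e->prev = list->prev;
  e->prev->next = e;
  e->next->prev = e;
}

void Remove(Node* e) {              // same logic as LRU_Remove
  e->next->prev = e->prev;
  e->prev->next = e->next;
}

int main() {
  Node lru{&lru, &lru};             // dummy head: empty circular list
  Node a, b;
  Append(&lru, &a);                 // lru <-> a <-> lru
  Append(&lru, &b);                 // lru <-> a <-> b <-> lru
  assert(lru.next == &a);           // lru.next is the oldest entry
  assert(lru.prev == &b);           // lru.prev is the newest entry
  Remove(&a);                       // lru <-> b <-> lru
  assert(lru.next == &b && b.prev == &lru);
}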

Entry reference counting

Each entry is on at most one of lru_ and in_use_ (it may also be on neither). lru_ is the cache's own list; in_use_ holds the entries currently in use by clients.

Entries on lru_ have refs==1 and in_cache==true.

Entries on in_use_ have refs>=2 and in_cache==true.

void LRUCache::Ref(LRUHandle* e) {
  if (e->refs == 1 && e->in_cache) {  // If on lru_ list, move to in_use_ list.
    LRU_Remove(e);
    LRU_Append(&in_use_, e);
  }
  e->refs++;
}

void LRUCache::Unref(LRUHandle* e) {
  assert(e->refs > 0);
  e->refs--;
  if (e->refs == 0) {  // Deallocate.
    assert(!e->in_cache);
    (*e->deleter)(e->key(), e->value);
    free(e);
  } else if (e->in_cache && e->refs == 1) {
    // No longer in use; move to lru_ list.
    LRU_Remove(e);
    LRU_Append(&lru_, e);
  }
}

Insert

This part is worth discussing; see the comments for the details. Just two points:

  • Insertion sets up e first (refs, in_cache, list membership, usage_); if an entry with the same key already exists in the cache, the same bookkeeping is undone for the replaced entry via FinishErase.
  • When usage_ exceeds the cache's capacity, the while loop looks as if it would only ever run once; in fact the LRU_Remove inside FinishErase modifies lru_ on every iteration.
Cache::Handle* LRUCache::Insert(const Slice& key, uint32_t hash, void* value,
                                size_t charge,
                                void (*deleter)(const Slice& key,
                                                void* value)) {
  MutexLock l(&mutex_);

  // kk: the "- 1" accounts for key_data[1], which is just a placeholder
  LRUHandle* e =
      reinterpret_cast<LRUHandle*>(malloc(sizeof(LRUHandle) - 1 + key.size()));
  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->hash = hash;
  e->in_cache = false;
  e->refs = 1;  // for the returned handle.
  std::memcpy(e->key_data, key.data(), key.size());

  if (capacity_ > 0) {
    e->refs++;  // for the cache's reference.
    e->in_cache = true;
    LRU_Append(&in_use_, e);
    usage_ += charge;
    // table_.Insert returns nullptr if e's key was not in table_ before,
    // otherwise it returns the replaced handle.
    // If nullptr, FinishErase does nothing;
    // if non-nullptr, FinishErase undoes the four lines above for the old entry.
    FinishErase(table_.Insert(e));
  } else {  // don't cache. (capacity_==0 is supported and turns off caching.)
    // next is read by key() in an assert, so it must be initialized
    e->next = nullptr;
  }
  // lru_.next == &lru_ means lru_ is empty (the dummy node points at itself)
  while (usage_ > capacity_ && lru_.next != &lru_) {
    // kk: why while instead of if? LRU_Remove() inside FinishErase
    // changes lru_, so each iteration sees a fresh oldest node.
    LRUHandle* old = lru_.next;  // lru_.next is the oldest entry
    assert(old->refs == 1);
    bool erased = FinishErase(table_.Remove(old->key(), old->hash));
    // kk: old is always in table_, so erased should be true.
    if (!erased) {  // to avoid unused variable when compiled NDEBUG
      assert(erased);
    }
  }

  return reinterpret_cast<Cache::Handle*>(e);
}

// If e != nullptr, finish removing *e from the cache; it has already been
// removed from the hash table.  Return whether e != nullptr.
bool LRUCache::FinishErase(LRUHandle* e) {
  if (e != nullptr) {
    assert(e->in_cache);
    LRU_Remove(e);
    e->in_cache = false;
    usage_ -= e->charge;
    Unref(e);
  }
  return e != nullptr;
}

ShardedLRUCache

The implementation is quite direct; the key point is that the hash value decides which shard of the cache a key goes to.

Warning

The main goal here is to reduce lock granularity: one big lock becomes kNumShards smaller locks, increasing the cache's degree of parallelism.

static const int kNumShardBits = 4;
static const int kNumShards = 1 << kNumShardBits;

class ShardedLRUCache : public Cache {
 private:
  LRUCache shard_[kNumShards];
  port::Mutex id_mutex_;
  uint64_t last_id_;

  static inline uint32_t HashSlice(const Slice& s) {
    return Hash(s.data(), s.size(), 0);
  }
  // decides which shard a key goes to
  static uint32_t Shard(uint32_t hash) { return hash >> (32 - kNumShardBits); }

 public:
  explicit ShardedLRUCache(size_t capacity) : last_id_(0) {
    const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards;
    // per_shard is the capacity each shard must support
    for (int s = 0; s < kNumShards; s++) {
      shard_[s].SetCapacity(per_shard);
    }
  }
  ~ShardedLRUCache() override {}
  Handle* Insert(const Slice& key, void* value, size_t charge,
                 void (*deleter)(const Slice& key, void* value)) override {
    const uint32_t hash = HashSlice(key);
    return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter);
  }
  Handle* Lookup(const Slice& key) override {
    const uint32_t hash = HashSlice(key);
    return shard_[Shard(hash)].Lookup(key, hash);
  }
  void Release(Handle* handle) override {
    LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
    shard_[Shard(h->hash)].Release(handle);
  }
  void Erase(const Slice& key) override {
    const uint32_t hash = HashSlice(key);
    shard_[Shard(hash)].Erase(key, hash);
  }
  void* Value(Handle* handle) override {
    return reinterpret_cast<LRUHandle*>(handle)->value;
  }
  uint64_t NewId() override {
    MutexLock l(&id_mutex_);
    return ++(last_id_);
  }
  void Prune() override {
    for (int s = 0; s < kNumShards; s++) {
      shard_[s].Prune();
    }
  }
  size_t TotalCharge() const override {
    size_t total = 0;
    for (int s = 0; s < kNumShards; s++) {
      total += shard_[s].TotalCharge();
    }
    return total;
  }
};
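A quick check of the Shard() rule (the function body is copied from above): with kNumShardBits == 4, the top four bits of the 32-bit hash select one of the 16 shards.

#include <cassert>
#include <cstdint>

static const int kNumShardBits = 4;

static uint32_t Shard(uint32_t hash) { return hash >> (32 - kNumShardBits); }

int main() {
  assert(Shard(0x00000000u) == 0);   // low hashes -> shard 0
  assert(Shard(0x10000000u) == 1);   // top nibble 0x1 -> shard 1
  assert(Shard(0xFFFFFFFFu) == 15);  // top nibble 0xF -> shard 15
}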
