levelDB 代码阅读笔记 02 comparator

Mar 1, 2022 · 10 min read · leveldb

Overview

levelDB是一个有序的KV存储，因此key的顺序是十分关键的 levelDB提供用户自己定义key顺序的能力

先看下comparator的接口

接口 include/leveldb/comparator.h

 1
 2// A Comparator object provides a total order across slices that are
 3// used as keys in an sstable or a database.  A Comparator implementation
 4// must be thread-safe since leveldb may invoke its methods concurrently
 5// from multiple threads.
 6class LEVELDB_EXPORT Comparator {
 7 public:
 8  virtual ~Comparator();
 9
10  // Three-way comparison.  Returns value:
11  //   < 0 iff "a" < "b",
12  //   == 0 iff "a" == "b",
13  //   > 0 iff "a" > "b"
14  virtual int Compare(const Slice& a, const Slice& b) const = 0;
15
16  // The name of the comparator.  Used to check for comparator
17  // mismatches (i.e., a DB created with one comparator is
18  // accessed using a different comparator.
19  //
20  // The client of this package should switch to a new name whenever
21  // the comparator implementation changes in a way that will cause
22  // the relative ordering of any two keys to change.
23  //
24  // Names starting with "leveldb." are reserved and should not be used
25  // by any clients of this package.
26  virtual const char* Name() const = 0;
27
28  // Advanced functions: these are used to reduce the space requirements
29  // for internal data structures like index blocks.
30
31  // If *start < limit, changes *start to a short string in [start,limit).
32  // Simple comparator implementations may return with *start unchanged,
33  // i.e., an implementation of this method that does nothing is correct.
34  virtual void FindShortestSeparator(std::string* start,
35                                     const Slice& limit) const = 0;
36
37  // Changes *key to a short string >= *key.
38  // Simple comparator implementations may return with *key unchanged,
39  // i.e., an implementation of this method that does nothing is correct.
40  virtual void FindShortSuccessor(std::string* key) const = 0;
41};
42

比较让人迷惑的应该是 FindShortestSeparator和 FindShortSuccessor 函数。其实可以简单理解成为了节省存储，而对key做了一定修改。这个实现是不确定的，甚至完全什么都不做也是合法的。

下面我们主要看一下levelDB中两个实现了这个接口的sub class

BytewiseComparatorImpl

默认的comparator,按照byte的字典序比较大小

主体实现

需要注意 FindShortestSeparator和 FindShortSuccessor 的实现并非唯一的

 1
 2class BytewiseComparatorImpl : public Comparator {
 3 public:
 4  BytewiseComparatorImpl() = default;
 5
 6  const char* Name() const override { return "leveldb.BytewiseComparator"; }
 7
 8  int Compare(const Slice& a, const Slice& b) const override {
 9    return a.compare(b);
10  }
11
12  void FindShortestSeparator(std::string* start,
13                             const Slice& limit) const override {
14    // Find length of common prefix
15    size_t min_length = std::min(start->size(), limit.size());
16    size_t diff_index = 0;
17    // diff_index是一个不同的index
18    while ((diff_index < min_length) &&
19           ((*start)[diff_index] == limit[diff_index])) {
20      diff_index++;
21    }
22
23    if (diff_index >= min_length) {
24      // 没有不同的字符，说明一个串是另一个串的前缀，不需要做任何操作
25      // Do not shorten if one string is a prefix of the other
26    } else {
27      uint8_t diff_byte = static_cast<uint8_t>((*start)[diff_index]);
28      if (diff_byte < static_cast<uint8_t>(0xff) &&
29          diff_byte + 1 < static_cast<uint8_t>(limit[diff_index])) {
30        (*start)[diff_index]++;
31        start->resize(diff_index + 1);
32        assert(Compare(*start, limit) < 0);
33      }
34    }
35  }
36
37  void FindShortSuccessor(std::string* key) const override {
38    // Find first character that can be incremented
39    size_t n = key->size();
40    for (size_t i = 0; i < n; i++) {
41      const uint8_t byte = (*key)[i];
42      if (byte != static_cast<uint8_t>(0xff)) {
43        (*key)[i] = byte + 1;
44        key->resize(i + 1);
45        return;
46      }
47    }
48    // *key is a run of 0xffs.  Leave it alone.
49  }
50};
51
52

Scott Meyers's singleton

比较值得一提的可能是这个单例的实现

1
2const Comparator* BytewiseComparator() {
3  static NoDestructor<BytewiseComparatorImpl> singleton;
4  return singleton.get();
5}

Scott Meyers's singleton 的实现虽然已经人尽皆知了，不妨来罗嗦几句讲一下重点

 1
 2struct singleton_t
 3{
 4
 5  static
 6  singleton_t &
 7  instance()
 8  {
 9    static singleton_t s;
10    return s;
11  } // instance
12
13  singleton_t(const singleton_t &) = delete;
14  singleton_t & operator = (const singleton_t &) = delete;
15
16private:
17
18  singleton_t() {}
19  ~singleton_t() {}
20
21
22

要注意的有三点:

static function在第一次被访问时创建
static local variable在第一次被执行时创建，在整个程序结束时销毁。不被访问就不会创建，实现了"lazy loading"
c++11之后，标准保证了local static variable的创建是thread-safe的

但是 NoDestructor 是个什么东西呢

NoDestructor

 1
 2// Wraps an instance whose destructor is never called.
 3//
 4// This is intended for use with function-level static variables.
 5template <typename InstanceType>
 6class NoDestructor {
 7 public:
 8  template <typename... ConstructorArgTypes>
 9  explicit NoDestructor(ConstructorArgTypes&&... constructor_args) {
10    static_assert(sizeof(instance_storage_) >= sizeof(InstanceType),
11                  "instance_storage_ is not large enough to hold the instance");
12    static_assert(
13        alignof(decltype(instance_storage_)) >= alignof(InstanceType),
14        "instance_storage_ does not meet the instance's alignment requirement");
15    new (&instance_storage_)
16        InstanceType(std::forward<ConstructorArgTypes>(constructor_args)...);
17  }
18
19  ~NoDestructor() = default;
20
21  NoDestructor(const NoDestructor&) = delete;
22  NoDestructor& operator=(const NoDestructor&) = delete;
23
24  InstanceType* get() {
25    return reinterpret_cast<InstanceType*>(&instance_storage_);
26  }
27
28 private:
29
30  typename std::aligned_storage<sizeof(InstanceType),
31                                alignof(InstanceType)>::type instance_storage_;
32};
33
34
35

变长参数与std::forward

首先是看到构造函数的变长模板和变长参数列表的用法，可以支持传入任意数量个任意类型的参数，这个用法在std::make_unique之类的函数非常常见并且一般会用std::forward将参数的形式按照传入的形式往下层传，参考 cppreference:make_unique

1
2template<class T, class... Args>
3std::enable_if_t<!std::is_array<T>::value, std::unique_ptr<T>>
4make_unique(Args&&... args)
5{
6    return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
7}

placement new

可以看到这里也很特别

1
2    new (&instance_storage_)
3        InstanceType(std::forward<ConstructorArgTypes>(constructor_args)...);
4

看到new会自然想到这个内存是内配在heap上的，然而实际上却未必。这个语法叫做placement new 可以支持在一个预先分配好的memory(不一定是在heap上，可以是在stack上)来在指定的内存中构造对象

 1
 2// within any block scope...
 3{
 4    alignas(T) unsigned char buf[sizeof(T)];
 5    // Statically allocate the storage with automatic storage duration
 6    // which is large enough for any object of type `T`.
 7    T* tptr = new(buf) T; // Construct a `T` object, placing it directly into your 
 8                          // pre-allocated storage at memory address `buf`.
 9    tptr->~T();           // You must **manually** call the object's destructor
10                          // if its side effects is depended by the program.
11}                         // Leaving this block scope automatically deallocates `buf`.
12
13

需要注意的是，与普通的new和delete对应不同。使用placement new构造出的对象，需要显式调用析构函数。参考is there a placemnet delete?的说明

那么问题来了，为啥要有placemnet new? 有啥好处？我理解主要有两点:

Tip

类似内存池，每次构造新对象的时候不需要发生malloc,速度更快
分配内存可能发生异常。将分配内存的代码与构造对象的部分分开，方便处理

std::aligned_storage

这个其实就是与placement new配合使用的，只分配内存，不构造对象。参考 aligned_storage

两个模板参数分别是对象至少的size，以及最小的aligment要求。

Info

The type defined by std::aligned_storage<>::type can be used to create uninitialized memory blocks suitable to hold the objects of given type, optionally aligned stricter than their natural alignment requirement, for example on a cache or page boundary. As with any other uninitialized storage, the objects are created using placement new and destroyed with explicit destructor calls.

why use NoDestructor

代码看懂了，但还有最重要的一点是，为什么要用这个？

这个实现最初貌似出现在chromium的开发者讨论中 Proposal: use base::NoDestructor for leaky non-POD globals (and get rid of base::Singleton?)

chromium中的实现在这里

我看这个代码最大的疑问是，就算不用NoDestructor，一个static local variable也不会进行析构（在程序退出时才进行),那么为啥要用NoDestructor呢？

Info

Variables declared at block scope with the specifier static or thread_local (since C++11) have static or thread (since C++11) storage duration but are initialized the first time control passes through their declaration (unless their initialization is zero- or constant-initialization, which can be performed before the block is first entered). On all further calls, the declaration is skipped.

The destructor for a block-scope static variable is called at program exit, but only if the initialization took place successfully.

看了chromium中的注释才发现，是被名字误解了

Warning

NoDestructor的目的并不是提供一种不用析构的能力，而是提供了其他优势，又恰好不需要析构。所以用了这个名字

同时，我之前理解是不对的。

Warning

static local variable是在程序退出时析构，而NoDestructor是程序退出时也不会析构。

这两个不是一回事，可以从如下这段测试中看出区别:

 1
 2// Copyright (c) 2018 The LevelDB Authors. All rights reserved.
 3// Use of this source code is governed by a BSD-style license that can be
 4// found in the LICENSE file. See the AUTHORS file for names of contributors.
 5
 6#include "util/no_destructor.h"
 7
 8#include <cstdint>
 9#include <cstdlib>
10#include <utility>
11#include <iostream>
12
13#include "gtest/gtest.h"
14
15namespace leveldb {
16
17namespace {
18
19struct DoNotDestruct {
20 public:
21  DoNotDestruct(uint32_t a, uint64_t b) : a(a), b(b) {}
22  ~DoNotDestruct() { 
23    std::cout<<" dtor"<<std::endl;
24    std::abort(); }
25
26  // Used to check constructor argument forwarding.
27  uint32_t a;
28  uint64_t b;
29};
30
31constexpr const uint32_t kGoldenA = 0xdeadbeef;
32constexpr const uint64_t kGoldenB = 0xaabbccddeeffaabb;
33
34}  // namespace
35
36TEST(NoDestructorTest, StackInstance) {
37  NoDestructor<DoNotDestruct> instance(kGoldenA, kGoldenB);
38  ASSERT_EQ(kGoldenA, instance.get()->a);
39  ASSERT_EQ(kGoldenB, instance.get()->b);
40}
41
42TEST(NoDestructorTest, StaticInstance) {
43  static NoDestructor<DoNotDestruct> instance(kGoldenA, kGoldenB);
44  ASSERT_EQ(kGoldenA, instance.get()->a);
45  ASSERT_EQ(kGoldenB, instance.get()->b);
46}
47
48TEST(NoDestructorTest, StaticIns) {
49  static DoNotDestruct instance(kGoldenA, kGoldenB);
50  ASSERT_EQ(kGoldenA, instance.a);
51  ASSERT_EQ(kGoldenB, instance.b);
52}
53
54}  // namespace leveldb
55
56

输出的结果:

 1
 2root@KUANZEREN-PC4:~/workspace/leveldb/build# ./leveldb_tests    --gtest_filter=*NoDestructor* 
 3Running main() from /root/workspace/leveldb/third_party/googletest/googletest/src/gtest_main.cc
 4Note: Google Test filter = *NoDestructor*
 5[==========] Running 3 tests from 1 test suite.
 6[----------] Global test environment set-up.
 7[----------] 3 tests from NoDestructorTest
 8[ RUN      ] NoDestructorTest.StackInstance
 9[       OK ] NoDestructorTest.StackInstance (0 ms)
10[ RUN      ] NoDestructorTest.StaticInstance
11[       OK ] NoDestructorTest.StaticInstance (0 ms)
12[ RUN      ] NoDestructorTest.StaticIns
13[       OK ] NoDestructorTest.StaticIns (0 ms)
14[----------] 3 tests from NoDestructorTest (0 ms total)
15
16[----------] Global test environment tear-down
17[==========] 3 tests from 1 test suite ran. (0 ms total)
18[  PASSED  ] 3 tests.
19 dtor
20Aborted
21

那NoDesctructor有什么优势呢？

对比local static,NoDesctructor是inline stored的，不存在malloc和指针解引用，效率更高
对比chromium中的 base::LazyInstance，支持对构造函数中传入参数

经常提到的一个词是"inline storage / stored inline"我理解在这个语境下说的是storage_duration中的 static storage duration (存疑)

参考:

其他 Comparator

ReverseKeyComparator

测试用的Comparator,将key颠倒顺序后使用bytewiseComparator比较大小也就是 "abc"变为"cba"

CountComparator

测试用。其他Comparator的一个包装器，加了个atomic的计数器，用来benchmark

InternalKeyComparator

这个值得讲一下

首先看到Compare函数

 1
 2int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const {
 3  // Order by:
 4  //    increasing user key (according to user-supplied comparator)
 5  //    decreasing sequence number
 6  //    decreasing type (though sequence# should be enough to disambiguate)
 7  int r = user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
 8  if (r == 0) {
 9
10    const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
11    const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
12    if (anum > bnum) {
13      r = -1;
14    } else if (anum < bnum) {
15      r = +1;
16    }
17  }
18  return r;
19}

最初让人比较疑惑的是，看起来只有两个比较逻辑:

按照user key升序排列(根据传入的comparator)
根据DecodeFixex64得到的值(就是把Slice中的最后8byte转换为一个uint64_t的值) 降序排列

为什么注释里写了三个比较逻辑？这里的代码是看不出来的，带着这个疑问，我们去看下DecodeFixex64的值的意义是什么呢

我们首先看下InternalKey是什么

 1
 2// Modules in this directory should keep internal keys wrapped inside
 3// the following class instead of plain strings so that we do not
 4// incorrectly use string comparisons instead of an InternalKeyComparator.
 5class InternalKey {
 6 private:
 7  std::string rep_;
 8
 9 public:
10  InternalKey() {}  // Leave rep_ as empty to indicate it is invalid
11  InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
12    AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
13  }
14
15  bool DecodeFrom(const Slice& s) {
16    rep_.assign(s.data(), s.size());
17    return !rep_.empty();
18  }
19
20  Slice Encode() const {
21    assert(!rep_.empty());
22    return rep_;
23  }
24
25  Slice user_key() const { return ExtractUserKey(rep_); }
26
27  void SetFrom(const ParsedInternalKey& p) {
28    rep_.clear();
29    AppendInternalKey(&rep_, p);
30  }
31
32  void Clear() { rep_.clear(); }
33
34  std::string DebugString() const;
35};
36
37
38
39

其中最重要的是 AppendInternalKey的部分，里面涵盖了InternalKey的表示其实就是由如下三部分组成了一个string | userkey | sequence | type |

 1
 2static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
 3  assert(seq <= kMaxSequenceNumber);
 4  assert(t <= kValueTypeForSeek);
 5  // 高位是sequence number,低位是type
 6  return (seq << 8) | t;
 7}
 8
 9void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
10  // 把user key这个slice 往result后拼接
11  result->append(key.user_key.data(), key.user_key.size());
12  PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
13  // Result 中分了三部分
14  // user_key , sequence, type.  后两部分合成了一个uint64_t
15}
16

要注意的是sequence和type共同放在一个uint64_t中，高56bit为sequence为sequence,低8 bit为type

 1
 2// Value types encoded as the last component of internal keys.
 3// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
 4// data structures.
 5enum ValueType { kTypeDeletion = 0x0, kTypeValue = 0x1 };
 6// kValueTypeForSeek defines the ValueType that should be passed when
 7// constructing a ParsedInternalKey object for seeking to a particular
 8// sequence number (since we sort sequence numbers in decreasing order
 9// and the value type is embedded as the low 8 bits in the sequence
10// number in internal keys, we need to use the highest-numbered
11// ValueType, not the lowest).
12static const ValueType kValueTypeForSeek = kTypeValue;
13
14typedef uint64_t SequenceNumber;
15
16// We leave eight bits empty at the bottom so a type and sequence#
17// can be packed together into 64-bits.
18// 在真正pack的时候会做一个移位
19static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
20
21struct ParsedInternalKey {
22  Slice user_key;
23  SequenceNumber sequence;
24  ValueType type;
25
26  ParsedInternalKey() {}  // Intentionally left uninitialized (for speed)
27  ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
28      : user_key(u), sequence(seq), type(t) {}
29  std::string DebugString() const;
30};
31

这也就解释了最初的疑问。 uint64_t实际上暗含了先比较sequence,再比较type的比较逻辑

1
2  const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
3    const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
4    if (anum > bnum) {
5      r = -1;
6    } else if (anum < bnum) {
7      r = +1;
8    }
9

感觉把几个信息合并到一起表示的技巧在levelDB的实现中比较常见，有点类似状压DP里面的技巧