LevelDB的编码coding

LevelDB的编码coding

util/coding.h

#include "leveldb/slice.h"
#include "port/port.h"

namespace leveldb {

// Standard Put... routines append to a string
    // Appends the fixed-width 4-byte encoding of a uint32_t value to *dst.
void PutFixed32(std::string* dst, uint32_t value);
    // Appends the fixed-width 8-byte encoding of a uint64_t value to *dst.
void PutFixed64(std::string* dst, uint64_t value);
// Append the variable-length (varint) encoding of value to *dst.
void PutVarint32(std::string* dst, uint32_t value);
void PutVarint64(std::string* dst, uint64_t value);
// Appends value's length as a varint32, followed by value's bytes.
void PutLengthPrefixedSlice(std::string* dst, const Slice& value);

// Standard Get... routines parse a value from the beginning of a Slice
// and advance the slice past the parsed value.
// Each returns true on success and false if the input could not be parsed.
bool GetVarint32(Slice* input, uint32_t* value);
bool GetVarint64(Slice* input, uint64_t* value);
bool GetLengthPrefixedSlice(Slice* input, Slice* result);

// Pointer-based variants of GetVarint...  These either store a value
// in *v and return a pointer just past the parsed value, or return
// nullptr on error.  These routines only look at bytes in the range
// [p..limit-1]
const char* GetVarint32Ptr(const char* p, const char* limit, uint32_t* v);
const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* v);

// Returns the length of the varint32 or varint64 encoding of "v"
// (one byte per 7 bits of payload, so between 1 and 10 bytes).
int VarintLength(uint64_t v);

// Lower-level versions of Put... that write directly into a character buffer
// and return a pointer just past the last byte written.
// REQUIRES: dst has enough space for the value being written
// (at most 5 bytes for a varint32 and 10 bytes for a varint64).
char* EncodeVarint32(char* dst, uint32_t value);
char* EncodeVarint64(char* dst, uint64_t value);

// Lower-level versions of Put... that write directly into a character buffer
// REQUIRES: dst has enough space for the value being written

    // Encodes a uint32 value into a char* buffer (a character array).
// Stores `value` into dst[0..3] in little-endian order (least-significant
// byte first), regardless of the byte order of the host.
// REQUIRES: dst has room for 4 bytes.
inline void EncodeFixed32(char* dst, uint32_t value) {
  // Work on unsigned bytes so the narrowing below is well defined.
  uint8_t* const out = reinterpret_cast<uint8_t*>(dst);

  // Byte i receives bits [8*i, 8*i+7] of value; the cast keeps only the
  // low 8 bits of the shifted value.
  for (unsigned i = 0; i < sizeof(value); ++i) {
    out[i] = static_cast<uint8_t>(value >> (8 * i));
  }
}

// Stores `value` into dst[0..7] in little-endian order (least-significant
// byte first), regardless of the byte order of the host.
// REQUIRES: dst has room for 8 bytes.
inline void EncodeFixed64(char* dst, uint64_t value) {
  // Work on unsigned bytes so the narrowing below is well defined.
  uint8_t* const out = reinterpret_cast<uint8_t*>(dst);

  // Same scheme as the 32-bit encoder, but over 8 bytes: byte i receives
  // bits [8*i, 8*i+7] of value.
  for (unsigned i = 0; i < sizeof(value); ++i) {
    out[i] = static_cast<uint8_t>(value >> (8 * i));
  }
}

// Lower-level versions of Get... that read directly from a character buffer
// without any bounds checking.

    // Decoding: converts a char* byte buffer back into a uint32.
// Reads a 32-bit value stored in little-endian order at ptr[0..3] and
// returns it. Performs no bounds checking: the caller must guarantee that
// 4 bytes are readable at ptr.
inline uint32_t DecodeFixed32(const char* ptr) {
  // Read as unsigned bytes so that no sign extension occurs on widening.
  const uint8_t* const in = reinterpret_cast<const uint8_t*>(ptr);

  // Four bytes OR-ed together form the 32-bit result; byte i contributes
  // bits [8*i, 8*i+7].
  uint32_t result = 0;
  for (unsigned i = 0; i < sizeof(result); ++i) {
    result |= static_cast<uint32_t>(in[i]) << (8 * i);
  }
  return result;
}

// Reads a 64-bit value stored in little-endian order at ptr[0..7] and
// returns it. Performs no bounds checking: the caller must guarantee that
// 8 bytes are readable at ptr.
inline uint64_t DecodeFixed64(const char* ptr) {
  // Read as unsigned bytes so that no sign extension occurs on widening.
  const uint8_t* const in = reinterpret_cast<const uint8_t*>(ptr);

  // Eight bytes OR-ed together form the 64-bit result; byte i contributes
  // bits [8*i, 8*i+7].
  uint64_t result = 0;
  for (unsigned i = 0; i < sizeof(result); ++i) {
    result |= static_cast<uint64_t>(in[i]) << (8 * i);
  }
  return result;
}

// Internal routine for use by fallback path of GetVarint32Ptr.
// Varint layout: every byte carries 7 payload bits in its low bits. The top
// bit is 1 when more bytes of the same value follow, and 0 on the value's
// final byte.
const char* GetVarint32PtrFallback(const char* p, const char* limit,
                                   uint32_t* value);

// Decodes one varint32 from [p, limit). On success stores the result in
// *value and returns a pointer to the byte just past the encoding.
// The single-byte case is handled inline; everything else (multi-byte
// values and an empty range) is delegated to GetVarint32PtrFallback, whose
// return value is passed through unchanged.
inline const char* GetVarint32Ptr(const char* p, const char* limit,
                                  uint32_t* value) {
  // A first byte below 0x80 has its continuation bit clear, so it is the
  // whole value.
  const bool single_byte = (p < limit) && (static_cast<uint8_t>(*p) < 0x80);
  if (!single_byte) {
    return GetVarint32PtrFallback(p, limit, value);
  }
  *value = static_cast<uint8_t>(*p);
  return p + 1;
}

}  // namespace leveldb

util/coding.cc

#include "util/coding.h"

namespace leveldb {

    // Converts a uint32_t value to its byte encoding and appends it to the string.
// Appends the 4-byte fixed-width encoding of `value` (as produced by
// EncodeFixed32) to the end of *dst.
void PutFixed32(std::string* dst, uint32_t value) {
  // Grow the string by the encoded width, then encode directly into the
  // newly added tail instead of going through a temporary buffer.
  const std::string::size_type old_size = dst->size();
  dst->resize(old_size + sizeof(value));
  EncodeFixed32(&(*dst)[old_size], value);
}
    // Converts a uint64_t value to its byte encoding and appends it to the string.
// Appends the 8-byte fixed-width encoding of `value` (as produced by
// EncodeFixed64) to the end of *dst.
void PutFixed64(std::string* dst, uint64_t value) {
  // Grow the string by the encoded width, then encode directly into the
  // newly added tail instead of going through a temporary buffer.
  const std::string::size_type old_size = dst->size();
  dst->resize(old_size + sizeof(value));
  EncodeFixed64(&(*dst)[old_size], value);
}

// Writes the varint encoding of `v` to dst and returns a pointer to the
// byte just past the last one written, so callers can compute the encoded
// length as (returned pointer - dst).
//
// Each output byte holds 7 bits of `v`, least-significant group first. The
// top bit of a byte is set when more bytes follow and clear on the final
// byte. A 32-bit value therefore occupies between 1 and 5 bytes.
// REQUIRES: dst has enough space for the value being written.
char* EncodeVarint32(char* dst, uint32_t v) {
  // Bit 7 (binary 10000000) marks "another byte follows".
  const uint32_t kContinuationBit = 128;

  // Operate on unsigned bytes.
  uint8_t* out = reinterpret_cast<uint8_t*>(dst);

  // While more than 7 significant bits remain, emit the low 7 bits with the
  // continuation bit set; the cast drops everything above bit 7.
  for (; v >= kContinuationBit; v >>= 7) {
    *out++ = static_cast<uint8_t>(v | kContinuationBit);
  }

  // The remaining bits fit in 7 bits: final byte, continuation bit clear.
  *out++ = static_cast<uint8_t>(v);
  return reinterpret_cast<char*>(out);
}

// Appends the varint encoding of `v` to the end of *dst.
void PutVarint32(std::string* dst, uint32_t v) {
  // Scratch space for the encoded form of one varint32.
  char scratch[5];

  // EncodeVarint32 returns the position just past the last byte written,
  // so the distance from the start of the buffer is the encoded length.
  const char* const end = EncodeVarint32(scratch, v);
  const std::string::size_type encoded_len = end - scratch;
  dst->append(scratch, encoded_len);
}

// Writes the varint encoding of `v` to dst and returns a pointer to the
// byte just past the last one written.
//
// Same scheme as EncodeVarint32: 7 payload bits per byte, least-significant
// group first, top bit set on every byte except the last.
// REQUIRES: dst has enough space for the value being written.
char* EncodeVarint64(char* dst, uint64_t v) {
  uint8_t* out = reinterpret_cast<uint8_t*>(dst);
  for (;;) {
    // Peel off the low 7 bits, then look at what is left.
    const uint8_t low7 = static_cast<uint8_t>(v & 0x7F);
    v >>= 7;
    if (v == 0) {
      // Nothing left: this is the final byte, continuation bit clear.
      *out++ = low7;
      break;
    }
    // More groups follow: set the continuation bit.
    *out++ = static_cast<uint8_t>(low7 | 0x80);
  }
  return reinterpret_cast<char*>(out);
}

// Appends the varint encoding of `v` to the end of *dst.
void PutVarint64(std::string* dst, uint64_t v) {
  // Scratch space for the encoded form of one varint64.
  char scratch[10];

  // EncodeVarint64 returns the position just past the last byte written,
  // so the distance from the start of the buffer is the encoded length.
  const char* const end = EncodeVarint64(scratch, v);
  const std::string::size_type encoded_len = end - scratch;
  dst->append(scratch, encoded_len);
}

// Appends `value` to *dst in length-prefixed form: the slice's size encoded
// as a varint32, immediately followed by the slice's bytes.
void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
  // The prefix is a varint32, so the size is narrowed to 32 bits here.
  const uint32_t length_prefix = static_cast<uint32_t>(value.size());
  PutVarint32(dst, length_prefix);

  // Payload follows the prefix directly.
  dst->append(value.data(), value.size());
}

    // Returns the number of bytes needed to encode v at 7 payload bits per byte.
// Returns how many bytes the varint encoding of `v` occupies: one byte for
// the first 7 bits, plus one more byte for every further 7-bit group that
// still contains a set bit.
int VarintLength(uint64_t v) {
  int bytes = 1;
  for (uint64_t rest = v >> 7; rest != 0; rest >>= 7) {
    ++bytes;
  }
  return bytes;
}

// General (multi-byte) varint32 decoder.
//
// Reads bytes from [p, limit), taking the low 7 bits of each as the next
// least-significant group of the result. Stops at the first byte whose top
// bit is clear, stores the decoded value in *value and returns a pointer
// just past that byte.
//
// Returns nullptr, leaving *value untouched, when the range ends before a
// terminating byte is found or when no terminating byte appears within the
// first 5 bytes (shifts 0, 7, 14, 21, 28).
const char* GetVarint32PtrFallback(const char* p, const char* limit,
                                   uint32_t* value) {
  uint32_t decoded = 0;
  uint32_t shift = 0;
  while (shift <= 28 && p < limit) {
    const uint32_t chunk = static_cast<uint8_t>(*p++);

    // Only the low 7 bits are payload. On the final byte the top bit is
    // already zero, so masking it changes nothing.
    decoded |= (chunk & 0x7F) << shift;

    if ((chunk & 0x80) == 0) {
      // Top bit clear: this was the last byte of the value.
      *value = decoded;
      return p;
    }

    // Top bit set: more bytes are present.
    shift += 7;
  }
  return nullptr;
}

    // Decodes a varint32 from the front of the Slice and stores it in *value.
// Parses a varint32 from the front of *input into *value.
// On success, *input is replaced by a slice covering the bytes that follow
// the parsed value and true is returned. On failure, *input is left
// unchanged and false is returned.
bool GetVarint32(Slice* input, uint32_t* value) {
  const char* const begin = input->data();
  // One past the last readable byte of the slice.
  const char* const end = begin + input->size();

  const char* const rest = GetVarint32Ptr(begin, end, value);
  if (rest == nullptr) {
    return false;
  }

  // `rest` points at the first unparsed byte, so the remainder of the
  // input is [rest, end).
  *input = Slice(rest, end - rest);
  return true;
}

// Decodes one varint64 from [p, limit).
//
// Takes the low 7 bits of each byte as the next least-significant group of
// the result. Stops at the first byte whose top bit is clear, stores the
// decoded value in *value and returns a pointer just past that byte.
//
// Returns nullptr, leaving *value untouched, when the range ends before a
// terminating byte is found or when no terminating byte appears within the
// first 10 bytes (shifts 0 through 63).
const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) {
  uint64_t decoded = 0;
  uint32_t shift = 0;
  while (shift <= 63 && p < limit) {
    const uint64_t chunk = static_cast<uint8_t>(*p++);

    // Only the low 7 bits are payload. On the final byte the top bit is
    // already zero, so masking it changes nothing.
    decoded |= (chunk & 0x7F) << shift;

    if ((chunk & 0x80) == 0) {
      // Top bit clear: value complete; return the position after it.
      *value = decoded;
      return p;
    }

    // Top bit set: more bytes are present.
    shift += 7;
  }
  return nullptr;
}

    // Works like the 32-bit version (GetVarint32), but for 64-bit values.
// Parses a varint64 from the front of *input into *value.
// On success, *input is replaced by a slice covering the bytes that follow
// the parsed value and true is returned. On failure, *input is left
// unchanged and false is returned.
bool GetVarint64(Slice* input, uint64_t* value) {
  const char* const begin = input->data();
  // One past the last readable byte of the slice.
  const char* const end = begin + input->size();

  const char* const rest = GetVarint64Ptr(begin, end, value);
  if (rest == nullptr) {
    return false;
  }

  // `rest` points at the first unparsed byte, so the remainder of the
  // input is [rest, end).
  *input = Slice(rest, end - rest);
  return true;
}

// Parses one length-prefixed slice from [p, limit): a varint32 length
// followed by that many payload bytes (the layout written by
// PutLengthPrefixedSlice).
//
// On success, points *result at the payload (no copy is made) and returns
// a pointer just past the payload. Returns nullptr, leaving *result
// untouched, if the length cannot be decoded or if fewer than `len` bytes
// remain before limit.
const char* GetLengthPrefixedSlice(const char* p, const char* limit,
                                   Slice* result) {
  uint32_t len;
  // Decode the length prefix; on success p is advanced past it.
  p = GetVarint32Ptr(p, limit, &len);
  if (p == nullptr) return nullptr;
  // Compare the length against the bytes remaining instead of computing
  // p + len first: a corrupt length could push p + len past the end of the
  // buffer, which is undefined behaviour and may wrap around. After a
  // successful decode p <= limit, so limit - p is non-negative.
  if (static_cast<uint64_t>(limit - p) < len) return nullptr;
  // The new slice refers to the payload in place.
  *result = Slice(p, len);
  return p + len;
}

// Parses one length-prefixed slice from the front of *input: a varint32
// length (1 to 5 bytes, not a fixed 4-byte field) followed by that many
// payload bytes.
//
// On success, points *result at the payload, removes both the prefix and
// the payload from *input, and returns true. Returns false if the length
// cannot be decoded or if the input holds fewer payload bytes than the
// length claims. In the latter case *input has already been advanced past
// the length prefix.
bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
  uint32_t payload_len;

  // Consume the length prefix; afterwards *input starts at the payload.
  if (!GetVarint32(input, &payload_len)) {
    return false;
  }

  // The claimed payload must fit in what is left of the input.
  if (input->size() < payload_len) {
    return false;
  }

  // The result refers to the payload bytes in place.
  *result = Slice(input->data(), payload_len);

  // Drop the bytes just handed out so *input starts at the next record.
  input->remove_prefix(payload_len);
  return true;
}

}  // namespace leveldb

LevelDB的编码coding

上一篇:mysql问题汇总含解决方案


下一篇:基于proxysql实现的读写分离案例