LevelDB的编码coding
util/coding.h
#include "leveldb/slice.h"
#include "port/port.h"
namespace leveldb {
// Standard Put... routines append to a string
// Appends the 4-byte fixed-width encoding of value to *dst.
void PutFixed32(std::string* dst, uint32_t value);
// Appends the 8-byte fixed-width encoding of value to *dst.
void PutFixed64(std::string* dst, uint64_t value);
// Append the variable-length (varint) encoding of value to *dst: 7 payload
// bits per byte, least-significant group first.
void PutVarint32(std::string* dst, uint32_t value);
void PutVarint64(std::string* dst, uint64_t value);
// Appends the varint32-encoded size of value, followed by the bytes of value.
void PutLengthPrefixedSlice(std::string* dst, const Slice& value);
// Standard Get... routines parse a value from the beginning of a Slice
// and advance the slice past the parsed value.
// Each returns true on success and false if no complete value could be parsed.
bool GetVarint32(Slice* input, uint32_t* value);
bool GetVarint64(Slice* input, uint64_t* value);
bool GetLengthPrefixedSlice(Slice* input, Slice* result);
// Pointer-based variants of GetVarint... These either store a value
// in *v and return a pointer just past the parsed value, or return
// nullptr on error. These routines only look at bytes in the range
// [p..limit-1]
const char* GetVarint32Ptr(const char* p, const char* limit, uint32_t* v);
const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* v);
// Returns the length of the varint32 or varint64 encoding of "v"
int VarintLength(uint64_t v);
// Lower-level versions of Put... that write directly into a character buffer
// and return a pointer just past the last byte written.
// REQUIRES: dst has enough space for the value being written
// (EncodeVarint32 writes at most 5 bytes; PutVarint64 sizes its buffer for
// EncodeVarint64 at 10 bytes.)
char* EncodeVarint32(char* dst, uint32_t value);
char* EncodeVarint64(char* dst, uint64_t value);
// Lower-level versions of Put... that write directly into a character buffer
// REQUIRES: dst has enough space for the value being written
//
// Stores `value` into dst[0..3] in little-endian order (least-significant
// byte first), independent of the host byte order.
inline void EncodeFixed32(char* dst, uint32_t value) {
  // View the destination as raw unsigned bytes.
  uint8_t* const out = reinterpret_cast<uint8_t*>(dst);
  // Byte i receives bits [8*i, 8*i + 7] of the value.
  for (int i = 0; i < 4; ++i) {
    out[i] = static_cast<uint8_t>(value >> (8 * i));
  }
}
// Stores `value` into dst[0..7] in little-endian order (least-significant
// byte first), independent of the host byte order.
// REQUIRES: dst has room for 8 bytes.
inline void EncodeFixed64(char* dst, uint64_t value) {
  // View the destination as raw unsigned bytes.
  uint8_t* const out = reinterpret_cast<uint8_t*>(dst);
  // Same scheme as the 32-bit variant, but over eight bytes.
  for (int i = 0; i < 8; ++i) {
    out[i] = static_cast<uint8_t>(value >> (8 * i));
  }
}
// Lower-level versions of Get... that read directly from a character buffer
// without any bounds checking.
//
// Reassembles a uint32_t from the four little-endian bytes at ptr[0..3].
inline uint32_t DecodeFixed32(const char* ptr) {
  // View the source as raw unsigned bytes.
  const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(ptr);
  // Fold from the most-significant byte down, so that bytes[i] ends up in
  // bits [8*i, 8*i + 7] of the 32-bit result.
  uint32_t decoded = 0;
  for (int i = 3; i >= 0; --i) {
    decoded = (decoded << 8) | bytes[i];
  }
  return decoded;
}
// Reassembles a uint64_t from the eight little-endian bytes at ptr[0..7].
// No bounds checking is performed.
inline uint64_t DecodeFixed64(const char* ptr) {
  // View the source as raw unsigned bytes.
  const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(ptr);
  // Same scheme as the 32-bit variant: bytes[i] ends up in bits
  // [8*i, 8*i + 7] of the 64-bit result.
  uint64_t decoded = 0;
  for (int i = 7; i >= 0; --i) {
    decoded = (decoded << 8) | bytes[i];
  }
  return decoded;
}
// Internal routine for use by fallback path of GetVarint32Ptr
// Varint layout: each encoded byte carries 7 payload bits in its low bits.
// The high bit (0x80) is set when more bytes of the same value follow, and
// clear on the final byte of the value.
const char* GetVarint32PtrFallback(const char* p, const char* limit,
uint32_t* value);
// Decodes a varint32 from [p, limit). On success stores the result in *value
// and returns a pointer just past the encoded bytes; otherwise returns
// whatever GetVarint32PtrFallback returns for the same arguments.
inline const char* GetVarint32Ptr(const char* p, const char* limit,
                                  uint32_t* value) {
  // Fast path: a single byte whose continuation bit (0x80) is clear is the
  // whole value.
  if (p < limit) {
    const uint32_t first = *(reinterpret_cast<const uint8_t*>(p));
    if (first < 128) {
      *value = first;
      return p + 1;
    }
  }
  // Either the range is empty or the first byte announces more bytes; the
  // general decoder handles both cases.
  return GetVarint32PtrFallback(p, limit, value);
}
} // namespace leveldb
util/coding.cc
#include "util/coding.h"
namespace leveldb {
// Appends the 4-byte fixed-width encoding of `value` to *dst.
void PutFixed32(std::string* dst, uint32_t value) {
  // Encode into a stack buffer first, then append all of it in one call.
  char encoded[sizeof(uint32_t)];
  EncodeFixed32(encoded, value);
  dst->append(encoded, sizeof(encoded));
}
// Appends the 8-byte fixed-width encoding of `value` to *dst.
void PutFixed64(std::string* dst, uint64_t value) {
  // Encode into a stack buffer first, then append all of it in one call.
  char encoded[sizeof(uint64_t)];
  EncodeFixed64(encoded, value);
  dst->append(encoded, sizeof(encoded));
}
// Writes the varint encoding of `v` to dst and returns a pointer just past the
// last byte written (1 to 5 bytes). Each output byte holds 7 payload bits,
// least-significant group first; the high bit is set on every byte except the
// last one.
// REQUIRES: dst has room for the encoded value.
char* EncodeVarint32(char* dst, uint32_t v) {
  // Operate on the output as unsigned bytes.
  uint8_t* const out = reinterpret_cast<uint8_t*>(dst);
  constexpr uint32_t kContinue = 0x80;  // "more bytes follow" flag
  constexpr uint32_t kPayload = 0x7f;   // low 7 bits of a group

  // Fits in 7 bits: a single byte with the flag clear.
  if (v < (1u << 7)) {
    out[0] = static_cast<uint8_t>(v);
    return dst + 1;
  }
  out[0] = static_cast<uint8_t>((v & kPayload) | kContinue);

  // Fits in 14 bits: the second byte is the last.
  if (v < (1u << 14)) {
    out[1] = static_cast<uint8_t>(v >> 7);
    return dst + 2;
  }
  out[1] = static_cast<uint8_t>(((v >> 7) & kPayload) | kContinue);

  // Fits in 21 bits.
  if (v < (1u << 21)) {
    out[2] = static_cast<uint8_t>(v >> 14);
    return dst + 3;
  }
  out[2] = static_cast<uint8_t>(((v >> 14) & kPayload) | kContinue);

  // Fits in 28 bits.
  if (v < (1u << 28)) {
    out[3] = static_cast<uint8_t>(v >> 21);
    return dst + 4;
  }
  out[3] = static_cast<uint8_t>(((v >> 21) & kPayload) | kContinue);

  // Full 32-bit value: the fifth byte carries the top 4 bits.
  out[4] = static_cast<uint8_t>(v >> 28);
  return dst + 5;
}
// Appends the varint encoding of `v` to *dst.
void PutVarint32(std::string* dst, uint32_t v) {
  // Stack buffer for the encoded bytes.
  char scratch[5];
  // The encoder returns one-past-the-last byte written, so the distance from
  // the start of the buffer is the encoded length.
  const char* const end = EncodeVarint32(scratch, v);
  dst->append(scratch, end - scratch);
}
// Writes the varint encoding of `v` to dst and returns a pointer just past the
// last byte written. Each output byte holds 7 payload bits, least-significant
// group first; the high bit is set on every byte except the last one.
// REQUIRES: dst has room for the encoded value.
char* EncodeVarint64(char* dst, uint64_t v) {
  uint8_t* out = reinterpret_cast<uint8_t*>(dst);
  // Emit continuation bytes while more than 7 significant bits remain.
  for (; v > 0x7f; v >>= 7) {
    *out++ = static_cast<uint8_t>((v & 0x7f) | 0x80);
  }
  // The final group has its high bit clear.
  *out++ = static_cast<uint8_t>(v);
  return reinterpret_cast<char*>(out);
}
// Appends the varint encoding of `v` to *dst.
void PutVarint64(std::string* dst, uint64_t v) {
  // Stack buffer for the encoded bytes.
  char scratch[10];
  // The encoder returns one-past-the-last byte written, so the distance from
  // the start of the buffer is the encoded length.
  const char* const end = EncodeVarint64(scratch, v);
  dst->append(scratch, end - scratch);
}
// Appends `value` to *dst as <varint32 length><raw bytes>.
void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
  // Length prefix first. The prefix is a uint32_t, so the size is converted
  // to 32 bits here.
  // NOTE(review): a size that does not fit in 32 bits would presumably be
  // truncated in the prefix while all bytes are still appended - confirm
  // callers never pass such slices.
  const uint32_t prefix = static_cast<uint32_t>(value.size());
  PutVarint32(dst, prefix);
  // Then the payload itself.
  dst->append(value.data(), value.size());
}
// Returns the number of bytes in the varint encoding of `v`: one byte per
// 7-bit group, with a minimum of one byte.
int VarintLength(uint64_t v) {
  int bytes = 1;
  // Every additional non-empty 7-bit group above the first costs one byte.
  for (uint64_t rest = v >> 7; rest != 0; rest >>= 7) {
    ++bytes;
  }
  return bytes;
}
// General varint32 decoder. Reads at most 5 bytes from [p, limit), each
// contributing its low 7 bits to the result (least-significant group first).
// On reaching a byte whose high bit is clear, stores the decoded value in
// *value and returns a pointer just past that byte. Returns nullptr, leaving
// *value untouched, if the range ends or 5 bytes are consumed without finding
// a terminating byte.
const char* GetVarint32PtrFallback(const char* p, const char* limit,
                                   uint32_t* value) {
  uint32_t accumulated = 0;
  uint32_t shift = 0;
  // Shifts 0, 7, 14, 21, 28 cover the five possible bytes of a varint32.
  while (shift <= 28 && p < limit) {
    const uint32_t current = *(reinterpret_cast<const uint8_t*>(p));
    ++p;
    // Merge the 7 payload bits into their position.
    accumulated |= (current & 127) << shift;
    if ((current & 128) == 0) {
      // High bit clear: this was the final byte.
      *value = accumulated;
      return p;
    }
    // High bit set: more bytes are present.
    shift += 7;
  }
  return nullptr;
}
// Parses a varint32 from the front of *input into *value. On success advances
// *input past the parsed bytes and returns true; on failure returns false and
// leaves *input unchanged.
bool GetVarint32(Slice* input, uint32_t* value) {
  const char* const begin = input->data();
  // One past the last readable byte of the slice.
  const char* const end = begin + input->size();
  const char* const next = GetVarint32Ptr(begin, end, value);
  if (next == nullptr) {
    return false;
  }
  // `next` points at the first unparsed byte; the remainder becomes the new
  // contents of *input.
  *input = Slice(next, end - next);
  return true;
}
// Varint64 decoder. Reads at most 10 bytes from [p, limit), each contributing
// its low 7 bits to the result (least-significant group first). On reaching a
// byte whose high bit is clear, stores the decoded value in *value and returns
// a pointer just past that byte. Returns nullptr, leaving *value untouched, if
// the range ends or 10 bytes are consumed without finding a terminating byte.
const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) {
  uint64_t accumulated = 0;
  uint32_t shift = 0;
  // Shifts 0, 7, ..., 63 cover the ten possible bytes of a varint64.
  while (shift <= 63 && p < limit) {
    const uint64_t current = *(reinterpret_cast<const uint8_t*>(p));
    ++p;
    // Merge the 7 payload bits into their position.
    accumulated |= (current & 127) << shift;
    if ((current & 128) == 0) {
      // High bit clear: this was the final byte.
      *value = accumulated;
      return p;
    }
    // High bit set: more bytes are present.
    shift += 7;
  }
  return nullptr;
}
// Parses a varint64 from the front of *input into *value. On success advances
// *input past the parsed bytes and returns true; on failure returns false and
// leaves *input unchanged.
bool GetVarint64(Slice* input, uint64_t* value) {
  const char* const begin = input->data();
  // One past the last readable byte of the slice.
  const char* const end = begin + input->size();
  const char* const next = GetVarint64Ptr(begin, end, value);
  if (next == nullptr) {
    return false;
  }
  // `next` points at the first unparsed byte; the remainder becomes the new
  // contents of *input.
  *input = Slice(next, end - next);
  return true;
}
// Parses a length-prefixed slice (<varint32 length><raw bytes>) from
// [p, limit). On success points *result at the payload bytes (no copy is made)
// and returns a pointer just past the payload. Returns nullptr if the length
// prefix cannot be decoded or the payload would extend beyond limit; *result
// is left untouched in that case.
const char* GetLengthPrefixedSlice(const char* p, const char* limit,
                                   Slice* result) {
  uint32_t len;
  // Decode the varint32 length prefix; p then points at the payload.
  p = GetVarint32Ptr(p, limit, &len);
  if (p == nullptr) return nullptr;
  // Compare the length against the bytes actually remaining rather than
  // forming `p + len`: for a large or corrupt length that pointer would lie
  // outside the buffer (undefined behaviour) and could wrap around on 32-bit
  // targets, letting the bounds check pass. A successful decode leaves
  // p <= limit, so the difference is non-negative.
  const uint64_t remaining = static_cast<uint64_t>(limit - p);
  if (len > remaining) return nullptr;
  // The payload is fully inside the range; expose it as a slice.
  *result = Slice(p, len);
  return p + len;
}
// Parses a length-prefixed slice (<varint32 length><raw bytes>) from the front
// of *input. On success points *result at the payload, removes the prefix and
// the payload from *input, and returns true. Returns false if the length
// cannot be decoded or fewer than `len` bytes remain after it.
bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
  uint32_t len;
  // The prefix is a varint32 (variable width, not a fixed 4 bytes). A
  // successful parse advances *input to the first payload byte.
  if (!GetVarint32(input, &len)) {
    return false;
  }
  // The payload must be fully present. Note that *input has already been
  // advanced past the length prefix at this point.
  if (input->size() < len) {
    return false;
  }
  // Expose the payload without copying, then drop it from *input.
  *result = Slice(input->data(), len);
  input->remove_prefix(len);
  return true;
}
} // namespace leveldb