1.lua的zio
1.1 zio的定义
/*
** Reader callback used by the stream: it returns a pointer to a block of
** data and stores the block's length in '*sz'. luaZ_fill treats a NULL
** pointer or a zero length as end of input.
*/
typedef const char * (*lua_Reader) (lua_State *L, void *ud, size_t *sz);
/* Buffered input stream that pulls its data from a lua_Reader. */
struct Zio {
size_t n; //bytes still unread in the current buffer
const char *p; //current read position inside the buffer
lua_Reader reader; //callback that supplies the next block of data
void *data; //opaque argument handed to the reader callback
lua_State *L; /* Lua state (for reader) */
};
/* Set up a stream with an empty buffer; no data is read yet. */
LUAI_FUNC void luaZ_init (lua_State *L, ZIO *z, lua_Reader reader,void *data);
/* Ask the reader for a new block and return its first byte, or EOZ at end of input. */
LUAI_FUNC int luaZ_fill (ZIO *z);
/*
** Read one character: take it from the buffer while bytes remain, otherwise
** refill through luaZ_fill. Note that 'z' is evaluated more than once.
*/
#define zgetc(z) (((z)->n--)>0 ? cast_uchar(*(z)->p++) : luaZ_fill(z))
1.2 zio的初始化
/*
** Initialise stream 'z': remember the Lua state, the reader callback and
** its user argument, and start with an empty buffer (no pointer, zero
** bytes available). Nothing is read from the reader here.
*/
void luaZ_init (lua_State *L, ZIO *z, lua_Reader reader, void *data) {
  /* empty buffer first */
  z->p = NULL;
  z->n = 0;
  /* then the data source and its owning state */
  z->data = data;
  z->reader = reader;
  z->L = L;
}
1.3 zio的读取
#define EOZ (-1)  /* returned when the reader has no more data */

/*
** Fetch a new block from the reader callback and return its first byte as
** an unsigned char value. The stream is left pointing at the second byte,
** with 'n' holding the number of bytes that remain after the one returned.
** Returns EOZ when the reader yields NULL or an empty block.
*/
int luaZ_fill (ZIO *z) {
  lua_State *L = z->L;
  size_t len;
  const char *chunk;
  lua_unlock(L);  /* the state is unlocked around the callback */
  chunk = z->reader(L, z->data, &len);  /* 'len' receives the block length */
  lua_lock(L);
  if (chunk != NULL && len != 0) {
    z->p = chunk + 1;  /* first byte is consumed by this call */
    z->n = len - 1;
    return cast_uchar(chunk[0]);
  }
  return EOZ;  /* end of input */
}
//Return one character; when no buffered bytes remain, refill via luaZ_fill.
//The argument 'z' is evaluated more than once, so it must have no side effects.
#define zgetc(z) (((z)->n--)>0 ? cast_uchar(*(z)->p++) : luaZ_fill(z))
2.lua语法解析
2.1 EBNF语法
statlist -> { stat [';'] }
fieldsel -> ['.' | ':'] NAME
index -> '[' expr ']'
recfield -> (NAME | '['exp']') = exp
listfield -> exp
field -> listfield | recfield
constructor -> '{' [ field { sep field } [sep] ] '}'
sep -> ',' | ';'
parlist -> [ {NAME ','} (NAME | '...') ]
body -> '(' parlist ')' block END
explist -> expr { ',' expr }
funcargs -> '(' [ explist ] ')'
funcargs -> constructor
funcargs -> STRING
primaryexp -> NAME | '(' expr ')'
suffixedexp -> primaryexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
simpleexp -> FLT | INT | STRING | NIL | TRUE | FALSE | ... | constructor | FUNCTION body | suffixedexp
block -> statlist
restassign -> ',' suffixedexp restassign
restassign -> '=' explist
cond -> exp
label -> '::' NAME '::'
whilestat -> WHILE cond DO block END
repeatstat -> REPEAT block UNTIL cond
forbody -> DO block
fornum -> NAME = exp,exp[,exp] forbody
forlist -> NAME {,NAME} IN explist forbody
forstat -> FOR (fornum | forlist) END
test_then_block -> [IF | ELSEIF] cond THEN block
ifstat -> IF cond THEN block {ELSEIF cond THEN block} [ELSE block] END
ATTRIB -> ['<' Name '>']
stat -> LOCAL NAME ATTRIB { ',' NAME ATTRIB } ['=' explist]
funcname -> NAME {fieldsel} [':' NAME]
funcstat -> FUNCTION funcname body
stat -> func | assignment
stat -> RETURN [explist] [';']
- []中括号包住的部分表示可选
- {}大括号包住的部分,表示会有0次或者多次出现
- 大写字母表示一个终结符
这些语法规则来源于lparser.c文件中的注释,一般每条规则都会有独立的函数来处理
2.2 lua中token定义
/*
** Token codes for multi-character tokens. Numbering starts at
** FIRST_RESERVED (defined outside this excerpt). The reserved words come
** first, TK_AND through TK_WHILE, and NUM_RESERVED counts exactly that
** range. The order must stay in step with the luaX_tokens table, because
** the lexer maps a reserved word's table index back to its token code.
*/
enum RESERVED {
/* terminal symbols denoted by reserved words */
TK_AND = FIRST_RESERVED, TK_BREAK,
TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
/* other terminal symbols */
TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
TK_SHL, TK_SHR,
TK_DBCOLON, TK_EOS,
TK_FLT, TK_INT, TK_NAME, TK_STRING
};
/*
** Text of every token listed in enum RESERVED, in the same order. The
** first NUM_RESERVED entries are the reserved words that luaX_init turns
** into fixed strings; the remaining entries are operator spellings and
** placeholder names such as "<eof>" or "<name>".
*/
static const char *const luaX_tokens [] = {
"and", "break", "do", "else", "elseif",
"end", "false", "for", "function", "goto", "if",
"in", "local", "nil", "not", "or", "repeat",
"return", "then", "true", "until", "while",
"//", "..", "...", "==", ">=", "<=", "~=",
"<<", ">>", "::", "<eof>",
"<number>", "<integer>", "<name>", "<string>"
};
在这些token定义中,while及其之前的token(即保留字)在虚拟机创建时已经由luaX_init加入字符串池,并被标记为不会被GC回收
/* number of reserved words: the enum range TK_AND .. TK_WHILE inclusive */
#define NUM_RESERVED (cast_int(TK_WHILE-FIRST_RESERVED + 1))

/*
** Create the strings the lexer relies on: the environment name (LUA_ENV)
** and one string per reserved word. Each is fixed so that it is never
** collected. Every reserved-word string gets 'extra' set to its index in
** luaX_tokens plus one; a non-zero 'extra' is what isreserved() tests and
** what llex converts back into a token code.
*/
void luaX_init (lua_State *L) {
  TString *envname = luaS_newliteral(L, LUA_ENV);  /* create env name */
  int idx = 0;
  luaC_fix(L, obj2gco(envname));  /* never collect this name */
  while (idx < NUM_RESERVED) {
    TString *word = luaS_new(L, luaX_tokens[idx]);
    luaC_fix(L, obj2gco(word));  /* reserved words are never collected */
    idx++;
    word->extra = cast_byte(idx);  /* 1-based index marks a reserved word */
  }
}
这段代码的一个细节就是ts->extra的赋值,这个值在token分割函数中被用到,用来区分关键字
2.3 token分割函数
#define next(ls) (ls->current = zgetc(ls->z)) //read the next character with zgetc and store it in ls->current
#define isreserved(s) ((s)->tt == LUA_VSHRSTR && (s)->extra > 0) //reserved-word test: string tagged LUA_VSHRSTR with a non-zero 'extra'
/*
** Scan the input starting at ls->current and return the next token.
** Whitespace, line breaks and comments are skipped inside the loop.
** Single-character tokens are returned as their own character code;
** multi-character tokens are returned as TK_* codes. For identifiers the
** string is stored in seminfo->ts; string and number tokens are produced
** by the read_* helpers, which receive 'seminfo'. Returns TK_EOS at end
** of input.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
luaZ_resetbuffer(ls->buff);
for (;;) {
switch (ls->current) {
case '\n': case '\r': { //line breaks
inclinenumber(ls);
break;
}
case ' ': case '\f': case '\t': case '\v': { //whitespace
next(ls);
break;
}
case '-': { //either a single '-' token or the start of a '--' comment
next(ls);
if (ls->current != '-') return '-';
/* else is a comment */
next(ls);
if (ls->current == '[') { //possibly a long (multi-line) comment
size_t sep = skip_sep(ls);
luaZ_resetbuffer(ls->buff); /* 'skip_sep' may dirty the buffer */
if (sep >= 2) {
read_long_string(ls, NULL, sep); //skip the long comment; NULL means its text is not kept
luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */
break;
}
}
/* else short comment */
while (!currIsNewline(ls) && ls->current != EOZ) //short comment: discard the rest of this line
next(ls); /* skip until end of line (or end of file) */
break;
}
case '[': { /* long string or simply '[' */
size_t sep = skip_sep(ls);
if (sep >= 2) {
read_long_string(ls, seminfo, sep); //read the long string; per the original note it is stored in seminfo->ts
return TK_STRING;
}
else if (sep == 0) /* '[=...' missing second bracket? */
lexerror(ls, "invalid long string delimiter", TK_STRING);
return '[';
}
case '=': {
next(ls);
if (check_next1(ls, '=')) return TK_EQ; /* '==' */
else return '=';
}
case '<': {
next(ls);
if (check_next1(ls, '=')) return TK_LE; /* '<=' */
else if (check_next1(ls, '<')) return TK_SHL; /* '<<' */
else return '<';
}
case '>': {
next(ls);
if (check_next1(ls, '=')) return TK_GE; /* '>=' */
else if (check_next1(ls, '>')) return TK_SHR; /* '>>' */
else return '>';
}
case '/': {
next(ls);
if (check_next1(ls, '/')) return TK_IDIV; /* '//' */
else return '/';
}
case '~': {
next(ls);
if (check_next1(ls, '=')) return TK_NE; /* '~=' */
else return '~';
}
case ':': {
next(ls);
if (check_next1(ls, ':')) return TK_DBCOLON; /* '::' */
else return ':';
}
case '"': case '\'': { //quoted string
read_string(ls, ls->current, seminfo); //read the string; per the original note it is stored in seminfo->ts
return TK_STRING;
}
case '.': { /* '.', '..', '...', or number */
save_and_next(ls);
if (check_next1(ls, '.')) {
if (check_next1(ls, '.'))
return TK_DOTS; /* '...' */
else return TK_CONCAT; /* '..' */
}
else if (!lisdigit(ls->current)) return '.';
else return read_numeral(ls, seminfo);
}
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': {
return read_numeral(ls, seminfo);
}
case EOZ: { //end of input
return TK_EOS;
}
default: {
if (lislalpha(ls->current)) { //identifier or reserved word
TString *ts;
do {
save_and_next(ls);
} while (lislalnum(ls->current));
ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
luaZ_bufflen(ls->buff));
seminfo->ts = ts;
if (isreserved(ts)) //reserved word
return ts->extra - 1 + FIRST_RESERVED; //'extra' was set to index+1 by luaX_init
else {
return TK_NAME; //ordinary identifier
}
}
else { /* single-char tokens ('+', '*', '%', '{', '}', ...) */
int c = ls->current;
next(ls);
return c;
}
}
}
}
}
2.4 gdb调试脚本token输出
为了方便调试,简单地在源码里复制了一份代码
# plua_tokens ARG: print the string returned by print_tokens(ARG).
# Usage: plua_tokens <LexState *>
# Exactly one argument is required. With any other count only the error
# message is printed and print_tokens is not called (the original fell
# through and still evaluated $arg0 after reporting the error).
define plua_tokens
  if $argc != 1
    p "参数数目错误,{plua_tokens LexState *}"
  else
    printf "%s",print_tokens($arg0)
  end
end
在gdb_print.c实现print_tokens然后封装一个调试脚本
-- Sample global assignments: two integers, a string and a table constructor.
width = 10
height = 30
str = "hello lua"
tab = {a=12,b=14,c=30}
-- Returns a when a > b, otherwise returns b.
function number_max(a,b)
if a > b then
return a
end
return b
end
gdb执行
b statlist
plua_tokens ls
ls类型是 LexState
输出结果
具体的做法可以看 https://github.com/huoyang11/read_lua/blob/main/src/gdb_print.c
2.5 gdb调试脚本code输出
plua_lscode ls
ls类型是 LexState
在lua语法解析时可以通过这个脚本看解析的指令