由于老师要求,最近在做oceanbase存储过程的实现,在oceanbase 0.4以前是不支持存储过程的。实现的主要步骤主要包括
1、语法解析
2、词法解析
3、具体执行语法树的步骤
现在先来说说语法解析吧,在这一块主要是使用的flex( 词法分析器生成工具) 和bison(语法分析器生成器) 这两个是对用户输入的存储过程语句进行解析的
来具体说说该怎么实现对sql语句的分析吧
1、首先建立一个lex的文件
%option noyywrap nodefault yylineno case-insensitive %{ #include "prosql.tab.hpp" #include <stdarg.h> #include <string.h> #include <stdlib.h> #include <stdio.h> #include <malloc.h> //YYSTYPE yylval; int oldstate; extern "C" int yylex(); //extern "C" int yyparse(); extern "C" void yyerror(const char *s, ...); extern char globalInputText[10000]; extern int readInputForLexer( char *buffer, int *numBytesRead, int maxBytesToRead ); #undef YY_INPUT #define YY_INPUT(b,r,s) readInputForLexer(b,&r,s) %} %x COMMENT %% CREATE { return CREATE; } PROCEDURE { return PROCEDURE; } SQL { return SQL; } DECLARE { return DECLARE; } SET { return SET; } BEGIN { return BEGINT; } END { return END; } INT { return INT; } VARCHAR { return VARCHAR; } DATE { return DATE; } TIME { return TIME; } DOUBLE { return DOUBLE; } IF { return IF; } THEN { return THEN; } ELSE { return ELSE; } ENDIF { return ENDIF; } FOR { return FOR; } WHEN { return WHEN; } WHILE { return WHILE; } [0-9]+ { yylval.strval = strdup(yytext);/*printf("number=%s\n",yylval.strval);*/ return INTNUM; }/*number*/ [0-9]+"."[0-9]* | "."[0-9]+ | [0-9]+E[-+]?[0-9]+ | [0-9]+"."[0-9]*E[-+]?[0-9]+ | "."[0-9]*E[-+]?[0-9]+ { yylval.strval = strdup(yytext);/*printf("float=%s\n",yylval.strval);*/ return APPROXNUM; }/*double*/ TRUE { yylval.strval = "1";/*printf("bool=%s\n",yylval.strval);*/ return BOOL; }/*bool*/ FALSE { yylval.strval = "0";/*printf("bool=%s\n",yylval.strval);*/ return BOOL; }/*bool*/ '(\\.|''|[^'\n])*' | \"(\\.|\"\"|[^"\n])*\" { char *temp = strdup(yytext); yylval.strval = strdup(yytext); //GetCorrectString(yylval.strval, temp); /*printf("string=%s\n",yylval.strval);*/ return STRING; }/*string*/ '(\\.|[^'\n])*$ { yyerror("Unterminated string %s", yytext); } \"(\\.|[^"\n])*$ { yyerror("Unterminated string %s", yytext); } X'[0-9A-F]+' | 0X[0-9A-F]+ { yylval.strval = strdup(yytext); return STRING; } 0B[01]+ | B'[01]+' { yylval.strval = strdup(yytext); return STRING; } [-+&~|^/%*(),.;!] { return yytext[0]; } "&&" { return ANDOP; } "||" { return OR; } "<" { yylval.subtok = 1; return COMPARISON; } ">" { yylval.subtok = 2; return COMPARISON; } "!=" | "<>" { yylval.subtok = 3; return COMPARISON; } "=" { yylval.subtok = 4; return COMPARISON; } "<=" { yylval.subtok = 5; return COMPARISON; } ">=" { yylval.subtok = 6; return COMPARISON; } "<=>" { yylval.subtok = 12; return COMPARISON; } "<<" { yylval.subtok = 1; return SHIFT; } ">>" { yylval.subtok = 2; return SHIFT; } [A-Za-z][A-Za-z0-9_]* { yylval.strval = strdup(yytext); /*printf("name 1=%s\n",yylval.strval);*/ return NAME; } `[^`/\\.\n]+` { yylval.strval = strdup(yytext+1); /*printf("name 2=%s\n",yylval.strval);*/ yylval.strval[yyleng-2] = 0; return NAME; } `[^`\n]*$ { yyerror("unterminated quoted name %s", yytext); } @[0-9a-z_.$]+ | @\"[^"\n]+\" | @`[^`\n]+` | @'[^'\n]+' { yylval.strval = strdup(yytext+1); return USERVAR; } @\"[^"\n]*$ { yyerror("unterminated quoted user variable %s", yytext); } @`[^`\n]*$ { yyerror("unterminated quoted user variable %s", yytext); } @'[^'\n]*$ { yyerror("unterminated quoted user variable %s", yytext); } ":=" { return ASSIGN; } #.* ; "--"[ \t].* ; "/*" { oldstate = YY_START; BEGIN COMMENT; } <COMMENT>"*/" { BEGIN oldstate; } <COMMENT>.|\n ; <COMMENT><<EOF>> { yyerror("unclosed comment"); } [ \t\n] /* white space */ . { yyerror("mystery character '%c'", *yytext); } %%
这一部分呢就是对 每个我们自定义的满足正则的识别
接下来是对词的语法识别
%{ #include <stdlib.h> #include <stdarg.h> #include <string.h> #include <stdio.h> #include <malloc.h> char * parsetreeroot=NULL; extern "C" int yylex(); extern "C" int yyparse(); extern "C" void yyerror(const char *s, ...); char globalInputText[10000]; int globalReadOffset; int readInputForLexer( char *buffer, int *numBytesRead, int maxBytesToRead ); char * mystrcat(char *s1,char *s2) { char *p1=(char *)malloc(strlen(s1)+strlen(s2)+1); strcpy(p1,s1); strcat(p1,s2); return p1; } %} %locations %union { int intval; double floatval; char *strval; int subtok; } %token <strval> NAME %token <strval> STRING %token <strval> INTNUM %token <strval> BOOL %token <strval> APPROXNUM %token <strval> USERVAR %type <strval> stmt_root create_stmt para_list definition data_type pro_block pro_parameters declare_list set_list %type <strval> assign_var pro_body pro_stmt_list sql_stmt expr %right ASSIGN %left OR %left XOR %left ANDOP %left NOT '!' %left BETWEEN %left <subtok> COMPARISON /* = <> < > <= >= <=> */ %left '|' %left '&' %left <subtok> SHIFT /* << >> */ %left '+' '-' %left '*' '/' '%' MOD %left '^' %token CREATE %token PROCEDURE %token PRONAME %token DECLARE %token SET %token BEGINT %token END %token SQL %token INT %token VARCHAR %token DATE %token TIME %token DOUBLE %token IF %token NOT %token EXISTS %token THEN %token ELSE %token ENDIF %token FOR %token WHEN %token WHILE %start stmt_root %% stmt_root: create_stmt pro_block { $$=mystrcat($1,$2); parsetreeroot=$$;} ; create_stmt: CREATE PROCEDURE NAME '(' para_list ')' { char *temp=mystrcat("create procedure ",$3); temp=mystrcat(temp,"("); temp=mystrcat(temp,$5); $$=mystrcat(temp,")(create)\n"); } ; /* opt_if_not_exists: { $$ = 0; } | IF NOT EXISTS { $$ = 1; } ; */ para_list: definition { $$=$1; } |definition ',' para_list { char *temp=mystrcat($1,","); $$=mystrcat(temp,$3); } ; definition: USERVAR data_type { char *temp=mystrcat($1," "); $$=mystrcat(temp,$2); } ; data_type: DATE {$$="date"; } | TIME {$$="time"; } | VARCHAR '(' INTNUM ')' {$$="varchar"; } | INT {$$="int"; } | DOUBLE {$$="double"; } ; pro_block: BEGINT pro_parameters pro_body END { char *temp=mystrcat("begin\n",$2); temp=mystrcat(temp,""); temp=mystrcat(temp,$3); $$=mystrcat(temp,"end"); //printf("pro_body %s\n",$3); } ; pro_parameters: declare_list ';' { $$=mystrcat($1,";(declare)\n");} |pro_parameters declare_list ';' { char *temp=mystrcat($1,$2); $$=mystrcat(temp,";(declare)\n"); } |pro_parameters set_list ';' { char *temp=mystrcat($1,$2); $$=mystrcat(temp,";(set)\n"); } ; declare_list: |DECLARE definition { $$=mystrcat("declare ",$2); } |declare_list ',' definition { char *temp=mystrcat($1,","); $$=mystrcat(temp,$3); } ; set_list: |SET assign_var { $$=mystrcat("set ",$2); } | set_list ',' assign_var { char *temp=mystrcat($1,","); $$=mystrcat(temp,$3); } ; assign_var : USERVAR COMPARISON expr { char *temp=mystrcat($1,"="); $$=mystrcat(temp,$3); } ; expr: NAME { $$=$1;} | STRING { $$=$1;} | INTNUM { $$=$1;} | APPROXNUM { $$=$1;} | BOOL { $$=$1;} ; pro_body : pro_stmt_list { $$=$1; } ; pro_stmt_list: sql_stmt {$$=$1; } |pro_stmt_list sql_stmt { $$=mystrcat($1,$2); } ; sql_stmt: |SQL NAME ';' { $$=mystrcat($2,";(sql)\n");} ; %% /* int main(int argc, char* argv[]) { yyparse(); }*/ int readInputForLexer( char *buffer, int *numBytesRead, int maxBytesToRead ) { int numBytesToRead = maxBytesToRead; int bytesRemaining = strlen(globalInputText)-globalReadOffset; int i; if ( numBytesToRead > bytesRemaining ) { numBytesToRead = bytesRemaining; } for ( i = 0; i < numBytesToRead; i++ ) { buffer[i] = globalInputText[globalReadOffset+i]; } *numBytesRead = numBytesToRead; globalReadOffset += numBytesToRead; return 0; } void yyerror(const char *s, ...) { fprintf(stderr, "error: %s\n", s); } void zzerror(const char *s, ...) { extern int yylineno; va_list ap; va_start(ap, s); fprintf(stderr, "%d: error: ", yylineno); vfprintf(stderr, s, ap); fprintf(stderr, "\n"); } int yywrap(void) { return 1; } char* getsql() { return parsetreeroot; }
这部分就是对上一个识别出来的词 进行顺序上的确定,构成一个完整的语法
这些需要在linux环境下进行调试
bison -d 文件名
flex 文件名