Atitit 发帖机系列(8) 词法分析器 v5 版本新特性说明
v5 增加对 SQL 单引号的内部支持:双引号字符串内部的单引号可以作为 string 的一部分。
结构调整:使用递归法重构循环法,放弃循环法。
V4 java dsl词法分析 使用循环
V3 sql的词法分析
atitit..sql update语法的词法分析,与语法ast构建 -
V1版 anno 解析器 基于fsm设计模式
V2
Atitit.antlr实现词法分析 - attilax
/AtiPlatf_ee/src/com/attilax/fsm/JavaTokenScannerV2.java
package com.attilax.fsm;
import java.util.Collection;
import java.util.List;
import com.attilax.io.filex;
import com.attilax.io.pathx;
import com.attilax.json.AtiJson;
import com.google.common.collect.Lists;
/**
 * Character-driven scanner that splits a call-like expression such as
 * {@code meth("select ... where a='abc'",'str2','s3')} into {@link Token}s.
 *
 * <p>Each call to {@link #nextTokens()} consumes one character and dispatches
 * on it. Ordinary characters are appended to {@link #curTokenTxt} and the
 * scanner recurses to the next character; delimiter characters emit tokens.
 *
 * <p>Token types set by the code in this class: {@code "str"} (quoted
 * literal), {@code "op"} (colon, closing bracket, text following a colon),
 * {@code "spltr"} (comma) and {@code "con"} (any other pending text).
 *
 * <p>Known limitations:
 * <ul>
 * <li>Backslash escapes inside quoted literals are not interpreted.</li>
 * <li>Text at the very end of the input that is not followed by a delimiter
 * is discarded, because the end-of-input exception unwinds the recursion
 * before a token is produced.</li>
 * <li>Recursion depth grows by two frames per ordinary character, so a very
 * long token can exhaust the stack.</li>
 * </ul>
 *
 * <p>NOTE(review): {@code BrkStartEvt()} and
 * {@code gettype_4curCharIsBrkend(String, String)} are called below but are
 * not defined in this listing; they must be supplied for the class to compile.
 */
public class JavaTokenScannerV2 {

    /** Demo entry point: tokenises a sample expression and prints it as JSON. */
    public static void main(String[] args) {
        String s = "meth(\"select from tab where a='abc'\",'str2','s3')";
        List<Token> li = new JavaTokenScannerV2().getTokens(s);
        System.out.println(AtiJson.toJson(li));
    }

    /** Tokens recorded by the quote and closing-bracket handlers. */
    List<Token> tokens = Lists.newArrayList();

    /** Text accumulated since the last emitted token. */
    String curTokenTxt = "";

    /** Delimiter characters; not referenced by the code in this class. */
    String splitors = "(),";

    /** Name of the last state transition, e.g. "squoStart", "colon", "brkEnd". */
    String curStat = "ini";

    /** Double-quote state, tracked separately: "ini", "dbquoStart" or "dbquoEnd". */
    String curDbquoStat = "ini";

    /** The text most recently handed to {@link #getTokens(String)}. */
    private String code;

    /** Characters of the input being scanned. */
    public char[] code_char_arr;

    /** Character at {@link #gColumn}. */
    public char cur_char;

    /** Index of the current character; -1 before scanning starts. */
    int gColumn = -1;

    /**
     * Tokenises {@code codeStr} from the beginning.
     *
     * <p>All scanner state is reset first, so one instance can be reused for
     * several inputs.
     *
     * @param codeStr the text to scan; must not be {@code null}
     * @return the tokens in source order
     * @throws RuntimeException if a handler returns something that is neither
     *         a {@code Token} nor a {@code List}
     */
    public List<Token> getTokens(String codeStr) {
        this.code = codeStr;
        this.code_char_arr = codeStr.toCharArray();
        this.gColumn = -1;
        this.curTokenTxt = "";
        this.curStat = "ini";
        this.curDbquoStat = "ini";
        // Fresh list rather than clear(), so a list handed out earlier is not mutated.
        this.tokens = Lists.newArrayList();

        List<Token> li = Lists.newArrayList();
        while (true) {
            Object tk;
            try {
                tk = nextTokens();
            } catch (TokenEndEx endOfInput) {
                // nextTokens() signals end of input by throwing; this is the normal exit.
                break;
            }
            if (tk instanceof Token) {
                li.add((Token) tk);
            } else if (tk instanceof List) {
                // Per-element checked casts avoid an unchecked generic cast.
                for (Object item : (List<?>) tk) {
                    li.add((Token) item);
                }
            } else {
                throw new RuntimeException(
                        "token type err,curchar:" + this.cur_char + ",colidx:" + this.gColumn);
            }
        }
        return li;
    }

    /**
     * Advances one character and dispatches to the handler for it.
     *
     * @return a single {@code Token} or a {@code List} of tokens
     * @throws TokenEndEx when the cursor moves past the last character
     */
    public Object nextTokens() throws TokenEndEx {
        gColumn++;
        if (gColumn > code_char_arr.length - 1) {
            throw new TokenEndEx(new String(code_char_arr));
        }
        cur_char = code_char_arr[gColumn];
        switch (cur_char) {
        case '(':
            return BrkStartEvt();
        case ')':
            return brkEndEvt();
        case '\'':
            return sQuoEvt();
        case '\"':
            return dbQuoEvt();
        case ':':
            return colonEvt();
        case ',':
            return commaEvt();
        default:
            return normalCharEvt();
        }
    }

    /** True while an opening single quote has been seen without its closing quote. */
    private boolean inSingleQuoted() {
        return "squoStart".equals(this.curStat);
    }

    /** True while an opening double quote has been seen without its closing quote. */
    private boolean inDoubleQuoted() {
        return "dbquoStart".equals(this.curDbquoStat);
    }

    /** True while the cursor is inside either kind of quoted literal. */
    private boolean inStringLiteral() {
        return inSingleQuoted() || inDoubleQuoted();
    }

    /** Appends the current character to the pending text and scans on. */
    private Object normalCharEvt() throws TokenEndEx {
        curTokenTxt = curTokenTxt + String.valueOf(cur_char);
        return nextTokens();
    }

    /**
     * Handles {@code ','}. Inside a quoted literal it is ordinary text;
     * otherwise any non-blank pending text is emitted, followed by the
     * {@code "spltr"} token for the comma itself.
     */
    private Object commaEvt() throws TokenEndEx {
        if (inStringLiteral()) {
            return normalCharEvt();
        }
        List<Token> emitted = Lists.newArrayList();
        if (this.curTokenTxt.trim().length() > 0) {
            // Text that follows a colon is typed "op"; everything else "con".
            boolean afterColon = this.curTokenTxt.startsWith(":") || this.curStat.equals("colon");
            String pendingType = afterColon ? "op" : "con";
            emitted.add(new Token(this.curTokenTxt).setType(pendingType));
        }
        emitted.add(new Token(String.valueOf(cur_char)).setType("spltr"));
        // Clear the buffer so the emitted text does not leak into the next token.
        this.curTokenTxt = "";
        this.curStat = "commaStat";
        return emitted;
    }

    /**
     * Handles {@code ':'}. Inside a quoted literal it is ordinary text;
     * otherwise any non-blank pending text is emitted as {@code "con"},
     * followed by an {@code "op"} token for the colon.
     */
    private Object colonEvt() throws TokenEndEx {
        if (inStringLiteral()) {
            return normalCharEvt();
        }
        List<Token> emitted = Lists.newArrayList();
        if (this.curTokenTxt.trim().length() > 0) {
            emitted.add(new Token(this.curTokenTxt).setType("con"));
        }
        emitted.add(new Token(String.valueOf(cur_char)).setType("op"));
        this.curTokenTxt = "";
        this.curStat = "colon";
        return emitted;
    }

    /**
     * Handles {@code '"'}: opens a double-quoted literal, or closes the open
     * one and emits its content as a {@code "str"} token. Inside a
     * single-quoted literal the character is ordinary text.
     */
    private Object dbQuoEvt() throws TokenEndEx {
        if (inSingleQuoted()) {
            return normalCharEvt();
        }
        if (!inDoubleQuoted()) {
            // Opening quote: any text pending before it is discarded.
            this.curStat = "dbquoStart";
            this.curDbquoStat = "dbquoStart";
            this.curTokenTxt = "";
            return nextTokens();
        }
        Token literal = new Token(this.curTokenTxt).setType("str");
        tokens.add(literal);
        this.curTokenTxt = "";
        this.curStat = "dbquoEnd";
        this.curDbquoStat = "dbquoEnd";
        return literal;
    }

    /**
     * Handles {@code '\''}: opens a single-quoted literal, or closes the open
     * one and emits its content as a {@code "str"} token. Inside a
     * double-quoted literal the character is ordinary text, which lets SQL
     * such as {@code a='abc'} appear in a Java string.
     */
    private Object sQuoEvt() throws TokenEndEx {
        if (inDoubleQuoted()) {
            return normalCharEvt();
        }
        if (!inSingleQuoted()) {
            // Opening quote: any text pending before it is discarded.
            this.curStat = "squoStart";
            this.curTokenTxt = "";
            return nextTokens();
        }
        Token literal = new Token(this.curTokenTxt).setType("str");
        tokens.add(literal);
        this.curTokenTxt = "";
        this.curStat = "squoEnd";
        return literal;
    }

    /**
     * Handles {@code ')'}. Inside a quoted literal it is ordinary text;
     * otherwise any pending text is emitted, followed by an {@code "op"}
     * token for the bracket. Both are returned so the pending token reaches
     * the caller.
     */
    private Object brkEndEvt() throws TokenEndEx {
        if (inStringLiteral()) {
            return normalCharEvt();
        }
        List<Token> emitted = Lists.newArrayList();
        if (this.curTokenTxt.length() > 0) {
            String type = gettype_4curCharIsBrkend(this.curTokenTxt, this.curStat);
            Token pending = new Token(this.curTokenTxt).setType(type);
            tokens.add(pending);
            emitted.add(pending);
        }
        Token closing = new Token(")").setType("op");
        tokens.add(closing);
        emitted.add(closing);
        this.curTokenTxt = "";
        this.curStat = "brkEnd";
        return emitted;
    }
}
作者:: 绰号:老哇的爪子 ( 全名::Attilax Akbar Al Rapanui 阿提拉克斯 阿克巴 阿尔 拉帕努伊 )
汉字名:艾提拉(艾龙), EMAIL:1466519819@qq.com
转载请注明来源: http://www.cnblogs.com/attilax/
--Atiend