参考自https://blog.csdn.net/just_a_new_life/article/details/80895433
通过对程序的测试,发现此词法分析器存在不足之处:不能识别无符号数。为此,我对无符号数的状态转换图进行了研究,并根据状态转换图构建出了初步的程序。
无符号数状态转换图
#include <iostream>
#include <cstdio>
#include <cstring>
#include <cstdlib>

// Sentinel that terminates the keyword table below.
#define _KEY_WORDEND "waiting for your expanding"

// A token as a (category code, lexeme) pair.
typedef struct
{
    int typenum;      // category code of the token
    const char *word; // lexeme text; points at `token` or a literal, never owned
} WORD;

// Category codes that the driver loop in main() reacts to.
static const int TYPE_ERROR = -1;  // malformed or unrecognised input
static const int TYPE_END = 1000;  // end of the current input segment

char input[255];      // source segment currently being scanned (NUL-terminated)
char token[255] = ""; // lexeme being assembled
int p_input;          // index of the next unread character in `input`
int p_token;          // index of the next free slot in `token`
char ch;              // most recently read character (the lookahead)

// Keyword table; the category code of entry i is i + 1.
const char *rwtab[] = { "begin", "if", "then", "while", "do", "end", "int", "main",
                        "else", "float", "double", "return", "cout", _KEY_WORDEND };

WORD *scanner(); // scan one token from `input`

// Step the read position back by one character (un-read the lookahead).
void retract()
{
    if (p_input > 0)
        p_input--;
}

// Read the next character of `input` into `ch`, advance, and return it.
// Reads outside the buffer yield '\0' instead of touching foreign memory.
char m_getch()
{
    if (p_input < 0 || p_input >= static_cast<int>(sizeof(input)))
        ch = '\0';
    else
        ch = input[p_input];
    p_input++;
    return ch;
}

// Append `ch` to `token`, keeping it NUL-terminated. Silently drops the
// character if the token buffer is already full.
void concat()
{
    if (p_token >= static_cast<int>(sizeof(token)) - 1)
        return;
    token[p_token] = ch;
    p_token++;
    token[p_token] = '\0';
}

// Look `token` up in the keyword table.
// Returns the keyword's category code (index + 1), or 10 if it is not a keyword.
int reserve()
{
    for (int i = 0; std::strcmp(rwtab[i], _KEY_WORDEND) != 0; i++)
    {
        if (std::strcmp(rwtab[i], token) == 0)
            return i + 1;
    }
    return 10;
}

// Append the current character to the token and fetch the next one.
// Returns the newly fetched character.
int cm()
{
    concat();
    return m_getch();
}

// 1 if `ch` is a decimal digit, else 0.
int digit()
{
    return (ch >= '0' && ch <= '9') ? 1 : 0;
}

// 1 if `ch` is the exponent marker 'e' or 'E', else 0.
int E()
{
    return (ch == 'e' || ch == 'E') ? 1 : 0;
}

// 1 if `ch` is a sign character '+' or '-', else 0.
int as()
{
    return (ch == '-' || ch == '+') ? 1 : 0;
}

// 1 if `ch` is the decimal point, else 0.
int dot()
{
    return (ch == '.') ? 1 : 0;
}

// Skip blanks: advance until `ch` is neither a space, tab, CR nor newline.
void getbc()
{
    while (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r')
        m_getch();
}

// Accepting exit of the number automaton: give back the lookahead character
// and publish the assembled token. reserve() yields 10 here because no
// keyword starts with a digit.
static WORD *accept_number(WORD *myword)
{
    retract();
    myword->typenum = reserve();
    myword->word = token;
    return myword;
}

// Error exit of the number automaton: give back the lookahead character and
// report the text collected so far with the error code.
static WORD *reject_token(WORD *myword)
{
    retract();
    myword->typenum = TYPE_ERROR;
    myword->word = token;
    return myword;
}

// Exponent part of an unsigned number, shared by the transitions 1->4 and
// 3->4 of the state diagram. On entry `ch` is the 'e'/'E'.
//   state 4: E consumed, expects a sign or a digit
//   state 5: sign consumed, expects a digit
//   state 6: exponent digits (self-loop), accepting
// Returns `myword` filled in; typenum is TYPE_ERROR when no exponent digit
// follows the 'E' (or the sign).
WORD *s4(WORD *myword)
{
    cm();     // consume the exponent marker        -> state 4
    if (as())
        cm(); // optional sign                      -> state 5
    if (!digit())
        return reject_token(myword); // "1e", "1e+" are not numbers
    while (digit())
        cm(); // self-loop on state 6
    return accept_number(myword);
}

// Scan one token starting at input[p_input].
// Recognises unsigned numbers: digits [ '.' digits ] [ ('e'|'E') ['+'|'-'] digits ].
// Returns a heap-allocated WORD that the caller must delete:
//   - typenum from reserve() (10) and word == token for a number,
//   - typenum TYPE_END and word "OVER" at the end of the input,
//   - typenum TYPE_ERROR and word == token for anything else.
// `word` may point at the global `token`, which the next call overwrites.
WORD *scanner()
{
    WORD *myword = new WORD;
    myword->typenum = 10; // default category
    myword->word = "";
    p_token = 0;
    token[0] = '\0';

    m_getch();
    getbc(); // skip leading blanks

    if (ch == '\0')
    {
        // Stay on the terminator so repeated calls keep reporting the end.
        retract();
        myword->typenum = TYPE_END;
        myword->word = "OVER";
        return myword;
    }

    if (!digit())
    {
        // Only unsigned numbers are recognised; report the offending character.
        concat();
        myword->typenum = TYPE_ERROR;
        myword->word = token;
        return myword;
    }

    cm();     // first digit                        -> state 1
    while (digit())
        cm(); // self-loop on state 1 (integer part)

    if (dot())
    {
        cm(); // decimal point                      -> state 2
        if (!digit())
            return reject_token(myword); // the diagram requires a digit after '.'
        while (digit())
            cm(); // self-loop on state 3 (fraction part)
    }

    if (E())
        return s4(myword); // 1->4 or 3->4

    return accept_number(myword);
}

// Command-line driver. Define LEXER_NO_MAIN to compile the scanner without it
// (for example to link the scanner into a test harness).
#ifndef LEXER_NO_MAIN
// Reads data.txt as a sequence of '#'-terminated segments and prints every
// token of each segment as <lexeme,code>.
int main()
{
    std::cout << "read something from data.txt" << std::endl;
    if (std::freopen("data.txt", "r", stdin) == NULL)
    {
        std::printf("Not found file!\n");
        return 0;
    }

    for (;;)
    {
        input[0] = '\0';
        // The width keeps the read inside input[255]; the scan stops at '#'.
        const int matched = std::scanf("%254[^#]", input);
        if (matched == EOF)
            break;

        // Consume the '#' delimiter; without this the next read would stall
        // on it forever. A non-'#' character means the segment was truncated
        // by the width limit, so put it back.
        const int delimiter = std::getchar();
        if (delimiter != '#' && delimiter != EOF)
            std::ungetc(delimiter, stdin);

        if (matched == 1)
        {
            p_input = 0;
            std::printf("your words:\n%s\n", input);

            int over = 1; // reset per segment so later segments are scanned too
            while (over < TYPE_END && over != TYPE_ERROR)
            {
                WORD *oneword = scanner();
                // NOTE(review): 999 is never produced in this file; presumably a
                // "skip" code kept from the lexer this program was derived from.
                if (oneword->typenum < TYPE_END && oneword->typenum != 999)
                    std::cout << "<" << oneword->word << "," << oneword->typenum << ">" << std::endl;
                over = oneword->typenum;
                delete oneword;
            }
        }

        if (delimiter == EOF)
            break;
    }
    return 0;
}
#endif // LEXER_NO_MAIN
在编写程序的过程中遇到了一个小问题:状态转换图中的环(自循环)到底应该在代码的哪个位置进行循环,详见代码中的注释。
太久没写了,语言可能有点表达不清,以后多更新一下。