最近在上《编译原理》这门课程，老师要求我们自己写一个词法分析程序。要求如下：
状态图
首先我们将文法转化为状态图~
符号表
然后我们建立好符号表~
类别编码 | 单词名称 | 类别编码 | 单词名称 | 类别编码 | 单词名称 |
---|---|---|---|---|---|
1 | const | 11 | end | 20 | > |
2 | var | 12 | read | 21 | := |
3 | procedure | 13 | write | 22 | + |
4 | odd | 14 | 变量名 | 23 | - |
5 | if | 15 | 数字 | 24 | * |
6 | then | 16 | <> | 25 | / |
7 | while | 17 | <= | 26 | ( |
8 | do | 18 | < | 27 | ) |
9 | call | 19 | >= | 28 | ; |
10 | begin | | | | |
代码
#include <iostream>
#include <fstream>
#include <string>
#include <cstring>
#include <cstdio>
#include <utility>
#include <vector>
using namespace std;
/// Lexical analyzer that turns a source file into (category code, lexeme) pairs.
///
/// The constructor reads the whole input file into memory, Scanner() tokenizes
/// that text, and Print() writes the collected tokens to std::cout.
///
/// Category codes:
///   1-13  reserved words, in rwtab order (code == index + 1)
///   14    identifier          15    unsigned integer
///   16 <>   17 <=   18 <   19 >=   20 >   21 :=
///   22 +    23 -    24 *   25 /    26 (   27 )    28 ;
class LexicalAnalysis {
    /// Reserved words; the category code of rwtab[i] is i + 1.
    static const std::vector<std::string> rwtab;

private:
    std::ifstream in;
    std::ofstream out;  // not used by any member shown here; kept so the class layout is unchanged
    std::string buffer;
    std::vector<std::pair<int, std::string>> words;

    /// True for ASCII letters a-z / A-Z.
    static bool isLetter(char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
    }

    /// True for ASCII digits 0-9.
    static bool isDigit(char c) {
        return '0' <= c && c <= '9';
    }

    /// True for the characters treated as token separators.
    static bool isBlank(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    /// Loads every line of `in` into `buffer`.
    ///
    /// The loop is driven by the result of getline rather than by eof(), so a
    /// stream that failed to open (or hits a read error) ends the loop instead
    /// of spinning forever. A '\n' is re-appended after each line so that
    /// tokens on adjacent lines are not glued together.
    void readFile() {
        std::string line;
        while (std::getline(in, line)) {
            buffer.append(line);
            buffer.push_back('\n');
        }
    }

public:
    /// Analyzes the file "test.txt" in the current working directory.
    LexicalAnalysis() : LexicalAnalysis("test.txt") {}

    /// Analyzes the file at `address`. If the file cannot be opened the buffer
    /// stays empty and Scanner() will simply produce no tokens.
    LexicalAnalysis(const std::string& address) {
        in.open(address);
        readFile();
    }

    /// Tokenizes the loaded text into `words`.
    ///
    /// Any tokens from a previous call are discarded first. Characters that do
    /// not start a known token are skipped silently.
    ///
    /// @return false if a ':' is found that is not followed by '=' (tokens
    ///         recognized before that point remain stored); true otherwise.
    bool Scanner() {
        words.clear();
        const std::size_t len = buffer.length();
        std::size_t p = 0;  // index of the next unread character

        while (p < len) {
            const char ch = buffer[p];
            if (isBlank(ch)) {
                ++p;
                continue;
            }

            int syn = 0;
            std::string token;

            if (isLetter(ch)) {
                // Letter first: reserved word or identifier. Only characters
                // that belong to the token are consumed, so nothing has to be
                // "un-read" afterwards and end-of-input needs no special case.
                while (p < len && (isLetter(buffer[p]) || isDigit(buffer[p]))) {
                    token += buffer[p++];
                }
                syn = 14;
                for (std::size_t i = 0; i < rwtab.size(); ++i) {
                    if (rwtab[i] == token) {
                        syn = static_cast<int>(i) + 1;
                        break;
                    }
                }
            } else if (isDigit(ch)) {
                // Unsigned integer literal.
                while (p < len && isDigit(buffer[p])) {
                    token += buffer[p++];
                }
                syn = 15;
            } else {
                // Operators and delimiters; `next` is a one-character
                // look-ahead that is consumed only when it is part of the token.
                ++p;
                token += ch;
                const char next = (p < len) ? buffer[p] : '\0';
                switch (ch) {
                case '<':
                    if (next == '>') {
                        token += next; ++p; syn = 16;
                    } else if (next == '=') {
                        token += next; ++p; syn = 17;
                    } else {
                        syn = 18;
                    }
                    break;
                case '>':
                    if (next == '=') {
                        token += next; ++p; syn = 19;
                    } else {
                        syn = 20;
                    }
                    break;
                case ':':
                    if (next != '=') {
                        return false;  // a lone ':' is not a valid token
                    }
                    token += next; ++p; syn = 21;
                    break;
                case '+': syn = 22; break;
                case '-': syn = 23; break;
                case '*': syn = 24; break;
                case '/': syn = 25; break;
                case '(': syn = 26; break;
                case ')': syn = 27; break;
                case ';': syn = 28; break;
                default:
                    syn = -1;  // unknown character: skipped, not recorded
                    break;
                }
            }

            if (syn > 0) {
                words.push_back(std::make_pair(syn, token));
            }
        }
        return true;
    }

    /// Writes every stored token to std::cout, one per line, as "(code,lexeme)".
    void Print() {
        for (const auto& word : words) {
            std::cout << '(' << word.first << ',' << word.second << ')' << '\n';
        }
    }
};

const std::vector<std::string> LexicalAnalysis::rwtab = {
    "const", "var", "procedure", "odd", "if", "then", "while", "do", "call", "begin", "end", "read", "write"
};
int main(){
LexicalAnalysis test;
test.Scanner();
test.Print();
}