Java实现简单的词法分析器:
-
需求分析
-
5类符号
- 保留字(keywords):if、int、for、while、do、return、break、continue等等;单词种别码为1
- 其他的字符都为标识符(identifier);单词种别码为2
- 常数为无符号数(unsigned number);单词种别码为3
- 运算符(operator)包括:+、-、*、/、=、>、<等;单词种别码为4。
- 分隔符(separator)包括: “,”“;”“(”“)”“{”“}”等; 单词种别码为5。
分析流程
-
-
代码设计
-
目录结构
-
symbols
目录下的txt文件分别保存关键字信息(keywords.txt)、操作符信息(operators.txt)、分隔符信息(separators.txt)。
utils
包中分别为文件读取工具类(FileReadUtil)和词法分析工具类(LexerUtil)
symbols
包中为符号类
lexer
包中的类和Main
结合用来控制界面显示,界面使用JavaFx实现。
-
符号类设计
所有的符号类都继承自抽象类
Token
,Token
类中带参构造函数用来初始化,读取保存在txt文件中的保留字、运算符、分隔符信息并保存在List中。KeyWord
类、Operator
类、Separator
类重写这个带参构造函数来读取符号并调用Token
的getTokens()
方法将读取的符号保存到它们内部的静态List中。Identifier
类、UnsignedNumber
类不重写构造函数,即他们只有默认的无参构造函数。
这部分只给出Token
、KeyWord
和Identifier
的代码实现。
完整代码在我的Github: https://github.com/LiuJinxuan/Lexer.git
Token类
package com.company.symbols;
import com.company.utils.FileReadUtil;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* Created by Ljx on 2017/5/6.
*/
/**
 * Base class of every symbol category recognised by the lexer.
 *
 * <p>The one-argument constructor loads the symbols of a category from a
 * text file via {@code FileReadUtil.ReadSymbols}; subclasses obtain the
 * loaded list through {@link #getTokens()}.
 */
public abstract class Token {
    // Kept per instance: a shared static list would let a subclass whose
    // symbol file failed to load silently inherit the list read by a
    // previously constructed subclass.
    private List<String> tokens = new ArrayList<>();

    /** Creates a token category that has no backing symbol file. */
    public Token() {
    }

    /**
     * Creates a token category and loads its symbols from {@code fileName}.
     *
     * <p>Failures are reported on the console and leave the symbol list
     * empty; no exception is propagated to the caller.
     *
     * @param fileName path of the text file holding one symbol per line
     */
    public Token(String fileName) {
        try {
            tokens = FileReadUtil.ReadSymbols(fileName);
        } catch (FileNotFoundException e) {
            System.out.println(fileName + " NOT FOUND!");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the symbols loaded by the constructor.
     *
     * @return the loaded symbols, or an empty list if nothing was loaded
     */
    protected List<String> getTokens() {
        return tokens;
    }

    /**
     * Returns the category code (TAG) of this symbol class.
     *
     * @return the numeric category code
     */
    public abstract int getTAG();

    /**
     * Returns the display name of this symbol category.
     *
     * @return the category description
     */
    public abstract String getDetail();
}
KeyWord类
package com.company.symbols;
import java.util.List;
/**
* Created by Ljx on 2017/5/5.
*/
/**
 * Symbol category for reserved words; its category code is 1.
 *
 * <p>The keyword list is held in a static field, so it is shared by all
 * instances and replaced every time a new instance is constructed.
 */
public class KeyWord extends Token {
    /** Category code reported by {@link #getTAG()}. */
    private static final int CATEGORY_CODE = 1;

    /** Keywords obtained from the most recently constructed instance. */
    private static List<String> loadedKeywords;

    /**
     * Loads the keywords from the given file through the {@code Token}
     * constructor and publishes them in the shared static list.
     *
     * @param fileName path of the keyword file
     */
    public KeyWord(String fileName) {
        super(fileName);
        KeyWord.loadedKeywords = getTokens();
    }

    /**
     * Returns the shared keyword list.
     *
     * @return the keywords stored by the last constructor call
     */
    public List<String> getKeywordList() {
        return KeyWord.loadedKeywords;
    }

    /** {@inheritDoc} */
    @Override
    public int getTAG() {
        return CATEGORY_CODE;
    }

    /** {@inheritDoc} */
    @Override
    public String getDetail() {
        return "保留字";
    }
}
Identifier类
package com.company.symbols;
/**
* Created by Ljx on 2017/5/5.
*/
/**
 * Symbol category for identifiers; its category code is 2.
 *
 * <p>Declares no constructor, so only the implicit no-argument one exists
 * and no symbol file is read for this category.
 */
public class Identifier extends Token {
    /** Category code reported by {@link #getTAG()}. */
    private static final int CATEGORY_CODE = 2;

    /** {@inheritDoc} */
    @Override
    public String getDetail() {
        return "标识符";
    }

    /** {@inheritDoc} */
    @Override
    public int getTAG() {
        return CATEGORY_CODE;
    }
}
-
工具类
-
文件读取工具类FileReadUtil
-
package com.company.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* Created by Ljx on 2017/5/5.
*/
/**
 * File helpers used by the lexer: loading symbol tables and producing a
 * line-numbered copy of a source file for display.
 */
public class FileReadUtil {
    /**
     * Reads a file line by line and returns every line as one list entry.
     *
     * @param fileName path of the file to read
     * @return the lines of the file in order; empty if the file is empty
     * @throws IOException if the file cannot be opened or read
     */
    public static List<String> ReadSymbols(String fileName) throws IOException {
        List<String> list = new ArrayList<>();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader in = new BufferedReader(new FileReader(fileName))) {
            String s;
            while ((s = in.readLine()) != null) {
                list.add(s);
            }
        }
        return list;
    }

    /**
     * Returns the content of {@code file} with a {@code "<n> | "} prefix on
     * every line, for display in the UI.
     *
     * <p>Single-digit line numbers are padded with one extra space so that
     * the separator column lines up for files of up to 99 lines.
     *
     * @param file the source file to read
     * @return the numbered text, each line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    public static String GetCode(File file) throws IOException {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            int currentLine = 1;
            String s;
            while ((s = in.readLine()) != null) {
                sb.append(currentLine);
                if (currentLine < 10) {
                    sb.append(' '); // pad so "9  | " aligns with "10 | "
                }
                sb.append(" | ").append(s).append('\n');
                currentLine++;
            }
        }
        return sb.toString();
    }
}
-
词法分析工具类LexerUtil
package com.company.utils;
import com.company.lexer.LexResult;
import com.company.symbols.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* Created by Ljx on 2017/5/5.
*
* 词法分析工具类
*
*/
/**
 * Lexical analysis helper: splits a source file into symbols and assigns
 * each one to a symbol category.
 */
public class LexerUtil {
    // Forward slashes are accepted by java.io on Windows as well, whereas a
    // hard-coded backslash is not a path separator on other platforms.
    private final static String KEYWORD_PATH = "symbols/keywords.txt";
    private final static String OPERATOR_PATH = "symbols/operators.txt";
    private final static String SEPARATOR_PATH = "symbols/separators.txt";
    private final static KeyWord keyWord = new KeyWord(KEYWORD_PATH);
    private final static Operator operator = new Operator(OPERATOR_PATH);
    private final static Separator separator = new Separator(SEPARATOR_PATH);
    private final static UnsignedNumber unsignedNumber = new UnsignedNumber();
    private final static Identifier identifier = new Identifier();

    /**
     * Runs the lexical analysis over every line of {@code file}.
     *
     * <p>An {@link IOException} is printed and ends the analysis; the
     * results collected up to that point are still returned.
     *
     * @param file the source file to analyse
     * @return one {@code LexResult} per recognised symbol, in source order,
     *         carrying the 1-based line number, the symbol text, the
     *         category code and the category description
     */
    public static List<LexResult> LexicalAnalysis(File file) {
        int currentLine = 1; // line number of the symbols being produced
        List<LexResult> lexResultList = new ArrayList<>();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            String s;
            while ((s = in.readLine()) != null) {
                for (String symbol : division(s)) {
                    Token currentType = classify(symbol);
                    lexResultList.add(new LexResult(String.valueOf(currentLine),
                            symbol, String.valueOf(currentType.getTAG()), currentType.getDetail()));
                }
                currentLine++;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return lexResultList;
    }

    /**
     * Picks the category of a single symbol: keyword, operator, separator,
     * unsigned number, and finally identifier as the fallback.
     */
    private static Token classify(String symbol) {
        if (isKeyWord(symbol)) {
            return keyWord;
        }
        if (isOperator(symbol)) {
            return operator;
        }
        if (isSeparator(symbol)) {
            return separator;
        }
        return isUnsignedNumber(symbol) ? unsignedNumber : identifier;
    }

    /**
     * Tells whether {@code s} is a non-empty run of digits. Checking the
     * characters instead of parsing keeps digit strings that do not fit
     * into an {@code int} classified as numbers.
     */
    private static boolean isUnsignedNumber(String s) {
        if (s.isEmpty()) {
            return false;
        }
        for (int i = 0; i < s.length(); i++) {
            if (!Character.isDigit(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Splits one line of source text into symbols.
     *
     * <p>Characters are accumulated into a word until an operator, a
     * separator or a whitespace character is met. At that point the pending
     * word (if any) is emitted, followed by the operator or separator
     * itself; whitespace is discarded. A word still pending at the end of
     * the line is emitted as well.
     *
     * @param s one line of source text
     * @return the symbols of the line in order
     */
    private static List<String> division(String s) {
        List<String> list = new ArrayList<>();
        StringBuilder sb = new StringBuilder();
        for (char c : s.trim().toCharArray()) {
            String single = String.valueOf(c);
            // Tabs and other whitespace delimit words just like a space.
            boolean whitespace = Character.isWhitespace(c);
            if (whitespace || isOperator(single) || isSeparator(single)) {
                if (sb.length() != 0) {
                    list.add(sb.toString());
                    sb.setLength(0);
                }
                if (!whitespace) {
                    list.add(single);
                }
                continue;
            }
            sb.append(c);
        }
        // Flush the last word of a line that does not end with a delimiter.
        if (sb.length() != 0) {
            list.add(sb.toString());
        }
        return list;
    }

    /** Tells whether {@code s} is contained in the keyword list. */
    private static boolean isKeyWord(String s) {
        return keyWord.getKeywordList().contains(s);
    }

    /** Tells whether {@code s} is contained in the operator list. */
    private static boolean isOperator(String s) {
        return operator.getOperatorList().contains(s);
    }

    /** Tells whether {@code s} is contained in the separator list. */
    private static boolean isSeparator(String s) {
        return separator.getSeparatorList().contains(s);
    }
}
-
词法分析结果类LexResult
package com.company.lexer;
import javafx.beans.property.SimpleStringProperty;
import javafx.beans.property.StringProperty;
/**
* Created by Ljx on 2017/5/7.
*/
/**
 * One row of the lexical analysis result: line number, symbol text,
 * category code and category description, each held in a JavaFX
 * {@link StringProperty}.
 */
public class LexResult {
    private final StringProperty line;
    private final StringProperty symbol;
    private final StringProperty TAG;
    private final StringProperty detail;

    /** Creates a result whose four values are all {@code null}. */
    public LexResult() {
        this(null, null, null, null);
    }

    /**
     * Creates a result row.
     *
     * @param line   line number of the symbol, as text
     * @param symbol the symbol text
     * @param TAG    category code of the symbol, as text
     * @param detail category description of the symbol
     */
    public LexResult(String line, String symbol, String TAG, String detail) {
        this.line = new SimpleStringProperty(line);
        this.symbol = new SimpleStringProperty(symbol);
        this.TAG = new SimpleStringProperty(TAG);
        this.detail = new SimpleStringProperty(detail);
    }

    /** @return the current line-number value */
    public String getLine() {
        return line.get();
    }

    /** @return the property holding the line number */
    public StringProperty lineProperty() {
        return line;
    }

    /** @param line the new line-number value */
    public void setLine(String line) {
        this.line.set(line);
    }

    /** @return the current symbol text */
    public String getSymbol() {
        return symbol.get();
    }

    /** @return the property holding the symbol text */
    public StringProperty symbolProperty() {
        return symbol;
    }

    /** @param symbol the new symbol text */
    public void setSymbol(String symbol) {
        this.symbol.set(symbol);
    }

    /** @return the current category code */
    public String getTAG() {
        return TAG.get();
    }

    /** @return the property holding the category code */
    public StringProperty TAGProperty() {
        return TAG;
    }

    /** @param TAG the new category code */
    public void setTAG(String TAG) {
        this.TAG.set(TAG);
    }

    /** @return the current category description */
    public String getDetail() {
        return detail.get();
    }

    /** @return the property holding the category description */
    public StringProperty detailProperty() {
        return detail;
    }

    /** @param detail the new category description */
    public void setDetail(String detail) {
        this.detail.set(detail);
    }

    /**
     * Returns a readable dump of the four values. The values are read out
     * of the properties so the text shows them directly rather than the
     * string form of the property wrapper objects.
     */
    @Override
    public String toString() {
        return "LexResult{" +
                "line=" + line.get() +
                ", symbol=" + symbol.get() +
                ", TAG=" + TAG.get() +
                ", detail=" + detail.get() +
                '}';
    }
}
-
界面
- 界面使用Java8自带的JavaFx实现
完整代码见: https://github.com/LiuJinxuan/Lexer
- 界面使用Java8自带的JavaFx实现
以上就是一个简单的词法分析器实现。