文中例子使用 Java 编写
将一段json字符串转化为json对象,常见的方法是使用org.json库
例子
JSON字符串(拷贝自amazon.com搜索框推荐)
{
"alias": "aps",
"prefix": "我",
"suffix": null,
"suggestions": [
{
"suggType": "KeywordSuggestion",
"type": "KEYWORD",
"value": "我們與惡的距離"
},
{
"suggType": "KeywordSuggestion",
"type": "KEYWORD",
"value": "我的奋斗"
},
{
"suggType": "KeywordSuggestion",
"type": "KEYWORD",
"value": "我们这一代"
}
],
"suggestionTitleId": null,
"responseId": "3JSLXEWBTX3GO",
"shuffled": false
}
org.json解析
import org.json.JSONObject;
import org.json.JSONArray;
// Parse the raw JSON text into an org.json object tree.
JSONObject response = new JSONObject(jsonStr);
// org.json exposes typed accessors; the array accessor is getJSONArray
// (JSONObject has no getArray method).
JSONArray suggestions = response.getJSONArray("suggestions");
for (int i = 0; i < suggestions.length(); i++) {
    // Each element of "suggestions" is itself a JSON object.
    JSONObject sug = suggestions.getJSONObject(i);
    // do sth
}
原理
看了其源码,流程大概是
- 将字符串转化为一组token列表
- 再逐个匹配token,校验token前后合理性
- 递归遍历组装成对象或数组
实现
这里简单实现一下
Token
/**
 * A lexical token produced while scanning JSON text.
 *
 * <p>Holds the token category ({@code type}), its textual value
 * ({@code label}), the line it was found on and the inclusive
 * {@code [start, end]} character range it occupies in the input.
 */
public class Token {
    /** Token category, e.g. {@code "string"}, {@code "l_brace"}, {@code "eof"}. */
    public String type;
    /** Textual value of the token; may be {@code null}. */
    public String label;
    /** Line number on which the token was found. */
    public int line;
    /** Two-element array holding the start and end offsets of the token. */
    public int[] range;

    /**
     * Creates a token.
     *
     * @param type  token category
     * @param label textual value of the token, may be {@code null}
     * @param line  line number the token was found on
     * @param start offset of the first character of the token
     * @param end   offset of the last character of the token
     */
    public Token(String type, String label, int line, int start, int end) {
        this.type = type;
        this.label = label;
        this.line = line;
        this.range = new int[]{ start, end };
    }

    /**
     * Renders the token for debugging, e.g.
     * {@code {type:'string', label:'abc', line:1, range:[0, 4]}}.
     */
    @Override
    public String toString() {
        // The original was missing the '+' before the closing brace,
        // which made the whole expression a syntax error.
        return "{" +
                "type:'" + type + '\'' +
                ", label:'" + label + '\'' +
                ", line:" + line +
                ", range:" + Arrays.toString(range) +
                '}';
    }
}
TakedToken
/**
 * Result of a successful match attempt: the matched {@link Token} together
 * with the number of input characters that the match consumed.
 */
public class TakedToken {

    /** The matched token; not set by the constructor, callers assign it. */
    public Token token;

    /** Number of input characters consumed by the match. */
    public int charCount;

    /**
     * Creates a result that records only the consumed length.
     *
     * @param charCount number of input characters consumed
     */
    public TakedToken(int charCount) {
        this.charCount = charCount;
    }
}
Taker
/**
 * Strategy that tries to match one token at a given position of the input.
 */
@FunctionalInterface
public interface Taker {

    /**
     * Attempts to match a token starting at {@code index}.
     *
     * @param index     offset in {@code input} at which matching starts
     * @param input     the complete text being scanned
     * @param prevToken the previously produced token, or {@code null} if none
     * @param line      current line number, recorded on the produced token
     * @return the match result, or {@code null} when nothing matches here
     */
    TakedToken take(int index, String input, Token prevToken, int line);
}
Tokenizer
public class Tokenzier {
public static final String WHITESPACES = " \t\n\u000B\f\r";
public final String type;
public final String nextChars;
public String label;
public Taker taker;
private Tokenizer(String type, String label, String... nextChars) {
this.type = type;
this.label = label;
this.nextChars = join(nextChars, "");
}
public static Tokenizer tokenizer(String type, String label, String... nextChars) {
Tokenizer tokenizer = new Tokenizer(type, label, nextChars);
tokenizer.taker = tokenizer.new DefaultTaker();
return tokenizer;
}
public static Tokenizer stringTokenizer(String... nextChars) {
Tokenizer tokenizer = new Tokenizer("string", null, nextChars);
tokenizer.taker = tokenizer.new StringTaker();
return tokenizer;
}
public static Tokenizer numberTokenizer(String... nextChars) {
Tokenizer tokenizer = new Tokenizer("number", null, nextChars);
tokenizer.taker = tokenizer.new NumberTaker();
return tokenizer;
}
public TakedToken take(int index, String input, Token prevToken, int line) {
return taker.take(index, input, prevToken, line);
}
private boolean checkNext(char nextChar) {
return WHITESPACES.indexOf(nextChar) >= 0 || nextChars.indexOf(nextChar) >= 0;
}
private TakedToken accept(int charStart, int charCount, int line, String label) {
TakedToken tt = new TakedToken(charCount);
tt.token = new Token(type, label, line, charStart, charStart + charCount - 1);
return tt;
}
private TakedToken accept(int charStart, int charCount, int line) {
return accept(charStart, charCount, line, label);
}
private class DefaultTaker implements Taker {
@Override
public TakedToken take(int index, String input, Token prevToken, int line) {
int charCount = label.length();
int target = index + charCount;
if (label.equals(substr(input, index, charCount))
&& (target >= input.length() || checkNext(input.charAt(target)))) {
return accept(index, charCount, line);
}
return null;
}
}
private class StringTaker implements Taker {
@Override
public TakedToken take(int index, String input, Token prevToken, int line) {
char c = input.charAt(index);
char firstChar = c;
if (c != '\'' && c != '"') {
return null;
}
StringBuilder str = new StringBuilder();
int idx = index;
while (idx < input.length()) {
c = input.charAt(++idx);
if (firstChar == c && str.length() > 0 && str.charAt(str.length() - 1) == '\\') {
str.append(c);
continue;
}
if (firstChar != c) {
str.append(c);
continue;
}
// 字符串需要处理转义字符
return accept(index, idx - index + 1, line, unescapeStr(str.toString()));
}
return null;
}
}
private class NumberTaker implements Taker {
@Override
public TakedToken take(int index, String input, Token prevToken, int line) {
char c = input.charAt(index);
if (c < '0' || c > '9') {
return null;
}
String str = c + "";
int idx = index;
boolean hasDot = false;
while (idx < input.length()) {
c = input.charAt(++idx);
if ((c < '0' || c > '9') && c != '.') {
Number number;
if (hasDot) {
try {
number = Double.parseDouble(str);
} catch (NumberFormatException e) {
return null;
}
} else {
try {
number = Long.parseLong(str);
} catch (NumberFormatException e) {
return null;
}
}
return accept(index, idx - index, line, String.valueOf(number));
}
str += c;
if (c == '.') {
if (hasDot) {
return null;
}
hasDot = true;
}
}
return null;
}
}
}
JSONLexer
/**
 * Splits JSON text into a flat array of {@link Token}s terminated by an
 * {@code "eof"} token.
 *
 * <p>At each position the tokenizers in {@code TOKENIZERS} are tried in
 * order and the first one that matches produces the next token.
 */
public class JSONLexer {
    /** Characters that can start a number: digits plus the minus sign. */
    private static final String NUMBER_START = "-0123456789";

    /**
     * Tokenizers in trial order. For literal tokenizers the trailing arguments
     * list the characters allowed directly after the literal.
     */
    private static final Tokenizer[] TOKENIZERS = {
            Tokenizer.tokenizer("l_brace", "{", "}\""),
            Tokenizer.tokenizer("r_brace", "}", ",]}"),
            Tokenizer.tokenizer("l_bracket", "[", "ftn"/*false/true/null*/, NUMBER_START, "]{\""),
            Tokenizer.tokenizer("r_bracket", "]", ",]}"),
            Tokenizer.tokenizer("colon", ":", "ftn"/*false/true/null*/, NUMBER_START, "\"[{"),
            Tokenizer.tokenizer("comma", ",", "ftn"/*false/true/null*/, NUMBER_START, "\"{["),
            Tokenizer.tokenizer("true", "true", ",]}"),
            Tokenizer.tokenizer("false", "false", ",]}"),
            Tokenizer.tokenizer("null", "null", ",]}"),
            Tokenizer.numberTokenizer(",]}"),
            Tokenizer.stringTokenizer(":,]}")
    };

    private final String input;
    /** Offset of the next unread character. */
    private int current = 0;
    /** Current line number, starting at 1. */
    private int line = 1;
    private final LinkedList<Token> tokens = new LinkedList<>();

    private JSONLexer(String input) {
        this.input = input;
    }

    /** Advances past whitespace, counting line breaks along the way. */
    private void skipWhitespace() {
        while (current < input.length()) {
            char c = input.charAt(current);
            // Stop at the first NON-whitespace character.
            if (Tokenizer.WHITESPACES.indexOf(c) < 0) {
                break;
            }
            if ('\n' == c) {
                line++;
            }
            current++;
        }
    }

    /**
     * Produces the token starting at the current offset and advances past it.
     *
     * @throws JSONParseException if no tokenizer matches at the current offset
     */
    private Token nextToken() throws JSONParseException {
        Token prevToken = tokens.isEmpty() ? null : tokens.getLast();
        for (Tokenizer tokenizer : TOKENIZERS) {
            TakedToken ttoken = tokenizer.take(current, input, prevToken, line);
            if (ttoken != null) {
                current += ttoken.charCount;
                ttoken.token.line = line;
                return ttoken.token;
            }
        }
        throw new JSONParseException("unknown characters @line " + line + ": \n" + input.substring(current));
    }

    /** Scans the whole input, collecting tokens and appending the final {@code "eof"} token. */
    private void handle() throws JSONParseException {
        skipWhitespace();
        while (current < input.length()) {
            Token token = nextToken();
            tokens.add(token);
            skipWhitespace();
        }
        // The eof token has no text; it is positioned at the end of the input.
        tokens.add(new Token("eof", null, line, current, current));
    }

    /**
     * Tokenizes {@code input}.
     *
     * @param input JSON text to scan
     * @return the tokens in input order, always ending with an {@code "eof"} token
     * @throws JSONParseException if the input contains text no tokenizer accepts
     */
    public static Token[] handle(String input) throws JSONParseException {
        JSONLexer lexer = new JSONLexer(input);
        lexer.handle();
        return lexer.tokens.toArray(new Token[0]);
    }
}
JSONParser
public class JSONParser {
public static Object parse(String json) throws JSONParseException {
Token[] tokens = Lexer.handle(json);
if (tokens.length == 0 || tokens[0].type.equals('eof'))) {
return ;
}
return new Parser(tokens).parse();
}
private static class Parser {
private final Token[] tokens;
private int current = 0;
private Parser(Token[] tokens) {
this.tokens = tokens;
}
private boolean matchToken(String type) throws JSONParseException {
if (current >= tokens.length) {
throw new JSONParseException("no more token");
}
return tokens[current].type.equals(type);
}
private void moveToNextToken() {
current++;
}
private Token requireToken(String type, String error) throws JSONParseException {
if (current >= tokens.length) {
throw new JSONParseException(error + " , no more tokens");
}
Token token = tokens[current];
if (token.type.equals(type)) {
current++;
return token;
}
throw new JSONParseException(error + ", but got " + token.label);
}
private Object parse() {
if (matchToken("l_brace")) {
moveToNextToken();
if (matchToken("r_brace")) {
moveToNextToken();
return new HashMap<String, Object>;
}
Map<String, Object> map = new HashMap<>();
while (current < tokens.length) {
String key = requireToken("string", "string required").label;
requireToken("colon", "colon : required");
map.put(key, parse()); // todo key值重复的时候,第一次的value属无效值,造成额外计算成本;反向解析可以解决此问题
if (matchToken("comma")) { // ,
moveToNextToken();
continue;
}
requireToken("r_brace", "r_brace } required");
break;
}
return map;
}
if (matchToken("l_bracket")) {
moveToNextToken();
if (matchToken("r_bracket")) {
moveToNextToken();
return new ArrayList<Object>();
}
List<Object> list = new ArrayList<>();
while (current < tokens.length) {
list.add(parse());
if (matchToken("comma")) { // ,
moveToNextToken();
continue;
}
requireToken("r_bracket", "r_brace ] required");
break;
}
return list;
}
Token token = tokens[current];
String type = token.type;
if ("number".equals(type)) {
moveToNextToken();
try {
return Long.parseLong(token.label);
} catch (NumberFormatException e) {
return Double.parseDouble(token.label);
}
}
if ("string".equals(type)) {
moveToNextToken();
return token.label;
}
if ("true".equals(type)) {
moveToNextToken();
return true;
}
if ("false".equals(type)) {
moveToNextToken();
return false;
}
if ("null".equals(type)) {
moveToNextToken();
return null;
}
throw new JSONParseException("invalid token @" + token.range[0]);
}
}
}
调用如下就可以解析json了
Object obj = JSONParser.parse(jsonStr);
// A JSON object is returned as a Map; check the runtime type before casting.
if (obj instanceof Map) {
    Map<?, ?> kvs = (Map<?, ?>) obj;
    kvs.get("suggestions");
}