add plain parser for simplified string to map conversion

This commit is contained in:
Jörg Prante 2021-10-18 11:13:47 +02:00
parent 9f584c4c7a
commit f80c001075
4 changed files with 427 additions and 12 deletions

View file

@ -7,11 +7,9 @@ import org.xbib.datastructures.api.Generator;
import org.xbib.datastructures.api.Node;
import org.xbib.datastructures.api.Parser;
import org.xbib.datastructures.api.TimeValue;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.time.Instant;
import java.util.Map;
@ -38,8 +36,19 @@ public class Json implements DataStructure {
this.separator = separator;
}
public static Map<String, Object> toMap(String json) throws IOException {
return toMap(new StringReader(json));
/**
 * Builds a string from the given map using a builder obtained from {@code INSTANCE}.
 *
 * @param map the map handed to the builder's {@code buildMap} call
 * @return the string returned by the builder's {@code build} call
 * @throws IOException declared by this method; presumably raised by the builder calls (confirm against the builder API)
 */
public static String toString(Map<String, Object> map) throws IOException {
return INSTANCE.createBuilder().buildMap(map).build();
}
/**
 * Parses the given JSON text with a {@link PlainParser} and returns the result as a map.
 *
 * @param json the JSON text to parse
 * @return the parsed top-level map
 * @throws JsonException if the text cannot be parsed or its top-level value is not a map
 *                       (this includes the JSON literal {@code null}, a scalar or a list)
 */
@SuppressWarnings("unchecked")
public static Map<String, Object> toMap(String json) {
PlainParser parser = new PlainParser();
parser.parse(json);
Object object = parser.getResult();
if (object instanceof Map) {
return (Map<String, Object>) object;
}
// The parser yields null for the JSON literal "null"; report that instead of failing on getClass().
Object actualType = object != null ? object.getClass() : null;
throw new JsonException("unexpected, Json.toMap got not a map instance: " + actualType);
}
public static Map<String, Object> toMap(Reader reader) throws IOException {
@ -49,10 +58,6 @@ public class Json implements DataStructure {
}
}
/**
 * Builds a string from the given map using a builder obtained from {@code INSTANCE}.
 *
 * @param map the map handed to the builder's {@code buildMap} call
 * @return the string returned by the builder's {@code build} call
 * @throws IOException declared by this method; presumably raised by the builder calls (confirm against the builder API)
 */
public static String toString(Map<String, Object> map) throws IOException {
return INSTANCE.createBuilder().buildMap(map).build();
}
@Override
public Parser createParser() {
return new StreamParser();

View file

@ -0,0 +1,386 @@
package org.xbib.datastructures.json.tiny;
import org.xbib.datastructures.tiny.TinyList;
import org.xbib.datastructures.tiny.TinyMap;
import java.util.Deque;
import java.util.LinkedList;
import java.util.Objects;
/**
 * The plain parser is a simplified parser without nodes, parsing directly to plain maps/lists/values.
 * <p>
 * Call {@link #parse(String)} and then fetch the outcome with {@link #getResult()}.
 * The cursor, the current character and the result are kept in instance fields, so a single
 * instance must not be used for two parses at the same time.
 */
public class PlainParser {

    /** Sentinel returned by {@link #next()} once the input is exhausted. */
    private static final char EOS = (char) -1;

    private static final char DOUBLE_QUOTE = '"';

    private static final char BACKSLASH = '\\';

    private static final char OPEN_MAP = '{';

    private static final char CLOSE_MAP = '}';

    private static final char OPEN_LIST = '[';

    private static final char CLOSE_LIST = ']';

    private static final char COMMA = ',';

    private static final char COLON = ':';

    private String input;

    /** Index of the character following {@link #ch}; it keeps growing when reading past the end. */
    private int i;

    /** The character currently under inspection, or {@link #EOS}. */
    private char ch;

    private Object result;

    /** A LinkedList on purpose: a top-level {@code null} value is pushed, which ArrayDeque would reject. */
    private final Deque<Object> stack = new LinkedList<>();

    public PlainParser() {
    }

    /**
     * Parses a complete JSON document. The outcome is available from {@link #getResult()}.
     *
     * @param input the JSON text, must not be {@code null}
     * @throws JsonException if the text is not well-formed
     */
    public void parse(String input) throws JsonException {
        Objects.requireNonNull(input);
        this.input = input;
        this.i = 0;
        // do not leak the result of an earlier parse when this one fails
        this.result = null;
        stack.clear();
        ch = next();
        skipWhitespace();
        parseValue();
        skipWhitespace();
        if (!atEnd()) {
            throw new JsonException("malformed json: " + ch);
        }
    }

    /**
     * Returns the value produced by the last successful {@link #parse(String)} call.
     *
     * @return the parsed value; {@code null} for the JSON literal {@code null} or before any parse
     */
    public Object getResult() {
        return result;
    }

    /** Dispatches on the current character to the matching value parser. */
    private void parseValue() throws JsonException {
        switch (ch) {
            case DOUBLE_QUOTE:
                ch = next();
                parseString(false);
                break;
            case OPEN_MAP:
                parseMap();
                break;
            case OPEN_LIST:
                parseList();
                break;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '-':
                parseNumber();
                break;
            case 't':
                parseTrue();
                break;
            case 'f':
                parseFalse();
                break;
            case 'n':
                parseNull();
                break;
            default:
                throw new JsonException("illegal character: " + ch);
        }
    }

    /**
     * Parses a number starting at the current character. Numbers with a fraction or an exponent
     * are converted with {@code FastDoubleParser.parseDouble}, all others with {@link Long#parseLong(String)}.
     */
    private void parseNumber() throws JsonException {
        boolean minus = false;
        boolean dot = false;
        boolean exponent = false;
        // ch was read from index i - 1, that is where the number text begins
        int start = i - 1;
        while (true) {
            if (ch == '-') {
                if (i - start > 1) {
                    throw new JsonException("minus inside number");
                }
                minus = true;
                ch = next();
            } else if (ch == 'e' || ch == 'E') {
                if (exponent) {
                    throw new JsonException("double exponents");
                }
                exponent = true;
                // step over the exponent marker exactly once, then over one optional sign
                ch = next();
                if (ch == '-' || ch == '+') {
                    ch = next();
                }
                if (ch < '0' || ch > '9') {
                    throw new JsonException("invalid exponent");
                }
            } else if (ch == '.') {
                if (dot) {
                    throw new JsonException("multiple dots");
                }
                // i - start counts the dot itself plus everything consumed before it
                if (i - start == (minus ? 2 : 1)) {
                    throw new JsonException("no digit before dot");
                }
                dot = true;
                ch = next();
            } else if (ch >= '0' && ch <= '9') {
                ch = next();
            } else {
                break;
            }
        }
        // the number text spans [start, i - 1), so a lone '-' has i - start == 2
        if (minus && i - start == 2) {
            throw new JsonException("isolated minus");
        }
        String text = input.substring(start, i - 1);
        Object value;
        try {
            if (dot || exponent) {
                value = FastDoubleParser.parseDouble(text);
            } else {
                value = Long.parseLong(text);
            }
        } catch (NumberFormatException e) {
            // covers long overflow and malformed leftovers; only the String constructor of
            // JsonException is known here, so the detail goes into the message
            throw new JsonException("invalid number: " + text + " (" + e.getMessage() + ")");
        }
        valueNode(value);
    }

    /**
     * Parses string content; the opening quote has already been consumed by the caller.
     *
     * @param isKey {@code true} to push the string as a map key, {@code false} to emit it as a value
     */
    private void parseString(boolean isKey) throws JsonException {
        boolean escaped = false;
        int start = i - 1;
        while (true) {
            if (ch == DOUBLE_QUOTE) {
                String raw = input.substring(start, i - 1);
                // always hand out a String so escaped and unescaped values have the same type
                String text = escaped ? unescape(raw) : raw;
                if (isKey) {
                    stack.push(new KeyNode(text));
                } else {
                    valueNode(text);
                }
                ch = next();
                return;
            } else if (ch == EOS && atEnd()) {
                // without this check the loop would keep calling next() forever
                throw new JsonException("unterminated string");
            } else if (ch == BACKSLASH) {
                escaped = true;
                ch = next();
                if (ch == DOUBLE_QUOTE || ch == '/' || ch == BACKSLASH || ch == 'b' || ch == 'f' || ch == 'n' || ch == 'r' || ch == 't') {
                    ch = next();
                } else if (ch == 'u') {
                    expectHex();
                    expectHex();
                    expectHex();
                    expectHex();
                } else {
                    throw new JsonException("illegal escape char: " + ch);
                }
            } else if (ch < 32) {
                throw new JsonException("illegal control char: " + ch);
            } else {
                ch = next();
            }
        }
    }

    /** Parses a list; the current character is the opening bracket. */
    private void parseList() {
        int count = 0;
        stack.push(TinyList.builder());
        ch = next();
        while (true) {
            skipWhitespace();
            if (ch == CLOSE_LIST) {
                result = stack.pop();
                tryAppend(result);
                ch = next();
                return;
            }
            if (count > 0) {
                expectChar(COMMA);
                ch = next();
                skipWhitespace();
            }
            parseValue();
            count++;
        }
    }

    /** Parses a map; the current character is the opening brace. */
    private void parseMap() {
        int count = 0;
        stack.push(TinyMap.builder());
        ch = next();
        while (true) {
            skipWhitespace();
            if (ch == CLOSE_MAP) {
                result = stack.pop();
                tryAppend(result);
                ch = next();
                return;
            }
            if (count > 0) {
                expectChar(COMMA);
                ch = next();
                skipWhitespace();
            }
            expectChar(DOUBLE_QUOTE);
            ch = next();
            parseString(true);
            skipWhitespace();
            expectChar(COLON);
            ch = next();
            skipWhitespace();
            parseValue();
            count++;
        }
    }

    private void parseNull() throws JsonException {
        expectRest("ull");
        valueNode(null);
        ch = next();
    }

    private void parseTrue() throws JsonException {
        expectRest("rue");
        valueNode(true);
        ch = next();
    }

    private void parseFalse() throws JsonException {
        expectRest("alse");
        valueNode(false);
        ch = next();
    }

    /** Reads the remaining characters of a literal, leaving {@link #ch} on its last character. */
    private void expectRest(String rest) throws JsonException {
        for (int k = 0; k < rest.length(); k++) {
            ch = next();
            expectChar(rest.charAt(k));
        }
    }

    private void expectChar(char expected) throws JsonException {
        if (ch != expected) {
            throw new JsonException("expected char " + expected + " but got " + ch);
        }
    }

    /** Advances by one character and requires it to be a hexadecimal digit. */
    private void expectHex() throws JsonException {
        ch = next();
        if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
            return;
        }
        throw new JsonException("invalid hex char " + ch);
    }

    private void skipWhitespace() {
        while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
            ch = next();
        }
    }

    /**
     * Resolves the backslash escapes of string content that {@link #parseString(boolean)} has
     * already validated.
     */
    private static String unescape(String input) {
        StringBuilder sb = new StringBuilder(input.length());
        int pos = 0;
        while (pos < input.length()) {
            char c = input.charAt(pos);
            if (c == BACKSLASH) {
                pos++;
                switch (input.charAt(pos)) {
                    case BACKSLASH:
                        sb.append(BACKSLASH);
                        break;
                    case '/':
                        sb.append('/');
                        break;
                    case DOUBLE_QUOTE:
                        sb.append(DOUBLE_QUOTE);
                        break;
                    case 'b':
                        sb.append('\b');
                        break;
                    case 'f':
                        sb.append('\f');
                        break;
                    case 'n':
                        sb.append('\n');
                        break;
                    case 'r':
                        sb.append('\r');
                        break;
                    case 't':
                        sb.append('\t');
                        break;
                    case 'u':
                        // four hex digits never exceed 0xFFFF, so the value fits a single char
                        sb.append((char) Integer.parseInt(input.substring(pos + 1, pos + 5), 16));
                        pos += 4;
                        break;
                    default:
                        break;
                }
            } else {
                sb.append(c);
            }
            pos++;
        }
        return sb.toString();
    }

    /**
     * Returns the next character or {@link #EOS} past the end. The index is advanced in both
     * cases because the substring arithmetic in the value parsers relies on {@code i - 1}
     * pointing at the position of {@link #ch}.
     */
    private char next() {
        int index = i++;
        return index >= 0 && index < input.length() ? input.charAt(index) : EOS;
    }

    /** Tells whether {@link #next()} has run past the last character of the input. */
    private boolean atEnd() {
        return i > input.length();
    }

    /** Adds the value to the enclosing container, or makes it the result when there is none. */
    private void valueNode(Object object) {
        if (!tryAppend(object)) {
            stack.push(object);
            result = object;
        }
    }

    /**
     * Appends the value to the list builder on top of the stack, or stores it in the map builder
     * below a pending key.
     *
     * @return {@code true} if the value was taken by a container
     */
    @SuppressWarnings("unchecked")
    private boolean tryAppend(Object object) {
        // peek() yields null on an empty stack, which fails both instanceof tests
        Object top = stack.peek();
        if (top instanceof TinyList.Builder) {
            ((TinyList.Builder<Object>) top).add(object);
            return true;
        }
        if (top instanceof KeyNode) {
            KeyNode key = (KeyNode) stack.pop();
            Object parent = stack.peek();
            if (parent instanceof TinyMap.Builder) {
                ((TinyMap.Builder<String, Object>) parent).put(key.get().toString(), object);
                return true;
            }
        }
        return false;
    }
}

View file

@ -17,10 +17,6 @@ public class TinyJsonListener implements JsonResult {
private final ValueNode FALSE_NODE = new ValueNode(Boolean.FALSE);
/**
 * Returns the {@code stack} field as-is; no copy is made here.
 *
 * @return the stack of nodes held by this listener
 */
public Deque<Node<?>> getStack() {
return stack;
}
@Override
public Node<?> getResult() {
return node;

View file

@ -0,0 +1,28 @@
package org.xbib.datastructures.json.tiny.test;
import org.junit.jupiter.api.Test;
import org.xbib.datastructures.json.tiny.PlainParser;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
/**
 * Tests for {@link PlainParser}.
 */
public class PlainParserTest {

    /**
     * Parses the bundled {@code test.json} resource and compares the string form of the result.
     *
     * @throws IOException if the resource cannot be read
     */
    @Test
    public void testStringParser() throws IOException {
        try (InputStream inputStream = ParserTest.class.getResourceAsStream("/org/xbib/datastructures/json/tiny/test/test.json")) {
            // fail loudly instead of passing without checking anything when the resource is missing
            assertNotNull(inputStream, "test resource test.json not found");
            byte[] b = inputStream.readAllBytes();
            String string = new String(b, StandardCharsets.UTF_8);
            PlainParser parser = new PlainParser();
            parser.parse(string);
            assertEquals("{a=b, c=d, e=[f, g], h={i={j=k}}, l=null, m=true, n=false, o=0, p=1, q=-1, r=0.0, s=1.0, t=2.1, u=-1.0, v=-2.1, w=, x=₫, y=Jörg}",
                    parser.getResult().toString());
        }
    }
}