From e225155e6867bfe9650a343056a1bfac4bbeaf9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Prante?=
Date: Mon, 13 Mar 2023 11:39:50 +0100
Subject: [PATCH] begin of MARC parsing in JSON

---
Review notes (free-form text after the three-dash line, not part of the commit message):
 * Line breaks were restored. Generic type arguments that were missing from the
   new json classes were reconstructed from their usage - TODO confirm against
   the repository. MarcRecord context lines are kept as found.
 * JsonParser.parseNumber: the exponent branch read one character too many and
   never stored the sign ("1e5" was rejected, "1e-5" became "1e5"); a minus sign
   after the first digit ("1-") was accepted by the scanner.
 * JsonParser.parseString: no longer depends on Reader.mark()/reset(), and
   unescaped text is handed to the listener as String.
 * All changes are line-neutral, so hunk sizes and the diffstat are unchanged.
   The blob ids on the "index" lines still name the originally committed contents.

 gradle.properties                             |   2 +-
 src/main/java/org/xbib/marc/MarcRecord.java   |  25 +-
 .../org/xbib/marc/json/JsonException.java     |  13 +
 .../java/org/xbib/marc/json/JsonListener.java |  30 ++
 .../java/org/xbib/marc/json/JsonParser.java   | 291 ++++++++++++++++++
 .../xbib/marc/json/JsonResultListener.java    |   6 +
 src/main/java/org/xbib/marc/json/KeyNode.java |  24 ++
 .../java/org/xbib/marc/json/ListNode.java     |   6 +
 src/main/java/org/xbib/marc/json/MapNode.java |   6 +
 .../org/xbib/marc/json/MarcJsonListener.java  | 113 +++++++
 .../java/org/xbib/marc/json/MarcListNode.java |  15 +
 .../java/org/xbib/marc/json/MarcMapNode.java  |  15 +
 src/main/java/org/xbib/marc/json/Node.java    |   6 +
 src/main/java/org/xbib/marc/json/Parser.java  |   9 +
 .../java/org/xbib/marc/json/ValueNode.java    |  29 ++
 15 files changed, 576 insertions(+), 14 deletions(-)
 create mode 100644 src/main/java/org/xbib/marc/json/JsonException.java
 create mode 100644 src/main/java/org/xbib/marc/json/JsonListener.java
 create mode 100644 src/main/java/org/xbib/marc/json/JsonParser.java
 create mode 100644 src/main/java/org/xbib/marc/json/JsonResultListener.java
 create mode 100644 src/main/java/org/xbib/marc/json/KeyNode.java
 create mode 100644 src/main/java/org/xbib/marc/json/ListNode.java
 create mode 100644 src/main/java/org/xbib/marc/json/MapNode.java
 create mode 100644 src/main/java/org/xbib/marc/json/MarcJsonListener.java
 create mode 100644 src/main/java/org/xbib/marc/json/MarcListNode.java
 create mode 100644 src/main/java/org/xbib/marc/json/MarcMapNode.java
 create mode 100644 src/main/java/org/xbib/marc/json/Node.java
 create mode 100644 src/main/java/org/xbib/marc/json/Parser.java
 create mode 100644 src/main/java/org/xbib/marc/json/ValueNode.java

diff --git a/gradle.properties b/gradle.properties
index a3c90aa..67e7414 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,5 +1,5 @@
 group = org.xbib
 name = marc
-version = 2.9.20
+version = 2.9.21
 
 org.gradle.warning.mode = ALL
diff --git a/src/main/java/org/xbib/marc/MarcRecord.java b/src/main/java/org/xbib/marc/MarcRecord.java
index 2b02e00..7f3f770 100644
--- a/src/main/java/org/xbib/marc/MarcRecord.java
+++ b/src/main/java/org/xbib/marc/MarcRecord.java
@@ -15,6 +15,7 @@
  */
 package org.xbib.marc;
 
+import java.util.Objects;
 import org.xbib.marc.label.RecordLabel;
 
 import java.io.InputStream;
@@ -44,7 +45,7 @@ import static org.xbib.marc.json.MarcJsonWriter.LEADER_TAG;
 import static org.xbib.marc.json.MarcJsonWriter.TYPE_TAG;
 
 /**
- * A MARC record. This is an extended MARC record augmented with MarcXchange information.
+ * A MARC record.
  */
 public class MarcRecord implements Map {
 
@@ -91,9 +92,7 @@ public class MarcRecord implements Map {
         this.format = format;
         this.type = type;
         this.recordLabel = recordLabel;
-        if (recordLabel == null) {
-            throw new NullPointerException("record label must not be null");
-        }
+        Objects.requireNonNull(recordLabel, "record label must not be null");
         this.marcFields = marcFields;
         this.delegate = lightweight ? Map.of() : createMapFromMarcFields(comparator);
     }
@@ -179,6 +178,15 @@ public class MarcRecord implements Map {
         return recordLabel;
     }
 
+    /**
+     * Return the MARC fields of this record.
+     *
+     * @return the MARC field list
+     */
+    public List getFields() {
+        return marcFields;
+    }
+
     public LocalDate getCreationDate(LocalDate defaultDate) {
         if (marcFields != null) {
             MarcField marcField = getFirst("008");
@@ -255,15 +263,6 @@ public class MarcRecord implements Map {
         }
     }
 
-    /**
-     * Return the MARC fields of this record.
-     *
-     * @return the MARC field list
-     */
-    public List getFields() {
-        return marcFields;
-    }
-
     /**
      * Filter all MARC fields of this record with a given tag.
      *
diff --git a/src/main/java/org/xbib/marc/json/JsonException.java b/src/main/java/org/xbib/marc/json/JsonException.java
new file mode 100644
index 0000000..a6b33c3
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/JsonException.java
@@ -0,0 +1,13 @@
+package org.xbib.marc.json;
+
+@SuppressWarnings("serial")
+public class JsonException extends RuntimeException {
+
+    public JsonException(String message) {
+        super(message);
+    }
+
+    public JsonException(Exception exception) {
+        super(exception);
+    }
+}
diff --git a/src/main/java/org/xbib/marc/json/JsonListener.java b/src/main/java/org/xbib/marc/json/JsonListener.java
new file mode 100644
index 0000000..ff4fba9
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/JsonListener.java
@@ -0,0 +1,30 @@
+package org.xbib.marc.json;
+
+public interface JsonListener {
+
+    void begin();
+
+    void end();
+
+    void onNull();
+
+    void onTrue();
+
+    void onFalse();
+
+    void onKey(CharSequence key);
+
+    void onValue(CharSequence value);
+
+    void onLong(Long value);
+
+    void onDouble(Double value);
+
+    void beginCollection();
+
+    void endCollection();
+
+    void beginMap();
+
+    void endMap();
+}
diff --git a/src/main/java/org/xbib/marc/json/JsonParser.java b/src/main/java/org/xbib/marc/json/JsonParser.java
new file mode 100644
index 0000000..af5906f
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/JsonParser.java
@@ -0,0 +1,291 @@
+package org.xbib.marc.json;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Objects;
+
+public class JsonParser implements Parser {
+
+    private final JsonResultListener listener;
+
+    private Reader reader;
+
+    private int ch;
+
+    public JsonParser() {
+        this(new MarcJsonListener());
+    }
+
+    public JsonParser(JsonResultListener listener) {
+        this.listener = listener;
+    }
+
+    @Override
+    public Node<?> parse(Reader reader) throws IOException {
+        Objects.requireNonNull(reader);
+        Objects.requireNonNull(listener);
+        this.reader = reader;
+        listener.begin();
+        ch = reader.read();
+        skipWhitespace();
+        parseValue();
+        skipWhitespace();
+        if (ch != -1) {
+            throw new JsonException("malformed json: " + ch);
+        }
+        listener.end();
+        return listener.getResult();
+    }
+
+    private void parseValue() throws IOException, JsonException {
+        switch (ch) {
+            case '"' -> parseString(false);
+            case '{' -> parseMap();
+            case '[' -> parseList();
+            case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-' -> parseNumber();
+            case 't' -> parseTrue();
+            case 'f' -> parseFalse();
+            case 'n' -> parseNull();
+            default -> throw new JsonException("illegal character: " + ch);
+        }
+    }
+
+    private void parseNumber() throws IOException, JsonException {
+        boolean minus = false;
+        boolean dot = false;
+        boolean exponent = false;
+        StringBuilder sb = new StringBuilder();
+        while (true) {
+            if (ch == '-') {
+                if (sb.length() > 0) { // a leading minus is only valid as the very first character
+                    throw new JsonException("minus inside number");
+                }
+                sb.append((char) ch);
+                ch = reader.read();
+                minus = true;
+            } else if (ch == 'e' || ch == 'E') {
+                sb.append((char) ch);
+                ch = reader.read();
+                if (exponent) {
+                    throw new JsonException("double exponents");
+                }
+                exponent = true;
+                if (ch == '-' || ch == '+') {
+                    sb.append((char) ch); // keep the exponent sign, "1e-5" must not turn into "1e5"
+                    ch = reader.read();
+                    if (ch < '0' || ch > '9') {
+                        throw new JsonException("invalid exponent");
+                    }
+                } else if (ch < '0' || ch > '9') {
+                    throw new JsonException("invalid exponent");
+                }
+            } else if (ch == '.') {
+                sb.append((char) ch);
+                ch = reader.read();
+                if (dot) {
+                    throw new JsonException("multiple dots");
+                }
+                if (sb.length() == 1) {
+                    throw new JsonException("no digit before dot");
+                }
+                dot = true;
+            } else if (ch >= '0' && ch <= '9') {
+                sb.append((char) ch);
+                ch = reader.read();
+            } else {
+                break;
+            }
+        }
+        if (minus && sb.length() == 1) {
+            throw new JsonException("isolated minus");
+        }
+        if (dot || exponent) {
+            listener.onDouble(Double.parseDouble(sb.toString()));
+        } else {
+            listener.onLong(Long.parseLong(sb.toString()));
+        }
+    }
+
+    /**
+     * Parse a JSON string whose opening quote is the current character and
+     * report it to the listener. On return, {@code ch} holds the character
+     * that follows the closing quote.
+     *
+     * @param isKey true to report the string as a map key, false as a value
+     */
+    private void parseString(boolean isKey) throws IOException, JsonException {
+        // Collect the raw characters while scanning instead of using mark()/reset():
+        // mark() throws on readers that do not support it, and reset() can fail
+        // once more than the former read-ahead limit of 1024 characters were read.
+        StringBuilder raw = new StringBuilder();
+        boolean escaped = false;
+        ch = reader.read();
+        while (true) {
+            if (ch == '"') {
+                // resolve escapes only when one was seen; always pass a String
+                String s = escaped ? unescape(raw).toString() : raw.toString();
+                if (isKey) {
+                    listener.onKey(s);
+                } else {
+                    listener.onValue(s);
+                }
+                ch = reader.read();
+                return;
+            } else if (ch == '\\') {
+                escaped = true;
+                raw.append('\\');
+                ch = reader.read();
+                if (ch == '"' || ch == '/' || ch == '\\' || ch == 'b' || ch == 'f' || ch == 'n' || ch == 'r' || ch == 't') {
+                    raw.append((char) ch);
+                    ch = reader.read();
+                } else if (ch == 'u') {
+                    raw.append('u');
+                    for (int i = 0; i < 4; i++) {
+                        expectHex();
+                        raw.append((char) ch);
+                    }
+                    ch = reader.read();
+                } else {
+                    throw new JsonException("illegal escape char: " + ch);
+                }
+            } else if (ch < 32) {
+                // this branch also ends the loop at end of input (-1)
+                throw new JsonException("illegal control char: " + ch);
+            } else {
+                raw.append((char) ch);
+                ch = reader.read();
+            }
+        }
+    }
+
+    private void parseList() throws IOException {
+        int count = 0;
+        listener.beginCollection();
+        ch = reader.read();
+        while (true) {
+            skipWhitespace();
+            if (ch == ']') {
+                listener.endCollection();
+                ch = reader.read();
+                return;
+            }
+            if (count > 0) {
+                expectChar(',');
+                ch = reader.read();
+                skipWhitespace();
+            }
+            parseValue();
+            count++;
+        }
+    }
+
+    private void parseMap() throws IOException, JsonException {
+        int count = 0;
+        listener.beginMap();
+        ch = reader.read();
+        while (true) {
+            skipWhitespace();
+            if (ch == '}') {
+                listener.endMap();
+                ch = reader.read();
+                return;
+            }
+            if (count > 0) {
+                expectChar(',');
+                ch = reader.read();
+                skipWhitespace();
+            }
+            expectChar('"');
+            parseString(true);
+            skipWhitespace();
+            expectChar(':');
+            ch = reader.read();
+            skipWhitespace();
+            parseValue();
+            count++;
+        }
+    }
+
+    private void parseNull() throws IOException, JsonException {
+        ch = reader.read();
+        expectChar('u');
+        ch = reader.read();
+        expectChar('l');
+        ch = reader.read();
+        expectChar('l');
+        listener.onNull();
+        ch = reader.read();
+    }
+
+    private void parseTrue() throws IOException, JsonException {
+        ch = reader.read();
+        expectChar('r');
+        ch = reader.read();
+        expectChar('u');
+        ch = reader.read();
+        expectChar('e');
+        listener.onTrue();
+        ch = reader.read();
+    }
+
+    private void parseFalse() throws IOException, JsonException {
+        ch = reader.read();
+        expectChar('a');
+        ch = reader.read();
+        expectChar('l');
+        ch = reader.read();
+        expectChar('s');
+        ch = reader.read();
+        expectChar('e');
+        listener.onFalse();
+        ch = reader.read();
+    }
+
+    private void expectChar(char expected) throws JsonException {
+        if (ch != expected) {
+            throw new JsonException("expected char " + expected + " but got " + (char)ch);
+        }
+    }
+
+    private void expectHex() throws IOException, JsonException {
+        ch = reader.read();
+        if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
+            return;
+        }
+        throw new JsonException("invalid hex char " + ch);
+    }
+
+    private void skipWhitespace() throws IOException {
+        while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
+            ch = reader.read();
+        }
+    }
+
+    private static CharSequence unescape(CharSequence input) {
+        StringBuilder result = new StringBuilder(input.length());
+        int i = 0;
+        while (i < input.length()) {
+            if (input.charAt(i) == '\\') {
+                i++;
+                switch (input.charAt(i)) {
+                    case '\\' -> result.append('\\');
+                    case '/' -> result.append('/');
+                    case '"' -> result.append('"');
+                    case 'b' -> result.append('\b');
+                    case 'f' -> result.append('\f');
+                    case 'n' -> result.append('\n');
+                    case 'r' -> result.append('\r');
+                    case 't' -> result.append('\t');
+                    case 'u' -> {
+                        result.append(Character.toChars(Integer.parseInt(input.toString().substring(i + 1, i + 5), 16)));
+                        i += 4;
+                    }
+                }
+            } else {
+                result.append(input.charAt(i));
+            }
+            i++;
+        }
+        return result;
+    }
+}
diff --git a/src/main/java/org/xbib/marc/json/JsonResultListener.java b/src/main/java/org/xbib/marc/json/JsonResultListener.java
new file mode 100644
index 0000000..fdbb7d9
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/JsonResultListener.java
@@ -0,0 +1,6 @@
+package org.xbib.marc.json;
+
+public interface JsonResultListener extends JsonListener {
+
+    Node<?> getResult();
+}
diff --git a/src/main/java/org/xbib/marc/json/KeyNode.java b/src/main/java/org/xbib/marc/json/KeyNode.java
new file mode 100644
index 0000000..d266ab2
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/KeyNode.java
@@ -0,0 +1,24 @@
+package org.xbib.marc.json;
+
+public class KeyNode implements Node<CharSequence> {
+
+    private CharSequence value;
+
+    public KeyNode(CharSequence value) {
+        this.value = value;
+    }
+
+    public void setValue(String value) {
+        this.value = value;
+    }
+
+    @Override
+    public CharSequence get() {
+        return value;
+    }
+
+    @Override
+    public String toString() {
+        return value != null ? value.toString() : null;
+    }
+}
diff --git a/src/main/java/org/xbib/marc/json/ListNode.java b/src/main/java/org/xbib/marc/json/ListNode.java
new file mode 100644
index 0000000..578a227
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/ListNode.java
@@ -0,0 +1,6 @@
+package org.xbib.marc.json;
+
+import java.util.List;
+
+public interface ListNode extends Node<List<Node<?>>> {
+}
diff --git a/src/main/java/org/xbib/marc/json/MapNode.java b/src/main/java/org/xbib/marc/json/MapNode.java
new file mode 100644
index 0000000..45cf417
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/MapNode.java
@@ -0,0 +1,6 @@
+package org.xbib.marc.json;
+
+import java.util.Map;
+
+public interface MapNode extends Node<Map<CharSequence, Node<?>>> {
+}
diff --git a/src/main/java/org/xbib/marc/json/MarcJsonListener.java b/src/main/java/org/xbib/marc/json/MarcJsonListener.java
new file mode 100644
index 0000000..7b9ab19
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/MarcJsonListener.java
@@ -0,0 +1,113 @@
+package org.xbib.marc.json;
+
+import java.util.Deque;
+import java.util.LinkedList;
+
+public class MarcJsonListener implements JsonResultListener {
+
+    private Node<?> node;
+
+    private final Deque<Node<?>> stack = new LinkedList<>();
+
+    private final ValueNode NULL_NODE = new ValueNode(null);
+
+    private final ValueNode TRUE_NODE = new ValueNode(Boolean.TRUE);
+
+    private final ValueNode FALSE_NODE = new ValueNode(Boolean.FALSE);
+
+    public MarcJsonListener() {
+    }
+
+    @Override
+    public Node<?> getResult() {
+        return node;
+    }
+
+    @Override
+    public void begin() {
+        stack.clear();
+    }
+
+    @Override
+    public void end() {
+    }
+
+    @Override
+    public void onNull() {
+        valueNode(NULL_NODE);
+    }
+
+    @Override
+    public void onTrue() {
+        valueNode(TRUE_NODE);
+    }
+
+    @Override
+    public void onFalse() {
+        valueNode(FALSE_NODE);
+    }
+
+    @Override
+    public void onKey(CharSequence key) {
+        stack.push(new KeyNode(key));
+    }
+
+    @Override
+    public void onValue(CharSequence value) {
+        valueNode(new ValueNode(value));
+    }
+
+    @Override
+    public void onLong(Long value) {
+        valueNode(new ValueNode(value));
+    }
+
+    @Override
+    public void onDouble(Double value) {
+        valueNode(new ValueNode(value));
+    }
+
+    @Override
+    public void beginCollection() {
+        stack.push(new MarcListNode());
+    }
+
+    @Override
+    public void endCollection() {
+        node = stack.pop();
+        tryAppend(node);
+    }
+
+    @Override
+    public void beginMap() {
+        stack.push(new MarcMapNode());
+    }
+
+    @Override
+    public void endMap() {
+        node = stack.pop();
+        tryAppend(node);
+    }
+
+    private void valueNode(ValueNode valueNode) {
+        if (!tryAppend(valueNode)) {
+            stack.push(valueNode);
+            node = valueNode;
+        }
+    }
+
+    private boolean tryAppend(Node<?> node) {
+        if (!stack.isEmpty()) {
+            if (stack.peek() instanceof MarcListNode listNode) {
+                listNode.add(node);
+                return true;
+            } else if (stack.peek() instanceof KeyNode) {
+                KeyNode keyNode = (KeyNode) stack.pop();
+                MarcMapNode mapNode = (MarcMapNode) stack.peek();
+                mapNode.put(keyNode.get(), node);
+                return true;
+            }
+        }
+        return false;
+    }
+}
diff --git a/src/main/java/org/xbib/marc/json/MarcListNode.java b/src/main/java/org/xbib/marc/json/MarcListNode.java
new file mode 100644
index 0000000..d53bac4
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/MarcListNode.java
@@ -0,0 +1,15 @@
+package org.xbib.marc.json;
+
+import java.util.LinkedList;
+import java.util.List;
+
+public class MarcListNode extends LinkedList<Node<?>> implements ListNode {
+
+    public MarcListNode() {
+    }
+
+    @Override
+    public List<Node<?>> get() {
+        return this;
+    }
+}
diff --git a/src/main/java/org/xbib/marc/json/MarcMapNode.java b/src/main/java/org/xbib/marc/json/MarcMapNode.java
new file mode 100644
index 0000000..5d6dec6
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/MarcMapNode.java
@@ -0,0 +1,15 @@
+package org.xbib.marc.json;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+public class MarcMapNode extends LinkedHashMap<CharSequence, Node<?>> implements MapNode {
+
+    public MarcMapNode() {
+    }
+
+    @Override
+    public Map<CharSequence, Node<?>> get() {
+        return this;
+    }
+}
diff --git a/src/main/java/org/xbib/marc/json/Node.java b/src/main/java/org/xbib/marc/json/Node.java
new file mode 100644
index 0000000..34188b6
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/Node.java
@@ -0,0 +1,6 @@
+package org.xbib.marc.json;
+
+public interface Node<T> {
+
+    T get();
+}
diff --git a/src/main/java/org/xbib/marc/json/Parser.java b/src/main/java/org/xbib/marc/json/Parser.java
new file mode 100644
index 0000000..2b7736c
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/Parser.java
@@ -0,0 +1,9 @@
+package org.xbib.marc.json;
+
+import java.io.IOException;
+import java.io.Reader;
+
+public interface Parser {
+
+    Node<?> parse(Reader reader) throws IOException;
+}
diff --git a/src/main/java/org/xbib/marc/json/ValueNode.java b/src/main/java/org/xbib/marc/json/ValueNode.java
new file mode 100644
index 0000000..f0f4d4e
--- /dev/null
+++ b/src/main/java/org/xbib/marc/json/ValueNode.java
@@ -0,0 +1,29 @@
+package org.xbib.marc.json;
+
+public class ValueNode implements Node<Object> {
+
+    private Object value;
+
+    public ValueNode(Object value) {
+        this.value = value;
+    }
+
+    // no @Override here: Node declares only get(), so getDepth() overrides nothing
+    public int getDepth() {
+        return 0;
+    }
+
+    public void set(Object value) {
+        this.value = value;
+    }
+
+    @Override
+    public Object get() {
+        return value;
+    }
+
+    @Override
+    public String toString() {
+        return value != null ? value.toString() : null;
+    }
+}