begin of MARC parsing in JSON

This commit is contained in:
Jörg Prante 2023-03-13 11:39:50 +01:00
parent dceb103fcb
commit e225155e68
15 changed files with 576 additions and 14 deletions

View file

@ -1,5 +1,5 @@
group = org.xbib group = org.xbib
name = marc name = marc
version = 2.9.20 version = 2.9.21
org.gradle.warning.mode = ALL org.gradle.warning.mode = ALL

View file

@ -15,6 +15,7 @@
*/ */
package org.xbib.marc; package org.xbib.marc;
import java.util.Objects;
import org.xbib.marc.label.RecordLabel; import org.xbib.marc.label.RecordLabel;
import java.io.InputStream; import java.io.InputStream;
@ -44,7 +45,7 @@ import static org.xbib.marc.json.MarcJsonWriter.LEADER_TAG;
import static org.xbib.marc.json.MarcJsonWriter.TYPE_TAG; import static org.xbib.marc.json.MarcJsonWriter.TYPE_TAG;
/** /**
* A MARC record. This is an extended MARC record augmented with MarcXchange information. * A MARC record.
*/ */
public class MarcRecord implements Map<String, Object> { public class MarcRecord implements Map<String, Object> {
@ -91,9 +92,7 @@ public class MarcRecord implements Map<String, Object> {
this.format = format; this.format = format;
this.type = type; this.type = type;
this.recordLabel = recordLabel; this.recordLabel = recordLabel;
if (recordLabel == null) { Objects.requireNonNull(recordLabel, "record label must not be null");
throw new NullPointerException("record label must not be null");
}
this.marcFields = marcFields; this.marcFields = marcFields;
this.delegate = lightweight ? Map.of() : createMapFromMarcFields(comparator); this.delegate = lightweight ? Map.of() : createMapFromMarcFields(comparator);
} }
@ -179,6 +178,15 @@ public class MarcRecord implements Map<String, Object> {
return recordLabel; return recordLabel;
} }
/**
* Return the MARC fields of this record.
*
* @return the MARC field list
*/
public List<MarcField> getFields() {
return marcFields;
}
public LocalDate getCreationDate(LocalDate defaultDate) { public LocalDate getCreationDate(LocalDate defaultDate) {
if (marcFields != null) { if (marcFields != null) {
MarcField marcField = getFirst("008"); MarcField marcField = getFirst("008");
@ -255,15 +263,6 @@ public class MarcRecord implements Map<String, Object> {
} }
} }
/**
* Return the MARC fields of this record.
*
* @return the MARC field list
*/
public List<MarcField> getFields() {
return marcFields;
}
/** /**
* Filter all MARC fields of this record with a given tag. * Filter all MARC fields of this record with a given tag.
* *

View file

@ -0,0 +1,13 @@
package org.xbib.marc.json;
/**
 * Unchecked exception raised when JSON input cannot be processed.
 */
@SuppressWarnings("serial")
public class JsonException extends RuntimeException {

    /**
     * Wraps another exception; the given exception becomes the cause.
     *
     * @param exception the exception to wrap
     */
    public JsonException(final Exception exception) {
        super(exception);
    }

    /**
     * Creates an exception that carries only a description of the problem.
     *
     * @param message the detail message
     */
    public JsonException(final String message) {
        super(message);
    }
}

View file

@ -0,0 +1,30 @@
package org.xbib.marc.json;
/**
 * Callback interface that receives one event per element while a JSON document is parsed.
 * The descriptions below reflect how {@code JsonParser} drives this interface.
 */
public interface JsonListener {
/** Called once at the start of a parse, before any input is read. */
void begin();
/** Called once after the whole document has been consumed successfully. */
void end();
/** Called for the literal {@code null}. */
void onNull();
/** Called for the literal {@code true}. */
void onTrue();
/** Called for the literal {@code false}. */
void onFalse();
/**
 * Called for the name of a map member, before the member's value is reported.
 *
 * @param key the member name
 */
void onKey(CharSequence key);
/**
 * Called for a string value.
 *
 * @param value the string content
 */
void onValue(CharSequence value);
/**
 * Called for a number written without a fraction or an exponent.
 *
 * @param value the parsed number
 */
void onLong(Long value);
/**
 * Called for a number written with a fraction or an exponent.
 *
 * @param value the parsed number
 */
void onDouble(Double value);
/** Called when an opening {@code [} is seen. */
void beginCollection();
/** Called when the matching closing {@code ]} is seen. */
void endCollection();
/** Called when an opening <code>{</code> is seen. */
void beginMap();
/** Called when the matching closing <code>}</code> is seen. */
void endMap();
}

View file

@ -0,0 +1,291 @@
package org.xbib.marc.json;
import java.io.IOException;
import java.io.Reader;
import java.util.Objects;
public class JsonParser implements Parser {
private final JsonResultListener listener;
private Reader reader;
private int ch;
public JsonParser() {
this(new MarcJsonListener());
}
public JsonParser(JsonResultListener listener) {
this.listener = listener;
}
@Override
public Node<?> parse(Reader reader) throws IOException {
Objects.requireNonNull(reader);
Objects.requireNonNull(listener);
this.reader = reader;
listener.begin();
ch = reader.read();
skipWhitespace();
parseValue();
skipWhitespace();
if (ch != -1) {
throw new JsonException("malformed json: " + ch);
}
listener.end();
return listener.getResult();
}
private void parseValue() throws IOException, JsonException {
switch (ch) {
case '"' -> parseString(false);
case '{' -> parseMap();
case '[' -> parseList();
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-' -> parseNumber();
case 't' -> parseTrue();
case 'f' -> parseFalse();
case 'n' -> parseNull();
default -> throw new JsonException("illegal character: " + ch);
}
}
private void parseNumber() throws IOException, JsonException {
boolean minus = false;
boolean dot = false;
boolean exponent = false;
StringBuilder sb = new StringBuilder();
while (true) {
if (ch == '-') {
if (sb.length() > 1) {
throw new JsonException("minus inside number");
}
sb.append((char) ch);
ch = reader.read();
minus = true;
} else if (ch == 'e' || ch == 'E') {
sb.append((char) ch);
ch = reader.read();
if (exponent) {
throw new JsonException("double exponents");
}
exponent = true;
ch = reader.read();
if (ch == '-' || ch == '+') {
ch = reader.read();
if (ch < '0' || ch > '9') {
throw new JsonException("invalid exponent");
}
} else if (ch < '0' || ch > '9') {
throw new JsonException("invalid exponent");
}
} else if (ch == '.') {
sb.append((char) ch);
ch = reader.read();
if (dot) {
throw new JsonException("multiple dots");
}
if (sb.length() == 1) {
throw new JsonException("no digit before dot");
}
dot = true;
} else if (ch >= '0' && ch <= '9') {
sb.append((char) ch);
ch = reader.read();
} else {
break;
}
}
if (minus && sb.length() == 1) {
throw new JsonException("isolated minus");
}
if (dot || exponent) {
listener.onDouble(Double.parseDouble(sb.toString()));
} else {
listener.onLong(Long.parseLong(sb.toString()));
}
}
private void parseString(boolean isKey) throws IOException, JsonException {
reader.mark(1024);
ch = reader.read();
boolean escaped = false;
int count = 1;
while (true) {
if (ch == '"') {
char[] buffer = new char[count - 1];
reader.reset();
reader.read(buffer, 0, count - 1);
reader.read();
CharSequence s = new String(buffer);
if (escaped) {
s = unescape(s);
if (isKey) {
listener.onKey(s);
} else {
listener.onValue(s);
}
} else {
if (isKey) {
listener.onKey(s);
} else {
listener.onValue(s);
}
}
ch = reader.read();
return;
} else if (ch == '\\') {
escaped = true;
ch = reader.read();
if (ch == '"' || ch == '/' || ch == '\\' || ch == 'b' || ch == 'f' || ch == 'n' || ch == 'r' || ch == 't') {
ch = reader.read();
count += 2;
} else if (ch == 'u') {
expectHex();
expectHex();
expectHex();
expectHex();
count += 5;
} else {
throw new JsonException("illegal escape char: " + ch);
}
} else if (ch < 32) {
throw new JsonException("illegal control char: " + ch);
} else {
count++;
ch = reader.read();
}
}
}
private void parseList() throws IOException {
int count = 0;
listener.beginCollection();
ch = reader.read();
while (true) {
skipWhitespace();
if (ch == ']') {
listener.endCollection();
ch = reader.read();
return;
}
if (count > 0) {
expectChar(',');
ch = reader.read();
skipWhitespace();
}
parseValue();
count++;
}
}
private void parseMap() throws IOException, JsonException {
int count = 0;
listener.beginMap();
ch = reader.read();
while (true) {
skipWhitespace();
if (ch == '}') {
listener.endMap();
ch = reader.read();
return;
}
if (count > 0) {
expectChar(',');
ch = reader.read();
skipWhitespace();
}
expectChar('"');
parseString(true);
skipWhitespace();
expectChar(':');
ch = reader.read();
skipWhitespace();
parseValue();
count++;
}
}
private void parseNull() throws IOException, JsonException {
ch = reader.read();
expectChar('u');
ch = reader.read();
expectChar('l');
ch = reader.read();
expectChar('l');
listener.onNull();
ch = reader.read();
}
private void parseTrue() throws IOException, JsonException {
ch = reader.read();
expectChar('r');
ch = reader.read();
expectChar('u');
ch = reader.read();
expectChar('e');
listener.onTrue();
ch = reader.read();
}
private void parseFalse() throws IOException, JsonException {
ch = reader.read();
expectChar('a');
ch = reader.read();
expectChar('l');
ch = reader.read();
expectChar('s');
ch = reader.read();
expectChar('e');
listener.onFalse();
ch = reader.read();
}
private void expectChar(char expected) throws JsonException {
if (ch != expected) {
throw new JsonException("expected char " + expected + " but got " + (char)ch);
}
}
private void expectHex() throws IOException, JsonException {
ch = reader.read();
if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
return;
}
throw new JsonException("invalid hex char " + ch);
}
private void skipWhitespace() throws IOException {
while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
ch = reader.read();
}
}
private static CharSequence unescape(CharSequence input) {
StringBuilder result = new StringBuilder(input.length());
int i = 0;
while (i < input.length()) {
if (input.charAt(i) == '\\') {
i++;
switch (input.charAt(i)) {
case '\\' -> result.append('\\');
case '/' -> result.append('/');
case '"' -> result.append('"');
case 'b' -> result.append('\b');
case 'f' -> result.append('\f');
case 'n' -> result.append('\n');
case 'r' -> result.append('\r');
case 't' -> result.append('\t');
case 'u' -> {
result.append(Character.toChars(Integer.parseInt(input.toString().substring(i + 1, i + 5), 16)));
i += 4;
}
}
} else {
result.append(input.charAt(i));
}
i++;
}
return result;
}
}

View file

@ -0,0 +1,6 @@
package org.xbib.marc.json;
/**
 * A {@link JsonListener} that can additionally hand out a node built from the events it received.
 */
public interface JsonResultListener extends JsonListener {
/**
 * Returns the node this listener produced. {@code JsonParser} calls this after {@code end()}
 * and returns the value to its caller.
 *
 * @return the resulting node
 */
Node<?> getResult();
}

View file

@ -0,0 +1,24 @@
package org.xbib.marc.json;
/**
 * A node holding a key as a character sequence.
 */
public class KeyNode implements Node<CharSequence> {

    private CharSequence key;

    /**
     * Creates a key node.
     *
     * @param value the key, may be null
     */
    public KeyNode(CharSequence value) {
        key = value;
    }

    /**
     * Replaces the key held by this node.
     *
     * @param value the new key, may be null
     */
    public void setValue(String value) {
        key = value;
    }

    /**
     * @return the key held by this node
     */
    @Override
    public CharSequence get() {
        return key;
    }

    /**
     * @return the key as a string, or null if no key is set
     */
    @Override
    public String toString() {
        if (key == null) {
            return null;
        }
        return key.toString();
    }
}

View file

@ -0,0 +1,6 @@
package org.xbib.marc.json;
import java.util.List;
/**
 * A node whose value is a list of nodes.
 */
public interface ListNode extends Node<List<Node<?>>> {
}

View file

@ -0,0 +1,6 @@
package org.xbib.marc.json;
import java.util.Map;
/**
 * A node whose value is a map from character-sequence keys to nodes.
 */
public interface MapNode extends Node<Map<CharSequence, Node<?>>> {
}

View file

@ -0,0 +1,113 @@
package org.xbib.marc.json;
import java.util.Deque;
import java.util.LinkedList;
/**
 * A listener that assembles a tree of {@link Node} objects from JSON parse events.
 *
 * <p>Open containers and pending keys are kept on a stack. A finished value is added
 * to the list on top of the stack, or stored under the key on top of the stack in the
 * map below that key.</p>
 */
public class MarcJsonListener implements JsonResultListener {

    /** The most recently completed top-level value or container. */
    private Node<?> node;

    /** Containers that are still open, and keys still waiting for their value. */
    private final Deque<Node<?>> stack = new LinkedList<>();

    public MarcJsonListener() {
    }

    /**
     * @return the node completed last, or null if nothing has been completed since {@link #begin()}
     */
    @Override
    public Node<?> getResult() {
        return node;
    }

    /**
     * Resets the listener so that nothing from an earlier parse leaks into the next one.
     */
    @Override
    public void begin() {
        stack.clear();
        node = null;
    }

    @Override
    public void end() {
    }

    // ValueNode is mutable, so every literal gets its own node instead of a shared one.

    @Override
    public void onNull() {
        valueNode(new ValueNode(null));
    }

    @Override
    public void onTrue() {
        valueNode(new ValueNode(Boolean.TRUE));
    }

    @Override
    public void onFalse() {
        valueNode(new ValueNode(Boolean.FALSE));
    }

    /**
     * Remembers the key until the value that belongs to it arrives.
     */
    @Override
    public void onKey(CharSequence key) {
        stack.push(new KeyNode(key));
    }

    @Override
    public void onValue(CharSequence value) {
        valueNode(new ValueNode(value));
    }

    @Override
    public void onLong(Long value) {
        valueNode(new ValueNode(value));
    }

    @Override
    public void onDouble(Double value) {
        valueNode(new ValueNode(value));
    }

    @Override
    public void beginCollection() {
        stack.push(new MarcListNode());
    }

    /**
     * Closes the list on top of the stack and attaches it to its parent, if there is one.
     */
    @Override
    public void endCollection() {
        node = stack.pop();
        tryAppend(node);
    }

    @Override
    public void beginMap() {
        stack.push(new MarcMapNode());
    }

    /**
     * Closes the map on top of the stack and attaches it to its parent, if there is one.
     */
    @Override
    public void endMap() {
        node = stack.pop();
        tryAppend(node);
    }

    /**
     * Attaches a scalar to the enclosing container. A scalar without an enclosing
     * container is the whole document and becomes the result.
     */
    private void valueNode(ValueNode valueNode) {
        if (!tryAppend(valueNode)) {
            stack.push(valueNode);
            node = valueNode;
        }
    }

    /**
     * Adds {@code child} to the list on top of the stack, or stores it under the key on
     * top of the stack in the map directly below that key.
     *
     * @param child the finished node
     * @return true if the node was attached, false if the stack offers no list or key for it
     */
    private boolean tryAppend(Node<?> child) {
        Node<?> top = stack.peek();
        if (top instanceof MarcListNode listNode) {
            listNode.add(child);
            return true;
        }
        if (top instanceof KeyNode) {
            KeyNode keyNode = (KeyNode) stack.pop();
            // a key is only ever pushed while its map is the top of the stack
            MarcMapNode mapNode = (MarcMapNode) stack.peek();
            mapNode.put(keyNode.get(), child);
            return true;
        }
        return false;
    }
}

View file

@ -0,0 +1,15 @@
package org.xbib.marc.json;
import java.util.LinkedList;
import java.util.List;
/**
 * A linked list of nodes that is itself a {@link ListNode}.
 */
public class MarcListNode extends LinkedList<Node<?>> implements ListNode {

    /**
     * Creates an empty list node.
     */
    public MarcListNode() {
        super();
    }

    /**
     * @return this instance, viewed as a list of nodes
     */
    @Override
    public List<Node<?>> get() {
        return this;
    }
}

View file

@ -0,0 +1,15 @@
package org.xbib.marc.json;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * A linked hash map from keys to nodes that is itself a {@link MapNode}.
 */
public class MarcMapNode extends LinkedHashMap<CharSequence, Node<?>> implements MapNode {

    /**
     * Creates an empty map node.
     */
    public MarcMapNode() {
        super();
    }

    /**
     * @return this instance, viewed as a map of nodes
     */
    @Override
    public Map<CharSequence, Node<?>> get() {
        return this;
    }
}

View file

@ -0,0 +1,6 @@
package org.xbib.marc.json;
/**
 * A node that holds a value.
 *
 * @param <T> the type of the value
 */
public interface Node<T> {
/**
 * @return the value held by this node
 */
T get();
}

View file

@ -0,0 +1,9 @@
package org.xbib.marc.json;
import java.io.IOException;
import java.io.Reader;
/**
 * A parser that turns character input into a {@link Node}.
 */
public interface Parser {
/**
 * Parses the content supplied by the reader.
 *
 * @param reader the character source
 * @return the node that results from parsing
 * @throws IOException if reading from the reader fails
 */
Node<?> parse(Reader reader) throws IOException;
}

View file

@ -0,0 +1,29 @@
package org.xbib.marc.json;
/**
 * A node holding a single value of any type, or null.
 */
public class ValueNode implements Node<Object> {

    private Object value;

    /**
     * Creates a value node.
     *
     * @param value the value to hold, may be null
     */
    public ValueNode(Object value) {
        this.value = value;
    }

    /**
     * Returns the depth of this node, which is always 0.
     *
     * <p>Deliberately not annotated with {@code @Override}: {@link Node} declares only
     * {@code get()}, so the annotation would not compile.</p>
     *
     * @return 0
     */
    public int getDepth() {
        return 0;
    }

    /**
     * Replaces the value held by this node.
     *
     * @param value the new value, may be null
     */
    public void set(Object value) {
        this.value = value;
    }

    /**
     * @return the value held by this node, may be null
     */
    @Override
    public Object get() {
        return value;
    }

    /**
     * @return the string form of the value, or null if the value is null
     */
    @Override
    public String toString() {
        return value != null ? value.toString() : null;
    }
}