make replace faster in strict field validation, remove spurious replaces
This commit is contained in:
parent
9950003ea7
commit
6ae3134827
6 changed files with 67 additions and 101 deletions
|
@ -47,15 +47,13 @@ public class StrictMarcFieldValidator implements MarcFieldValidator {
|
|||
public String validateTag(String tagCandidate) {
|
||||
String tag = tagCandidate;
|
||||
if (tag != null) {
|
||||
// We have inconsistent use of tag symbols as placeholders for a "blank space"
|
||||
// and we need to fix it here for consistency.
|
||||
tag = tag.replace('-', BLANK)
|
||||
.replace('#', BLANK)
|
||||
.replace('.', BLANK)
|
||||
.replace('_', BLANK);
|
||||
// do not allow empty tags
|
||||
if (tag.isEmpty()) {
|
||||
tag = BLANK_TAG;
|
||||
} else {
|
||||
// We have inconsistent use of tag symbols as placeholders for a "blank space"
|
||||
// and we need to fix it here for consistency.
|
||||
tag = tag.replaceAll("[-#.^_]", BLANK_STRING);
|
||||
}
|
||||
}
|
||||
return tag;
|
||||
|
@ -65,16 +63,13 @@ public class StrictMarcFieldValidator implements MarcFieldValidator {
|
|||
public String validateIndicator(String indicatorCandidate) {
|
||||
String indicator = indicatorCandidate;
|
||||
if (indicator != null) {
|
||||
// We have inconsistent use of indicator symbols as placeholders for a "blank space"
|
||||
// and we need to fix it here for consistency.
|
||||
indicator = indicator
|
||||
.replace('-', BLANK)
|
||||
.replace('#', BLANK)
|
||||
.replace('.', BLANK)
|
||||
.replace('_', BLANK);
|
||||
// we do not allow an empty indicator. Elasticsearch field names require a length > 0.
|
||||
if (indicator.isEmpty()) {
|
||||
indicator = BLANK_STRING;
|
||||
} else {
|
||||
// We have inconsistent use of indicator symbols as placeholders for a "blank space"
|
||||
// and we need to fix it here for consistency.
|
||||
indicator = indicator.replaceAll("[-#.^_]", BLANK_STRING);
|
||||
}
|
||||
}
|
||||
return indicator;
|
||||
|
@ -84,15 +79,13 @@ public class StrictMarcFieldValidator implements MarcFieldValidator {
|
|||
public String validateSubfieldId(String subfieldIdCandidate) {
|
||||
String id = subfieldIdCandidate;
|
||||
if (id != null) {
|
||||
// We have inconsistent use of subfield id symbols as placeholders for a "blank space"
|
||||
// and we need to fix it here for consistency.
|
||||
id = id.replace('-', BLANK)
|
||||
.replace('#', BLANK)
|
||||
.replace('.', BLANK)
|
||||
.replace('_', BLANK);
|
||||
// we do not allow an empty subfield id. Elasticsearch field names require a length > 0.
|
||||
if (id.isEmpty()) {
|
||||
id = BLANK_STRING;
|
||||
} else {
|
||||
// We have inconsistent use of subfield id symbols as placeholders for a "blank space"
|
||||
// and we need to fix it here for consistency.
|
||||
id = id.replaceAll("[-#.^_]", BLANK_STRING);
|
||||
}
|
||||
}
|
||||
return id;
|
||||
|
|
|
@ -73,7 +73,7 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
|
||||
private boolean fatalErrors;
|
||||
|
||||
private EnumSet<Style> style;
|
||||
private final EnumSet<Style> style;
|
||||
|
||||
private Exception exception;
|
||||
|
||||
|
@ -83,7 +83,7 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
|
||||
private int splitlimit;
|
||||
|
||||
private int bufferSize;
|
||||
private final int bufferSize;
|
||||
|
||||
private boolean compress;
|
||||
|
||||
|
@ -143,44 +143,6 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
newWriter(fileNamePattern, fileNameCounter, bufferSize, compress);
|
||||
}
|
||||
|
||||
private static String escape(String value) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < value.length(); i++) {
|
||||
char c = value.charAt(i);
|
||||
switch (c) {
|
||||
case '"':
|
||||
sb.append("\\\"");
|
||||
break;
|
||||
case '\\':
|
||||
sb.append("\\\\");
|
||||
break;
|
||||
case '\b':
|
||||
sb.append("\\b");
|
||||
break;
|
||||
case '\f':
|
||||
sb.append("\\f");
|
||||
break;
|
||||
case '\n':
|
||||
sb.append("\\n");
|
||||
break;
|
||||
case '\r':
|
||||
sb.append("\\r");
|
||||
break;
|
||||
case '\t':
|
||||
sb.append("\\t");
|
||||
break;
|
||||
default:
|
||||
if (c < 0x1f) {
|
||||
sb.append("\\u").append(String.format("%04x", (int) c));
|
||||
} else {
|
||||
sb.append(c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public MarcJsonWriter setIndex(String index, String indexType) {
|
||||
this.index = index;
|
||||
this.indexType = indexType;
|
||||
|
@ -375,7 +337,7 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
jsonWriter.writeArrayClose();
|
||||
} else {
|
||||
jsonWriter.writeObjectOpen();
|
||||
jsonWriter.writeMemberName(marcField.getIndicator().replace(' ', '_'));
|
||||
jsonWriter.writeMemberName(marcField.getIndicator());
|
||||
jsonWriter.writeMemberSeparator();
|
||||
jsonWriter.writeArrayOpen();
|
||||
boolean subfieldseparator = false;
|
||||
|
@ -399,7 +361,7 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
}
|
||||
|
||||
/**
|
||||
* Write MARC record from underlying map as key-oriented JSON.
|
||||
* Write MARC record from underlying map as key-oriented JSON. Use repeat maps to create lists.
|
||||
* @param marcRecord the MARC record
|
||||
* @throws IOException if writing fails
|
||||
*/
|
||||
|
@ -583,15 +545,16 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
jsonWriter = new JsonWriter(writer);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void writeMetaDataLine(MarcRecord marcRecord) {
|
||||
String id;
|
||||
Object object = marcRecord.get("001");
|
||||
// step down to indicator/subfield ID levels if possible, get first value, assuming single field/value in 001
|
||||
if (object instanceof Map) {
|
||||
object = ((Map) object).values().iterator().next();
|
||||
object = ((Map<String, Object>) object).values().iterator().next();
|
||||
}
|
||||
if (object instanceof Map) {
|
||||
object = ((Map) object).values().iterator().next();
|
||||
object = ((Map<String, Object>) object).values().iterator().next();
|
||||
}
|
||||
id = object.toString();
|
||||
if (index != null && indexType != null && id != null) {
|
||||
|
@ -614,6 +577,44 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
}
|
||||
}
|
||||
|
||||
private static String escape(String value) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < value.length(); i++) {
|
||||
char c = value.charAt(i);
|
||||
switch (c) {
|
||||
case '"':
|
||||
sb.append("\\\"");
|
||||
break;
|
||||
case '\\':
|
||||
sb.append("\\\\");
|
||||
break;
|
||||
case '\b':
|
||||
sb.append("\\b");
|
||||
break;
|
||||
case '\f':
|
||||
sb.append("\\f");
|
||||
break;
|
||||
case '\n':
|
||||
sb.append("\\n");
|
||||
break;
|
||||
case '\r':
|
||||
sb.append("\\r");
|
||||
break;
|
||||
case '\t':
|
||||
sb.append("\\t");
|
||||
break;
|
||||
default:
|
||||
if (c < 0x1f) {
|
||||
sb.append("\\u").append(String.format("%04x", (int) c));
|
||||
} else {
|
||||
sb.append(c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
|
|
@ -133,19 +133,7 @@ public class InverseMarcContentHandler implements MarcListener, MarcXchangeConst
|
|||
attrs.addAttribute(EMPTY_STRING, TAG_ATTRIBUTE, prefix(TAG_ATTRIBUTE), CDATA, field.getTag());
|
||||
contentHandler.startElement(nsUri, CONTROLFIELD, prefix(CONTROLFIELD), attrs);
|
||||
String value = field.getValue();
|
||||
if (value != null && !value.isEmpty()) {
|
||||
switch (field.getTag()) {
|
||||
case "006":
|
||||
case "007":
|
||||
case "008":
|
||||
// fix wrong fill characters here
|
||||
value = value.replace('^', '|');
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
contentHandler.characters(value.toCharArray(), 0, value.length());
|
||||
}
|
||||
contentHandler.characters(value.toCharArray(), 0, value.length());
|
||||
contentHandler.endElement(nsUri, CONTROLFIELD, prefix(CONTROLFIELD));
|
||||
} else {
|
||||
String tag = field.getTag();
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.URL;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.Channels;
|
||||
import java.nio.channels.ReadableByteChannel;
|
||||
|
@ -91,31 +92,15 @@ public class StreamMatcher {
|
|||
}
|
||||
}
|
||||
|
||||
public static void compareTwoXmls(Class<?> cl, String resourceName,
|
||||
String suffix1, Producer producer1,
|
||||
String suffix2, Producer producer2) throws IOException {
|
||||
Path path1 = Files.createTempFile(resourceName, suffix1);
|
||||
Path path2 = Files.createTempFile(resourceName, suffix2);
|
||||
try (InputStream inputStream1 = cl.getResource(resourceName).openStream();
|
||||
OutputStream outputStream1 = Files.newOutputStream(path1);
|
||||
InputStream inputStream2 = Files.newInputStream(path1);
|
||||
OutputStream outputStream2 = Files.newOutputStream(path2)) {
|
||||
producer1.produce(inputStream1, outputStream1);
|
||||
producer2.produce(inputStream2, outputStream2);
|
||||
assertThat("XML check of " + path1, path1, CompareMatcher.isIdenticalTo(cl.getResource(resourceName + suffix1).openStream()));
|
||||
assertThat("XML check of " + path2, path2, CompareMatcher.isIdenticalTo(cl.getResource(resourceName + suffix2).openStream()));
|
||||
} finally {
|
||||
Files.delete(path1);
|
||||
Files.delete(path2);
|
||||
}
|
||||
}
|
||||
|
||||
public static void generate(Class<?> cl, String resourceName, String suffix, Producer producer) throws IOException {
|
||||
Path path = Paths.get("src/test/resources", cl.getPackageName().replace('.', '/'), resourceName + suffix);
|
||||
logger.log(Level.INFO, "path = " + path);
|
||||
try (InputStream inputStream = cl.getResource(resourceName).openStream();
|
||||
OutputStream outputStream = Files.newOutputStream(path)) {
|
||||
producer.produce(inputStream, outputStream);
|
||||
URL url = cl.getResource(resourceName);
|
||||
if (url != null) {
|
||||
try (InputStream inputStream = url.openStream();
|
||||
OutputStream outputStream = Files.newOutputStream(path)) {
|
||||
producer.produce(inputStream, outputStream);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -352,8 +352,7 @@ public class MarcJsonWriterTest {
|
|||
"test_ubl.mrc"
|
||||
}) {
|
||||
StreamMatcher.fileMatch(getClass(), s, ".json", (inputStream, outputStream) -> {
|
||||
try (MarcJsonWriter writer = new MarcJsonWriter(outputStream, EnumSet.of(MarcJsonWriter.Style.ALLOW_DUPLICATES))
|
||||
) {
|
||||
try (MarcJsonWriter writer = new MarcJsonWriter(outputStream, EnumSet.of(MarcJsonWriter.Style.ALLOW_DUPLICATES))) {
|
||||
Marc.builder()
|
||||
.setFormat(MarcXchangeConstants.MARCXCHANGE_FORMAT)
|
||||
.setType(MarcXchangeConstants.BIBLIOGRAPHIC_TYPE)
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue