Add OBVSG test with fix for subfield ID length

This commit is contained in:
Jörg Prante 2017-10-16 15:06:00 +02:00
parent d947c5cb81
commit ae5b85f923
11 changed files with 1583 additions and 1494 deletions

View file

@ -1,6 +1,6 @@
group = org.xbib
name = marc
version = 1.0.13
version = 1.0.14
xbib-content.version = 1.0.7
xbib-bibliographic-character-sets.version = 1.0.0

View file

@ -30,7 +30,6 @@ import org.xbib.marc.io.BufferedSeparatorInputStream;
import org.xbib.marc.io.BytesReference;
import org.xbib.marc.io.Chunk;
import org.xbib.marc.io.ChunkStream;
import org.xbib.marc.io.InformationSeparator;
import org.xbib.marc.label.RecordLabel;
import org.xbib.marc.label.RecordLabelFixer;
import org.xbib.marc.transformer.MarcTransformer;

View file

@ -467,7 +467,7 @@ public class MarcField implements Comparable<MarcField> {
/**
* Set subfield with help of record label information from raw data.
* @param label the record label
* @param raw the raw data
* @param raw the subfield, including ID and separator
* @return this builder
*/
public Builder subfield(RecordLabel label, String raw) {
@ -481,6 +481,27 @@ public class MarcField implements Comparable<MarcField> {
return this;
}
/**
 * Add a subfield whose ID is either cut from the front of the given value or
 * replaced by a dummy ID, depending on the record label.
 * The ID length is the label's subfield identifier length minus one. If that
 * length is zero or negative, the dummy subfield ID is used and the value is
 * stored unchanged. Otherwise the first characters of the value become the ID
 * and the remainder becomes the subfield value; a value shorter than the ID
 * length is not added at all.
 * @param label the record label
 * @param dummySubfieldId the subfield ID to use when the label defines no usable ID length
 * @param value the subfield value, possibly prefixed by the subfield ID
 * @return this builder
 */
public Builder subfield(RecordLabel label, String dummySubfieldId, String value) {
    final int idLength = label.getSubfieldIdentifierLength() - 1;
    final String id;
    final String content;
    if (idLength <= 0) {
        // no ID encoded in the value: fall back to the dummy ID
        id = dummySubfieldId;
        content = value;
    } else if (value.length() < idLength) {
        // value too short to even hold the ID: nothing to add
        return this;
    } else {
        id = value.substring(0, idLength);
        content = value.substring(idLength);
    }
    subfields.add(new Subfield(id, content));
    subfieldIds.add(id);
    return this;
}
/**
* Set a new data field with help of a record label from raw data.
* @param label the record label

View file

@ -40,6 +40,8 @@ import java.util.List;
*/
public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Closeable {
private static final String EMPTY = " ";
private String format;
private String type;
@ -166,11 +168,12 @@ public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Clo
int pos = recordLabel.getIndicatorLength();
builder.indicator(this.data.substring(0, pos));
if (pos < this.data.length()) {
builder.subfield(" ", this.data.substring(pos));
builder.subfield(recordLabel, EMPTY, this.data.substring(pos));
}
}
} else {
boolean found = false;
// try more than one position
for (int offset = 1; offset < 5; offset++) {
if (directory.containsKey(position + offset)) {
position = position + offset;
@ -202,7 +205,7 @@ public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Clo
break;
}
case US: /* 1f */{
builder.subfield(recordLabel, this.data);
builder.subfield(recordLabel, EMPTY, this.data);
break;
}
default: {

View file

@ -48,9 +48,7 @@ public class SWBTest {
try (InputStream in = getClass().getResource(file).openStream()) {
Marc.Builder builder = Marc.builder()
.setInputStream(in)
.setCharset(Charset.forName("UTF-8"))
.setFormat("Marc21")
.setType("Bibliographic");
.setCharset(Charset.forName("UTF-8"));
for (MarcRecord marcRecord : builder.iterable()) {
count++;
}

View file

@ -52,6 +52,10 @@ public class MabTest {
private final StringBuilder sb = new StringBuilder();
/**
* Shows how to override erroneous subfield ID length label settings, which are notorious in the MAB dialect.
* @throws Exception if the test fails
*/
@Test
public void testZDB() throws Exception {
String s = "1217zdbtit.dat";

View file

@ -0,0 +1,64 @@
package org.xbib.marc.dialects.mab;
import org.junit.Test;
import org.xbib.marc.Marc;
import org.xbib.marc.MarcRecord;
import org.xbib.marc.label.RecordLabel;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.concurrent.atomic.AtomicInteger;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Reads the OBVSG MAB sample files, once as a raw ISO 2709 chunk stream and once
 * as an iterable of MARC records with the subfield identifier length forced to zero.
 */
public class OBVSGTest {

    /** Sample files, looked up as resources relative to this class. */
    private static final String[] FILES = {
        "obvsg1.mab",
        "obvsg2.mab",
        "obvsg3.mab"
    };

    /**
     * Streams each sample file as ISO 2709 chunks and expects at least one chunk per file.
     * @throws Exception if a resource can not be opened or read
     */
    @Test
    public void testMarcStream() throws Exception {
        for (String name : FILES) {
            AtomicInteger chunkCount = new AtomicInteger();
            try (InputStream inputStream = getClass().getResource(name).openStream()) {
                Marc marc = Marc.builder()
                        .setInputStream(inputStream)
                        .setCharset(Charset.forName("UTF-8"))
                        .build();
                marc.iso2709Stream().chunks().forEach(chunk -> {
                    chunkCount.incrementAndGet();
                });
            }
            assertTrue(chunkCount.get() > 0);
        }
    }

    /**
     * Iterates the records of each sample file using the x-MAB charset and a record
     * label fixer that sets the subfield identifier length to zero, and expects
     * exactly one record per file.
     * @throws Exception if a resource can not be opened or read
     */
    @Test
    public void testMarcRecordIterable() throws Exception {
        for (String name : FILES) {
            int recordCount = 0;
            try (InputStream inputStream = getClass().getResource(name).openStream()) {
                Marc.Builder builder = Marc.builder()
                        .setInputStream(inputStream)
                        .setCharset(Charset.forName("x-MAB"))
                        .setRecordLabelFixer(label ->
                                RecordLabel.builder().from(label).setSubfieldIdentifierLength(0).build());
                for (MarcRecord ignored : builder.iterable()) {
                    recordCount++;
                }
            }
            assertEquals(1, recordCount);
        }
    }
}

View file

@ -1,15 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="info:lc/xmlns/marcxchange-v2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:lc/xmlns/marcxchange-v2 http://www.loc.gov/standards/iso25577/marcxchange-2-0.xsd">
<collection xmlns="info:lc/xmlns/marcxchange-v2" xsi:schemaLocation="info:lc/xmlns/marcxchange-v2 http://www.loc.gov/standards/iso25577/marcxchange-2-0.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<record format="MARC21" type="Bibliographic">
<leader>01794 0000445 000 </leader>
<controlfield tag="001">000800000020003000008020004200038035001200080005001700092008008100109010001700190016002200207029002200229035001200251035001600263040001800279042000800297043003000305049000900335050002200344072001200366110004800378245024900426246002200675260008500697300004100782504005100823650004300874650004300917650004300960650004301003650004101046610005901087700002001146700002501166938005301191994001201244910002601256991006601282</controlfield>
<datafield tag="260" ind1=" " ind2=" ">
<datafield ind2=" " ind1=" " tag="260">
<subfield code="a">0772</subfield>
</datafield>
<datafield tag="200" ind1=" " ind2=" ">
<datafield ind2=" " ind1=" " tag="200">
<subfield code="a">90311033800.0</subfield>
</datafield>
<datafield tag="051" ind1=" " ind2=" ">
<datafield ind2=" " ind1=" " tag="051">
<subfield code="a">028s 01609cam 22004094a 4500 </subfield>
</datafield>
</record>

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -24,7 +24,7 @@
<subfield code="c">Michael Chabon.</subfield>
</datafield>
<datafield ind2=" " ind1=" " tag="250">
<subfield code=" "> </subfield>
<subfield code=" "></subfield>
<subfield code="a">1st ed.</subfield>
</datafield>
<datafield ind2=" " ind1=" " tag="260">