add OBVSG test with fix for subfield ID length

This commit is contained in:
Jörg Prante 2017-10-16 15:06:00 +02:00
parent d947c5cb81
commit ae5b85f923
11 changed files with 1583 additions and 1494 deletions

View file

@ -1,6 +1,6 @@
group = org.xbib group = org.xbib
name = marc name = marc
version = 1.0.13 version = 1.0.14
xbib-content.version = 1.0.7 xbib-content.version = 1.0.7
xbib-bibliographic-character-sets.version = 1.0.0 xbib-bibliographic-character-sets.version = 1.0.0

View file

@ -30,7 +30,6 @@ import org.xbib.marc.io.BufferedSeparatorInputStream;
import org.xbib.marc.io.BytesReference; import org.xbib.marc.io.BytesReference;
import org.xbib.marc.io.Chunk; import org.xbib.marc.io.Chunk;
import org.xbib.marc.io.ChunkStream; import org.xbib.marc.io.ChunkStream;
import org.xbib.marc.io.InformationSeparator;
import org.xbib.marc.label.RecordLabel; import org.xbib.marc.label.RecordLabel;
import org.xbib.marc.label.RecordLabelFixer; import org.xbib.marc.label.RecordLabelFixer;
import org.xbib.marc.transformer.MarcTransformer; import org.xbib.marc.transformer.MarcTransformer;

View file

@ -467,7 +467,7 @@ public class MarcField implements Comparable<MarcField> {
/** /**
* Set subfield with help of record label information from raw data. * Set subfield with help of record label information from raw data.
* @param label the record label * @param label the record label
* @param raw the raw data * @param raw the subfield, including ID and separator
* @return this builder * @return this builder
*/ */
public Builder subfield(RecordLabel label, String raw) { public Builder subfield(RecordLabel label, String raw) {
@ -481,6 +481,27 @@ public class MarcField implements Comparable<MarcField> {
return this; return this;
} }
/**
 * Add a subfield whose ID is taken either from the front of the given value or,
 * if the record label yields no usable subfield ID length, from a dummy ID.
 *
 * <p>The effective ID length is the subfield identifier length of the label minus one.
 * If that is zero or negative, the whole value is stored under the dummy subfield ID.
 * Otherwise the first characters of the value become the subfield ID and the remainder
 * becomes the subfield value. A value shorter than the effective ID length is not added.
 *
 * @param label the record label supplying the subfield identifier length
 * @param dummySubfieldId the subfield ID to use when the label defines no ID length
 * @param value the subfield data, starting with the subfield ID if the label defines one
 * @return this builder
 */
public Builder subfield(RecordLabel label, String dummySubfieldId, String value) {
    // NOTE(review): one is subtracted presumably because the label's identifier length
    // also counts the separator character - confirm against RecordLabel
    final int idLength = label.getSubfieldIdentifierLength() - 1;
    if (idLength <= 0) {
        // no ID is embedded in the value, so fall back to the dummy ID
        subfields.add(new Subfield(dummySubfieldId, value));
        subfieldIds.add(dummySubfieldId);
        return this;
    }
    if (value.length() < idLength) {
        // too short to carry a complete ID: nothing is added
        return this;
    }
    final String subfieldId = value.substring(0, idLength);
    final String content = value.substring(idLength);
    subfields.add(new Subfield(subfieldId, content));
    subfieldIds.add(subfieldId);
    return this;
}
/** /**
* Set a new data field with help of a record label from raw data. * Set a new data field with help of a record label from raw data.
* @param label the record label * @param label the record label

View file

@ -40,6 +40,8 @@ import java.util.List;
*/ */
public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Closeable { public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Closeable {
private static final String EMPTY = " ";
private String format; private String format;
private String type; private String type;
@ -166,11 +168,12 @@ public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Clo
int pos = recordLabel.getIndicatorLength(); int pos = recordLabel.getIndicatorLength();
builder.indicator(this.data.substring(0, pos)); builder.indicator(this.data.substring(0, pos));
if (pos < this.data.length()) { if (pos < this.data.length()) {
builder.subfield(" ", this.data.substring(pos)); builder.subfield(recordLabel, EMPTY, this.data.substring(pos));
} }
} }
} else { } else {
boolean found = false; boolean found = false;
// try more than one position
for (int offset = 1; offset < 5; offset++) { for (int offset = 1; offset < 5; offset++) {
if (directory.containsKey(position + offset)) { if (directory.containsKey(position + offset)) {
position = position + offset; position = position + offset;
@ -202,7 +205,7 @@ public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Clo
break; break;
} }
case US: /* 1f */{ case US: /* 1f */{
builder.subfield(recordLabel, this.data); builder.subfield(recordLabel, EMPTY, this.data);
break; break;
} }
default: { default: {

View file

@ -48,9 +48,7 @@ public class SWBTest {
try (InputStream in = getClass().getResource(file).openStream()) { try (InputStream in = getClass().getResource(file).openStream()) {
Marc.Builder builder = Marc.builder() Marc.Builder builder = Marc.builder()
.setInputStream(in) .setInputStream(in)
.setCharset(Charset.forName("UTF-8")) .setCharset(Charset.forName("UTF-8"));
.setFormat("Marc21")
.setType("Bibliographic");
for (MarcRecord marcRecord : builder.iterable()) { for (MarcRecord marcRecord : builder.iterable()) {
count++; count++;
} }

View file

@ -52,6 +52,10 @@ public class MabTest {
private final StringBuilder sb = new StringBuilder(); private final StringBuilder sb = new StringBuilder();
/**
* Shows how to override erroneous subfield ID length settings in the record label, a notorious problem in the MAB dialect.
* @throws Exception if the test fails
*/
@Test @Test
public void testZDB() throws Exception { public void testZDB() throws Exception {
String s = "1217zdbtit.dat"; String s = "1217zdbtit.dat";

View file

@ -0,0 +1,64 @@
package org.xbib.marc.dialects.mab;
import org.junit.Test;
import org.xbib.marc.Marc;
import org.xbib.marc.MarcRecord;
import org.xbib.marc.label.RecordLabel;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.concurrent.atomic.AtomicInteger;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Tests reading the OBVSG MAB sample files, both as a raw ISO 2709 chunk stream
 * and as an iterable of MARC records.
 */
public class OBVSGTest {

    /** Names of the sample resources, resolved relative to this test class. */
    private static final String[] FILES = {
        "obvsg1.mab",
        "obvsg2.mab",
        "obvsg3.mab"
    };

    /**
     * Reads every sample file as an ISO 2709 chunk stream and checks
     * that at least one chunk is delivered per file.
     * @throws Exception if the test fails
     */
    @Test
    public void testMarcStream() throws Exception {
        for (String resourceName : FILES) {
            AtomicInteger chunkCount = new AtomicInteger();
            try (InputStream inputStream = getClass().getResource(resourceName).openStream()) {
                Marc.Builder marcBuilder = Marc.builder();
                marcBuilder.setInputStream(inputStream);
                marcBuilder.setCharset(Charset.forName("UTF-8"));
                Marc marc = marcBuilder.build();
                marc.iso2709Stream().chunks().forEach(chunk -> chunkCount.incrementAndGet());
            }
            assertTrue(chunkCount.get() > 0);
        }
    }

    /**
     * Iterates over the MARC records of every sample file, with a record label fixer
     * that sets the subfield identifier length to zero, and checks that each file
     * yields exactly one record.
     * @throws Exception if the test fails
     */
    @Test
    public void testMarcRecordIterable() throws Exception {
        for (String resourceName : FILES) {
            int recordCount = 0;
            try (InputStream inputStream = getClass().getResource(resourceName).openStream()) {
                Marc.Builder marcBuilder = Marc.builder()
                        .setInputStream(inputStream)
                        .setCharset(Charset.forName("x-MAB"))
                        .setRecordLabelFixer(label ->
                                RecordLabel.builder().from(label).setSubfieldIdentifierLength(0).build());
                for (MarcRecord ignored : marcBuilder.iterable()) {
                    recordCount++;
                }
            }
            assertEquals(1, recordCount);
        }
    }
}

View file

@ -1,15 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="info:lc/xmlns/marcxchange-v2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:lc/xmlns/marcxchange-v2 http://www.loc.gov/standards/iso25577/marcxchange-2-0.xsd"> <collection xmlns="info:lc/xmlns/marcxchange-v2" xsi:schemaLocation="info:lc/xmlns/marcxchange-v2 http://www.loc.gov/standards/iso25577/marcxchange-2-0.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<record format="MARC21" type="Bibliographic"> <record format="MARC21" type="Bibliographic">
<leader>01794 0000445 000 </leader> <leader>01794 0000445 000 </leader>
<controlfield tag="001">000800000020003000008020004200038035001200080005001700092008008100109010001700190016002200207029002200229035001200251035001600263040001800279042000800297043003000305049000900335050002200344072001200366110004800378245024900426246002200675260008500697300004100782504005100823650004300874650004300917650004300960650004301003650004101046610005901087700002001146700002501166938005301191994001201244910002601256991006601282</controlfield> <controlfield tag="001">000800000020003000008020004200038035001200080005001700092008008100109010001700190016002200207029002200229035001200251035001600263040001800279042000800297043003000305049000900335050002200344072001200366110004800378245024900426246002200675260008500697300004100782504005100823650004300874650004300917650004300960650004301003650004101046610005901087700002001146700002501166938005301191994001201244910002601256991006601282</controlfield>
<datafield tag="260" ind1=" " ind2=" "> <datafield ind2=" " ind1=" " tag="260">
<subfield code="a">0772</subfield> <subfield code="a">0772</subfield>
</datafield> </datafield>
<datafield tag="200" ind1=" " ind2=" "> <datafield ind2=" " ind1=" " tag="200">
<subfield code="a">90311033800.0</subfield> <subfield code="a">90311033800.0</subfield>
</datafield> </datafield>
<datafield tag="051" ind1=" " ind2=" "> <datafield ind2=" " ind1=" " tag="051">
<subfield code="a">028s 01609cam 22004094a 4500 </subfield> <subfield code="a">028s 01609cam 22004094a 4500 </subfield>
</datafield> </datafield>
</record> </record>

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -24,7 +24,7 @@
<subfield code="c">Michael Chabon.</subfield> <subfield code="c">Michael Chabon.</subfield>
</datafield> </datafield>
<datafield ind2=" " ind1=" " tag="250"> <datafield ind2=" " ind1=" " tag="250">
<subfield code=" "> </subfield> <subfield code=" "></subfield>
<subfield code="a">1st ed.</subfield> <subfield code="a">1st ed.</subfield>
</datafield> </datafield>
<datafield ind2=" " ind1=" " tag="260"> <datafield ind2=" " ind1=" " tag="260">