add OBVSG test with fix for subfield ID length
This commit is contained in:
parent
d947c5cb81
commit
ae5b85f923
11 changed files with 1583 additions and 1494 deletions
|
@ -1,6 +1,6 @@
|
|||
group = org.xbib
|
||||
name = marc
|
||||
version = 1.0.13
|
||||
version = 1.0.14
|
||||
|
||||
xbib-content.version = 1.0.7
|
||||
xbib-bibliographic-character-sets.version = 1.0.0
|
||||
|
|
|
@ -30,7 +30,6 @@ import org.xbib.marc.io.BufferedSeparatorInputStream;
|
|||
import org.xbib.marc.io.BytesReference;
|
||||
import org.xbib.marc.io.Chunk;
|
||||
import org.xbib.marc.io.ChunkStream;
|
||||
import org.xbib.marc.io.InformationSeparator;
|
||||
import org.xbib.marc.label.RecordLabel;
|
||||
import org.xbib.marc.label.RecordLabelFixer;
|
||||
import org.xbib.marc.transformer.MarcTransformer;
|
||||
|
|
|
@ -467,7 +467,7 @@ public class MarcField implements Comparable<MarcField> {
|
|||
/**
|
||||
* Set subfield with help of record label information from raw data.
|
||||
* @param label the record label
|
||||
* @param raw the raw data
|
||||
* @param raw the subfield, including ID and separator
|
||||
* @return this builder
|
||||
*/
|
||||
public Builder subfield(RecordLabel label, String raw) {
|
||||
|
@ -481,6 +481,27 @@ public class MarcField implements Comparable<MarcField> {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set synthetic subfield with help of record label information from raw data.
|
||||
* If the length of the subfield ID is zero or undefined, the dummy subfield ID is used.
|
||||
* @param label the record label
|
||||
* @param dummySubfieldId the dummy subfield ID
|
||||
* @param value the subfield value
|
||||
* @return this builder
|
||||
*/
|
||||
public Builder subfield(RecordLabel label, String dummySubfieldId, String value) {
|
||||
int len = label.getSubfieldIdentifierLength() - 1;
|
||||
if (len <= 0) {
|
||||
subfields.add(new Subfield(dummySubfieldId, value));
|
||||
subfieldIds.add(dummySubfieldId);
|
||||
} else if (value.length() >= len) {
|
||||
String id = value.substring(0, len);
|
||||
subfields.add(new Subfield(id, value.substring(len)));
|
||||
subfieldIds.add(id);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a new data field with help of a record label from raw data.
|
||||
* @param label the record label
|
||||
|
|
|
@ -40,6 +40,8 @@ import java.util.List;
|
|||
*/
|
||||
public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Closeable {
|
||||
|
||||
private static final String EMPTY = " ";
|
||||
|
||||
private String format;
|
||||
|
||||
private String type;
|
||||
|
@ -166,11 +168,12 @@ public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Clo
|
|||
int pos = recordLabel.getIndicatorLength();
|
||||
builder.indicator(this.data.substring(0, pos));
|
||||
if (pos < this.data.length()) {
|
||||
builder.subfield(" ", this.data.substring(pos));
|
||||
builder.subfield(recordLabel, EMPTY, this.data.substring(pos));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
boolean found = false;
|
||||
// try more than one position
|
||||
for (int offset = 1; offset < 5; offset++) {
|
||||
if (directory.containsKey(position + offset)) {
|
||||
position = position + offset;
|
||||
|
@ -202,7 +205,7 @@ public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Clo
|
|||
break;
|
||||
}
|
||||
case US: /* 1f */{
|
||||
builder.subfield(recordLabel, this.data);
|
||||
builder.subfield(recordLabel, EMPTY, this.data);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
|
|
|
@ -48,9 +48,7 @@ public class SWBTest {
|
|||
try (InputStream in = getClass().getResource(file).openStream()) {
|
||||
Marc.Builder builder = Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(Charset.forName("UTF-8"))
|
||||
.setFormat("Marc21")
|
||||
.setType("Bibliographic");
|
||||
.setCharset(Charset.forName("UTF-8"));
|
||||
for (MarcRecord marcRecord : builder.iterable()) {
|
||||
count++;
|
||||
}
|
||||
|
|
|
@ -52,6 +52,10 @@ public class MabTest {
|
|||
|
||||
private final StringBuilder sb = new StringBuilder();
|
||||
|
||||
/**
|
||||
* Shows how to override erroneous subfield ID length settings in the record label, a notorious problem in the MAB dialect.
|
||||
* @throws Exception if test fails
|
||||
*/
|
||||
@Test
|
||||
public void testZDB() throws Exception {
|
||||
String s = "1217zdbtit.dat";
|
||||
|
|
64
src/test/java/org/xbib/marc/dialects/mab/OBVSGTest.java
Normal file
64
src/test/java/org/xbib/marc/dialects/mab/OBVSGTest.java
Normal file
|
@ -0,0 +1,64 @@
|
|||
package org.xbib.marc.dialects.mab;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.xbib.marc.Marc;
|
||||
import org.xbib.marc.MarcRecord;
|
||||
import org.xbib.marc.label.RecordLabel;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
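* Tests reading OBVSG MAB files, both as an ISO 2709 chunk stream and as an iterable of MARC records.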
|
||||
*/
|
||||
public class OBVSGTest {
|
||||
|
||||
@Test
|
||||
public void testMarcStream() throws Exception {
|
||||
String[] files = {
|
||||
"obvsg1.mab",
|
||||
"obvsg2.mab",
|
||||
"obvsg3.mab"
|
||||
};
|
||||
for (String file : files) {
|
||||
AtomicInteger count = new AtomicInteger();
|
||||
try (InputStream in = getClass().getResource(file).openStream()) {
|
||||
Marc marc = Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(Charset.forName("UTF-8"))
|
||||
.build();
|
||||
marc.iso2709Stream().chunks().forEach(chunk -> {
|
||||
count.incrementAndGet();
|
||||
});
|
||||
}
|
||||
assertTrue(count.get() > 0);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMarcRecordIterable() throws Exception {
|
||||
String[] files = {
|
||||
"obvsg1.mab",
|
||||
"obvsg2.mab",
|
||||
"obvsg3.mab"
|
||||
};
|
||||
for (String file : files) {
|
||||
int count = 0;
|
||||
try (InputStream in = getClass().getResource(file).openStream()) {
|
||||
Marc.Builder builder = Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(Charset.forName("x-MAB"))
|
||||
.setRecordLabelFixer(recordLabel ->
|
||||
RecordLabel.builder().from(recordLabel).setSubfieldIdentifierLength(0).build());
|
||||
for (MarcRecord marcRecord : builder.iterable()) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
assertEquals(1, count);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,15 +1,15 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<collection xmlns="info:lc/xmlns/marcxchange-v2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:lc/xmlns/marcxchange-v2 http://www.loc.gov/standards/iso25577/marcxchange-2-0.xsd">
|
||||
<collection xmlns="info:lc/xmlns/marcxchange-v2" xsi:schemaLocation="info:lc/xmlns/marcxchange-v2 http://www.loc.gov/standards/iso25577/marcxchange-2-0.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<record format="MARC21" type="Bibliographic">
|
||||
<leader>01794 0000445 000 </leader>
|
||||
<controlfield tag="001">000800000020003000008020004200038035001200080005001700092008008100109010001700190016002200207029002200229035001200251035001600263040001800279042000800297043003000305049000900335050002200344072001200366110004800378245024900426246002200675260008500697300004100782504005100823650004300874650004300917650004300960650004301003650004101046610005901087700002001146700002501166938005301191994001201244910002601256991006601282</controlfield>
|
||||
<datafield tag="260" ind1=" " ind2=" ">
|
||||
<datafield ind2=" " ind1=" " tag="260">
|
||||
<subfield code="a">0772</subfield>
|
||||
</datafield>
|
||||
<datafield tag="200" ind1=" " ind2=" ">
|
||||
<datafield ind2=" " ind1=" " tag="200">
|
||||
<subfield code="a">90311033800.0</subfield>
|
||||
</datafield>
|
||||
<datafield tag="051" ind1=" " ind2=" ">
|
||||
<datafield ind2=" " ind1=" " tag="051">
|
||||
<subfield code="a">028s 01609cam 22004094a 4500 </subfield>
|
||||
</datafield>
|
||||
</record>
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because one or more lines are too long
|
@ -24,7 +24,7 @@
|
|||
<subfield code="c">Michael Chabon.</subfield>
|
||||
</datafield>
|
||||
<datafield ind2=" " ind1=" " tag="250">
|
||||
<subfield code=" "> </subfield>
|
||||
<subfield code=" "></subfield>
|
||||
<subfield code="a">1st ed.</subfield>
|
||||
</datafield>
|
||||
<datafield ind2=" " ind1=" " tag="260">
|
||||
|
|
Loading…
Reference in a new issue