add ISSN example

This commit is contained in:
Jörg Prante 2017-03-06 22:42:57 +01:00
parent b825b056a2
commit 6c80bd052f
6 changed files with 208 additions and 2 deletions

View file

@ -122,7 +122,6 @@ try (MarcJsonWriter writer = new MarcJsonWriter("bulk%d.jsonl.gz", 10000,
.writeCollection();
}
----
where the result can be indexed by a simple bash script using `curl`, because our JSON
@ -148,6 +147,73 @@ By executing `curl localhost:9200/_search?pretty` you can examine the result.
image:{img}/marcxchange-in-elasticsearch.png[]
### Example: finding all ISSNs
This Java program scans a MARC file for ISSN values and collects them in
JSON format (the library `org.xbib:content-core:1.0.7` is used for JSON formatting):
[source,java]
----
/**
 * Scans the MARC resource "issns.mrc" for ISSN subfields, groups the matches by field tag,
 * and logs the grouped matches together with the distinct ISSN values as pretty-printed JSON.
 *
 * @throws IOException if an I/O error occurs while reading the MARC resource or building the JSON
 */
public void findISSNs() throws IOException {
    Map<String, List<Map<String, String>>> result = new TreeMap<>();
    // set up MARC listener
    MarcListener marcListener = new MarcFieldAdapter() {
        @Override
        public void field(MarcField field) {
            Collection<Map<String, String>> values = field.getSubfields().stream()
                    .filter(f -> matchISSNField(field, f))
                    .map(f -> Collections.singletonMap(f.getId(), f.getValue()))
                    .collect(Collectors.toList());
            if (!values.isEmpty()) {
                // create the per-tag list on first use and append in a single lookup
                result.computeIfAbsent(field.getTag(), tag -> new ArrayList<>()).addAll(values);
            }
        }
    };
    // read MARC file; try-with-resources closes the input stream even if processing fails
    try (InputStream in = getClass().getResource("issns.mrc").openStream()) {
        Marc.builder()
                .setInputStream(in)
                .setMarcListener(marcListener)
                .build()
                .writeCollection();
    }
    // collect ISSNs: every collected map holds exactly one subfield id/value pair
    List<String> issns = result.values().stream()
            .flatMap(List::stream)
            .flatMap(m -> m.values().stream())
            .distinct()
            .collect(Collectors.toList());
    // JSON output
    XContentBuilder builder = contentBuilder().prettyPrint()
            .startObject();
    for (Map.Entry<String, List<Map<String, String>>> entry : result.entrySet()) {
        builder.field(entry.getKey(), entry.getValue());
    }
    builder.array("issns", issns);
    builder.endObject();
    logger.log(Level.INFO, builder.string());
}
/**
 * Decides whether a subfield of the given field is one of the tag/subfield-id
 * combinations this example treats as carrying an ISSN.
 *
 * @param field the MARC field whose tag selects the accepted subfield ids
 * @param subfield the subfield of {@code field} to test
 * @return {@code true} for subfields "a" or "f" of tag "011" and for subfield "x"
 *         of tags "421", "451", "452" and "488"; {@code false} otherwise
 */
private static boolean matchISSNField(MarcField field, MarcField.Subfield subfield) {
    final boolean matches;
    switch (field.getTag()) {
        case "011":
            matches = "a".equals(subfield.getId()) || "f".equals(subfield.getId());
            break;
        case "421":
        case "451":
        case "452":
        case "488":
            matches = "x".equals(subfield.getId());
            break;
        default:
            matches = false;
            break;
    }
    return matches;
}
----
## Bibliographic character sets
Bibliographic character sets predate the era of Unicode. Before Unicode, character sets were

View file

@ -40,6 +40,7 @@ dependencies {
testCompile "org.xmlunit:xmlunit-matchers:${project.property('xmlunit-matchers.version')}"
testCompile "com.github.stefanbirkner:system-rules:${project.property('system-rules.version')}"
testCompile "org.xbib:bibliographic-character-sets:${project.property('bibliographic-character-sets.version')}"
testCompile "org.xbib:content-core:${project.property('content-core.version')}"
asciidoclet "org.asciidoctor:asciidoclet:${project.property('asciidoclet.version')}"
wagon "org.apache.maven.wagon:wagon-ssh-external:${project.property('wagon-ssh-external.version')}"
}

View file

@ -1,11 +1,12 @@
group = org.xbib
name = marc
version = 1.0.10
version = 1.0.11
junit.version = 4.12
xalan.version = 2.7.2
xmlunit-matchers.version = 2.3.0
system-rules.version = 1.16.0
bibliographic-character-sets.version = 1.0.0
content-core.version = 1.0.7
asciidoclet.version = 1.5.4
wagon-ssh-external.version = 2.10

View file

@ -0,0 +1,46 @@
/*
Copyright 2016 Jörg Prante
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.xbib.marc;
/**
 * A {@link MarcListener} whose callbacks all have empty bodies.
 * Extend it and override only the callbacks of interest.
 */
public class MarcFieldAdapter implements MarcListener {

    /** Does nothing by default. */
    @Override
    public void beginCollection() {
        // no-op
    }

    /** Does nothing by default. */
    @Override
    public void beginRecord(String format, String type) {
        // no-op
    }

    /** Does nothing by default. */
    @Override
    public void leader(String label) {
        // no-op
    }

    /** Does nothing by default. */
    @Override
    public void field(MarcField field) {
        // no-op
    }

    /** Does nothing by default. */
    @Override
    public void endRecord() {
        // no-op
    }

    /** Does nothing by default. */
    @Override
    public void endCollection() {
        // no-op
    }
}

View file

@ -0,0 +1,91 @@
package org.xbib.marc.filter;
import static org.xbib.content.json.JsonXContent.contentBuilder;

import org.junit.Test;
import org.xbib.content.XContentBuilder;
import org.xbib.marc.Marc;
import org.xbib.marc.MarcField;
import org.xbib.marc.MarcFieldAdapter;
import org.xbib.marc.MarcListener;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
/**
 * Demo of collecting ISSNs from a MARC file.
 *
 * "issns.mrc" courtesy of Steven Hirren (steven.hirren.gmail.com)
 */
public class MarcFieldFilterTest {

    private static final Logger logger = Logger.getLogger(MarcFieldFilterTest.class.getName());

    /**
     * Reads the MARC resource "issns.mrc", gathers the ISSN subfields grouped by field tag,
     * and logs the grouped matches plus the distinct ISSN values as pretty-printed JSON.
     *
     * @throws IOException if an I/O error occurs while reading the MARC resource or building the JSON
     */
    @Test
    public void findISSNs() throws IOException {
        Map<String, List<Map<String, String>>> result = new TreeMap<>();
        // set up MARC listener
        MarcListener marcListener = new MarcFieldAdapter() {
            @Override
            public void field(MarcField field) {
                Collection<Map<String, String>> values = field.getSubfields().stream()
                        .filter(f -> matchISSNField(field, f))
                        .map(f -> Collections.singletonMap(f.getId(), f.getValue()))
                        .collect(Collectors.toList());
                if (!values.isEmpty()) {
                    // create the per-tag list on first use and append in a single lookup
                    result.computeIfAbsent(field.getTag(), tag -> new ArrayList<>()).addAll(values);
                }
            }
        };
        // read MARC file; try-with-resources closes the input stream even if processing fails
        try (InputStream in = getClass().getResource("issns.mrc").openStream()) {
            Marc.builder()
                    .setInputStream(in)
                    .setMarcListener(marcListener)
                    .build()
                    .writeCollection();
        }
        // collect ISSNs: every collected map holds exactly one subfield id/value pair
        List<String> issns = result.values().stream()
                .flatMap(List::stream)
                .flatMap(m -> m.values().stream())
                .distinct()
                .collect(Collectors.toList());
        // JSON output
        XContentBuilder builder = contentBuilder().prettyPrint()
                .startObject();
        for (Map.Entry<String, List<Map<String, String>>> entry : result.entrySet()) {
            builder.field(entry.getKey(), entry.getValue());
        }
        builder.array("issns", issns);
        builder.endObject();
        logger.log(Level.INFO, builder.string());
    }

    /**
     * Decides whether a subfield of the given field is one of the tag/subfield-id
     * combinations this demo treats as carrying an ISSN.
     *
     * @param field the MARC field whose tag selects the accepted subfield ids
     * @param subfield the subfield of {@code field} to test
     * @return {@code true} for subfields "a" or "f" of tag "011" and for subfield "x"
     *         of tags "421", "451", "452" and "488"; {@code false} otherwise
     */
    private static boolean matchISSNField(MarcField field, MarcField.Subfield subfield) {
        switch (field.getTag()) {
            case "011":
                return "a".equals(subfield.getId()) || "f".equals(subfield.getId());
            case "421":
            case "451":
            case "452":
            case "488":
                return "x".equals(subfield.getId());
            default:
                return false;
        }
    }
}

View file

@ -0,0 +1 @@
00675cas a2200253 a 450 001000300000005001300003011002500016035001900041100004100060102000700101105001800108106000600126110001600132200002200148207003100170210004500201452004200246530002200288531001600310676001600326801001300342801001900355856004700374202013060511330 a0261-3794f0261-3794 a(ISSN)02613794 a20130605a19829999 ba aGB a 1  ar aaha 1 aElectoral studies0 aVol. 1, no. 1 (Apr. 1982)- aGuildfordcButterworth Scientificd1982-11tElectoral studies (Online)x1873-6890 aElectoral studies aElect. stud a324.605v19 0aFRbISSN 2aGRbEKTgAACR24 uhttps://eskep.ekt.gr/eskep/journal/show/2000874cas a2200301 450 001000300000005001300003011002500016035001900041040001100060100004100071102000700112105001800119106000600137110001600143200003400159207002100193210004600214321003400260452005400294530003400348531001900382676001100401711003000412801001300442801001900455856005100474856004700525402013060511270 a0001-4842f0001-4842 a(ISSN)00014842 aACHRE4 a20130605a19689999 ba aUS a 1  ar aafa 1 aAccounts of chemical research0 av. 1- Jan. 1968- aWashington, DCcAmerican Chemical Society aChemical abstractsx0009-225810tAccounts of chemical research (Online)x1520-4898 aAccounts of chemical research aAcc. chem. res a540/.5 2aAmerican Chemical Society 0aFRbISSN 2aGRbEKTgAACR24 uhttp://pubs.acs.org/journals/achre4/index.html4 uhttps://eskep.ekt.gr/eskep/journal/show/4000927cas a2200301 i 450 001000300000005001300003011002500016035001900041100004100060102000700101106000600108110001600114200002300130210003000153421004800183421003500231421004100266434004300307434004800350452005400398530002300452531001800475675002400493711002900517801001300546801001900559856004700578602013060511310 a0001-5342f0001-5342 a(ISSN)00015342 a20130605a19359999 | ba aNL ar aah 1 aActa biotheoretica aLeidencRijksuniversiteit 0tActa biotheoretica. Supplementumx0169-7242 0tFolia biotheoreticax0920-2676 1tBibliotheca biotheoreticax0373-3408 tBibliographia biotheoreticax0920-2684 tActa biotheoretica. 
Supplementumx0169-724211tActa biotheoretica (Dordrecht. Online)x1572-8358 aActa biotheoretica aActa biotheor a573v15th Dutch ed. 2aRijksuniversiteit Leiden 0aFRbISSN 2aGRbEKTgAACR24 uhttps://eskep.ekt.gr/eskep/journal/show/60