add ISSN example
This commit is contained in:
parent
b825b056a2
commit
6c80bd052f
6 changed files with 208 additions and 2 deletions
68
README.adoc
68
README.adoc
|
@ -122,7 +122,6 @@ try (MarcJsonWriter writer = new MarcJsonWriter("bulk%d.jsonl.gz", 10000,
|
||||||
.writeCollection();
|
.writeCollection();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
----
|
----
|
||||||
|
|
||||||
where the result can be indexed by a simple bash script using `curl`, because our JSON
|
where the result can be indexed by a simple bash script using `curl`, because our JSON
|
||||||
|
@ -148,6 +147,73 @@ By executing `curl localhost:9200/_search?pretty` you can examine the result.
|
||||||
|
|
||||||
image:{img}/marcxchange-in-elasticsearch.png[]
|
image:{img}/marcxchange-in-elasticsearch.png[]
|
||||||
|
|
||||||
|
### Example: finding all ISSNs
|
||||||
|
|
||||||
|
This Java program scans through a MARC file, checks for ISSN values, and collects them in
|
||||||
|
JSON format (the library `org.xbib:content-core:1.0.7` is used for JSON formatting).
|
||||||
|
|
||||||
|
[source,java]
|
||||||
|
----
|
||||||
|
public void findISSNs() throws IOException {
|
||||||
|
Map<String, List<Map<String, String>>> result = new TreeMap<>();
|
||||||
|
// set up MARC listener
|
||||||
|
MarcListener marcListener = new MarcFieldAdapter() {
|
||||||
|
@Override
|
||||||
|
public void field(MarcField field) {
|
||||||
|
Collection<Map<String, String>> values = field.getSubfields().stream()
|
||||||
|
.filter(f -> matchISSNField(field, f))
|
||||||
|
.map(f -> Collections.singletonMap(f.getId(), f.getValue()))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
if (!values.isEmpty()) {
|
||||||
|
result.putIfAbsent(field.getTag(), new ArrayList<>());
|
||||||
|
List<Map<String, String>> list = result.get(field.getTag());
|
||||||
|
list.addAll(values);
|
||||||
|
result.put(field.getTag(), list);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// read MARC file
|
||||||
|
Marc.builder()
|
||||||
|
.setInputStream(getClass().getResource("issns.mrc").openStream())
|
||||||
|
.setMarcListener(marcListener)
|
||||||
|
.build()
|
||||||
|
.writeCollection();
|
||||||
|
// collect ISSNs
|
||||||
|
List<String> issns = result.values().stream()
|
||||||
|
.map(l -> l.stream()
|
||||||
|
.map(m -> m.values().iterator().next())
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.flatMap(List::stream)
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
// JSON output
|
||||||
|
XContentBuilder builder = contentBuilder().prettyPrint()
|
||||||
|
.startObject();
|
||||||
|
for (Map.Entry<String, List<Map<String, String>>> entry : result.entrySet()) {
|
||||||
|
builder.field(entry.getKey(), entry.getValue());
|
||||||
|
}
|
||||||
|
builder.array("issns", issns);
|
||||||
|
builder.endObject();
|
||||||
|
|
||||||
|
logger.log(Level.INFO, builder.string());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean matchISSNField(MarcField field, MarcField.Subfield subfield) {
|
||||||
|
switch (field.getTag()) {
|
||||||
|
case "011": {
|
||||||
|
return "a".equals(subfield.getId()) || "f".equals(subfield.getId());
|
||||||
|
}
|
||||||
|
case "421":
|
||||||
|
case "451":
|
||||||
|
case "452":
|
||||||
|
case "488":
|
||||||
|
return "x".equals(subfield.getId());
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
----
|
||||||
|
|
||||||
## Bibliographic character sets
|
## Bibliographic character sets
|
||||||
|
|
||||||
Bibliographic character sets predate the era of Unicode. Before Unicode, character sets were
|
Bibliographic character sets predate the era of Unicode. Before Unicode, character sets were
|
||||||
|
|
|
@ -40,6 +40,7 @@ dependencies {
|
||||||
testCompile "org.xmlunit:xmlunit-matchers:${project.property('xmlunit-matchers.version')}"
|
testCompile "org.xmlunit:xmlunit-matchers:${project.property('xmlunit-matchers.version')}"
|
||||||
testCompile "com.github.stefanbirkner:system-rules:${project.property('system-rules.version')}"
|
testCompile "com.github.stefanbirkner:system-rules:${project.property('system-rules.version')}"
|
||||||
testCompile "org.xbib:bibliographic-character-sets:${project.property('bibliographic-character-sets.version')}"
|
testCompile "org.xbib:bibliographic-character-sets:${project.property('bibliographic-character-sets.version')}"
|
||||||
|
testCompile "org.xbib:content-core:${project.property('content-core.version')}"
|
||||||
asciidoclet "org.asciidoctor:asciidoclet:${project.property('asciidoclet.version')}"
|
asciidoclet "org.asciidoctor:asciidoclet:${project.property('asciidoclet.version')}"
|
||||||
wagon "org.apache.maven.wagon:wagon-ssh-external:${project.property('wagon-ssh-external.version')}"
|
wagon "org.apache.maven.wagon:wagon-ssh-external:${project.property('wagon-ssh-external.version')}"
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
group = org.xbib
|
group = org.xbib
|
||||||
name = marc
|
name = marc
|
||||||
version = 1.0.10
|
version = 1.0.11
|
||||||
|
|
||||||
junit.version = 4.12
|
junit.version = 4.12
|
||||||
xalan.version = 2.7.2
|
xalan.version = 2.7.2
|
||||||
xmlunit-matchers.version = 2.3.0
|
xmlunit-matchers.version = 2.3.0
|
||||||
system-rules.version = 1.16.0
|
system-rules.version = 1.16.0
|
||||||
bibliographic-character-sets.version = 1.0.0
|
bibliographic-character-sets.version = 1.0.0
|
||||||
|
content-core.version = 1.0.7
|
||||||
asciidoclet.version = 1.5.4
|
asciidoclet.version = 1.5.4
|
||||||
wagon-ssh-external.version = 2.10
|
wagon-ssh-external.version = 2.10
|
||||||
|
|
46
src/main/java/org/xbib/marc/MarcFieldAdapter.java
Normal file
46
src/main/java/org/xbib/marc/MarcFieldAdapter.java
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
/*
|
||||||
|
Copyright 2016 Jörg Prante
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
*/
|
||||||
|
package org.xbib.marc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A MARC listener with empty methods. Useful for extending.
|
||||||
|
*/
|
||||||
|
public class MarcFieldAdapter implements MarcListener {
|
||||||
|
@Override
|
||||||
|
public void beginCollection() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void beginRecord(String format, String type) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void leader(String label) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void field(MarcField field) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endRecord() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endCollection() {
|
||||||
|
}
|
||||||
|
}
|
91
src/test/java/org/xbib/marc/filter/MarcFieldFilterTest.java
Normal file
91
src/test/java/org/xbib/marc/filter/MarcFieldFilterTest.java
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
package org.xbib.marc.filter;
|
||||||
|
|
||||||
|
import static org.xbib.content.json.JsonXContent.contentBuilder;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.xbib.content.XContentBuilder;
|
||||||
|
import org.xbib.marc.Marc;
|
||||||
|
import org.xbib.marc.MarcField;
|
||||||
|
import org.xbib.marc.MarcFieldAdapter;
|
||||||
|
import org.xbib.marc.MarcListener;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Demo of collecting ISSNs from a MARC file.
|
||||||
|
*
|
||||||
|
* "issns.mrc" courtesy of Steven Hirren (steven.hirren.gmail.com)
|
||||||
|
*/
|
||||||
|
public class MarcFieldFilterTest {
|
||||||
|
|
||||||
|
private static final Logger logger = Logger.getLogger(MarcFieldFilterTest.class.getName());
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void findISSNs() throws IOException {
|
||||||
|
Map<String, List<Map<String, String>>> result = new TreeMap<>();
|
||||||
|
// set up MARC listener
|
||||||
|
MarcListener marcListener = new MarcFieldAdapter() {
|
||||||
|
@Override
|
||||||
|
public void field(MarcField field) {
|
||||||
|
Collection<Map<String, String>> values = field.getSubfields().stream()
|
||||||
|
.filter(f -> matchISSNField(field, f))
|
||||||
|
.map(f -> Collections.singletonMap(f.getId(), f.getValue()))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
if (!values.isEmpty()) {
|
||||||
|
result.putIfAbsent(field.getTag(), new ArrayList<>());
|
||||||
|
List<Map<String, String>> list = result.get(field.getTag());
|
||||||
|
list.addAll(values);
|
||||||
|
result.put(field.getTag(), list);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// read MARC file
|
||||||
|
Marc.builder()
|
||||||
|
.setInputStream(getClass().getResource("issns.mrc").openStream())
|
||||||
|
.setMarcListener(marcListener)
|
||||||
|
.build()
|
||||||
|
.writeCollection();
|
||||||
|
// collect ISSNs
|
||||||
|
List<String> issns = result.values().stream()
|
||||||
|
.map(l -> l.stream()
|
||||||
|
.map(m -> m.values().iterator().next())
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.flatMap(List::stream)
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
// JSON output
|
||||||
|
XContentBuilder builder = contentBuilder().prettyPrint()
|
||||||
|
.startObject();
|
||||||
|
for (Map.Entry<String, List<Map<String, String>>> entry : result.entrySet()) {
|
||||||
|
builder.field(entry.getKey(), entry.getValue());
|
||||||
|
}
|
||||||
|
builder.array("issns", issns);
|
||||||
|
builder.endObject();
|
||||||
|
|
||||||
|
logger.log(Level.INFO, builder.string());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean matchISSNField(MarcField field, MarcField.Subfield subfield) {
|
||||||
|
switch (field.getTag()) {
|
||||||
|
case "011": {
|
||||||
|
return "a".equals(subfield.getId()) || "f".equals(subfield.getId());
|
||||||
|
}
|
||||||
|
case "421":
|
||||||
|
case "451":
|
||||||
|
case "452":
|
||||||
|
case "488":
|
||||||
|
return "x".equals(subfield.getId());
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
1
src/test/resources/org/xbib/marc/filter/issns.mrc
Normal file
1
src/test/resources/org/xbib/marc/filter/issns.mrc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
00675cas a2200253 a 450 001000300000005001300003011002500016035001900041100004100060102000700101105001800108106000600126110001600132200002200148207003100170210004500201452004200246530002200288531001600310676001600326801001300342801001900355856004700374202013060511330 a0261-3794f0261-3794 a(ISSN)02613794 a20130605a19829999 ba aGB a 1 ar aaha 1 aElectoral studies0 aVol. 1, no. 1 (Apr. 1982)- aGuildfordcButterworth Scientificd1982-11tElectoral studies (Online)x1873-6890 aElectoral studies aElect. stud a324.605v19 0aFRbISSN 2aGRbEKTgAACR24 uhttps://eskep.ekt.gr/eskep/journal/show/2000874cas a2200301 450 001000300000005001300003011002500016035001900041040001100060100004100071102000700112105001800119106000600137110001600143200003400159207002100193210004600214321003400260452005400294530003400348531001900382676001100401711003000412801001300442801001900455856005100474856004700525402013060511270 a0001-4842f0001-4842 a(ISSN)00014842 aACHRE4 a20130605a19689999 ba aUS a 1 ar aafa 1 aAccounts of chemical research0 av. 1- Jan. 1968- aWashington, DCcAmerican Chemical Society aChemical abstractsx0009-225810tAccounts of chemical research (Online)x1520-4898 aAccounts of chemical research aAcc. chem. res a540/.5 2aAmerican Chemical Society 0aFRbISSN 2aGRbEKTgAACR24 uhttp://pubs.acs.org/journals/achre4/index.html4 uhttps://eskep.ekt.gr/eskep/journal/show/4000927cas a2200301 i 450 001000300000005001300003011002500016035001900041100004100060102000700101106000600108110001600114200002300130210003000153421004800183421003500231421004100266434004300307434004800350452005400398530002300452531001800475675002400493711002900517801001300546801001900559856004700578602013060511310 a0001-5342f0001-5342 a(ISSN)00015342 a20130605a19359999 | ba aNL ar aah 1 aActa biotheoretica aLeidencRijksuniversiteit 0tActa biotheoretica. Supplementumx0169-7242 0tFolia biotheoreticax0920-2676 1tBibliotheca biotheoreticax0373-3408 tBibliographia biotheoreticax0920-2684 tActa biotheoretica. 
Supplementumx0169-724211tActa biotheoretica (Dordrecht. Online)x1572-8358 aActa biotheoretica aActa biotheor a573v15th Dutch ed. 2aRijksuniversiteit Leiden 0aFRbISSN 2aGRbEKTgAACR24 uhttps://eskep.ekt.gr/eskep/journal/show/60
|
Loading…
Reference in a new issue