fix PQF integer token, add SRU JDK client

This commit is contained in:
Jörg Prante 2023-01-23 18:02:06 +01:00
parent 351ea46d19
commit 401127f779
19 changed files with 311 additions and 43 deletions

View file

@ -25,7 +25,7 @@ dependencyResolutionManagement {
library('groovy-macro', 'org.apache.groovy', 'groovy-macro').versionRef('groovy')
library('groovy-templates', 'org.apache.groovy', 'groovy-templates').versionRef('groovy')
library('groovy-test', 'org.apache.groovy', 'groovy-test').versionRef('groovy')
library('marc', 'org.xbib', 'marc').version('2.9.14')
library('marc', 'org.xbib', 'marc').version('2.9.15')
}
}
}

View file

@ -0,0 +1,3 @@
// Depend on the MARC library through the version catalog alias 'marc'.
dependencies {
    implementation(libs.marc)
}

View file

@ -0,0 +1,6 @@
/**
 * Module descriptor of the SRU client built on the JDK HTTP client.
 */
module org.xbib.sru.client.jdk {
    // JDK modules: java.util.logging and java.net.http.HttpClient
    requires java.logging;
    requires java.net.http;
    // MARC record types
    requires org.xbib.marc;

    // public API of the client
    exports org.xbib.sru.client.jdk;
}

View file

@ -1,15 +1,18 @@
package org.xbib.sru.client.jdk;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.InputStream;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.util.function.Consumer;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.xbib.marc.Marc;
import org.xbib.marc.MarcRecord;
import org.xbib.sru.client.jdk.util.UrlBuilder;
public class SRUClient {
@ -35,7 +38,7 @@ public class SRUClient {
String recordSchema,
Integer startRecord,
Integer maximumRecords,
Consumer<Reader> consumer) throws IOException, InterruptedException {
Consumer<InputStream> consumer) throws IOException, InterruptedException {
UrlBuilder url = UrlBuilder.fromUrl(builder.baseURL);
url.queryParam(SRUConstants.OPERATION_PARAMETER, "searchRetrieve");
url.queryParam(SRUConstants.VERSION_PARAMETER, "1.1");
@ -50,21 +53,15 @@ public class SRUClient {
.header("user-agent", builder.userAgent != null ? builder.userAgent : "xbib SRU client")
.GET()
.build();
logger.log(Level.INFO, "sending " + httpRequest);
HttpResponse<String> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
logger.log(Level.FINE, "sending " + httpRequest);
HttpResponse<InputStream> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofInputStream());
int status = httpResponse.statusCode();
logger.log(Level.FINE, "response status = " + status + " headers = " + httpResponse.headers());
String contentType = httpResponse.headers().firstValue("content-type").orElse(null);
if (status == 200) {
String string = httpResponse.body();
if (string != null && string.length() > 0) {
consumer.accept(new StringReader(string));
InputStream inputStream = httpResponse.body();
consumer.accept(inputStream);
}
}
}
/**
 * Closes this client. The method body is empty, so calling it has no effect.
 */
public void close() {
    // nothing to release
}
public static class Builder {

View file

@ -0,0 +1,43 @@
package org.xbib.sru.client.jdk.test;
import org.junit.jupiter.api.Test;
import org.xbib.marc.Marc;
import org.xbib.marc.MarcRecord;
import org.xbib.marc.MarcRecordIterator;
import org.xbib.sru.client.jdk.SRUClient;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Integration test that sends an SRU searchRetrieve request to the LVI endpoint
 * at {@code https://sru.hbz-nrw.de/lvi} and logs the MARC records of the response.
 *
 * <p>The test performs a real HTTP request and therefore needs network access.
 */
public class LVITest {

    private static final Logger logger = Logger.getLogger(LVITest.class.getName());

    /**
     * Queries for the personal name "Smith", asking for records 1 to 10 in the
     * {@code marcxml} record schema, and passes the response body to
     * {@link #dumpRecords(InputStream)}.
     *
     * @throws IOException if the request fails
     * @throws InterruptedException if the request is interrupted
     */
    @Test
    public void testLVI() throws IOException, InterruptedException {
        SRUClient client = SRUClient.builder()
                .setBaseURL("https://sru.hbz-nrw.de/lvi")
                .build();
        try {
            client.searchRetrieve("bib.personalName = \"Smith\"",
                    "marcxml",
                    1,
                    10,
                    this::dumpRecords);
        } finally {
            // release the client even if the request or the consumer fails
            client.close();
        }
    }

    /**
     * Iterates over the MARCXML records in the given stream, logs each record and
     * finally logs the total number of records reported by the iterator.
     * The stream is closed when this method returns.
     *
     * @param inputStream the SRU response body
     */
    private void dumpRecords(InputStream inputStream) {
        // The body stream of an HTTP response must be read to EOF or closed to release
        // the underlying exchange; try-with-resources guarantees the close on every path.
        try (inputStream) {
            MarcRecordIterator iterator = Marc.builder()
                    .setInputStream(inputStream)
                    .setCharset(StandardCharsets.UTF_8)
                    .xmlRecordIterator();
            while (iterator.hasNext()) {
                MarcRecord marcRecord = iterator.next();
                logger.log(Level.INFO, marcRecord.toString());
            }
            // The total is only available after all records have been iterated, because the
            // SRU numberOfRecords element may be located at the bottom of the response.
            logger.log(Level.INFO, "total number of records = " + iterator.getTotalNumberOfRecords());
        } catch (IOException e) {
            // a Consumer cannot throw checked exceptions, so wrap the failure
            throw new java.io.UncheckedIOException("unable to close SRU response stream", e);
        }
    }
}

View file

@ -77,6 +77,9 @@ public abstract class BEREncoding {
*/
private int[] lengthEncoding;
/**
 * Default constructor; performs no initialisation of its own.
 */
public BEREncoding() {
    // intentionally empty
}
/**
 * Returns the identifier encoding of this object.
 *
 * @return the {@code identifierEncoding} array itself, not a copy
 */
public int[] getIdentifierEncoding() {
    return this.identifierEncoding;
}

View file

@ -1,5 +1,5 @@
module org.xbib.z3950lib.client.jdk {
exports org.xbib.z3950.client.jdk;
requires transitive org.xbib.z3950lib.client.api;
requires org.xbib.z3950lib.client.api;
requires java.logging;
}

View file

@ -391,26 +391,37 @@ public class JDKZClient implements Client, Closeable {
private String pass;
private long timeout = 5000;
private long timeout;
private String preferredRecordSyntax = "1.2.840.10003.5.10"; // marc21
private String preferredRecordSyntax;
private String resultSetName = "default";
private String resultSetName;
private String elementSetName = null;
private String elementSetName;
private String encoding = "ANSEL";
private String encoding;
private String format = "MARC21";
private String format;
private String type = "Bibliographic";
private String type;
private List<String> databases = Collections.singletonList("");
private List<String> databases;
private Integer preferredMessageSize = 10 * 1024 * 1024;
private Integer preferredMessageSize;
private InitListener initListener;
/**
 * Creates a builder pre-populated with default settings: MARC21 record syntax,
 * format and type defaults, the "default" result set name, ANSEL encoding,
 * a single empty database name, a timeout of 5000 and a preferred message
 * size of 10 MiB. All other fields keep their Java default values.
 */
private Builder() {
    // connection related defaults
    timeout = 5000;
    preferredMessageSize = 10 * 1024 * 1024;
    databases = Collections.singletonList("");
    // result set and record presentation defaults
    resultSetName = "default";
    preferredRecordSyntax = "1.2.840.10003.5.10"; // marc21
    format = "MARC21";
    type = "Bibliographic";
    encoding = "ANSEL";
}
public Builder setHost(String host) {
this.host = host;
return this;

View file

@ -1 +1,190 @@
%{ import java.io.Reader; import java.io.IOException; /** Mike Taylor, IndexData: "Prefix Query Format (PQF), also known as Prefix Query Notation (PQN) was defined in 1995, as part of the YAZ toolkit, and has since become the de facto standard representation of RPN queries." From: http://www.indexdata.com/yaz/doc/tools.tkl#PQF The grammar of the PQF is as follows: query ::= top-set query-struct. top-set ::= [ '@attrset' string ] query-struct ::= attr-spec | simple | complex | '@term' term-type query attr-spec ::= '@attr' [ string ] string query-struct complex ::= operator query-struct query-struct. operator ::= '@and' | '@or' | '@not' | '@prox' proximity. simple ::= result-set | term. result-set ::= '@set' string. term ::= string. proximity ::= exclusion distance ordered relation which-code unit-code. exclusion ::= '1' | '0' | 'void'. distance ::= integer. ordered ::= '1' | '0'. relation ::= integer. which-code ::= 'known' | 'private' | integer. unit-code ::= integer. term-type ::= 'general' | 'numeric' | 'string' | 'oid' | 'datetime' | 'null'. You will note that the syntax above is a fairly faithful representation of RPN, except for the Attribute, which has been moved a step away from the term, allowing you to associate one or more attributes with an entire query structure. The parser will automatically apply the given attributes to each term as required. The @attr operator is followed by an attribute specification (attr-spec above). The specification consists of an optional attribute set, an attribute type-value pair and a sub-query. The attribute type-value pair is packed in one string: an attribute type, an equals sign, and an attribute value, like this: @attr 1=1003. The type is always an integer but the value may be either an integer or a string (if it doesn't start with a digit character). A string attribute-value is encoded as a Type-1 ``complex'' attribute with the list of values containing the single string specified, and including no semantic indicators. 
*/ %} %class PQFParser %interface PQFTokens %package org.xbib.z3950.common.pqf %token NL %token <String> OR %token <String> AND %token <String> NOT %token <String> ATTR %token <String> ATTRSET %token <String> TERM %token <String> SET %token <String> VOID %token <String> KNOWN %token <String> PRIVATE %token <String> TERMTYPE %token <String> CHARSTRING1 %token <String> CHARSTRING2 %token <String> OPERATORS %token <String> EQUALS %token <Integer> INTEGER %left AND %left OR %left NOT %type <PQF> pqf %type <Query> querystruct %type <Expression> expression %type <AttrStr> attrstr %type <Term> term %type <Setname> resultset %start pqf %% pqf : ATTRSET CHARSTRING1 querystruct { this.pqf = new PQF($2, $3); $$ = this.pqf; } | querystruct { this.pqf = new PQF($1); $$ = this.pqf; } ; querystruct : attrspec | simple | complex | TERM TERMTYPE pqf { $$ = new Query($3); }; attrspec : ATTR attrstr querystruct { $$ = new Query($2, $3); } | ATTR CHARSTRING1 attrstr querystruct { $$ = new Query($2, $3, $4); }; simple : resultset { $$ = new Query($1); } | term { $$ = new Query($1); }; complex : expression { $$ = new Query($1); }; resultset : SET CHARSTRING1 { $$ = new Setname($2); }; term : CHARSTRING1 { $$ = new Term($1); } | CHARSTRING2 { $$ = new Term($1); } }; attrstr: INTEGER EQUALS INTEGER { $$ = new AttrStr($1, $3); } | INTEGER EQUALS CHARSTRING1 { $$ = new AttrStr($1, $3); }; expression: AND querystruct querystruct { $$ = new Expression($1, $2, $3); } | OR querystruct querystruct { $$ = new Expression($1, $2, $3); } | NOT querystruct querystruct { $$ = new Expression($1, $2, $3); } ; %% private PQFLexer lexer; private PQF pqf; public PQFParser(Reader r) { this.lexer = new PQFLexer(r); lexer.nextToken(); } public void yyerror(String error) { throw new SyntaxException("PQF error at " + "[" + lexer.getLine() + "," + lexer.getColumn() +"]" + ": " + error); } public PQF getResult() { return pqf; }
%{
import java.io.Reader;
import java.io.IOException;
import java.math.BigDecimal;
/**
Mike Taylor, IndexData:
"Prefix Query Format (PQF), also known as
Prefix Query Notation (PQN) was defined in 1995, as part of the YAZ
toolkit, and has since become the de facto standard representation of
RPN queries."
From: http://www.indexdata.com/yaz/doc/tools.tkl#PQF
The grammar of the PQF is as follows:
query ::= top-set query-struct.
top-set ::= [ '@attrset' string ]
query-struct ::= attr-spec | simple | complex | '@term' term-type query
attr-spec ::= '@attr' [ string ] string query-struct
complex ::= operator query-struct query-struct.
operator ::= '@and' | '@or' | '@not' | '@prox' proximity.
simple ::= result-set | term.
result-set ::= '@set' string.
term ::= string.
proximity ::= exclusion distance ordered relation which-code unit-code.
exclusion ::= '1' | '0' | 'void'.
distance ::= integer.
ordered ::= '1' | '0'.
relation ::= integer.
which-code ::= 'known' | 'private' | integer.
unit-code ::= integer.
term-type ::= 'general' | 'numeric' | 'string' | 'oid' | 'datetime' | 'null'.
You will note that the syntax above is a fairly faithful representation of RPN,
except for the Attribute, which has been moved a step away from the term,
allowing you to associate one or more attributes with an entire query structure.
The parser will automatically apply the given attributes to each term as required.
The @attr operator is followed by an attribute specification (attr-spec above).
The specification consists of an optional attribute set, an attribute
type-value pair and a sub-query. The attribute type-value pair is packed
in one string: an attribute type, an equals sign, and an attribute value,
like this: @attr 1=1003. The type is always an integer but the value may be
either an integer or a string (if it doesn't start with a digit character).
A string attribute-value is encoded as a Type-1 ``complex'' attribute with
the list of values containing the single string specified, and including
no semantic indicators.
*/
%}
/* Name of the generated parser class, of the token interface, and the Java package. */
%class PQFParser
%interface PQFTokens
%package org.xbib.z3950.common.pqf
/* Terminals; the type in angle brackets is the Java type of the token's semantic value. */
%token NL
%token <String> OR
%token <String> AND
%token <String> NOT
%token <String> ATTR
%token <String> ATTRSET
%token <String> TERM
%token <String> SET
%token <String> VOID
%token <String> KNOWN
%token <String> PRIVATE
%token <String> TERMTYPE
%token <String> CHARSTRING1
%token <String> CHARSTRING2
%token <String> OPERATORS
%token <String> EQUALS
/* INTEGER tokens carry their value as a BigDecimal. */
%token <BigDecimal> INTEGER
/* The operator tokens are declared left-associative. */
%left AND
%left OR
%left NOT
/* Java types of the values produced by the nonterminals. */
%type <PQF> pqf
%type <Query> querystruct
%type <Expression> expression
%type <AttrStr> attrstr
%type <Term> term
%type <Setname> resultset
/* Parsing starts at the pqf rule. */
%start pqf
%%
/* Start rule: a query structure, optionally preceded by ATTRSET and a CHARSTRING1.
   The resulting PQF object is also stored in the pqf field, which getResult() returns. */
pqf : ATTRSET CHARSTRING1 querystruct {
this.pqf = new PQF($2, $3);
$$ = this.pqf;
}
| querystruct {
this.pqf = new PQF($1);
$$ = this.pqf;
}
;
/* A query structure is an attrspec, a simple query, a complex query,
   or TERM TERMTYPE followed by a nested pqf that is wrapped in a Query. */
querystruct : attrspec | simple | complex | TERM TERMTYPE pqf {
$$ = new Query($3);
};
/* Attribute specification: ATTR, an attribute string and the sub-query it applies to.
   The second form has an extra CHARSTRING1 before the attribute string
   (the optional attribute set of attr-spec in the grammar quoted in the file header). */
attrspec : ATTR attrstr querystruct {
$$ = new Query($2, $3);
}
| ATTR CHARSTRING1 attrstr querystruct {
$$ = new Query($2, $3, $4);
};
/* A simple query wraps either a result set reference or a term in a Query. */
simple : resultset {
$$ = new Query($1);
}
| term {
$$ = new Query($1);
};
/* A complex query wraps an operator expression in a Query. */
complex : expression {
$$ = new Query($1);
};
/* Result set reference: SET followed by a CHARSTRING1, turned into a Setname. */
resultset : SET CHARSTRING1 {
$$ = new Setname($2);
};
/* A term is a CHARSTRING1, a CHARSTRING2 or an INTEGER token; each is wrapped in a Term. */
term : CHARSTRING1 {
$$ = new Term($1);
}
| CHARSTRING2 {
$$ = new Term($1);
}
| INTEGER {
$$ = new Term($1);
};
/* Attribute type/value pair: an INTEGER, EQUALS, then either an INTEGER or a CHARSTRING1. */
attrstr: INTEGER EQUALS INTEGER {
$$ = new AttrStr($1, $3);
}
| INTEGER EQUALS CHARSTRING1 {
$$ = new AttrStr($1, $3);
};
/* Operator expression in prefix form: AND, OR or NOT followed by two query structures.
   The operator token's string value and both operands are passed to Expression. */
expression: AND querystruct querystruct {
$$ = new Expression($1, $2, $3);
}
| OR querystruct querystruct {
$$ = new Expression($1, $2, $3);
}
| NOT querystruct querystruct {
$$ = new Expression($1, $2, $3);
}
;
%%
// Lexer created in the constructor; also consulted for line and column in yyerror().
private PQFLexer lexer;
// Set by the actions of the pqf rule; stays null until that rule has been reduced.
private PQF pqf;
/**
 * Creates a parser for the PQF text supplied by the given reader.
 * A lexer is created on the reader and immediately advanced by one token.
 *
 * @param r the reader supplying the PQF text
 */
public PQFParser(Reader r) {
    PQFLexer pqfLexer = new PQFLexer(r);
    pqfLexer.nextToken();
    this.lexer = pqfLexer;
}
/**
 * Reports a parse error by throwing a {@code SyntaxException} whose message
 * contains the lexer's current line and column followed by the error text.
 *
 * @param error the error description
 */
public void yyerror(String error) {
    String position = "[" + lexer.getLine() + "," + lexer.getColumn() + "]";
    throw new SyntaxException("PQF error at " + position + ": " + error);
}
/**
 * Returns the PQF object stored by the grammar actions of the start rule.
 *
 * @return the parsed PQF, or {@code null} if none has been stored yet
 */
public PQF getResult() {
    return this.pqf;
}

View file

@ -1,22 +1,26 @@
package org.xbib.z3950.common.pqf;
import java.math.BigDecimal;
/**
* PQF abstract syntax tree.
*/
public class AttrStr extends Node {
private Integer left;
private Integer right;
private String rightStr;
private final Integer left;
private final Integer right;
private final String rightStr;
public AttrStr(Integer left, Integer right) {
this.left = left;
this.right = right;
public AttrStr(BigDecimal left, BigDecimal right) {
this.left = left.intValue();
this.right = right.intValue();
this.rightStr = null;
}
public AttrStr(Integer left, String right) {
this.left = left;
public AttrStr(BigDecimal left, String right) {
this.left = left.intValue();
this.rightStr = right;
this.right = null;
}
@Override

View file

@ -5,11 +5,11 @@ package org.xbib.z3950.common.pqf;
*/
public class Expression extends Node {
private String op;
private final String op;
private Query q1;
private final Query q1;
private Query q2;
private final Query q2;
public Expression(String op, Query q1, Query q2) {
this.op = op;

View file

@ -5,6 +5,9 @@ package org.xbib.z3950.common.pqf;
*/
public abstract class Node {
/**
 * Default constructor; there is no state to initialise.
 */
public Node() {
    // intentionally empty
}
/**
* Try to accept this node by a visitor.
*

View file

@ -23,7 +23,7 @@ import java.util.Stack;
*/
public class PQFRPNGenerator implements Visitor {
private Stack<ASN1Any> result;
private final Stack<ASN1Any> result;
private RPNQuery rpnQuery;
@ -63,7 +63,7 @@ public class PQFRPNGenerator implements Visitor {
any = !result.isEmpty() && result.peek() instanceof AttributeElement ? result.pop() : null;
}
operand.attrTerm.attributes = new AttributeList();
operand.attrTerm.attributes.value = attrs.toArray(new AttributeElement[attrs.size()]);
operand.attrTerm.attributes.value = attrs.toArray(new AttributeElement[0]);
RPNStructure rpn = new RPNStructure();
rpn.c_op = operand;
if (attrs.size() > 0) {

View file

@ -8,7 +8,7 @@ import java.util.LinkedList;
public class Query extends Node {
private String attrschema;
private LinkedList<AttrStr> attrspec = new LinkedList<>();
private final LinkedList<AttrStr> attrspec = new LinkedList<>();
private Query querystruct;
private Setname setname;
private Term term;

View file

@ -3,8 +3,8 @@ package org.xbib.z3950.common.pqf;
/**
* Syntax exception.
*/
@SuppressWarnings("serial")
public class SyntaxException extends RuntimeException {
private static final long serialVersionUID = -962913398056374183L;
/**
* Creates a new SyntaxException object.

View file

@ -1,5 +1,7 @@
package org.xbib.z3950.common.pqf;
import java.math.BigDecimal;
/**
 * A term node of the PQF abstract syntax tree, holding the term text as a string.
 */
@ -11,6 +13,10 @@ public class Term extends Node {
this.value = value;
}
/**
 * Creates a term from a numeric value.
 *
 * <p>The value is rendered with {@link BigDecimal#toPlainString()} rather than
 * {@code toString()}: {@code toString()} may switch to scientific notation
 * (for example {@code 1E+3}), which would change the term text of the query.
 *
 * @param value the numeric term value, must not be {@code null}
 */
public Term(BigDecimal value) {
    this.value = value.toPlainString();
}
/**
 * Returns the text of this term.
 *
 * @return the term value as a string
 */
public String getValue() {
    return this.value;
}

View file

@ -1,6 +1,7 @@
package org.xbib.z3950.common.pqf;
import java.io.*;
import java.math.BigDecimal;
%%
%class PQFLexer
@ -105,7 +106,7 @@ TERMTYPE = "general" | "numeric" | "string" | "oid" | "datetime" | "null"
}
<YYINITIAL>{INTEGER} {
yylval = Integer.parseInt(yytext());
yylval = new BigDecimal(yytext());
return INTEGER;
}

View file

@ -35,7 +35,7 @@ class PQFParserTest {
}
}
assertEquals(errors, 0);
assertEquals(ok, 17);
assertEquals(ok, 19);
}
/**

View file

@ -6,6 +6,8 @@ dylan
@set Result-1
@and @set seta @set setb
@attr 1=4 computer
@attr 1=4 123456789
@attr 1=8 123456789123456789123456789
@attr 1=12 @attr 5=1 "abc1234567"
@attr 1=4 @attr 4=1 "self portrait"
@attrset exp1 @attr 1=1 CategoryList