Merge pull request #1 from hbz/fix-phrase-handling

Fix phrase handling in Elasticsearch query generator
This commit is contained in:
Jörg Prante 2017-01-26 17:33:53 +01:00 committed by GitHub
commit 99eaa0bd0c
5 changed files with 19 additions and 18 deletions

View file

@@ -128,7 +128,7 @@ public class FilterGenerator implements Visitor {
case ALL: { case ALL: {
String field = arg1.toString(); String field = arg1.toString();
String value = arg2 != null ? arg2.toString() : ""; String value = arg2 != null ? arg2.toString() : "";
boolean phrase = arg2 instanceof Token && ((Token) arg2).isProtected(); boolean phrase = arg2 instanceof Token && ((Token) arg2).isQuoted();
if (phrase) { if (phrase) {
builder.startArray("and"); builder.startArray("and");
QuotedStringTokenizer qst = new QuotedStringTokenizer(value); QuotedStringTokenizer qst = new QuotedStringTokenizer(value);
@@ -144,7 +144,7 @@ public class FilterGenerator implements Visitor {
break; break;
} }
case ANY: { case ANY: {
boolean phrase = arg2 instanceof Token && ((Token) arg2).isProtected(); boolean phrase = arg2 instanceof Token && ((Token) arg2).isQuoted();
String field = arg1.toString(); String field = arg1.toString();
String value = arg2 != null ? arg2.toString() : ""; String value = arg2 != null ? arg2.toString() : "";
if (phrase) { if (phrase) {

View file

@@ -194,17 +194,10 @@ public class QueryGenerator implements Visitor {
break; break;
} }
case PHRASE: { case PHRASE: {
String field = arg1.toString();
String value = arg2 != null ? arg2.toString() : "";
if (tok2 != null) { if (tok2 != null) {
if (tok2.isProtected()) { String field = arg1.toString();
builder.startObject("match_phrase") String value = tok2.isQuoted() ? tok2.getString() : arg2.toString();
.startObject(field) if (tok2.isAll()) {
.field("query", tok2.getString())
.field("slop", 0)
.endObject()
.endObject();
} else if (tok2.isAll()) {
builder.startObject("match_all").endObject(); builder.startObject("match_all").endObject();
} else if (tok2.isWildcard()) { } else if (tok2.isWildcard()) {
builder.startObject("wildcard").field(field, value).endObject(); builder.startObject("wildcard").field(field, value).endObject();
@@ -264,7 +257,7 @@ public class QueryGenerator implements Visitor {
String from = null; String from = null;
String to = null; String to = null;
if (tok2 != null) { if (tok2 != null) {
if (!tok2.isProtected()) { if (!tok2.isQuoted()) {
throw new IllegalArgumentException("range within: unable to derive range from a non-phrase: " + value); throw new IllegalArgumentException("range within: unable to derive range from a non-phrase: " + value);
} }
if (tok2.getStringList().size() != 2) { if (tok2.getStringList().size() != 2) {

View file

@@ -22,7 +22,7 @@ public class Token implements Node {
public enum TokenClass { public enum TokenClass {
NORMAL, ALL, WILDCARD, BOUNDARY, PROTECTED NORMAL, ALL, WILDCARD, BOUNDARY, QUOTED
} }
private TokenType type; private TokenType type;
@@ -60,12 +60,12 @@ public class Token implements Node {
} }
if (this.value != null) { if (this.value != null) {
// protected? // quoted?
if (value.startsWith("\"") && value.endsWith("\"")) { if (value.startsWith("\"") && value.endsWith("\"")) {
this.stringvalue = value; this.stringvalue = value;
this.value = value.substring(1, value.length() - 1).replaceAll("\\\\\"", "\""); this.value = value.substring(1, value.length() - 1).replaceAll("\\\\\"", "\"");
this.values = parseQuot(this.value); this.values = parseQuot(this.value);
tokenClass.add(TokenClass.PROTECTED); tokenClass.add(TokenClass.QUOTED);
} }
// wildcard? // wildcard?
if (this.value.indexOf('*') >= 0 || this.value.indexOf('?') >= 0) { if (this.value.indexOf('*') >= 0 || this.value.indexOf('?') >= 0) {
@@ -182,8 +182,8 @@ public class Token implements Node {
return sb.toString(); return sb.toString();
} }
public boolean isProtected() { public boolean isQuoted() {
return tokenClass.contains(TokenClass.PROTECTED); return tokenClass.contains(TokenClass.QUOTED);
} }
public boolean isBoundary() { public boolean isBoundary() {

View file

@@ -121,3 +121,7 @@ unix and facet.creator = "on" and facet.subject = "on" and facet.date = "off"|{"
unix and facet.date = on|{"from":0,"size":10,"query":{"bool":{"must":{"simple_query_string":{"query":"unix","fields":["cql.allIndexes"],"analyze_wildcard":true,"default_operator":"and"}}}},"aggregations":{"myfacet":"myvalue"}} unix and facet.date = on|{"from":0,"size":10,"query":{"bool":{"must":{"simple_query_string":{"query":"unix","fields":["cql.allIndexes"],"analyze_wildcard":true,"default_operator":"and"}}}},"aggregations":{"myfacet":"myvalue"}}
(cql.allIndexes = "")|{"from":0,"size":10,"query":{"simple_query_string":{"query":"\"\"","fields":["cql.allIndexes"],"analyze_wildcard":true,"default_operator":"and"}}} (cql.allIndexes = "")|{"from":0,"size":10,"query":{"simple_query_string":{"query":"\"\"","fields":["cql.allIndexes"],"analyze_wildcard":true,"default_operator":"and"}}}
cql.allIndexes all 3125294126|{"from":0,"size":10,"query":{"simple_query_string":{"query":"3125294126","fields":["cql.allIndexes"],"analyze_wildcard":true,"default_operator":"and"}}} cql.allIndexes all 3125294126|{"from":0,"size":10,"query":{"simple_query_string":{"query":"3125294126","fields":["cql.allIndexes"],"analyze_wildcard":true,"default_operator":"and"}}}
Item.callnumber adj QAP2230|{"from":0,"size":10,"query":{"match_phrase":{"Item.callnumber":{"query":"QAP2230","slop":0}}}}
Item.callnumber adj QAP22*|{"from":0,"size":10,"query":{"wildcard":{"Item.callnumber":"QAP22*"}}}
Item.callnumber adj "K 32/70 A 10"|{"from":0,"size":10,"query":{"match_phrase":{"Item.callnumber":{"query":"K 32/70 A 10","slop":0}}}}
Item.callnumber adj "K 32/70 A*"|{"from":0,"size":10,"query":{"wildcard":{"Item.callnumber":"K 32/70 A*"}}}

View file

@@ -139,3 +139,7 @@ unix and facet.dc.creator = "on" and facet.dc.subject = "on" and facet.dc.subjec
unix and facet.dc.date = "on" and facet.dc.subject = "on" and facet.dc.subject = "buckets=20"|unix unix and facet.dc.date = "on" and facet.dc.subject = "on" and facet.dc.subject = "buckets=20"|unix
unix and facet.dc.creator = "on" and facet.dc.subject = "on" and facet.dc.subject = "buckets=20"|unix unix and facet.dc.creator = "on" and facet.dc.subject = "on" and facet.dc.subject = "buckets=20"|unix
cql.allIndexes all "linux;" cql.allIndexes all "linux;"
Item.callnumber adj QAP2230
Item.callnumber adj QAP22*
Item.callnumber adj "K 32/70 A 10"
Item.callnumber adj "K 32/70 A*"