add trie, add mini Json

This commit is contained in:
Jörg Prante 2022-07-31 01:19:06 +02:00
parent fdd791dcd5
commit bbd498482a
41 changed files with 4701 additions and 47 deletions

View file

@ -0,0 +1,681 @@
package org.xbib.datastructures.json.mini;
import java.io.IOException;
import java.time.Instant;
import java.util.Collection;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
 * A minimal JSON parser and generator for Java Maps and Collections.
 * <p>
 * Parsing maps JSON objects to insertion-ordered {@link Map} instances, JSON arrays to {@link List}
 * instances, strings to {@link String}, numbers without fraction and exponent to {@link Long}, all
 * other numbers to {@link Double}, {@code true}/{@code false} to {@link Boolean} and {@code null}
 * to {@code null}.
 */
public class Json {

    private Json() {
    }

    /**
     * Serializes a map as a JSON object.
     *
     * @param map the map to serialize, may be {@code null}
     * @return the JSON text, or {@code null} if {@code map} is {@code null}
     * @throws IOException if the underlying appendable fails
     * @throws IllegalArgumentException if a value of an unsupported class is encountered
     */
    public static String toString(Map<String, Object> map) throws IOException {
        return map != null ? new JsonBuilder().buildMap(map).build() : null;
    }

    /**
     * Serializes a collection as a JSON array.
     *
     * @param collection the collection to serialize, may be {@code null}
     * @return the JSON text, or {@code null} if {@code collection} is {@code null}
     * @throws IOException if the underlying appendable fails
     * @throws IllegalArgumentException if a value of an unsupported class is encountered
     */
    public static String toString(Collection<Object> collection) throws IOException {
        return collection != null ? new JsonBuilder().buildCollection(collection).build() : null;
    }

    /**
     * Parses a JSON document whose top-level value is an object.
     *
     * @param json the JSON text, may be {@code null}
     * @return the parsed map, or {@code null} if {@code json} is {@code null}
     * @throws IOException if the text is not well-formed
     * @throws IllegalArgumentException if the top-level value is not an object
     * @throws NumberFormatException if an integral number does not fit into a {@code long}
     */
    @SuppressWarnings("unchecked")
    public static Map<String, Object> toMap(String json) throws IOException {
        if (json == null) {
            return null;
        }
        JsonParser parser = new JsonParser();
        parser.parse(json);
        Object object = parser.getResult();
        if (object instanceof Map) {
            return (Map<String, Object>) object;
        }
        // the result is null for the JSON literal "null", so do not dereference it blindly
        throw new IllegalArgumentException("unexpected, not a map instance: " + describeType(object));
    }

    /**
     * Parses a JSON document whose top-level value is an array.
     *
     * @param json the JSON text, may be {@code null}
     * @return the parsed collection, or {@code null} if {@code json} is {@code null}
     * @throws IOException if the text is not well-formed
     * @throws IllegalArgumentException if the top-level value is not an array
     * @throws NumberFormatException if an integral number does not fit into a {@code long}
     */
    @SuppressWarnings("unchecked")
    public static Collection<Object> toCollection(String json) throws IOException {
        if (json == null) {
            return null;
        }
        JsonParser parser = new JsonParser();
        parser.parse(json);
        Object object = parser.getResult();
        if (object instanceof Collection) {
            return (Collection<Object>) object;
        }
        throw new IllegalArgumentException("unexpected, not a collection instance: " + describeType(object));
    }

    /** Returns the class of the given object as text, or {@code "null"} for a null reference. */
    private static String describeType(Object object) {
        return object != null ? object.getClass().toString() : "null";
    }

    /**
     * Recursive descent parser. {@code ch} always holds the character at index {@code i - 1};
     * containers under construction and pending object keys live on {@code stack}.
     */
    private static class JsonParser {

        /** Sentinel returned by {@link #next()} once the input is exhausted. */
        private static final char EOS = (char) -1;
        private static final char DOUBLE_QUOTE = '"';
        private static final char BACKSLASH = '\\';
        private static final char OPEN_MAP = '{';
        private static final char CLOSE_MAP = '}';
        private static final char OPEN_LIST = '[';
        private static final char CLOSE_LIST = ']';
        private static final char COMMA = ',';
        private static final char COLON = ':';

        private String input;
        private int i;
        private char ch;
        private Object result;
        private final Deque<Object> stack = new LinkedList<>();

        public JsonParser() {
        }

        /**
         * Parses the given text; the outcome is available from {@link #getResult()} afterwards.
         *
         * @param input the JSON text, must not be {@code null}
         * @throws IOException if the text is not well-formed
         */
        public void parse(String input) throws IOException {
            Objects.requireNonNull(input);
            this.input = input;
            this.i = 0;
            this.result = null;
            stack.clear();
            ch = next();
            skipWhitespace();
            parseValue();
            skipWhitespace();
            // compare positions, not characters: a literal U+FFFF must not be mistaken for the end
            if (!atEnd()) {
                throw new IOException("malformed json: " + ch);
            }
        }

        public Object getResult() {
            return result;
        }

        private void parseValue() throws IOException {
            switch (ch) {
                case DOUBLE_QUOTE:
                    ch = next();
                    parseString(false);
                    break;
                case OPEN_MAP:
                    parseMap();
                    break;
                case OPEN_LIST:
                    parseList();
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                case '-':
                    parseNumber();
                    break;
                case 't':
                    parseLiteral("rue", true);
                    break;
                case 'f':
                    parseLiteral("alse", false);
                    break;
                case 'n':
                    parseLiteral("ull", null);
                    break;
                default:
                    throw new IOException("illegal character: " + ch);
            }
        }

        /**
         * Parses a number: optional minus, digits, optional fraction, optional exponent.
         * Numbers with a fraction or an exponent become {@link Double}, all others {@link Long}.
         */
        private void parseNumber() throws IOException {
            int start = i - 1;
            boolean integral = true;
            if (ch == '-') {
                ch = next();
                if (ch == '.') {
                    throw new IOException("no digit before dot");
                }
                if (!isDigit(ch)) {
                    throw new IOException("isolated minus");
                }
            }
            skipDigits();
            if (ch == '.') {
                integral = false;
                ch = next();
                skipDigits();
            }
            if (ch == 'e' || ch == 'E') {
                integral = false;
                // advance exactly once past the exponent marker, then past an optional sign
                ch = next();
                if (ch == '-' || ch == '+') {
                    ch = next();
                }
                if (!isDigit(ch)) {
                    throw new IOException("invalid exponent");
                }
                skipDigits();
            }
            String literal = input.substring(start, i - 1);
            // keep the branches apart: a conditional expression would promote the long to double
            if (integral) {
                valueNode(Long.parseLong(literal));
            } else {
                valueNode(Double.parseDouble(literal));
            }
        }

        /**
         * Parses the remainder of a string; the opening quote has already been consumed.
         *
         * @param isKey if {@code true} the string is pushed as a pending object key,
         *              otherwise it is stored as a value
         */
        private void parseString(boolean isKey) throws IOException {
            boolean escaped = false;
            int start = i - 1;
            while (ch != DOUBLE_QUOTE) {
                if (atEnd()) {
                    // without this check a missing closing quote would spin forever on the sentinel
                    throw new IOException("unterminated string");
                }
                if (ch == BACKSLASH) {
                    escaped = true;
                    ch = next();
                    if (ch == 'u') {
                        expectHex();
                        expectHex();
                        expectHex();
                        expectHex();
                    } else if (isSimpleEscape(ch)) {
                        ch = next();
                    } else {
                        throw new IOException("illegal escape char: " + ch);
                    }
                } else if (ch < 32) {
                    throw new IOException("illegal control char: " + ch);
                } else {
                    ch = next();
                }
            }
            String raw = input.substring(start, i - 1);
            String text = escaped ? unescape(raw) : raw;
            if (isKey) {
                stack.push(new KeyNode(text));
            } else {
                valueNode(text);
            }
            ch = next();
        }

        private void parseList() throws IOException {
            int count = 0;
            List<Object> list = new LinkedList<>();
            stack.push(list);
            ch = next();
            while (true) {
                skipWhitespace();
                if (ch == CLOSE_LIST) {
                    result = stack.pop();
                    tryAppend(result);
                    ch = next();
                    return;
                }
                if (count > 0) {
                    expectChar(COMMA);
                    ch = next();
                    skipWhitespace();
                }
                parseValue();
                count++;
            }
        }

        private void parseMap() throws IOException {
            int count = 0;
            Map<String, Object> map = new LinkedHashMap<>();
            stack.push(map);
            ch = next();
            while (true) {
                skipWhitespace();
                if (ch == CLOSE_MAP) {
                    result = stack.pop();
                    tryAppend(result);
                    ch = next();
                    return;
                }
                if (count > 0) {
                    expectChar(COMMA);
                    ch = next();
                    skipWhitespace();
                }
                expectChar(DOUBLE_QUOTE);
                ch = next();
                parseString(true);
                skipWhitespace();
                expectChar(COLON);
                ch = next();
                skipWhitespace();
                parseValue();
                count++;
            }
        }

        /**
         * Consumes the remaining characters of a keyword whose first character is the current one
         * and stores the associated value.
         */
        private void parseLiteral(String remainder, Object value) throws IOException {
            for (int k = 0; k < remainder.length(); k++) {
                ch = next();
                expectChar(remainder.charAt(k));
            }
            valueNode(value);
            ch = next();
        }

        private void expectChar(char expected) throws IOException {
            if (ch != expected) {
                throw new IOException("expected char " + expected + " but got " + ch);
            }
        }

        private void expectHex() throws IOException {
            ch = next();
            if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
                return;
            }
            throw new IOException("invalid hex char " + ch);
        }

        private void skipWhitespace() {
            while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
                ch = next();
            }
        }

        private void skipDigits() {
            while (isDigit(ch)) {
                ch = next();
            }
        }

        private static boolean isDigit(char c) {
            return c >= '0' && c <= '9';
        }

        private static boolean isSimpleEscape(char c) {
            return c == DOUBLE_QUOTE || c == '/' || c == BACKSLASH
                    || c == 'b' || c == 'f' || c == 'n' || c == 'r' || c == 't';
        }

        /**
         * Resolves the escape sequences of a string body that was already validated by
         * {@link #parseString(boolean)}.
         */
        private static String unescape(String escaped) {
            StringBuilder plain = new StringBuilder(escaped.length());
            int pos = 0;
            while (pos < escaped.length()) {
                char c = escaped.charAt(pos++);
                if (c != BACKSLASH) {
                    plain.append(c);
                    continue;
                }
                char code = escaped.charAt(pos++);
                switch (code) {
                    case 'b':
                        plain.append('\b');
                        break;
                    case 'f':
                        plain.append('\f');
                        break;
                    case 'n':
                        plain.append('\n');
                        break;
                    case 'r':
                        plain.append('\r');
                        break;
                    case 't':
                        plain.append('\t');
                        break;
                    case 'u':
                        plain.append((char) Integer.parseInt(escaped.substring(pos, pos + 4), 16));
                        pos += 4;
                        break;
                    default:
                        // quote, backslash and slash stand for themselves
                        plain.append(code);
                        break;
                }
            }
            return plain.toString();
        }

        /** Returns the next character and advances; returns {@link #EOS} past the end of input. */
        private char next() {
            int index = i++;
            return index < input.length() ? input.charAt(index) : EOS;
        }

        /** Tells whether the current character is the end-of-input sentinel and not real input. */
        private boolean atEnd() {
            return i > input.length();
        }

        private void valueNode(Object object) {
            if (!tryAppend(object)) {
                stack.push(object);
                result = object;
            }
        }

        /**
         * Adds the value to the list on top of the stack, or stores it under the pending key in
         * the map below that key.
         *
         * @return {@code false} if there is no enclosing container to receive the value
         */
        @SuppressWarnings("unchecked")
        private boolean tryAppend(Object object) {
            Object top = stack.peek();
            if (top instanceof List) {
                ((List<Object>) top).add(object);
                return true;
            }
            if (top instanceof KeyNode) {
                KeyNode key = (KeyNode) stack.pop();
                Object parent = stack.peek();
                if (parent instanceof Map) {
                    ((Map<String, Object>) parent).put(key.get().toString(), object);
                    return true;
                }
            }
            return false;
        }
    }

    /** Marker for an object key that is waiting on the parser stack for its value. */
    static class KeyNode {

        private final CharSequence value;

        public KeyNode(CharSequence value) {
            this.value = value;
        }

        public CharSequence get() {
            return value;
        }
    }

    /**
     * Streaming JSON writer that tracks the current nesting in a chain of {@link State} objects
     * in order to place the separators.
     */
    static class JsonBuilder {

        private final Appendable appendable;
        private State state;

        protected JsonBuilder() {
            this(new StringBuilder());
        }

        protected JsonBuilder(Appendable appendable) {
            this.appendable = appendable;
            this.state = new State(null, 0, Structure.DOCSTART, true);
        }

        public JsonBuilder beginCollection() throws IOException {
            // an array nested in an array is an element itself and needs its separator, like a map
            if (state.structure == Structure.COLLECTION) {
                beginArrayValue();
            }
            this.state = new State(state, state.level + 1, Structure.COLLECTION, true);
            appendable.append('[');
            return this;
        }

        public JsonBuilder endCollection() throws IOException {
            if (state.structure != Structure.COLLECTION) {
                throw new IOException("no array to close");
            }
            appendable.append(']');
            this.state = state.parent;
            return this;
        }

        public JsonBuilder beginMap() throws IOException {
            if (state.structure == Structure.COLLECTION) {
                beginArrayValue();
            }
            this.state = new State(state, state.level + 1, Structure.MAP, true);
            appendable.append('{');
            return this;
        }

        public JsonBuilder endMap() throws IOException {
            if (state.structure != Structure.MAP && state.structure != Structure.KEY) {
                throw new IOException("no object to close");
            }
            appendable.append('}');
            this.state = state.parent;
            return this;
        }

        /**
         * Writes the entries of the map. The surrounding braces are added unless a map was just
         * opened with {@link #beginMap()}.
         */
        public JsonBuilder buildMap(Map<String, Object> map) throws IOException {
            Objects.requireNonNull(map);
            boolean wrap = state.structure != Structure.MAP;
            if (wrap) {
                beginMap();
            }
            for (Map.Entry<String, Object> entry : map.entrySet()) {
                buildKey(entry.getKey());
                buildValue(entry.getValue());
            }
            if (wrap) {
                endMap();
            }
            return this;
        }

        public JsonBuilder buildCollection(Collection<?> collection) throws IOException {
            Objects.requireNonNull(collection);
            beginCollection();
            for (Object object : collection) {
                buildValue(object);
            }
            endCollection();
            return this;
        }

        /**
         * Writes a single value. Maps and collections are written recursively; character
         * sequences and {@link Instant}s are written as strings.
         *
         * @throws IllegalArgumentException if the class of the value is not supported
         */
        @SuppressWarnings("unchecked")
        public JsonBuilder buildValue(Object object) throws IOException {
            if (object instanceof Map) {
                return buildMap((Map<String, Object>) object);
            }
            if (object instanceof Collection) {
                return buildCollection((Collection<Object>) object);
            }
            if (object == null) {
                // buildNull writes the array separator itself; writing it here too would double it
                return buildNull();
            }
            if (state.structure == Structure.COLLECTION) {
                beginArrayValue();
            }
            if (object instanceof CharSequence) {
                buildString((CharSequence) object, true);
            } else if (object instanceof Boolean) {
                buildBoolean((Boolean) object);
            } else if (object instanceof Number) {
                buildNumber((Number) object);
            } else if (object instanceof Instant) {
                buildInstant((Instant) object);
            } else {
                throw new IllegalArgumentException("unable to write object class " + object.getClass());
            }
            return this;
        }

        public JsonBuilder buildKey(CharSequence string) throws IOException {
            boolean inMap = state.structure == Structure.MAP || state.structure == Structure.KEY;
            if (state.structure == Structure.COLLECTION) {
                beginArrayValue();
            } else if (inMap) {
                beginKey();
            }
            buildString(string, true);
            if (inMap) {
                endKey();
            }
            state.structure = Structure.KEY;
            return this;
        }

        public JsonBuilder buildNull() throws IOException {
            if (state.structure == Structure.COLLECTION) {
                beginArrayValue();
            }
            buildString("null", false);
            return this;
        }

        public String build() {
            return appendable.toString();
        }

        private void beginKey() throws IOException {
            writeSeparator();
        }

        private void endKey() throws IOException {
            appendable.append(":");
        }

        private void beginArrayValue() throws IOException {
            writeSeparator();
        }

        /** Writes a comma before every member of the current container except the first one. */
        private void writeSeparator() throws IOException {
            if (state.first) {
                state.first = false;
            } else {
                appendable.append(",");
            }
        }

        private void buildBoolean(boolean bool) throws IOException {
            buildString(bool ? "true" : "false", false);
        }

        private void buildNumber(Number number) throws IOException {
            buildString(number != null ? number.toString() : null, false);
        }

        private void buildInstant(Instant instant) throws IOException {
            buildString(instant.toString(), true);
        }

        private void buildString(CharSequence string, boolean escape) throws IOException {
            appendable.append(escape ? escapeString(string) : string);
        }

        /** Quotes the text and escapes quotes, backslashes and control characters below U+0020. */
        private CharSequence escapeString(CharSequence string) {
            StringBuilder sb = new StringBuilder();
            sb.append('"');
            int start = 0;
            int l = string.length();
            for (int i = 0; i < l; i++) {
                char c = string.charAt(i);
                if (c == '"' || c == '\\' || c < 32) {
                    if (i > start) {
                        sb.append(string, start, i);
                    }
                    start = i + 1;
                    sb.append(escapeCharacter(c));
                }
            }
            if (l > start) {
                sb.append(string, start, l);
            }
            sb.append('"');
            return sb;
        }

        private static String escapeCharacter(char c) {
            switch (c) {
                case '\n':
                    return "\\n";
                case '\r':
                    return "\\r";
                case '\t':
                    return "\\t";
                case '\\':
                    return "\\\\";
                case '\'':
                    return "\\'";
                case '\"':
                    return "\\\"";
            }
            String hex = Integer.toHexString(c);
            return "\\u0000".substring(0, 6 - hex.length()) + hex;
        }

        private enum Structure {
            DOCSTART, MAP, KEY, COLLECTION
        }

        private static class State {
            State parent;
            int level;
            Structure structure;
            boolean first;

            State(State parent, int level, Structure structure, boolean first) {
                this.parent = parent;
                this.level = level;
                this.structure = structure;
                this.first = first;
            }
        }
    }
}

View file

View file

@ -24,7 +24,8 @@ public class Trie {
if (lastNode.divergeKeyIndex == key.length) { if (lastNode.divergeKeyIndex == key.length) {
if (lastNode.divergePatternIndex == lastNode.value.length) { if (lastNode.divergePatternIndex == lastNode.value.length) {
lastNode.isLeaf = true; lastNode.isLeaf = true;
} else {// we need to reduce length of the compressed pattern in the current node, } else {
// we need to reduce length of the compressed pattern in the current node,
// make it node leaf, and create child that carry over the original children/isLeaf // make it node leaf, and create child that carry over the original children/isLeaf
char[] childValue = Arrays.copyOfRange(lastNode.value, char[] childValue = Arrays.copyOfRange(lastNode.value,
lastNode.divergePatternIndex, lastNode.value.length); lastNode.divergePatternIndex, lastNode.value.length);

View file

@ -0,0 +1,907 @@
package org.xbib.datastructures.trie.concurrent;
import org.xbib.datastructures.trie.concurrent.util.CharSequences;
import org.xbib.datastructures.trie.concurrent.util.KeyValuePair;
import org.xbib.datastructures.trie.concurrent.util.LazyIterator;
import org.xbib.datastructures.trie.concurrent.util.Node;
import org.xbib.datastructures.trie.concurrent.util.NodeFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Deque;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import static org.xbib.datastructures.trie.concurrent.ConcurrentRadixTree.SearchResult.Classification;
/**
* An implementation of {@link RadixTree} which supports lock-free concurrent reads, and allows items to be added to and
* to be removed from the tree <i>atomically</i> by background thread(s), without blocking reads.
* <p>
* Unlike reads, writes require locking of the tree (locking out other writing threads only; reading threads are never
* blocked). Currently write locks are coarse-grained; in fact they are tree-level. In future branch-level write locks
* might be added, but the current implementation is targeted at high concurrency read-mostly use cases.
*/
public class ConcurrentRadixTree<O> implements RadixTree<O> {
private final NodeFactory nodeFactory;
protected volatile Node root;
// Write operations acquire write lock, read operations are lock-free.
private final Lock writeLock = new ReentrantLock();
/**
* Creates a new {@link ConcurrentRadixTree} which will use the given {@link NodeFactory} to create nodes.
*
* @param nodeFactory An object which creates {@link Node} objects on-demand, and which might return node
* implementations optimized for storing the values supplied to it for the creation of each node
*/
public ConcurrentRadixTree(NodeFactory nodeFactory) {
this.nodeFactory = nodeFactory;
this.root = nodeFactory.createNode("", null, Collections.emptyList(), true);
}
/**
 * Locks the tree-level write lock, blocking until it is available.
 */
protected void acquireWriteLock() {
    this.writeLock.lock();
}
/**
 * Unlocks the tree-level write lock.
 */
protected void releaseWriteLock() {
    this.writeLock.unlock();
}
/**
 * Stores the value under the key, replacing a value already stored for that key.
 * Delegates to {@code putInternal} with overwriting enabled.
 */
@Override
@SuppressWarnings("unchecked")
public O put(CharSequence key, O value) {
    // putInternal takes the write lock itself
    return (O) putInternal(key, value, true);
}
/**
 * Stores the value under the key unless a value is already stored for that key.
 * Delegates to {@code putInternal} with overwriting disabled.
 */
@Override
@SuppressWarnings("unchecked")
public O putIfAbsent(CharSequence key, O value) {
    // putInternal takes the write lock itself
    return (O) putInternal(key, value, false);
}
/**
 * Looks up the value stored in the node that matches the key exactly.
 *
 * @return the value of the exactly matching node, or null if the search does not end in an exact match
 */
@Override
@SuppressWarnings("unchecked")
public O getValueForExactKey(CharSequence key) {
    SearchResult match = searchTree(key);
    if (!match.classification.equals(SearchResult.Classification.EXACT_MATCH)) {
        return null;
    }
    return (O) match.nodeFound.getValue();
}
/**
 * Returns the keys below the node reached by searching for the prefix, or an empty set if the
 * prefix does not lead into the tree.
 */
@Override
public Iterable<CharSequence> getKeysStartingWith(CharSequence prefix) {
    final SearchResult found = searchTree(prefix);
    switch (found.classification) {
        case EXACT_MATCH:
            return getDescendantKeys(prefix, found.nodeFound);
        case KEY_ENDS_MID_EDGE: {
            // The prefix stops part-way along the edge into the node found (searching CO when the
            // first matching node is COFFEE), so complete it with the rest of that edge.
            CharSequence restOfEdge = CharSequences.getSuffix(found.nodeFound.getIncomingEdge(), found.charsMatchedInNodeFound);
            CharSequence keyOfNode = CharSequences.concatenate(prefix, restOfEdge);
            return getDescendantKeys(keyOfNode, found.nodeFound);
        }
        default:
            // An incomplete match means the prefix is not a prefix of any node.
            return Collections.emptySet();
    }
}
/**
 * Returns the values below the node reached by searching for the prefix, or an empty set if the
 * prefix does not lead into the tree.
 */
@Override
public Iterable<O> getValuesForKeysStartingWith(CharSequence prefix) {
    final SearchResult found = searchTree(prefix);
    switch (found.classification) {
        case EXACT_MATCH:
            return getDescendantValues(prefix, found.nodeFound);
        case KEY_ENDS_MID_EDGE: {
            // The prefix stops part-way along the edge into the node found (searching CO when the
            // first matching node is COFFEE), so complete it with the rest of that edge.
            CharSequence restOfEdge = CharSequences.getSuffix(found.nodeFound.getIncomingEdge(), found.charsMatchedInNodeFound);
            CharSequence keyOfNode = CharSequences.concatenate(prefix, restOfEdge);
            return getDescendantValues(keyOfNode, found.nodeFound);
        }
        default:
            // An incomplete match means the prefix is not a prefix of any node.
            return Collections.emptySet();
    }
}
/**
 * Returns the key-value pairs below the node reached by searching for the prefix, or an empty set
 * if the prefix does not lead into the tree.
 */
@Override
public Iterable<KeyValuePair<O>> getKeyValuePairsForKeysStartingWith(CharSequence prefix) {
    final SearchResult found = searchTree(prefix);
    switch (found.classification) {
        case EXACT_MATCH:
            return getDescendantKeyValuePairs(prefix, found.nodeFound);
        case KEY_ENDS_MID_EDGE: {
            // The prefix stops part-way along the edge into the node found (searching CO when the
            // first matching node is COFFEE), so complete it with the rest of that edge.
            CharSequence restOfEdge = CharSequences.getSuffix(found.nodeFound.getIncomingEdge(), found.charsMatchedInNodeFound);
            CharSequence keyOfNode = CharSequences.concatenate(prefix, restOfEdge);
            return getDescendantKeyValuePairs(keyOfNode, found.nodeFound);
        }
        default:
            // An incomplete match means the prefix is not a prefix of any node.
            return Collections.emptySet();
    }
}
/**
 * Removes the value stored for the given key, restructuring the surrounding nodes as needed.
 * The whole operation runs while holding the write lock, which is released in the finally block.
 * Nodes are not modified in place: replacement nodes are created through the node factory and
 * swapped into the parent (or into the root field).
 *
 * @param key the key whose value should be removed
 * @return true if an exactly matching node holding a value was found and removed, false if the
 * search did not end in an exact match or the matching node holds no value
 * @throws IllegalArgumentException if the key is null
 */
@Override
public boolean remove(CharSequence key) {
if (key == null) {
throw new IllegalArgumentException("The key argument was null");
}
acquireWriteLock();
try {
// Search only after the write lock is held...
SearchResult searchResult = searchTree(key);
SearchResult.Classification classification = searchResult.classification;
if (classification == Classification.EXACT_MATCH) {
if (searchResult.nodeFound.getValue() == null) {
// This node was created automatically as a split between two branches (implicit node).
// No need to remove it...
return false;
}
// Proceed with deleting the node...
List<Node> childEdges = searchResult.nodeFound.getOutgoingEdges();
if (childEdges.size() > 1) {
// This node has more than one child, so if we delete the value from this node, we still need
// to leave a similar node in place to act as the split between the child edges.
// Just delete the value associated with this node.
// -> Clone this node without its value, preserving its child nodes...
@SuppressWarnings({"NullableProblems"})
Node cloned = nodeFactory.createNode(searchResult.nodeFound.getIncomingEdge(), null, searchResult.nodeFound.getOutgoingEdges(), false);
// Re-add the replacement node to the parent...
searchResult.parentNode.updateOutgoingEdge(cloned);
} else if (childEdges.size() == 1) {
// Node has one child edge.
// Create a new node which is the concatenation of the edges from this node and its child,
// and which has the outgoing edges of the child and the value from the child.
Node child = childEdges.get(0);
CharSequence concatenatedEdges = CharSequences.concatenate(searchResult.nodeFound.getIncomingEdge(), child.getIncomingEdge());
Node mergedNode = nodeFactory.createNode(concatenatedEdges, child.getValue(), child.getOutgoingEdges(), false);
// Re-add the merged node to the parent...
searchResult.parentNode.updateOutgoingEdge(mergedNode);
} else {
// Node has no children. Delete this node from its parent,
// which involves re-creating the parent rather than simply updating its child edge
// (this is why we need parentNodesParent).
// However if this would leave the parent with only one remaining child edge,
// and the parent itself has no value (is a split node), and the parent is not the root node
// (a special case which we never merge), then we also need to merge the parent with its
// remaining child.
List<Node> currentEdgesFromParent = searchResult.parentNode.getOutgoingEdges();
// Create a list of the outgoing edges of the parent which will remain
// if we remove this child...
// Use a non-resizable list, as a sanity check to force ArrayIndexOutOfBounds...
List<Node> newEdgesOfParent = Arrays.asList(new Node[searchResult.parentNode.getOutgoingEdges().size() - 1]);
// Copy every sibling except the node being removed (compared by identity)...
for (int i = 0, added = 0, numParentEdges = currentEdgesFromParent.size(); i < numParentEdges; i++) {
Node node = currentEdgesFromParent.get(i);
if (node != searchResult.nodeFound) {
newEdgesOfParent.set(added++, node);
}
}
// Note the parent might actually be the root node (which we should never merge)...
boolean parentIsRoot = (searchResult.parentNode == root);
Node newParent;
if (newEdgesOfParent.size() == 1 && searchResult.parentNode.getValue() == null && !parentIsRoot) {
// Parent is a non-root split node with only one remaining child, which can now be merged.
Node parentsRemainingChild = newEdgesOfParent.get(0);
// Merge the parent with its only remaining child...
CharSequence concatenatedEdges = CharSequences.concatenate(searchResult.parentNode.getIncomingEdge(), parentsRemainingChild.getIncomingEdge());
newParent = nodeFactory.createNode(concatenatedEdges, parentsRemainingChild.getValue(), parentsRemainingChild.getOutgoingEdges(), parentIsRoot);
} else {
// Parent is a node which either has a value of its own, has more than one remaining
// child, or is actually the root node (we never merge the root node).
// Create new parent node which is the same as is currently just without the edge to the
// node being deleted...
newParent = nodeFactory.createNode(searchResult.parentNode.getIncomingEdge(), searchResult.parentNode.getValue(), newEdgesOfParent, parentIsRoot);
}
// Re-add the parent node to its parent...
if (parentIsRoot) {
// Replace the root node...
this.root = newParent;
} else {
// Re-add the parent node to its parent...
searchResult.parentNodesParent.updateOutgoingEdge(newParent);
}
}
return true;
}
// Any other classification means the key is not stored in the tree...
return false;
}
finally {
releaseWriteLock();
}
}
/**
 * Returns the keys below the deepest node that the candidate could be matched against, or an
 * empty set if the only node matched is the root.
 */
@Override
public Iterable<CharSequence> getClosestKeys(CharSequence candidate) {
    final SearchResult found = searchTree(candidate);
    switch (found.classification) {
        case EXACT_MATCH:
            return getDescendantKeys(candidate, found.nodeFound);
        case KEY_ENDS_MID_EDGE: {
            // The candidate stops part-way along the edge into the node found (searching CO when
            // the first matching node is COFFEE), so complete it with the rest of that edge.
            CharSequence restOfEdge = CharSequences.getSuffix(found.nodeFound.getIncomingEdge(), found.charsMatchedInNodeFound);
            CharSequence keyOfNode = CharSequences.concatenate(candidate, restOfEdge);
            return getDescendantKeys(keyOfNode, found.nodeFound);
        }
        case INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE: {
            // The candidate diverges inside the edge (searching CX when the deepest matching node
            // is CO): report that node and its descendants under the node's own key.
            CharSequence parentKey = CharSequences.getPrefix(candidate, found.charsMatched - found.charsMatchedInNodeFound);
            CharSequence keyOfNode = CharSequences.concatenate(parentKey, found.nodeFound.getIncomingEdge());
            return getDescendantKeys(keyOfNode, found.nodeFound);
        }
        case INCOMPLETE_MATCH_TO_END_OF_EDGE: {
            if (found.charsMatched == 0) {
                // Only the root matched, which is not considered a match for anything.
                return Collections.emptySet();
            }
            // The candidate extends past the node (searching COFFEE when the deepest matching
            // node is CO): report that node and its descendants.
            CharSequence keyOfNode = CharSequences.getPrefix(candidate, found.charsMatched);
            return getDescendantKeys(keyOfNode, found.nodeFound);
        }
        default:
            return Collections.emptySet();
    }
}
/**
 * Returns the values below the deepest node that the candidate could be matched against, or an
 * empty set if the only node matched is the root.
 */
@Override
public Iterable<O> getValuesForClosestKeys(CharSequence candidate) {
    final SearchResult found = searchTree(candidate);
    switch (found.classification) {
        case EXACT_MATCH:
            return getDescendantValues(candidate, found.nodeFound);
        case KEY_ENDS_MID_EDGE: {
            // The candidate stops part-way along the edge into the node found (searching CO when
            // the first matching node is COFFEE), so complete it with the rest of that edge.
            CharSequence restOfEdge = CharSequences.getSuffix(found.nodeFound.getIncomingEdge(), found.charsMatchedInNodeFound);
            CharSequence keyOfNode = CharSequences.concatenate(candidate, restOfEdge);
            return getDescendantValues(keyOfNode, found.nodeFound);
        }
        case INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE: {
            // The candidate diverges inside the edge (searching CX when the deepest matching node
            // is CO): report that node and its descendants under the node's own key.
            CharSequence parentKey = CharSequences.getPrefix(candidate, found.charsMatched - found.charsMatchedInNodeFound);
            CharSequence keyOfNode = CharSequences.concatenate(parentKey, found.nodeFound.getIncomingEdge());
            return getDescendantValues(keyOfNode, found.nodeFound);
        }
        case INCOMPLETE_MATCH_TO_END_OF_EDGE: {
            if (found.charsMatched == 0) {
                // Only the root matched, which is not considered a match for anything.
                return Collections.emptySet();
            }
            // The candidate extends past the node (searching COFFEE when the deepest matching
            // node is CO): report that node and its descendants.
            CharSequence keyOfNode = CharSequences.getPrefix(candidate, found.charsMatched);
            return getDescendantValues(keyOfNode, found.nodeFound);
        }
        default:
            return Collections.emptySet();
    }
}
/**
 * Returns the key-value pairs below the deepest node that the candidate could be matched against,
 * or an empty set if the only node matched is the root.
 */
@Override
public Iterable<KeyValuePair<O>> getKeyValuePairsForClosestKeys(CharSequence candidate) {
    final SearchResult found = searchTree(candidate);
    switch (found.classification) {
        case EXACT_MATCH:
            return getDescendantKeyValuePairs(candidate, found.nodeFound);
        case KEY_ENDS_MID_EDGE: {
            // The candidate stops part-way along the edge into the node found (searching CO when
            // the first matching node is COFFEE), so complete it with the rest of that edge.
            CharSequence restOfEdge = CharSequences.getSuffix(found.nodeFound.getIncomingEdge(), found.charsMatchedInNodeFound);
            CharSequence keyOfNode = CharSequences.concatenate(candidate, restOfEdge);
            return getDescendantKeyValuePairs(keyOfNode, found.nodeFound);
        }
        case INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE: {
            // The candidate diverges inside the edge (searching CX when the deepest matching node
            // is CO): report that node and its descendants under the node's own key.
            CharSequence parentKey = CharSequences.getPrefix(candidate, found.charsMatched - found.charsMatchedInNodeFound);
            CharSequence keyOfNode = CharSequences.concatenate(parentKey, found.nodeFound.getIncomingEdge());
            return getDescendantKeyValuePairs(keyOfNode, found.nodeFound);
        }
        case INCOMPLETE_MATCH_TO_END_OF_EDGE: {
            if (found.charsMatched == 0) {
                // Only the root matched, which is not considered a match for anything.
                return Collections.emptySet();
            }
            // The candidate extends past the node (searching COFFEE when the deepest matching
            // node is CO): report that node and its descendants.
            CharSequence keyOfNode = CharSequences.getPrefix(candidate, found.charsMatched);
            return getDescendantKeyValuePairs(keyOfNode, found.nodeFound);
        }
        default:
            return Collections.emptySet();
    }
}
/**
 * Counts the nodes holding a non-null value by walking every node reachable from the root.
 *
 * @return the number of nodes whose value is not null
 */
@Override
public int size() {
    Deque<Node> pending = new LinkedList<>();
    pending.push(this.root);
    int valued = 0;
    while (!pending.isEmpty()) {
        Node node = pending.pop();
        pending.addAll(node.getOutgoingEdges());
        if (node.getValue() != null) {
            valued++;
        }
    }
    return valued;
}
/**
* Adds the given value to the tree while holding the write lock, creating a node for the value as necessary.
* If a value is already stored for the same key, either overwrites the existing value, or simply returns the
* existing value, depending on the given value of the {@code overwrite} flag.
* <p>
* The tree is searched after the write lock has been acquired, and the strategy is chosen from the
* classification of the search result:
* <ul>
* <li>{@code EXACT_MATCH}: the node found is replaced by a node carrying the new value (unless
* {@code overwrite} is false and a value is already present)</li>
* <li>{@code KEY_ENDS_MID_EDGE}: the node found is split in two, the new parent carrying the new value</li>
* <li>{@code INCOMPLETE_MATCH_TO_END_OF_EDGE}: a new child is added to a clone of the node found</li>
* <li>{@code INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE}: the node found is split in three</li>
* </ul>
*
* @param key The key against which the value should be stored
* @param value The value to store against the key
* @param overwrite If true, should replace any existing value, if false should not replace any existing value
* @return The existing value for this key, if there was one, otherwise null
* @throws IllegalArgumentException if the key is null or zero-length, or if the value is null
* @throws IllegalStateException if the search result carries a classification not handled here
*/
Object putInternal(CharSequence key, Object value, boolean overwrite) {
if (key == null) {
throw new IllegalArgumentException("The key argument was null");
}
if (key.length() == 0) {
throw new IllegalArgumentException("The key argument was zero-length");
}
if (value == null) {
throw new IllegalArgumentException("The value argument was null");
}
acquireWriteLock();
try {
// Note we search the tree here after we have acquired the write lock...
SearchResult searchResult = searchTree(key);
SearchResult.Classification classification = searchResult.classification;
switch (classification) {
case EXACT_MATCH: {
// Search found an exact match for all edges leading to this node.
// -> Add or update the value in the node found, by replacing
// the existing node with a new node containing the value...
// First check if existing node has a value, and if we are allowed to overwrite it.
// Return early without overwriting if necessary...
Object existingValue = searchResult.nodeFound.getValue();
if (!overwrite && existingValue != null) {
return existingValue;
}
// Create a replacement for the existing node containing the new value...
Node replacementNode = nodeFactory.createNode(searchResult.nodeFound.getIncomingEdge(), value, searchResult.nodeFound.getOutgoingEdges(), false);
searchResult.parentNode.updateOutgoingEdge(replacementNode);
// Return the existing value (null if the node found was a split node without a value)...
return existingValue;
}
case KEY_ENDS_MID_EDGE: {
// Search ran out of characters from the key while in the middle of an edge in the node.
// -> Split the node in two: Create a new parent node storing the new value,
// and a new child node holding the original value and edges from the existing node...
CharSequence keyCharsFromStartOfNodeFound = key.subSequence(searchResult.charsMatched - searchResult.charsMatchedInNodeFound, key.length());
CharSequence commonPrefix = CharSequences.getCommonPrefix(keyCharsFromStartOfNodeFound, searchResult.nodeFound.getIncomingEdge());
CharSequence suffixFromExistingEdge = CharSequences.subtractPrefix(searchResult.nodeFound.getIncomingEdge(), commonPrefix);
// Create new nodes...
Node newChild = nodeFactory.createNode(suffixFromExistingEdge, searchResult.nodeFound.getValue(), searchResult.nodeFound.getOutgoingEdges(), false);
Node newParent = nodeFactory.createNode(commonPrefix, value, Collections.singletonList(newChild), false);
// Add the new parent to the parent of the node being replaced (replacing the existing node)...
searchResult.parentNode.updateOutgoingEdge(newParent);
// Return null for the existing value...
return null;
}
case INCOMPLETE_MATCH_TO_END_OF_EDGE: {
// Search found a difference in characters between the key and the start of all child edges leaving the
// node, the key still has trailing unmatched characters.
// -> Add a new child to the node, containing the trailing characters from the key.
// NOTE: this is the only branch which allows an edge to be added to the root.
// (Root node's own edge is "" empty string, so is considered a prefixing edge of every key)
// Create a new child node containing the trailing characters...
CharSequence keySuffix = key.subSequence(searchResult.charsMatched, key.length());
Node newChild = nodeFactory.createNode(keySuffix, value, Collections.emptyList(), false);
// Clone the current node adding the new child...
List<Node> edges = new ArrayList<>(searchResult.nodeFound.getOutgoingEdges().size() + 1);
edges.addAll(searchResult.nodeFound.getOutgoingEdges());
edges.add(newChild);
Node clonedNode = nodeFactory.createNode(searchResult.nodeFound.getIncomingEdge(), searchResult.nodeFound.getValue(), edges, searchResult.nodeFound == root);
// Re-add the cloned node to its parent node, or install it as the new root if the root itself was cloned...
if (searchResult.nodeFound == root) {
this.root = clonedNode;
}
else {
searchResult.parentNode.updateOutgoingEdge(clonedNode);
}
// Return null for the existing value...
return null;
}
case INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE: {
// Search found a difference in characters between the key and the characters in the middle of the
// edge in the current node, and the key still has trailing unmatched characters.
// -> Split the node in three:
// Let's call node found: NF
// (1) Create a new node N1 containing the unmatched characters from the rest of the key, and the
// value supplied to this method
// (2) Create a new node N2 containing the unmatched characters from the rest of the edge in NF, and
// copy the original edges and the value from NF unmodified into N2
// (3) Create a new node N3, which will be the split node, containing the matched characters from
// the key and the edge, and add N1 and N2 as child nodes of N3
// (4) Re-add N3 to the parent node of NF, effectively replacing NF in the tree
CharSequence keyCharsFromStartOfNodeFound = key.subSequence(searchResult.charsMatched - searchResult.charsMatchedInNodeFound, key.length());
CharSequence commonPrefix = CharSequences.getCommonPrefix(keyCharsFromStartOfNodeFound, searchResult.nodeFound.getIncomingEdge());
CharSequence suffixFromExistingEdge = CharSequences.subtractPrefix(searchResult.nodeFound.getIncomingEdge(), commonPrefix);
CharSequence suffixFromKey = key.subSequence(searchResult.charsMatched, key.length());
// Create new nodes...
Node n1 = nodeFactory.createNode(suffixFromKey, value, Collections.emptyList(), false);
Node n2 = nodeFactory.createNode(suffixFromExistingEdge, searchResult.nodeFound.getValue(), searchResult.nodeFound.getOutgoingEdges(), false);
// The split node N3 carries no value of its own, hence the null value argument...
@SuppressWarnings({"NullableProblems"})
Node n3 = nodeFactory.createNode(commonPrefix, null, Arrays.asList(n1, n2), false);
searchResult.parentNode.updateOutgoingEdge(n3);
// Return null for the existing value...
return null;
}
default: {
// This is a safeguard against a new enum constant being added in future.
throw new IllegalStateException("Unexpected classification for search result: " + searchResult);
}
}
}
finally {
// Always release the lock, including on the early-return and exception paths above...
releaseWriteLock();
}
}
/**
 * Returns a lazy iterable over the keys of the start node and its descendants which hold a value.
 * <p>
 * Nothing is traversed until an iterator is requested, and each key is computed only when the iterator
 * is advanced. Nodes whose value is null are skipped.
 * <p>
 * Every key is passed through {@link #transformKeyForResult(CharSequence)} and then converted to a
 * {@link String}, because the {@link CharSequence} contract does not specify equals() and hashCode().
 *
 * @param startKey The key which matches the given start node
 * @param startNode The node at which the traversal starts
 * @return A lazy iterable of the keys found at or below the start node
 */
@SuppressWarnings({"JavaDoc"})
Iterable<CharSequence> getDescendantKeys(final CharSequence startKey, final Node startNode) {
    return () -> new LazyIterator<CharSequence>() {
        private final Iterator<NodeKeyPair> traversal = lazyTraverseDescendants(startKey, startNode).iterator();

        @Override
        protected CharSequence computeNext() {
            while (traversal.hasNext()) {
                final NodeKeyPair candidate = traversal.next();
                if (candidate.node.getValue() == null) {
                    // A node without a value does not represent a stored key, keep looking...
                    continue;
                }
                // Let subclasses adjust the key, then hand it out as a String for reliable equality...
                final CharSequence resultKey = transformKeyForResult(candidate.key);
                return CharSequences.toString(resultKey);
            }
            // Traversal exhausted, nothing further to return...
            return endOfData();
        }
    };
}
/**
 * Returns a lazy iterable over the non-null values stored in the start node and its descendants.
 * <p>
 * Nothing is traversed until an iterator is requested, and each value is located only when the iterator
 * is advanced.
 *
 * @param startKey The key which matches the given start node
 * @param startNode The node at which the traversal starts
 * @param <O> The type to which the stored values are cast
 * @return A lazy iterable of the values found at or below the start node
 */
<O> Iterable<O> getDescendantValues(final CharSequence startKey, final Node startNode) {
    return () -> new LazyIterator<O>() {
        private final Iterator<NodeKeyPair> traversal = lazyTraverseDescendants(startKey, startNode).iterator();

        @Override
        @SuppressWarnings({"unchecked"})
        protected O computeNext() {
            while (traversal.hasNext()) {
                final Object candidate = traversal.next().node.getValue();
                if (candidate != null) {
                    // Nodes expose their value as a plain Object, so the cast to the caller's
                    // type has to happen here, in the wrapper logic around the tree...
                    return (O) candidate;
                }
            }
            // Traversal exhausted, nothing further to return...
            return endOfData();
        }
    };
}
/**
 * Returns a lazy iterable over {@link KeyValuePair} objects for the start node and its descendants,
 * one pair per node which holds a non-null value.
 * <p>
 * Nothing is traversed until an iterator is requested, and each pair is built only when the iterator
 * is advanced.
 * <p>
 * Every key is passed through {@link #transformKeyForResult(CharSequence)} and then converted to a
 * {@link String}, because the {@link CharSequence} contract does not specify equals() and hashCode().
 *
 * @param startKey The key which matches the given start node
 * @param startNode The node at which the traversal starts
 * @param <O> The type to which the stored values are cast
 * @return A lazy iterable of the key-value pairs found at or below the start node
 */
<O> Iterable<KeyValuePair<O>> getDescendantKeyValuePairs(final CharSequence startKey, final Node startNode) {
    return () -> new LazyIterator<KeyValuePair<O>>() {
        private final Iterator<NodeKeyPair> traversal = lazyTraverseDescendants(startKey, startNode).iterator();

        @Override
        protected KeyValuePair<O> computeNext() {
            while (traversal.hasNext()) {
                final NodeKeyPair candidate = traversal.next();
                final Object storedValue = candidate.node.getValue();
                if (storedValue == null) {
                    // A node without a value does not represent a stored key, keep looking...
                    continue;
                }
                // Let subclasses adjust the key, then hand it out as a String for reliable equality...
                final CharSequence resultKey = transformKeyForResult(candidate.key);
                return new KeyValuePairImpl<O>(CharSequences.toString(resultKey), storedValue);
            }
            // Traversal exhausted, nothing further to return...
            return endOfData();
        }
    };
}
/**
 * Default implementation of the {@link KeyValuePair} interface, holding a {@link String} key and a value.
 * <p>
 * Two instances are considered equal when their keys are equal; the value takes no part in
 * {@link #equals(Object)} or {@link #hashCode()}.
 */
public static class KeyValuePairImpl<O> implements KeyValuePair<O> {

    final String key;
    final O value;

    /**
     * Creates a pair from the given key and value.
     * <p>
     * Implementation note: the key has to be supplied as a {@link String} so that object equality can
     * be tested reliably; {@link CharSequence} does not specify a contract for
     * {@link Object#equals(Object)}.
     *
     * @param key The key as a string
     * @param value The value, which is cast unchecked to the type parameter of this pair
     */
    @SuppressWarnings({"unchecked"})
    public KeyValuePairImpl(String key, Object value) {
        this.key = key;
        // The value arrives as a plain Object because nodes are not generically typed,
        // so the cast happens here rather than in the tree manipulation logic...
        this.value = (O) value;
    }

    /**
     * @return the key of this pair
     */
    @Override
    public CharSequence getKey() {
        return key;
    }

    /**
     * @return the value of this pair
     */
    @Override
    public O getValue() {
        return value;
    }

    /**
     * Compares by key only: the other object must be of exactly the same class and have an equal key.
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null || o.getClass() != getClass()) {
            return false;
        }
        return key.equals(((KeyValuePairImpl<?>) o).key);
    }

    /**
     * @return the hash code of the key, consistent with {@link #equals(Object)}
     */
    @Override
    public int hashCode() {
        return key.hashCode();
    }

    /**
     * @return the pair rendered as {@code (key, value)}
     */
    @Override
    public String toString() {
        return "(" + key + ", " + value + ")";
    }
}
/**
 * Lazily walks the tree depth-first in pre-order, beginning at the given node. The next node is only
 * determined when the returned iterator is advanced, and an explicit stack is used instead of recursion
 * so that deep trees do not require a large JVM stack.
 * <p>
 * Each node is returned together with its key in a {@link NodeKeyPair}. The key of the start node is the
 * given start key; the key of any other node is its parent's key with the node's incoming edge appended.
 * <p>
 * Child nodes are visited in the order in which {@link Node#getOutgoingEdges()} lists them.
 *
 * @param startKey The key which matches the given start node
 * @param startNode The start node
 * @return An iterable whose iterators traverse the tree depth-first, pre-ordered, from the start node
 */
protected Iterable<NodeKeyPair> lazyTraverseDescendants(final CharSequence startKey, final Node startNode) {
    return () -> new LazyIterator<NodeKeyPair>() {
        private final Deque<NodeKeyPair> pending = new LinkedList<>();

        {
            pending.push(new NodeKeyPair(startNode, startKey));
        }

        @Override
        protected NodeKeyPair computeNext() {
            if (pending.isEmpty()) {
                return endOfData();
            }
            final NodeKeyPair visited = pending.pop();
            final List<Node> children = visited.node.getOutgoingEdges();
            // Push the last child first: the stack reverses the order again, so the first child
            // ends up on top and is the next node to be visited...
            for (int index = children.size() - 1; index >= 0; index--) {
                final Node child = children.get(index);
                final CharSequence childKey = CharSequences.concatenate(visited.key, child.getIncomingEdge());
                pending.push(new NodeKeyPair(child, childKey));
            }
            return visited;
        }
    };
}
/**
 * Immutable holder pairing a node with the key that leads to it.
 * Used internally by {@link #lazyTraverseDescendants}.
 */
protected static class NodeKeyPair {

    /** The node. */
    public final Node node;

    /** The key associated with the node. */
    public final CharSequence key;

    /**
     * @param node The node
     * @param key The key associated with the node
     */
    public NodeKeyPair(Node node, CharSequence key) {
        this.key = key;
        this.node = node;
    }
}
/**
* A hook method which may be overridden by subclasses, to transform a key just before it is returned to
* the application, for example by the {@link #getKeysStartingWith(CharSequence)} or the
* {@link #getKeyValuePairsForKeysStartingWith(CharSequence)} methods.
* <p>
* This hook is expected to be used by
* implementations where keys are stored in the tree in reverse order but results should be returned in normal
* order.
* <p>
* <b>This default implementation simply returns the given key unmodified.</b>
*
* @param rawKey The raw key as stored in the tree
* @return A transformed version of the key; in this default implementation, the same object as {@code rawKey}
*/
protected CharSequence transformKeyForResult(CharSequence rawKey) {
return rawKey;
}
/**
 * Walks down from the root and locates the node matching the longest prefix of the given key.
 * <p>
 * If {@link SearchResult#charsMatched} equals the length of the key, the whole key was consumed;
 * otherwise the match is inexact and some trailing characters of the key were not matched.
 * <p>
 * The walk stops in one of three ways:
 * <ul>
 *     <li>
 *         all characters of the key have been consumed;
 *     </li>
 *     <li>
 *         the current node has no outgoing edge starting with the next character of the key. The match
 *         then ends evenly on the boundary between a node and its children, and the rest of the key
 *         could be added to the tree as a new child of the node found;
 *     </li>
 *     <li>
 *         a character within the edge of the node just entered differs from the key. The match then ends
 *         in the middle of that edge, and adding the rest of the key would require splitting the node
 *         found into a node for the matched part of the edge, a node for the rest of the edge, and a
 *         node for the rest of the key.
 *     </li>
 * </ul>
 * The {@link SearchResult#classification} of the returned result tells these outcomes apart.
 *
 * @param key a key for which the node matching the longest prefix of the key is required
 * @return A {@link SearchResult} containing the node matching the longest prefix of the key, its parent
 * node and its parent's parent node (each null if the walk did not get that deep), the number of
 * characters of the key matched in total and within the edge of the node found, and a classification
 * of the match
 */
SearchResult searchTree(CharSequence key) {
    final int keyLength = key.length();
    Node grandparent = null;
    Node parent = null;
    Node node = root;
    int totalMatched = 0;
    int matchedInNode = 0;
    boolean mismatchInEdge = false;
    while (!mismatchInEdge && totalMatched < keyLength) {
        final Node child = node.getOutgoingEdge(key.charAt(totalMatched));
        if (child == null) {
            // No edge continues with the next character of the key, this is as deep as we get...
            break;
        }
        // Descend one level...
        grandparent = parent;
        parent = node;
        node = child;
        matchedInNode = 0;
        final CharSequence edge = node.getIncomingEdge();
        final int edgeLength = edge.length();
        // Consume the edge for as long as it agrees with the key...
        while (matchedInNode < edgeLength && totalMatched < keyLength) {
            if (edge.charAt(matchedInNode) != key.charAt(totalMatched)) {
                // The key diverges inside this edge: this node is the deepest (inexact) match...
                mismatchInEdge = true;
                break;
            }
            totalMatched++;
            matchedInNode++;
        }
    }
    return new SearchResult(key, node, totalMatched, matchedInNode, parent, grandparent);
}
/**
 * Immutable outcome of searching the tree for the node matching the longest prefix of a key. Holds the
 * node found, its parent node, its parent's parent node, and the number of characters matched both in
 * total and within the edge of the node found.
 * <p>
 * The result is also classified on construction, so that the algorithms which add nodes to and remove
 * nodes from the tree can select an appropriate strategy.
 */
static class SearchResult {

    final CharSequence key;
    final Node nodeFound;
    final int charsMatched;
    final int charsMatchedInNodeFound;
    final Node parentNode;
    final Node parentNodesParent;
    final Classification classification;

    /**
     * The kinds of match a search can end in.
     */
    enum Classification {
        EXACT_MATCH,
        INCOMPLETE_MATCH_TO_END_OF_EDGE,
        INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE,
        KEY_ENDS_MID_EDGE,
        INVALID // INVALID is never used, except in unit testing
    }

    /**
     * Stores the given search outcome and classifies it via
     * {@link #classify(CharSequence, Node, int, int)}.
     */
    SearchResult(CharSequence key, Node nodeFound, int charsMatched, int charsMatchedInNodeFound, Node parentNode, Node parentNodesParent) {
        this.key = key;
        this.nodeFound = nodeFound;
        this.charsMatched = charsMatched;
        this.charsMatchedInNodeFound = charsMatchedInNodeFound;
        this.parentNode = parentNode;
        this.parentNodesParent = parentNodesParent;
        // Classification comes last, so that all other fields are set if classify() renders this object...
        this.classification = classify(key, nodeFound, charsMatched, charsMatchedInNodeFound);
    }

    /**
     * Classifies a search outcome by two questions: was the whole key consumed, and was the whole
     * incoming edge of the node found consumed?
     *
     * @return {@code EXACT_MATCH} (key and edge both consumed), {@code KEY_ENDS_MID_EDGE} (key consumed,
     * edge not), {@code INCOMPLETE_MATCH_TO_END_OF_EDGE} (edge consumed, key not) or
     * {@code INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE} (neither consumed)
     * @throws IllegalStateException if more characters were matched than the key or the edge contains
     */
    protected Classification classify(CharSequence key, Node nodeFound, int charsMatched, int charsMatchedInNodeFound) {
        final int keyLength = key.length();
        // The node is only consulted when the key-side count is plausible...
        if (charsMatched <= keyLength) {
            final boolean keyConsumed = charsMatched == keyLength;
            final int edgeLength = nodeFound.getIncomingEdge().length();
            if (charsMatchedInNodeFound == edgeLength) {
                return keyConsumed ? Classification.EXACT_MATCH : Classification.INCOMPLETE_MATCH_TO_END_OF_EDGE;
            }
            if (charsMatchedInNodeFound < edgeLength) {
                return keyConsumed ? Classification.KEY_ENDS_MID_EDGE : Classification.INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE;
            }
        }
        throw new IllegalStateException("Unexpected failure to classify SearchResult: " + this);
    }

    @Override
    public String toString() {
        return "SearchResult{key=" + key
                + ", nodeFound=" + nodeFound
                + ", charsMatched=" + charsMatched
                + ", charsMatchedInNodeFound=" + charsMatchedInNodeFound
                + ", parentNode=" + parentNode
                + ", parentNodesParent=" + parentNodesParent
                + ", classification=" + classification
                + "}";
    }
}
/**
 * Returns the current root node of this tree.
 *
 * @return the root node
 */
public Node getNode() {
    return this.root;
}
}

View file

@ -0,0 +1,145 @@
package org.xbib.datastructures.trie.concurrent;
import org.xbib.datastructures.trie.concurrent.util.KeyValuePair;
/**
* API of a radix tree, that is a tree which allows values to be looked up based on <i>prefixes</i> of the keys
* with which they were associated, as well as based on exact matches for keys. A radix tree essentially allows
* <i><u>"equals"</u></i> and <i><u>"starts with"</u></i> lookup.
* <p>
* See documentation on each method for details.
*
* @param <O> The type of the values associated with keys in the tree
*/
public interface RadixTree<O> {
/**
* Associates the given value with the given key; replacing any previous value associated with the key.
* Returns the previous value associated with the key, if any.
* <p>
* This operation is performed atomically.
*
* @param key The key with which the specified value should be associated
* @param value The value to associate with the key, which cannot be null
* @return The previous value associated with the key, if there was one, otherwise null
*/
O put(CharSequence key, O value);
/**
* If a value is not already associated with the given key in the tree, associates the given value with the
* key; otherwise if an existing value is already associated, returns the existing value and does not overwrite it.
* <p>
* This operation is performed atomically.
*
* @param key The key with which the specified value should be associated
* @param value The value to associate with the key, which cannot be null
* @return The existing value associated with the key, if there was one; otherwise null in which case the new
* value was successfully associated
*/
O putIfAbsent(CharSequence key, O value);
/**
* Removes the value associated with the given key (exact match).
* If no value is associated with the key, does nothing.
*
* @param key The key for which an associated value should be removed
* @return True if a value was removed (and therefore was associated with the key), false if no value was
* associated/removed
*/
boolean remove(CharSequence key);
/**
* Returns the value associated with the given key (exact match), or returns null if no such value
* is associated with the key.
*
* @param key The key with which a sought value might be associated
* @return The value associated with the given key (exact match), or null if no value was associated with the key
*/
O getValueForExactKey(CharSequence key);
/**
* Returns a lazy iterable which returns the set of keys in the tree which start with the given prefix.
* <p>
* This is <i>inclusive</i> - if the given prefix is an exact match for a key in the tree, that key is also
* returned.
*
* @param prefix A prefix of sought keys in the tree
* @return The set of keys in the tree which start with the given prefix, inclusive
*/
Iterable<CharSequence> getKeysStartingWith(CharSequence prefix);
/**
* Returns a lazy iterable which returns the set of values associated with keys in the tree which start with the
* given prefix.
* <p>
* This is <i>inclusive</i> - if the given prefix is an exact match for a key in the tree, the value associated
* with that key is also returned.
* <p>
* Note that although the same value might originally have been associated with multiple keys, the set returned
* does not contain duplicates (as determined by the value objects' implementation of {@link Object#equals(Object)}).
* <p>
* NOTE(review): the no-duplicates guarantee above depends on the implementation; confirm that implementing
* classes actually de-duplicate values before relying on it.
*
* @param prefix A prefix of keys in the tree for which associated values are sought
* @return The set of values associated with keys in the tree which start with the given prefix, inclusive
*/
Iterable<O> getValuesForKeysStartingWith(CharSequence prefix);
/**
* Returns a lazy iterable which returns the set of {@link KeyValuePair}s for keys and their associated values
* in the tree, where the keys start with the given prefix.
* <p>
* This is <i>inclusive</i> - if the given prefix is an exact match for a key in the tree, the {@link KeyValuePair}
* for that key is also returned.
*
* @param prefix A prefix of keys in the tree for which associated {@link KeyValuePair}s are sought
* @return The set of {@link KeyValuePair}s for keys in the tree which start with the given prefix, inclusive
*/
Iterable<KeyValuePair<O>> getKeyValuePairsForKeysStartingWith(CharSequence prefix);
/**
* Returns a lazy iterable which returns the set of keys in the tree which are the closest match for the given
* candidate key.
* <p>
* Example:<br>
* Tree contains: {@code Ford Focus}, {@code Ford Mondeo}, {@code BMW M3}<br>
* {@code getClosestKeys("Ford F150")} -&gt; returns {@code Ford Focus}, {@code Ford Mondeo}<br>
* <p>
* This is <i>inclusive</i> - if the given candidate is an exact match for a key in the tree, that key is also
* returned.
*
* @param candidate A candidate key
* @return The set of keys in the tree which most closely match the candidate key, inclusive
*/
Iterable<CharSequence> getClosestKeys(CharSequence candidate);
/**
* Returns a lazy iterable which returns the set of values associated with keys in the tree which are the closest
* match for the given candidate key.
* <p>
* See {@link #getClosestKeys(CharSequence)} for more details.
*
* @param candidate A candidate key
* @return The set of values associated with keys in the tree which most closely match the candidate key, inclusive
*/
Iterable<O> getValuesForClosestKeys(CharSequence candidate);
/**
* Returns a lazy iterable which returns the set of {@link KeyValuePair}s for keys and their associated values in
* the tree which are the closest match for the given candidate key.
* <p>
* See {@link #getClosestKeys(CharSequence)} for more details.
*
* @param candidate A candidate key
* @return The set of {@link KeyValuePair}s for keys and their associated values in the tree which most closely
* match the candidate key, inclusive
*/
Iterable<KeyValuePair<O>> getKeyValuePairsForClosestKeys(CharSequence candidate);
/**
* Counts the number of keys/values stored in the tree.
* <p>
* In the current implementation, <b>this is an expensive operation</b>, having O(n) time complexity.
*
* @return The number of keys/values stored in the tree
*/
int size();
}

View file

@ -0,0 +1,30 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.AbstractList;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * Wraps an {@link AtomicReferenceArray} to implement read-only methods of the {@link java.util.List} interface.
 * <p>
 * This enables binary search of an {@link AtomicReferenceArray}, using
 * {@link java.util.Collections#binarySearch(java.util.List, Object)}.
 * <p>
 * The adapter is a live view: it does not copy the array, so every read goes straight to the wrapped
 * {@link AtomicReferenceArray} and reflects its current contents. Mutating methods are not overridden and
 * therefore behave as inherited from {@link AbstractList}.
 * <p>
 * The class is marked as {@link java.util.RandomAccess} because element access is a constant-time array read.
 * This lets algorithms such as {@link java.util.Collections#binarySearch(java.util.List, Object)} use their
 * index-based code path regardless of the size of the list.
 *
 * @param <T> The type of the elements held in the wrapped array
 */
@SuppressWarnings("serial")
public class AtomicReferenceArrayListAdapter<T> extends AbstractList<T> implements java.util.RandomAccess {

    // The wrapped array; never copied, so this list is a live view onto it...
    private final AtomicReferenceArray<T> atomicReferenceArray;

    /**
     * Creates a read-only list view onto the given array.
     *
     * @param atomicReferenceArray The array to wrap
     */
    public AtomicReferenceArrayListAdapter(AtomicReferenceArray<T> atomicReferenceArray) {
        this.atomicReferenceArray = atomicReferenceArray;
    }

    /**
     * Reads the element at the given index from the wrapped array.
     *
     * @param index The index of the element to read
     * @return The element currently stored at the given index
     */
    @Override
    public T get(int index) {
        return atomicReferenceArray.get(index);
    }

    /**
     * @return The length of the wrapped array
     */
    @Override
    public int size() {
        return atomicReferenceArray.length();
    }
}

View file

@ -0,0 +1,117 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * A general-purpose, non-optimized implementation of the {@link Node} interface. It stores an incoming
 * edge, a value and outgoing edges, which makes it flexible but not very memory efficient.
 * <p>
 * See {@link NodeFactory} for documentation on how alternative node implementations can be created to
 * reduce memory overhead, and the {@link Node} interface for details on how to write memory-efficient nodes.
 * <p>
 * Child nodes are sorted with a {@link NodeCharacterComparator} on construction and then kept in an
 * {@link AtomicReferenceArray}.
 * <p>
 * {@link #getOutgoingEdge(Character)} locates a child by binary search on the first character of its
 * edge, and reads it from the {@link AtomicReferenceArray}.
 * <p>
 * {@link #updateOutgoingEdge(Node)} only ever replaces an existing child whose edge starts with the same
 * first character as the new child; it never adds or removes slots. Replacements are written to the
 * {@link AtomicReferenceArray} in place.
 *
 * @author Niall Gallagher
 */
public class CharArrayNodeDefault implements Node {

    // Characters in the edge arriving at this node from a parent node; never modified once assigned...
    private final char[] incomingEdgeCharArray;

    // Child nodes representing the outgoing edges of this node. Slots are never added or removed, but
    // a slot may be re-pointed at a new child whose edge starts with the same first character...
    private final AtomicReferenceArray<Node> outgoingEdges;

    // Read-only List view onto the outgoingEdges array...
    private final List<Node> outgoingEdgesAsList;

    // Arbitrary value which the application associates with the key leading to this node; can be null...
    private final Object value;

    /**
     * @param edgeCharSequence The characters of the edge arriving at this node
     * @param value The value to associate with this node, which can be null
     * @param outgoingEdges The child nodes of this node, in any order
     */
    public CharArrayNodeDefault(CharSequence edgeCharSequence, Object value, List<Node> outgoingEdges) {
        final Node[] sortedChildren = outgoingEdges.toArray(new Node[0]);
        Arrays.sort(sortedChildren, new NodeCharacterComparator());
        this.outgoingEdges = new AtomicReferenceArray<>(sortedChildren);
        this.incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence);
        this.value = value;
        this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter<>(this.outgoingEdges);
    }

    @Override
    public CharSequence getIncomingEdge() {
        return CharSequences.fromCharArray(incomingEdgeCharArray);
    }

    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return incomingEdgeCharArray[0];
    }

    @Override
    public Object getValue() {
        return value;
    }

    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        // Binary search for the slot of the child whose edge starts with the given character...
        final int slot = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter);
        // A negative result means no such edge exists; otherwise read the child from its slot...
        return slot < 0 ? null : outgoingEdges.get(slot);
    }

    @Override
    public void updateOutgoingEdge(Node childNode) {
        // Binary search for the slot of the existing child whose edge starts with the same character...
        final Character firstCharacter = childNode.getIncomingEdgeFirstCharacter();
        final int slot = NodeUtil.binarySearchForEdge(outgoingEdges, firstCharacter);
        if (slot < 0) {
            throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + firstCharacter + "', no such edge already exists: " + childNode);
        }
        // Re-point the slot at the new child...
        outgoingEdges.set(slot, childNode);
    }

    @Override
    public List<Node> getOutgoingEdges() {
        return outgoingEdgesAsList;
    }

    @Override
    public String toString() {
        return "Node{edge=" + String.valueOf(incomingEdgeCharArray)
                + ", value=" + value
                + ", edges=" + getOutgoingEdges()
                + "}";
    }
}

View file

@ -0,0 +1,61 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Collections;
import java.util.List;
/**
 * Leaf {@link Node} that keeps nothing but its incoming edge, copied into a {@code char[]}.
 * {@link #getValue()} always yields {@code null} and the node holds <b>no</b> outgoing edges.
 */
public class CharArrayNodeLeafNullValue implements Node {

    // Copy of the characters on the edge from the parent to this node; never changed after construction.
    private final char[] incomingEdgeCharArray;

    /**
     * @param edgeCharSequence characters of the incoming edge; copied via {@code CharSequences.toCharArray}
     */
    public CharArrayNodeLeafNullValue(CharSequence edgeCharSequence) {
        incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence);
    }

    /** @return the edge characters, converted by {@code CharSequences.fromCharArray} */
    @Override
    public CharSequence getIncomingEdge() {
        final CharSequence edge = CharSequences.fromCharArray(this.incomingEdgeCharArray);
        return edge;
    }

    /** @return element {@code 0} of the stored edge array, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharArray[0]);
    }

    /** @return always {@code null}; this node type stores no value */
    @Override
    public Object getValue() {
        return null;
    }

    /** @return always {@code null}; a leaf has no children to look up */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        return null;
    }

    /**
     * Always fails, because a leaf has no child reference that could be replaced.
     *
     * @throws IllegalStateException on every call
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final String message = "Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() + "', no such edge already exists: " + childNode;
        throw new IllegalStateException(message);
    }

    /** @return the shared empty list from {@link Collections#emptyList()} */
    @Override
    public List<Node> getOutgoingEdges() {
        return Collections.<Node>emptyList();
    }

    /** @return {@code Node{edge=..., value=null, edges=[]}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharArray)
                .append(", value=null, edges=[]}")
                .toString();
    }
}

View file

@ -0,0 +1,60 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Collections;
import java.util.List;
/**
 * Leaf {@link Node} that keeps nothing but its incoming edge, copied into a {@code char[]}.
 * {@link #getValue()} always yields {@link VoidValue#SINGLETON} and the node holds <b>no</b> outgoing edges.
 */
public class CharArrayNodeLeafVoidValue implements Node {

    // Copy of the characters on the edge from the parent to this node; never changed after construction.
    private final char[] incomingEdgeCharArray;

    /**
     * @param edgeCharSequence characters of the incoming edge; copied via {@code CharSequences.toCharArray}
     */
    public CharArrayNodeLeafVoidValue(CharSequence edgeCharSequence) {
        incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence);
    }

    /** @return the edge characters, converted by {@code CharSequences.fromCharArray} */
    @Override
    public CharSequence getIncomingEdge() {
        final CharSequence edge = CharSequences.fromCharArray(this.incomingEdgeCharArray);
        return edge;
    }

    /** @return element {@code 0} of the stored edge array, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharArray[0]);
    }

    /** @return always {@code VoidValue.SINGLETON}; no value field is stored */
    @Override
    public Object getValue() {
        return VoidValue.SINGLETON;
    }

    /** @return always {@code null}; a leaf has no children to look up */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        return null;
    }

    /**
     * Always fails, because a leaf has no child reference that could be replaced.
     *
     * @throws IllegalStateException on every call
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final String message = "Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() + "', no such edge already exists: " + childNode;
        throw new IllegalStateException(message);
    }

    /** @return the shared empty list from {@link Collections#emptyList()} */
    @Override
    public List<Node> getOutgoingEdges() {
        return Collections.<Node>emptyList();
    }

    /** @return {@code Node{edge=..., value=<VoidValue>, edges=[]}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharArray)
                .append(", value=")
                .append(VoidValue.SINGLETON)
                .append(", edges=[]}")
                .toString();
    }
}

View file

@ -0,0 +1,66 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Collections;
import java.util.List;
/**
 * Leaf {@link Node} that keeps its incoming edge, copied into a {@code char[]}, plus a reference to a value.
 * The node holds <b>no</b> outgoing edges.
 */
public class CharArrayNodeLeafWithValue implements Node {

    // Copy of the characters on the edge from the parent to this node; never changed after construction.
    private final char[] incomingEdgeCharArray;

    // Application-supplied value for the key that ends at this node; may be null.
    private final Object value;

    /**
     * @param edgeCharSequence characters of the incoming edge; copied via {@code CharSequences.toCharArray}
     * @param value            value to associate with this node, stored as given
     */
    public CharArrayNodeLeafWithValue(CharSequence edgeCharSequence, Object value) {
        incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence);
        this.value = value;
    }

    /** @return the edge characters, converted by {@code CharSequences.fromCharArray} */
    @Override
    public CharSequence getIncomingEdge() {
        final CharSequence edge = CharSequences.fromCharArray(this.incomingEdgeCharArray);
        return edge;
    }

    /** @return element {@code 0} of the stored edge array, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharArray[0]);
    }

    /** @return the value reference passed to the constructor */
    @Override
    public Object getValue() {
        return this.value;
    }

    /** @return always {@code null}; a leaf has no children to look up */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        return null;
    }

    /**
     * Always fails, because a leaf has no child reference that could be replaced.
     *
     * @throws IllegalStateException on every call
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final String message = "Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() + "', no such edge already exists: " + childNode;
        throw new IllegalStateException(message);
    }

    /** @return the shared empty list from {@link Collections#emptyList()} */
    @Override
    public List<Node> getOutgoingEdges() {
        return Collections.<Node>emptyList();
    }

    /** @return {@code Node{edge=..., value=..., edges=[]}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharArray)
                .append(", value=")
                .append(value)
                .append(", edges=[]}")
                .toString();
    }
}

View file

@ -0,0 +1,92 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * {@link Node} that keeps its incoming edge as a {@code char[]} and its children in an
 * {@link AtomicReferenceArray}. No value is stored; {@link #getValue()} always yields {@code null}.
 */
public class CharArrayNodeNonLeafNullValue implements Node {

    // Copy of the characters on the edge from the parent to this node; never changed after construction.
    private final char[] incomingEdgeCharArray;

    // Child nodes, sorted at construction time. Slots are never added or removed; an existing slot may be
    // re-pointed at a new child whose edge starts with the same first character.
    private final AtomicReferenceArray<Node> outgoingEdges;

    // List adapter around outgoingEdges, handed out by getOutgoingEdges().
    private final List<Node> outgoingEdgesAsList;

    /**
     * @param edgeCharSequence characters of the incoming edge; copied via {@code CharSequences.toCharArray}
     * @param outgoingEdges    child nodes; copied into an array and sorted with {@code NodeCharacterComparator}
     */
    public CharArrayNodeNonLeafNullValue(CharSequence edgeCharSequence, List<Node> outgoingEdges) {
        final Node[] sortedChildren = outgoingEdges.toArray(new Node[outgoingEdges.size()]);
        Arrays.sort(sortedChildren, new NodeCharacterComparator());
        this.outgoingEdges = new AtomicReferenceArray<>(sortedChildren);
        this.incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence);
        this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter<>(this.outgoingEdges);
    }

    /** @return the edge characters, converted by {@code CharSequences.fromCharArray} */
    @Override
    public CharSequence getIncomingEdge() {
        final CharSequence edge = CharSequences.fromCharArray(this.incomingEdgeCharArray);
        return edge;
    }

    /** @return element {@code 0} of the stored edge array, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharArray[0]);
    }

    /** @return always {@code null}; this node type stores no value */
    @Override
    public Object getValue() {
        return null;
    }

    /**
     * Looks up the child node whose incoming edge begins with the given character.
     *
     * @return the matching child, or {@code null} when the search reports no such edge
     */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        // A negative position from the search means "not found".
        final int position = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter);
        return position < 0 ? null : outgoingEdges.get(position);
    }

    /**
     * Replaces the existing child whose edge starts with the same first character as {@code childNode}.
     *
     * @throws IllegalStateException if no existing child edge starts with that character
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final Character firstCharacter = childNode.getIncomingEdgeFirstCharacter();
        final int position = NodeUtil.binarySearchForEdge(outgoingEdges, firstCharacter);
        if (position >= 0) {
            // Overwrite the reference in the atomic array.
            outgoingEdges.set(position, childNode);
            return;
        }
        throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + firstCharacter + "', no such edge already exists: " + childNode);
    }

    /** @return the list adapter created around the child array in the constructor */
    @Override
    public List<Node> getOutgoingEdges() {
        return this.outgoingEdgesAsList;
    }

    /** @return {@code Node{edge=..., value=null, edges=...}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharArray)
                .append(", value=null, edges=")
                .append(getOutgoingEdges())
                .append("}")
                .toString();
    }
}

View file

@ -0,0 +1,91 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * {@link Node} that keeps its incoming edge as a {@code char[]} and its children in an
 * {@link AtomicReferenceArray}. No value is stored; {@link #getValue()} always yields
 * {@link VoidValue#SINGLETON}.
 */
public class CharArrayNodeNonLeafVoidValue implements Node {

    // Copy of the characters on the edge from the parent to this node; never changed after construction.
    private final char[] incomingEdgeCharArray;

    // Child nodes, sorted at construction time. Slots are never added or removed; an existing slot may be
    // re-pointed at a new child whose edge starts with the same first character.
    private final AtomicReferenceArray<Node> outgoingEdges;

    // List adapter around outgoingEdges, handed out by getOutgoingEdges().
    private final List<Node> outgoingEdgesAsList;

    /**
     * @param edgeCharSequence characters of the incoming edge; copied via {@code CharSequences.toCharArray}
     * @param outgoingEdges    child nodes; copied into an array and sorted with {@code NodeCharacterComparator}
     */
    public CharArrayNodeNonLeafVoidValue(CharSequence edgeCharSequence, List<Node> outgoingEdges) {
        final Node[] sortedChildren = outgoingEdges.toArray(new Node[outgoingEdges.size()]);
        Arrays.sort(sortedChildren, new NodeCharacterComparator());
        this.outgoingEdges = new AtomicReferenceArray<>(sortedChildren);
        this.incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence);
        this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter<>(this.outgoingEdges);
    }

    /** @return the edge characters, converted by {@code CharSequences.fromCharArray} */
    @Override
    public CharSequence getIncomingEdge() {
        final CharSequence edge = CharSequences.fromCharArray(this.incomingEdgeCharArray);
        return edge;
    }

    /** @return element {@code 0} of the stored edge array, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharArray[0]);
    }

    /** @return always {@code VoidValue.SINGLETON}; no value field is stored */
    @Override
    public Object getValue() {
        return VoidValue.SINGLETON;
    }

    /**
     * Looks up the child node whose incoming edge begins with the given character.
     *
     * @return the matching child, or {@code null} when the search reports no such edge
     */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        // A negative position from the search means "not found".
        final int position = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter);
        return position < 0 ? null : outgoingEdges.get(position);
    }

    /**
     * Replaces the existing child whose edge starts with the same first character as {@code childNode}.
     *
     * @throws IllegalStateException if no existing child edge starts with that character
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final Character firstCharacter = childNode.getIncomingEdgeFirstCharacter();
        final int position = NodeUtil.binarySearchForEdge(outgoingEdges, firstCharacter);
        if (position >= 0) {
            // Overwrite the reference in the atomic array.
            outgoingEdges.set(position, childNode);
            return;
        }
        throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + firstCharacter + "', no such edge already exists: " + childNode);
    }

    /** @return the list adapter created around the child array in the constructor */
    @Override
    public List<Node> getOutgoingEdges() {
        return this.outgoingEdgesAsList;
    }

    /** @return {@code Node{edge=..., value=<VoidValue>, edges=...}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharArray)
                .append(", value=")
                .append(VoidValue.SINGLETON)
                .append(", edges=")
                .append(getOutgoingEdges())
                .append("}")
                .toString();
    }
}

View file

@ -0,0 +1,115 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * An implementation of the {@link Node} interface which keeps the incoming edge as the {@link CharSequence} it
 * was given (a <i>view</i> onto the original key) instead of copying the edge into a character array. It carries
 * every field a node can have (edge, value and children), so it is the most general but not the most
 * memory-efficient node type.
 * <p>
 * See {@link NodeFactory} for how alternative node implementations can be created to reduce memory overhead, and
 * the {@link Node} interface for details on writing memory-efficient nodes.
 * <p>
 * Child nodes are held in an {@link AtomicReferenceArray}, sorted at construction time with
 * {@code NodeCharacterComparator}.
 * <p>
 * {@link #getOutgoingEdge(Character)} locates a child through {@code NodeUtil.binarySearchForEdge} and reads it
 * from the {@link AtomicReferenceArray}.
 * <p>
 * {@link #updateOutgoingEdge(Node)} only replaces a child in a slot that already exists for the same first edge
 * character and rejects anything else with an {@link IllegalStateException}, which is how this node enforces the
 * constraints defined in the {@link Node} interface.
 */
public class CharSequenceNodeDefault implements Node {

    // Characters on the edge from the parent to this node; the reference is never reassigned.
    private final CharSequence incomingEdgeCharSequence;

    // Child nodes, sorted at construction time. Slots are never added or removed; an existing slot may be
    // re-pointed at a new child whose edge starts with the same first character.
    private final AtomicReferenceArray<Node> outgoingEdges;

    // List adapter around outgoingEdges, handed out by getOutgoingEdges().
    private final List<Node> outgoingEdgesAsList;

    // Application-supplied value for the key that ends at this node; may be null.
    private final Object value;

    /**
     * @param edgeCharSequence characters of the incoming edge; stored by reference, not copied
     * @param value            value to associate with this node, stored as given
     * @param outgoingEdges    child nodes; copied into an array and sorted with {@code NodeCharacterComparator}
     */
    public CharSequenceNodeDefault(CharSequence edgeCharSequence, Object value, List<Node> outgoingEdges) {
        final Node[] sortedChildren = outgoingEdges.toArray(new Node[outgoingEdges.size()]);
        Arrays.sort(sortedChildren, new NodeCharacterComparator());
        this.outgoingEdges = new AtomicReferenceArray<>(sortedChildren);
        this.incomingEdgeCharSequence = edgeCharSequence;
        this.value = value;
        this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter<>(this.outgoingEdges);
    }

    /** @return the edge sequence passed to the constructor */
    @Override
    public CharSequence getIncomingEdge() {
        return this.incomingEdgeCharSequence;
    }

    /** @return the character at index {@code 0} of the edge, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharSequence.charAt(0));
    }

    /** @return the value reference passed to the constructor */
    @Override
    public Object getValue() {
        return this.value;
    }

    /**
     * Looks up the child node whose incoming edge begins with the given character.
     *
     * @return the matching child, or {@code null} when the search reports no such edge
     */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        // A negative position from the search means "not found".
        final int position = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter);
        return position < 0 ? null : outgoingEdges.get(position);
    }

    /**
     * Replaces the existing child whose edge starts with the same first character as {@code childNode}.
     *
     * @throws IllegalStateException if no existing child edge starts with that character
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final Character firstCharacter = childNode.getIncomingEdgeFirstCharacter();
        final int position = NodeUtil.binarySearchForEdge(outgoingEdges, firstCharacter);
        if (position >= 0) {
            // Overwrite the reference in the atomic array.
            outgoingEdges.set(position, childNode);
            return;
        }
        throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + firstCharacter + "', no such edge already exists: " + childNode);
    }

    /** @return the list adapter created around the child array in the constructor */
    @Override
    public List<Node> getOutgoingEdges() {
        return this.outgoingEdgesAsList;
    }

    /** @return {@code Node{edge=..., value=..., edges=...}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharSequence)
                .append(", value=")
                .append(value)
                .append(", edges=")
                .append(getOutgoingEdges())
                .append("}")
                .toString();
    }
}

View file

@ -0,0 +1,61 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Collections;
import java.util.List;
/**
 * Leaf {@link Node} that keeps only its incoming edge, held as the {@link CharSequence} it was given (a
 * <i>view</i> onto the original key) rather than as a copied character array. {@link #getValue()} always yields
 * {@code null} and the node holds <b>no</b> outgoing edges.
 */
public class CharSequenceNodeLeafNullValue implements Node {

    // Characters on the edge from the parent to this node; the reference is never reassigned.
    private final CharSequence incomingEdgeCharSequence;

    /**
     * @param edgeCharSequence characters of the incoming edge; stored by reference, not copied
     */
    public CharSequenceNodeLeafNullValue(CharSequence edgeCharSequence) {
        incomingEdgeCharSequence = edgeCharSequence;
    }

    /** @return the edge sequence passed to the constructor */
    @Override
    public CharSequence getIncomingEdge() {
        return this.incomingEdgeCharSequence;
    }

    /** @return the character at index {@code 0} of the edge, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharSequence.charAt(0));
    }

    /** @return always {@code null}; this node type stores no value */
    @Override
    public Object getValue() {
        return null;
    }

    /** @return always {@code null}; a leaf has no children to look up */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        return null;
    }

    /**
     * Always fails, because a leaf has no child reference that could be replaced.
     *
     * @throws IllegalStateException on every call
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final String message = "Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() + "', no such edge already exists: " + childNode;
        throw new IllegalStateException(message);
    }

    /** @return the shared empty list from {@link Collections#emptyList()} */
    @Override
    public List<Node> getOutgoingEdges() {
        return Collections.<Node>emptyList();
    }

    /** @return {@code Node{edge=..., value=null, edges=[]}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharSequence)
                .append(", value=null, edges=[]}")
                .toString();
    }
}

View file

@ -0,0 +1,61 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Collections;
import java.util.List;
/**
 * Leaf {@link Node} that keeps only its incoming edge, held as the {@link CharSequence} it was given (a
 * <i>view</i> onto the original key) rather than as a copied character array. {@link #getValue()} always yields
 * {@link VoidValue#SINGLETON} and the node holds <b>no</b> outgoing edges.
 */
public class CharSequenceNodeLeafVoidValue implements Node {

    // Characters on the edge from the parent to this node; the reference is never reassigned.
    private final CharSequence incomingEdgeCharSequence;

    /**
     * @param edgeCharSequence characters of the incoming edge; stored by reference, not copied
     */
    public CharSequenceNodeLeafVoidValue(CharSequence edgeCharSequence) {
        incomingEdgeCharSequence = edgeCharSequence;
    }

    /** @return the edge sequence passed to the constructor */
    @Override
    public CharSequence getIncomingEdge() {
        return this.incomingEdgeCharSequence;
    }

    /** @return the character at index {@code 0} of the edge, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharSequence.charAt(0));
    }

    /** @return always {@code VoidValue.SINGLETON}; no value field is stored */
    @Override
    public Object getValue() {
        return VoidValue.SINGLETON;
    }

    /** @return always {@code null}; a leaf has no children to look up */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        return null;
    }

    /**
     * Always fails, because a leaf has no child reference that could be replaced.
     *
     * @throws IllegalStateException on every call
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final String message = "Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() + "', no such edge already exists: " + childNode;
        throw new IllegalStateException(message);
    }

    /** @return the shared empty list from {@link Collections#emptyList()} */
    @Override
    public List<Node> getOutgoingEdges() {
        return Collections.<Node>emptyList();
    }

    /** @return {@code Node{edge=..., value=<VoidValue>, edges=[]}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharSequence)
                .append(", value=")
                .append(VoidValue.SINGLETON)
                .append(", edges=[]}")
                .toString();
    }
}

View file

@ -0,0 +1,67 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Collections;
import java.util.List;
/**
 * Leaf {@link Node} that keeps its incoming edge as the {@link CharSequence} it was given (a <i>view</i> onto the
 * original key) rather than as a copied character array, plus a reference to a value. The node holds <b>no</b>
 * outgoing edges.
 */
public class CharSequenceNodeLeafWithValue implements Node {

    // Characters on the edge from the parent to this node; the reference is never reassigned.
    private final CharSequence incomingEdgeCharSequence;

    // Application-supplied value for the key that ends at this node; may be null.
    private final Object value;

    /**
     * @param edgeCharSequence characters of the incoming edge; stored by reference, not copied
     * @param value            value to associate with this node, stored as given
     */
    public CharSequenceNodeLeafWithValue(CharSequence edgeCharSequence, Object value) {
        incomingEdgeCharSequence = edgeCharSequence;
        this.value = value;
    }

    /** @return the edge sequence passed to the constructor */
    @Override
    public CharSequence getIncomingEdge() {
        return this.incomingEdgeCharSequence;
    }

    /** @return the character at index {@code 0} of the edge, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharSequence.charAt(0));
    }

    /** @return the value reference passed to the constructor */
    @Override
    public Object getValue() {
        return this.value;
    }

    /** @return always {@code null}; a leaf has no children to look up */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        return null;
    }

    /**
     * Always fails, because a leaf has no child reference that could be replaced.
     *
     * @throws IllegalStateException on every call
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final String message = "Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() + "', no such edge already exists: " + childNode;
        throw new IllegalStateException(message);
    }

    /** @return the shared empty list from {@link Collections#emptyList()} */
    @Override
    public List<Node> getOutgoingEdges() {
        return Collections.<Node>emptyList();
    }

    /** @return {@code Node{edge=..., value=..., edges=[]}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharSequence)
                .append(", value=")
                .append(value)
                .append(", edges=[]}")
                .toString();
    }
}

View file

@ -0,0 +1,93 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * {@link Node} that keeps its incoming edge as the {@link CharSequence} it was given (a <i>view</i> onto the
 * original key) rather than as a copied character array, and its children in an {@link AtomicReferenceArray}.
 * No value is stored; {@link #getValue()} always yields {@code null}.
 */
public class CharSequenceNodeNonLeafNullValue implements Node {

    // Characters on the edge from the parent to this node; the reference is never reassigned.
    private final CharSequence incomingEdgeCharSequence;

    // Child nodes, sorted at construction time. Slots are never added or removed; an existing slot may be
    // re-pointed at a new child whose edge starts with the same first character.
    private final AtomicReferenceArray<Node> outgoingEdges;

    // List adapter around outgoingEdges, handed out by getOutgoingEdges().
    private final List<Node> outgoingEdgesAsList;

    /**
     * @param edgeCharSequence characters of the incoming edge; stored by reference, not copied
     * @param outgoingEdges    child nodes; copied into an array and sorted with {@code NodeCharacterComparator}
     */
    public CharSequenceNodeNonLeafNullValue(CharSequence edgeCharSequence, List<Node> outgoingEdges) {
        final Node[] sortedChildren = outgoingEdges.toArray(new Node[outgoingEdges.size()]);
        Arrays.sort(sortedChildren, new NodeCharacterComparator());
        this.outgoingEdges = new AtomicReferenceArray<>(sortedChildren);
        this.incomingEdgeCharSequence = edgeCharSequence;
        this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter<>(this.outgoingEdges);
    }

    /** @return the edge sequence passed to the constructor */
    @Override
    public CharSequence getIncomingEdge() {
        return this.incomingEdgeCharSequence;
    }

    /** @return the character at index {@code 0} of the edge, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharSequence.charAt(0));
    }

    /** @return always {@code null}; this node type stores no value */
    @Override
    public Object getValue() {
        return null;
    }

    /**
     * Looks up the child node whose incoming edge begins with the given character.
     *
     * @return the matching child, or {@code null} when the search reports no such edge
     */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        // A negative position from the search means "not found".
        final int position = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter);
        return position < 0 ? null : outgoingEdges.get(position);
    }

    /**
     * Replaces the existing child whose edge starts with the same first character as {@code childNode}.
     *
     * @throws IllegalStateException if no existing child edge starts with that character
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final Character firstCharacter = childNode.getIncomingEdgeFirstCharacter();
        final int position = NodeUtil.binarySearchForEdge(outgoingEdges, firstCharacter);
        if (position >= 0) {
            // Overwrite the reference in the atomic array.
            outgoingEdges.set(position, childNode);
            return;
        }
        throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + firstCharacter + "', no such edge already exists: " + childNode);
    }

    /** @return the list adapter created around the child array in the constructor */
    @Override
    public List<Node> getOutgoingEdges() {
        return this.outgoingEdgesAsList;
    }

    /** @return {@code Node{edge=..., value=null, edges=...}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharSequence)
                .append(", value=null, edges=")
                .append(getOutgoingEdges())
                .append("}")
                .toString();
    }
}

View file

@ -0,0 +1,93 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * {@link Node} that keeps its incoming edge as the {@link CharSequence} it was given (a <i>view</i> onto the
 * original key) rather than as a copied character array, and its children in an {@link AtomicReferenceArray}.
 * No value is stored; {@link #getValue()} always yields {@link VoidValue#SINGLETON}.
 */
public class CharSequenceNodeNonLeafVoidValue implements Node {

    // Characters on the edge from the parent to this node; the reference is never reassigned.
    private final CharSequence incomingEdgeCharSequence;

    // Child nodes, sorted at construction time. Slots are never added or removed; an existing slot may be
    // re-pointed at a new child whose edge starts with the same first character.
    private final AtomicReferenceArray<Node> outgoingEdges;

    // List adapter around outgoingEdges, handed out by getOutgoingEdges().
    private final List<Node> outgoingEdgesAsList;

    /**
     * @param edgeCharSequence characters of the incoming edge; stored by reference, not copied
     * @param outgoingEdges    child nodes; copied into an array and sorted with {@code NodeCharacterComparator}
     */
    public CharSequenceNodeNonLeafVoidValue(CharSequence edgeCharSequence, List<Node> outgoingEdges) {
        final Node[] sortedChildren = outgoingEdges.toArray(new Node[outgoingEdges.size()]);
        Arrays.sort(sortedChildren, new NodeCharacterComparator());
        this.outgoingEdges = new AtomicReferenceArray<>(sortedChildren);
        this.incomingEdgeCharSequence = edgeCharSequence;
        this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter<>(this.outgoingEdges);
    }

    /** @return the edge sequence passed to the constructor */
    @Override
    public CharSequence getIncomingEdge() {
        return this.incomingEdgeCharSequence;
    }

    /** @return the character at index {@code 0} of the edge, boxed */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return Character.valueOf(this.incomingEdgeCharSequence.charAt(0));
    }

    /** @return always {@code VoidValue.SINGLETON}; no value field is stored */
    @Override
    public Object getValue() {
        return VoidValue.SINGLETON;
    }

    /**
     * Looks up the child node whose incoming edge begins with the given character.
     *
     * @return the matching child, or {@code null} when the search reports no such edge
     */
    @Override
    public Node getOutgoingEdge(Character edgeFirstCharacter) {
        // A negative position from the search means "not found".
        final int position = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter);
        return position < 0 ? null : outgoingEdges.get(position);
    }

    /**
     * Replaces the existing child whose edge starts with the same first character as {@code childNode}.
     *
     * @throws IllegalStateException if no existing child edge starts with that character
     */
    @Override
    public void updateOutgoingEdge(Node childNode) {
        final Character firstCharacter = childNode.getIncomingEdgeFirstCharacter();
        final int position = NodeUtil.binarySearchForEdge(outgoingEdges, firstCharacter);
        if (position >= 0) {
            // Overwrite the reference in the atomic array.
            outgoingEdges.set(position, childNode);
            return;
        }
        throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + firstCharacter + "', no such edge already exists: " + childNode);
    }

    /** @return the list adapter created around the child array in the constructor */
    @Override
    public List<Node> getOutgoingEdges() {
        return this.outgoingEdgesAsList;
    }

    /** @return {@code Node{edge=..., value=<VoidValue>, edges=...}} */
    @Override
    public String toString() {
        return new StringBuilder("Node{edge=")
                .append(incomingEdgeCharSequence)
                .append(", value=")
                .append(VoidValue.SINGLETON)
                .append(", edges=")
                .append(getOutgoingEdges())
                .append("}")
                .toString();
    }
}

View file

@ -0,0 +1,130 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Iterator;
import java.util.NoSuchElementException;
public class CharSequences {
/**
* Private constructor, not used.
*/
CharSequences() {
}
public static Iterable<CharSequence> generateSuffixes(final CharSequence input) {
return () -> new Iterator<CharSequence>() {
int currentIndex = 0;
@Override
public boolean hasNext() {
return currentIndex < input.length();
}
@Override
public CharSequence next() {
return input.subSequence(currentIndex++, input.length());
}
@Override
public void remove() {
throw new UnsupportedOperationException("Modification not supported");
}
};
}
public static Iterable<CharSequence> generatePrefixes(final CharSequence input) {
return () -> new Iterator<CharSequence>() {
int currentIndex = 0;
@Override
public boolean hasNext() {
return currentIndex < input.length();
}
@Override
public CharSequence next() {
return input.subSequence(0, ++currentIndex);
}
@Override
public void remove() {
throw new UnsupportedOperationException("Modification not supported");
}
};
}
public static CharSequence getCommonPrefix(CharSequence first, CharSequence second) {
int minLength = Math.min(first.length(), second.length());
for (int i = 0; i < minLength; i++) {
if (first.charAt(i) != second.charAt(i)) {
return first.subSequence(0, i);
}
}
return first.subSequence(0, minLength);
}
public static CharSequence getSuffix(CharSequence input, int startIndex) {
if (startIndex >= input.length()) {
return "";
}
return input.subSequence(startIndex, input.length());
}
public static CharSequence getPrefix(CharSequence input, int endIndex) {
if (endIndex > input.length()) {
return input;
}
return input.subSequence(0, endIndex);
}
public static CharSequence subtractPrefix(CharSequence main, CharSequence prefix) {
int startIndex = prefix.length();
int mainLength = main.length();
if (startIndex > mainLength) {
return "";
}
return main.subSequence(startIndex, mainLength);
}
public static CharSequence concatenate(final CharSequence first, final CharSequence second) {
return new StringBuilder().append(first).append(second);
}
public static CharSequence reverse(CharSequence input) {
return new StringBuilder(input.length()).append(input).reverse();
}
/**
* Returns a {@link CharSequence} which wraps the given {@code char[]}. Note that this {@link CharSequence} will
* reflect any changes to the {@code char[]}.
*
* @param characters The {@code char[]} to wrap
* @return A {@link CharSequence} which wraps the given {@code char[]}
*/
public static CharSequence fromCharArray(final char[] characters) {
return new StringBuilder(characters.length).append(characters);
}
/**
 * Produces a freshly allocated {@code char[]} containing every character of the given {@link CharSequence},
 * in order.
 *
 * @param charSequence The {@link CharSequence} to copy
 * @return A new {@code char[]} whose length equals that of the {@link CharSequence}
 */
public static char[] toCharArray(CharSequence charSequence) {
    final char[] copy = new char[charSequence.length()];
    int position = 0;
    while (position < copy.length) {
        copy[position] = charSequence.charAt(position);
        position++;
    }
    return copy;
}
/**
 * Converts the given {@link CharSequence} to a {@link String}.
 *
 * @param charSequence The sequence to convert, may be {@code null}
 * @return {@code null} for a {@code null} argument, the argument itself if it already is a {@link String},
 * otherwise its string representation
 */
public static String toString(CharSequence charSequence) {
    if (charSequence instanceof String) {
        // Already a String: hand back the same instance rather than copying it.
        return (String) charSequence;
    }
    return charSequence == null ? null : charSequence.toString();
}
}

View file

@ -0,0 +1,63 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.List;
/**
 * A {@link NodeFactory} which creates various implementations of {@link Node} objects all of which store incoming
 * edge characters as a character array inside the node.
 * <p>
 * The node implementation returned depends on the arguments supplied, and will be one of:
 * <ul>
 * <li>{@link CharArrayNodeDefault} - non-leaf node with a value</li>
 * <li>{@link CharArrayNodeNonLeafNullValue} - non-leaf node created for a {@code null} value</li>
 * <li>{@link CharArrayNodeNonLeafVoidValue} - non-leaf node created for a {@link VoidValue}</li>
 * <li>{@link CharArrayNodeLeafVoidValue} - leaf node created for a {@link VoidValue}</li>
 * <li>{@link CharArrayNodeLeafWithValue} - leaf node with a value</li>
 * <li>{@link CharArrayNodeLeafNullValue} - leaf node created for a {@code null} value</li>
 * </ul>
 * <p>
 * When the application supplies {@link VoidValue} for a value, the value is not passed on to the node which
 * is created; one of the VoidValue-specific node types above is returned instead.
 *
 * @author Niall Gallagher
 */
public class DefaultCharArrayNodeFactory implements NodeFactory {

    /**
     * Validates the arguments and then picks the node implementation matching the leaf/non-leaf and
     * value/void/null combination.
     *
     * @throws IllegalStateException If {@code edgeCharacters} or {@code childNodes} is null, if a non-root node
     * has no edge characters, or if two child nodes share the same first edge character
     */
    @Override
    public Node createNode(CharSequence edgeCharacters, Object value, List<Node> childNodes, boolean isRoot) {
        // Validation happens in a fixed order so that callers always see the same failure first.
        if (edgeCharacters == null) {
            throw new IllegalStateException("The edgeCharacters argument was null");
        }
        if (edgeCharacters.length() == 0 && !isRoot) {
            throw new IllegalStateException("Invalid edge characters for non-root node: " + CharSequences.toString(edgeCharacters));
        }
        if (childNodes == null) {
            throw new IllegalStateException("The childNodes argument was null");
        }
        NodeUtil.ensureNoDuplicateEdges(childNodes);

        final boolean voidValue = value instanceof VoidValue;
        if (!childNodes.isEmpty()) {
            // Non-leaf node...
            if (voidValue) {
                return new CharArrayNodeNonLeafVoidValue(edgeCharacters, childNodes);
            }
            if (value == null) {
                return new CharArrayNodeNonLeafNullValue(edgeCharacters, childNodes);
            }
            return new CharArrayNodeDefault(edgeCharacters, value, childNodes);
        }
        // Leaf node...
        if (voidValue) {
            return new CharArrayNodeLeafVoidValue(edgeCharacters);
        }
        if (value == null) {
            return new CharArrayNodeLeafNullValue(edgeCharacters);
        }
        return new CharArrayNodeLeafWithValue(edgeCharacters, value);
    }
}

View file

@ -0,0 +1,66 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.List;
/**
 * A {@link NodeFactory} which creates various implementations of {@link Node} objects all of which store incoming
 * edge characters as a {@link CharSequence} (a <i>view</i> onto the original key) rather than copying the edge into a
 * character array.
 * <p>
 * The node implementation returned depends on the arguments supplied, and will be one of:
 * <ul>
 * <li>{@link CharSequenceNodeDefault} - non-leaf node with a value</li>
 * <li>{@link CharSequenceNodeNonLeafNullValue} - non-leaf node created for a {@code null} value</li>
 * <li>{@link CharSequenceNodeNonLeafVoidValue} - non-leaf node created for a {@link VoidValue}</li>
 * <li>{@link CharSequenceNodeLeafVoidValue} - leaf node created for a {@link VoidValue}</li>
 * <li>{@link CharSequenceNodeLeafWithValue} - leaf node with a value</li>
 * <li>{@link CharSequenceNodeLeafNullValue} - leaf node created for a {@code null} value</li>
 * </ul>
 * <p>
 * When the application supplies {@link VoidValue} for a value, the value is not passed on to the node which
 * is created; one of the VoidValue-specific node types above is returned instead.
 *
 * @author Niall Gallagher
 */
public class DefaultCharSequenceNodeFactory implements NodeFactory {

    /**
     * Validates the arguments and then picks the node implementation matching the leaf/non-leaf and
     * value/void/null combination.
     *
     * @throws IllegalStateException If {@code edgeCharacters} or {@code childNodes} is null, if a non-root node
     * has no edge characters, or if two child nodes share the same first edge character
     */
    @Override
    public Node createNode(CharSequence edgeCharacters, Object value, List<Node> childNodes, boolean isRoot) {
        // Validation happens in a fixed order so that callers always see the same failure first.
        if (edgeCharacters == null) {
            throw new IllegalStateException("The edgeCharacters argument was null");
        }
        if (edgeCharacters.length() == 0 && !isRoot) {
            throw new IllegalStateException("Invalid edge characters for non-root node: " + CharSequences.toString(edgeCharacters));
        }
        if (childNodes == null) {
            throw new IllegalStateException("The childNodes argument was null");
        }
        NodeUtil.ensureNoDuplicateEdges(childNodes);

        final boolean voidValue = value instanceof VoidValue;
        if (!childNodes.isEmpty()) {
            // Non-leaf node...
            if (voidValue) {
                return new CharSequenceNodeNonLeafVoidValue(edgeCharacters, childNodes);
            }
            if (value == null) {
                return new CharSequenceNodeNonLeafNullValue(edgeCharacters, childNodes);
            }
            return new CharSequenceNodeDefault(edgeCharacters, value, childNodes);
        }
        // Leaf node...
        if (voidValue) {
            return new CharSequenceNodeLeafVoidValue(edgeCharacters);
        }
        if (value == null) {
            return new CharSequenceNodeLeafNullValue(edgeCharacters);
        }
        return new CharSequenceNodeLeafWithValue(edgeCharacters, value);
    }
}

View file

@ -0,0 +1,122 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
/**
 * Provides methods to convert {@link Iterable}s to {@link List}s and {@link Set}s.
 * <p>
 * This class is mostly provided for backwards compatibility in applications which were programmed against
 * concurrent-trees 1.0.0, in which the tree APIs returned lists and sets instead of lazily-evaluated iterables.
 * <p>
 * Note that in applications which would have simply iterated through the lists and sets returned by the old APIs,
 * the new approach of returning lazy iterables is more efficient. Applications can iterate the iterables returned
 * in exactly the same manner, and results will be the same.
 * <p>
 * These methods are provided for convenience in applications which <i>actually</i> relied on List and Set-specific
 * features in the objects which were returned.
 * <p>
 * Most methods in this class are somewhat similar to utilities in Google Guava; but are provided here to avoid a
 * dependency on Guava. Applications could use either these methods or Guava.
 */
public class Iterables {

    /**
     * Package-private constructor; this class only offers static methods and is not meant to be instantiated
     * by applications.
     */
    Iterables() {
    }

    /**
     * Copies elements from the given {@link Iterable} into a new {@link List}.
     * <p>
     * The iteration order of the list returned will be the same as that of the iterable.
     * <p>
     * Be aware of the memory implications of copying objects from a lazy iterable into a collection;
     * usually it's better to just work with the iterable directly (i.e. by iterating it).
     *
     * @param iterable Provides elements to be copied into a new list
     * @param <T> The type of elements returned by the iterable
     * @return A new, mutable, array-backed {@link List} which contains the elements which were returned by the
     * iterable
     */
    public static <T> List<T> toList(Iterable<T> iterable) {
        if (iterable instanceof Collection) {
            // The size is known up front, so let ArrayList bulk-copy the elements.
            return new ArrayList<>((Collection<T>) iterable);
        }
        // Array-backed rather than linked: less memory per element and O(1) indexed access for callers.
        List<T> elements = new ArrayList<>();
        for (T element : iterable) {
            elements.add(element);
        }
        return elements;
    }

    /**
     * Copies elements from the given {@link Iterable} into a new {@link Set}.
     * <p>
     * The iteration order of the set returned will be the same as that of the iterable; an element which the
     * iterable returns more than once keeps the position of its first occurrence.
     * <p>
     * Be aware of the memory implications of copying objects from a lazy iterable into a collection;
     * usually it's better to just work with the iterable directly (i.e. by iterating it).
     *
     * @param iterable Provides elements to be copied into a new set
     * @param <T> The type of elements returned by the iterable
     * @return A new {@link Set} which contains the elements which were returned by the iterable
     */
    public static <T> Set<T> toSet(Iterable<T> iterable) {
        // A LinkedHashSet is used in both branches instead of a HashSet, to preserve iteration order...
        if (iterable instanceof Collection) {
            return new LinkedHashSet<>((Collection<T>) iterable);
        }
        Set<T> elements = new LinkedHashSet<>();
        for (T element : iterable) {
            elements.add(element);
        }
        return elements;
    }

    /**
     * Returns a string representation of elements returned by the given {@link Iterable}, in the form
     * {@code [a, b, c]}.
     *
     * @param iterable Provides elements whose <code>toString</code> representations should be included in the string
     * @return A string representation of elements returned by the given {@link Iterable}
     */
    public static String toString(Iterable<?> iterable) {
        StringBuilder sb = new StringBuilder();
        sb.append("[");
        for (Iterator<?> i = iterable.iterator(); i.hasNext();) {
            sb.append(i.next());
            // Only emit a separator when another element follows.
            if (i.hasNext()) {
                sb.append(", ");
            }
        }
        sb.append("]");
        return sb.toString();
    }

    /**
     * Counts the number of elements returned by the given {@link Iterable}, by iterating it once.
     *
     * @param iterable Provides elements to be counted
     * @return The number of elements returned by the iterable
     */
    public static int count(Iterable<?> iterable) {
        int count = 0;
        //noinspection UnusedDeclaration
        for (Object next : iterable) {
            count++;
        }
        return count;
    }
}

View file

@ -0,0 +1,45 @@
package org.xbib.datastructures.trie.concurrent.util;
/**
 * Encapsulates a key and a value.
 * <p>
 * Implementations are expected to base {@link #equals(Object)} on the key, as documented on that method.
 *
 * @param <O> The type of the value
 */
public interface KeyValuePair<O> {
/**
 * Returns the key with which the value is associated.
 *
 * @return The key with which the value is associated
 */
CharSequence getKey();
/**
 * Returns the value associated with the key.
 *
 * @return The value associated with the key
 */
O getValue();
/**
 * Compares this {@link KeyValuePair} object with another for equality.
 * <p>
 * This is implemented based on equality of the keys.
 *
 * @param o The other object to compare
 * @return True if the other object is also a {@link KeyValuePair} and is equal to this one as specified above
 */
@Override
boolean equals(Object o);
/**
 * Returns a hash code for this object.
 * <p>
 * NOTE(review): to honour the general {@link Object#hashCode()} contract this presumably has to be derived
 * from the key, like {@link #equals(Object)} - confirm against the implementations.
 *
 * @return A hash code for this object
 */
@Override
int hashCode();
/**
 * Returns a string representation as {@code (key, value)}.
 *
 * @return A string representation as {@code (key, value)}
 */
@Override
String toString();
}

View file

@ -0,0 +1,76 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
 * An unmodifiable iterator which computes the next element to return only when it is requested.
 * <p>
 * Subclasses implement {@link #computeNext()} and signal exhaustion by returning the result of
 * {@link #endOfData()}. If {@link #computeNext()} throws an exception, the iterator is left in a failed state
 * and every later call to {@link #hasNext()} or {@link #next()} throws {@link IllegalStateException}.
 * <p>
 * This class is inspired by com.google.common.collect.AbstractIterator in Google Guava,
 * which was written by the Google Guava Authors, in particular by Kevin Bourrillion.
 *
 * @author Niall Gallagher
 */
public abstract class LazyIterator<T> implements Iterator<T> {

    enum State { READY, NOT_READY, DONE, FAILED }

    // The element computed by the last successful computeNext() call which has not been handed out yet.
    T next = null;

    State state = State.NOT_READY;

    /**
     * Always throws, this iterator is unmodifiable.
     *
     * @throws UnsupportedOperationException Always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("Iterator.remove() is not supported");
    }

    /**
     * Reports whether another element is available, computing it first if that has not been done yet.
     *
     * @return True if {@link #next()} will return an element
     * @throws IllegalStateException If an earlier call to {@link #computeNext()} threw an exception
     */
    @Override
    public final boolean hasNext() {
        switch (state) {
            case FAILED:
                throw new IllegalStateException("This iterator is in an inconsistent state, and can no longer be used, " +
                        "due to an exception previously thrown by the computeNext() method");
            case DONE:
                return false;
            case READY:
                return true;
            default:
                return tryToComputeNext();
        }
    }

    boolean tryToComputeNext() {
        // Temporary pessimism: if computeNext() throws, the iterator stays in the FAILED state.
        state = State.FAILED;
        next = computeNext();
        if (state == State.DONE) {
            // computeNext() called endOfData()
            return false;
        }
        state = State.READY;
        return true;
    }

    /**
     * Returns the next element, computing it first if necessary.
     *
     * @return The next element
     * @throws NoSuchElementException If there are no more elements
     * @throws IllegalStateException If an earlier call to {@link #computeNext()} threw an exception
     */
    @Override
    public final T next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        state = State.NOT_READY;
        T result = next;
        // Drop our reference so that the element handed out can be garbage collected while the caller holds on
        // to the iterator. This is safe: the NOT_READY state forces a recomputation before 'next' is read again.
        next = null;
        return result;
    }

    /**
     * Marks the iterator as exhausted.
     *
     * @return a dummy value which if returned by the <code>computeNext()</code> method, signals that there are no more
     * elements to return
     */
    protected final T endOfData() {
        state = State.DONE;
        return null;
    }

    /**
     * Computes the next element on demand.
     *
     * @return The next element which the iterator should return, or the result of calling <code>endOfData()</code>
     * if there are no more elements to return
     */
    protected abstract T computeNext();
}

View file

@ -0,0 +1,136 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.List;
/**
 * Specifies the methods that nodes must implement.
 * <p>
 * The main function of a node is to represent an "edge" in the tree. An edge is a connection from a parent node to a
 * child node which represents a sequence of characters. For practical reasons we store these characters in the child
 * node, to avoid needing separate Edge objects. All nodes except the root encode at least one character for an edge.
 * <p>
 * Nodes contain several fields, but not all nodes will actually need to store values in every field. Therefore
 * some specialized implementations of this interface are possible, optimized for storing various combinations of
 * data items in reduced numbers of fields, to reduce memory overhead.
 * <p>
 * Nodes are <u><i>partially immutable</i></u>:
 * <ul>
 * <li>
 * The <i>characters of an "edge"</i> encoded within a node are <u>immutable</u> (these characters belong to
 * the edge arriving at the current node from a parent node)
 * </li>
 * <li>
 * The <i>number of outgoing edges</i> from a node (references to child nodes), and the <i>first characters of
 * those edges</i> are <u>immutable</u>
 * </li>
 * <li>
 * The <i>references to child nodes for existing edges</i> (as identified by their first characters) are
 * <u>mutable with constraints</u>; the reference to a child node for an existing edge may be updated to point
 * to a different child node as long as the new edge starts with the same first character
 * </li>
 * <li>
 * If a node stores a value, the reference to the value is <u>immutable</u> (values can be changed but it
 * requires recreating the node with the new value - this is to account for specialized node implementations
 * omitting a field for the value when not required)
 * </li>
 * </ul>
 * These constraints exist to allow concurrent traversal and modifications to the tree. Nodes are required to
 * implement some operations <u>atomically</u>, see documentation on each method in this interface for details.
 * <p>
 * Hints for specialized implementations of this Node interface:
 * <ul>
 * <li>
 * Leaf nodes do not need to store references to child nodes; a specialized node implementation
 * could eliminate a field and associated data structure for child node references
 * </li>
 * <li>
 * All leaf nodes store values
 * </li>
 * <li>
 * Some non-leaf nodes store values, some do not
 * </li>
 * <li>
 * Edge character data can be encoded using implementation-specific methods.
 * <p>
 * Nodes are not required to store a {@link CharSequence} object verbatim, or use a particular implementation of
 * {@link CharSequence}, the only requirement is that they provide a {@link CharSequence} <i>view</i> onto
 * the character data.
 * <p>
 * Character data can optionally be stored outside of the tree. {@link CharSequence}s can encode a start and
 * end offset (or length) as a view onto a larger string (possibly a view onto the original key inserted).
 * Furthermore end offset could be stored as length, relative to the start offset with variable length encoding
 * to avoid storing 4 bytes for the length. This option would have consequences for
 * garbage collection of large string keys however, therefore would mostly suit immutable data sets.
 * <p>
 * Character data can be compressed. {@link CharSequence}s are free to store character data within the tree but
 * in a size-reduced encoding such as UTF-8
 * </li>
 * </ul>
 *
 * @author Niall Gallagher
 */
public interface Node extends NodeCharacterProvider {
/**
 * Returns the first character of the "edge" encoded in this node, belonging to the connection from a parent node to
 * this node.
 *
 * @return The first character of the "edge" encoded in this node
 */
Character getIncomingEdgeFirstCharacter();
/**
 * Returns all characters of the "edge" encoded in this node, belonging to the connection from a parent node to this
 * node.
 *
 * @return All characters of the "edge" encoded in this node
 */
CharSequence getIncomingEdge();
/**
 * Returns a value object which has been associated with a key and which is stored in this node, or returns
 * <code>null</code> if no value is stored in this node.
 *
 * @return A value object which has been associated with a key and which is stored in this node, or returns
 * <code>null</code> if no value is stored in this node
 */
Object getValue();
/**
 * Returns the child of this node whose edge starts with the given first character.
 * <p>
 * This <i>read</i> must be performed <b><u>atomically</u></b>, in relation to writes made via
 * {@link #updateOutgoingEdge(Node)}.
 *
 * @param edgeFirstCharacter The first character of the edge for which the associated child node is required
 * @return The child of this node whose edge starts with the given first character, or <code>null</code> if this
 * node has no such outgoing edge
 */
Node getOutgoingEdge(Character edgeFirstCharacter);
/**
 * Updates the child node reference for a given edge (identified by its first character) to point to a different
 * child node.
 * <p>
 * The first character of the given child node's edge must match the first character of an existing outgoing
 * edge from this node.
 * <p>
 * This <i>write</i> must be performed <b><u>atomically</u></b>, in relation to reads made via
 * {@link #getOutgoingEdge(Character)}.
 *
 * @param childNode The new child node to associate with this edge
 */
void updateOutgoingEdge(Node childNode);
/**
 * Returns a read-only list of the child nodes to which this node has outgoing edges, i.e. child nodes which have
 * incoming edges from this node.
 * <p>
 * It is intended that this method will be used for copying/cloning nodes.
 *
 * @return A read-only list of the child nodes to which this node has outgoing edges
 */
List<Node> getOutgoingEdges();
}

View file

@ -0,0 +1,15 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.Comparator;
/**
 * Orders {@link NodeCharacterProvider}s by the natural order of the character returned from
 * {@link NodeCharacterProvider#getIncomingEdgeFirstCharacter()}, which makes lists of nodes sortable and
 * binary-searchable by the first character of their incoming edge.
 */
public class NodeCharacterComparator implements Comparator<NodeCharacterProvider> {

    /**
     * Compares the first edge characters of the two providers.
     *
     * @return A negative number, zero, or a positive number if the character of {@code o1} is less than, equal to,
     * or greater than the character of {@code o2}
     */
    @Override
    public int compare(NodeCharacterProvider o1, NodeCharacterProvider o2) {
        final Character left = o1.getIncomingEdgeFirstCharacter();
        final Character right = o2.getIncomingEdgeFirstCharacter();
        return left.compareTo(right);
    }
}

View file

@ -0,0 +1,20 @@
package org.xbib.datastructures.trie.concurrent.util;
/**
 * A minimal {@link NodeCharacterProvider} which does nothing but carry a {@link Character}.
 * <p>
 * An instance can serve as the search key when looking up, via binary search, the node in a list of nodes whose
 * incoming edge starts with that character.
 */
public class NodeCharacterKey implements NodeCharacterProvider {

    // The character handed back by getIncomingEdgeFirstCharacter(); stored as given, without validation.
    private final Character edgeFirstCharacter;

    /**
     * @param character The character this key represents
     */
    public NodeCharacterKey(Character character) {
        edgeFirstCharacter = character;
    }

    /**
     * @return The character supplied to the constructor
     */
    @Override
    public Character getIncomingEdgeFirstCharacter() {
        return edgeFirstCharacter;
    }
}

View file

@ -0,0 +1,14 @@
package org.xbib.datastructures.trie.concurrent.util;
/**
 * A super-interface of both {@link Node} and {@link NodeCharacterKey}
 * which, by sharing this common interface, enables binary search of nodes via
 * {@link java.util.Collections#binarySearch(java.util.List, Object, java.util.Comparator)}.
 *
 * @see NodeCharacterComparator
 * @see NodeCharacterKey
 */
public interface NodeCharacterProvider {
/**
 * Returns the character by which {@link NodeCharacterComparator} orders and compares implementations; for a
 * {@link Node} this is the first character of its incoming edge.
 *
 * @return The first character of the incoming edge
 */
Character getIncomingEdgeFirstCharacter();
}

View file

@ -0,0 +1,34 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.List;
/**
 * An interface for a factory which creates new {@link Node} objects on demand, to encapsulate specified variables.
 * Factory objects can choose to return implementations of the {@link Node} interface which are memory-optimized for
 * storing only the given variables, potentially further optimized based on variable values.
 */
public interface NodeFactory {
/**
 * Returns a new {@link Node} object which encapsulates the arguments supplied, optionally returning implementations
 * of the {@link Node} interface which are memory-optimized for storing only the supplied combination of variables,
 * potentially further optimized based on variable values.
 *
 * @param edgeCharacters Provides edge characters to be stored in the node. This is never null. In the case of
 * (re-)constructing the root node, this will contain zero characters, otherwise will always contain one or more
 * characters
 *
 * @param value An arbitrary object to associate with the node. This can be null, but it will not be null if
 * dealing with a leaf node (when childNodes will be empty)
 *
 * @param childNodes A list of child nodes to store in the node. This will never be null, but may be empty when
 * building a leaf node
 *
 * @param isRoot Indicates if this will be the root node, in which case edge characters will be non-null but empty,
 * value will be null, and child nodes will be non-null but may be empty
 *
 * @return An object implementing the {@link Node} interface which stores the given variables
 */
Node createNode(CharSequence edgeCharacters, Object value, List<Node> childNodes, boolean isRoot);
}

View file

@ -0,0 +1,80 @@
package org.xbib.datastructures.trie.concurrent.util;
import java.util.*;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * Static helper methods for code which implements {@link Node}s.
 */
public class NodeUtil {

    /**
     * Package-private constructor; this class only offers static methods.
     */
    NodeUtil() {
    }

    /**
     * Locates, in the given {@link AtomicReferenceArray}, the node whose edge starts with the given first character.
     * <p>
     * The array is expected to satisfy the following constraints:
     * <ul>
     * <li>
     * It is sorted in ascending order of the first edge character of each node
     * </li>
     * <li>
     * It contains no null entries
     * </li>
     * <li>
     * A node in the array is only ever swapped concurrently for another node whose edge starts with the
     * same first character
     * </li>
     * </ul>
     * If these constraints are enforced as expected, then this method will have deterministic behaviour even in the
     * face of concurrent modification.
     *
     * @param childNodes An {@link AtomicReferenceArray} of {@link Node} objects, which is used in accordance with
     * the constraints documented in this method
     *
     * @param edgeFirstCharacter The first character of the edge for which the associated node is required
     * @return The index of the node representing the indicated edge, or a value {@code < 0} if no such node exists
     * in the array
     */
    public static int binarySearchForEdge(AtomicReferenceArray<Node> childNodes, Character edgeFirstCharacter) {
        // inspired by Collections#indexedBinarySearch()
        int lower = 0;
        int upper = childNodes.length() - 1;
        while (lower <= upper) {
            final int middle = (lower + upper) >>> 1;
            final Node candidate = childNodes.get(middle);
            final int comparison = candidate.getIncomingEdgeFirstCharacter().compareTo(edgeFirstCharacter);
            if (comparison == 0) {
                return middle; // key found
            }
            if (comparison < 0) {
                lower = middle + 1;
            } else {
                upper = middle - 1;
            }
        }
        // key not found: encode the insertion point as a negative number
        return -(lower + 1);
    }

    /**
     * Verifies that no two nodes in the given list represent edges starting with the same first character.
     *
     * @param nodes The list of nodes to validate
     * @throws IllegalStateException If a duplicate edge is detected
     */
    public static void ensureNoDuplicateEdges(List<Node> nodes) {
        // Collect the first characters of all edges; duplicates collapse, so the set ends up smaller than the list.
        final int nodeCount = nodes.size();
        final Set<Character> firstCharacters = new HashSet<Character>(nodeCount);
        for (Node node : nodes) {
            firstCharacters.add(node.getIncomingEdgeFirstCharacter());
        }
        final boolean duplicateFound = firstCharacters.size() != nodes.size();
        if (duplicateFound) {
            throw new IllegalStateException("Duplicate edge detected in list of nodes supplied: " + nodes);
        }
    }
}

View file

@ -0,0 +1,27 @@
package org.xbib.datastructures.trie.concurrent.util;
/**
 * A dummy value for entries in a tree which need a key but no real value.
 * <p>
 * All instances are equal to each other, share the hash code {@code 1} and print as {@code "-"}. Use
 * {@link #SINGLETON} rather than creating instances.
 */
public class VoidValue {

    /** The shared instance. */
    public static final VoidValue SINGLETON = new VoidValue();

    VoidValue() {
    }

    /**
     * @return True if the other object is a {@link VoidValue} as well
     */
    @Override
    public boolean equals(Object obj) {
        return obj instanceof VoidValue;
    }

    /**
     * @return The constant {@code 1}, consistent with all instances being equal
     */
    @Override
    public int hashCode() {
        return 1;
    }

    /**
     * @return The string {@code "-"}
     */
    @Override
    public String toString() {
        return "-";
    }
}

View file

@ -44,7 +44,7 @@ public class RegexTrie<V> {
private V value; private V value;
private final Map<CompPattern, RegexTrie<V>> children; private final Map<ComparablePattern, RegexTrie<V>> children;
public RegexTrie() { public RegexTrie() {
children = new LinkedHashMap<>(); children = new LinkedHashMap<>();
@ -74,15 +74,17 @@ public class RegexTrie<V> {
* retrieve the associated {@code value} * retrieve the associated {@code value}
*/ */
public void put(V value, List<?> patterns) { public void put(V value, List<?> patterns) {
List<CompPattern> list = new ArrayList<>(patterns.size()); List<ComparablePattern> list = new ArrayList<>(patterns.size());
for (Object object : patterns) { for (Object object : patterns) {
CompPattern compPattern = null; ComparablePattern comparablePattern = null;
if (object instanceof Pattern) { if (object instanceof Pattern) {
compPattern = new CompPattern((Pattern) object); comparablePattern = new ComparablePattern((Pattern) object);
} else if (object instanceof String) { } else if (object instanceof String) {
compPattern = new CompPattern(Pattern.compile((String) object)); if (!((String) object).isEmpty()) {
comparablePattern = new ComparablePattern(Pattern.compile((String) object));
} }
list.add(compPattern); }
list.add(comparablePattern);
} }
validateAndPut(value, list); validateAndPut(value, list);
} }
@ -129,17 +131,17 @@ public class RegexTrie<V> {
* A helper method to consolidate validation before adding an entry to the trie. * A helper method to consolidate validation before adding an entry to the trie.
* *
* @param value The value to set * @param value The value to set
* @param list The sequence of {@link CompPattern}s that must be sequentially matched to * @param list The sequence of {@link ComparablePattern}s that must be sequentially matched to
* retrieve the associated {@code value} * retrieve the associated {@code value}
*/ */
private V validateAndPut(V value, List<CompPattern> list) { private V validateAndPut(V value, List<ComparablePattern> list) {
if (list.size() == 0) { if (list.size() == 0) {
throw new IllegalArgumentException("pattern list must be non-empty"); throw new IllegalArgumentException("pattern list must be non-empty");
} }
return recursivePut(value, list); return recursivePut(value, list);
} }
private V recursivePut(V value, List<CompPattern> patterns) { private V recursivePut(V value, List<ComparablePattern> patterns) {
// Cases: // Cases:
// 1) patterns is empty -- set our value // 1) patterns is empty -- set our value
// 2) patterns is non-empty -- recurse downward, creating a child if necessary // 2) patterns is non-empty -- recurse downward, creating a child if necessary
@ -148,8 +150,8 @@ public class RegexTrie<V> {
this.value = value; this.value = value;
return oldValue; return oldValue;
} else { } else {
CompPattern curKey = patterns.get(0); ComparablePattern curKey = patterns.get(0);
List<CompPattern> nextKeys = patterns.subList(1, patterns.size()); List<ComparablePattern> nextKeys = patterns.subList(1, patterns.size());
// Create a new child to handle // Create a new child to handle
RegexTrie<V> nextChild = children.get(curKey); RegexTrie<V> nextChild = children.get(curKey);
if (nextChild == null) { if (nextChild == null) {
@ -171,8 +173,8 @@ public class RegexTrie<V> {
V wildcardValue = null; V wildcardValue = null;
String curKey = strings.get(0); String curKey = strings.get(0);
List<String> nextKeys = strings.subList(1, strings.size()); List<String> nextKeys = strings.subList(1, strings.size());
for (Map.Entry<CompPattern, RegexTrie<V>> child : children.entrySet()) { for (Map.Entry<ComparablePattern, RegexTrie<V>> child : children.entrySet()) {
CompPattern pattern = child.getKey(); ComparablePattern pattern = child.getKey();
if (pattern == null) { if (pattern == null) {
wildcardMatch = true; wildcardMatch = true;
wildcardValue = child.getValue().value; wildcardValue = child.getValue().value;
@ -213,13 +215,13 @@ public class RegexTrie<V> {
/** /**
* Patterns aren't comparable by default, which prevents you from retrieving them from a Map. * Patterns aren't comparable by default, which prevents you from retrieving them from a Map.
* This is a simple stub class that makes a Pattern with a working * This is a simple stub class that makes a Pattern with a working
* {@link CompPattern#equals(Object)} method. * {@link ComparablePattern#equals(Object)} method.
*/ */
private static class CompPattern { private static class ComparablePattern {
protected final Pattern pattern; protected final Pattern pattern;
CompPattern(Pattern pattern) { ComparablePattern(Pattern pattern) {
Objects.requireNonNull(pattern); Objects.requireNonNull(pattern);
this.pattern = pattern; this.pattern = pattern;
} }
@ -229,8 +231,8 @@ public class RegexTrie<V> {
Pattern otherPat; Pattern otherPat;
if (other instanceof Pattern) { if (other instanceof Pattern) {
otherPat = (Pattern) other; otherPat = (Pattern) other;
} else if (other instanceof CompPattern) { } else if (other instanceof RegexTrie.ComparablePattern) {
CompPattern otherCPat = (CompPattern) other; ComparablePattern otherCPat = (ComparablePattern) other;
otherPat = otherCPat.pattern; otherPat = otherCPat.pattern;
} else { } else {
return false; return false;

View file

@ -5,15 +5,16 @@ import java.util.Set;
public interface Trie<T,K extends TrieKey<T>, V> { public interface Trie<T,K extends TrieKey<T>, V> {
void add(K key, V value); void put(K key, V value);
V search(K key); V get(K key);
boolean containsKey(K key);
Set<K> getKeys();
int size();
List<V> startsWith(List<TrieKeySegment<T>> prefix); List<V> startsWith(List<TrieKeySegment<T>> prefix);
boolean contains(K key);
Set<K> getAllKeys();
int size();
} }

View file

@ -15,12 +15,12 @@ public class TrieImpl<T,V> implements Trie<T, TrieKey<T>, V> {
} }
@Override @Override
public void add(TrieKey<T> key, V value) { public void put(TrieKey<T> key, V value) {
addNode(node, key, 0, value); addNode(node, key, 0, value);
} }
@Override @Override
public V search(TrieKey<T> key) { public V get(TrieKey<T> key) {
return findKey(node, key); return findKey(node, key);
} }
@ -41,12 +41,12 @@ public class TrieImpl<T,V> implements Trie<T, TrieKey<T>, V> {
} }
@Override @Override
public boolean contains(TrieKey<T> key) { public boolean containsKey(TrieKey<T> key) {
return hasKey(node, key); return hasKey(node, key);
} }
@Override @Override
public Set<TrieKey<T>> getAllKeys() { public Set<TrieKey<T>> getKeys() {
Set<TrieKey<T>> keySet = new HashSet<>(); Set<TrieKey<T>> keySet = new HashSet<>();
getKeys(node, new TrieKeyImpl<>(), keySet); getKeys(node, new TrieKeyImpl<>(), keySet);
return keySet; return keySet;
@ -54,7 +54,7 @@ public class TrieImpl<T,V> implements Trie<T, TrieKey<T>, V> {
@Override @Override
public int size() { public int size() {
return getAllKeys().size(); return getKeys().size();
} }
private void getValues(Node<T,V> currNode, List<V> valueList) { private void getValues(Node<T,V> currNode, List<V> valueList) {
@ -79,7 +79,7 @@ public class TrieImpl<T,V> implements Trie<T, TrieKey<T>, V> {
} }
private V findKey(Node<T,V> currNode, TrieKey<T> key) { private V findKey(Node<T,V> currNode, TrieKey<T> key) {
TrieKeySegment<T> e = key.size() > 0 ? key.get(0) : null; TrieKeySegment<T> e = key.size() > 0 ? key.getSegment(0) : null;
if (currNode.getChildren().containsKey(e)) { if (currNode.getChildren().containsKey(e)) {
Node<T,V> nextNode = currNode.getChildren().get(e); Node<T,V> nextNode = currNode.getChildren().get(e);
if (key.size() <= 1) { if (key.size() <= 1) {
@ -94,7 +94,7 @@ public class TrieImpl<T,V> implements Trie<T, TrieKey<T>, V> {
} }
private boolean hasKey(Node<T,V> currNode, TrieKey<T> key) { private boolean hasKey(Node<T,V> currNode, TrieKey<T> key) {
TrieKeySegment<T> e = key.size() > 0 ? key.get(0) : null; TrieKeySegment<T> e = key.size() > 0 ? key.getSegment(0) : null;
if (currNode.getChildren().containsKey(e)) { if (currNode.getChildren().containsKey(e)) {
Node<T,V> nextNode = currNode.getChildren().get(e); Node<T,V> nextNode = currNode.getChildren().get(e);
if (key.size() <= 1) { if (key.size() <= 1) {
@ -107,7 +107,7 @@ public class TrieImpl<T,V> implements Trie<T, TrieKey<T>, V> {
} }
private void addNode(Node<T,V> currNode, TrieKey<T> key, int pos, V value) { private void addNode(Node<T,V> currNode, TrieKey<T> key, int pos, V value) {
TrieKeySegment<T> e = pos < key.size() ? key.get(pos) : null; TrieKeySegment<T> e = pos < key.size() ? key.getSegment(pos) : null;
Node<T,V> nextNode = currNode.getChildren().get(e); Node<T,V> nextNode = currNode.getChildren().get(e);
if (nextNode == null) { if (nextNode == null) {
nextNode = new NodeImpl<>(); nextNode = new NodeImpl<>();

View file

@ -1,6 +1,5 @@
package org.xbib.datastructures.trie.segment; package org.xbib.datastructures.trie.segment;
import java.util.Arrays;
import java.util.List; import java.util.List;
public interface TrieKey<T> { public interface TrieKey<T> {
@ -13,7 +12,7 @@ public interface TrieKey<T> {
void set(int i, TrieKeySegment<T> trieKeySegment); void set(int i, TrieKeySegment<T> trieKeySegment);
TrieKeySegment<T> get(int i); TrieKeySegment<T> getSegment(int i);
List<TrieKeySegment<T>> getSegments(); List<TrieKeySegment<T>> getSegments();
} }

View file

@ -48,7 +48,7 @@ public class TrieKeyImpl<T> implements TrieKey<T>, Comparable<TrieKeyImpl<T>> {
} }
@Override @Override
public TrieKeySegment<T> get(int i) { public TrieKeySegment<T> getSegment(int i) {
return segments.get(i); return segments.get(i);
} }

View file

@ -0,0 +1,962 @@
package org.xbib.datastructures.trie.concurrent;
import org.junit.jupiter.api.Test;
import org.xbib.datastructures.trie.concurrent.util.DefaultCharArrayNodeFactory;
import org.xbib.datastructures.trie.concurrent.util.DefaultCharSequenceNodeFactory;
import org.xbib.datastructures.trie.concurrent.util.Iterables;
import org.xbib.datastructures.trie.concurrent.util.KeyValuePair;
import org.xbib.datastructures.trie.concurrent.util.Node;
import org.xbib.datastructures.trie.concurrent.util.NodeFactory;
import org.xbib.datastructures.trie.concurrent.util.VoidValue;
import java.util.Arrays;
import java.util.Collections;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class ConcurrentRadixTreeTest {
private final NodeFactory nodeFactory = new DefaultCharArrayNodeFactory();
/**
 * Supplies the node factory used to build the trees and nodes in this test class.
 *
 * @return the factory held in the {@code nodeFactory} field
 */
protected NodeFactory getNodeFactory() {
    return this.nodeFactory;
}
@Test
public void testBuildTreeByHand() {
    // Wire the nodes together manually, leaves first, to mimic a tree that
    // received the keys B, BA, BAN, BANDANA, BANAN and BANANA:
    //
    //   B (1) -> A (2) -> N (3) -> { AN (5) -> A (6), DANA (4) }
    final NodeFactory factory = getNodeFactory();
    final Node a6 = factory.createNode("A", 6, Collections.<Node>emptyList(), false);
    final Node an5 = factory.createNode("AN", 5, Arrays.asList(a6), false);
    final Node dana4 = factory.createNode("DANA", 4, Collections.<Node>emptyList(), false);
    // Children are handed over as (DANA, AN) on purpose: the expected output lists AN first.
    final Node n3 = factory.createNode("N", 3, Arrays.asList(dana4, an5), false);
    final Node a2 = factory.createNode("A", 2, Arrays.asList(n3), false);
    final Node b1 = factory.createNode("B", 1, Arrays.asList(a2), false);
    //noinspection NullableProblems
    final Node rootNode = factory.createNode("", null, Arrays.asList(b1), true);

    assertEquals(
            "\n" +
            "└── ○ B (1)\n" +
            " └── ○ A (2)\n" +
            " └── ○ N (3)\n" +
            " ├── ○ AN (5)\n" +
            " │ └── ○ A (6)\n" +
            " └── ○ DANA (4)\n",
            PrettyPrinter.prettyPrint(rootNode));
}
@Test
public void testPut_AddToRoot() {
    // A single key is expected to hang directly below the root.
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("A", 1);

    assertEquals(
            "\n" +
            "└── ○ A (1)\n",
            PrettyPrinter.prettyPrint(tree));
}
@Test
public void testPut_ChildNodeSorting() {
    // Keys are inserted in reverse order; the printed tree must list A before B.
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("B", 1);
    tree.put("A", 2);

    assertEquals(
            "\n" +
            "├── ○ A (2)\n" +
            "└── ○ B (1)\n",
            PrettyPrinter.prettyPrint(tree));
}
@Test
public void testPut_AppendChild() {
    // FOOBAR extends the existing key FOO, so BAR becomes a child edge of FOO.
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOO", 1);
    tree.put("FOOBAR", 2);

    assertEquals(
            "\n" +
            "└── ○ FOO (1)\n" +
            " └── ○ BAR (2)\n",
            PrettyPrinter.prettyPrint(tree));
}
@Test
public void testPut_SplitEdge() {
    // Inserting the shorter key second forces the FOOBAR edge to be split at FOO.
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOOBAR", 1);
    tree.put("FOO", 2);

    assertEquals(
            "\n" +
            "└── ○ FOO (2)\n" +
            " └── ○ BAR (1)\n",
            PrettyPrinter.prettyPrint(tree));
}
@Test
public void testPut_SplitWithImplicitNode() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOOBAR", 1);
    tree.put("FOOD", 2);

    // FOO was never inserted as a key; it shows up without a value as the common prefix.
    assertEquals(
            "\n" +
            "└── ○ FOO\n" +
            " ├── ○ BAR (1)\n" +
            " └── ○ D (2)\n",
            PrettyPrinter.prettyPrint(tree));
}
@Test
public void testPut_SplitAndMove() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("TEST", 1);
    tree.put("TEAM", 2);
    tree.put("TOAST", 3);

    // T and E carry no value: they only exist as shared prefixes of the inserted keys.
    assertEquals(
            "\n" +
            "└── ○ T\n" +
            " ├── ○ E\n" +
            " │ ├── ○ AM (2)\n" +
            " │ └── ○ ST (1)\n" +
            " └── ○ OAST (3)\n",
            PrettyPrinter.prettyPrint(tree));
}
@Test
public void testPut_OverwriteValue() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());

    // First insert: nothing was stored under the key before.
    Integer previous = tree.put("FOO", 1);
    assertNull(previous);

    // Second insert: the old value is handed back and the new one is stored.
    previous = tree.put("FOO", 2);
    assertNotNull(previous);
    assertEquals(Integer.valueOf(1), previous);
    assertEquals(Integer.valueOf(2), tree.getValueForExactKey("FOO"));
}
@Test
public void testPutIfAbsent_DoNotOverwriteValue() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());

    // Key is absent: the value is stored and null comes back.
    Integer previous = tree.putIfAbsent("FOO", 1);
    assertNull(previous);

    // Key is present: the stored value is returned and left untouched.
    previous = tree.putIfAbsent("FOO", 2);
    assertNotNull(previous);
    assertEquals(Integer.valueOf(1), previous);
    assertEquals(Integer.valueOf(1), tree.getValueForExactKey("FOO"));
}
@Test
public void testPutIfAbsent_SplitNode() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());

    // Two keys sharing the prefix FOO; the intended shape is
    //
    //   FOO            (no value, common prefix only)
    //     BAR (1)
    //     D (1)
    Integer previous = tree.putIfAbsent("FOOBAR", 1);
    assertNull(previous);
    previous = tree.putIfAbsent("FOOD", 1);
    assertNull(previous);

    // Now target the value-less FOO node itself: the key matches a node exactly,
    // but there is no earlier value to report, so null is expected again.
    previous = tree.putIfAbsent("FOO", 2);
    assertNull(previous);
}
@Test
public void testPut_VoidValue_CharArrayNodeFactory() {
    // Same shape as an ordinary insert, with VoidValue printed as "-".
    ConcurrentRadixTree<VoidValue> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOO", VoidValue.SINGLETON);
    tree.put("FOOBAR", VoidValue.SINGLETON);

    assertEquals(
            "\n" +
            "└── ○ FOO (-)\n" +
            " └── ○ BAR (-)\n",
            PrettyPrinter.prettyPrint(tree));
}
@Test
public void testPut_VoidValue_CharSequenceNodeFactory() {
    // Uses the char-sequence based factory instead of the one from getNodeFactory().
    ConcurrentRadixTree<VoidValue> tree = new ConcurrentRadixTree<>(new DefaultCharSequenceNodeFactory());
    tree.put("FOO", VoidValue.SINGLETON);
    tree.put("FOOBAR", VoidValue.SINGLETON);

    assertEquals(
            "\n" +
            "└── ○ FOO (-)\n" +
            " └── ○ BAR (-)\n",
            PrettyPrinter.prettyPrint(tree));
}
@Test
public void testPutInternal_ArgumentValidation1() {
    // A null key must be rejected.
    assertThrows(IllegalArgumentException.class, () -> {
        ConcurrentRadixTree<Integer> radixTree = new ConcurrentRadixTree<>(getNodeFactory());
        //noinspection NullableProblems
        radixTree.put(null, 1);
    });
}
@Test
public void testPutInternal_ArgumentValidation2() {
    // A null value must be rejected.
    assertThrows(IllegalArgumentException.class, () -> {
        ConcurrentRadixTree<Integer> radixTree = new ConcurrentRadixTree<>(getNodeFactory());
        //noinspection NullableProblems
        radixTree.put("FOO", null);
    });
}
@Test
public void testPutInternal_ArgumentValidation3() {
    // An empty key must be rejected.
    assertThrows(IllegalArgumentException.class, () -> {
        ConcurrentRadixTree<Integer> radixTree = new ConcurrentRadixTree<>(getNodeFactory());
        //noinspection NullableProblems
        radixTree.put("", 1);
    });
}
@Test
public void testPutInternal_InvalidClassification() {
    assertThrows(IllegalStateException.class, () -> {
        // Search result whose classification is forced to INVALID, whatever the inputs.
        class AlwaysInvalidResult extends ConcurrentRadixTree.SearchResult {

            AlwaysInvalidResult(CharSequence key, Node nodeFound, int charsMatched, int charsMatchedInNodeFound, Node parentNode, Node parentNodesParent) {
                super(key, nodeFound, charsMatched, charsMatchedInNodeFound, parentNode, parentNodesParent);
            }

            @Override
            protected Classification classify(CharSequence key, Node nodeFound, int charsMatched, int charsMatchedInNodeFound) {
                return Classification.INVALID;
            }
        }

        // Tree whose lookups always yield the forced-invalid result.
        ConcurrentRadixTree<Integer> brokenTree = new ConcurrentRadixTree<Integer>(getNodeFactory()) {
            @Override
            SearchResult searchTree(CharSequence key) {
                return new AlwaysInvalidResult("FOO", root, 4, 4, null, null);
            }
        };

        // put() has no handling for INVALID and is expected to fail with IllegalStateException.
        brokenTree.put("FOO", 1);
    });
}
@Test
public void testSize() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    assertEquals(0, tree.size());

    // Every insert of a new key grows the size by one.
    tree.put("TEST", 1);
    assertEquals(1, tree.size());
    tree.put("TEAM", 2);
    assertEquals(2, tree.size());
    tree.put("TOAST", 3);
    assertEquals(3, tree.size());

    // Removing a key that was never stored leaves the size alone.
    tree.remove("FOO");
    assertEquals(3, tree.size());

    // Every removal of a stored key shrinks the size by one, down to empty.
    tree.remove("TOAST");
    assertEquals(2, tree.size());
    tree.remove("TEAM");
    assertEquals(1, tree.size());
    tree.remove("TEST");
    assertEquals(0, tree.size());
}
@Test
public void testGet() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("TEST", 1);
    tree.put("TEAM", 2);
    tree.put("TOAST", 3);
    // Intended shape (T and E exist only as shared prefixes, without values):
    //
    //   T
    //     E
    //       AM (2)
    //       ST (1)
    //     OAST (3)

    // Stored keys return their values.
    assertEquals(Integer.valueOf(1), tree.getValueForExactKey("TEST"));
    assertEquals(Integer.valueOf(2), tree.getValueForExactKey("TEAM"));
    assertEquals(Integer.valueOf(3), tree.getValueForExactKey("TOAST"));

    // Prefix-only nodes have nothing to return.
    assertNull(tree.getValueForExactKey("T"));
    assertNull(tree.getValueForExactKey("TE"));

    // Sanity checks: no edge E leaves the root, and the root itself holds no value.
    assertNull(tree.getValueForExactKey("E"));
    assertNull(tree.getValueForExactKey(""));
}
@Test
public void testRemove_MoreThanOneChildEdge() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOO", 1);
    tree.put("FOOBAR", 2);
    tree.put("FOOD", 3);

    // Starting point: FOO holds a value and has two child edges.
    String before =
            "\n" +
            "└── ○ FOO (1)\n" +
            " ├── ○ BAR (2)\n" +
            " └── ○ D (3)\n";
    assertEquals(before, PrettyPrinter.prettyPrint(tree));

    assertTrue(tree.remove("FOO"));

    // FOO loses its value but has to stay in place as the parent of BAR and D.
    String after =
            "\n" +
            "└── ○ FOO\n" +
            " ├── ○ BAR (2)\n" +
            " └── ○ D (3)\n";
    assertEquals(after, PrettyPrinter.prettyPrint(tree));
}
@Test
public void testRemove_ExactlyOneChildEdge() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOO", 1);
    tree.put("FOOBAR", 2);
    tree.put("FOOBARBAZ", 3);

    // Starting point: a straight chain FOO -> BAR -> BAZ.
    String before =
            "\n" +
            "└── ○ FOO (1)\n" +
            " └── ○ BAR (2)\n" +
            " └── ○ BAZ (3)\n";
    assertEquals(before, PrettyPrinter.prettyPrint(tree));

    assertTrue(tree.remove("FOO"));

    // FOO had a single child, so the two edges collapse into FOOBAR,
    // which keeps BAR's value and BAR's child edge.
    String after =
            "\n" +
            "└── ○ FOOBAR (2)\n" +
            " └── ○ BAZ (3)\n";
    assertEquals(after, PrettyPrinter.prettyPrint(tree));
}
@Test
public void testRemove_ZeroChildEdges_DirectChildOfRoot() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOO", 1);
    tree.put("BAR", 2);

    // Starting point: two leaves directly below the root.
    String before =
            "\n" +
            "├── ○ BAR (2)\n" +
            "└── ○ FOO (1)\n";
    assertEquals(before, PrettyPrinter.prettyPrint(tree));

    assertTrue(tree.remove("FOO"));

    // Only BAR is left below the root.
    String after =
            "\n" +
            "└── ○ BAR (2)\n";
    assertEquals(after, PrettyPrinter.prettyPrint(tree));
}
@Test
public void testRemove_LastRemainingKey() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOO", 1);

    // Starting point: a single key.
    String before =
            "\n" +
            "└── ○ FOO (1)\n";
    assertEquals(before, PrettyPrinter.prettyPrint(tree));

    assertTrue(tree.remove("FOO"));

    // With the only key gone, just the root line remains.
    String after =
            "\n";
    assertEquals(after, PrettyPrinter.prettyPrint(tree));
}
@Test
public void testRemove_ZeroChildEdges_OneStepFromRoot() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOO", 1);
    tree.put("FOOBAR", 2);

    // Starting point: FOO with the single leaf BAR below it.
    String before =
            "\n" +
            "└── ○ FOO (1)\n" +
            " └── ○ BAR (2)\n";
    assertEquals(before, PrettyPrinter.prettyPrint(tree));

    assertTrue(tree.remove("FOOBAR"));

    // The leaf is gone; FOO keeps its value.
    String after =
            "\n" +
            "└── ○ FOO (1)\n";
    assertEquals(after, PrettyPrinter.prettyPrint(tree));
}
@Test
public void testRemove_ZeroChildEdges_SeveralStepsFromRoot() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOO", 1);
    tree.put("FOOBAR", 2);
    tree.put("FOOBARBAZ", 3);

    // Starting point: a straight chain FOO -> BAR -> BAZ.
    String before =
            "\n" +
            "└── ○ FOO (1)\n" +
            " └── ○ BAR (2)\n" +
            " └── ○ BAZ (3)\n";
    assertEquals(before, PrettyPrinter.prettyPrint(tree));

    assertTrue(tree.remove("FOOBARBAZ"));

    // The deepest leaf is gone; both ancestors keep their values.
    String after =
            "\n" +
            "└── ○ FOO (1)\n" +
            " └── ○ BAR (2)\n";
    assertEquals(after, PrettyPrinter.prettyPrint(tree));
}
@Test
public void testRemove_DoNotRemoveSplitNode() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOOBAR", 1);
    tree.put("FOOD", 2);

    // FOO exists only as the value-less common prefix of the two keys.
    String unchanged =
            "\n" +
            "└── ○ FOO\n" +
            " ├── ○ BAR (1)\n" +
            " └── ○ D (2)\n";
    assertEquals(unchanged, PrettyPrinter.prettyPrint(tree));

    // FOO was never stored as a key, so there is nothing to remove ...
    assertFalse(tree.remove("FOO"));

    // ... and the tree must look exactly as before.
    assertEquals(unchanged, PrettyPrinter.prettyPrint(tree));
}
@Test
public void testRemove_MergeSplitNode() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("TEST", 1);
    tree.put("TEAM", 2);
    tree.put("TOAST", 3);

    // Starting point: value-less E node with the two children AM and ST.
    String before =
            "\n" +
            "└── ○ T\n" +
            " ├── ○ E\n" +
            " │ ├── ○ AM (2)\n" +
            " │ └── ○ ST (1)\n" +
            " └── ○ OAST (3)\n";
    assertEquals(before, PrettyPrinter.prettyPrint(tree));

    assertTrue(tree.remove("TEST"));

    // E is left with one child and no value, so it is merged with AM into EAM.
    String after =
            "\n" +
            "└── ○ T\n" +
            " ├── ○ EAM (2)\n" +
            " └── ○ OAST (3)\n";
    assertEquals(after, PrettyPrinter.prettyPrint(tree));
}
@Test
public void testRemove_DoNotMergeSplitNodeWithValue() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("TEST", 1);
    tree.put("TEAM", 2);
    tree.put("TOAST", 3);
    tree.put("TE", 4);

    // Starting point: this time the E node carries a value of its own.
    String before =
            "\n" +
            "└── ○ T\n" +
            " ├── ○ E (4)\n" +
            " │ ├── ○ AM (2)\n" +
            " │ └── ○ ST (1)\n" +
            " └── ○ OAST (3)\n";
    assertEquals(before, PrettyPrinter.prettyPrint(tree));

    assertTrue(tree.remove("TEST"));

    // E keeps its value, so it must stay a separate node above its remaining child AM.
    String after =
            "\n" +
            "└── ○ T\n" +
            " ├── ○ E (4)\n" +
            " │ └── ○ AM (2)\n" +
            " └── ○ OAST (3)\n";
    assertEquals(after, PrettyPrinter.prettyPrint(tree));
}
@Test
public void testRemove_NoSuchKey() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("FOO", 1);
    tree.put("BAR", 2);

    String unchanged =
            "\n" +
            "├── ○ BAR (2)\n" +
            "└── ○ FOO (1)\n";
    assertEquals(unchanged, PrettyPrinter.prettyPrint(tree));

    // BAZ shares a prefix with BAR but was never stored.
    assertFalse(tree.remove("BAZ"));

    // The failed removal must not alter the tree.
    assertEquals(unchanged, PrettyPrinter.prettyPrint(tree));
}
@Test
public void testGetKeysForPrefix() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("TEST", 1);
    tree.put("TEAM", 2);
    tree.put("TOAST", 3);
    tree.put("TEA", 4);
    tree.put("COFFEE", 5);
    // Intended shape:
    //
    //   COFFEE (5)
    //   T
    //     E
    //       A (4)
    //         M (2)
    //       ST (1)
    //     OAST (3)

    // The empty prefix matches every key.
    assertEquals("[COFFEE, TEA, TEAM, TEST, TOAST]", Iterables.toString(tree.getKeysStartingWith("")));

    // Prefixes within, equal to, and longer than the key COFFEE.
    assertEquals("[COFFEE]", Iterables.toString(tree.getKeysStartingWith("C")));
    assertEquals("[COFFEE]", Iterables.toString(tree.getKeysStartingWith("COFFEE")));
    assertEquals("[]", Iterables.toString(tree.getKeysStartingWith("COFFEES")));

    // Progressively longer prefixes narrow the T subtree.
    assertEquals("[TEA, TEAM, TEST, TOAST]", Iterables.toString(tree.getKeysStartingWith("T")));
    assertEquals("[TEA, TEAM, TEST]", Iterables.toString(tree.getKeysStartingWith("TE")));
    assertEquals("[TEA, TEAM]", Iterables.toString(tree.getKeysStartingWith("TEA")));
    assertEquals("[TOAST]", Iterables.toString(tree.getKeysStartingWith("TO")));
}
@Test
public void testGetClosestKeys() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("COD", 1);
    tree.put("CODFISH", 2);
    tree.put("COFFEE", 3);
    // Intended shape:
    //
    //   CO
    //     D (1)
    //       FISH (2)
    //     FFEE (3)

    // Candidates that diverge at or inside the shared CO prefix get every key.
    assertEquals("[COD, CODFISH, COFFEE]", Iterables.toString(tree.getClosestKeys("COW")));
    assertEquals("[COD, CODFISH, COFFEE]", Iterables.toString(tree.getClosestKeys("CX")));

    // Candidates that reach into one branch get the keys of that branch only.
    assertEquals("[COD, CODFISH]", Iterables.toString(tree.getClosestKeys("COD")));
    assertEquals("[COFFEE]", Iterables.toString(tree.getClosestKeys("COF")));

    // Nothing in common with any key.
    assertEquals("[]", Iterables.toString(tree.getClosestKeys("DO")));

    // Candidate longer than the deepest matching key.
    assertEquals("[CODFISH]", Iterables.toString(tree.getClosestKeys("CODFISHES")));
}
@Test
public void testGetValuesForClosestKeys() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("COD", 1);
    tree.put("CODFISH", 2);
    tree.put("COFFEE", 3);
    // Intended shape:
    //
    //   CO
    //     D (1)
    //       FISH (2)
    //     FFEE (3)

    // Candidates that diverge at or inside the shared CO prefix get every value.
    assertEquals("[1, 2, 3]", Iterables.toString(tree.getValuesForClosestKeys("COW")));
    assertEquals("[1, 2, 3]", Iterables.toString(tree.getValuesForClosestKeys("CX")));

    // Candidates that reach into one branch get the values of that branch only.
    assertEquals("[1, 2]", Iterables.toString(tree.getValuesForClosestKeys("COD")));
    assertEquals("[3]", Iterables.toString(tree.getValuesForClosestKeys("COF")));

    // Nothing in common with any key.
    assertEquals("[]", Iterables.toString(tree.getValuesForClosestKeys("DO")));

    // Candidate longer than the deepest matching key.
    assertEquals("[2]", Iterables.toString(tree.getValuesForClosestKeys("CODFISHES")));
}
@Test
public void testGetKeyValuePairsForClosestKeys() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("COD", 1);
    tree.put("CODFISH", 2);
    tree.put("COFFEE", 3);
    // Intended shape:
    //
    //   CO
    //     D (1)
    //       FISH (2)
    //     FFEE (3)

    // Candidates that diverge at or inside the shared CO prefix get every pair.
    assertEquals("[(COD, 1), (CODFISH, 2), (COFFEE, 3)]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("COW")));
    assertEquals("[(COD, 1), (CODFISH, 2), (COFFEE, 3)]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("CX")));

    // Candidates that reach into one branch get the pairs of that branch only.
    assertEquals("[(COD, 1), (CODFISH, 2)]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("COD")));
    assertEquals("[(COFFEE, 3)]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("COF")));

    // Nothing in common with any key.
    assertEquals("[]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("DO")));

    // Candidate longer than the deepest matching key.
    assertEquals("[(CODFISH, 2)]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("CODFISHES")));
}
@Test
public void testKeyValuePair_Accessor() {
    KeyValuePair<Integer> pair = new ConcurrentRadixTree.KeyValuePairImpl<>("FOO", 5);

    // assertEquals takes (expected, actual): the literal goes first so that a
    // failure report labels the two values correctly.
    assertEquals("FOO", pair.getKey());
    assertEquals(Integer.valueOf(5), pair.getValue());
    assertEquals("(FOO, 5)", pair.toString());
}
@Test
public void testKeyValuePair_EqualsAndHashCode() {
    // pair1 and pair2 share the key but not the value; pair3 has a different key.
    KeyValuePair<Integer> pair1 = new ConcurrentRadixTree.KeyValuePairImpl<>("FOO", 5);
    KeyValuePair<Integer> pair2 = new ConcurrentRadixTree.KeyValuePairImpl<>("FOO", 6);
    KeyValuePair<Integer> pair3 = new ConcurrentRadixTree.KeyValuePairImpl<>("BAR", 5);

    // equals() is invoked explicitly because it is the method under test:
    // reflexive, equal for the same key despite different values, unequal for different keys.
    assertTrue(pair1.equals(pair1));
    assertTrue(pair1.equals(pair2));
    assertFalse(pair1.equals(pair3));
    //noinspection NullableProblems,ObjectEqualsNull
    assertFalse(pair1.equals(null));
    //noinspection EqualsBetweenInconvertibleTypes
    assertFalse(pair1.equals("FOO"));

    // Equal pairs must share a hash code; assertEquals reports both values on failure.
    assertEquals(pair1.hashCode(), pair2.hashCode());
    // Distinct hash codes are not required by the hashCode contract in general;
    // this only pins the outcome for these two concrete keys.
    assertFalse(pair1.hashCode() == pair3.hashCode());
}
@Test
public void testGetValuesForPrefix() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("TEST", 1);
    tree.put("TEAM", 2);
    tree.put("TOAST", 3);
    tree.put("TEA", 4);
    tree.put("COFFEE", 5);
    // Intended shape:
    //
    //   COFFEE (5)
    //   T
    //     E
    //       A (4)
    //         M (2)
    //       ST (1)
    //     OAST (3)

    // The empty prefix yields every value, listed in the order of the keys.
    assertEquals("[5, 4, 2, 1, 3]", Iterables.toString(tree.getValuesForKeysStartingWith("")));

    // Prefixes within, equal to, and longer than the key COFFEE.
    assertEquals("[5]", Iterables.toString(tree.getValuesForKeysStartingWith("C")));
    assertEquals("[5]", Iterables.toString(tree.getValuesForKeysStartingWith("COFFEE")));
    assertEquals("[]", Iterables.toString(tree.getValuesForKeysStartingWith("COFFEES")));

    // Progressively longer prefixes narrow the T subtree.
    assertEquals("[4, 2, 1, 3]", Iterables.toString(tree.getValuesForKeysStartingWith("T")));
    assertEquals("[4, 2, 1]", Iterables.toString(tree.getValuesForKeysStartingWith("TE")));
    assertEquals("[4, 2]", Iterables.toString(tree.getValuesForKeysStartingWith("TEA")));
    assertEquals("[3]", Iterables.toString(tree.getValuesForKeysStartingWith("TO")));
}
@Test
public void testGetKeyValuePairsForPrefix() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    tree.put("TEST", 1);
    tree.put("TEAM", 2);
    tree.put("TOAST", 3);
    tree.put("TEA", 4);
    tree.put("COFFEE", 5);
    // Intended shape:
    //
    //   COFFEE (5)
    //   T
    //     E
    //       A (4)
    //         M (2)
    //       ST (1)
    //     OAST (3)

    // The empty prefix yields every pair.
    assertEquals("[(COFFEE, 5), (TEA, 4), (TEAM, 2), (TEST, 1), (TOAST, 3)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("")));

    // Prefixes within, equal to, and longer than the key COFFEE.
    assertEquals("[(COFFEE, 5)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("C")));
    assertEquals("[(COFFEE, 5)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("COFFEE")));
    assertEquals("[]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("COFFEES")));

    // Progressively longer prefixes narrow the T subtree.
    assertEquals("[(TEA, 4), (TEAM, 2), (TEST, 1), (TOAST, 3)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("T")));
    assertEquals("[(TEA, 4), (TEAM, 2), (TEST, 1)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("TE")));
    assertEquals("[(TEA, 4), (TEAM, 2)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("TEA")));
    assertEquals("[(TOAST, 3)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("TO")));
}
@Test
public void testRemove_ArgumentValidation() {
    // Removing a null key must be rejected.
    assertThrows(IllegalArgumentException.class, () -> {
        ConcurrentRadixTree<Integer> radixTree = new ConcurrentRadixTree<>(getNodeFactory());
        //noinspection NullableProblems
        radixTree.remove(null);
    });
}
@Test
public void testSearchTree() {
    ConcurrentRadixTree<Integer> tree = new ConcurrentRadixTree<>(getNodeFactory());
    // Build the tree by hand, as if the following strings were added:
    // B, BA, BAN, BANDANA, BANAN, BANANA
    //
    //   B (1)
    //     A (2)
    //       N (3)
    //         AN (5)
    //           A (6)
    //         DANA (4)
    final Node root, n1, n2, n3, n4, n5, n6;
    n6 = getNodeFactory().createNode("A", 6, Collections.<Node>emptyList(), false);
    n5 = getNodeFactory().createNode("AN", 5, Arrays.asList(n6), false);
    n4 = getNodeFactory().createNode("DANA", 4, Collections.<Node>emptyList(), false);
    // Children are handed over as (n4, n5) on purpose: the expected output lists n5 first.
    n3 = getNodeFactory().createNode("N", 3, Arrays.asList(n4, n5), false);
    n2 = getNodeFactory().createNode("A", 2, Arrays.asList(n3), false);
    n1 = getNodeFactory().createNode("B", 1, Arrays.asList(n2), false);
    //noinspection NullableProblems
    root = getNodeFactory().createNode("", null, Arrays.asList(n1), true);
    // Overwrite the tree's default root with the one built by hand...
    tree.root = root;

    // Sanity checks to assert that the tree was built as expected...
    String expected =
            "\n" +
            "└── ○ B (1)\n" +
            " └── ○ A (2)\n" +
            " └── ○ N (3)\n" +
            " ├── ○ AN (5)\n" +
            " │ └── ○ A (6)\n" +
            " └── ○ DANA (4)\n";
    assertEquals(expected, PrettyPrinter.prettyPrint(tree));
    assertEquals(2, n3.getOutgoingEdges().size());
    assertTrue(n3.getOutgoingEdges().contains(n4));
    assertTrue(n3.getOutgoingEdges().contains(n5));

    // Arguments: key, node found, its parent, the parent's parent, characters matched.

    // Key that matches nothing: the search stays at the root.
    assertSearchResult(tree, "Z", tree.root, null, null, 0);
    // First child node "B".
    assertSearchResult(tree, "B", n1, tree.root, null, 1);
    // Node at "BAN", which has two children, one with a multi-char edge.
    assertSearchResult(tree, "BAN", n3, n2, n1, 3);
    // Multi-char edge matched completely at "BANAN".
    assertSearchResult(tree, "BANAN", n5, n3, n2, 5);
    // Multi-char edge matched only partially at "BANA".
    assertSearchResult(tree, "BANA", n5, n3, n2, 4);
    // Deepest node, "BANANA".
    assertSearchResult(tree, "BANANA", n6, n5, n3, 6);
    // Key running past the leaf "BANANA".
    assertSearchResult(tree, "BANANAS", n6, n5, n3, 6);
    // Key diverging before the split at "BAN".
    assertSearchResult(tree, "BAR", n2, n1, tree.root, 2);
    // Key diverging immediately after the split at "BAN".
    assertSearchResult(tree, "BANS", n3, n2, n1, 3);
    // Key diverging inside the multi-char edge "DANA" of "BANDANA".
    assertSearchResult(tree, "BANDAIDS", n4, n3, n2, 5);
}

/**
 * Searches {@code tree} once for {@code key} and checks the four fields of the
 * search result that these tests care about. The key is included in every
 * failure message so that a failing case can be identified.
 *
 * @param tree                 the tree to search
 * @param key                  the key passed to {@code searchTree}
 * @param expectedNodeFound    expected {@code nodeFound}
 * @param expectedParent       expected {@code parentNode}, may be {@code null}
 * @param expectedGrandparent  expected {@code parentNodesParent}, may be {@code null}
 * @param expectedCharsMatched expected {@code charsMatched}
 */
private static void assertSearchResult(ConcurrentRadixTree<Integer> tree, String key,
                                       Node expectedNodeFound, Node expectedParent,
                                       Node expectedGrandparent, int expectedCharsMatched) {
    ConcurrentRadixTree.SearchResult result = tree.searchTree(key);
    assertEquals(expectedNodeFound, result.nodeFound, "nodeFound for key " + key);
    assertEquals(expectedParent, result.parentNode, "parentNode for key " + key);
    assertEquals(expectedGrandparent, result.parentNodesParent, "parentNodesParent for key " + key);
    assertEquals(expectedCharsMatched, result.charsMatched, "charsMatched for key " + key);
}
@Test
public void testSearchResult_FailureToClassify1() {
    // No node found at all, combined with match counts that fit no classification.
    assertThrows(IllegalStateException.class,
            () -> new ConcurrentRadixTree.SearchResult("DUMMY", null, 70, 70, null, null));
}
@Test
public void testSearchResult_FailureToClassify2() {
    assertThrows(IllegalStateException.class, () -> {
        // Whole key matched (5 chars), but the in-node match count (70) is out of range.
        Node dummy = getNodeFactory().createNode("DUMMY", 1, Collections.<Node>emptyList(), false);
        new ConcurrentRadixTree.SearchResult("DUMMY", dummy, 5, 70, null, null);
    });
}
@Test
public void testSearchResult_FailureToClassify3() {
    assertThrows(IllegalStateException.class, () -> {
        // Key matched only partially (4 of 5 chars), in-node match count (70) out of range.
        Node dummy = getNodeFactory().createNode("DUMMY", 1, Collections.<Node>emptyList(), false);
        new ConcurrentRadixTree.SearchResult("DUMMY", dummy, 4, 70, null, null);
    });
}
}

View file

@ -0,0 +1,94 @@
package org.xbib.datastructures.trie.concurrent;
import org.xbib.datastructures.trie.concurrent.util.Node;
import java.io.IOException;
import java.util.List;
/**
 * Utility methods to generate semi-graphical string representations of trees.
 */
public class PrettyPrinter {

    /** Marker placed in front of a node that is the last child of its parent. */
    private static final String LAST_CHILD_MARKER = "└── ○ ";

    /** Marker placed in front of a node that has further siblings below it. */
    private static final String INNER_CHILD_MARKER = "├── ○ ";

    /**
     * Indentation handed to the children of a last child. It is four columns
     * wide so that a child's marker starts underneath its parent's circle.
     */
    private static final String BLANK_INDENT = "    ";

    /**
     * Indentation handed to the children of an inner child. Same width as
     * {@link #BLANK_INDENT}, but it keeps the vertical guide running down to
     * the siblings that are still to be printed.
     */
    private static final String GUIDE_INDENT = "│   ";

    /**
     * Not meant to be used: the class only offers static helpers.
     * NOTE(review): the constructor is package-private rather than private;
     * presumably so that code in this package can still invoke it. Confirm
     * before narrowing the visibility.
     */
    PrettyPrinter() {
    }

    /**
     * Generates a semi-graphical string representation of a given tree.
     * <p>
     * Example output (the first line belongs to the root and is empty when
     * the root has neither an incoming edge nor a value):
     * <pre>
     *
     * └── ○ B (1)
     *     └── ○ A (2)
     *         └── ○ N (3)
     *             ├── ○ AN (5)
     *             │   └── ○ A (6)
     *             └── ○ DANA (4)
     * </pre>
     *
     * @param tree The tree for which the semi-graphical representation should be generated
     * @return A semi-graphical string representation of the tree
     */
    public static String prettyPrint(ConcurrentRadixTree<?> tree) {
        return prettyPrint(tree.getNode());
    }

    /**
     * Generates a semi-graphical string representation of the subtree below a
     * given node, treating that node as the root of the output.
     *
     * @param node The node to print as root
     * @return A semi-graphical string representation of the node and its descendants
     */
    public static String prettyPrint(Node node) {
        StringBuilder sb = new StringBuilder();
        prettyPrint(node, sb, "", true, true);
        return sb.toString();
    }

    /**
     * Generates a semi-graphical string representation of a given tree, writing it to a given {@link Appendable}.
     * <p>
     * The output has the same format as that of {@link #prettyPrint(ConcurrentRadixTree)}.
     *
     * @param tree The tree for which the semi-graphical representation should be generated
     * @param appendable The object to which the tree should be written
     * @throws IllegalStateException if writing to {@code appendable} fails with an {@link IOException}
     */
    public static void prettyPrint(ConcurrentRadixTree<?> tree, Appendable appendable) {
        prettyPrint(tree.getNode(), appendable, "", true, true);
    }

    /**
     * Writes one line for {@code node} and then recurses into its children.
     *
     * @param node   the node to print
     * @param sb     the sink receiving the output
     * @param prefix the indentation accumulated from the ancestors of {@code node}
     * @param isTail whether {@code node} is the last child of its parent
     * @param isRoot whether {@code node} is the root of the printed tree
     * @throws IllegalStateException if {@code sb} fails with an {@link IOException}
     */
    static void prettyPrint(Node node, Appendable sb, String prefix, boolean isTail, boolean isRoot) {
        try {
            StringBuilder label = new StringBuilder();
            // A root with a non-empty incoming edge gets a single leading space.
            if (isRoot && node.getIncomingEdge().length() > 0) {
                label.append(" ");
            }
            label.append(node.getIncomingEdge());
            Object value = node.getValue();
            if (value != null) {
                label.append(" (").append(value).append(")");
            }

            // Pick the marker for this line and the indentation for the children
            // together, so the two can never disagree.
            final String marker;
            final String childIndent;
            if (!isTail) {
                marker = INNER_CHILD_MARKER;
                // More siblings follow: descendants must carry the vertical guide.
                childIndent = GUIDE_INDENT;
            } else if (isRoot) {
                // The root line has no marker and does not indent its children.
                marker = "";
                childIndent = "";
            } else {
                marker = LAST_CHILD_MARKER;
                childIndent = BLANK_INDENT;
            }
            sb.append(prefix).append(marker).append(label).append("\n");

            List<Node> children = node.getOutgoingEdges();
            String childPrefix = prefix + childIndent;
            int lastIndex = children.size() - 1;
            for (int i = 0; i <= lastIndex; i++) {
                prettyPrint(children.get(i), sb, childPrefix, i == lastIndex, false);
            }
        }
        catch (IOException ioException) {
            // Rethrow the checked exception as a runtime exception...
            throw new IllegalStateException(ioException);
        }
    }
}

View file

@ -0,0 +1,26 @@
package org.xbib.datastructures.trie.regex;
import org.junit.jupiter.api.Test;
import java.util.LinkedList;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
/**
 * Exercises {@code RegexTrie} through its {@code put} and {@code resolve} methods.
 */
public class RegexTrieTest {

    /**
     * Stores two values under two-element key lists and checks the result of
     * {@code resolve} for a three-element input, an exact two-element input and
     * an input that extends a stored key list by one element.
     */
    @Test
    public void testRegexTrie() {
        RegexTrie<Integer> regexTrie = new RegexTrie<>();
        List<List<String>> captured = new LinkedList<>();
        regexTrie.put(2, List.of("a", ""));
        regexTrie.put(4, List.of("a", "b"));

        // Expected result: 2. Per the original author's note the capture list is then
        // [[], ["c"], ["e"]]; that content is not asserted here.
        assertEquals(2, regexTrie.resolve(captured, List.of("a", "c", "e")));

        // Expected result: 4. Per the original author's note the capture list is then
        // [[], []]; that content is not asserted here.
        assertEquals(4, regexTrie.resolve(captured, List.of("a", "b")));

        // Expected result: null. Per the original author's note the capture list is then
        // [[], []]; that content is not asserted here.
        assertNull(regexTrie.resolve(captured, List.of("a", "b", "c")));
    }
}

View file

@ -16,7 +16,7 @@ public class TrieTest {
public void testEmptyTrie() { public void testEmptyTrie() {
Trie<String, TrieKey<String>, String> trie = new TrieImpl<>(); Trie<String, TrieKey<String>, String> trie = new TrieImpl<>();
TrieKey<String> trieKey = new TrieKeyImpl<>(); TrieKey<String> trieKey = new TrieKeyImpl<>();
String result = trie.search(trieKey); String result = trie.get(trieKey);
assertNull(result); assertNull(result);
} }
@ -24,11 +24,11 @@ public class TrieTest {
public void testEmptyKey() { public void testEmptyKey() {
Trie<String, TrieKey<String>, Integer> trie = new TrieImpl<>(); Trie<String, TrieKey<String>, Integer> trie = new TrieImpl<>();
TrieKey<String> trieKey = new TrieKeyImpl<>(); TrieKey<String> trieKey = new TrieKeyImpl<>();
trie.add(trieKey, 100); trie.put(trieKey, 100);
Integer result = trie.search(trieKey); Integer result = trie.get(trieKey);
assertEquals(result, (Integer) 100); assertEquals(result, (Integer) 100);
trie.add(trieKey, 200); trie.put(trieKey, 200);
result = trie.search(trieKey); result = trie.get(trieKey);
assertEquals(result, (Integer) 200); assertEquals(result, (Integer) 200);
} }
@ -36,8 +36,8 @@ public class TrieTest {
public void testSingletonTrie() { public void testSingletonTrie() {
Trie<String, TrieKey<String>, String> trie = new TrieImpl<>(); Trie<String, TrieKey<String>, String> trie = new TrieImpl<>();
TrieKey<String> trieKey = TrieKeyImpl.stringKey("key"); TrieKey<String> trieKey = TrieKeyImpl.stringKey("key");
trie.add(trieKey, "value"); trie.put(trieKey, "value");
String result = trie.search(trieKey); String result = trie.get(trieKey);
assertNotEquals(result, "key"); assertNotEquals(result, "key");
} }
@ -50,11 +50,11 @@ public class TrieTest {
Long value = random.nextLong(); Long value = random.nextLong();
String key = value.toString(); String key = value.toString();
TrieKey<String> trieKey = TrieKeyImpl.stringKey(key); TrieKey<String> trieKey = TrieKeyImpl.stringKey(key);
trie.add(trieKey, value); trie.put(trieKey, value);
keys.add(trieKey); keys.add(trieKey);
} }
for (TrieKey<String> key : keys) { for (TrieKey<String> key : keys) {
Long value = trie.search(key); Long value = trie.get(key);
assertEquals(key.toString(), value.toString()); assertEquals(key.toString(), value.toString());
} }
} }

View file

@ -42,6 +42,7 @@ include 'datastructures-json-dsl'
include 'datastructures-json-flat' include 'datastructures-json-flat'
include 'datastructures-json-iterator' include 'datastructures-json-iterator'
include 'datastructures-json-micro' include 'datastructures-json-micro'
include 'datastructures-json-mini'
include 'datastructures-json-minimal' include 'datastructures-json-minimal'
include 'datastructures-json-noggit' include 'datastructures-json-noggit'
include 'datastructures-json-simple' include 'datastructures-json-simple'