From bbd498482a4accd40b23a6ad36a553a09f1b0c75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=CC=88rg=20Prante?= Date: Sun, 31 Jul 2022 01:19:06 +0200 Subject: [PATCH] add trie, add mini Json --- .../xbib/datastructures/json/mini/Json.java | 681 +++++++++++++ datastructures-trie/NOTICE.txt | 0 .../datastructures/trie/compact/Trie.java | 3 +- .../trie/concurrent/ConcurrentRadixTree.java | 907 +++++++++++++++++ .../trie/concurrent/RadixTree.java | 145 +++ .../util/AtomicReferenceArrayListAdapter.java | 30 + .../concurrent/util/CharArrayNodeDefault.java | 117 +++ .../util/CharArrayNodeLeafNullValue.java | 61 ++ .../util/CharArrayNodeLeafVoidValue.java | 60 ++ .../util/CharArrayNodeLeafWithValue.java | 66 ++ .../util/CharArrayNodeNonLeafNullValue.java | 92 ++ .../util/CharArrayNodeNonLeafVoidValue.java | 91 ++ .../util/CharSequenceNodeDefault.java | 115 +++ .../util/CharSequenceNodeLeafNullValue.java | 61 ++ .../util/CharSequenceNodeLeafVoidValue.java | 61 ++ .../util/CharSequenceNodeLeafWithValue.java | 67 ++ .../CharSequenceNodeNonLeafNullValue.java | 93 ++ .../CharSequenceNodeNonLeafVoidValue.java | 93 ++ .../trie/concurrent/util/CharSequences.java | 130 +++ .../util/DefaultCharArrayNodeFactory.java | 63 ++ .../util/DefaultCharSequenceNodeFactory.java | 66 ++ .../trie/concurrent/util/Iterables.java | 122 +++ .../trie/concurrent/util/KeyValuePair.java | 45 + .../trie/concurrent/util/LazyIterator.java | 76 ++ .../trie/concurrent/util/Node.java | 136 +++ .../util/NodeCharacterComparator.java | 15 + .../concurrent/util/NodeCharacterKey.java | 20 + .../util/NodeCharacterProvider.java | 14 + .../trie/concurrent/util/NodeFactory.java | 34 + .../trie/concurrent/util/NodeUtil.java | 80 ++ .../trie/concurrent/util/VoidValue.java | 27 + .../datastructures/trie/regex/RegexTrie.java | 40 +- .../datastructures/trie/segment/Trie.java | 13 +- .../datastructures/trie/segment/TrieImpl.java | 16 +- .../datastructures/trie/segment/TrieKey.java | 3 +- .../trie/segment/TrieKeyImpl.java | 2 
+- .../concurrent/ConcurrentRadixTreeTest.java | 962 ++++++++++++++++++ .../trie/concurrent/PrettyPrinter.java | 94 ++ .../trie/regex/RegexTrieTest.java | 26 + .../datastructures/trie/segment/TrieTest.java | 18 +- settings.gradle | 1 + 41 files changed, 4700 insertions(+), 46 deletions(-) create mode 100644 datastructures-json-mini/src/main/java/org/xbib/datastructures/json/mini/Json.java create mode 100644 datastructures-trie/NOTICE.txt create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/ConcurrentRadixTree.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/RadixTree.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/AtomicReferenceArrayListAdapter.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeDefault.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafNullValue.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafVoidValue.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafWithValue.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeNonLeafNullValue.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeNonLeafVoidValue.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeDefault.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafNullValue.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafVoidValue.java create mode 100644 
datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafWithValue.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeNonLeafNullValue.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeNonLeafVoidValue.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequences.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/DefaultCharArrayNodeFactory.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/DefaultCharSequenceNodeFactory.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/Iterables.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/KeyValuePair.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/LazyIterator.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/Node.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterComparator.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterKey.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterProvider.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeFactory.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeUtil.java create mode 100644 datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/VoidValue.java create mode 100644 
import java.io.IOException;
import java.time.Instant;
import java.util.Collection;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * A minimal JSON parser and generator for Java Maps and Collections.
 *
 * <p>Parsing produces {@link LinkedHashMap} for objects, {@link LinkedList} for arrays,
 * {@link String} for strings, {@link Long} for integral numbers, {@link Double} for numbers
 * with a fraction or exponent, {@link Boolean} for {@code true}/{@code false} and
 * {@code null} for {@code null}.
 *
 * <p>Generation accepts maps, collections, character sequences, booleans, numbers and
 * {@link Instant} values (written as ISO-8601 strings).
 */
public class Json {

    private Json() {
    }

    /**
     * Serializes a map as a JSON object.
     *
     * @param map the map to serialize, may be {@code null}
     * @return the JSON text, or {@code null} if {@code map} is {@code null}
     * @throws IOException if the underlying appendable fails
     * @throws IllegalArgumentException if a value has an unsupported type
     */
    public static String toString(Map<String, ?> map) throws IOException {
        return map != null ? new JsonBuilder().buildMap(map).build() : null;
    }

    /**
     * Serializes a collection as a JSON array.
     *
     * @param collection the collection to serialize, may be {@code null}
     * @return the JSON text, or {@code null} if {@code collection} is {@code null}
     * @throws IOException if the underlying appendable fails
     * @throws IllegalArgumentException if an element has an unsupported type
     */
    public static String toString(Collection<?> collection) throws IOException {
        return collection != null ? new JsonBuilder().buildCollection(collection).build() : null;
    }

    /**
     * Parses a JSON text whose top-level value is an object.
     *
     * @param json the JSON text, may be {@code null}
     * @return the parsed map, or {@code null} if {@code json} is {@code null}
     * @throws IOException if the text is not well-formed JSON
     * @throws IllegalArgumentException if the top-level value is not an object
     */
    @SuppressWarnings("unchecked")
    public static Map<String, Object> toMap(String json) throws IOException {
        if (json == null) {
            return null;
        }
        Object object = parse(json);
        if (object instanceof Map) {
            return (Map<String, Object>) object;
        }
        throw new IllegalArgumentException("unexpected, not a map instance: " + typeOf(object));
    }

    /**
     * Parses a JSON text whose top-level value is an array.
     *
     * @param json the JSON text, may be {@code null}
     * @return the parsed collection, or {@code null} if {@code json} is {@code null}
     * @throws IOException if the text is not well-formed JSON
     * @throws IllegalArgumentException if the top-level value is not an array
     */
    @SuppressWarnings("unchecked")
    public static Collection<Object> toCollection(String json) throws IOException {
        if (json == null) {
            return null;
        }
        Object object = parse(json);
        if (object instanceof Collection) {
            return (Collection<Object>) object;
        }
        throw new IllegalArgumentException("unexpected, not a collection instance: " + typeOf(object));
    }

    /** Runs a fresh parser over the text and returns the top-level value. */
    private static Object parse(String json) throws IOException {
        JsonParser parser = new JsonParser();
        parser.parse(json);
        return parser.getResult();
    }

    /** Describes the runtime type of a parsed value for error messages; tolerates a JSON null. */
    private static String typeOf(Object object) {
        return object != null ? object.getClass().toString() : "null";
    }

    /**
     * A recursive-descent JSON parser over an in-memory string.
     *
     * <p>Invariant: {@code ch} is the character at index {@code i - 1}; at end of input
     * {@code ch == EOS} and {@code i == input.length() + 1}.
     */
    private static class JsonParser {

        private static final char EOS = (char) -1;

        private static final char DOUBLE_QUOTE = '"';

        private static final char BACKSLASH = '\\';

        private static final char OPEN_MAP = '{';

        private static final char CLOSE_MAP = '}';

        private static final char OPEN_LIST = '[';

        private static final char CLOSE_LIST = ']';

        private static final char COMMA = ',';

        private static final char COLON = ':';

        private String input;

        private int i;

        private char ch;

        private Object result;

        // LinkedList rather than ArrayDeque: a top-level JSON null is pushed as a null element.
        private final Deque<Object> stack = new LinkedList<>();

        public JsonParser() {
        }

        /**
         * Parses a complete JSON text; the value is available from {@link #getResult()}.
         *
         * @param input the JSON text, must not be {@code null}
         * @throws IOException if the text is malformed or has trailing content
         */
        public void parse(String input) throws IOException {
            this.input = Objects.requireNonNull(input, "input");
            this.i = 0;
            this.result = null;
            stack.clear();
            ch = next();
            skipWhitespace();
            parseValue();
            skipWhitespace();
            if (!atEnd()) {
                throw new IOException("malformed json: " + ch);
            }
        }

        public Object getResult() {
            return result;
        }

        /** Dispatches on the current character to the matching value parser. */
        private void parseValue() throws IOException {
            switch (ch) {
                case DOUBLE_QUOTE:
                    ch = next();
                    parseString(false);
                    break;
                case OPEN_MAP:
                    parseMap();
                    break;
                case OPEN_LIST:
                    parseList();
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                case '-':
                    parseNumber();
                    break;
                case 't':
                    parseLiteral("true", Boolean.TRUE);
                    break;
                case 'f':
                    parseLiteral("false", Boolean.FALSE);
                    break;
                case 'n':
                    parseLiteral("null", null);
                    break;
                default:
                    throw new IOException(atEnd() ? "unexpected end of input" : "illegal character: " + ch);
            }
        }

        /**
         * Parses {@code [-]digits[.digits][(e|E)[+|-]digits]} starting at the current character.
         * Integral numbers become {@link Long}, everything else {@link Double}.
         */
        private void parseNumber() throws IOException {
            int start = i - 1;
            boolean floating = false;
            if (ch == '-') {
                ch = next();
            }
            if (!isDigit(ch)) {
                throw new IOException(ch == '.' ? "no digit before dot" : "isolated minus");
            }
            skipDigits();
            if (ch == '.') {
                floating = true;
                ch = next();
                skipDigits();
            }
            if (ch == 'e' || ch == 'E') {
                floating = true;
                ch = next();
                if (ch == '-' || ch == '+') {
                    ch = next();
                }
                if (!isDigit(ch)) {
                    throw new IOException("invalid exponent");
                }
                skipDigits();
            }
            String text = input.substring(start, i - 1);
            try {
                if (floating) {
                    valueNode(Double.parseDouble(text));
                } else {
                    valueNode(Long.parseLong(text));
                }
            } catch (NumberFormatException e) {
                // e.g. an integer that does not fit into a long
                throw new IOException("invalid number: " + text, e);
            }
        }

        /**
         * Parses the remainder of a string; the opening quote has already been consumed.
         *
         * @param isKey {@code true} to push the string as a pending object key,
         *              {@code false} to record it as a value
         */
        private void parseString(boolean isKey) throws IOException {
            boolean escaped = false;
            int start = i - 1;
            while (ch != DOUBLE_QUOTE) {
                if (atEnd()) {
                    // without this check the EOS sentinel would be consumed forever
                    throw new IOException("unterminated string");
                }
                if (ch == BACKSLASH) {
                    escaped = true;
                    ch = next();
                    switch (ch) {
                        case DOUBLE_QUOTE:
                        case '/':
                        case BACKSLASH:
                        case 'b':
                        case 'f':
                        case 'n':
                        case 'r':
                        case 't':
                            break;
                        case 'u':
                            expectHex();
                            expectHex();
                            expectHex();
                            expectHex();
                            break;
                        default:
                            throw new IOException("illegal escape char: " + ch);
                    }
                    ch = next();
                } else if (ch < 32) {
                    throw new IOException("illegal control char: " + ch);
                } else {
                    ch = next();
                }
            }
            String raw = input.substring(start, i - 1);
            // always hand out String so that parsed values compare equal to String literals
            String text = escaped ? unescape(raw) : raw;
            if (isKey) {
                stack.push(new KeyNode(text));
            } else {
                valueNode(text);
            }
            ch = next();
        }

        /** Parses an array; the current character is the opening bracket. */
        private void parseList() throws IOException {
            int count = 0;
            List<Object> list = new LinkedList<>();
            stack.push(list);
            ch = next();
            while (true) {
                skipWhitespace();
                if (ch == CLOSE_LIST) {
                    result = stack.pop();
                    tryAppend(result);
                    ch = next();
                    return;
                }
                if (count > 0) {
                    expectChar(COMMA);
                    ch = next();
                    skipWhitespace();
                }
                parseValue();
                count++;
            }
        }

        /** Parses an object; the current character is the opening brace. */
        private void parseMap() throws IOException {
            int count = 0;
            Map<String, Object> map = new LinkedHashMap<>();
            stack.push(map);
            ch = next();
            while (true) {
                skipWhitespace();
                if (ch == CLOSE_MAP) {
                    result = stack.pop();
                    tryAppend(result);
                    ch = next();
                    return;
                }
                if (count > 0) {
                    expectChar(COMMA);
                    ch = next();
                    skipWhitespace();
                }
                expectChar(DOUBLE_QUOTE);
                ch = next();
                parseString(true);
                skipWhitespace();
                expectChar(COLON);
                ch = next();
                skipWhitespace();
                parseValue();
                count++;
            }
        }

        /**
         * Parses one of the literals {@code true}, {@code false} or {@code null}; the current
         * character is the (already verified) first character of the literal.
         */
        private void parseLiteral(String literal, Object value) throws IOException {
            for (int k = 1; k < literal.length(); k++) {
                ch = next();
                expectChar(literal.charAt(k));
            }
            valueNode(value);
            ch = next();
        }

        private void expectChar(char expected) throws IOException {
            if (ch != expected) {
                throw new IOException("expected char " + expected + " but got " + ch);
            }
        }

        /** Advances one character and requires it to be a hexadecimal digit. */
        private void expectHex() throws IOException {
            ch = next();
            if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
                return;
            }
            throw new IOException("invalid hex char " + ch);
        }

        private void skipWhitespace() {
            while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
                ch = next();
            }
        }

        private void skipDigits() {
            while (isDigit(ch)) {
                ch = next();
            }
        }

        private static boolean isDigit(char c) {
            return c >= '0' && c <= '9';
        }

        /**
         * Resolves the escape sequences of a string body that {@link #parseString(boolean)}
         * has already validated.
         */
        private static String unescape(String input) {
            int length = input.length();
            StringBuilder sb = new StringBuilder(length);
            int pos = 0;
            while (pos < length) {
                char c = input.charAt(pos++);
                if (c != BACKSLASH) {
                    sb.append(c);
                    continue;
                }
                char escape = input.charAt(pos++);
                switch (escape) {
                    case 'b':
                        sb.append('\b');
                        break;
                    case 'f':
                        sb.append('\f');
                        break;
                    case 'n':
                        sb.append('\n');
                        break;
                    case 'r':
                        sb.append('\r');
                        break;
                    case 't':
                        sb.append('\t');
                        break;
                    case 'u':
                        sb.append((char) Integer.parseInt(input.substring(pos, pos + 4), 16));
                        pos += 4;
                        break;
                    default:
                        // '"', '\\' and '/' stand for themselves
                        sb.append(escape);
                        break;
                }
            }
            return sb.toString();
        }

        /** Returns the next character, or {@link #EOS} once the input is exhausted. */
        private char next() {
            if (i < input.length()) {
                return input.charAt(i++);
            }
            i = input.length() + 1;
            return EOS;
        }

        /** Distinguishes real end of input from a literal U+FFFF character in the text. */
        private boolean atEnd() {
            return ch == EOS && i > input.length();
        }

        /** Records a scalar: appended to the open container, or kept as the top-level result. */
        private void valueNode(Object object) {
            if (!tryAppend(object)) {
                stack.push(object);
                result = object;
            }
        }

        /**
         * Adds a finished value to the innermost open container.
         *
         * @return {@code false} if there is no open container (top-level value)
         */
        @SuppressWarnings("unchecked")
        private boolean tryAppend(Object object) {
            Object top = stack.peek();
            if (top instanceof List) {
                ((List<Object>) top).add(object);
                return true;
            }
            if (top instanceof KeyNode) {
                KeyNode key = (KeyNode) stack.pop();
                Object below = stack.peek();
                if (below instanceof Map) {
                    ((Map<String, Object>) below).put(key.get().toString(), object);
                    return true;
                }
            }
            return false;
        }
    }

    /** Marker on the parser stack for an object key that is waiting for its value. */
    static class KeyNode {

        private final CharSequence value;

        public KeyNode(CharSequence value) {
            this.value = value;
        }

        public CharSequence get() {
            return value;
        }

    }

    /**
     * A streaming JSON writer that tracks nesting to place {@code ,} and {@code :} separators.
     */
    static class JsonBuilder {

        private final Appendable appendable;

        private State state;

        protected JsonBuilder() {
            this(new StringBuilder());
        }

        protected JsonBuilder(Appendable appendable) {
            this.appendable = appendable;
            this.state = new State(null, Structure.DOCSTART);
        }

        /** Opens an array, writing a separator first if it is an element of another array. */
        public JsonBuilder beginCollection() throws IOException {
            if (state.structure == Structure.COLLECTION) {
                beginArrayValue();
            }
            this.state = new State(state, Structure.COLLECTION);
            appendable.append('[');
            return this;
        }

        public JsonBuilder endCollection() throws IOException {
            if (state.structure != Structure.COLLECTION) {
                throw new IOException("no array to close");
            }
            appendable.append(']');
            this.state = state.parent;
            return this;
        }

        /** Opens an object, writing a separator first if it is an element of an array. */
        public JsonBuilder beginMap() throws IOException {
            if (state.structure == Structure.COLLECTION) {
                beginArrayValue();
            }
            this.state = new State(state, Structure.MAP);
            appendable.append('{');
            return this;
        }

        public JsonBuilder endMap() throws IOException {
            if (state.structure != Structure.MAP && state.structure != Structure.KEY) {
                throw new IOException("no object to close");
            }
            appendable.append('}');
            this.state = state.parent;
            return this;
        }

        /**
         * Writes all entries of the map. Braces are added unless the caller has just opened
         * an object with {@link #beginMap()}.
         */
        public JsonBuilder buildMap(Map<String, ?> map) throws IOException {
            Objects.requireNonNull(map);
            boolean wrap = state.structure != Structure.MAP;
            if (wrap) {
                beginMap();
            }
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                // nested maps arrive through an unchecked cast, so keys may not be strings
                Object key = entry.getKey();
                buildKey(key == null || key instanceof CharSequence ? (CharSequence) key : key.toString());
                buildValue(entry.getValue());
            }
            if (wrap) {
                endMap();
            }
            return this;
        }

        /** Writes the collection as a JSON array. */
        public JsonBuilder buildCollection(Collection<?> collection) throws IOException {
            Objects.requireNonNull(collection);
            beginCollection();
            for (Object object : collection) {
                buildValue(object);
            }
            endCollection();
            return this;
        }

        /**
         * Writes a single value of any supported type.
         *
         * @throws IllegalArgumentException if the value's type is not supported
         */
        @SuppressWarnings("unchecked")
        public JsonBuilder buildValue(Object object) throws IOException {
            if (object instanceof Map) {
                return buildMap((Map<String, ?>) object);
            }
            if (object instanceof Collection) {
                return buildCollection((Collection<?>) object);
            }
            if (object == null) {
                // buildNull() writes the array separator itself; doing it here too doubled the comma
                return buildNull();
            }
            if (state.structure == Structure.COLLECTION) {
                beginArrayValue();
            }
            if (object instanceof CharSequence) {
                buildString((CharSequence) object, true);
            } else if (object instanceof Boolean) {
                buildBoolean((Boolean) object);
            } else if (object instanceof Number) {
                buildNumber((Number) object);
            } else if (object instanceof Instant) {
                buildInstant((Instant) object);
            } else {
                throw new IllegalArgumentException("unable to write object class " + object.getClass());
            }
            return this;
        }

        /**
         * Writes an object key followed by {@code :}.
         *
         * @throws NullPointerException if {@code string} is {@code null}
         */
        public JsonBuilder buildKey(CharSequence string) throws IOException {
            Objects.requireNonNull(string, "key");
            boolean inMap = state.structure == Structure.MAP || state.structure == Structure.KEY;
            if (state.structure == Structure.COLLECTION) {
                beginArrayValue();
            } else if (inMap) {
                beginKey();
            }
            buildString(string, true);
            if (inMap) {
                endKey();
            }
            state.structure = Structure.KEY;
            return this;
        }

        /** Writes the literal {@code null}, preceded by a separator when inside an array. */
        public JsonBuilder buildNull() throws IOException {
            if (state.structure == Structure.COLLECTION) {
                beginArrayValue();
            }
            buildString("null", false);
            return this;
        }

        public String build() {
            return appendable.toString();
        }

        private void beginKey() throws IOException {
            if (state.first) {
                state.first = false;
            } else {
                appendable.append(",");
            }
        }

        private void endKey() throws IOException {
            appendable.append(":");
        }

        private void beginArrayValue() throws IOException {
            if (state.first) {
                state.first = false;
            } else {
                appendable.append(",");
            }
        }

        private void buildBoolean(boolean bool) throws IOException {
            buildString(bool ? "true" : "false", false);
        }

        private void buildNumber(Number number) throws IOException {
            buildString(number.toString(), false);
        }

        private void buildInstant(Instant instant) throws IOException {
            buildString(instant.toString(), true);
        }

        private void buildString(CharSequence string, boolean escape) throws IOException {
            appendable.append(escape ? escapeString(string) : string);
        }

        /** Quotes the string and escapes {@code "}, {@code \} and control characters. */
        private CharSequence escapeString(CharSequence string) {
            StringBuilder sb = new StringBuilder();
            sb.append('"');
            int start = 0;
            int l = string.length();
            for (int i = 0; i < l; i++) {
                char c = string.charAt(i);
                if (c == '"' || c == '\\' || c < 32) {
                    if (i > start) {
                        sb.append(string, start, i);
                    }
                    start = i + 1;
                    sb.append(escapeCharacter(c));
                }
            }
            if (l > start) {
                sb.append(string, start, l);
            }
            sb.append('"');
            return sb;
        }

        private static String escapeCharacter(char c) {
            switch (c) {
                case '\n':
                    return "\\n";
                case '\r':
                    return "\\r";
                case '\t':
                    return "\\t";
                case '\\':
                    return "\\\\";
                case '\"':
                    return "\\\"";
                default:
                    // remaining control characters as a four-digit unicode escape
                    String hex = Integer.toHexString(c);
                    return "\\u0000".substring(0, 6 - hex.length()) + hex;
            }
        }

        private enum Structure {
            DOCSTART, MAP, KEY, COLLECTION
        }

        /** One nesting level: its kind and whether the next element is the first one. */
        private static class State {
            final State parent;
            Structure structure;
            boolean first = true;

            State(State parent, Structure structure) {
                this.parent = parent;
                this.structure = structure;
            }
        }
    }
}
length of the compressed pattern in the current node, // make it node leaf, and create child that carry over the original children/isLeaf char[] childValue = Arrays.copyOfRange(lastNode.value, lastNode.divergePatternIndex, lastNode.value.length); diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/ConcurrentRadixTree.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/ConcurrentRadixTree.java new file mode 100644 index 0000000..3290152 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/ConcurrentRadixTree.java @@ -0,0 +1,907 @@ +package org.xbib.datastructures.trie.concurrent; + +import org.xbib.datastructures.trie.concurrent.util.CharSequences; +import org.xbib.datastructures.trie.concurrent.util.KeyValuePair; +import org.xbib.datastructures.trie.concurrent.util.LazyIterator; +import org.xbib.datastructures.trie.concurrent.util.Node; +import org.xbib.datastructures.trie.concurrent.util.NodeFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Deque; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import static org.xbib.datastructures.trie.concurrent.ConcurrentRadixTree.SearchResult.Classification; + +/** + * An implementation of {@link RadixTree} which supports lock-free concurrent reads, and allows items to be added to and + * to be removed from the tree atomically by background thread(s), without blocking reads. + *

+ * Unlike reads, writes require locking of the tree (locking out other writing threads only; reading threads are never + * blocked). Currently write locks are coarse-grained; in fact they are tree-level. In future branch-level write locks + * might be added, but the current implementation is targeted at high concurrency read-mostly use cases. + */ +public class ConcurrentRadixTree implements RadixTree { + + private final NodeFactory nodeFactory; + + protected volatile Node root; + + // Write operations acquire write lock, read operations are lock-free. + private final Lock writeLock = new ReentrantLock(); + + /** + * Creates a new {@link ConcurrentRadixTree} which will use the given {@link NodeFactory} to create nodes. + * + * @param nodeFactory An object which creates {@link Node} objects on-demand, and which might return node + * implementations optimized for storing the values supplied to it for the creation of each node + */ + public ConcurrentRadixTree(NodeFactory nodeFactory) { + this.nodeFactory = nodeFactory; + this.root = nodeFactory.createNode("", null, Collections.emptyList(), true); + } + + protected void acquireWriteLock() { + writeLock.lock(); + } + + protected void releaseWriteLock() { + writeLock.unlock(); + } + + @Override + public O put(CharSequence key, O value) { + @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"}) + O existingValue = (O) putInternal(key, value, true); // putInternal acquires write lock + return existingValue; + } + + @Override + public O putIfAbsent(CharSequence key, O value) { + @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"}) + O existingValue = (O) putInternal(key, value, false); // putInternal acquires write lock + return existingValue; + } + + @Override + public O getValueForExactKey(CharSequence key) { + SearchResult searchResult = searchTree(key); + if (searchResult.classification.equals(SearchResult.Classification.EXACT_MATCH)) { + @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"}) + O 
value = (O) searchResult.nodeFound.getValue(); + return value; + } + return null; + } + + @Override + public Iterable getKeysStartingWith(CharSequence prefix) { + SearchResult searchResult = searchTree(prefix); + Classification classification = searchResult.classification; + switch (classification) { + case EXACT_MATCH: { + return getDescendantKeys(prefix, searchResult.nodeFound); + } + case KEY_ENDS_MID_EDGE: { + // Append the remaining characters of the edge to the key. + // For example if we searched for CO, but first matching node was COFFEE, + // the key associated with the first node should be COFFEE... + CharSequence edgeSuffix = CharSequences.getSuffix(searchResult.nodeFound.getIncomingEdge(), searchResult.charsMatchedInNodeFound); + prefix = CharSequences.concatenate(prefix, edgeSuffix); + return getDescendantKeys(prefix, searchResult.nodeFound); + } + default: { + // Incomplete match means key is not a prefix of any node... + return Collections.emptySet(); + } + } + } + + @Override + public Iterable getValuesForKeysStartingWith(CharSequence prefix) { + SearchResult searchResult = searchTree(prefix); + Classification classification = searchResult.classification; + switch (classification) { + case EXACT_MATCH: { + return getDescendantValues(prefix, searchResult.nodeFound); + } + case KEY_ENDS_MID_EDGE: { + // Append the remaining characters of the edge to the key. + // For example if we searched for CO, but first matching node was COFFEE, + // the key associated with the first node should be COFFEE... + CharSequence edgeSuffix = CharSequences.getSuffix(searchResult.nodeFound.getIncomingEdge(), searchResult.charsMatchedInNodeFound); + prefix = CharSequences.concatenate(prefix, edgeSuffix); + return getDescendantValues(prefix, searchResult.nodeFound); + } + default: { + // Incomplete match means key is not a prefix of any node... 
+ return Collections.emptySet(); + } + } + } + + @Override + public Iterable> getKeyValuePairsForKeysStartingWith(CharSequence prefix) { + SearchResult searchResult = searchTree(prefix); + Classification classification = searchResult.classification; + switch (classification) { + case EXACT_MATCH: { + return getDescendantKeyValuePairs(prefix, searchResult.nodeFound); + } + case KEY_ENDS_MID_EDGE: { + // Append the remaining characters of the edge to the key. + // For example if we searched for CO, but first matching node was COFFEE, + // the key associated with the first node should be COFFEE... + CharSequence edgeSuffix = CharSequences.getSuffix(searchResult.nodeFound.getIncomingEdge(), searchResult.charsMatchedInNodeFound); + prefix = CharSequences.concatenate(prefix, edgeSuffix); + return getDescendantKeyValuePairs(prefix, searchResult.nodeFound); + } + default: { + // Incomplete match means key is not a prefix of any node... + return Collections.emptySet(); + } + } + } + + @Override + public boolean remove(CharSequence key) { + if (key == null) { + throw new IllegalArgumentException("The key argument was null"); + } + acquireWriteLock(); + try { + SearchResult searchResult = searchTree(key); + SearchResult.Classification classification = searchResult.classification; + if (classification == Classification.EXACT_MATCH) { + if (searchResult.nodeFound.getValue() == null) { + // This node was created automatically as a split between two branches (implicit node). + // No need to remove it... + return false; + } + + // Proceed with deleting the node... + List childEdges = searchResult.nodeFound.getOutgoingEdges(); + if (childEdges.size() > 1) { + // This node has more than one child, so if we delete the value from this node, we still need + // to leave a similar node in place to act as the split between the child edges. + // Just delete the value associated with this node. + // -> Clone this node without its value, preserving its child nodes... 
+ @SuppressWarnings({"NullableProblems"}) + Node cloned = nodeFactory.createNode(searchResult.nodeFound.getIncomingEdge(), null, searchResult.nodeFound.getOutgoingEdges(), false); + // Re-add the replacement node to the parent... + searchResult.parentNode.updateOutgoingEdge(cloned); + } else if (childEdges.size() == 1) { + // Node has one child edge. + // Create a new node which is the concatenation of the edges from this node and its child, + // and which has the outgoing edges of the child and the value from the child. + Node child = childEdges.get(0); + CharSequence concatenatedEdges = CharSequences.concatenate(searchResult.nodeFound.getIncomingEdge(), child.getIncomingEdge()); + Node mergedNode = nodeFactory.createNode(concatenatedEdges, child.getValue(), child.getOutgoingEdges(), false); + // Re-add the merged node to the parent... + searchResult.parentNode.updateOutgoingEdge(mergedNode); + } else { + // Node has no children. Delete this node from its parent, + // which involves re-creating the parent rather than simply updating its child edge + // (this is why we need parentNodesParent). + // However if this would leave the parent with only one remaining child edge, + // and the parent itself has no value (is a split node), and the parent is not the root node + // (a special case which we never merge), then we also need to merge the parent with its + // remaining child. + + List currentEdgesFromParent = searchResult.parentNode.getOutgoingEdges(); + // Create a list of the outgoing edges of the parent which will remain + // if we remove this child... + // Use a non-resizable list, as a sanity check to force ArrayIndexOutOfBounds... 
+ List newEdgesOfParent = Arrays.asList(new Node[searchResult.parentNode.getOutgoingEdges().size() - 1]); + for (int i = 0, added = 0, numParentEdges = currentEdgesFromParent.size(); i < numParentEdges; i++) { + Node node = currentEdgesFromParent.get(i); + if (node != searchResult.nodeFound) { + newEdgesOfParent.set(added++, node); + } + } + + // Note the parent might actually be the root node (which we should never merge)... + boolean parentIsRoot = (searchResult.parentNode == root); + Node newParent; + if (newEdgesOfParent.size() == 1 && searchResult.parentNode.getValue() == null && !parentIsRoot) { + // Parent is a non-root split node with only one remaining child, which can now be merged. + Node parentsRemainingChild = newEdgesOfParent.get(0); + // Merge the parent with its only remaining child... + CharSequence concatenatedEdges = CharSequences.concatenate(searchResult.parentNode.getIncomingEdge(), parentsRemainingChild.getIncomingEdge()); + newParent = nodeFactory.createNode(concatenatedEdges, parentsRemainingChild.getValue(), parentsRemainingChild.getOutgoingEdges(), parentIsRoot); + } else { + // Parent is a node which either has a value of its own, has more than one remaining + // child, or is actually the root node (we never merge the root node). + // Create new parent node which is the same as is currently just without the edge to the + // node being deleted... + newParent = nodeFactory.createNode(searchResult.parentNode.getIncomingEdge(), searchResult.parentNode.getValue(), newEdgesOfParent, parentIsRoot); + } + // Re-add the parent node to its parent... + if (parentIsRoot) { + // Replace the root node... + this.root = newParent; + } else { + // Re-add the parent node to its parent... 
+ searchResult.parentNodesParent.updateOutgoingEdge(newParent); + } + } + return true; + } + return false; + } + finally { + releaseWriteLock(); + } + } + + @Override + public Iterable getClosestKeys(CharSequence candidate) { + SearchResult searchResult = searchTree(candidate); + Classification classification = searchResult.classification; + switch (classification) { + case EXACT_MATCH: { + return getDescendantKeys(candidate, searchResult.nodeFound); + } + case KEY_ENDS_MID_EDGE: { + // Append the remaining characters of the edge to the key. + // For example if we searched for CO, but first matching node was COFFEE, + // the key associated with the first node should be COFFEE... + CharSequence edgeSuffix = CharSequences.getSuffix(searchResult.nodeFound.getIncomingEdge(), searchResult.charsMatchedInNodeFound); + candidate = CharSequences.concatenate(candidate, edgeSuffix); + return getDescendantKeys(candidate, searchResult.nodeFound); + } + case INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE: { + // Example: if we searched for CX, but deepest matching node was CO, + // the results should include node CO and its descendants... + CharSequence keyOfParentNode = CharSequences.getPrefix(candidate, searchResult.charsMatched - searchResult.charsMatchedInNodeFound); + CharSequence keyOfNodeFound = CharSequences.concatenate(keyOfParentNode, searchResult.nodeFound.getIncomingEdge()); + return getDescendantKeys(keyOfNodeFound, searchResult.nodeFound); + } + case INCOMPLETE_MATCH_TO_END_OF_EDGE: { + if (searchResult.charsMatched == 0) { + // Closest match is the root node, we don't consider this a match for anything... + break; + } + // Example: if we searched for COFFEE, but deepest matching node was CO, + // the results should include node CO and its descendants... 
+ CharSequence keyOfNodeFound = CharSequences.getPrefix(candidate, searchResult.charsMatched); + return getDescendantKeys(keyOfNodeFound, searchResult.nodeFound); + } + } + return Collections.emptySet(); + } + + @Override + public Iterable getValuesForClosestKeys(CharSequence candidate) { + SearchResult searchResult = searchTree(candidate); + Classification classification = searchResult.classification; + switch (classification) { + case EXACT_MATCH: { + return getDescendantValues(candidate, searchResult.nodeFound); + } + case KEY_ENDS_MID_EDGE: { + // Append the remaining characters of the edge to the key. + // For example if we searched for CO, but first matching node was COFFEE, + // the key associated with the first node should be COFFEE... + CharSequence edgeSuffix = CharSequences.getSuffix(searchResult.nodeFound.getIncomingEdge(), searchResult.charsMatchedInNodeFound); + candidate = CharSequences.concatenate(candidate, edgeSuffix); + return getDescendantValues(candidate, searchResult.nodeFound); + } + case INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE: { + // Example: if we searched for CX, but deepest matching node was CO, + // the results should include node CO and its descendants... + CharSequence keyOfParentNode = CharSequences.getPrefix(candidate, searchResult.charsMatched - searchResult.charsMatchedInNodeFound); + CharSequence keyOfNodeFound = CharSequences.concatenate(keyOfParentNode, searchResult.nodeFound.getIncomingEdge()); + return getDescendantValues(keyOfNodeFound, searchResult.nodeFound); + } + case INCOMPLETE_MATCH_TO_END_OF_EDGE: { + if (searchResult.charsMatched == 0) { + // Closest match is the root node, we don't consider this a match for anything... + break; + } + // Example: if we searched for COFFEE, but deepest matching node was CO, + // the results should include node CO and its descendants... 
+ CharSequence keyOfNodeFound = CharSequences.getPrefix(candidate, searchResult.charsMatched); + return getDescendantValues(keyOfNodeFound, searchResult.nodeFound); + } + } + return Collections.emptySet(); + } + + @Override + public Iterable> getKeyValuePairsForClosestKeys(CharSequence candidate) { + SearchResult searchResult = searchTree(candidate); + Classification classification = searchResult.classification; + switch (classification) { + case EXACT_MATCH: { + return getDescendantKeyValuePairs(candidate, searchResult.nodeFound); + } + case KEY_ENDS_MID_EDGE: { + // Append the remaining characters of the edge to the key. + // For example if we searched for CO, but first matching node was COFFEE, + // the key associated with the first node should be COFFEE... + CharSequence edgeSuffix = CharSequences.getSuffix(searchResult.nodeFound.getIncomingEdge(), searchResult.charsMatchedInNodeFound); + candidate = CharSequences.concatenate(candidate, edgeSuffix); + return getDescendantKeyValuePairs(candidate, searchResult.nodeFound); + } + case INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE: { + // Example: if we searched for CX, but deepest matching node was CO, + // the results should include node CO and its descendants... + CharSequence keyOfParentNode = CharSequences.getPrefix(candidate, searchResult.charsMatched - searchResult.charsMatchedInNodeFound); + CharSequence keyOfNodeFound = CharSequences.concatenate(keyOfParentNode, searchResult.nodeFound.getIncomingEdge()); + return getDescendantKeyValuePairs(keyOfNodeFound, searchResult.nodeFound); + } + case INCOMPLETE_MATCH_TO_END_OF_EDGE: { + if (searchResult.charsMatched == 0) { + // Closest match is the root node, we don't consider this a match for anything... + break; + } + // Example: if we searched for COFFEE, but deepest matching node was CO, + // the results should include node CO and its descendants... 
+ CharSequence keyOfNodeFound = CharSequences.getPrefix(candidate, searchResult.charsMatched); + return getDescendantKeyValuePairs(keyOfNodeFound, searchResult.nodeFound); + } + } + return Collections.emptySet(); + } + + @Override + public int size() { + Deque stack = new LinkedList<>(); + stack.push(this.root); + int count = 0; + while (true) { + if (stack.isEmpty()) { + return count; + } + Node current = stack.pop(); + stack.addAll(current.getOutgoingEdges()); + if (current.getValue() != null) { + count++; + } + } + } + + /** + * Atomically adds the given value to the tree, creating a node for the value as necessary. If the value is already + * stored for the same key, either overwrites the existing value, or simply returns the existing value, depending + * on the given value of the overwrite flag. + * + * @param key The key against which the value should be stored + * @param value The value to store against the key + * @param overwrite If true, should replace any existing value, if false should not replace any existing value + * @return The existing value for this key, if there was one, otherwise null + */ + Object putInternal(CharSequence key, Object value, boolean overwrite) { + if (key == null) { + throw new IllegalArgumentException("The key argument was null"); + } + if (key.length() == 0) { + throw new IllegalArgumentException("The key argument was zero-length"); + } + if (value == null) { + throw new IllegalArgumentException("The value argument was null"); + } + acquireWriteLock(); + try { + // Note we search the tree here after we have acquired the write lock... + SearchResult searchResult = searchTree(key); + SearchResult.Classification classification = searchResult.classification; + + switch (classification) { + case EXACT_MATCH: { + // Search found an exact match for all edges leading to this node. + // -> Add or update the value in the node found, by replacing + // the existing node with a new node containing the value... 
+ + // First check if existing node has a value, and if we are allowed to overwrite it. + // Return early without overwriting if necessary... + Object existingValue = searchResult.nodeFound.getValue(); + if (!overwrite && existingValue != null) { + return existingValue; + } + // Create a replacement for the existing node containing the new value... + Node replacementNode = nodeFactory.createNode(searchResult.nodeFound.getIncomingEdge(), value, searchResult.nodeFound.getOutgoingEdges(), false); + searchResult.parentNode.updateOutgoingEdge(replacementNode); + // Return the existing value... + return existingValue; + } + case KEY_ENDS_MID_EDGE: { + // Search ran out of characters from the key while in the middle of an edge in the node. + // -> Split the node in two: Create a new parent node storing the new value, + // and a new child node holding the original value and edges from the existing node... + CharSequence keyCharsFromStartOfNodeFound = key.subSequence(searchResult.charsMatched - searchResult.charsMatchedInNodeFound, key.length()); + CharSequence commonPrefix = CharSequences.getCommonPrefix(keyCharsFromStartOfNodeFound, searchResult.nodeFound.getIncomingEdge()); + CharSequence suffixFromExistingEdge = CharSequences.subtractPrefix(searchResult.nodeFound.getIncomingEdge(), commonPrefix); + + // Create new nodes... + Node newChild = nodeFactory.createNode(suffixFromExistingEdge, searchResult.nodeFound.getValue(), searchResult.nodeFound.getOutgoingEdges(), false); + Node newParent = nodeFactory.createNode(commonPrefix, value, Collections.singletonList(newChild), false); + + // Add the new parent to the parent of the node being replaced (replacing the existing node)... + searchResult.parentNode.updateOutgoingEdge(newParent); + + // Return null for the existing value... 
+ return null; + } + case INCOMPLETE_MATCH_TO_END_OF_EDGE: { + // Search found a difference in characters between the key and the start of all child edges leaving the + // node, the key still has trailing unmatched characters. + // -> Add a new child to the node, containing the trailing characters from the key. + + // NOTE: this is the only branch which allows an edge to be added to the root. + // (Root node's own edge is "" empty string, so is considered a prefixing edge of every key) + + // Create a new child node containing the trailing characters... + CharSequence keySuffix = key.subSequence(searchResult.charsMatched, key.length()); + Node newChild = nodeFactory.createNode(keySuffix, value, Collections.emptyList(), false); + + // Clone the current node adding the new child... + List edges = new ArrayList<>(searchResult.nodeFound.getOutgoingEdges().size() + 1); + edges.addAll(searchResult.nodeFound.getOutgoingEdges()); + edges.add(newChild); + Node clonedNode = nodeFactory.createNode(searchResult.nodeFound.getIncomingEdge(), searchResult.nodeFound.getValue(), edges, searchResult.nodeFound == root); + + // Re-add the cloned node to its parent node... + if (searchResult.nodeFound == root) { + this.root = clonedNode; + } + else { + searchResult.parentNode.updateOutgoingEdge(clonedNode); + } + + // Return null for the existing value... + return null; + } + case INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE: { + // Search found a difference in characters between the key and the characters in the middle of the + // edge in the current node, and the key still has trailing unmatched characters. 
+ // -> Split the node in three: + // Let's call node found: NF + // (1) Create a new node N1 containing the unmatched characters from the rest of the key, and the + // value supplied to this method + // (2) Create a new node N2 containing the unmatched characters from the rest of the edge in NF, and + // copy the original edges and the value from NF unmodified into N2 + // (3) Create a new node N3, which will be the split node, containing the matched characters from + // the key and the edge, and add N1 and N2 as child nodes of N3 + // (4) Re-add N3 to the parent node of NF, effectively replacing NF in the tree + + CharSequence keyCharsFromStartOfNodeFound = key.subSequence(searchResult.charsMatched - searchResult.charsMatchedInNodeFound, key.length()); + CharSequence commonPrefix = CharSequences.getCommonPrefix(keyCharsFromStartOfNodeFound, searchResult.nodeFound.getIncomingEdge()); + CharSequence suffixFromExistingEdge = CharSequences.subtractPrefix(searchResult.nodeFound.getIncomingEdge(), commonPrefix); + CharSequence suffixFromKey = key.subSequence(searchResult.charsMatched, key.length()); + + // Create new nodes... + Node n1 = nodeFactory.createNode(suffixFromKey, value, Collections.emptyList(), false); + Node n2 = nodeFactory.createNode(suffixFromExistingEdge, searchResult.nodeFound.getValue(), searchResult.nodeFound.getOutgoingEdges(), false); + @SuppressWarnings({"NullableProblems"}) + Node n3 = nodeFactory.createNode(commonPrefix, null, Arrays.asList(n1, n2), false); + + searchResult.parentNode.updateOutgoingEdge(n3); + + // Return null for the existing value... + return null; + } + default: { + // This is a safeguard against a new enum constant being added in future. + throw new IllegalStateException("Unexpected classification for search result: " + searchResult); + } + } + } + finally { + releaseWriteLock(); + } + } + + /** + * Returns a lazy iterable which will return {@link CharSequence} keys for which the given key is a prefix. 
+ * The results inherently will not contain duplicates (duplicate keys cannot exist in the tree). + *

+ * Note that this method internally converts {@link CharSequence}s to {@link String}s, to avoid set equality issues, + * because equals() and hashCode() are not specified by the CharSequence API contract. + */ + @SuppressWarnings({"JavaDoc"}) + Iterable getDescendantKeys(final CharSequence startKey, final Node startNode) { + return () -> new LazyIterator<>() { + final Iterator descendantNodes = lazyTraverseDescendants(startKey, startNode).iterator(); + + @Override + protected CharSequence computeNext() { + // Traverse to the next matching node in the tree and return its key and value... + while (descendantNodes.hasNext()) { + NodeKeyPair nodeKeyPair = descendantNodes.next(); + Object value = nodeKeyPair.node.getValue(); + if (value != null) { + // Dealing with a node explicitly added to tree (rather than an automatically-added split node). + + // Call the transformKeyForResult method to allow key to be transformed before returning to client. + // Used by subclasses such as ReversedRadixTree implementations... + CharSequence optionallyTransformedKey = transformKeyForResult(nodeKeyPair.key); + + // -> Convert the CharSequence to a String before returning, to avoid set equality issues, + // because equals() and hashCode() is not specified by the CharSequence API contract... + return CharSequences.toString(optionallyTransformedKey); + } + } + // Finished traversing the tree, no more matching nodes to return... + return endOfData(); + } + }; + } + + /** + * Returns a lazy iterable which will return values which are associated with keys in the tree for which + * the given key is a prefix. + */ + Iterable getDescendantValues(final CharSequence startKey, final Node startNode) { + return () -> new LazyIterator() { + final Iterator descendantNodes = lazyTraverseDescendants(startKey, startNode).iterator(); + + @Override + protected O computeNext() { + // Traverse to the next matching node in the tree and return its key and value... 
+ while (descendantNodes.hasNext()) { + NodeKeyPair nodeKeyPair = descendantNodes.next(); + Object value = nodeKeyPair.node.getValue(); + if (value != null) { + // Dealing with a node explicitly added to tree (rather than an automatically-added split node). + + // We have to cast to generic type here, because Node objects are not generically typed. + // Background: Node objects are not generically typed, because arrays can't be generically typed, + // and we use arrays in nodes. We choose to cast here (in wrapper logic around the tree) rather than + // pollute the already-complex tree manipulation logic with casts. + @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"}) + O valueTyped = (O)value; + return valueTyped; + } + } + // Finished traversing the tree, no more matching nodes to return... + return endOfData(); + } + }; + } + + /** + * Returns a lazy iterable which will return {@link KeyValuePair} objects each containing a key and a value, + * for which the given key is a prefix of the key in the {@link KeyValuePair}. These results inherently will not + * contain duplicates (duplicate keys cannot exist in the tree). + *

+ * Note that this method internally converts {@link CharSequence}s to {@link String}s, to avoid set equality issues, + * because equals() and hashCode() are not specified by the CharSequence API contract. + */ + Iterable> getDescendantKeyValuePairs(final CharSequence startKey, final Node startNode) { + return () -> new LazyIterator<>() { + final Iterator descendantNodes = lazyTraverseDescendants(startKey, startNode).iterator(); + + @Override + protected KeyValuePair computeNext() { + // Traverse to the next matching node in the tree and return its key and value... + while (descendantNodes.hasNext()) { + NodeKeyPair nodeKeyPair = descendantNodes.next(); + Object value = nodeKeyPair.node.getValue(); + if (value != null) { + // Dealing with a node explicitly added to tree (rather than an automatically-added split node). + + // Call the transformKeyForResult method to allow key to be transformed before returning to client. + // Used by subclasses such as ReversedRadixTree implementations... + CharSequence optionallyTransformedKey = transformKeyForResult(nodeKeyPair.key); + + // -> Convert the CharSequence to a String before returning, to avoid set equality issues, + // because equals() and hashCode() is not specified by the CharSequence API contract... + String keyString = CharSequences.toString(optionallyTransformedKey); + return new KeyValuePairImpl<>(keyString, value); + } + } + // Finished traversing the tree, no more matching nodes to return... + return endOfData(); + } + }; + } + + /** + * Implementation of the {@link KeyValuePair} interface. + */ + public static class KeyValuePairImpl implements KeyValuePair { + + final String key; + final O value; + + /** + * Constructor. + * + * Implementation node: This constructor currently requires the key to be supplied as a {@link String} + * - this is to allow reliable testing of object equality; the alternative {@link CharSequence} + * does not specify a contract for {@link Object#equals(Object)}. 
+ * + * @param key The key as a string + * @param value The value + */ + public KeyValuePairImpl(String key, Object value) { + this.key = key; + // We have to cast to generic type here, because Node objects are not generically typed. + // Background: Node objects are not generically typed, because arrays can't be generically typed, + // and we use arrays in nodes. We choose to cast here (in wrapper logic around the tree) rather than + // pollute the already-complex tree manipulation logic with casts. + @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"}) + O valueTyped = (O)value; + this.value = valueTyped; + } + + @Override + public CharSequence getKey() { + return key; + } + + @Override + public O getValue() { + return value; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + KeyValuePairImpl that = (KeyValuePairImpl) o; + return key.equals(that.key); + + } + + @Override + public int hashCode() { + return key.hashCode(); + } + + @Override + public String toString() { + return "(" + key + ", " + value + ")"; + } + } + + /** + * Traverses the tree using depth-first, preordered traversal, starting at the given node, using lazy evaluation + * such that the next node is only determined when next() is called on the iterator returned. + * The traversal algorithm uses iteration instead of recursion to allow deep trees to be traversed without + * requiring large JVM stack sizes. + *

+ * Each node that is encountered is returned from the iterator along with a key associated with that node, + * in a NodeKeyPair object. The key will be prefixed by the given start key, and will be generated by appending + * to the start key the edges traversed along the path to that node from the start node. + * + * @param startKey The key which matches the given start node + * @param startNode The start node + * @return An iterator which when iterated traverses the tree using depth-first, preordered traversal, + * starting at the given start node + */ + protected Iterable lazyTraverseDescendants(final CharSequence startKey, final Node startNode) { + return () -> new LazyIterator<>() { + final Deque stack = new LinkedList<>(); + { + stack.push(new NodeKeyPair(startNode, startKey)); + } + + @Override + protected NodeKeyPair computeNext() { + if (stack.isEmpty()) { + return endOfData(); + } + NodeKeyPair current = stack.pop(); + List childNodes = current.node.getOutgoingEdges(); + + // -> Iterate child nodes in reverse order and so push them onto the stack in reverse order, + // to counteract that pushing them onto the stack alone would otherwise reverse their processing order. + // This ensures that we actually process nodes in ascending alphabetical order. + for (int i = childNodes.size(); i > 0; i--) { + Node child = childNodes.get(i - 1); + stack.push(new NodeKeyPair(child, CharSequences.concatenate(current.key, child.getIncomingEdge()))); + } + return current; + } + }; + } + + + /** + * Encapsulates a node and its associated key. Used internally by {@link #lazyTraverseDescendants}. 
+ */ + protected static class NodeKeyPair { + public final Node node; + public final CharSequence key; + + public NodeKeyPair(Node node, CharSequence key) { + this.node = node; + this.key = key; + } + } + + /** + * A hook method which may be overridden by subclasses, to transform a key just before it is returned to + * the application, for example by the {@link #getKeysStartingWith(CharSequence)} or the + * {@link #getKeyValuePairsForKeysStartingWith(CharSequence)} methods. + *

+ * This hook is expected to be used by + * implementations where keys are stored in the tree in reverse order but results should be returned in normal + * order. + *

+ * This default implementation simply returns the given key unmodified. + * + * @param rawKey The raw key as stored in the tree + * @return A transformed version of the key + */ + protected CharSequence transformKeyForResult(CharSequence rawKey) { + return rawKey; + } + + + /** + * Traverses the tree and finds the node which matches the longest prefix of the given key. + *

+ * The node returned might be an exact match for the key, in which case {@link SearchResult#charsMatched} + * will equal the length of the key. + *

+ * The node returned might be an inexact match for the key, in which case {@link SearchResult#charsMatched} + * will be less than the length of the key. + *

+ * There are two types of inexact match: + *

    + *
  • + * An inexact match which ends evenly at the boundary between a node and its children (the rest of the key + * not matching any children at all). In this case if we wanted to add nodes to the tree to represent the + * rest of the key, we could simply add child nodes to the node found. + *
  • + *
  • + * An inexact match which ends in the middle of the characters for an edge stored in a node (the key + * matching only the first few characters of the edge). In this case if we wanted to add nodes to the + * tree to represent the rest of the key, we would have to split the node (let's call this node found: NF): + *
      + *
    1. + * Create a new node (N1) which will be the split node, containing the matched characters from the + * start of the edge in NF + *
    2. + *
    3. + * Create a new node (N2) which will contain the unmatched characters from the rest of the edge + * in NF, and copy the original edges from NF unmodified into N2 + *
    4. + *
    5. + * Create a new node (N3) which will be the new branch, containing the unmatched characters from + * the rest of the key + *
    6. + *
    7. + * Add N2 as a child of N1 + *
    8. + *
    9. + * Add N3 as a child of N1 + *
    10. + *
    11. + * In the parent node of NF, replace the edge pointing to NF with an edge pointing instead + * to N1. If we do this step atomically, reading threads are guaranteed to never see "invalid" + * data, only either the old data or the new data + *
    12. + *
    + *
  • + *
+ * The {@link SearchResult#classification} is an enum value based on its classification of the + * match according to the descriptions above. + * + * @param key a key for which the node matching the longest prefix of the key is required + * @return A {@link SearchResult} object which contains the node matching the longest prefix of the key, its + * parent node, the number of characters of the key which were matched in total and within the edge of the + * matched node, and a {@link SearchResult#classification} of the match as described above + */ + SearchResult searchTree(CharSequence key) { + Node parentNodesParent = null; + Node parentNode = null; + Node currentNode = root; + int charsMatched = 0, charsMatchedInNodeFound = 0; + + final int keyLength = key.length(); + outer_loop: while (charsMatched < keyLength) { + Node nextNode = currentNode.getOutgoingEdge(key.charAt(charsMatched)); + if (nextNode == null) { + // Next node is a dead end... + //noinspection UnnecessaryLabelOnBreakStatement + break outer_loop; + } + + parentNodesParent = parentNode; + parentNode = currentNode; + currentNode = nextNode; + charsMatchedInNodeFound = 0; + CharSequence currentNodeEdgeCharacters = currentNode.getIncomingEdge(); + for (int i = 0, numEdgeChars = currentNodeEdgeCharacters.length(); i < numEdgeChars && charsMatched < keyLength; i++) { + if (currentNodeEdgeCharacters.charAt(i) != key.charAt(charsMatched)) { + // Found a difference in chars between character in key and a character in current node. + // Current node is the deepest match (inexact match).... + break outer_loop; + } + charsMatched++; + charsMatchedInNodeFound++; + } + } + return new SearchResult(key, currentNode, charsMatched, charsMatchedInNodeFound, parentNode, parentNodesParent); + } + + /** + * Encapsulates results of searching the tree for a node for which a given key is a prefix. 
Encapsulates the node + * found, its parent node, its parent's parent node, and the number of characters matched in the current node and + * in total. + *

+ * Also classifies the search result so that algorithms in methods which use this SearchResult, when adding nodes + * and removing nodes from the tree, can select appropriate strategies based on the classification. + */ + static class SearchResult { + final CharSequence key; + final Node nodeFound; + final int charsMatched; + final int charsMatchedInNodeFound; + final Node parentNode; + final Node parentNodesParent; + final Classification classification; + + enum Classification { + EXACT_MATCH, + INCOMPLETE_MATCH_TO_END_OF_EDGE, + INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE, + KEY_ENDS_MID_EDGE, + INVALID // INVALID is never used, except in unit testing + } + + SearchResult(CharSequence key, Node nodeFound, int charsMatched, int charsMatchedInNodeFound, Node parentNode, Node parentNodesParent) { + this.key = key; + this.nodeFound = nodeFound; + this.charsMatched = charsMatched; + this.charsMatchedInNodeFound = charsMatchedInNodeFound; + this.parentNode = parentNode; + this.parentNodesParent = parentNodesParent; + + // Classify this search result... 
+ this.classification = classify(key, nodeFound, charsMatched, charsMatchedInNodeFound); + } + + protected Classification classify(CharSequence key, Node nodeFound, int charsMatched, int charsMatchedInNodeFound) { + if (charsMatched == key.length()) { + if (charsMatchedInNodeFound == nodeFound.getIncomingEdge().length()) { + return Classification.EXACT_MATCH; + } + else if (charsMatchedInNodeFound < nodeFound.getIncomingEdge().length()) { + return Classification.KEY_ENDS_MID_EDGE; + } + } + else if (charsMatched < key.length()) { + if (charsMatchedInNodeFound == nodeFound.getIncomingEdge().length()) { + return Classification.INCOMPLETE_MATCH_TO_END_OF_EDGE; + } + else if (charsMatchedInNodeFound < nodeFound.getIncomingEdge().length()) { + return Classification.INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE; + } + } + throw new IllegalStateException("Unexpected failure to classify SearchResult: " + this); + } + + @Override + public String toString() { + return "SearchResult{" + + "key=" + key + + ", nodeFound=" + nodeFound + + ", charsMatched=" + charsMatched + + ", charsMatchedInNodeFound=" + charsMatchedInNodeFound + + ", parentNode=" + parentNode + + ", parentNodesParent=" + parentNodesParent + + ", classification=" + classification + + '}'; + } + } + + public Node getNode() { + return root; + } + +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/RadixTree.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/RadixTree.java new file mode 100644 index 0000000..c2940f5 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/RadixTree.java @@ -0,0 +1,145 @@ +package org.xbib.datastructures.trie.concurrent; + +import org.xbib.datastructures.trie.concurrent.util.KeyValuePair; + +/** + * API of a radix tree, that is a tree which allows values to be looked up based on prefixes of the keys + * with which they were associated, as well as based on exact matches for keys. 
A radix tree essentially allows + * "equals" and "starts with" lookup. + *

+ * See documentation on each method for details. + * + * @param The type of the values associated with keys in the tree + */ +public interface RadixTree { + + /** + * Associates the given value with the given key; replacing any previous value associated with the key. + * Returns the previous value associated with the key, if any. + *

+ * This operation is performed atomically. + * + * @param key The key with which the specified value should be associated + * @param value The value to associate with the key, which cannot be null + * @return The previous value associated with the key, if there was one, otherwise null + */ + O put(CharSequence key, O value); + + /** + * If a value is not already associated with the given key in the tree, associates the given value with the + * key; otherwise if an existing value is already associated, returns the existing value and does not overwrite it. + *

+ * This operation is performed atomically. + * + * @param key The key with which the specified value should be associated + * @param value The value to associate with the key, which cannot be null + * @return The existing value associated with the key, if there was one; otherwise null in which case the new + * value was successfully associated + */ + O putIfAbsent(CharSequence key, O value); + + /** + * Removes the value associated with the given key (exact match). + * If no value is associated with the key, does nothing. + * + * @param key The key for which an associated value should be removed + * @return True if a value was removed (and therefore was associated with the key), false if no value was + * associated/removed + */ + boolean remove(CharSequence key); + + /** + * Returns the value associated with the given key (exact match), or returns null if no such value + * is associated with the key. + * + * @param key The key with which a sought value might be associated + * @return The value associated with the given key (exact match), or null if no value was associated with the key + */ + O getValueForExactKey(CharSequence key); + + /** + * Returns a lazy iterable which returns the set of keys in the tree which start with the given prefix. + *

+ * This is inclusive - if the given prefix is an exact match for a key in the tree, that key is also + * returned. + * + * @param prefix A prefix of sought keys in the tree + * @return The set of keys in the tree which start with the given prefix, inclusive + */ + Iterable getKeysStartingWith(CharSequence prefix); + + /** + * Returns a lazy iterable which returns the set of values associated with keys in the tree which start with the + * given prefix. + *

+ * This is inclusive - if the given prefix is an exact match for a key in the tree, the value associated + * with that key is also returned. + *

+ * Note that although the same value might originally have been associated with multiple keys, the set returned + * does not contain duplicates (as determined by the value objects' implementation of {@link Object#equals(Object)}). + * + * @param prefix A prefix of keys in the tree for which associated values are sought + * @return The set of values associated with keys in the tree which start with the given prefix, inclusive + */ + Iterable getValuesForKeysStartingWith(CharSequence prefix); + + /** + * Returns a lazy iterable which returns the set of {@link KeyValuePair}s for keys and their associated values + * in the tree, where the keys start with the given prefix. + *

+ * This is inclusive - if the given prefix is an exact match for a key in the tree, the {@link KeyValuePair} + * for that key is also returned. + * + * @param prefix A prefix of keys in the tree for which associated {@link KeyValuePair}s are sought + * @return The set of {@link KeyValuePair}s for keys in the tree which start with the given prefix, inclusive + */ + Iterable> getKeyValuePairsForKeysStartingWith(CharSequence prefix); + + /** + * Returns a lazy iterable which returns the set of keys in the tree which are the closest match for the given + * candidate key. + *

+ * Example:
+ * if the tree contains the keys {@code Ford Focus}, {@code Ford Mondeo} and {@code BMW M3},
+ * then {@code getClosestKeys("Ford F150")} returns {@code Ford Focus} and {@code Ford Mondeo}.
+ *

+ * This is inclusive - if the given candidate is an exact match for a key in the tree, that key is also + * returned. + * + * @param candidate A candidate key + * @return The set of keys in the tree which most closely match the candidate key, inclusive + */ + Iterable getClosestKeys(CharSequence candidate); + + /** + * Returns a lazy iterable which returns the set of values associated with keys in the tree which are the closest + * match for the given candidate key. + *

+ * See {@link #getClosestKeys(CharSequence)} for more details. + * + * @param candidate A candidate key + * @return The set of values associated with keys in the tree which most closely match the candidate key, inclusive + */ + Iterable getValuesForClosestKeys(CharSequence candidate); + + /** + * Returns a lazy iterable which returns the set of {@link KeyValuePair}s for keys and their associated values in + * the tree which are the closest match for the given candidate key. + *

+ * See {@link #getClosestKeys(CharSequence)} for more details. + * + * @param candidate A candidate key + * @return The set of {@link KeyValuePair}s for keys and their associated values in the tree which most closely + * match the candidate key, inclusive + */ + Iterable> getKeyValuePairsForClosestKeys(CharSequence candidate); + + /** + * Counts the number of keys/values stored in the tree. + *

+ * In the current implementation, this is an expensive operation, having O(n) time complexity. + * + * @return The number of keys/values stored in the tree + */ + int size(); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/AtomicReferenceArrayListAdapter.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/AtomicReferenceArrayListAdapter.java new file mode 100644 index 0000000..0d724fc --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/AtomicReferenceArrayListAdapter.java @@ -0,0 +1,30 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.AbstractList; +import java.util.concurrent.atomic.AtomicReferenceArray; + +/** + * Wraps an {@link AtomicReferenceArray} to implement read-only methods of the {@link java.util.List} interface. + *

+ * This enables binary search of an {@link AtomicReferenceArray}, using + * {@link java.util.Collections#binarySearch(java.util.List, Object)}. + */ +@SuppressWarnings("serial") +public class AtomicReferenceArrayListAdapter extends AbstractList { + + private final AtomicReferenceArray atomicReferenceArray; + + public AtomicReferenceArrayListAdapter(AtomicReferenceArray atomicReferenceArray) { + this.atomicReferenceArray = atomicReferenceArray; + } + + @Override + public T get(int index) { + return atomicReferenceArray.get(index); + } + + @Override + public int size() { + return atomicReferenceArray.length(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeDefault.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeDefault.java new file mode 100644 index 0000000..0447221 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeDefault.java @@ -0,0 +1,117 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicReferenceArray; + +/** + * A non-optimized implementation of the {@link Node} interface. Stores all variables and supports all behaviours + * required by the tree, but not very memory efficient. + *

+ * See {@link NodeFactory} for documentation on how alternative + * node implementations can be created to reduce memory overhead. See the {@link Node} interface for details on how + * to write memory-efficient nodes. + *

+ * This implementation stores references to child nodes in an {@link AtomicReferenceArray}, in ascending sorted order + * of the first character of the edges which child nodes define. + *

+ * The {@link #getOutgoingEdge(Character)} method uses binary search to locate a requested node, given the first character + * of an edge indicated. The node is then read and returned atomically from the {@link AtomicReferenceArray}. + *

+ * The {@link #updateOutgoingEdge(Node)} method ensures that any + * attempt to update a reference to a child node preserves the constraints defined in the {@link Node} interface. New + * child nodes are written atomically to the {@link AtomicReferenceArray}. + *

+ * The constraints defined in the {@link Node} interface ensure that the {@link AtomicReferenceArray} always remains in + * ascending sorted order regardless of modifications performed concurrently, as long as the modifications comply with + * the constraints. This node enforces those constraints. + * + * @author Niall Gallagher + */ +public class CharArrayNodeDefault implements Node { + + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... + private final char[] incomingEdgeCharArray; + + // References to child nodes representing outgoing edges from this node. + // Once assigned we never add or remove references, but we do update existing references to point to new child + // nodes provided new edges start with the same first character... + private final AtomicReferenceArray outgoingEdges; + + // A read-only List wrapper around the outgoingEdges AtomicReferenceArray... + private final List outgoingEdgesAsList; + + // An arbitrary value which the application associates with a key matching the path to this node in the tree. + // This value can be null... + private final Object value; + + public CharArrayNodeDefault(CharSequence edgeCharSequence, Object value, List outgoingEdges) { + Node[] childNodeArray = outgoingEdges.toArray(new Node[outgoingEdges.size()]); + // Sort the child nodes... 
+ Arrays.sort(childNodeArray, new NodeCharacterComparator()); + this.outgoingEdges = new AtomicReferenceArray(childNodeArray); + this.incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence); + this.value = value; + this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter(this.outgoingEdges); + } + + @Override + public CharSequence getIncomingEdge() { + return CharSequences.fromCharArray(incomingEdgeCharArray); + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharArray[0]; + } + + @Override + public Object getValue() { + return value; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... + int index = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter); + if (index < 0) { + // No such edge exists... + return null; + } + // Atomically return the child node at this index... + return outgoingEdges.get(index); + } + + @Override + public void updateOutgoingEdge(Node childNode) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... + int index = NodeUtil.binarySearchForEdge(outgoingEdges, childNode.getIncomingEdgeFirstCharacter()); + if (index < 0) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + // Atomically update the child node at this index... 
+ outgoingEdges.set(index, childNode); + } + + @Override + public List getOutgoingEdges() { + return outgoingEdgesAsList; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharArray); + sb.append(", value=").append(value); + sb.append(", edges=").append(getOutgoingEdges()); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafNullValue.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafNullValue.java new file mode 100644 index 0000000..445108b --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafNullValue.java @@ -0,0 +1,61 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Collections; +import java.util.List; + +/** + * Stores only incoming edge as a {@code char[]}. + * Returns {@code null} for the value. Does not store any outgoing edges. + */ +public class CharArrayNodeLeafNullValue implements Node { + + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... 
+ private final char[] incomingEdgeCharArray; + + public CharArrayNodeLeafNullValue(CharSequence edgeCharSequence) { + this.incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence); + } + + @Override + public CharSequence getIncomingEdge() { + return CharSequences.fromCharArray(incomingEdgeCharArray); + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharArray[0]; + } + + @Override + public Object getValue() { + return null; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + return null; + } + + @Override + public void updateOutgoingEdge(Node childNode) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + + @Override + public List getOutgoingEdges() { + return Collections.emptyList(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharArray); + sb.append(", value=null"); + sb.append(", edges=[]"); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafVoidValue.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafVoidValue.java new file mode 100644 index 0000000..6badbb5 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafVoidValue.java @@ -0,0 +1,60 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Collections; +import java.util.List; + +/** + * Stores only incoming edge as a {@code char[]}. + * Returns {@link VoidValue} for the value. Does not store any outgoing edges. 
+ */ +public class CharArrayNodeLeafVoidValue implements Node { + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... + private final char[] incomingEdgeCharArray; + + public CharArrayNodeLeafVoidValue(CharSequence edgeCharSequence) { + this.incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence); + } + + @Override + public CharSequence getIncomingEdge() { + return CharSequences.fromCharArray(incomingEdgeCharArray); + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharArray[0]; + } + + @Override + public Object getValue() { + return VoidValue.SINGLETON; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + return null; + } + + @Override + public void updateOutgoingEdge(Node childNode) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + + @Override + public List getOutgoingEdges() { + return Collections.emptyList(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharArray); + sb.append(", value=").append(VoidValue.SINGLETON); + sb.append(", edges=[]"); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafWithValue.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafWithValue.java new file mode 100644 index 0000000..d2bee7d --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeLeafWithValue.java @@ -0,0 +1,66 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Collections; +import java.util.List; + +/** + * Stores only 
incoming edge as a {@code char[]}, and a reference to a value. Does not store any outgoing + * edges. + */ +public class CharArrayNodeLeafWithValue implements Node { + + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... + private final char[] incomingEdgeCharArray; + + // An arbitrary value which the application associates with a key matching the path to this node in the tree. + // This value can be null... + private final Object value; + + public CharArrayNodeLeafWithValue(CharSequence edgeCharSequence, Object value) { + this.incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence); + this.value = value; + } + + @Override + public CharSequence getIncomingEdge() { + return CharSequences.fromCharArray(incomingEdgeCharArray); + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharArray[0]; + } + + @Override + public Object getValue() { + return value; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + return null; + } + + @Override + public void updateOutgoingEdge(Node childNode) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + + @Override + public List getOutgoingEdges() { + return Collections.emptyList(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharArray); + sb.append(", value=").append(value); + sb.append(", edges=[]"); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeNonLeafNullValue.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeNonLeafNullValue.java new file mode 100644 index 
0000000..c2e8dbc --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeNonLeafNullValue.java @@ -0,0 +1,92 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicReferenceArray; + +/** + * Stores incoming edge as a {@code char[]} and outgoing edges as an {@link AtomicReferenceArray}. Does not store a + * value and returns {@code null} for the value. + */ +public class CharArrayNodeNonLeafNullValue implements Node { + + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... + private final char[] incomingEdgeCharArray; + + // References to child nodes representing outgoing edges from this node. + // Once assigned we never add or remove references, but we do update existing references to point to new child + // nodes provided new edges start with the same first character... + private final AtomicReferenceArray outgoingEdges; + + // A read-only List wrapper around the outgoingEdges AtomicReferenceArray... + private final List outgoingEdgesAsList; + + public CharArrayNodeNonLeafNullValue(CharSequence edgeCharSequence, List outgoingEdges) { + Node[] childNodeArray = outgoingEdges.toArray(new Node[outgoingEdges.size()]); + // Sort the child nodes... 
+ Arrays.sort(childNodeArray, new NodeCharacterComparator()); + this.outgoingEdges = new AtomicReferenceArray(childNodeArray); + this.incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence); + this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter(this.outgoingEdges); + } + + @Override + public CharSequence getIncomingEdge() { + return CharSequences.fromCharArray(incomingEdgeCharArray); + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharArray[0]; + } + + @Override + public Object getValue() { + return null; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... + int index = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter); + if (index < 0) { + // No such edge exists... + return null; + } + // Atomically return the child node at this index... + return outgoingEdges.get(index); + } + + @Override + public void updateOutgoingEdge(Node childNode) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... + int index = NodeUtil.binarySearchForEdge(outgoingEdges, childNode.getIncomingEdgeFirstCharacter()); + if (index < 0) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + // Atomically update the child node at this index... 
+ outgoingEdges.set(index, childNode); + } + + @Override + public List getOutgoingEdges() { + return outgoingEdgesAsList; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharArray); + sb.append(", value=null"); + sb.append(", edges=").append(getOutgoingEdges()); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeNonLeafVoidValue.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeNonLeafVoidValue.java new file mode 100644 index 0000000..c7d90e7 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharArrayNodeNonLeafVoidValue.java @@ -0,0 +1,91 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicReferenceArray; + +/** + * Stores incoming edge as a {@code char[]} and outgoing edges as an {@link AtomicReferenceArray}. Does not store a + * value and returns {@link VoidValue} for the value. + */ +public class CharArrayNodeNonLeafVoidValue implements Node { + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... + private final char[] incomingEdgeCharArray; + + // References to child nodes representing outgoing edges from this node. + // Once assigned we never add or remove references, but we do update existing references to point to new child + // nodes provided new edges start with the same first character... + private final AtomicReferenceArray outgoingEdges; + + // A read-only List wrapper around the outgoingEdges AtomicReferenceArray... 
+ private final List outgoingEdgesAsList; + + public CharArrayNodeNonLeafVoidValue(CharSequence edgeCharSequence, List outgoingEdges) { + Node[] childNodeArray = outgoingEdges.toArray(new Node[outgoingEdges.size()]); + // Sort the child nodes... + Arrays.sort(childNodeArray, new NodeCharacterComparator()); + this.outgoingEdges = new AtomicReferenceArray(childNodeArray); + this.incomingEdgeCharArray = CharSequences.toCharArray(edgeCharSequence); + this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter(this.outgoingEdges); + } + + @Override + public CharSequence getIncomingEdge() { + return CharSequences.fromCharArray(incomingEdgeCharArray); + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharArray[0]; + } + + @Override + public Object getValue() { + return VoidValue.SINGLETON; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... + int index = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter); + if (index < 0) { + // No such edge exists... + return null; + } + // Atomically return the child node at this index... + return outgoingEdges.get(index); + } + + @Override + public void updateOutgoingEdge(Node childNode) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... 
+ int index = NodeUtil.binarySearchForEdge(outgoingEdges, childNode.getIncomingEdgeFirstCharacter()); + if (index < 0) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + // Atomically update the child node at this index... + outgoingEdges.set(index, childNode); + } + + @Override + public List getOutgoingEdges() { + return outgoingEdgesAsList; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharArray); + sb.append(", value=").append(VoidValue.SINGLETON); + sb.append(", edges=").append(getOutgoingEdges()); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeDefault.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeDefault.java new file mode 100644 index 0000000..cee7978 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeDefault.java @@ -0,0 +1,115 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicReferenceArray; + +/** + * An implementation of the {@link Node} interface which stores the incoming edge as a {@link CharSequence} (a + * view onto the original key) rather than copying the edge into a character array. Stores all variables and + * supports all behaviours required by the tree, but still could be more memory efficient. + *

+ * See {@link NodeFactory} for documentation on how alternative + * node implementations can be created to reduce memory overhead. See the {@link Node} interface for details on how + * to write memory-efficient nodes. + *

+ * This implementation stores references to child nodes in an {@link AtomicReferenceArray}, in ascending sorted order + * of the first character of the edges which child nodes define. + *

+ * The {@link #getOutgoingEdge(Character)} method uses binary search to locate a requested node, given the first character + * of an edge indicated. The node is then read and returned atomically from the {@link AtomicReferenceArray}. + *

+ * The {@link #updateOutgoingEdge(Node)} method ensures that any + * attempt to update a reference to a child node preserves the constraints defined in the {@link Node} interface. New + * child nodes are written atomically to the {@link java.util.concurrent.atomic.AtomicReferenceArray}. + *

+ * The constraints defined in the {@link Node} interface ensure that the {@link AtomicReferenceArray} always remains in + * ascending sorted order regardless of modifications performed concurrently, as long as the modifications comply with + * the constraints. This node enforces those constraints. + */ +public class CharSequenceNodeDefault implements Node { + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... + private final CharSequence incomingEdgeCharSequence; + + // References to child nodes representing outgoing edges from this node. + // Once assigned we never add or remove references, but we do update existing references to point to new child + // nodes provided new edges start with the same first character... + private final AtomicReferenceArray outgoingEdges; + + // A read-only List wrapper around the outgoingEdges AtomicReferenceArray... + private final List outgoingEdgesAsList; + + // An arbitrary value which the application associates with a key matching the path to this node in the tree. + // This value can be null... + private final Object value; + + public CharSequenceNodeDefault(CharSequence edgeCharSequence, Object value, List outgoingEdges) { + Node[] childNodeArray = outgoingEdges.toArray(new Node[outgoingEdges.size()]); + // Sort the child nodes... 
+ Arrays.sort(childNodeArray, new NodeCharacterComparator()); + this.outgoingEdges = new AtomicReferenceArray(childNodeArray); + this.incomingEdgeCharSequence = edgeCharSequence; + this.value = value; + this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter(this.outgoingEdges); + } + + @Override + public CharSequence getIncomingEdge() { + return incomingEdgeCharSequence; + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharSequence.charAt(0); + } + + @Override + public Object getValue() { + return value; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... + int index = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter); + if (index < 0) { + // No such edge exists... + return null; + } + // Atomically return the child node at this index... + return outgoingEdges.get(index); + } + + @Override + public void updateOutgoingEdge(Node childNode) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... + int index = NodeUtil.binarySearchForEdge(outgoingEdges, childNode.getIncomingEdgeFirstCharacter()); + if (index < 0) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + // Atomically update the child node at this index... 
+ outgoingEdges.set(index, childNode); + } + + @Override + public List getOutgoingEdges() { + return outgoingEdgesAsList; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharSequence); + sb.append(", value=").append(value); + sb.append(", edges=").append(getOutgoingEdges()); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafNullValue.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafNullValue.java new file mode 100644 index 0000000..1b60da7 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafNullValue.java @@ -0,0 +1,61 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Collections; +import java.util.List; + +/** + * Stores only incoming edge as a {@link CharSequence} (a view onto the original key) rather than copying the + * edge into a character array. Returns {@code null} for the value. Does not store any outgoing edges. + */ +public class CharSequenceNodeLeafNullValue implements Node { + + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... 
+ private final CharSequence incomingEdgeCharSequence; + + public CharSequenceNodeLeafNullValue(CharSequence edgeCharSequence) { + this.incomingEdgeCharSequence = edgeCharSequence; + } + + @Override + public CharSequence getIncomingEdge() { + return incomingEdgeCharSequence; + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharSequence.charAt(0); + } + + @Override + public Object getValue() { + return null; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + return null; + } + + @Override + public void updateOutgoingEdge(Node childNode) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + + @Override + public List getOutgoingEdges() { + return Collections.emptyList(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharSequence); + sb.append(", value=null"); + sb.append(", edges=[]"); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafVoidValue.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafVoidValue.java new file mode 100644 index 0000000..43d5af7 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafVoidValue.java @@ -0,0 +1,61 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Collections; +import java.util.List; + +/** + * Stores only incoming edge as a {@link CharSequence} (a view onto the original key) rather than copying the + * edge into a character array. Returns {@link VoidValue} for the value. Does not store any outgoing edges. 
+ */ +public class CharSequenceNodeLeafVoidValue implements Node { + + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... + private final CharSequence incomingEdgeCharSequence; + + public CharSequenceNodeLeafVoidValue(CharSequence edgeCharSequence) { + this.incomingEdgeCharSequence = edgeCharSequence; + } + + @Override + public CharSequence getIncomingEdge() { + return incomingEdgeCharSequence; + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharSequence.charAt(0); + } + + @Override + public Object getValue() { + return VoidValue.SINGLETON; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + return null; + } + + @Override + public void updateOutgoingEdge(Node childNode) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + + @Override + public List getOutgoingEdges() { + return Collections.emptyList(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharSequence); + sb.append(", value=").append(VoidValue.SINGLETON); + sb.append(", edges=[]"); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafWithValue.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafWithValue.java new file mode 100644 index 0000000..d44e35c --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeLeafWithValue.java @@ -0,0 +1,67 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Collections; +import java.util.List; + +/** + * Stores incoming edge as 
a {@link CharSequence} (a view onto the original key) rather than copying the + * edge into a character array. Also stores a reference to a value. Does not store any outgoing edges. + */ +public class CharSequenceNodeLeafWithValue implements Node { + + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... + private final CharSequence incomingEdgeCharSequence; + + // An arbitrary value which the application associates with a key matching the path to this node in the tree. + // This value can be null... + private final Object value; + + public CharSequenceNodeLeafWithValue(CharSequence edgeCharSequence, Object value) { + // Sort the child nodes... + this.incomingEdgeCharSequence = edgeCharSequence; + this.value = value; + } + + @Override + public CharSequence getIncomingEdge() { + return incomingEdgeCharSequence; + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharSequence.charAt(0); + } + + @Override + public Object getValue() { + return value; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + return null; + } + + @Override + public void updateOutgoingEdge(Node childNode) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + + @Override + public List getOutgoingEdges() { + return Collections.emptyList(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharSequence); + sb.append(", value=").append(value); + sb.append(", edges=[]"); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeNonLeafNullValue.java 
b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeNonLeafNullValue.java new file mode 100644 index 0000000..876e15a --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeNonLeafNullValue.java @@ -0,0 +1,93 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicReferenceArray; + +/** + * Stores incoming edge as a {@link CharSequence} (a view onto the original key) rather than copying the edge + * into a character array, and stores outgoing edges as an {@link AtomicReferenceArray}. Does not store a + * value and returns {@code null} for the value. + */ +public class CharSequenceNodeNonLeafNullValue implements Node { + + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... + private final CharSequence incomingEdgeCharSequence; + + // References to child nodes representing outgoing edges from this node. + // Once assigned we never add or remove references, but we do update existing references to point to new child + // nodes provided new edges start with the same first character... + private final AtomicReferenceArray outgoingEdges; + + // A read-only List wrapper around the outgoingEdges AtomicReferenceArray... + private final List outgoingEdgesAsList; + + public CharSequenceNodeNonLeafNullValue(CharSequence edgeCharSequence, List outgoingEdges) { + Node[] childNodeArray = outgoingEdges.toArray(new Node[outgoingEdges.size()]); + // Sort the child nodes... 
+ Arrays.sort(childNodeArray, new NodeCharacterComparator()); + this.outgoingEdges = new AtomicReferenceArray(childNodeArray); + this.incomingEdgeCharSequence = edgeCharSequence; + this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter(this.outgoingEdges); + } + + @Override + public CharSequence getIncomingEdge() { + return incomingEdgeCharSequence; + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharSequence.charAt(0); + } + + @Override + public Object getValue() { + return null; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... + int index = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter); + if (index < 0) { + // No such edge exists... + return null; + } + // Atomically return the child node at this index... + return outgoingEdges.get(index); + } + + @Override + public void updateOutgoingEdge(Node childNode) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... + int index = NodeUtil.binarySearchForEdge(outgoingEdges, childNode.getIncomingEdgeFirstCharacter()); + if (index < 0) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + // Atomically update the child node at this index... 
+ outgoingEdges.set(index, childNode); + } + + @Override + public List getOutgoingEdges() { + return outgoingEdgesAsList; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharSequence); + sb.append(", value=null"); + sb.append(", edges=").append(getOutgoingEdges()); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeNonLeafVoidValue.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeNonLeafVoidValue.java new file mode 100644 index 0000000..7bd032e --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequenceNodeNonLeafVoidValue.java @@ -0,0 +1,93 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicReferenceArray; + +/** + * Stores incoming edge as a {@link CharSequence} (a view onto the original key) rather than copying the edge + * into a character array, and stores outgoing edges as an {@link AtomicReferenceArray}. Does not store a + * value and returns {@link VoidValue} for the value. + */ +public class CharSequenceNodeNonLeafVoidValue implements Node { + + + // Characters in the edge arriving at this node from a parent node. + // Once assigned, we never modify this... + private final CharSequence incomingEdgeCharSequence; + + // References to child nodes representing outgoing edges from this node. + // Once assigned we never add or remove references, but we do update existing references to point to new child + // nodes provided new edges start with the same first character... + private final AtomicReferenceArray outgoingEdges; + + // A read-only List wrapper around the outgoingEdges AtomicReferenceArray... 
+ private final List outgoingEdgesAsList; + + public CharSequenceNodeNonLeafVoidValue(CharSequence edgeCharSequence, List outgoingEdges) { + Node[] childNodeArray = outgoingEdges.toArray(new Node[outgoingEdges.size()]); + // Sort the child nodes... + Arrays.sort(childNodeArray, new NodeCharacterComparator()); + this.outgoingEdges = new AtomicReferenceArray(childNodeArray); + this.incomingEdgeCharSequence = edgeCharSequence; + this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter(this.outgoingEdges); + } + + @Override + public CharSequence getIncomingEdge() { + return incomingEdgeCharSequence; + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return incomingEdgeCharSequence.charAt(0); + } + + @Override + public Object getValue() { + return VoidValue.SINGLETON; + } + + @Override + public Node getOutgoingEdge(Character edgeFirstCharacter) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... + int index = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter); + if (index < 0) { + // No such edge exists... + return null; + } + // Atomically return the child node at this index... + return outgoingEdges.get(index); + } + + @Override + public void updateOutgoingEdge(Node childNode) { + // Binary search for the index of the node whose edge starts with the given character. + // Note that this binary search is safe in the face of concurrent modification due to constraints + // we enforce on use of the array, as documented in the binarySearchForEdge method... 
+ int index = NodeUtil.binarySearchForEdge(outgoingEdges, childNode.getIncomingEdgeFirstCharacter()); + if (index < 0) { + throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode); + } + // Atomically update the child node at this index... + outgoingEdges.set(index, childNode); + } + + @Override + public List getOutgoingEdges() { + return outgoingEdgesAsList; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Node{"); + sb.append("edge=").append(incomingEdgeCharSequence); + sb.append(", value=").append(VoidValue.SINGLETON); + sb.append(", edges=").append(getOutgoingEdges()); + sb.append("}"); + return sb.toString(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequences.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequences.java new file mode 100644 index 0000000..675fc13 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/CharSequences.java @@ -0,0 +1,130 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Iterator; + +public class CharSequences { + + /** + * Private constructor, not used. 
+ */ + CharSequences() { + } + + public static Iterable generateSuffixes(final CharSequence input) { + return () -> new Iterator() { + int currentIndex = 0; + @Override + public boolean hasNext() { + return currentIndex < input.length(); + } + + @Override + public CharSequence next() { + return input.subSequence(currentIndex++, input.length()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Modification not supported"); + } + }; + } + + public static Iterable generatePrefixes(final CharSequence input) { + return () -> new Iterator() { + int currentIndex = 0; + + @Override + public boolean hasNext() { + return currentIndex < input.length(); + } + + @Override + public CharSequence next() { + return input.subSequence(0, ++currentIndex); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Modification not supported"); + } + }; + } + + public static CharSequence getCommonPrefix(CharSequence first, CharSequence second) { + int minLength = Math.min(first.length(), second.length()); + for (int i = 0; i < minLength; i++) { + if (first.charAt(i) != second.charAt(i)) { + return first.subSequence(0, i); + } + } + return first.subSequence(0, minLength); + } + + public static CharSequence getSuffix(CharSequence input, int startIndex) { + if (startIndex >= input.length()) { + return ""; + } + return input.subSequence(startIndex, input.length()); + } + + public static CharSequence getPrefix(CharSequence input, int endIndex) { + if (endIndex > input.length()) { + return input; + } + return input.subSequence(0, endIndex); + } + + public static CharSequence subtractPrefix(CharSequence main, CharSequence prefix) { + int startIndex = prefix.length(); + int mainLength = main.length(); + if (startIndex > mainLength) { + return ""; + } + return main.subSequence(startIndex, mainLength); + } + + public static CharSequence concatenate(final CharSequence first, final CharSequence second) { + return new 
StringBuilder().append(first).append(second); + } + + public static CharSequence reverse(CharSequence input) { + return new StringBuilder(input.length()).append(input).reverse(); + } + + /** + * Returns a {@link CharSequence} containing a copy of the given {@code char[]}. Note that this {@link CharSequence} will + * not reflect subsequent changes to the {@code char[]}. + * + * @param characters The {@code char[]} to copy + * @return A {@link CharSequence} containing the characters of the given {@code char[]} + */ + public static CharSequence fromCharArray(final char[] characters) { + return new StringBuilder(characters.length).append(characters); + } + + /** + * Copies the given {@link CharSequence} into a new {@code char[]}. + * + * @param charSequence The {@link CharSequence} to copy + * @return A new {@code char[]} populated with characters from the given {@link CharSequence} + */ + public static char[] toCharArray(CharSequence charSequence) { + final int numChars = charSequence.length(); + char[] charArray = new char[numChars]; + for (int i = 0; i < numChars; i++) { + charArray[i] = charSequence.charAt(i); + } + return charArray; + } + + public static String toString(CharSequence charSequence) { + if (charSequence == null) { + return null; + } + if (charSequence instanceof String) { + return (String)charSequence; + } + return String.valueOf(charSequence); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/DefaultCharArrayNodeFactory.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/DefaultCharArrayNodeFactory.java new file mode 100644 index 0000000..936a9ca --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/DefaultCharArrayNodeFactory.java @@ -0,0 +1,63 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.List; + +/** + * A {@link NodeFactory} which creates various implementations of {@link Node} objects all of which store incoming + * edge
characters as a character array inside the node. + *

+ * Returns an optimal node implementation depending on arguments supplied, which will be one of: + *

    + *
  • {@link CharArrayNodeDefault} - contains all possible fields
  • + *
  • {@link CharArrayNodeNonLeafNullValue} - does not store a value, returns {@code null} for value
  • + *
  • {@link CharArrayNodeNonLeafVoidValue} - does not store a value, returns {@link VoidValue} for value
  • + *
  • {@link CharArrayNodeLeafVoidValue} - does not store child edges or a value, returns {@link VoidValue} for value
  • + *
  • {@link CharArrayNodeLeafWithValue} - does not store child edges, but does store a value
  • + *
+ *

+ * When the application supplies {@link VoidValue} for a value, this factory will omit actually storing that value + * in the tree and will return one of the VoidValue-optimized nodes above which can reduce memory usage. + * + * @author Niall Gallagher + */ +public class DefaultCharArrayNodeFactory implements NodeFactory { + + @Override + public Node createNode(CharSequence edgeCharacters, Object value, List childNodes, boolean isRoot) { + if (edgeCharacters == null) { + throw new IllegalStateException("The edgeCharacters argument was null"); + } + if (!isRoot && edgeCharacters.length() == 0) { + throw new IllegalStateException("Invalid edge characters for non-root node: " + CharSequences.toString(edgeCharacters)); + } + if (childNodes == null) { + throw new IllegalStateException("The childNodes argument was null"); + } + NodeUtil.ensureNoDuplicateEdges(childNodes); + if (childNodes.isEmpty()) { + // Leaf node... + if (value instanceof VoidValue) { + return new CharArrayNodeLeafVoidValue(edgeCharacters); + } + else if (value != null) { + return new CharArrayNodeLeafWithValue(edgeCharacters, value); + } + else { + return new CharArrayNodeLeafNullValue(edgeCharacters); + } + } + else { + // Non-leaf node... 
+ if (value instanceof VoidValue) { + return new CharArrayNodeNonLeafVoidValue(edgeCharacters, childNodes); + } + else if (value == null) { + return new CharArrayNodeNonLeafNullValue(edgeCharacters, childNodes); + } + else { + return new CharArrayNodeDefault(edgeCharacters, value, childNodes); + } + } + } + +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/DefaultCharSequenceNodeFactory.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/DefaultCharSequenceNodeFactory.java new file mode 100644 index 0000000..7d8dd62 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/DefaultCharSequenceNodeFactory.java @@ -0,0 +1,66 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.List; + +/** + * A {@link NodeFactory} which creates various implementations of {@link Node} objects all of which store incoming + * edge characters as a {@link CharSequence} (a view onto the original key) rather than copying the edge into a + * character array. + *

+ * Returns an optimal node implementation depending on arguments supplied, which will be one of: + *

    + *
  • {@link CharSequenceNodeDefault} - contains all possible fields
  • + *
  • {@link CharSequenceNodeNonLeafNullValue} - does not store a value, returns {@code null} for value
  • + *
  • {@link CharSequenceNodeNonLeafVoidValue} - does not store a value, returns {@link VoidValue} for value
  • + *
  • {@link CharSequenceNodeLeafVoidValue} - does not store child edges or a value, returns {@link VoidValue} for value
  • + *
  • {@link CharSequenceNodeLeafWithValue} - does not store child edges, but does store a value
  • + *
+ *

+ * When the application supplies {@link VoidValue} for a value, this factory will omit actually storing that value + * in the tree and will return one of the Void-optimized nodes above which can reduce memory usage. + * + * @author Niall Gallagher + */ +public class DefaultCharSequenceNodeFactory implements NodeFactory { + + @Override + public Node createNode(CharSequence edgeCharacters, Object value, List childNodes, boolean isRoot) { + if (edgeCharacters == null) { + throw new IllegalStateException("The edgeCharacters argument was null"); + } + if (!isRoot && edgeCharacters.length() == 0) { + throw new IllegalStateException("Invalid edge characters for non-root node: " + CharSequences.toString(edgeCharacters)); + } + if (childNodes == null) { + throw new IllegalStateException("The childNodes argument was null"); + } + NodeUtil.ensureNoDuplicateEdges(childNodes); + + + if (childNodes.isEmpty()) { + // Leaf node... + if (value instanceof VoidValue) { + return new CharSequenceNodeLeafVoidValue(edgeCharacters); + } + else if (value != null) { + return new CharSequenceNodeLeafWithValue(edgeCharacters, value); + } + else { + return new CharSequenceNodeLeafNullValue(edgeCharacters); + } + } + else { + // Non-leaf node... 
+ if (value instanceof VoidValue) { + return new CharSequenceNodeNonLeafVoidValue(edgeCharacters, childNodes); + } + else if (value == null) { + return new CharSequenceNodeNonLeafNullValue(edgeCharacters, childNodes); + } + else { + return new CharSequenceNodeDefault(edgeCharacters, value, childNodes); + } + } + } + +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/Iterables.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/Iterables.java new file mode 100644 index 0000000..45d7ef3 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/Iterables.java @@ -0,0 +1,122 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +/** + * Provides methods to convert {@link Iterable}s to {@link List}s and {@link Set}s. + *

+ * This class is mostly provided for backwards compatibility in applications which were programmed against + * concurrent-trees 1.0.0, in which the tree APIs returned lists and sets instead of lazily-evaluated iterables. + *

+ * Note that in applications which would have simply iterated through the lists and sets returned by the old APIs, + * the new approach of returning lazy iterables is more efficient. Applications can iterate the iterables returned + * in exactly the same manner, and results will be the same. + *

+ * These methods are provided for convenience in applications which actually relied on List and Set-specific + * features in the objects which were returned. + *

+ * Most methods in this class are somewhat similar to utilities in Google Guava; but are provided here to avoid a + * dependency on Guava. Applications could use either these methods or Guava. + */ +public class Iterables { + + // Most methods in this class are somewhat similar to utilities in Google Guava; but are provided here + // to avoid a dependency on Guava. Applications could use either these methods or Guava. + + /** + * Copies elements from the given {@link Iterable} into a new {@link List}. + *

+ * The iteration order of the returned list will be the same as that of the iterable. + *

+ * Be aware of the memory implications of copying objects from a lazy iterable into a collection; + * usually it's better to just work with the iterable directly (i.e. by iterating it). + * + * @param iterable Provides elements to be copied into a new list + * @param The type of elements returned by the iterable + * @return A new {@link List} which contains the elements which were returned by the iterable + */ + public static List toList(Iterable iterable) { + if (iterable instanceof Collection) { + return new ArrayList((Collection)iterable); + } + else { + List list = new LinkedList(); + for (T element : iterable) { + list.add(element); + } + return list; + } + } + + /** + * Copies elements from the given {@link Iterable} into a new {@link Set}. + *

+ * The iteration order of the returned set will be the same as that of the iterable. + *

+ * Be aware of the memory implications of copying objects from a lazy iterable into a collection; + * usually it's better to just work with the iterable directly (i.e. by iterating it). + * + * @param iterable Provides elements to be copied into a new set + * @param The type of elements returned by the iterable + * @return A new {@link Set} which contains the elements which were returned by the iterable + */ + public static Set toSet(Iterable iterable) { + if (iterable instanceof Collection) { + // Return a LinkedHashSet instead of HashSet, to preserve iteration order... + return new LinkedHashSet((Collection)iterable); + } + else { + Set list = new LinkedHashSet(); + for (T element : iterable) { + list.add(element); + } + return list; + } + } + + /** + * Returns a string representation of elements returned by the given {@link Iterable}. + * + * @param iterable Provides elements whose toString representations should be included in the string + * @return A string representation of elements returned by the given {@link Iterable} + */ + public static String toString(Iterable iterable) { + StringBuilder sb = new StringBuilder(); + sb.append("["); + for (Iterator i = iterable.iterator(); i.hasNext();) { + sb.append(i.next()); + if (i.hasNext()) { + sb.append(", "); + } + } + sb.append("]"); + return sb.toString(); + } + + /** + * Counts the number of elements returned by the given {@link Iterable}. + * + * @param iterable Provides elements to be counted + * @return The number of elements returned by the iterable + */ + public static int count(Iterable iterable) { + int count = 0; + //noinspection UnusedDeclaration + for (Object next : iterable) { + count++; + } + return count; + } + + /** + * Private constructor, not used. 
+ */ + Iterables() { + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/KeyValuePair.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/KeyValuePair.java new file mode 100644 index 0000000..75e421c --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/KeyValuePair.java @@ -0,0 +1,45 @@ +package org.xbib.datastructures.trie.concurrent.util; + +/** + * Encapsulates a key and a value. + * + * @param The type of the value + */ +public interface KeyValuePair { + + /** + * Returns the key with which the value is associated + * @return The key with which the value is associated + */ + CharSequence getKey(); + + /** + * Returns the value associated with the key + * @return The value associated with the key + */ + O getValue(); + + /** + * Compares this {@link KeyValuePair} object with another for equality. + *

+ * This is implemented based on equality of the keys. + * + * @param o The other object to compare + * @return True if the other object is also a {@link KeyValuePair} and is equal to this one as specified above + */ + @Override + boolean equals(Object o); + + /** + * Returns a hash code for this object. + */ + @Override + int hashCode(); + + /** + * Returns a string representation as {@code (key, value)}. + * @return A string representation as {@code (key, value)} + */ + @Override + String toString(); +} \ No newline at end of file diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/LazyIterator.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/LazyIterator.java new file mode 100644 index 0000000..cc9b55d --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/LazyIterator.java @@ -0,0 +1,76 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * An unmodifiable iterator which computes the next element to return only when it is requested. + *

+ * This class is inspired by com.google.common.collect.AbstractIterator in Google Guava, + * which was written by the Google Guava Authors, in particular by Kevin Bourrillion. + * + * @author Niall Gallagher + */ +public abstract class LazyIterator implements Iterator { + + T next = null; + + enum State { READY, NOT_READY, DONE, FAILED } + + State state = State.NOT_READY; + + @Override + public void remove() { + throw new UnsupportedOperationException("Iterator.remove() is not supported"); + } + + @Override + public final boolean hasNext() { + if (state == State.FAILED) { + throw new IllegalStateException("This iterator is in an inconsistent state, and can no longer be used, " + + "due to an exception previously thrown by the computeNext() method"); + } + switch (state) { + case DONE: + return false; + case READY: + return true; + } + return tryToComputeNext(); + } + + boolean tryToComputeNext() { + state = State.FAILED; // temporary pessimism + next = computeNext(); + if (state != State.DONE) { + state = State.READY; + return true; + } + return false; + } + + @Override + public final T next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + state = State.NOT_READY; + return next; + } + + /** + * + * @return a dummy value which if returned by the computeNext() method, signals that there are no more + * elements to return + */ + protected final T endOfData() { + state = State.DONE; + return null; + } + + /** + * @return The next element which the iterator should return, or the result of calling endOfData() + * if there are no more elements to return + */ + protected abstract T computeNext(); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/Node.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/Node.java new file mode 100644 index 0000000..a9f0eab --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/Node.java @@ -0,0 +1,136 
@@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.List; + +/** + * Specifies the methods that nodes must implement. + *

+ * The main function of a node is to represent an "edge" in the tree. An edge is a connection from a parent node to a + * child node which represents a sequence of characters. For practical reasons we store these characters in the child + * node, to avoid needing separate Edge objects. All nodes except the root encode at least one character for an edge. + *

+ * Nodes contain several fields, but not all nodes will actually need to store values in every field. Therefore + * some specialized implementations of this interface are possible, optimized for storing various combinations of + * data items in reduced numbers of fields, to reduce memory overhead. + *

+ * Nodes are partially immutable: + *

    + *
  • + * The characters of an "edge" encoded within a node are immutable (these characters belong to + * the edge arriving at the current node from a parent node) + *
  • + *
  • + * The number of outgoing edges from a node (references to child nodes), and the first characters of + * those edges are immutable + *
  • + *
  • + * The references to child nodes for existing edges (as identified by their first characters) are + * mutable with constraints; the reference to a child node for an existing edge may be updated to point + * to a different child node as long as the new edge starts with the same first character + *
  • + *
  • + * If a node stores a value, the reference to the value is immutable (values can be changed but it + * requires recreating the node with the new value - this is to account for specialized node implementations + * omitting a field for the value when not required) + *
  • + *
+ * These constraints exist to allow concurrent traversal and modifications to the tree. Nodes are required to implement + * some operations atomically; see documentation on each method in this interface for details. + *

+ * Hints for specialized implementations of this Node interface: + *

    + *
  • + * Leaf nodes do not need to store references to child nodes; a specialized node implementation + * could eliminate a field and associated data structure for child node references + *
  • + *
  • + * All leaf nodes store values + *
  • + *
  • + * Some non-leaf nodes store values, some do not + *
  • + *
  • + * Edge character data can be encoded using implementation-specific methods. + *

    + * Nodes are not required to store a {@link CharSequence} object verbatim, or use a particular implementation of + * {@link CharSequence}, the only requirement is that they provide a {@link CharSequence} view onto + * the character data. + *

    + * Character data can optionally be stored outside of the tree. {@link CharSequence}s can encode a start and + * end offset (or length) as a view onto a larger string (possibly a view onto the original key inserted). + * Furthermore end offset could be stored as length, relative to the start offset with variable length encoding + * to avoid storing 4 bytes for the length. This option would have consequences for + * garbage collection of large string keys however, therefore would mostly suit immutable data sets. + *

    + * Character data can be compressed. {@link CharSequence}s are free to store character data within the tree but + * in a size-reduced encoding such as UTF-8 + *

  • + *
+ * + * @author Niall Gallagher + */ +public interface Node extends NodeCharacterProvider { + + /** + * Returns the first character of the "edge" encoded in this node, belonging to the connection from a parent node to + * this node. + *

+ * + * @return The first character of the "edge" encoded in this node + */ + Character getIncomingEdgeFirstCharacter(); + + /** + * Returns all characters of the "edge" encoded in this node, belonging to the connection from a parent node to this + * node. + * + * @return All characters of the "edge" encoded in this node + */ + CharSequence getIncomingEdge(); + + /** + * Returns a value object which has been associated with a key and which is stored in this node, or returns + * null if no value is stored in this node. + * + * @return A value object which has been associated with a key and which is stored in this node, or returns + * null if no value is stored in this node + */ + Object getValue(); + + + /** + * Returns the child of this node whose edge starts with the given first character. + *

+ * This read must be performed atomically, in relation to writes made via + * {@link #updateOutgoingEdge(Node)}. + * + * @param edgeFirstCharacter The first character of the edge for which the associated child node is required + * @return The child of this node whose edge starts with the given first character, or null if this + * node has no such outgoing edge + */ + Node getOutgoingEdge(Character edgeFirstCharacter); + + /** + * Updates the child node reference for a given edge (identified by its first character) to point to a different + * child node. + *

+ * The first character of the given child node's edge must match the first character of an existing outgoing + * edge from this node. + *

+ * This write must be performed atomically, in relation to reads made via + * {@link #getOutgoingEdge(Character)}. + * + * @param childNode The new child node to associate with this edge + */ + void updateOutgoingEdge(Node childNode); + + /** + * Returns a read-only list of the child nodes to which this node has outgoing edges, i.e. child nodes which have + * incoming edges from this node. + *

+ * It is intended that this method will be used for copying/cloning nodes. + * + * @return A read-only list of the child nodes to which this node has outgoing edges + */ + List getOutgoingEdges(); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterComparator.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterComparator.java new file mode 100644 index 0000000..be86c5b --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterComparator.java @@ -0,0 +1,15 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.Comparator; + +/** + * Specifies binary search compatibility, and sorting compatibility, of nodes based on + * {@link Node#getIncomingEdgeFirstCharacter()}. + */ +public class NodeCharacterComparator implements Comparator { + + @Override + public int compare(NodeCharacterProvider o1, NodeCharacterProvider o2) { + return o1.getIncomingEdgeFirstCharacter().compareTo(o2.getIncomingEdgeFirstCharacter()); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterKey.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterKey.java new file mode 100644 index 0000000..2e8e381 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterKey.java @@ -0,0 +1,20 @@ +package org.xbib.datastructures.trie.concurrent.util; + +/** + * A lightweight object which simply wraps a {@link Character} and implements {@link NodeCharacterProvider}, which + * can be used as a key to locate a node having the same edge first character in a list of nodes using binary search. 
+ * + */ +public class NodeCharacterKey implements NodeCharacterProvider { + + private final Character character; + + public NodeCharacterKey(Character character) { + this.character = character; + } + + @Override + public Character getIncomingEdgeFirstCharacter() { + return character; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterProvider.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterProvider.java new file mode 100644 index 0000000..9bcee9c --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeCharacterProvider.java @@ -0,0 +1,14 @@ +package org.xbib.datastructures.trie.concurrent.util; + +/** + * A super-interface of both {@link Node} and {@link NodeCharacterKey} + * which, by sharing this common interface, enables binary search of nodes via + * {@link java.util.Collections#binarySearch(java.util.List, Object, java.util.Comparator)}. + * + * @see NodeCharacterComparator + * @see NodeCharacterKey + */ +public interface NodeCharacterProvider { + + Character getIncomingEdgeFirstCharacter(); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeFactory.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeFactory.java new file mode 100644 index 0000000..d648934 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeFactory.java @@ -0,0 +1,34 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.List; + +/** + * An interface for a factory which creates new {@link Node} objects on demand, to encapsulate specified variables. + * Factory objects can choose to return implementations of the {@link Node} interface which are memory-optimized for + * storing only the given variables, potentially further optimized based on variable values. 
+ */ +public interface NodeFactory { + + /** + * Returns a new {@link Node} object which encapsulates the arguments supplied, optionally returning implementations + * of the {@link Node} interface which are memory-optimized for storing only the supplied combination of variables, + * potentially further optimized based on variable values. + * + * @param edgeCharacters Provides edge characters to be stored in the node. This is never null. In the case of + * (re-)constructing the root node, this will contain zero characters, otherwise will always contain one or more + * characters + * + * @param value An arbitrary object to associate with the node. This can be null, but it will not be null if + * dealing with a leaf node (when childNodes will be empty) + * + * @param childNodes A list of child nodes to store in the node. This will never be null, but may be empty when + * building a leaf node + * + * @param isRoot Indicates if this will be the root node, in which case edge characters will be non-null but empty, + * value will be null, and child nodes will be non-null but may be empty + * + * @return An object implementing the {@link Node} interface which stores the given variables + */ + Node createNode(CharSequence edgeCharacters, Object value, List childNodes, boolean isRoot); + +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeUtil.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeUtil.java new file mode 100644 index 0000000..f2489d7 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/NodeUtil.java @@ -0,0 +1,80 @@ +package org.xbib.datastructures.trie.concurrent.util; + +import java.util.*; +import java.util.concurrent.atomic.AtomicReferenceArray; + +/** + * Static utility methods useful when implementing {@link Node}s. + */ +public class NodeUtil { + + /** + * Private constructor, not used. 
+ */ + NodeUtil() { + } + + /** + * Returns the index of the node in the given {@link AtomicReferenceArray} whose edge starts with the given + * first character. + *

+ * This method expects that some constraints are enforced on the {@link AtomicReferenceArray}: + *

    + *
  • + * The array must already be in ascending sorted order of the first character of the edge for each node + *
  • + *
  • + * No entries in the array can be null + *
  • + *
  • + * Any existing node in the array cannot be swapped concurrently for another unless the edge associated + * with the other node also starts with the same first character + *
  • + *
+ * If these constraints are enforced as expected, then this method will have deterministic behaviour even in the + * face of concurrent modification. + * + * @param childNodes An {@link AtomicReferenceArray} of {@link Node} objects, which is used in accordance with + * the constraints documented in this method + * + * @param edgeFirstCharacter The first character of the edge for which the associated node is required + * @return The index of the node representing the indicated edge, or a value < 0 if no such node exists in the + * array + */ + public static int binarySearchForEdge(AtomicReferenceArray childNodes, Character edgeFirstCharacter) { + // inspired by Collections#indexedBinarySearch() + int low = 0; + int high = childNodes.length() - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + Node midVal = childNodes.get(mid); + int cmp = midVal.getIncomingEdgeFirstCharacter().compareTo(edgeFirstCharacter); + + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid - 1; + else + return mid; // key found + } + return -(low + 1); // key not found + } + + /** + * Throws an exception if any nodes in the given list represent edges having the same first character. + * + * @param nodes The list of nodes to validate + * @throws IllegalStateException If a duplicate edge is detected + */ + public static void ensureNoDuplicateEdges(List nodes) { + // Sanity check that no two nodes specify an edge with the same first character... 
+ Set uniqueChars = new HashSet(nodes.size()); + for (Node node : nodes) { + uniqueChars.add(node.getIncomingEdgeFirstCharacter()); + } + if (nodes.size() != uniqueChars.size()) { + throw new IllegalStateException("Duplicate edge detected in list of nodes supplied: " + nodes); + } + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/VoidValue.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/VoidValue.java new file mode 100644 index 0000000..e0f1569 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/concurrent/util/VoidValue.java @@ -0,0 +1,27 @@ +package org.xbib.datastructures.trie.concurrent.util; + +/** + * A dummy object which can be supplied as a value for an entry in a tree. + */ +public class VoidValue { + + @Override + public int hashCode() { + return 1; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof VoidValue; + } + + @Override + public String toString() { + return "-"; + } + + VoidValue() { + } + + public static final VoidValue SINGLETON = new VoidValue(); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/regex/RegexTrie.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/regex/RegexTrie.java index 4263bb1..aee99f4 100644 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/regex/RegexTrie.java +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/regex/RegexTrie.java @@ -11,7 +11,7 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; /** - * The RegexTrie is a trie where each _stored_ segment of the key is a regex {@link Pattern}. Thus, + * The RegexTrie is a trie where each _stored_ segment of the key is a regex {@link Pattern}. Thus, * the full _stored_ key is a List rather than a String as in a standard trie.
Note that * the retrieve method requires a List, which will be matched against the * {@link Pattern}s, rather than checked for equality as in a standard trie. It will likely perform @@ -44,7 +44,7 @@ public class RegexTrie { private V value; - private final Map> children; + private final Map> children; public RegexTrie() { children = new LinkedHashMap<>(); @@ -74,15 +74,17 @@ public class RegexTrie { * retrieve the associated {@code value} */ public void put(V value, List patterns) { - List list = new ArrayList<>(patterns.size()); + List list = new ArrayList<>(patterns.size()); for (Object object : patterns) { - CompPattern compPattern = null; + ComparablePattern comparablePattern = null; if (object instanceof Pattern) { - compPattern = new CompPattern((Pattern) object); + comparablePattern = new ComparablePattern((Pattern) object); } else if (object instanceof String) { - compPattern = new CompPattern(Pattern.compile((String) object)); + if (!((String) object).isEmpty()) { + comparablePattern = new ComparablePattern(Pattern.compile((String) object)); + } } - list.add(compPattern); + list.add(comparablePattern); } validateAndPut(value, list); } @@ -129,17 +131,17 @@ public class RegexTrie { * A helper method to consolidate validation before adding an entry to the trie. 
* * @param value The value to set - * @param list The sequence of {@link CompPattern}s that must be sequentially matched to + * @param list The sequence of {@link ComparablePattern}s that must be sequentially matched to * retrieve the associated {@code value} */ - private V validateAndPut(V value, List list) { + private V validateAndPut(V value, List list) { if (list.size() == 0) { throw new IllegalArgumentException("pattern list must be non-empty"); } return recursivePut(value, list); } - private V recursivePut(V value, List patterns) { + private V recursivePut(V value, List patterns) { // Cases: // 1) patterns is empty -- set our value // 2) patterns is non-empty -- recurse downward, creating a child if necessary @@ -148,8 +150,8 @@ public class RegexTrie { this.value = value; return oldValue; } else { - CompPattern curKey = patterns.get(0); - List nextKeys = patterns.subList(1, patterns.size()); + ComparablePattern curKey = patterns.get(0); + List nextKeys = patterns.subList(1, patterns.size()); // Create a new child to handle RegexTrie nextChild = children.get(curKey); if (nextChild == null) { @@ -171,8 +173,8 @@ public class RegexTrie { V wildcardValue = null; String curKey = strings.get(0); List nextKeys = strings.subList(1, strings.size()); - for (Map.Entry> child : children.entrySet()) { - CompPattern pattern = child.getKey(); + for (Map.Entry> child : children.entrySet()) { + ComparablePattern pattern = child.getKey(); if (pattern == null) { wildcardMatch = true; wildcardValue = child.getValue().value; @@ -213,13 +215,13 @@ public class RegexTrie { /** * Patterns aren't comparable by default, which prevents you from retrieving them from a Map. * This is a simple stub class that makes a Pattern with a working - * {@link CompPattern#equals(Object)} method. + * {@link ComparablePattern#equals(Object)} method. 
*/ - private static class CompPattern { + private static class ComparablePattern { protected final Pattern pattern; - CompPattern(Pattern pattern) { + ComparablePattern(Pattern pattern) { Objects.requireNonNull(pattern); this.pattern = pattern; } @@ -229,8 +231,8 @@ public class RegexTrie { Pattern otherPat; if (other instanceof Pattern) { otherPat = (Pattern) other; - } else if (other instanceof CompPattern) { - CompPattern otherCPat = (CompPattern) other; + } else if (other instanceof RegexTrie.ComparablePattern) { + ComparablePattern otherCPat = (ComparablePattern) other; otherPat = otherCPat.pattern; } else { return false; diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/Trie.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/Trie.java index 76c2631..3430ae4 100644 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/Trie.java +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/Trie.java @@ -5,15 +5,16 @@ import java.util.Set; public interface Trie, V> { - void add(K key, V value); + void put(K key, V value); - V search(K key); + V get(K key); - List startsWith(List> prefix); - - boolean contains(K key); + boolean containsKey(K key); - Set getAllKeys(); + Set getKeys(); int size(); + + List startsWith(List> prefix); + } diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieImpl.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieImpl.java index b1fff64..5b0f0b1 100644 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieImpl.java +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieImpl.java @@ -15,12 +15,12 @@ public class TrieImpl implements Trie, V> { } @Override - public void add(TrieKey key, V value) { + public void put(TrieKey key, V value) { addNode(node, key, 0, value); } @Override - public V search(TrieKey key) { + public V 
get(TrieKey key) { return findKey(node, key); } @@ -41,12 +41,12 @@ public class TrieImpl implements Trie, V> { } @Override - public boolean contains(TrieKey key) { + public boolean containsKey(TrieKey key) { return hasKey(node, key); } @Override - public Set> getAllKeys() { + public Set> getKeys() { Set> keySet = new HashSet<>(); getKeys(node, new TrieKeyImpl<>(), keySet); return keySet; @@ -54,7 +54,7 @@ public class TrieImpl implements Trie, V> { @Override public int size() { - return getAllKeys().size(); + return getKeys().size(); } private void getValues(Node currNode, List valueList) { @@ -79,7 +79,7 @@ public class TrieImpl implements Trie, V> { } private V findKey(Node currNode, TrieKey key) { - TrieKeySegment e = key.size() > 0 ? key.get(0) : null; + TrieKeySegment e = key.size() > 0 ? key.getSegment(0) : null; if (currNode.getChildren().containsKey(e)) { Node nextNode = currNode.getChildren().get(e); if (key.size() <= 1) { @@ -94,7 +94,7 @@ public class TrieImpl implements Trie, V> { } private boolean hasKey(Node currNode, TrieKey key) { - TrieKeySegment e = key.size() > 0 ? key.get(0) : null; + TrieKeySegment e = key.size() > 0 ? key.getSegment(0) : null; if (currNode.getChildren().containsKey(e)) { Node nextNode = currNode.getChildren().get(e); if (key.size() <= 1) { @@ -107,7 +107,7 @@ public class TrieImpl implements Trie, V> { } private void addNode(Node currNode, TrieKey key, int pos, V value) { - TrieKeySegment e = pos < key.size() ? key.get(pos) : null; + TrieKeySegment e = pos < key.size() ? 
key.getSegment(pos) : null; Node nextNode = currNode.getChildren().get(e); if (nextNode == null) { nextNode = new NodeImpl<>(); diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKey.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKey.java index 254e307..a41ed4f 100644 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKey.java +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKey.java @@ -1,6 +1,5 @@ package org.xbib.datastructures.trie.segment; -import java.util.Arrays; import java.util.List; public interface TrieKey { @@ -13,7 +12,7 @@ public interface TrieKey { void set(int i, TrieKeySegment trieKeySegment); - TrieKeySegment get(int i); + TrieKeySegment getSegment(int i); List> getSegments(); } diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKeyImpl.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKeyImpl.java index 4643b3e..fe65cb7 100644 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKeyImpl.java +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKeyImpl.java @@ -48,7 +48,7 @@ public class TrieKeyImpl implements TrieKey, Comparable> { } @Override - public TrieKeySegment get(int i) { + public TrieKeySegment getSegment(int i) { return segments.get(i); } diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/concurrent/ConcurrentRadixTreeTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/concurrent/ConcurrentRadixTreeTest.java new file mode 100644 index 0000000..a83d43f --- /dev/null +++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/concurrent/ConcurrentRadixTreeTest.java @@ -0,0 +1,962 @@ +package org.xbib.datastructures.trie.concurrent; + +import org.junit.jupiter.api.Test; +import 
org.xbib.datastructures.trie.concurrent.util.DefaultCharArrayNodeFactory; +import org.xbib.datastructures.trie.concurrent.util.DefaultCharSequenceNodeFactory; +import org.xbib.datastructures.trie.concurrent.util.Iterables; +import org.xbib.datastructures.trie.concurrent.util.KeyValuePair; +import org.xbib.datastructures.trie.concurrent.util.Node; +import org.xbib.datastructures.trie.concurrent.util.NodeFactory; +import org.xbib.datastructures.trie.concurrent.util.VoidValue; + +import java.util.Arrays; +import java.util.Collections; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class ConcurrentRadixTreeTest { + + private final NodeFactory nodeFactory = new DefaultCharArrayNodeFactory(); + + protected NodeFactory getNodeFactory() { + return nodeFactory; + } + + @Test + public void testBuildTreeByHand() { + // Build the tree by hand, as if the following strings were added: B, BA, BAN, BANDANA, BANAN, BANANA + + // ○ + // └── ○ B (1) + // └── ○ A (2) + // └── ○ N (3) + // ├── ○ AN (5) + // │ └── ○ A (6) + // └── ○ DANA (4) + + final Node root, n1, n2, n3, n4, n5, n6; + n6 = getNodeFactory().createNode("A", 6, Collections.emptyList(), false); + n5 = getNodeFactory().createNode("AN", 5, Arrays.asList(n6), false); + n4 = getNodeFactory().createNode("DANA", 4, Collections.emptyList(), false); + n3 = getNodeFactory().createNode("N", 3, Arrays.asList(n4, n5), false); // note: it should sort alphabetically such that n5 is first + n2 = getNodeFactory().createNode("A", 2, Arrays.asList(n3), false); + n1 = getNodeFactory().createNode("B", 1, Arrays.asList(n2), false); + //noinspection NullableProblems + root = getNodeFactory().createNode("", null, 
Arrays.asList(n1), true); + + String expected = + "○\n" + + "└── ○ B (1)\n" + + " └── ○ A (2)\n" + + " └── ○ N (3)\n" + + " ├── ○ AN (5)\n" + + " │ └── ○ A (6)\n" + + " └── ○ DANA (4)\n"; + + String actual = PrettyPrinter.prettyPrint(root); + assertEquals(expected, actual); + } + + @Test + public void testPut_AddToRoot() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("A", 1); + String expected = + "○\n" + + "└── ○ A (1)\n"; + String actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testPut_ChildNodeSorting() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("B", 1); + tree.put("A", 2); + String expected = + "○\n" + + "├── ○ A (2)\n" + + "└── ○ B (1)\n"; + String actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testPut_AppendChild() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOO", 1); + tree.put("FOOBAR", 2); + + String expected = + "○\n" + + "└── ○ FOO (1)\n" + + " └── ○ BAR (2)\n"; + String actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testPut_SplitEdge() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOOBAR", 1); + tree.put("FOO", 2); + + String expected = + "○\n" + + "└── ○ FOO (2)\n" + + " └── ○ BAR (1)\n"; + String actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testPut_SplitWithImplicitNode() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOOBAR", 1); + tree.put("FOOD", 2); + + String expected = + "○\n" + + "└── ○ FOO\n" + // We never explicitly inserted FOO + " ├── ○ BAR (1)\n" + + " └── ○ D (2)\n"; + String actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testPut_SplitAndMove() { + 
ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("TEST", 1); + tree.put("TEAM", 2); + tree.put("TOAST", 3); + + String expected = + "○\n" + + "└── ○ T\n" + // implicit node added automatically + " ├── ○ E\n" + // implicit node added automatically + " │ ├── ○ AM (2)\n" + + " │ └── ○ ST (1)\n" + + " └── ○ OAST (3)\n"; + String actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testPut_OverwriteValue() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + + Integer existing; + existing = tree.put("FOO", 1); + assertNull(existing); + existing = tree.put("FOO", 2); + assertNotNull(existing); + + assertEquals(Integer.valueOf(1), existing); + assertEquals(Integer.valueOf(2), tree.getValueForExactKey("FOO")); + } + + @Test + public void testPutIfAbsent_DoNotOverwriteValue() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + + Integer existing = tree.putIfAbsent("FOO", 1); + assertNull(existing); + + existing = tree.putIfAbsent("FOO", 2); + assertNotNull(existing); + + assertEquals(Integer.valueOf(1), existing); + assertEquals(Integer.valueOf(1), tree.getValueForExactKey("FOO")); + } + + @Test + public void testPutIfAbsent_SplitNode() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + + // ○ + // └── ○ FOO // implicit node added automatically + // ├── ○ BAR (1) + // └── ○ D (1) + + Integer existing; + existing = tree.putIfAbsent("FOOBAR", 1); + assertNull(existing); + existing = tree.putIfAbsent("FOOD", 1); + assertNull(existing); + + // This tests 'overwrite' set to true and exact match for node, + // but no existing value to return (i.e. implicit node above)... 
+ + // ○ + // └── ○ FOO (2) + // ├── ○ BAR (1) + // └── ○ D (1) + + existing = tree.putIfAbsent("FOO", 2); + assertNull(existing); + } + + @Test + public void testPut_VoidValue_CharArrayNodeFactory() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOO", VoidValue.SINGLETON); + tree.put("FOOBAR", VoidValue.SINGLETON); + String expected = + "○\n" + + "└── ○ FOO (-)\n" + + " └── ○ BAR (-)\n"; + String actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testPut_VoidValue_CharSequenceNodeFactory() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(new DefaultCharSequenceNodeFactory()); + tree.put("FOO", VoidValue.SINGLETON); + tree.put("FOOBAR", VoidValue.SINGLETON); + String expected = + "○\n" + + "└── ○ FOO (-)\n" + + " └── ○ BAR (-)\n"; + String actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testPutInternal_ArgumentValidation1() { + assertThrows(IllegalArgumentException.class, () -> { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + //noinspection NullableProblems + tree.put(null, 1); + }); + } + + @Test + public void testPutInternal_ArgumentValidation2() { + assertThrows(IllegalArgumentException.class, () -> { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + //noinspection NullableProblems + tree.put("FOO", null); + }); + } + + @Test + public void testPutInternal_ArgumentValidation3() { + assertThrows(IllegalArgumentException.class, () -> { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + //noinspection NullableProblems + tree.put("", 1); + }); + } + + @Test + public void testPutInternal_InvalidClassification() { + assertThrows(IllegalStateException.class, () -> { + // Create a dummy subclass of SearchResult which returns an INVALID classification... 
+ class InvalidSearchResult extends ConcurrentRadixTree.SearchResult { + InvalidSearchResult(CharSequence key, Node nodeFound, int charsMatched, int charsMatchedInNodeFound, Node parentNode, Node parentNodesParent) { + super(key, nodeFound, charsMatched, charsMatchedInNodeFound, parentNode, parentNodesParent); + } + + @Override + protected Classification classify(CharSequence key, Node nodeFound, int charsMatched, int charsMatchedInNodeFound) { + return Classification.INVALID; + } + } + // Override searchTree() to return the InvalidSearchResult... + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()) { + @Override + SearchResult searchTree(CharSequence key) { + return new InvalidSearchResult("FOO", root, 4, 4, null, null); + + } + }; + // We expect put() to throw an IllegalStateException + // when it encounters the unsupported INVALID classification... + tree.put("FOO", 1); + }); + } + + @Test + public void testSize() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + assertEquals(0, tree.size()); + tree.put("TEST", 1); + assertEquals(1, tree.size()); + tree.put("TEAM", 2); + assertEquals(2, tree.size()); + tree.put("TOAST", 3); + assertEquals(3, tree.size()); + + tree.remove("FOO"); + assertEquals(3, tree.size()); // no change + tree.remove("TOAST"); + assertEquals(2, tree.size()); + tree.remove("TEAM"); + assertEquals(1, tree.size()); + tree.remove("TEST"); + assertEquals(0, tree.size()); + } + + @Test + public void testGet() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("TEST", 1); + tree.put("TEAM", 2); + tree.put("TOAST", 3); + + // ○ + // └── ○ T // implicit node added automatically + // ├── ○ E // implicit node added automatically + // │ ├── ○ AM (2) + // │ └── ○ ST (1) + // └── ○ OAST (3) + + assertEquals(Integer.valueOf(1), tree.getValueForExactKey("TEST")); + assertEquals(Integer.valueOf(2), tree.getValueForExactKey("TEAM")); + assertEquals(Integer.valueOf(3), 
tree.getValueForExactKey("TOAST")); + assertNull(tree.getValueForExactKey("T")); + assertNull(tree.getValueForExactKey("TE")); + assertNull(tree.getValueForExactKey("E")); // sanity check, no such edge from root + assertNull(tree.getValueForExactKey("")); // sanity check, root never has a value + } + + @Test + public void testRemove_MoreThanOneChildEdge() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOO", 1); + tree.put("FOOBAR", 2); + tree.put("FOOD", 3); + + // ○ + // └── ○ FOO (1) + // ├── ○ BAR (2) + // └── ○ D (3) + + String expected, actual; + expected = + "○\n" + + "└── ○ FOO (1)\n" + + " ├── ○ BAR (2)\n" + + " └── ○ D (3)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + + boolean removed = tree.remove("FOO"); + assertTrue(removed); + + // ○ + // └── ○ FOO // value removed from FOO, but node needs to stay (as implicit node) + // ├── ○ BAR (2) + // └── ○ D (3) + + expected = + "○\n" + + "└── ○ FOO\n" + + " ├── ○ BAR (2)\n" + + " └── ○ D (3)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testRemove_ExactlyOneChildEdge() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOO", 1); + tree.put("FOOBAR", 2); + tree.put("FOOBARBAZ", 3); + + // ○ + // └── ○ FOO (1) + // └── ○ BAR (2) + // └── ○ BAZ (3) + + + String expected, actual; + expected = + "○\n" + + "└── ○ FOO (1)\n" + + " └── ○ BAR (2)\n" + + " └── ○ BAZ (3)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + + boolean removed = tree.remove("FOO"); + assertTrue(removed); + + // ○ + // └── ○ FOOBAR (2) // Edges FOO and BAR merged, + // └── ○ BAZ (3) // and the value and child edges from BAR also copied into merged node + + expected = + "○\n" + + "└── ○ FOOBAR (2)\n" + + " └── ○ BAZ (3)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void 
testRemove_ZeroChildEdges_DirectChildOfRoot() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOO", 1); + tree.put("BAR", 2); + + // ○ + // ├── ○ BAR (2) + // └── ○ FOO (1) + + String expected, actual; + expected = + "○\n" + + "├── ○ BAR (2)\n" + + "└── ○ FOO (1)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + + boolean removed = tree.remove("FOO"); + assertTrue(removed); + + // ○ // FOO removed, which involved recreating the root to change its child edges + // └── ○ BAR (2) + + expected = + "○\n" + + "└── ○ BAR (2)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testRemove_LastRemainingKey() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOO", 1); + + // ○ + // └── ○ FOO (1) + + String expected, actual; + expected = + "○\n" + + "└── ○ FOO (1)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + + boolean removed = tree.remove("FOO"); + assertTrue(removed); + + // ○ // FOO removed, which involved recreating the root with no remaining edges + + expected = + "○\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testRemove_ZeroChildEdges_OneStepFromRoot() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOO", 1); + tree.put("FOOBAR", 2); + + // ○ + // └── ○ FOO (1) + // └── ○ BAR (2) + + + String expected, actual; + expected = + "○\n" + + "└── ○ FOO (1)\n" + + " └── ○ BAR (2)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + + boolean removed = tree.remove("FOOBAR"); + assertTrue(removed); + + // ○ + // └── ○ FOO (1) // BAR removed, which involved recreating FOO and re-adding it to root node + + expected = + "○\n" + + "└── ○ FOO (1)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public 
void testRemove_ZeroChildEdges_SeveralStepsFromRoot() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOO", 1); + tree.put("FOOBAR", 2); + tree.put("FOOBARBAZ", 3); + + // ○ + // └── ○ FOO (1) + // └── ○ BAR (2) + // └── ○ BAZ (3) + + + String expected, actual; + expected = + "○\n" + + "└── ○ FOO (1)\n" + + " └── ○ BAR (2)\n" + + " └── ○ BAZ (3)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + + boolean removed = tree.remove("FOOBARBAZ"); + assertTrue(removed); + + // ○ + // └── ○ FOO (1) + // └── ○ BAR (2) // BAZ removed, which involved recreating BAR and re-adding it to its parent FOO + + expected = + "○\n" + + "└── ○ FOO (1)\n" + + " └── ○ BAR (2)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testRemove_DoNotRemoveSplitNode() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOOBAR", 1); + tree.put("FOOD", 2); + + // ○ + // └── ○ FOO // implicit node added automatically + // ├── ○ BAR (1) + // └── ○ D (2) + + + String expected, actual; + expected = + "○\n" + + "└── ○ FOO\n" + + " ├── ○ BAR (1)\n" + + " └── ○ D (2)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + + boolean removed = tree.remove("FOO"); + assertFalse(removed); + + expected = + "○\n" + + "└── ○ FOO\n" + // we expect no change + " ├── ○ BAR (1)\n" + + " └── ○ D (2)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testRemove_MergeSplitNode() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("TEST", 1); + tree.put("TEAM", 2); + tree.put("TOAST", 3); + + String expected, actual; + expected = + "○\n" + + "└── ○ T\n" + + " ├── ○ E\n" + + " │ ├── ○ AM (2)\n" + + " │ └── ○ ST (1)\n" + + " └── ○ OAST (3)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + + boolean removed = 
tree.remove("TEST"); + assertTrue(removed); + + expected = + "○\n" + + "└── ○ T\n" + + " ├── ○ EAM (2)\n" + + " └── ○ OAST (3)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testRemove_DoNotMergeSplitNodeWithValue() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("TEST", 1); + tree.put("TEAM", 2); + tree.put("TOAST", 3); + tree.put("TE", 4); + + String expected, actual; + expected = + "○\n" + + "└── ○ T\n" + + " ├── ○ E (4)\n" + + " │ ├── ○ AM (2)\n" + + " │ └── ○ ST (1)\n" + + " └── ○ OAST (3)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + + boolean removed = tree.remove("TEST"); + assertTrue(removed); + + expected = + "○\n" + + "└── ○ T\n" + + " ├── ○ E (4)\n" + + " │ └── ○ AM (2)\n" + + " └── ○ OAST (3)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testRemove_NoSuchKey() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("FOO", 1); + tree.put("BAR", 2); + + String expected, actual; + expected = + "○\n" + + "├── ○ BAR (2)\n" + + "└── ○ FOO (1)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + + boolean removed = tree.remove("BAZ"); + assertFalse(removed); + + expected = + "○\n" + // we expect no change + "├── ○ BAR (2)\n" + + "└── ○ FOO (1)\n"; + actual = PrettyPrinter.prettyPrint(tree); + assertEquals(expected, actual); + } + + @Test + public void testGetKeysForPrefix() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("TEST", 1); + tree.put("TEAM", 2); + tree.put("TOAST", 3); + tree.put("TEA", 4); + tree.put("COFFEE", 5); + + // ○ + // ├── ○ COFFEE (5) + // └── ○ T + // ├── ○ E + // │ ├── ○ A (4) + // │ │ └── ○ M (2) + // │ └── ○ ST (1) + // └── ○ OAST (3) + + assertEquals("[COFFEE, TEA, TEAM, TEST, TOAST]", Iterables.toString(tree.getKeysStartingWith(""))); + 
assertEquals("[COFFEE]", Iterables.toString(tree.getKeysStartingWith("C"))); + assertEquals("[COFFEE]", Iterables.toString(tree.getKeysStartingWith("COFFEE"))); + assertEquals("[]", Iterables.toString(tree.getKeysStartingWith("COFFEES"))); + assertEquals("[TEA, TEAM, TEST, TOAST]", Iterables.toString(tree.getKeysStartingWith("T"))); + assertEquals("[TEA, TEAM, TEST]", Iterables.toString(tree.getKeysStartingWith("TE"))); + assertEquals("[TEA, TEAM]", Iterables.toString(tree.getKeysStartingWith("TEA"))); + assertEquals("[TOAST]", Iterables.toString(tree.getKeysStartingWith("TO"))); + } + + @Test + public void testGetClosestKeys() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("COD", 1); + tree.put("CODFISH", 2); + tree.put("COFFEE", 3); + + // ○ + // └── ○ CO + // ├── ○ D (1) + // │ └── ○ FISH (2) + // └── ○ FFEE (3) + + assertEquals("[COD, CODFISH, COFFEE]", Iterables.toString(tree.getClosestKeys("COW"))); + assertEquals("[COD, CODFISH, COFFEE]", Iterables.toString(tree.getClosestKeys("CX"))); + assertEquals("[COD, CODFISH]", Iterables.toString(tree.getClosestKeys("COD"))); + assertEquals("[COFFEE]", Iterables.toString(tree.getClosestKeys("COF"))); + assertEquals("[]", Iterables.toString(tree.getClosestKeys("DO"))); + assertEquals("[CODFISH]", Iterables.toString(tree.getClosestKeys("CODFISHES"))); + } + + @Test + public void testGetValuesForClosestKeys() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("COD", 1); + tree.put("CODFISH", 2); + tree.put("COFFEE", 3); + + // ○ + // └── ○ CO + // ├── ○ D (1) + // │ └── ○ FISH (2) + // └── ○ FFEE (3) + + assertEquals("[1, 2, 3]", Iterables.toString(tree.getValuesForClosestKeys("COW"))); + assertEquals("[1, 2, 3]", Iterables.toString(tree.getValuesForClosestKeys("CX"))); + assertEquals("[1, 2]", Iterables.toString(tree.getValuesForClosestKeys("COD"))); + assertEquals("[3]", Iterables.toString(tree.getValuesForClosestKeys("COF"))); + assertEquals("[]", 
Iterables.toString(tree.getValuesForClosestKeys("DO"))); + assertEquals("[2]", Iterables.toString(tree.getValuesForClosestKeys("CODFISHES"))); + } + + @Test + public void testGetKeyValuePairsForClosestKeys() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("COD", 1); + tree.put("CODFISH", 2); + tree.put("COFFEE", 3); + + // ○ + // └── ○ CO + // ├── ○ D (1) + // │ └── ○ FISH (2) + // └── ○ FFEE (3) + + assertEquals("[(COD, 1), (CODFISH, 2), (COFFEE, 3)]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("COW"))); + assertEquals("[(COD, 1), (CODFISH, 2), (COFFEE, 3)]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("CX"))); + assertEquals("[(COD, 1), (CODFISH, 2)]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("COD"))); + assertEquals("[(COFFEE, 3)]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("COF"))); + assertEquals("[]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("DO"))); + assertEquals("[(CODFISH, 2)]", Iterables.toString(tree.getKeyValuePairsForClosestKeys("CODFISHES"))); + } + + @Test + public void testKeyValuePair_Accessor() { + KeyValuePair pair = new ConcurrentRadixTree.KeyValuePairImpl("FOO", 5); + assertEquals(pair.getKey(), "FOO"); + assertEquals(pair.getValue(), Integer.valueOf(5)); + assertEquals("(FOO, 5)", pair.toString()); + } + + @Test + public void testKeyValuePair_EqualsAndHashCode() { + KeyValuePair pair1 = new ConcurrentRadixTree.KeyValuePairImpl("FOO", 5); + KeyValuePair pair2 = new ConcurrentRadixTree.KeyValuePairImpl("FOO", 6); + KeyValuePair pair3 = new ConcurrentRadixTree.KeyValuePairImpl("BAR", 5); + assertTrue(pair1.equals(pair1)); + assertTrue(pair1.equals(pair2)); + assertFalse(pair1.equals(pair3)); + //noinspection NullableProblems,ObjectEqualsNull + assertFalse(pair1.equals(null)); + //noinspection EqualsBetweenInconvertibleTypes + assertFalse(pair1.equals("FOO")); + assertTrue(pair1.hashCode() == pair2.hashCode()); + assertFalse(pair1.hashCode() == 
pair3.hashCode()); + } + + @Test + public void testGetValuesForPrefix() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("TEST", 1); + tree.put("TEAM", 2); + tree.put("TOAST", 3); + tree.put("TEA", 4); + tree.put("COFFEE", 5); + + // ○ + // ├── ○ COFFEE (5) + // └── ○ T + // ├── ○ E + // │ ├── ○ A (4) + // │ │ └── ○ M (2) + // │ └── ○ ST (1) + // └── ○ OAST (3) + + assertEquals("[5, 4, 2, 1, 3]", Iterables.toString(tree.getValuesForKeysStartingWith(""))); + assertEquals("[5]", Iterables.toString(tree.getValuesForKeysStartingWith("C"))); + assertEquals("[5]", Iterables.toString(tree.getValuesForKeysStartingWith("COFFEE"))); + assertEquals("[]", Iterables.toString(tree.getValuesForKeysStartingWith("COFFEES"))); + assertEquals("[4, 2, 1, 3]", Iterables.toString(tree.getValuesForKeysStartingWith("T"))); + assertEquals("[4, 2, 1]", Iterables.toString(tree.getValuesForKeysStartingWith("TE"))); + assertEquals("[4, 2]", Iterables.toString(tree.getValuesForKeysStartingWith("TEA"))); + assertEquals("[3]", Iterables.toString(tree.getValuesForKeysStartingWith("TO"))); + } + + @Test + public void testGetKeyValuePairsForPrefix() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + tree.put("TEST", 1); + tree.put("TEAM", 2); + tree.put("TOAST", 3); + tree.put("TEA", 4); + tree.put("COFFEE", 5); + + // ○ + // ├── ○ COFFEE (5) + // └── ○ T + // ├── ○ E + // │ ├── ○ A (4) + // │ │ └── ○ M (2) + // │ └── ○ ST (1) + // └── ○ OAST (3) + + assertEquals("[(COFFEE, 5), (TEA, 4), (TEAM, 2), (TEST, 1), (TOAST, 3)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith(""))); + assertEquals("[(COFFEE, 5)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("C"))); + assertEquals("[(COFFEE, 5)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("COFFEE"))); + assertEquals("[]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("COFFEES"))); + assertEquals("[(TEA, 4), (TEAM, 2), (TEST, 1), (TOAST, 
3)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("T"))); + assertEquals("[(TEA, 4), (TEAM, 2), (TEST, 1)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("TE"))); + assertEquals("[(TEA, 4), (TEAM, 2)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("TEA"))); + assertEquals("[(TOAST, 3)]", Iterables.toString(tree.getKeyValuePairsForKeysStartingWith("TO"))); + } + + @Test + public void testRemove_ArgumentValidation() { + assertThrows(IllegalArgumentException.class, () -> { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + //noinspection NullableProblems + tree.remove(null); + }); + } + + @Test + public void testSearchTree() { + ConcurrentRadixTree tree = new ConcurrentRadixTree(getNodeFactory()); + // Build the tree by hand, as if the following strings were added: B, BA, BAN, BANDANA, BANAN, BANANA + + // ○ + // └── ○ B (1) + // └── ○ A (2) + // └── ○ N (3) + // ├── ○ AN (5) + // │ └── ○ A (6) + // └── ○ DANA (4) + + final Node root, n1, n2, n3, n4, n5, n6; + n6 = getNodeFactory().createNode("A", 6, Collections.emptyList(), false); + n5 = getNodeFactory().createNode("AN", 5, Arrays.asList(n6), false); + n4 = getNodeFactory().createNode("DANA", 4, Collections.emptyList(), false); + n3 = getNodeFactory().createNode("N", 3, Arrays.asList(n4, n5), false); // note: it should sort these such that n5 is first + n2 = getNodeFactory().createNode("A", 2, Arrays.asList(n3), false); + n1 = getNodeFactory().createNode("B", 1, Arrays.asList(n2), false); + //noinspection NullableProblems + root = getNodeFactory().createNode("", null, Arrays.asList(n1), true); + + // Overwrite the tree's default root with the one built by hand... + tree.root = root; + + // Sanity checks to assert that we built tree as expected.... 
+ String expected = + "○\n" + + "└── ○ B (1)\n" + + " └── ○ A (2)\n" + + " └── ○ N (3)\n" + + " ├── ○ AN (5)\n" + + " │ └── ○ A (6)\n" + + " └── ○ DANA (4)\n"; + + assertEquals(expected, PrettyPrinter.prettyPrint(tree)); + assertEquals(2, n3.getOutgoingEdges().size()); + assertTrue(n3.getOutgoingEdges().contains(n4)); + assertTrue(n3.getOutgoingEdges().contains(n5)); + + // Search for non-existing node. Should return root, with null parent, 0 charsMatched... + assertEquals(tree.root, tree.searchTree("Z").nodeFound); + assertNull(tree.searchTree("Z").parentNode); + assertNull(tree.searchTree("Z").parentNodesParent); + assertEquals(0, tree.searchTree("Z").charsMatched); + + // Search for first child node "B". Should return n1, parent should be root, 1 charsMatched... + assertEquals(n1, tree.searchTree("B").nodeFound); + assertEquals(tree.root, tree.searchTree("B").parentNode); + assertEquals(null, tree.searchTree("B").parentNodesParent); + assertEquals(1, tree.searchTree("B").charsMatched); + + // Search for node with split and multi-char child node at "BAN". Should return n3, parent n2, 3 charsMatched... + assertEquals(n3, tree.searchTree("BAN").nodeFound); + assertEquals(n2, tree.searchTree("BAN").parentNode); + assertEquals(n1, tree.searchTree("BAN").parentNodesParent); + assertEquals(3, tree.searchTree("BAN").charsMatched); + + // Search for node with multi-char label (exact match) at "BANAN". Should return n5, parent n3, 5 charsMatched... + assertEquals(n5, tree.searchTree("BANAN").nodeFound); + assertEquals(n3, tree.searchTree("BANAN").parentNode); + assertEquals(n2, tree.searchTree("BANAN").parentNodesParent); + assertEquals(5, tree.searchTree("BANAN").charsMatched); + + // Search for node with multi-char label (inexact match) at "BANA". Should return n5, parent n3, 4 charsMatched... 
+ assertEquals(n5, tree.searchTree("BANA").nodeFound); + assertEquals(n3, tree.searchTree("BANA").parentNode); + assertEquals(n2, tree.searchTree("BANA").parentNodesParent); + assertEquals(4, tree.searchTree("BANA").charsMatched); + + // Search for the last node in "BANANA". Should return n6, parent n5, 6 charsMatched... + assertEquals(n6, tree.searchTree("BANANA").nodeFound); + assertEquals(n5, tree.searchTree("BANANA").parentNode); + assertEquals(n3, tree.searchTree("BANANA").parentNodesParent); + assertEquals(6, tree.searchTree("BANANA").charsMatched); + + // Search for string longer than anything in tree, differing after leaf node "BANANA". + // Should return n6, parent n5, 6 chars matched... + assertEquals(n6, tree.searchTree("BANANAS").nodeFound); + assertEquals(n5, tree.searchTree("BANANAS").parentNode); + assertEquals(n3, tree.searchTree("BANANAS").parentNodesParent); + assertEquals(6, tree.searchTree("BANANAS").charsMatched); + + // Search for string longer than anything in tree, differing before split at "BAN". + // Should return n2, parent n1, 2 chars matched... + assertEquals(n2, tree.searchTree("BAR").nodeFound); + assertEquals(n1, tree.searchTree("BAR").parentNode); + assertEquals(tree.root, tree.searchTree("BAR").parentNodesParent); + assertEquals(2, tree.searchTree("BAR").charsMatched); + + // Search for string longer than anything in tree, differing immediately after split at "BAN". + // Should return n3, parent n2, 3 chars matched... + assertEquals(n3, tree.searchTree("BANS").nodeFound); + assertEquals(n2, tree.searchTree("BANS").parentNode); + assertEquals(n1, tree.searchTree("BANS").parentNodesParent); + assertEquals(3, tree.searchTree("BANS").charsMatched); + + // Search for string longer than anything in tree, differing in multi-char node "BANDANA". + // Should return n4, parent n3, 5 chars matched... 
+ assertEquals(n4, tree.searchTree("BANDAIDS").nodeFound); + assertEquals(n3, tree.searchTree("BANDAIDS").parentNode); + assertEquals(n2, tree.searchTree("BANDAIDS").parentNodesParent); + assertEquals(5, tree.searchTree("BANDAIDS").charsMatched); + } + + @Test + public void testSearchResult_FailureToClassify1() { + assertThrows(IllegalStateException.class, () -> { + // Testing the various (unlikely) ways to fall through classification to have the exception thrown... + new ConcurrentRadixTree.SearchResult("DUMMY", null, 70, 70, null, null); + }); + } + + @Test + public void testSearchResult_FailureToClassify2() { + assertThrows(IllegalStateException.class, () -> { + // Testing the various (unlikely) ways to fall through classification to have the exception thrown... + Node dummyNodeFound = getNodeFactory().createNode("DUMMY", 1, Collections.emptyList(), false); + new ConcurrentRadixTree.SearchResult("DUMMY", dummyNodeFound, 5, 70, null, null); + }); + } + + @Test + public void testSearchResult_FailureToClassify3() { + assertThrows(IllegalStateException.class, () ->{ + // Testing the various (unlikely) ways to fall through classification to have the exception thrown... 
+ Node dummyNodeFound = getNodeFactory().createNode("DUMMY", 1, Collections.emptyList(), false); + new ConcurrentRadixTree.SearchResult("DUMMY", dummyNodeFound, 4, 70, null, null); + }); + } +} diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/concurrent/PrettyPrinter.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/concurrent/PrettyPrinter.java new file mode 100644 index 0000000..e7e30e3 --- /dev/null +++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/concurrent/PrettyPrinter.java @@ -0,0 +1,94 @@ +package org.xbib.datastructures.trie.concurrent; + +import org.xbib.datastructures.trie.concurrent.util.Node; + +import java.io.IOException; +import java.util.List; + +/** + * Utility methods to generate semi-graphical string representations of trees. + */ +public class PrettyPrinter { + + /** + * Package-private constructor; not needed because all methods are static. + */ + PrettyPrinter() { + } + + /** + * Generates a semi-graphical string representation of a given tree. + *

+ * Example output:
+ * <pre>

+     * ○
+     * └── ○ B (1)
+     *     └── ○ A (2)
+     *         └── ○ N (3)
+     *             ├── ○ AN (5)
+     *             │   └── ○ A (6)
+     *             └── ○ DANA (4)
+     * </pre>
+ * + * @param tree The tree for which the semi-graphical representation should be generated + * @return A semi-graphical string representation of the tree + */ + public static String prettyPrint(ConcurrentRadixTree tree) { + return prettyPrint(tree.getNode()); + } + + public static String prettyPrint(Node node) { + StringBuilder sb = new StringBuilder(); + prettyPrint(node, sb, "", true, true); + return sb.toString(); + } + + /** + * Generates a semi-graphical string representation of a given tree, writing it to a given {@link Appendable}. + *

+ * Example output:
+ * <pre>

+     * ○
+     * └── ○ B (1)
+     *     └── ○ A (2)
+     *         └── ○ N (3)
+     *             ├── ○ AN (5)
+     *             │   └── ○ A (6)
+     *             └── ○ DANA (4)
+     * </pre>
+ * + * @param tree The tree for which the semi-graphical representation should be generated + * @param appendable The object to which the tree should be written + */ + public static void prettyPrint(ConcurrentRadixTree tree, Appendable appendable) { + prettyPrint(tree.getNode(), appendable, "", true, true); + } + + static void prettyPrint(Node node, Appendable sb, String prefix, boolean isTail, boolean isRoot) { + try { + StringBuilder label = new StringBuilder(); + if (isRoot) { + label.append("○"); + if (node.getIncomingEdge().length() > 0) { + label.append(" "); + } + } + label.append(node.getIncomingEdge()); + if (node.getValue() != null) { + label.append(" (").append(node.getValue()).append(")"); + } + sb.append(prefix).append(isTail ? isRoot ? "" : "└── ○ " : "├── ○ ").append(label).append("\n"); + List children = node.getOutgoingEdges(); + for (int i = 0; i < children.size() - 1; i++) { + prettyPrint(children.get(i), sb, prefix + (isTail ? isRoot ? "" : " " : "│ "), false, false); + } + if (!children.isEmpty()) { + prettyPrint(children.get(children.size() - 1), sb, prefix + (isTail ? isRoot ? "" : " " : "│ "), true, false); + } + } + catch (IOException ioException) { + // Rethrow the checked exception as a runtime exception... 
+ throw new IllegalStateException(ioException); + } + } +} diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/regex/RegexTrieTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/regex/RegexTrieTest.java new file mode 100644 index 0000000..c07416e --- /dev/null +++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/regex/RegexTrieTest.java @@ -0,0 +1,26 @@ +package org.xbib.datastructures.trie.regex; + +import org.junit.jupiter.api.Test; + +import java.util.LinkedList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +public class RegexTrieTest { + + @Test + public void testRegexTrie() { + List> captures = new LinkedList<>(); + RegexTrie trie = new RegexTrie<>(); + trie.put(2, List.of("a", "")); + trie.put(4, List.of("a", "b")); + assertEquals(2, trie.resolve(captures, List.of("a", "c", "e"))); + // returns 2. captures is now [[], ["c"], ["e"]] + assertEquals(4, trie.resolve(captures, List.of("a", "b"))); + // returns 4. captures is now [[], []] + assertNull(trie.resolve(captures, List.of("a", "b", "c"))); + // returns null. 
captures is now [[], []] + } +} diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/segment/TrieTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/segment/TrieTest.java index 1577a59..48f4199 100644 --- a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/segment/TrieTest.java +++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/segment/TrieTest.java @@ -16,7 +16,7 @@ public class TrieTest { public void testEmptyTrie() { Trie, String> trie = new TrieImpl<>(); TrieKey trieKey = new TrieKeyImpl<>(); - String result = trie.search(trieKey); + String result = trie.get(trieKey); assertNull(result); } @@ -24,11 +24,11 @@ public class TrieTest { public void testEmptyKey() { Trie, Integer> trie = new TrieImpl<>(); TrieKey trieKey = new TrieKeyImpl<>(); - trie.add(trieKey, 100); - Integer result = trie.search(trieKey); + trie.put(trieKey, 100); + Integer result = trie.get(trieKey); assertEquals(result, (Integer) 100); - trie.add(trieKey, 200); - result = trie.search(trieKey); + trie.put(trieKey, 200); + result = trie.get(trieKey); assertEquals(result, (Integer) 200); } @@ -36,8 +36,8 @@ public class TrieTest { public void testSingletonTrie() { Trie, String> trie = new TrieImpl<>(); TrieKey trieKey = TrieKeyImpl.stringKey("key"); - trie.add(trieKey, "value"); - String result = trie.search(trieKey); + trie.put(trieKey, "value"); + String result = trie.get(trieKey); assertNotEquals(result, "key"); } @@ -50,11 +50,11 @@ public class TrieTest { Long value = random.nextLong(); String key = value.toString(); TrieKey trieKey = TrieKeyImpl.stringKey(key); - trie.add(trieKey, value); + trie.put(trieKey, value); keys.add(trieKey); } for (TrieKey key : keys) { - Long value = trie.search(key); + Long value = trie.get(key); assertEquals(key.toString(), value.toString()); } } diff --git a/settings.gradle b/settings.gradle index 35adf57..081eb2a 100644 --- a/settings.gradle +++ b/settings.gradle @@ -42,6 +42,7 @@ 
include 'datastructures-json-dsl' include 'datastructures-json-flat' include 'datastructures-json-iterator' include 'datastructures-json-micro' +include 'datastructures-json-mini' include 'datastructures-json-minimal' include 'datastructures-json-noggit' include 'datastructures-json-simple'