Merge branch 'main' of alkmene:joerg/datastructures

Jörg Prante 2022-06-22 16:51:32 +02:00
commit 7a4716dd7b
96 changed files with 8938 additions and 1397 deletions

View file

@ -0,0 +1,5 @@
dependencies {
testImplementation(libs.mockito.core) {
exclude group: 'org.hamcrest'
}
}

View file

@ -0,0 +1,12 @@
module org.xbib.datastructures.trie {
exports org.xbib.datastructures.trie.ahocorasick;
exports org.xbib.datastructures.trie.compact;
exports org.xbib.datastructures.trie.limewire;
exports org.xbib.datastructures.trie.patricia;
exports org.xbib.datastructures.trie.radix;
exports org.xbib.datastructures.trie.radix.adaptive;
exports org.xbib.datastructures.trie.radix.adaptive.persistent;
exports org.xbib.datastructures.trie.radix.pruning;
exports org.xbib.datastructures.trie.regex;
exports org.xbib.datastructures.trie.simple;
}

View file

@ -0,0 +1,22 @@
package org.xbib.datastructures.trie.ahocorasick;
/**
* This class holds a text ("the fragment") and emits some output. If
* {@link #isMatch()} returns {@code true}, the token matched a search term.
*
* @param <T> The type of the emitted payloads.
*/
public abstract class AbstractToken<T> implements Token<T> {
private final String fragment;
public AbstractToken(String fragment) {
this.fragment = fragment;
}
@Override
public String getFragment() {
return this.fragment;
}
}

View file

@ -0,0 +1,7 @@
package org.xbib.datastructures.trie.ahocorasick;
import java.util.List;
public interface CollectingOutputHandler<T> extends OutputHandler<T> {
List<EntryOutput<T>> getOutputs();
}

View file

@ -0,0 +1,20 @@
package org.xbib.datastructures.trie.ahocorasick;
import java.util.ArrayList;
import java.util.List;
public class DefaultOutputHandler<T> implements CollectingOutputHandler<T> {
private final List<EntryOutput<T>> outputs = new ArrayList<>();
@Override
public boolean output(EntryOutput<T> emit) {
outputs.add(emit);
return true;
}
@Override
public List<EntryOutput<T>> getOutputs() {
return outputs;
}
}

View file

@ -0,0 +1,6 @@
package org.xbib.datastructures.trie.ahocorasick;
public enum Direction {
LEFT,
RIGHT
}

View file

@ -0,0 +1,32 @@
package org.xbib.datastructures.trie.ahocorasick;
/**
* An entry: a key with an associated value.
*
* @param <T> The type of the value.
*/
public class Entry<T> implements Comparable<Entry<T>> {
private final String key;
private final T value;
public Entry(String key, T value) {
super();
this.key = key;
this.value = value;
}
public String getKey() {
return key;
}
public T getValue() {
return value;
}
@Override
public int compareTo(Entry<T> other) {
return key.compareTo(other.getKey());
}
}

View file

@ -0,0 +1,32 @@
package org.xbib.datastructures.trie.ahocorasick;
/**
* This class represents a match, emitted as output.
*
* @param <T> Type of the value
*/
public class EntryOutput<T> extends Interval {
private final String key;
private final T value;
public EntryOutput(int start, int end, String key, T value) {
super(start, end);
this.key = key;
this.value = value;
}
public String getKey() {
return key;
}
public T getValue() {
return value;
}
@Override
public String toString() {
return super.toString() + "=" + key + (value != null ? "->" + value : "");
}
}

View file

@ -0,0 +1,25 @@
package org.xbib.datastructures.trie.ahocorasick;
/**
* Class for a token ("the fragment") that did not match a search term, so
* {@link #isMatch()} always returns {@code false}.
*
* @param <T> The type of the emitted payloads.
*/
public class FragmentToken<T> extends AbstractToken<T> {
public FragmentToken(String fragment) {
super(fragment);
}
@Override
public boolean isMatch() {
return false;
}
@Override
public EntryOutput<T> getOutput() {
return null;
}
}

View file

@ -0,0 +1,57 @@
package org.xbib.datastructures.trie.ahocorasick;
import java.util.Objects;
/**
* Responsible for tracking the start and end bounds.
*/
public class Interval implements Comparable<Interval> {
private final int start;
private final int end;
public Interval(int start, int end) {
this.start = start;
this.end = end;
}
public int getStart() {
return start;
}
public int getEnd() {
return end;
}
public boolean overlapsWith(final Interval other) {
return start <= other.getEnd() && end >= other.getStart();
}
public boolean overlapsWith(int point) {
return start <= point && point <= end;
}
@Override
public int compareTo(Interval other) {
int comparison = start - other.getStart();
return comparison != 0 ? comparison : end - other.getEnd();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
Interval interval = (Interval) o;
return start == interval.start && end == interval.end;
}
@Override
public int hashCode() {
return Objects.hash(start, end);
}
}

View file

@ -0,0 +1,110 @@
package org.xbib.datastructures.trie.ahocorasick;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class IntervalNode {
private IntervalNode left;
private IntervalNode right;
private final int point;
private final List<Interval> intervals;
public IntervalNode(List<Interval> intervals) {
this.intervals = new ArrayList<>();
this.point = determineMedian(intervals);
List<Interval> toLeft = new ArrayList<>();
List<Interval> toRight = new ArrayList<>();
for (Interval interval : intervals) {
if (interval.getEnd() < point) {
toLeft.add(interval);
} else if (interval.getStart() > point) {
toRight.add(interval);
} else {
this.intervals.add(interval);
}
}
if (toLeft.size() > 0) {
left = new IntervalNode(toLeft);
}
if (toRight.size() > 0) {
right = new IntervalNode(toRight);
}
}
public int determineMedian(List<Interval> intervals) {
int start = -1;
int end = -1;
for (Interval interval : intervals) {
int currentStart = interval.getStart();
int currentEnd = interval.getEnd();
if (start == -1 || currentStart < start) {
start = currentStart;
}
if (end == -1 || currentEnd > end) {
end = currentEnd;
}
}
return (start + end) / 2;
}
public List<Interval> findOverlaps(Interval interval) {
List<Interval> overlaps = new ArrayList<>();
if (point < interval.getStart()) {
addToOverlaps(interval, overlaps, findOverlappingRanges(right, interval));
addToOverlaps(interval, overlaps, checkForOverlapsToTheRight(interval));
} else if (point > interval.getEnd()) {
addToOverlaps(interval, overlaps, findOverlappingRanges(left, interval));
addToOverlaps(interval, overlaps, checkForOverlapsToTheLeft(interval));
} else {
addToOverlaps(interval, overlaps, intervals);
addToOverlaps(interval, overlaps, findOverlappingRanges(left, interval));
addToOverlaps(interval, overlaps, findOverlappingRanges(right, interval));
}
return overlaps;
}
protected void addToOverlaps(Interval interval, List<Interval> overlaps, List<Interval> newOverlaps) {
for (Interval currentInterval : newOverlaps) {
if (!currentInterval.equals(interval)) {
overlaps.add(currentInterval);
}
}
}
protected List<Interval> checkForOverlapsToTheLeft(Interval interval) {
return checkForOverlaps(interval, Direction.LEFT);
}
protected List<Interval> checkForOverlapsToTheRight(Interval interval) {
return checkForOverlaps(interval, Direction.RIGHT);
}
protected List<Interval> checkForOverlaps(Interval interval, Direction direction) {
List<Interval> overlaps = new ArrayList<>();
for (Interval currentInterval : intervals) {
switch (direction) {
case LEFT:
if (currentInterval.getStart() <= interval.getEnd()) {
overlaps.add(currentInterval);
}
break;
case RIGHT:
if (currentInterval.getEnd() >= interval.getStart()) {
overlaps.add(currentInterval);
}
break;
}
}
return overlaps;
}
protected List<Interval> findOverlappingRanges(IntervalNode node, Interval interval) {
return node == null ? Collections.emptyList() : node.findOverlaps(interval);
}
}

View file

@ -0,0 +1,42 @@
package org.xbib.datastructures.trie.ahocorasick;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
public class IntervalTree {
private final IntervalNode rootNode;
public IntervalTree(List<Interval> intervals) {
this.rootNode = new IntervalNode(intervals);
}
public List<Interval> removeOverlaps(List<Interval> intervals) {
intervals.sort((i1, i2) -> {
int i = (i2.getEnd() - i2.getStart() + 1) - (i1.getEnd() - i1.getStart() + 1);
if (i == 0) {
i = i1.getStart() - i2.getStart();
}
return i;
});
Set<Interval> removeIntervals = new TreeSet<>();
for (final Interval interval : intervals) {
if (removeIntervals.contains(interval)) {
continue;
}
removeIntervals.addAll(findOverlaps(interval));
}
for (final Interval removeInterval : removeIntervals) {
intervals.remove(removeInterval);
}
intervals.sort(Comparator.comparingInt(Interval::getStart));
return intervals;
}
public List<Interval> findOverlaps(Interval interval) {
return rootNode.findOverlaps(interval);
}
}
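
A minimal usage sketch of the pruning policy implemented above: removeOverlaps prefers larger intervals, breaks ties by earlier start, and drops whatever overlaps a kept interval. The example class and sample intervals are illustrative, not part of this commit.

import java.util.ArrayList;
import java.util.List;
import org.xbib.datastructures.trie.ahocorasick.Interval;
import org.xbib.datastructures.trie.ahocorasick.IntervalTree;

public class IntervalTreeExample {
    public static void main(String[] args) {
        List<Interval> intervals = new ArrayList<>();
        intervals.add(new Interval(0, 5)); // length 6: survives
        intervals.add(new Interval(2, 4)); // overlaps the larger [0,5]: removed
        intervals.add(new Interval(6, 9)); // disjoint: survives
        IntervalTree tree = new IntervalTree(new ArrayList<>(intervals));
        for (Interval kept : tree.removeOverlaps(intervals)) {
            System.out.println(kept.getStart() + ".." + kept.getEnd()); // prints 0..5 then 6..9
        }
    }
}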

View file

@ -0,0 +1,28 @@
package org.xbib.datastructures.trie.ahocorasick;
/**
* Class for a token ("the fragment") that can emit an entry.
* This token indicates a match, so {@link #isMatch()}
* always returns {@code true}.
*
* @param <T> The type of the emitted entry value.
*/
public class MatchToken<T> extends AbstractToken<T> {
private final EntryOutput<T> output;
public MatchToken(String fragment, EntryOutput<T> output) {
super(fragment);
this.output = output;
}
@Override
public boolean isMatch() {
return true;
}
@Override
public EntryOutput<T> getOutput() {
return output;
}
}

View file

@ -0,0 +1,7 @@
package org.xbib.datastructures.trie.ahocorasick;
@FunctionalInterface
public interface OutputHandler<T> {
boolean output(EntryOutput<T> entryOutput);
}

View file

@ -0,0 +1,108 @@
package org.xbib.datastructures.trie.ahocorasick;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
* A state has various important tasks it must attend to:
* <ul>
* <li>success; when a character points to another state, it must return that
* state</li>
* <li>failure; when a character has no matching state, the algorithm must be
* able to fall back on a state with less depth</li>
* <li>emits; when this state is passed and keys have been matched, the
* matches and their values must be output so they can be used later
* on.</li>
* </ul>
* The root state is special in the sense that it has no failure state; it
* cannot fail. If it 'fails' it will still parse the next character and start
* from the root node. This ensures that the algorithm always runs. All other
* states always have a fail state.
*/
public class State<T> {
private final int depth;
private final State<T> rootState;
private final Map<Character, State<T>> success;
private final Set<Entry<T>> entries;
private State<T> failure;
public State() {
this(0);
}
public State(final int depth) {
this.depth = depth;
rootState = depth == 0 ? this : null;
success = new HashMap<>();
entries = new TreeSet<>();
}
private State<T> nextState(final Character character, final boolean ignoreRootState) {
State<T> nextState = this.success.get(character);
if (!ignoreRootState && nextState == null && this.rootState != null) {
nextState = this.rootState;
}
return nextState;
}
public State<T> nextState(final Character character) {
return nextState(character, false);
}
public State<T> nextStateIgnoreRootState(Character character) {
return nextState(character, true);
}
public State<T> addState(Character character) {
State<T> nextState = nextStateIgnoreRootState(character);
if (nextState == null) {
nextState = new State<>(this.depth + 1);
this.success.put(character, nextState);
}
return nextState;
}
public int getDepth() {
return this.depth;
}
public void add(Entry<T> entry) {
entries.add(entry);
}
public void add(Collection<Entry<T>> emits) {
for (Entry<T> emit : emits) {
add(emit);
}
}
public Collection<Entry<T>> entries() {
return entries;
}
public State<T> failure() {
return this.failure;
}
public void setFailure(State<T> failState) {
this.failure = failState;
}
public Collection<State<T>> getStates() {
return this.success.values();
}
public Collection<Character> getTransitions() {
return this.success.keySet();
}
}
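
A small sketch of the fallback behavior described in the javadoc above: the root returns itself on a miss, while a non-root state reports the miss so the caller can follow its failure link. Illustrative only; assumes the example sits in the same package as State.

public class StateExample {
    public static void main(String[] args) {
        State<Integer> root = new State<>();
        State<Integer> s = root.addState('h').addState('e'); // goto path "h" -> "he"
        System.out.println(root.nextState('x') == root);             // true: the root never fails
        System.out.println(root.nextState('h').nextState('e') == s); // true: goto transitions
        System.out.println(s.nextStateIgnoreRootState('x'));         // null: a non-root miss
    }
}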

View file

@ -0,0 +1,10 @@
package org.xbib.datastructures.trie.ahocorasick;
public interface Token<T> {
String getFragment();
boolean isMatch();
EntryOutput<T> getOutput();
}

View file

@ -0,0 +1,257 @@
package org.xbib.datastructures.trie.ahocorasick;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
/**
* A trie implementation.
*
* @param <T> The type of the supplied payload.
*/
public class Trie<T> {
private final TrieConfig trieConfig;
private final State<T> rootState;
protected Trie(TrieConfig trieConfig) {
this.trieConfig = trieConfig;
this.rootState = new State<>();
}
public static <T> Builder<T> builder() {
return new Builder<>();
}
public Collection<Token<T>> tokenize(String text) {
Collection<Token<T>> tokens = new LinkedList<>();
Collection<EntryOutput<T>> outputs = parse(text);
int lastCollectedPosition = -1;
for (EntryOutput<T> output : outputs) {
if (output.getStart() - lastCollectedPosition > 1) {
tokens.add(createFragment(output, text, lastCollectedPosition));
}
tokens.add(createMatch(output, text));
lastCollectedPosition = output.getEnd();
}
if (text.length() - lastCollectedPosition > 1) {
tokens.add(createFragment(null, text, lastCollectedPosition));
}
return tokens;
}
public Collection<EntryOutput<T>> parse(CharSequence text) {
return parse(text, new DefaultOutputHandler<>());
}
@SuppressWarnings("unchecked")
public Collection<EntryOutput<T>> parse(CharSequence text, CollectingOutputHandler<T> handler) {
parse(text, (OutputHandler<T>) handler);
List<EntryOutput<T>> outputs = handler.getOutputs();
if (!trieConfig.isAllowOverlaps()) {
IntervalTree intervalTree = new IntervalTree((List<Interval>) (List<?>) outputs);
intervalTree.removeOverlaps((List<Interval>) (List<?>) outputs);
}
return outputs;
}
public void parse(CharSequence text, OutputHandler<T> outputHandler) {
State<T> currentState = getRootState();
for (int position = 0; position < text.length(); position++) {
char character = text.charAt(position);
if (trieConfig.isCaseInsensitive()) {
character = Character.toLowerCase(character);
}
currentState = getState(currentState, character);
Collection<Entry<T>> entries = currentState.entries();
if (processOutputs(text, position, entries, outputHandler) && trieConfig.isStopOnHit()) {
return;
}
}
}
public boolean match(CharSequence text) {
return firstMatch(text) != null;
}
public EntryOutput<T> firstMatch(CharSequence text) {
if (!trieConfig.isAllowOverlaps()) {
Collection<EntryOutput<T>> parseText = parse(text);
if (parseText != null && !parseText.isEmpty()) {
return parseText.iterator().next();
}
} else {
State<T> currentState = getRootState();
for (int i = 0; i < text.length(); i++) {
char character = text.charAt(i);
if (trieConfig.isCaseInsensitive()) {
character = Character.toLowerCase(character);
}
currentState = getState(currentState, character);
Collection<Entry<T>> entries = currentState.entries();
if (entries != null && !entries.isEmpty()) {
for (Entry<T> entry : entries) {
EntryOutput<T> output =
new EntryOutput<>(i - entry.getKey().length() + 1, i, entry.getKey(), entry.getValue());
if (trieConfig.isOnlyWholeWords()) {
if (!isPartialMatch(text, output)) {
return output;
}
} else {
return output;
}
}
}
}
}
return null;
}
private Token<T> createFragment(EntryOutput<T> output, String text, int lastCollectedPosition) {
return new FragmentToken<>(text.substring(lastCollectedPosition + 1, output == null ? text.length() : output.getStart()));
}
private Token<T> createMatch(EntryOutput<T> output, String text) {
return new MatchToken<>(text.substring(output.getStart(), output.getEnd() + 1), output);
}
private State<T> addState(String key) {
State<T> state = getRootState();
for (Character character : key.toCharArray()) {
Character adjustedChar = trieConfig.isCaseInsensitive() ? Character.toLowerCase(character) : character;
state = state.addState(adjustedChar);
}
return state;
}
private boolean isPartialMatch(CharSequence searchText, EntryOutput<T> output) {
return (output.getStart() != 0 && Character.isAlphabetic(searchText.charAt(output.getStart() - 1)))
|| (output.getEnd() + 1 != searchText.length() && Character.isAlphabetic(searchText.charAt(output.getEnd() + 1)));
}
private boolean isPartialMatchWhiteSpaceSeparated(CharSequence searchText, EntryOutput<T> output) {
long size = searchText.length();
return (output.getStart() != 0 && !Character.isWhitespace(searchText.charAt(output.getStart() - 1)))
|| (output.getEnd() + 1 != size && !Character.isWhitespace(searchText.charAt(output.getEnd() + 1)));
}
private State<T> getState(State<T> currentState, Character character) {
State<T> newCurrentState = currentState.nextState(character);
while (newCurrentState == null) {
currentState = currentState.failure();
newCurrentState = currentState.nextState(character);
}
return newCurrentState;
}
private void constructFailureStates() {
Queue<State<T>> queue = new LinkedList<>();
State<T> startState = getRootState();
for (State<T> depthOneState : startState.getStates()) {
depthOneState.setFailure(startState);
queue.add(depthOneState);
}
while (!queue.isEmpty()) {
State<T> currentState = queue.remove();
for (Character transition : currentState.getTransitions()) {
State<T> targetState = currentState.nextState(transition);
queue.add(targetState);
State<T> traceFailureState = currentState.failure();
while (traceFailureState.nextState(transition) == null) {
traceFailureState = traceFailureState.failure();
}
State<T> newFailureState = traceFailureState.nextState(transition);
targetState.setFailure(newFailureState);
targetState.add(newFailureState.entries());
}
}
}
private boolean processOutputs(CharSequence text,
int position,
Collection<Entry<T>> entries,
OutputHandler<T> outputHandler) {
boolean output = false;
for (Entry<T> entry : entries) {
EntryOutput<T> entryOutput =
new EntryOutput<>(position - entry.getKey().length() + 1, position, entry.getKey(), entry.getValue());
if (!(trieConfig.isOnlyWholeWords() && isPartialMatch(text, entryOutput)) &&
!(trieConfig.isOnlyWholeWordsWhiteSpaceSeparated() &&
isPartialMatchWhiteSpaceSeparated(text, entryOutput))) {
output = outputHandler.output(entryOutput) || output;
if (output && trieConfig.isStopOnHit()) {
break;
}
}
}
return output;
}
private State<T> getRootState() {
return rootState;
}
public static class Builder<T> {
private final TrieConfig trieConfig;
private final Trie<T> trie;
private Builder() {
trieConfig = new TrieConfig();
trie = new Trie<>(trieConfig);
}
public Builder<T> ignoreCase() {
trieConfig.setCaseInsensitive(true);
return this;
}
public Builder<T> ignoreOverlaps() {
trieConfig.setAllowOverlaps(false);
return this;
}
public Builder<T> onlyWholeWords() {
trieConfig.setOnlyWholeWords(true);
return this;
}
public Builder<T> onlyWholeWordsWhiteSpaceSeparated() {
trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true);
return this;
}
public Builder<T> stopOnHit() {
trie.trieConfig.setStopOnHit(true);
return this;
}
public Builder<T> add(String key) {
add(key, null);
return this;
}
public Builder<T> add(String key, T value) {
if (key == null || key.isEmpty()) {
return this;
}
trie.addState(key).add(new Entry<>(key, value));
return this;
}
public Builder<T> add(Collection<Entry<T>> keys) {
for (Entry<T> entry : keys) {
add(entry.getKey(), entry.getValue());
}
return this;
}
public Trie<T> build() {
trie.constructFailureStates();
return this.trie;
}
}
}
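
A hedged usage sketch of the Builder API above; the class name and sample keys are illustrative. With ignoreOverlaps(), parsing "ushers" keeps only "hers" (positions 2..5), since it is longer than the overlapping "she" (1..3).

import java.util.Collection;
import org.xbib.datastructures.trie.ahocorasick.EntryOutput;
import org.xbib.datastructures.trie.ahocorasick.Trie;

public class AhoCorasickExample {
    public static void main(String[] args) {
        Trie<String> trie = Trie.<String>builder()
                .ignoreCase()
                .ignoreOverlaps() // keep only the longest of overlapping matches
                .add("she", "pronoun")
                .add("hers", "pronoun")
                .build();
        Collection<EntryOutput<String>> outputs = trie.parse("ushers");
        for (EntryOutput<String> output : outputs) {
            System.out.println(output.getStart() + ".." + output.getEnd() + " " + output.getKey()); // 2..5 hers
        }
    }
}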

View file

@ -0,0 +1,54 @@
package org.xbib.datastructures.trie.ahocorasick;
public class TrieConfig {
private boolean allowOverlaps = true;
private boolean onlyWholeWords = false;
private boolean onlyWholeWordsWhiteSpaceSeparated = false;
private boolean caseInsensitive = false;
private boolean stopOnHit = false;
public boolean isStopOnHit() {
return stopOnHit;
}
public void setStopOnHit(boolean stopOnHit) {
this.stopOnHit = stopOnHit;
}
public boolean isAllowOverlaps() {
return allowOverlaps;
}
public void setAllowOverlaps(boolean allowOverlaps) {
this.allowOverlaps = allowOverlaps;
}
public boolean isOnlyWholeWords() {
return onlyWholeWords;
}
public void setOnlyWholeWords(boolean onlyWholeWords) {
this.onlyWholeWords = onlyWholeWords;
}
public boolean isOnlyWholeWordsWhiteSpaceSeparated() {
return onlyWholeWordsWhiteSpaceSeparated;
}
public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) {
this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated;
}
public boolean isCaseInsensitive() {
return caseInsensitive;
}
public void setCaseInsensitive(boolean caseInsensitive) {
this.caseInsensitive = caseInsensitive;
}
}

View file

@ -0,0 +1,10 @@
/**
* Taken from
*
* https://github.com/robert-bor/aho-corasick
*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*/
package org.xbib.datastructures.trie.ahocorasick;

View file

@ -0,0 +1,6 @@
/**
* Taken from
*
* https://leetcode.com/problems/implement-trie-prefix-tree/discuss/467046/Java-Radix-tree-(compact-prefix-tree)-beats-99.7-runtime-and-100-memory
*/
package org.xbib.datastructures.trie.compact;

View file

@ -0,0 +1,13 @@
package org.xbib.datastructures.trie.limewire;
/**
* An abstract implementation of {@link KeyAnalyzer}.
*/
public abstract class AbstractKeyAnalyzer<K> implements KeyAnalyzer<K> {
@SuppressWarnings("unchecked")
@Override
public int compare(K o1, K o2) {
return ((Comparable<K>) o1).compareTo(o2);
}
}

View file

@ -0,0 +1,48 @@
package org.xbib.datastructures.trie.limewire;
import java.util.Map;
/**
* An interface used by a {@link Trie}. A {@link Trie} selects items by
* closeness and passes the items to the <code>Cursor</code>. You can then
* decide what to do with the key-value pair and the return value
* from {@link #select(java.util.Map.Entry)} tells the <code>Trie</code>
* what to do next.
* <p>
* The <code>Cursor</code>'s returned selection status may be one of:
* <table cellspace="5">
* <tr><td><b>Return Value</b></td><td><b>Status</b></td></tr>
* <tr><td>EXIT</td><td>Finish the Trie operation</td></tr>
* <tr><td>CONTINUE</td><td>Look at the next element in the traversal</td></tr>
* <tr><td>REMOVE_AND_EXIT</td><td>Remove the entry and stop iterating</td></tr>
* <tr><td>REMOVE</td><td>Remove the entry and continue iterating</td></tr>
* </table>
* <p>
* Note: {@link Trie#select(Object, Cursor)} does
* not support <code>REMOVE</code>.
*
* @param <K> Key Type
* @param <V> Key Value
*/
public interface Cursor<K, V> {
/**
* Notification that the Trie is currently looking at the given entry.
* Return <code>EXIT</code> to finish the Trie operation,
* <code>CONTINUE</code> to look at the next entry, <code>REMOVE</code>
* to remove the entry and continue iterating, or
* <code>REMOVE_AND_EXIT</code> to remove the entry and stop iterating.
* Not all operations support <code>REMOVE</code>.
*/
SelectStatus select(Map.Entry<? extends K, ? extends V> entry);
/**
* The mode during selection.
*/
enum SelectStatus {
EXIT,
CONTINUE,
REMOVE,
REMOVE_AND_EXIT
}
}
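
Because select(...) is the only abstract method, a Cursor can be written as a lambda. A minimal sketch, assuming String keys and values and an illustrative target value:

public class CursorExample {
    // Stops the traversal at the first entry whose value equals "target";
    // all other entries are skipped with CONTINUE.
    static final Cursor<String, String> FIRST_TARGET = entry ->
            "target".equals(entry.getValue())
                    ? Cursor.SelectStatus.EXIT
                    : Cursor.SelectStatus.CONTINUE;
}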

View file

@ -0,0 +1,40 @@
package org.xbib.datastructures.trie.limewire;
/**
* An implementation of {@link KeyAnalyzer}
* that assumes all keys have the {@link Key} interface implemented.
*/
public class DefaultKeyAnalyzer<K extends Key<K>> extends AbstractKeyAnalyzer<K> {
@SuppressWarnings("rawtypes")
private static final DefaultKeyAnalyzer INSTANCE = new DefaultKeyAnalyzer();
@SuppressWarnings("unchecked")
public static <K> KeyAnalyzer<K> singleton() {
return (KeyAnalyzer<K>) INSTANCE;
}
@Override
public int lengthInBits(K key) {
return key.lengthInBits();
}
@Override
public boolean isBitSet(K key, int keyLength, int bitIndex) {
return key.isBitSet(bitIndex);
}
@Override
public int bitIndex(K key, int keyStart, int keyLength, K found, int foundStart, int foundLength) {
return key.bitIndex(found);
}
@Override
public int bitsPerElement() {
return 16;
}
@Override
public boolean isPrefix(K prefix, int offset, int length, K key) {
return key.isPrefixedBy(prefix);
}
}

View file

@ -1,44 +0,0 @@
package org.xbib.datastructures.trie.limewire;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
* Provides an unmodifiable empty iterator. <code>EmptyIterator</code> always
* returns that there aren't any more items and throws a
* {@link NoSuchElementException} when attempting to move to the next item.
*
* <pre>
* try{
* EmptyIterator ei = new EmptyIterator();
* ei.next();
* } catch (Exception e) {
* System.out.println("Expected to get NoSuchElementException exception: " + e.toString());
* }
*
* Output:
* Expected to get NoSuchElementException exception: java.util.NoSuchElementException
* </pre>
*/
public class EmptyIterator extends UnmodifiableIterator {
/**
* A constant EmptyIterator.
*/
public final static Iterator EMPTY_ITERATOR = new EmptyIterator();
@SuppressWarnings("unchecked")
public static <T> Iterator<T> emptyIterator() {
return EMPTY_ITERATOR;
}
// inherits javadoc comment
public boolean hasNext() {
return false;
}
// inherits javadoc comment
public Object next() {
throw new NoSuchElementException();
}
}

View file

@ -0,0 +1,30 @@
package org.xbib.datastructures.trie.limewire;
/**
* An interface that {@link PatriciaTrie} keys may implement.
*
* @see KeyAnalyzer
* @see DefaultKeyAnalyzer
*/
public interface Key<K> {
/**
* Returns the key's length in bits.
*/
int lengthInBits();
/**
* Returns {@code true} if the given bit is set.
*/
boolean isBitSet(int bitIndex);
/**
* Returns the index of the first bit that is different in the two keys.
*/
int bitIndex(K otherKey);
/**
* Returns {@code true} if this key is prefixed by the given key.
*/
boolean isPrefixedBy(K prefix);
}

View file

@ -0,0 +1,64 @@
package org.xbib.datastructures.trie.limewire;
import java.util.Comparator;
/**
* Defines the interface to analyze {@link Trie} keys on a bit
* level. <code>KeyAnalyzer</code>'s
* methods return the length of the key in bits, whether or not a bit is
* set, and bits per element in the key.
* <p>
* Additionally, a method determines if a key is a prefix of another key and
* returns the bit index where one key is different from another key (if
* the key and found key are equal then the return value is EQUAL_BIT_KEY).
* <p>
* <code>KeyAnalyzer</code> defines:<br>
* <table cellspace="5">
* <tr><td>NULL_BIT_KEY</td><td>When key's bits are all zero</td></tr>
* <tr><td> EQUAL_BIT_KEY </td><td>When keys are the same </td></tr>
* </table>
*/
public interface KeyAnalyzer<K> extends Comparator<K> {
/**
* Returned by bitIndex if key's bits are all 0.
*/
int NULL_BIT_KEY = -1;
/**
* Returned by bitIndex if key and found key are
* equal. This is a very specific case and
* shouldn't happen on a regular basis.
*/
int EQUAL_BIT_KEY = -2;
/**
* Returns the length of the Key in bits.
*/
int lengthInBits(K key);
/**
* Returns whether or not a bit is set.
*/
boolean isBitSet(K key, int keyLength, int bitIndex);
/**
* Returns the n-th different bit between key and found.
* This starts the comparison in key at 'keyStart' and goes
* for 'keyLength' bits, and compares to the found key
* starting at 'foundStart' and going for 'foundLength' bits.
*/
int bitIndex(K key, int keyStart, int keyLength, K found, int foundStart, int foundLength);
/**
* Returns the number of bits per element in the key.
* This is only useful for variable-length keys, such as Strings.
*/
int bitsPerElement();
/**
* Determines whether or not the given prefix (from offset to length)
* is a prefix of the given key.
*/
boolean isPrefix(K prefix, int offset, int length, K key);
}
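
As a concrete illustration of bitsPerElement() and isBitSet(...) for variable-length String keys (the PatriciaTrie javadoc later references a CharSequenceKeyAnalyzer), here is a hedged sketch at 16 bits per element, assuming bit 0 is the most significant bit of the first char and that bits past the end of the key read as zero:

public final class StringBitExample {
    static final int BITS_PER_CHAR = 16;

    static int lengthInBits(String key) {
        return key == null ? 0 : key.length() * BITS_PER_CHAR;
    }

    static boolean isBitSet(String key, int bitIndex) {
        if (bitIndex >= lengthInBits(key)) {
            return false; // bits past the end of the key read as zero
        }
        char c = key.charAt(bitIndex / BITS_PER_CHAR);
        return (c & (0x8000 >>> (bitIndex % BITS_PER_CHAR))) != 0;
    }

    public static void main(String[] args) {
        // 'a' == 0x0061 == 0000 0000 0110 0001, so bits 9, 10 and 15 are set
        System.out.println(isBitSet("a", 9)); // true
        System.out.println(isBitSet("a", 0)); // false
    }
}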

View file

@ -4,11 +4,13 @@ import java.util.AbstractCollection;
import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.SortedMap;
@ -36,7 +38,7 @@ import java.util.SortedMap;
* closeness is determined by the {@link KeyAnalyzer} returning true or
* false for a bit being set or not in a given key.
* <p>
* This PATRICIA Trie supports both variable length & fixed length keys.
* This PATRICIA Trie supports both variable length and fixed length keys.
* Some methods, such as <code>getPrefixedBy(...)</code> are suited only to
* variable length keys, whereas <code>getPrefixedByBits(...)</code> is suited
* to fixed-size keys.
@ -50,7 +52,7 @@ import java.util.SortedMap;
* (and it isn't K).
*
* <pre>
* PatriciaTrie<String, String> trie = new PatriciaTrie<String, String>
* PatriciaTrie&lt;String, String&gt; trie = new PatriciaTrie&lt;String, String&gt;
* (new CharSequenceKeyAnalyzer());
*
* trie.put("Lime", "Lime");
@ -113,7 +115,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* Returns true if bitIndex is a valid index
*/
private static boolean isValidBitIndex(int bitIndex) {
return 0 <= bitIndex && bitIndex <= Integer.MAX_VALUE;
return 0 <= bitIndex;
}
/**
@ -209,7 +211,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
}
/**
* Adds a new <key, value> pair to the Trie and if a pair already
* Adds a new &lt;key, value&gt; pair to the Trie and if a pair already
* exists it will be replaced. In the latter case it will return
* the old value.
*/
@ -394,10 +396,10 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* Returns the Value whose Key has the longest prefix
* in common with our lookup key.
*/
@SuppressWarnings("unchecked")
@SuppressWarnings({"unchecked","rawtypes"})
public V select(K key) {
int keyLength = length(key);
TrieEntry[] result = new TrieEntry[1];
TrieEntry<K, V>[] result = new TrieEntry[1];
if (!selectR(root.left, -1, key, keyLength, result)) {
TrieEntry<K, V> e = result[0];
return e.getValue();
@ -411,8 +413,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* Entry from the Trie.
*/
private boolean selectR(TrieEntry<K, V> h, int bitIndex,
final K key, final int keyLength, final TrieEntry[] result) {
final K key, final int keyLength, final TrieEntry<K, V>[] result) {
if (h.bitIndex <= bitIndex) {
// If we hit the root Node and it is empty
// we have to look for an alternative best
@ -423,7 +424,6 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
}
return true;
}
if (!isBitSet(key, keyLength, h.bitIndex)) {
if (selectR(h.left, h.bitIndex, key, keyLength, result)) {
return selectR(h.right, h.bitIndex, key, keyLength, result);
@ -436,10 +436,10 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
return false;
}
@SuppressWarnings("unchecked")
@SuppressWarnings({"unchecked","rawtypes"})
public Map.Entry<K, V> select(K key, Cursor<? super K, ? super V> cursor) {
int keyLength = length(key);
TrieEntry[] result = new TrieEntry[]{null};
TrieEntry<K, V>[] result = new TrieEntry[] { null };
selectR(root.left, -1, key, keyLength, cursor, result);
return result[0];
}
@ -448,8 +448,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
final K key,
final int keyLength,
final Cursor<? super K, ? super V> cursor,
final TrieEntry[] result) {
final TrieEntry<K, V>[] result) {
if (h.bitIndex <= bitIndex) {
if (!h.isEmpty()) {
Cursor.SelectStatus ret = cursor.select(h);
@ -470,7 +469,6 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
}
return true; // continue
}
if (!isBitSet(key, keyLength, h.bitIndex)) {
if (selectR(h.left, h.bitIndex, key, keyLength, cursor, result)) {
return selectR(h.right, h.bitIndex, key, keyLength, cursor, result);
@ -480,7 +478,6 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
return selectR(h.left, h.bitIndex, key, keyLength, cursor, result);
}
}
return false;
}
@ -495,7 +492,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* a lookup of 'Lime' would return 'Lime', 'LimeRadio', and 'LimeWire'.
* <p>
* The view that this returns is optimized to have a very efficient
* Iterator. The firstKey, lastKey & size methods must iterate
* Iterator. The firstKey, lastKey &amp; size methods must iterate
* over all possible values in order to determine the results. This
* information is cached until the Patricia tree changes. All other
* methods (except Iterator) must compare the given key to the prefix
@ -505,7 +502,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* Changing the subtree takes O(K) time.
*/
public SortedMap<K, V> getPrefixedBy(K key) {
return getPrefixedByBits(key, 0, keyAnalyzer.length(key));
return getPrefixedByBits(key, 0, keyAnalyzer.lengthInBits(key));
}
/**
@ -521,7 +518,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* return 'Lime', 'LimeRadio', and 'LimeWire'.
* <p>
* The view that this returns is optimized to have a very efficient
* Iterator. The firstKey, lastKey & size methods must iterate
* Iterator. The firstKey, lastKey &amp; size methods must iterate
* over all possible values in order to determine the results. This
* information is cached until the Patricia tree changes. All other
* methods (except Iterator) must compare the given key to the prefix
@ -547,7 +544,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* length of 4 would return 'Lime', 'LimeRadio', and 'LimeWire'.
* <p>
* The view that this returns is optimized to have a very efficient
* Iterator. The firstKey, lastKey & size methods must iterate
* Iterator. The firstKey, lastKey &amp; size methods must iterate
* over all possible values in order to determine the results. This
* information is cached until the Patricia tree changes. All other
* methods (except Iterator) must compare the given key to the prefix
@ -571,7 +568,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* would return all addresses that begin with '192.168'.
* <p>
* The view that this returns is optimized to have a very efficient
* Iterator. The firstKey, lastKey & size methods must iterate
* Iterator. The firstKey, lastKey &amp; size methods must iterate
* over all possible values in order to determine the results. This
* information is cached until the Patricia tree changes. All other
* methods (except Iterator) must compare the given key to the prefix
@ -605,9 +602,9 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
throw new IllegalArgumentException(offset + " + " + length + " > " + length(key));
}
if (offsetLength == 0)
if (offsetLength == 0) {
return this;
}
return new PrefixSubMap(key, offset, length);
}
@ -620,11 +617,11 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
@Override
public boolean containsKey(Object k) {
K key = asKey(k);
if (key == null)
if (key == null) {
return false;
}
int keyLength = length(key);
TrieEntry entry = getNearestEntryForKey(key, keyLength);
TrieEntry<K, V> entry = getNearestEntryForKey(key, keyLength);
return !entry.isEmpty() && key.equals(entry.key);
}
@ -633,9 +630,11 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
*/
@Override
public boolean containsValue(Object o) {
for (V v : values())
if (valEquals(v, o))
for (V v : values()) {
if (valEquals(v, o)) {
return true;
}
}
return false;
}
@ -1050,7 +1049,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
return 0;
}
return keyAnalyzer.length(key);
return keyAnalyzer.lengthInBits(key);
}
/**
@ -1162,7 +1161,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
/**
* Traverses down the right path until it finds an uplink.
*/
protected TrieEntry<K, V> followRight(TrieEntry<K, V> node) {
private TrieEntry<K, V> followRight(TrieEntry<K, V> node) {
// if Trie is empty, no last entry.
if (node.right == null)
return null;
@ -1174,14 +1173,17 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
return node.right;
}
@Override
public K firstKey() {
return firstEntry().getKey();
}
@Override
public SortedMap<K, V> headMap(K toKey) {
return new SubMap(null, toKey);
}
@Override
public K lastKey() {
TrieEntry<K, V> entry = lastEntry();
if (entry != null)
@ -1190,10 +1192,12 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
return null;
}
@Override
public SortedMap<K, V> subMap(K fromKey, K toKey) {
return new SubMap(fromKey, toKey);
}
@Override
public SortedMap<K, V> tailMap(K fromKey) {
return new SubMap(fromKey, null);
}
@ -1202,7 +1206,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* Returns an entry strictly higher than the given key,
* or null if no such entry exists.
*/
protected TrieEntry<K, V> higherEntry(K key) {
private TrieEntry<K, V> higherEntry(K key) {
// TODO: Cleanup so that we don't actually have to add/remove from the
// tree. (We do it here because there are other well-defined
// functions to perform the search.)
@ -1254,7 +1258,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* Returns a key-value mapping associated with the least key greater
* than or equal to the given key, or null if there is no such key.
*/
protected TrieEntry<K, V> ceilingEntry(K key) {
private TrieEntry<K, V> ceilingEntry(K key) {
// Basically:
// Follow the steps of adding an entry, but instead...
//
@ -1312,7 +1316,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* Returns a key-value mapping associated with the greatest key
* strictly less than the given key, or null if there is no such key.
*/
protected TrieEntry<K, V> lowerEntry(K key) {
private TrieEntry<K, V> lowerEntry(K key) {
// Basically:
// Follow the steps of adding an entry, but instead...
//
@ -1363,7 +1367,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
* Returns a key-value mapping associated with the greatest key
* less than or equal to the given key, or null if there is no such key.
*/
protected TrieEntry<K, V> floorEntry(K key) {
private TrieEntry<K, V> floorEntry(K key) {
// TODO: Cleanup so that we don't actually have to add/remove from the
// tree. (We do it here because there are other well-defined
// functions to perform the search.)
@ -1448,68 +1452,6 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
return entry;
}
/**
* Defines the interface to analyze {@link Trie} keys on a bit
* level. <code>KeyAnalyzer</code>'s
* methods return the length of the key in bits, whether or not a bit is
* set, and bits per element in the key.
* <p>
* Additionally, a method determines if a key is a prefix of another key and
* returns the bit index where one key is different from another key (if
* the key and found key are equal than the return value is EQUAL_BIT_KEY).
* <p>
* <code>KeyAnalyzer</code> defines:<br>
* <table cellspace="5">
* <tr><td>NULL_BIT_KEY</td><td>When key's bits are all zero</td></tr>
* <tr><td> EQUAL_BIT_KEY </td><td>When keys are the same </td></tr>
* </table>
*/
public interface KeyAnalyzer<K> extends Comparator<K> {
/**
* Returned by bitIndex if key's bits are all 0.
*/
int NULL_BIT_KEY = -1;
/**
* Returned by bitIndex if key and found key are
* equal. This is a very very specific case and
* shouldn't happen on a regular basis.
*/
int EQUAL_BIT_KEY = -2;
/**
* Returns the length of the Key in bits.
*/
int length(K key);
/**
* Returns whether or not a bit is set.
*/
boolean isBitSet(K key, int keyLength, int bitIndex);
/**
* Returns the n-th different bit between key and found.
* This starts the comparison in key at 'keyStart' and goes
* for 'keyLength' bits, and compares to the found key
* starting at 'foundStart' and going for 'foundLength' bits.
*/
int bitIndex(K key, int keyStart, int keyLength,
K found, int foundStart, int foundLength);
/**
* Returns the number of bits per element in the key.
* This is only useful for variable-length keys, such as Strings.
*/
int bitsPerElement();
/**
* Determines whether or not the given prefix (from offset to length)
* is a prefix of the given key.
*/
boolean isPrefix(K prefix, int offset, int length, K key);
}
/**
* The actual Trie nodes.
*/
@ -1540,18 +1482,17 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
*/
private TrieEntry<K, V> predecessor;
private TrieEntry(K key, V value, int bitIndex) {
TrieEntry(K key, V value, int bitIndex) {
this.key = key;
this.value = value;
this.bitIndex = bitIndex;
this.parent = null;
this.left = this;
this.right = null;
this.predecessor = this;
}
@SuppressWarnings("unchecked")
@Override
public boolean equals(Object o) {
if (o == this) {
@ -1571,6 +1512,11 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
}
}
@Override
public int hashCode() {
return Objects.hash(key, value, bitIndex, parent, left, right, predecessor);
}
/**
* Whether or not the entry is storing a key.
* Only the root can potentially be empty, all other
@ -1580,6 +1526,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
return key == null;
}
@Override
public K getKey() {
return key;
}
@ -1676,41 +1623,10 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
}
}
/**
* An iterator that stores a single TrieEntry.
*/
private class SingletonIterator implements Iterator<Map.Entry<K, V>> {
private final TrieEntry<K, V> entry;
private int hit = 0;
public SingletonIterator(TrieEntry<K, V> entry) {
this.entry = entry;
}
public boolean hasNext() {
return hit == 0;
}
public Map.Entry<K, V> next() {
if (hit != 0)
throw new NoSuchElementException();
hit++;
return entry;
}
public void remove() {
if (hit != 1)
throw new IllegalStateException();
hit++;
PatriciaTrie.this.removeEntry(entry);
}
}
/**
* An iterator for the entries.
*/
private abstract class NodeIterator<E> implements Iterator<E> {
abstract class NodeIterator<E> implements Iterator<E> {
protected int expectedModCount = modCount; // For fast-fail
protected TrieEntry<K, V> next; // the next node to return
protected TrieEntry<K, V> current; // the current entry we're on
@ -1761,7 +1677,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
private class ValueIterator extends NodeIterator<V> {
public V next() {
return nextEntry().value;
return nextEntry().getValue();
}
}
@ -1777,6 +1693,39 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
}
}
class SingletonIterator implements Iterator<Map.Entry<K, V>> {
private final PatriciaTrie<K, V> patriciaTrie;
private final TrieEntry<K, V> entry;
private int hit = 0;
public SingletonIterator(PatriciaTrie<K, V> patriciaTrie, TrieEntry<K, V> entry) {
this.patriciaTrie = patriciaTrie;
this.entry = entry;
}
public boolean hasNext() {
return hit == 0;
}
public Map.Entry<K, V> next() {
if (hit != 0)
throw new NoSuchElementException();
hit++;
return entry;
}
public void remove() {
if (hit != 1)
throw new IllegalStateException();
hit++;
patriciaTrie.removeEntry(entry);
}
}
/**
* An iterator for iterating over a prefix search.
*/
@ -2082,11 +2031,11 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
prefixStart = subtree(prefix, offset, length);
iterModCount = modCount;
}
if (prefixStart == null) {
return EmptyIterator.emptyIterator();
Set<Map.Entry<K, V>> set = Collections.emptySet();
return set.iterator();
} else if (length >= prefixStart.bitIndex) {
return new SingletonIterator(prefixStart);
return new SingletonIterator(PatriciaTrie.this, prefixStart);
} else {
return new PrefixEntryIterator(prefixStart, prefix, offset, length);
}
@ -2273,10 +2222,8 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
if (size == -1 || sizeModCount != PatriciaTrie.this.modCount) {
size = 0;
sizeModCount = PatriciaTrie.this.modCount;
Iterator i = iterator();
while (i.hasNext()) {
for (Entry<K, V> kvEntry : this) {
size++;
i.next();
}
}
return size;
@ -2304,12 +2251,14 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
@Override
@SuppressWarnings("unchecked")
public boolean remove(Object o) {
if (!(o instanceof Map.Entry))
if (!(o instanceof Map.Entry)) {
return false;
}
Map.Entry<K, V> entry = (Map.Entry<K, V>) o;
K key = entry.getKey();
if (!inRange(key))
if (!inRange(key)) {
return false;
}
TrieEntry<K, V> node = getEntry(key);
if (node != null && valEquals(node.getValue(), entry.getValue())) {
removeEntry(node);

View file

@ -73,8 +73,8 @@ public interface Trie<K, V> extends SortedMap<K, V> {
* L = 1001100 <br>
* <p>
* If the Trie contained 'H' and 'L', a lookup of 'D' would return 'L',
* because the XOR distance between D & L is smaller than the XOR distance
* between D & H.
* because the XOR distance between D &amp; L is smaller than the XOR distance
* between D &amp; H.
*/
V select(K key);
@ -111,47 +111,4 @@ public interface Trie<K, V> extends SortedMap<K, V> {
* till the end.
*/
Map.Entry<K, V> traverse(Cursor<? super K, ? super V> cursor);
/**
* An interface used by a {@link Trie}. A {@link Trie} selects items by
* closeness and passes the items to the <code>Cursor</code>. You can then
* decide what to do with the key-value pair and the return value
* from {@link #select(java.util.Map.Entry)} tells the <code>Trie</code>
* what to do next.
* <p>
* <code>Cursor</code> returns status/selection status might be:
* <table cellspace="5">
* <tr><td><b>Return Value</b></td><td><b>Status</b></td></tr>
* <tr><td>EXIT</td><td>Finish the Trie operation</td></tr>
* <tr><td>CONTINUE</td><td>Look at the next element in the traversal</td></tr>
* <tr><td>REMOVE_AND_EXIT</td><td>Remove the entry and stop iterating</td></tr>
* <tr><td>REMOVE</td><td>Remove the entry and continue iterating</td></tr>
* </table>
* <p>
* Note: {@link Trie#select(Object, Trie.Cursor)} does
* not support <code>REMOVE</code>.
*
* @param <K> Key Type
* @param <V> Key Value
*/
interface Cursor<K, V> {
/**
* Notification that the Trie is currently looking at the given entry.
* Return <code>EXIT</code> to finish the Trie operation,
* <code>CONTINUE</code> to look at the next entry, <code>REMOVE</code>
* to remove the entry and continue iterating, or
* <code>REMOVE_AND_EXIT</code> to remove the entry and stop iterating.
* Not all operations support <code>REMOVE</code>.
*/
SelectStatus select(Map.Entry<? extends K, ? extends V> entry);
/**
* The mode during selection.
*/
enum SelectStatus {
EXIT, CONTINUE, REMOVE, REMOVE_AND_EXIT
}
}
}

View file

@ -1,21 +0,0 @@
package org.xbib.datastructures.trie.limewire;
import java.util.Iterator;
/**
* A convenience class to aid in developing iterators that cannot be modified.
*/
public abstract class UnmodifiableIterator<E> implements Iterator<E> {
/**
* Throws <code>UnsupportedOperationException</code>.
*/
public final void remove() {
throw new UnsupportedOperationException();
}
}

View file

@ -0,0 +1,6 @@
/**
* WireShare (LimeWire "Pirate Edition") PatriciaTrie
*
* https://sourceforge.net/projects/wireshare/
*/
package org.xbib.datastructures.trie.limewire;

View file

@ -0,0 +1,5 @@
/**
* Taken from
* https://github.com/rkapsi/patricia-trie
*/
package org.xbib.datastructures.trie.patricia;

View file

@ -3,8 +3,10 @@ package org.xbib.datastructures.trie.radix;
/**
* Exception thrown if a duplicate key is inserted in a {@link RadixTree}
*/
@SuppressWarnings("serial")
public class DuplicateKeyException extends RuntimeException {
public DuplicateKeyException(String msg) {
super(msg);
}
}
}

View file

@ -8,7 +8,7 @@ import java.util.List;
*
* @param <T>
*/
class Node<T> {
public class Node<T> {
private String key;

View file

@ -0,0 +1,121 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.Comparator;
import java.util.Iterator;
import java.util.NavigableMap;
import java.util.Set;
import java.util.Spliterator;
final class AscendingSubMap<K, V> extends NavigableSubMap<K, V> {
AscendingSubMap(AdaptiveRadixTree<K, V> m,
boolean fromStart, K lo, boolean loInclusive,
boolean toEnd, K hi, boolean hiInclusive) {
super(m, fromStart, lo, loInclusive, toEnd, hi, hiInclusive);
}
@Override
public Comparator<? super K> comparator() {
return m.comparator();
}
@Override
public NavigableMap<K, V> subMap(K fromKey, boolean fromInclusive,
K toKey, boolean toInclusive) {
if (!inRange(fromKey, fromInclusive))
throw new IllegalArgumentException("fromKey out of range");
if (!inRange(toKey, toInclusive))
throw new IllegalArgumentException("toKey out of range");
return new AscendingSubMap<>(m,
false, fromKey, fromInclusive,
false, toKey, toInclusive);
}
// TODO: offer another ctor to take in loBytes
@Override
public NavigableMap<K, V> headMap(K toKey, boolean inclusive) {
if (!inRange(toKey, inclusive))
throw new IllegalArgumentException("toKey out of range");
return new AscendingSubMap<>(m,
fromStart, lo, loInclusive,
false, toKey, inclusive);
}
// TODO: offer another ctor to take in hiBytes
@Override
public NavigableMap<K, V> tailMap(K fromKey, boolean inclusive) {
if (!inRange(fromKey, inclusive))
throw new IllegalArgumentException("fromKey out of range");
return new AscendingSubMap<>(m,
false, fromKey, inclusive,
toEnd, hi, hiInclusive);
}
@Override
public NavigableMap<K, V> descendingMap() {
NavigableMap<K, V> mv = descendingMapView;
return (mv != null) ? mv :
(descendingMapView =
new DescendingSubMap<>(m,
fromStart, lo, loInclusive,
toEnd, hi, hiInclusive));
}
@Override
Iterator<K> keyIterator() {
return new SubMapKeyIterator(absLowest(), absHighFence());
}
@Override
Spliterator<K> keySpliterator() {
return new SubMapKeyIterator(absLowest(), absHighFence());
}
@Override
Iterator<K> descendingKeyIterator() {
return new DescendingSubMapKeyIterator(absHighest(), absLowFence());
}
@Override
public Set<Entry<K, V>> entrySet() {
EntrySetView es = entrySetView;
return (es != null) ? es : (entrySetView = new AscendingEntrySetView());
}
@Override
LeafNode<K, V> subLowest() {
return absLowest();
}
@Override
LeafNode<K, V> subHighest() {
return absHighest();
}
@Override
LeafNode<K, V> subCeiling(K key) {
return absCeiling(key);
}
@Override
LeafNode<K, V> subHigher(K key) {
return absHigher(key);
}
@Override
LeafNode<K, V> subFloor(K key) {
return absFloor(key);
}
@Override
LeafNode<K, V> subLower(K key) {
return absLower(key);
}
final class AscendingEntrySetView extends EntrySetView {
@Override
public Iterator<Entry<K, V>> iterator() {
return new SubMapEntryIterator(absLowest(), absHighFence());
}
}
}

View file

@ -0,0 +1,54 @@
package org.xbib.datastructures.trie.radix.adaptive;
/**
* For using {@link AdaptiveRadixTree}, keys need to be transformed into binary comparable keys:
* byte array representations of the keys such that lexicographic comparison over the byte
* arrays yields the same order as comparing the keys themselves.
*
* <h2>Example of key transformation</h2>
* <h3>Signed integers</h3>
* Signed integers are stored in two's complement notation.
* This means that negative integers always have their MSB set and hence are
* bitwise lexicographically greater than positive integers.
* <p>
* For example -1 in 2's complement form is 1111 1111 1111 1111 1111 1111 1111 1111,
* whereas +1 is 0000 0000 0000 0000 0000 0000 0000 0001.
* <p>
* This is not the correct binary comparable transformation since
* +1 &gt; -1 but the above transformation lexicographically orders +1 before -1.
* <p>
* In this case, the right transformation is obtained by flipping the sign bit.
* <p>
* Therefore -1 will be 0111 1111 1111 1111 1111 1111 1111 1111 and +1 as 1000 0000 0000 0000 0000 0000 0000 0001.
*
* <h3>ASCII encoded character strings</h3>
* Naturally yield the expected order as 'a' &lt; 'b' and their respective byte values 97 &lt; 98 obey the order.
*
* <h3>IPv4 addresses</h3>
* Naturally yield the expected order since each octet is an unsigned byte and unsigned types in binary have the expected lexicographic ordering.
* <p>
* For example, 12.10.192.0 &lt; 12.10.199.255 and their respective binary representation 00001100.00001010.11000000.00000000 is lexicographically smaller than 00001100.00001010.11000111.11111111.
*
* <h2>Implementing the interface</h2>
* <h3>Simple keys based on primitives and String</h3>
*
* <h3>Compound keys</h3>
* <h4>With only fixed length attributes</h4>
* Transform each attribute separately and concatenate the results.
* <p>
* For example, a compound key made up of two integers is transformed by concatenating the two 4-byte integer transformations.
*
* <h4>With variable length attributes</h4>
* The transformation of a variable length attribute that is followed by another attribute must end with a 0 byte; without it, the compound keys ("a", "bc") and ("ab", "c") would incorrectly compare equal. Note this only works if byte 0 is not part of the variable length attribute's key space, otherwise ("a\0", "b") would incorrectly be ordered before ("a", "b").
* <p>
* If byte 0 is part of the key space then the key transformation requires remapping every byte 0 as byte 0 followed by byte 1 and ending with two byte 0s. This is described in section IV.B (e).
*
* <h2>Further reading</h2>
* Section IV of the paper.
*
* @param <K> the key type to be used in {@link AdaptiveRadixTree}
*/
public interface BinaryComparable<K> {
byte[] get(K key);
}
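
A minimal sketch of the signed-integer transformation described above: flip the sign bit, then emit big-endian bytes, so unsigned byte-wise lexicographic order matches signed integer order. The class name is illustrative; a fixed-length compound key of two ints would concatenate two such 4-byte arrays.

import java.nio.ByteBuffer;

public final class IntegerBinaryComparable implements BinaryComparable<Integer> {
    @Override
    public byte[] get(Integer key) {
        // key ^ Integer.MIN_VALUE flips the sign bit:
        // -1 -> 0x7FFFFFFF and +1 -> 0x80000001, restoring signed order
        // under byte-wise lexicographic comparison of the big-endian bytes.
        return ByteBuffer.allocate(Integer.BYTES).putInt(key ^ Integer.MIN_VALUE).array();
    }
}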

View file

@ -0,0 +1,12 @@
package org.xbib.datastructures.trie.radix.adaptive;
final class DescendingKeyIterator<K, V> extends PrivateEntryIterator<K, V, K> {
DescendingKeyIterator(AdaptiveRadixTree<K, V> m, LeafNode<K, V> last) {
super(m, last);
}
@Override
public K next() {
return prevEntry().getKey();
}
}

View file

@ -0,0 +1,121 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.Comparator;
import java.util.Iterator;
import java.util.NavigableMap;
import java.util.Set;
import java.util.Spliterator;
final class DescendingSubMap<K, V> extends NavigableSubMap<K, V> {
DescendingSubMap(AdaptiveRadixTree<K, V> m,
boolean fromStart, K lo, boolean loInclusive,
boolean toEnd, K hi, boolean hiInclusive) {
super(m, fromStart, lo, loInclusive, toEnd, hi, hiInclusive);
}
@Override
public Comparator<? super K> comparator() {
return m.comparator();
}
// create a new submap out of a submap.
// the new bounds should be within the current submap's bounds
@Override
public NavigableMap<K, V> subMap(K fromKey, boolean fromInclusive,
K toKey, boolean toInclusive) {
if (!inRange(fromKey, fromInclusive))
throw new IllegalArgumentException("fromKey out of range");
if (!inRange(toKey, toInclusive))
throw new IllegalArgumentException("toKey out of range");
return new DescendingSubMap<>(m,
false, toKey, toInclusive,
false, fromKey, fromInclusive);
}
@Override
public NavigableMap<K, V> headMap(K toKey, boolean inclusive) {
if (!inRange(toKey, inclusive))
throw new IllegalArgumentException("toKey out of range");
return new DescendingSubMap<>(m,
false, toKey, inclusive,
toEnd, hi, hiInclusive);
}
@Override
public NavigableMap<K, V> tailMap(K fromKey, boolean inclusive) {
if (!inRange(fromKey, inclusive))
throw new IllegalArgumentException("fromKey out of range");
return new DescendingSubMap<>(m,
fromStart, lo, loInclusive,
false, fromKey, inclusive);
}
@Override
public NavigableMap<K, V> descendingMap() {
NavigableMap<K, V> mv = descendingMapView;
return (mv != null) ? mv :
(descendingMapView =
new AscendingSubMap<>(m,
fromStart, lo, loInclusive,
toEnd, hi, hiInclusive));
}
@Override
Iterator<K> keyIterator() {
return new DescendingSubMapKeyIterator(absHighest(), absLowFence());
}
@Override
Spliterator<K> keySpliterator() {
return new DescendingSubMapKeyIterator(absHighest(), absLowFence());
}
@Override
Iterator<K> descendingKeyIterator() {
return new SubMapKeyIterator(absLowest(), absHighFence());
}
@Override
public Set<Entry<K, V>> entrySet() {
EntrySetView es = entrySetView;
return (es != null) ? es : (entrySetView = new DescendingEntrySetView());
}
@Override
LeafNode<K, V> subLowest() {
return absHighest();
}
@Override
LeafNode<K, V> subHighest() {
return absLowest();
}
@Override
LeafNode<K, V> subCeiling(K key) {
return absFloor(key);
}
@Override
LeafNode<K, V> subHigher(K key) {
return absLower(key);
}
@Override
LeafNode<K, V> subFloor(K key) {
return absCeiling(key);
}
@Override
LeafNode<K, V> subLower(K key) {
return absHigher(key);
}
final class DescendingEntrySetView extends EntrySetView {
@Override
public Iterator<Entry<K, V>> iterator() {
return new DescendingSubMapEntryIterator(absHighest(), absLowFence());
}
}
}

View file

@ -0,0 +1,14 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.Map;
final class EntryIterator<K, V> extends PrivateEntryIterator<K, V, Map.Entry<K, V>> {
EntryIterator(AdaptiveRadixTree<K, V> m, LeafNode<K, V> first) {
super(m, first);
}
@Override
public Map.Entry<K, V> next() {
return nextEntry();
}
}

View file

@ -0,0 +1,54 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.AbstractSet;
import java.util.Iterator;
import java.util.Map;
class EntrySet<K, V> extends AbstractSet<Map.Entry<K, V>> {
private final AdaptiveRadixTree<K, V> m;
EntrySet(AdaptiveRadixTree<K, V> m) {
this.m = m;
}
@Override
public Iterator<Map.Entry<K, V>> iterator() {
return m.entryIterator();
}
@Override
public boolean contains(Object o) {
if (!(o instanceof Map.Entry))
return false;
Map.Entry<?, ?> entry = (Map.Entry<?, ?>) o;
Object value = entry.getValue();
LeafNode<K, V> p = m.getEntry(entry.getKey());
return p != null && AdaptiveRadixTree.valEquals(p.getValue(), value);
}
@Override
public boolean remove(Object o) {
if (!(o instanceof Map.Entry))
return false;
Map.Entry<?, ?> entry = (Map.Entry<?, ?>) o;
Object value = entry.getValue();
LeafNode<K, V> p = m.getEntry(entry.getKey());
if (p != null && AdaptiveRadixTree.valEquals(p.getValue(), value)) {
m.deleteEntry(p);
return true;
}
return false;
}
@Override
public int size() {
return m.size();
}
@Override
public void clear() {
m.clear();
}
// TODO: implement Spliterator
}

View file

@ -0,0 +1,159 @@
package org.xbib.datastructures.trie.radix.adaptive;
/*
These are internal contracts/interfaces.
They cover only what is needed internally. For example, InnerNode#removeChild could
return false to signal a failed remove when no entry exists for the partialKey, but the
return value is of no use in the code so far: it is only called from places where the
entry is known to exist.
Since they're internal, we can change them later if a better contract makes more sense.
The implementations have assert conditions all around to make sure the methods are
called in the right state. For example, you should not call shrink() if the node is not
ready to shrink; and when calling last() on Node16 or larger, we're sure we'll have at
least a minimum number of children, hence it is safe to return child[noOfChildren - 1]
without worrying about bounds.
*/
abstract class InnerNode extends Node {
static final int PESSIMISTIC_PATH_COMPRESSION_LIMIT = 8;
// max limit of 8 bytes (Pessimistic)
final byte[] prefixKeys;
final Node[] child;
// Optimistic
int prefixLen; // 4 bytes
// TODO: we could save space by making this a byte and returning
// Byte.toUnsignedInt wherever comparison with it is done.
short noOfChildren;
InnerNode(int size) {
prefixKeys = new byte[PESSIMISTIC_PATH_COMPRESSION_LIMIT];
child = new Node[size + 1];
}
// copy ctor. called when growing/shrinking
InnerNode(InnerNode node, int size) {
super(node);
child = new Node[size + 1];
// copy header
this.noOfChildren = node.noOfChildren;
this.prefixLen = node.prefixLen;
this.prefixKeys = node.prefixKeys;
// copy leaf & replace uplink
child[size] = node.getLeaf();
if (child[size] != null) {
replaceUplink(this, child[size]);
}
}
public void removeLeaf() {
removeUplink(child[child.length - 1]);
child[child.length - 1] = null;
}
public boolean hasLeaf() {
return child[child.length - 1] != null;
}
public LeafNode<?, ?> getLeaf() {
return (LeafNode<?, ?>) child[child.length - 1];
}
public void setLeaf(LeafNode<?, ?> leaf) {
child[child.length - 1] = leaf;
createUplink(this, leaf);
}
@Override
public Node firstOrLeaf() {
if (hasLeaf()) {
return getLeaf();
}
return first();
}
Node[] getChild() {
return child;
}
/**
* @return the number of children this Node has
*/
public short size() {
return noOfChildren;
}
/**
* @param partialKey the partial key to look up in this node
* @return the child pointer mapped to the given partialKey.
* Returns null if there is no corresponding entry.
*/
abstract Node findChild(byte partialKey);
/**
* @param partialKey the partial key to search from
* @return a child whose key is equal to or greater than the given partial key, or null if there is no such child
*/
abstract Node ceil(byte partialKey);
/**
* @param partialKey the partial key to search from
* @return a child whose key is equal to or less than the given partial key, or null if there is no such child
*/
abstract Node floor(byte partialKey);
/**
* Note: caller needs to check if {@link InnerNode} {@link #isFull()} before calling this.
* If it is full then call {@link #grow()} followed by {@link #addChild(byte, Node)} on the new node.
*
* @param partialKey partialKey to be mapped
* @param child the child node to be added
*/
abstract void addChild(byte partialKey, Node child);
/**
* @param partialKey for which the child pointer mapping is to be updated
* @param newChild the new mapping to be added for given partialKey
*/
abstract void replace(byte partialKey, Node newChild);
/**
* @param partialKey for which the child pointer mapping is to be removed
*/
abstract void removeChild(byte partialKey);
/**
* creates and returns the next larger node type with the same mappings as this node
*
* @return a new node with the same mappings
*/
abstract InnerNode grow();
abstract boolean shouldShrink();
/**
* creates and returns a smaller node type with the same mappings as this node
*
* @return a smaller node with the same mappings
*/
abstract InnerNode shrink();
/**
* @return true if this node has reached its capacity
*/
abstract boolean isFull();
/**
* @return the child node for the smallest partialKey strictly greater than the partialKey passed.
* Returns null if no such child.
*/
abstract Node greater(byte partialKey);
/**
* @return the child node for the greatest partialKey strictly less than the partialKey passed.
* Returns null if no such child.
*/
abstract Node lesser(byte partialKey);
}
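
The addChild/grow contract above leaves the capacity check to the caller. A minimal sketch of the expected call pattern, directly instantiating the findChild/addChild/grow javadoc (the helper itself is illustrative, not from this commit):

// illustrative caller-side pattern for the addChild()/grow() contract
static InnerNode insertChild(InnerNode node, byte partialKey, Node child) {
    if (node.isFull()) {
        // grow to the next larger type (Node4 -> Node16 -> Node48 -> Node256)
        InnerNode larger = node.grow();
        larger.addChild(partialKey, child);
        // the caller must re-link 'larger' into the tree in place of 'node'
        return larger;
    }
    node.addChild(partialKey, child);
    return node;
}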

View file

@ -0,0 +1,12 @@
package org.xbib.datastructures.trie.radix.adaptive;
final class KeyIterator<K, V> extends PrivateEntryIterator<K, V, K> {
KeyIterator(AdaptiveRadixTree<K, V> m, LeafNode<K, V> first) {
super(m, first);
}
@Override
public K next() {
return nextEntry().getKey();
}
}

View file

@ -0,0 +1,152 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.AbstractSet;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.SortedSet;
// implementation simply relays/delegates calls to backing map's methods
final class KeySet<E> extends AbstractSet<E> implements NavigableSet<E> {
private final NavigableMap<E, ?> m;
KeySet(NavigableMap<E, ?> map) {
m = map;
}
// this KeySet can only be created either on ART or on one of its subMaps
@Override
@SuppressWarnings("unchecked")
public Iterator<E> iterator() {
if (m instanceof AdaptiveRadixTree)
return ((AdaptiveRadixTree<E, ?>) m).keyIterator();
else
return ((NavigableSubMap<E, ?>) m).keyIterator();
}
// this KeySet can only be created either on ART or on one of its subMaps
@Override
@SuppressWarnings("unchecked")
public Iterator<E> descendingIterator() {
if (m instanceof AdaptiveRadixTree)
return ((AdaptiveRadixTree<E, ?>) m).descendingKeyIterator();
else
return ((NavigableSubMap<E, ?>) m).descendingKeyIterator();
}
@Override
public int size() {
return m.size();
}
@Override
public boolean isEmpty() {
return m.isEmpty();
}
@Override
public boolean contains(Object o) {
return m.containsKey(o);
}
@Override
public void clear() {
m.clear();
}
@Override
public E lower(E e) {
return m.lowerKey(e);
}
@Override
public E floor(E e) {
return m.floorKey(e);
}
@Override
public E ceiling(E e) {
return m.ceilingKey(e);
}
@Override
public E higher(E e) {
return m.higherKey(e);
}
@Override
public E first() {
return m.firstKey();
}
@Override
public E last() {
return m.lastKey();
}
@Override
public Comparator<? super E> comparator() {
return m.comparator();
}
@Override
public E pollFirst() {
Map.Entry<E, ?> e = m.pollFirstEntry();
return (e == null) ? null : e.getKey();
}
@Override
public E pollLast() {
Map.Entry<E, ?> e = m.pollLastEntry();
return (e == null) ? null : e.getKey();
}
@Override
public boolean remove(Object o) {
int oldSize = size();
m.remove(o);
return size() != oldSize;
}
@Override
public NavigableSet<E> subSet(E fromElement, boolean fromInclusive,
E toElement, boolean toInclusive) {
return new KeySet<>(m.subMap(fromElement, fromInclusive,
toElement, toInclusive));
}
@Override
public NavigableSet<E> headSet(E toElement, boolean inclusive) {
return new KeySet<>(m.headMap(toElement, inclusive));
}
@Override
public NavigableSet<E> tailSet(E fromElement, boolean inclusive) {
return new KeySet<>(m.tailMap(fromElement, inclusive));
}
@Override
public SortedSet<E> subSet(E fromElement, E toElement) {
return subSet(fromElement, true, toElement, false);
}
@Override
public SortedSet<E> headSet(E toElement) {
return headSet(toElement, false);
}
@Override
public SortedSet<E> tailSet(E fromElement) {
return tailSet(fromElement, true);
}
@Override
public NavigableSet<E> descendingSet() {
return new KeySet<>(m.descendingMap());
}
// TODO: implement Spliterator
}

View file

@ -0,0 +1,92 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.Arrays;
import java.util.Map;
/*
currently we use what the paper mentions as "Single-value" leaves
*/
class LeafNode<K, V> extends Node implements Map.Entry<K, V> {
// we have to save the keyBytes, because leaves are lazily expanded at times
private final byte[] keyBytes;
private final K key;
private V value;
LeafNode(byte[] keyBytes, K key, V value) {
this.value = value;
// defensive copy
this.keyBytes = Arrays.copyOf(keyBytes, keyBytes.length);
this.key = key;
}
public V setValue(V value) {
V oldValue = this.value;
this.value = value;
return oldValue;
}
public V getValue() {
return value;
}
byte[] getKeyBytes() {
return keyBytes;
}
public K getKey() {
return key;
}
/**
* Dev note: first() returns null so that SortedMap.firstKey() can detect the end of traversal
*/
@Override
public Node first() {
return null;
}
@Override
public Node firstOrLeaf() {
return null;
}
/**
* Dev note: last() returns null so that SortedMap.lastKey() can detect the end of traversal
*/
@Override
public Node last() {
return null;
}
/**
* Compares this <code>Map.Entry</code> with another <code>Map.Entry</code>.
* <p>
* Implemented per API documentation of {@link java.util.Map.Entry#equals(Object)}
*
* @param obj the object to compare to
* @return true if equal key and value
*/
@Override
public boolean equals(final Object obj) {
if (obj == this) {
return true;
}
if (!(obj instanceof Map.Entry)) {
return false;
}
final Map.Entry<?, ?> other = (Map.Entry<?, ?>) obj;
return (getKey() == null ? other.getKey() == null : getKey().equals(other.getKey())) &&
(getValue() == null ? other.getValue() == null : getValue().equals(other.getValue()));
}
@Override
public int hashCode() {
return (getKey() == null ? 0 : getKey().hashCode()) ^
(getValue() == null ? 0 : getValue().hashCode());
}
@Override
public String toString() {
return key + "=" + value;
}
}

View file

@ -0,0 +1,596 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.Comparator;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.SortedMap;
import java.util.Spliterator;
import java.util.function.Consumer;
/**
* A NavigableMap view that checks every passed-in key against its lower and upper
* bounds for all the map methods and then relays the call
* into the backing map
*/
abstract class NavigableSubMap<K, V> extends AbstractMap<K, V>
implements NavigableMap<K, V> {
// Dummy value serving as unmatchable fence key for unbounded SubMapIterators
private static final Object UNBOUNDED = new Object();
final AdaptiveRadixTree<K, V> m;
/**
* Endpoints are represented as triples (fromStart, lo,
* loInclusive) and (toEnd, hi, hiInclusive). If fromStart is
* true, then the low (absolute) bound is the start of the
* backing map, and the other values are ignored. Otherwise,
* if loInclusive is true, lo is the inclusive bound, else lo
* is the exclusive bound. Similarly for the upper bound.
*/
final K lo, hi;
final byte[] loBytes, hiBytes;
final boolean fromStart, toEnd;
final boolean loInclusive, hiInclusive;
transient NavigableMap<K, V> descendingMapView;
transient NavigableSubMap<K, V>.EntrySetView entrySetView;
transient KeySet<K> navigableKeySetView;
NavigableSubMap(AdaptiveRadixTree<K, V> m,
boolean fromStart, K lo, boolean loInclusive,
boolean toEnd, K hi, boolean hiInclusive) {
this.loBytes = fromStart ? null : m.binaryComparable().get(lo);
this.hiBytes = toEnd ? null : m.binaryComparable().get(hi);
if (!fromStart && !toEnd) {
if (AdaptiveRadixTree.compare(loBytes, 0, loBytes.length, hiBytes, 0, hiBytes.length) > 0)
throw new IllegalArgumentException("fromKey > toKey");
}
this.m = m;
this.fromStart = fromStart;
this.lo = lo;
this.loInclusive = loInclusive;
this.toEnd = toEnd;
this.hi = hi;
this.hiInclusive = hiInclusive;
}
final boolean tooLow(K key) {
if (!fromStart) {
int c = m.compare(key, loBytes);
// if c == 0 and the lower bound is exclusive,
// then this key is too low;
// else it is not, since it is exactly at our lower bound
return c < 0 || (c == 0 && !loInclusive);
}
// we don't have a lower bound
return false;
}
/*
* Absolute versions of relation operations.
* Subclasses map to these using like-named "sub"
* versions that invert senses for descending maps
*/
final boolean tooHigh(K key) {
if (!toEnd) {
int c = m.compare(key, hiBytes);
// if c == 0 and the upper bound is exclusive,
// then this key is too high;
// else it is not, since it is exactly at our upper bound
return c > 0 || (c == 0 && !hiInclusive);
}
// we don't have an upper bound
return false;
}
final boolean inRange(K key) {
return !tooLow(key) && !tooHigh(key);
}
final boolean inClosedRange(K key) {
// if we have neither an upper nor a lower bound, then all keys are always in range.
// if we have a lower bound, this key must be greater than or equal to it (closed, hence inclusive).
// if we have an upper bound, this key must be less than or equal to it (closed, hence inclusive).
return (fromStart || m.compare(key, loBytes) >= 0)
&& (toEnd || m.compare(key, hiBytes) <= 0);
}
final boolean inRange(K key, boolean inclusive) {
return inclusive ? inRange(key) : inClosedRange(key);
}
final LeafNode<K, V> absLowest() {
LeafNode<K, V> e =
(fromStart ? m.getFirstEntry() :
(loInclusive ? m.getCeilingEntry(loBytes) :
m.getHigherEntry(loBytes)));
return (e == null || tooHigh(e.getKey())) ? null : e;
}
final LeafNode<K, V> absHighest() {
LeafNode<K, V> e =
(toEnd ? m.getLastEntry() :
(hiInclusive ? m.getFloorEntry(hiBytes) :
m.getLowerEntry(hiBytes)));
return (e == null || tooLow(e.getKey())) ? null : e;
}
final LeafNode<K, V> absCeiling(K key) {
if (tooLow(key))
return absLowest();
LeafNode<K, V> e = m.getCeilingEntry(key);
return (e == null || tooHigh(e.getKey())) ? null : e;
}
final LeafNode<K, V> absHigher(K key) {
if (tooLow(key))
return absLowest();
LeafNode<K, V> e = m.getHigherEntry(key);
return (e == null || tooHigh(e.getKey())) ? null : e;
}
// Abstract methods defined in ascending vs descending classes
// These relay to the appropriate absolute versions
final LeafNode<K, V> absFloor(K key) {
if (tooHigh(key))
return absHighest();
LeafNode<K, V> e = m.getFloorEntry(key);
return (e == null || tooLow(e.getKey())) ? null : e;
}
final LeafNode<K, V> absLower(K key) {
if (tooHigh(key))
return absHighest();
LeafNode<K, V> e = m.getLowerEntry(key);
return (e == null || tooLow(e.getKey())) ? null : e;
}
/**
* Returns the absolute high fence for ascending traversal
*/
final LeafNode<K, V> absHighFence() {
return (toEnd ? null : (hiInclusive ?
m.getHigherEntry(hiBytes) :
m.getCeilingEntry(hiBytes))); // in the exclusive case the fence is hi itself, if present (we want the entry, hence a traversal is required)
}
/**
* Return the absolute low fence for descending traversal
*/
final LeafNode<K, V> absLowFence() {
return (fromStart ? null : (loInclusive ?
m.getLowerEntry(loBytes) :
m.getFloorEntry(loBytes))); // in the exclusive case the fence is lo itself, if present (we want the entry, hence a traversal is required)
}
abstract LeafNode<K, V> subLowest();
abstract LeafNode<K, V> subHighest();
/* Returns ascending iterator from the perspective of this submap */
abstract LeafNode<K, V> subCeiling(K key);
abstract LeafNode<K, V> subHigher(K key);
/* Returns descending iterator from the perspective of this submap*/
abstract LeafNode<K, V> subFloor(K key);
abstract LeafNode<K, V> subLower(K key);
abstract Iterator<K> keyIterator();
abstract Spliterator<K> keySpliterator();
abstract Iterator<K> descendingKeyIterator();
// public methods
@Override
public boolean isEmpty() {
return (fromStart && toEnd) ? m.isEmpty() : entrySet().isEmpty();
}
@Override
public int size() {
return (fromStart && toEnd) ? m.size() : entrySet().size();
}
@SuppressWarnings("unchecked")
@Override
public final boolean containsKey(Object key) {
return inRange((K) key) && m.containsKey(key);
}
@Override
public final V put(K key, V value) {
if (!inRange(key))
throw new IllegalArgumentException("key out of range");
return m.put(key, value);
}
@SuppressWarnings("unchecked")
@Override
public final V get(Object key) {
return !inRange((K) key) ? null : m.get(key);
}
@SuppressWarnings("unchecked")
@Override
public final V remove(Object key) {
return !inRange((K) key) ? null : m.remove(key);
}
@Override
public final Map.Entry<K, V> ceilingEntry(K key) {
return AdaptiveRadixTree.exportEntry(subCeiling(key));
}
@Override
public final K ceilingKey(K key) {
return AdaptiveRadixTree.keyOrNull(subCeiling(key));
}
@Override
public final Map.Entry<K, V> higherEntry(K key) {
return AdaptiveRadixTree.exportEntry(subHigher(key));
}
@Override
public final K higherKey(K key) {
return AdaptiveRadixTree.keyOrNull(subHigher(key));
}
@Override
public final Map.Entry<K, V> floorEntry(K key) {
return AdaptiveRadixTree.exportEntry(subFloor(key));
}
@Override
public final K floorKey(K key) {
return AdaptiveRadixTree.keyOrNull(subFloor(key));
}
@Override
public final Map.Entry<K, V> lowerEntry(K key) {
return AdaptiveRadixTree.exportEntry(subLower(key));
}
@Override
public final K lowerKey(K key) {
return AdaptiveRadixTree.keyOrNull(subLower(key));
}
@Override
public final K firstKey() {
return AdaptiveRadixTree.key(subLowest());
}
@Override
public final K lastKey() {
return AdaptiveRadixTree.key(subHighest());
}
@Override
public final Map.Entry<K, V> firstEntry() {
return AdaptiveRadixTree.exportEntry(subLowest());
}
@Override
public final Map.Entry<K, V> lastEntry() {
return AdaptiveRadixTree.exportEntry(subHighest());
}
@Override
public final Map.Entry<K, V> pollFirstEntry() {
LeafNode<K, V> e = subLowest();
Map.Entry<K, V> result = AdaptiveRadixTree.exportEntry(e);
if (e != null)
m.deleteEntry(e);
return result;
}
@Override
public final Map.Entry<K, V> pollLastEntry() {
LeafNode<K, V> e = subHighest();
Map.Entry<K, V> result = AdaptiveRadixTree.exportEntry(e);
if (e != null)
m.deleteEntry(e);
return result;
}
@Override
public final NavigableSet<K> navigableKeySet() {
KeySet<K> nksv = navigableKeySetView;
return (nksv != null) ? nksv :
(navigableKeySetView = new KeySet<>(this));
}
@Override
public final Set<K> keySet() {
return navigableKeySet();
}
@Override
public NavigableSet<K> descendingKeySet() {
return descendingMap().navigableKeySet();
}
@Override
public final SortedMap<K, V> subMap(K fromKey, K toKey) {
return subMap(fromKey, true, toKey, false);
}
@Override
public final SortedMap<K, V> headMap(K toKey) {
return headMap(toKey, false);
}
// View classes
@Override
public final SortedMap<K, V> tailMap(K fromKey) {
return tailMap(fromKey, true);
}
// entry set views for submaps
abstract class EntrySetView extends AbstractSet<Entry<K, V>> {
private transient int size = -1, sizeModCount;
// if the submap does not define any upper and lower bounds
// i.e. it is the same view as the original map (very unlikely)
// then no need to explicitly calculate the size.
@Override
public int size() {
if (fromStart && toEnd)
return m.size();
// if size == -1, this is the first time we're calculating the size
// if sizeModCount != m.getModCount(), the map has been structurally modified,
// so its size may have changed: recalculate.
if (size == -1 || sizeModCount != m.getModCount()) {
sizeModCount = m.getModCount();
size = 0;
Iterator<?> i = iterator();
while (i.hasNext()) {
size++;
i.next();
}
}
return size;
}
@Override
public boolean isEmpty() {
LeafNode<K, V> n = absLowest();
return n == null || tooHigh(n.getKey());
}
@SuppressWarnings("unchecked")
@Override
public boolean contains(Object o) {
if (!(o instanceof Map.Entry))
return false;
Map.Entry<?, ?> entry = (Map.Entry<?, ?>) o;
Object key = entry.getKey();
if (!inRange((K) key))
return false;
LeafNode<?, ?> node = m.getEntry(key);
return node != null &&
AdaptiveRadixTree.valEquals(node.getValue(), entry.getValue());
}
@SuppressWarnings("unchecked")
@Override
public boolean remove(Object o) {
if (!(o instanceof Map.Entry))
return false;
Map.Entry<?, ?> entry = (Map.Entry<?, ?>) o;
Object key = entry.getKey();
if (!inRange((K) key))
return false;
LeafNode<K, V> node = m.getEntry(key);
if (node != null && AdaptiveRadixTree.valEquals(node.getValue(),
entry.getValue())) {
m.deleteEntry(node);
return true;
}
return false;
}
}
/*
* Iterators for SubMaps
* that understand the submap's upper and lower bound while iterating.
* Fence is one of the bounds depending on the kind of iterator (ascending, descending)
* and first becomes the other one to start from.
*/
abstract class SubMapIterator<T> implements Iterator<T> {
final Object fenceKey;
LeafNode<K, V> lastReturned;
LeafNode<K, V> next;
int expectedModCount;
SubMapIterator(LeafNode<K, V> first,
LeafNode<K, V> fence) {
expectedModCount = m.getModCount();
lastReturned = null;
next = first;
fenceKey = fence == null ? UNBOUNDED : fence.getKey();
}
@Override
public final boolean hasNext() {
return next != null && next.getKey() != fenceKey;
}
final LeafNode<K, V> nextEntry() {
LeafNode<K, V> e = next;
if (e == null || e.getKey() == fenceKey)
throw new NoSuchElementException();
if (m.getModCount() != expectedModCount)
throw new ConcurrentModificationException();
next = AdaptiveRadixTree.successor(e);
lastReturned = e;
return e;
}
final LeafNode<K, V> prevEntry() {
LeafNode<K, V> e = next;
if (e == null || e.getKey() == fenceKey)
throw new NoSuchElementException();
if (m.getModCount() != expectedModCount)
throw new ConcurrentModificationException();
next = AdaptiveRadixTree.predecessor(e);
lastReturned = e;
return e;
}
@Override
public void remove() {
if (lastReturned == null)
throw new IllegalStateException();
if (m.getModCount() != expectedModCount)
throw new ConcurrentModificationException();
// deleted entries are replaced by their successors
// if (lastReturned.left != null && lastReturned.right != null)
// next = lastReturned;
m.deleteEntry(lastReturned);
lastReturned = null;
expectedModCount = m.getModCount();
}
}
final class SubMapEntryIterator extends SubMapIterator<Map.Entry<K, V>> {
SubMapEntryIterator(LeafNode<K, V> first,
LeafNode<K, V> fence) {
super(first, fence);
}
@Override
public Map.Entry<K, V> next() {
return nextEntry();
}
}
final class DescendingSubMapEntryIterator extends SubMapIterator<Map.Entry<K, V>> {
DescendingSubMapEntryIterator(LeafNode<K, V> last,
LeafNode<K, V> fence) {
super(last, fence);
}
@Override
public Map.Entry<K, V> next() {
return prevEntry();
}
}
// Implement minimal Spliterator as KeySpliterator backup
final class SubMapKeyIterator extends SubMapIterator<K>
implements Spliterator<K> {
SubMapKeyIterator(LeafNode<K, V> first,
LeafNode<K, V> fence) {
super(first, fence);
}
@Override
public K next() {
return nextEntry().getKey();
}
@Override
public Spliterator<K> trySplit() {
return null;
}
@Override
public void forEachRemaining(Consumer<? super K> action) {
while (hasNext())
action.accept(next());
}
@Override
public boolean tryAdvance(Consumer<? super K> action) {
if (hasNext()) {
action.accept(next());
return true;
}
return false;
}
// estimating size of submap would be expensive
// since we'd have to traverse from lower bound to upper bound
// for this submap
@Override
public long estimateSize() {
return Long.MAX_VALUE;
}
@Override
public int characteristics() {
return Spliterator.DISTINCT | Spliterator.ORDERED |
Spliterator.SORTED;
}
@Override
public final Comparator<? super K> getComparator() {
return NavigableSubMap.this.comparator();
}
}
final class DescendingSubMapKeyIterator extends SubMapIterator<K>
implements Spliterator<K> {
DescendingSubMapKeyIterator(LeafNode<K, V> last,
LeafNode<K, V> fence) {
super(last, fence);
}
@Override
public K next() {
return prevEntry().getKey();
}
@Override
public Spliterator<K> trySplit() {
return null;
}
@Override
public void forEachRemaining(Consumer<? super K> action) {
while (hasNext())
action.accept(next());
}
@Override
public boolean tryAdvance(Consumer<? super K> action) {
if (hasNext()) {
action.accept(next());
return true;
}
return false;
}
@Override
public long estimateSize() {
return Long.MAX_VALUE;
}
@Override
public int characteristics() {
return Spliterator.DISTINCT | Spliterator.ORDERED;
}
}
}
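
Together with AscendingSubMap and DescendingSubMap, this gives the ART the full TreeMap-style range views. A hedged usage sketch, assuming AdaptiveRadixTree exposes a constructor taking a BinaryComparable (see binaryComparable() above) and reusing the StringBinaryComparable sketched earlier:

import java.util.NavigableMap;

class SubMapDemo {
    public static void main(String[] args) {
        // assumed constructor; the tree holds a BinaryComparable internally
        NavigableMap<String, Integer> art =
                new AdaptiveRadixTree<>(new StringBinaryComparable());
        art.put("bar", 1);
        art.put("baz", 2);
        art.put("foo", 3);
        // range-checked view over ["bar", "foo"): puts outside the bounds throw
        NavigableMap<String, Integer> sub = art.subMap("bar", true, "foo", false);
        System.out.println(sub.firstKey());                 // bar
        System.out.println(sub.lastKey());                  // baz
        System.out.println(sub.descendingMap().firstKey()); // baz
    }
}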

View file

@ -0,0 +1,65 @@
package org.xbib.datastructures.trie.radix.adaptive;
abstract class Node {
// for upwards traversal
// dev note: wherever you setup downlinks, you setup uplinks as well
private InnerNode parent;
private byte partialKey;
Node() {
}
// copy ctor. called when growing/shrinking
Node(Node node) {
this.partialKey = node.partialKey;
this.parent = node.parent;
}
// do we need partial key for leaf nodes? we'll find out
static void createUplink(InnerNode parent, LeafNode<?, ?> child) {
Node c = child;
c.parent = parent;
}
static void createUplink(InnerNode parent, Node child, byte partialKey) {
child.parent = parent;
child.partialKey = partialKey;
}
// called when growing/shrinking and all children now have a new parent
static void replaceUplink(InnerNode parent, Node child) {
child.parent = parent;
}
static void removeUplink(Node child) {
child.parent = null;
}
/**
* @return child pointer for the smallest partialKey stored in this Node.
* Returns null if this node has no children.
*/
abstract Node first();
abstract Node firstOrLeaf();
/**
* @return child pointer for the largest partialKey stored in this Node.
* Returns null if this node has no children.
*/
abstract Node last();
/**
* @return the parent of this node. Returns null for root node.
*/
public InnerNode parent() {
return parent;
}
/**
* @return the uplinking partial key to parent
*/
public byte uplinkKey() {
return partialKey;
}
}

View file

@ -0,0 +1,186 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.Arrays;
class Node16 extends InnerNode {
static final int NODE_SIZE = 16;
private static final int BYTE_SHIFT = 1 << (Byte.SIZE - 1); // 2^7 = 128
private final byte[] keys = new byte[NODE_SIZE];
Node16(Node4 node) {
super(node, NODE_SIZE);
assert node.isFull();
byte[] keys = node.getKeys();
Node[] child = node.getChild();
System.arraycopy(keys, 0, this.keys, 0, node.noOfChildren);
System.arraycopy(child, 0, this.child, 0, node.noOfChildren);
// update up links
for (int i = 0; i < noOfChildren; i++) {
replaceUplink(this, this.child[i]);
}
}
Node16(Node48 node48) {
super(node48, NODE_SIZE);
assert node48.shouldShrink();
byte[] keyIndex = node48.getKeyIndex();
Node[] children = node48.getChild();
// keyIndex by virtue of being "array indexed" is already sorted
// so we can iterate and keep adding into Node16
for (int i = 0, j = 0; i < Node48.KEY_INDEX_SIZE; i++) {
if (keyIndex[i] != Node48.ABSENT) {
child[j] = children[keyIndex[i]];
keys[j] = unsigned(child[j].uplinkKey());
replaceUplink(this, child[j]);
j++;
}
}
}
static byte unsigned(byte b) {
return (byte) (b ^ BYTE_SHIFT);
}
@Override
public Node findChild(byte partialKey) {
// TODO: use simple loop to see if -XX:+SuperWord applies SIMD JVM intrinsics
partialKey = unsigned(partialKey);
for (int i = 0; i < noOfChildren; i++) {
if (keys[i] == partialKey) {
return child[i];
}
}
return null;
}
@Override
public void addChild(byte partialKey, Node child) {
assert !isFull();
byte unsignedPartialKey = unsigned(partialKey);
int index = Arrays.binarySearch(keys, 0, noOfChildren, unsignedPartialKey);
// the partialKey should not exist
assert index < 0;
int insertionPoint = -(index + 1);
// shift elements from this point to right by one place
assert insertionPoint <= noOfChildren;
for (int i = noOfChildren; i > insertionPoint; i--) {
keys[i] = keys[i - 1];
this.child[i] = this.child[i - 1];
}
keys[insertionPoint] = unsignedPartialKey;
this.child[insertionPoint] = child;
noOfChildren++;
createUplink(this, child, partialKey);
}
@Override
public void replace(byte partialKey, Node newChild) {
byte unsignedPartialKey = unsigned(partialKey);
int index = Arrays.binarySearch(keys, 0, noOfChildren, unsignedPartialKey);
assert index >= 0;
child[index] = newChild;
createUplink(this, newChild, partialKey);
}
@Override
public void removeChild(byte partialKey) {
assert !shouldShrink();
byte unsignedPartialKey = unsigned(partialKey);
int index = Arrays.binarySearch(keys, 0, noOfChildren, unsignedPartialKey);
// if this fails: how did we reach the leaf in the first place?
// this node must have held the follow-on pointer for the partialKey
assert index >= 0;
removeUplink(child[index]);
for (int i = index; i < noOfChildren - 1; i++) {
keys[i] = keys[i + 1];
child[i] = child[i + 1];
}
child[noOfChildren - 1] = null;
noOfChildren--;
}
@Override
public InnerNode grow() {
assert isFull();
return new Node48(this);
}
@Override
public boolean shouldShrink() {
return noOfChildren == Node4.NODE_SIZE;
}
@Override
public InnerNode shrink() {
assert shouldShrink() : "Haven't crossed shrinking threshold yet";
return new Node4(this);
}
@Override
public Node first() {
assert noOfChildren > Node4.NODE_SIZE;
return child[0];
}
@Override
public Node last() {
assert noOfChildren > Node4.NODE_SIZE;
return child[noOfChildren - 1];
}
@Override
public Node ceil(byte partialKey) {
partialKey = unsigned(partialKey);
for (int i = 0; i < noOfChildren; i++) {
if (keys[i] >= partialKey) {
return child[i];
}
}
return null;
}
@Override
public Node greater(byte partialKey) {
partialKey = unsigned(partialKey);
for (int i = 0; i < noOfChildren; i++) {
if (keys[i] > partialKey) {
return child[i];
}
}
return null;
}
@Override
public Node lesser(byte partialKey) {
partialKey = unsigned(partialKey);
for (int i = noOfChildren - 1; i >= 0; i--) {
if (keys[i] < partialKey) {
return child[i];
}
}
return null;
}
@Override
public Node floor(byte partialKey) {
partialKey = unsigned(partialKey);
for (int i = noOfChildren - 1; i >= 0; i--) {
if (keys[i] <= partialKey) {
return child[i];
}
}
return null;
}
@Override
public boolean isFull() {
return noOfChildren == NODE_SIZE;
}
byte[] getKeys() {
return keys;
}
}
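
Node4 and Node16 store partial keys through unsigned(), which flips the sign bit so that Java's signed byte comparison (and Arrays.binarySearch) yields unsigned byte order. A small self-contained illustration:

class UnsignedOrderDemo {
    // same transformation as in Node4/Node16: flip the sign bit
    static byte unsigned(byte b) {
        return (byte) (b ^ 0x80);
    }

    public static void main(String[] args) {
        // unsigned byte order is 0x00 < 0x7F < 0x80 < 0xFF;
        // after flipping: -128 < -1 < 0 < 127, so signed comparison agrees
        byte[] raw = {0x00, 0x7F, (byte) 0x80, (byte) 0xFF};
        for (byte b : raw) {
            System.out.printf("0x%02X -> %d%n", Byte.toUnsignedInt(b), unsigned(b));
        }
    }
}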

View file

@ -0,0 +1,140 @@
package org.xbib.datastructures.trie.radix.adaptive;
class Node256 extends InnerNode {
static final int NODE_SIZE = 256;
Node256(Node48 node) {
super(node, NODE_SIZE);
assert node.isFull();
byte[] keyIndex = node.getKeyIndex();
Node[] child = node.getChild();
for (int i = 0; i < Node48.KEY_INDEX_SIZE; i++) {
byte index = keyIndex[i];
if (index == Node48.ABSENT) {
continue;
}
assert index >= 0 && index <= 47;
// index is byte, but gets type promoted
// https://docs.oracle.com/javase/specs/jls/se7/html/jls-10.html#jls-10.4-120
this.child[i] = child[index];
// update up link
replaceUplink(this, this.child[i]);
}
}
@Override
public Node findChild(byte partialKey) {
// We treat the 8 bits as unsigned int since we've got 256 slots
int index = Byte.toUnsignedInt(partialKey);
return child[index];
}
@Override
public void addChild(byte partialKey, Node child) {
// addChild is never called on a full Node256,
// since findChild for any byte key
// always finds a child when the node is full.
assert !isFull();
int index = Byte.toUnsignedInt(partialKey);
assert this.child[index] == null;
createUplink(this, child, partialKey);
this.child[index] = child;
noOfChildren++;
}
@Override
public void replace(byte partialKey, Node newChild) {
int index = Byte.toUnsignedInt(partialKey);
assert child[index] != null;
child[index] = newChild;
createUplink(this, newChild, partialKey);
}
@Override
public void removeChild(byte partialKey) {
int index = Byte.toUnsignedInt(partialKey);
assert child[index] != null;
removeUplink(child[index]);
child[index] = null;
noOfChildren--;
}
@Override
public InnerNode grow() {
throw new UnsupportedOperationException("Span of ART is 8 bits, so Node256 is the largest node type.");
}
@Override
public boolean shouldShrink() {
return noOfChildren == Node48.NODE_SIZE;
}
@Override
public InnerNode shrink() {
assert shouldShrink();
return new Node48(this);
}
@Override
public Node first() {
assert noOfChildren > Node48.NODE_SIZE;
int i = 0;
while (child[i] == null) i++;
return child[i];
}
@Override
public Node last() {
assert noOfChildren > Node48.NODE_SIZE;
int i = NODE_SIZE - 1;
while (child[i] == null) i--;
return child[i];
}
@Override
public Node ceil(byte partialKey) {
for (int i = Byte.toUnsignedInt(partialKey); i < NODE_SIZE; i++) {
if (child[i] != null) {
return child[i];
}
}
return null;
}
@Override
public Node greater(byte partialKey) {
for (int i = Byte.toUnsignedInt(partialKey) + 1; i < NODE_SIZE; i++) {
if (child[i] != null) {
return child[i];
}
}
return null;
}
@Override
public Node lesser(byte partialKey) {
for (int i = Byte.toUnsignedInt(partialKey) - 1; i >= 0; i--) {
if (child[i] != null) {
return child[i];
}
}
return null;
}
@Override
public Node floor(byte partialKey) {
for (int i = Byte.toUnsignedInt(partialKey); i >= 0; i--) {
if (child[i] != null) {
return child[i];
}
}
return null;
}
@Override
public boolean isFull() {
return noOfChildren == NODE_SIZE;
}
}

View file

@ -0,0 +1,195 @@
package org.xbib.datastructures.trie.radix.adaptive;
class Node4 extends InnerNode {
static final int NODE_SIZE = 4;
// 2^7 = 128
private static final int BYTE_SHIFT = 1 << (Byte.SIZE - 1);
// each array element would contain the partial byte key to match
// if key matches then take up the same index from the child pointer array
private final byte[] keys = new byte[NODE_SIZE];
Node4() {
super(NODE_SIZE);
}
Node4(Node16 node16) {
super(node16, NODE_SIZE);
assert node16.shouldShrink();
byte[] keys = node16.getKeys();
Node[] child = node16.getChild();
System.arraycopy(keys, 0, this.keys, 0, node16.noOfChildren);
System.arraycopy(child, 0, this.child, 0, node16.noOfChildren);
// update up links
for (int i = 0; i < noOfChildren; i++) {
replaceUplink(this, this.child[i]);
}
}
/**
* Used by Node4 and Node16 to interpret every byte as unsigned when storing partial keys.
* Node48 and Node256 simply use {@link Byte#toUnsignedInt(byte)}
* to index into their key arrays.
*/
static byte unsigned(byte b) {
return (byte) (b ^ BYTE_SHIFT);
}
// the passed b must already have been mapped by unsigned();
// signed() is its inverse (the XOR mask undoes itself)
static byte signed(byte b) {
return unsigned(b);
}
@Override
public Node findChild(byte partialKey) {
partialKey = unsigned(partialKey);
// the paper does a simple loop here because it's a tiny array of size 4
for (int i = 0; i < noOfChildren; i++) {
if (keys[i] == partialKey) {
return child[i];
}
}
return null;
}
@Override
public void addChild(byte partialKey, Node child) {
assert !isFull();
byte unsignedPartialKey = unsigned(partialKey);
// shift elements from this point to right by one place
// noOfChildren here can never be == NODE_SIZE (guarded by the isFull() check)
int i = noOfChildren;
for (; i > 0 && unsignedPartialKey < keys[i - 1]; i--) {
keys[i] = keys[i - 1];
this.child[i] = this.child[i - 1];
}
keys[i] = unsignedPartialKey;
this.child[i] = child;
noOfChildren++;
createUplink(this, child, partialKey);
}
@Override
public void replace(byte partialKey, Node newChild) {
byte unsignedPartialKey = unsigned(partialKey);
int index = 0;
for (; index < noOfChildren; index++) {
if (keys[index] == unsignedPartialKey) {
break;
}
}
// replace will be called from in a state where you know partialKey entry surely exists
assert index < noOfChildren : "Partial key does not exist";
child[index] = newChild;
createUplink(this, newChild, partialKey);
}
@Override
public void removeChild(byte partialKey) {
partialKey = unsigned(partialKey);
int index = 0;
for (; index < noOfChildren; index++) {
if (keys[index] == partialKey) {
break;
}
}
// if this fails: how did we reach the leaf in the first place?
// this node must have held the follow-on pointer for the partialKey
assert index < noOfChildren : "Partial key does not exist";
removeUplink(child[index]);
for (int i = index; i < noOfChildren - 1; i++) {
keys[i] = keys[i + 1];
child[i] = child[i + 1];
}
child[noOfChildren - 1] = null;
noOfChildren--;
}
@Override
public InnerNode grow() {
assert isFull();
// grow from Node4 to Node16
return new Node16(this);
}
@Override
public boolean shouldShrink() {
return false;
}
@Override
public InnerNode shrink() {
throw new UnsupportedOperationException("Node4 is smallest node type");
}
@Override
public Node first() {
return child[0];
}
@Override
public Node last() {
return child[Math.max(0, noOfChildren - 1)];
}
@Override
public Node ceil(byte partialKey) {
partialKey = unsigned(partialKey);
for (int i = 0; i < noOfChildren; i++) {
if (keys[i] >= partialKey) {
return child[i];
}
}
return null;
}
@Override
public Node greater(byte partialKey) {
partialKey = unsigned(partialKey);
for (int i = 0; i < noOfChildren; i++) {
if (keys[i] > partialKey) {
return child[i];
}
}
return null;
}
@Override
public Node lesser(byte partialKey) {
partialKey = unsigned(partialKey);
for (int i = noOfChildren - 1; i >= 0; i--) {
if (keys[i] < partialKey) {
return child[i];
}
}
return null;
}
@Override
public Node floor(byte partialKey) {
partialKey = unsigned(partialKey);
for (int i = noOfChildren - 1; i >= 0; i--) {
if (keys[i] <= partialKey) {
return child[i];
}
}
return null;
}
@Override
public boolean isFull() {
return noOfChildren == NODE_SIZE;
}
byte[] getKeys() {
return keys;
}
byte getOnlyChildKey() {
assert noOfChildren == 1;
return signed(keys[0]);
}
}

View file

@ -0,0 +1,194 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.Arrays;
class Node48 extends InnerNode {
/*
48 * 8 (child pointers) + 256 = 640 bytes
*/
static final int NODE_SIZE = 48;
static final int KEY_INDEX_SIZE = 256;
// so that when you use the partial key to index into keyIndex
// and you see a -1, you know there's no mapping for this key
static final byte ABSENT = -1;
private static final int BYTE_SHIFT = 1 << (Byte.SIZE - 1); // 2^7 = 128
// for partial keys of one byte size, you index directly into this array to find the
// array index of the child pointer array
// the index value can only be between 0 to 47 (to index into the child pointer array)
private final byte[] keyIndex = new byte[KEY_INDEX_SIZE];
Node48(Node16 node) {
super(node, NODE_SIZE);
assert node.isFull();
Arrays.fill(keyIndex, ABSENT);
byte[] keys = node.getKeys();
Node[] child = node.getChild();
for (int i = 0; i < Node16.NODE_SIZE; i++) {
byte key = signed(keys[i]);
int index = Byte.toUnsignedInt(key);
keyIndex[index] = (byte) i;
this.child[i] = child[i];
// update up link
replaceUplink(this, this.child[i]);
}
}
Node48(Node256 node256) {
super(node256, NODE_SIZE);
assert node256.shouldShrink();
Arrays.fill(keyIndex, ABSENT);
Node[] children = node256.getChild();
byte j = 0;
for (int i = 0; i < Node256.NODE_SIZE; i++) {
if (children[i] != null) {
keyIndex[i] = j;
child[j] = children[i];
replaceUplink(this, child[j]);
j++;
}
}
assert j == NODE_SIZE;
}
static byte signed(byte b) {
return unsigned(b);
}
static byte unsigned(byte b) {
return (byte) (b ^ BYTE_SHIFT);
}
@Override
public Node findChild(byte partialKey) {
byte index = keyIndex[Byte.toUnsignedInt(partialKey)];
if (index == ABSENT) {
return null;
}
assert index >= 0 && index <= 47;
return child[index];
}
@Override
public void addChild(byte partialKey, Node child) {
assert !isFull();
int index = Byte.toUnsignedInt(partialKey);
assert keyIndex[index] == ABSENT;
// find a null slot, either left fragmented by a removeChild or never used;
// check the bound first so a full array cannot be over-indexed
byte insertPosition = 0;
for (; insertPosition < NODE_SIZE && this.child[insertPosition] != null; insertPosition++) ;
this.child[insertPosition] = child;
keyIndex[index] = insertPosition;
noOfChildren++;
createUplink(this, child, partialKey);
}
@Override
public void replace(byte partialKey, Node newChild) {
byte index = keyIndex[Byte.toUnsignedInt(partialKey)];
assert index >= 0 && index <= 47;
child[index] = newChild;
createUplink(this, newChild, partialKey);
}
@Override
public void removeChild(byte partialKey) {
assert !shouldShrink();
int index = Byte.toUnsignedInt(partialKey);
int pos = keyIndex[index];
assert pos != ABSENT;
removeUplink(child[pos]);
child[pos] = null; // fragment
keyIndex[index] = ABSENT;
noOfChildren--;
}
@Override
public InnerNode grow() {
assert isFull();
return new Node256(this);
}
@Override
public boolean shouldShrink() {
return noOfChildren == Node16.NODE_SIZE;
}
@Override
public InnerNode shrink() {
assert shouldShrink();
return new Node16(this);
}
@Override
public Node first() {
assert noOfChildren > Node16.NODE_SIZE;
int i = 0;
while (keyIndex[i] == ABSENT) i++;
return child[keyIndex[i]];
}
@Override
public Node last() {
assert noOfChildren > Node16.NODE_SIZE;
int i = KEY_INDEX_SIZE - 1;
while (keyIndex[i] == ABSENT) i--;
return child[keyIndex[i]];
}
@Override
public boolean isFull() {
return noOfChildren == NODE_SIZE;
}
@Override
public Node ceil(byte partialKey) {
for (int i = Byte.toUnsignedInt(partialKey); i < KEY_INDEX_SIZE; i++) {
if (keyIndex[i] != ABSENT) {
return child[keyIndex[i]];
}
}
return null;
}
@Override
public Node greater(byte partialKey) {
for (int i = Byte.toUnsignedInt(partialKey) + 1; i < KEY_INDEX_SIZE; i++) {
if (keyIndex[i] != ABSENT) {
return child[keyIndex[i]];
}
}
return null;
}
@Override
public Node lesser(byte partialKey) {
for (int i = Byte.toUnsignedInt(partialKey) - 1; i >= 0; i--) {
if (keyIndex[i] != ABSENT) {
return child[keyIndex[i]];
}
}
return null;
}
@Override
public Node floor(byte partialKey) {
for (int i = Byte.toUnsignedInt(partialKey); i >= 0; i--) {
if (keyIndex[i] != ABSENT) {
return child[keyIndex[i]];
}
}
return null;
}
byte[] getKeyIndex() {
return keyIndex;
}
}
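
Node48's two-level layout trades a 256-byte index for dense child storage: the partial key indexes keyIndex directly, and the stored slot (0..47, or ABSENT) indexes the child array. A tiny standalone illustration of the lookup path:

import java.util.Arrays;

class Node48LookupDemo {
    static final byte ABSENT = -1;

    public static void main(String[] args) {
        byte[] keyIndex = new byte[256];
        Arrays.fill(keyIndex, ABSENT);
        String[] child = new String[48]; // stand-in for the Node[] child array

        // map partial key 0x41 ('A') to slot 0 of the dense array
        keyIndex[0x41] = 0;
        child[0] = "child-A";

        byte partialKey = 0x41;
        byte slot = keyIndex[Byte.toUnsignedInt(partialKey)];
        // ABSENT means no mapping; otherwise slot indexes the dense child array
        System.out.println(slot == ABSENT ? "no child" : child[slot]); // child-A
    }
}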

View file

@ -0,0 +1,72 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
* Base class for AdaptiveRadixTree iterators.
* Note: taken from TreeMap.
*/
abstract class PrivateEntryIterator<K, V, T> implements Iterator<T> {
private final AdaptiveRadixTree<K, V> m;
private LeafNode<K, V> next;
private LeafNode<K, V> lastReturned;
private int expectedModCount;
PrivateEntryIterator(AdaptiveRadixTree<K, V> m, LeafNode<K, V> first) {
expectedModCount = m.getModCount();
lastReturned = null;
next = first;
this.m = m;
}
public final boolean hasNext() {
return next != null;
}
final LeafNode<K, V> nextEntry() {
LeafNode<K, V> e = next;
if (e == null)
throw new NoSuchElementException();
if (m.getModCount() != expectedModCount)
throw new ConcurrentModificationException();
next = AdaptiveRadixTree.successor(e);
lastReturned = e;
return e;
}
final LeafNode<K, V> prevEntry() {
LeafNode<K, V> e = next;
if (e == null)
throw new NoSuchElementException();
if (m.getModCount() != expectedModCount)
throw new ConcurrentModificationException();
next = AdaptiveRadixTree.predecessor(e);
lastReturned = e;
return e;
}
public void remove() {
if (lastReturned == null)
throw new IllegalStateException();
if (m.getModCount() != expectedModCount)
throw new ConcurrentModificationException();
/*
next already points to the next leaf node (which might be a sibling of lastReturned).
if next is the only sibling left, then the parent gets path compressed,
BUT the reference that next holds to the sibling leaf node remains the same; only its parent changes.
Therefore, at all times, next is a valid reference to be simply returned on the
next call to next().
Is there any scenario in which the next leaf pointer gets changed and the iterator
points to a stale leaf?
No.
In fact, the LeafNode ctor is only ever called in a put, and only for the new leaf
being created/entered.
So references to an existing LeafNode won't go stale.
*/
m.deleteEntry(lastReturned);
expectedModCount = m.getModCount();
lastReturned = null;
}
}
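
The expectedModCount handshake above makes these iterators fail fast: structural changes not made through the iterator invalidate it, while Iterator.remove() resynchronizes the count. A hedged usage sketch (the map parameter is illustrative):

import java.util.Iterator;
import java.util.NavigableMap;

class FailFastDemo {
    static void pruneTempKeys(NavigableMap<String, Integer> art) {
        Iterator<String> it = art.keySet().iterator();
        while (it.hasNext()) {
            if (it.next().startsWith("tmp")) {
                // it.remove() resyncs expectedModCount; calling art.remove(key)
                // here instead would make the next it.next() throw
                // ConcurrentModificationException
                it.remove();
            }
        }
    }
}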

View file

@ -0,0 +1,12 @@
package org.xbib.datastructures.trie.radix.adaptive;
final class ValueIterator<K, V> extends PrivateEntryIterator<K, V, V> {
ValueIterator(AdaptiveRadixTree<K, V> m, LeafNode<K, V> first) {
super(m, first);
}
@Override
public V next() {
return nextEntry().getValue();
}
}

View file

@ -0,0 +1,51 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.AbstractCollection;
import java.util.Iterator;
/**
* Contains the parts borrowed from TreeMap.
* Such methods/utilities should be taken out and made into a library of their own,
* so that any implementation of NavigableMap can reuse them, while the implementation
* provides certain primitive methods (getEntry, successor, predecessor, etc.)
*/
class Values<K, V> extends AbstractCollection<V> {
private final AdaptiveRadixTree<K, V> m;
Values(AdaptiveRadixTree<K, V> m) {
this.m = m;
}
@Override
public Iterator<V> iterator() {
return m.valueIterator();
}
@Override
public int size() {
return m.size();
}
@SuppressWarnings("unchecked")
@Override
public boolean contains(Object o) {
return m.containsValue((V) o);
}
@Override
public boolean remove(Object o) {
for (LeafNode<K, V> e = m.getFirstEntry(); e != null; e = AdaptiveRadixTree.successor(e)) {
if (AdaptiveRadixTree.valEquals(e.getValue(), o)) {
m.deleteEntry(e);
return true;
}
}
return false;
}
@Override
public void clear() {
m.clear();
}
}

View file

@ -0,0 +1,21 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
class ArrayChildPtr extends ChildPtr {
private final int i;
private final Node[] children;
public ArrayChildPtr(Node[] children, int i) {
this.children = children;
this.i = i;
}
@Override
public Node get() {
return children[i];
}
@Override
public void set(Node n) {
children[i] = n;
}
}
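
ChildPtr abstracts "a slot holding a child", so tree surgery can swap a child without caring whether the slot lives in a node's array or is the root reference. A tiny illustration of the array-backed variant, assuming same-package access (direct set() shown for brevity; the real code goes through change()/change_no_decrement() to keep refcounts right):

class ChildPtrDemo {
    public static void main(String[] args) {
        Node[] children = new Node[4];
        children[1] = new Leaf(new byte[]{'a'}, "v1");

        ChildPtr ptr = new ArrayChildPtr(children, 1);
        // writes through the pointer land in the backing array slot
        ptr.set(new Leaf(new byte[]{'a'}, "v2"));
        System.out.println(ptr.get() == children[1]); // true
    }
}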

View file

@ -0,0 +1,79 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
import java.util.NoSuchElementException;
class ArtIterator implements Iterator<Tuple2<byte[], Object>> {
private final Deque<Node> elemStack = new ArrayDeque<Node>();
private final Deque<Integer> idxStack = new ArrayDeque<Integer>();
public ArtIterator(Node root) {
if (root != null) {
elemStack.push(root);
idxStack.push(0);
maybeAdvance();
}
}
@Override
public boolean hasNext() {
return !elemStack.isEmpty();
}
@Override
public Tuple2<byte[], Object> next() {
if (hasNext()) {
Leaf leaf = (Leaf) elemStack.peek();
byte[] key = leaf.key;
Object value = leaf.value;
// Mark the leaf as consumed
idxStack.push(idxStack.pop() + 1);
maybeAdvance();
return new Tuple2<byte[], Object>(key, value);
} else {
throw new NoSuchElementException("end of iterator");
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
// Postcondition: if the stack is nonempty, the top of the stack must contain a leaf
private void maybeAdvance() {
// Pop exhausted nodes
while (!elemStack.isEmpty() && elemStack.peek().exhausted(idxStack.peek())) {
elemStack.pop();
idxStack.pop();
if (!elemStack.isEmpty()) {
// Move on by advancing the exhausted node's parent
idxStack.push(idxStack.pop() + 1);
}
}
if (!elemStack.isEmpty()) {
// Descend to the next leaf node element
while (true) {
if (elemStack.peek() instanceof Leaf) {
// Done - reached the next element
break;
} else {
// Advance to the next child of this node
ArtNode cur = (ArtNode) elemStack.peek();
idxStack.push(cur.nextChildAtOrAfter(idxStack.pop()));
Node child = cur.childAt(idxStack.peek());
// Push it onto the stack
elemStack.push(child);
idxStack.push(0);
}
}
}
}
}

View file

@ -0,0 +1,174 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
abstract class ArtNode extends Node {
final byte[] partial = new byte[Node.MAX_PREFIX_LEN];
int num_children = 0;
int partial_len = 0;
public ArtNode() {
super();
}
public ArtNode(final ArtNode other) {
super();
this.num_children = other.num_children;
this.partial_len = other.partial_len;
System.arraycopy(other.partial, 0,
partial, 0,
Math.min(Node.MAX_PREFIX_LEN, partial_len));
}
/**
* Returns the number of prefix characters shared between
* the key and node.
*/
public int check_prefix(final byte[] key, int depth) {
int max_cmp = Math.min(Math.min(partial_len, Node.MAX_PREFIX_LEN), key.length - depth);
int idx;
for (idx = 0; idx < max_cmp; idx++) {
if (partial[idx] != key[depth + idx])
return idx;
}
return idx;
}
/**
* Calculates the index at which the prefixes mismatch
*/
public int prefix_mismatch(final byte[] key, int depth) {
int max_cmp = Math.min(Math.min(Node.MAX_PREFIX_LEN, partial_len), key.length - depth);
int idx;
for (idx = 0; idx < max_cmp; idx++) {
if (partial[idx] != key[depth + idx])
return idx;
}
// If the prefix is short we can avoid finding a leaf
if (partial_len > Node.MAX_PREFIX_LEN) {
// Prefix is longer than what we've checked, find a leaf
final Leaf l = this.minimum();
max_cmp = Math.min(l.key.length, key.length) - depth;
for (; idx < max_cmp; idx++) {
if (l.key[idx + depth] != key[depth + idx])
return idx;
}
}
return idx;
}
public abstract ChildPtr find_child(byte c);
public abstract void add_child(ChildPtr ref, byte c, Node child);
public abstract void remove_child(ChildPtr ref, byte c);
// Precondition: exhausted(i) == false
public abstract int nextChildAtOrAfter(int i);
public abstract Node childAt(int i);
@Override
public boolean insert(ChildPtr ref, final byte[] key, Object value,
int depth, boolean force_clone) {
boolean do_clone = force_clone || this.refcount > 1;
// Check if given node has a prefix
if (partial_len > 0) {
// Determine if the prefixes differ, since we need to split
int prefix_diff = prefix_mismatch(key, depth);
if (prefix_diff >= partial_len) {
depth += partial_len;
} else {
// Create a new node
ArtNode4 result = new ArtNode4();
Node ref_old = ref.get();
ref.change_no_decrement(result); // don't decrement yet, because doing so might destroy self
result.partial_len = prefix_diff;
System.arraycopy(partial, 0,
result.partial, 0,
Math.min(Node.MAX_PREFIX_LEN, prefix_diff));
// Adjust the prefix of the old node
ArtNode this_writable = do_clone ? (ArtNode) this.n_clone() : this;
if (partial_len <= Node.MAX_PREFIX_LEN) {
result.add_child(ref, this_writable.partial[prefix_diff], this_writable);
this_writable.partial_len -= (prefix_diff + 1);
System.arraycopy(this_writable.partial, prefix_diff + 1,
this_writable.partial, 0,
Math.min(Node.MAX_PREFIX_LEN, this_writable.partial_len));
} else {
this_writable.partial_len -= (prefix_diff + 1);
final Leaf l = this.minimum();
result.add_child(ref, l.key[depth + prefix_diff], this_writable);
System.arraycopy(l.key, depth + prefix_diff + 1,
this_writable.partial, 0,
Math.min(Node.MAX_PREFIX_LEN, this_writable.partial_len));
}
// Insert the new leaf
Leaf l = new Leaf(key, value);
result.add_child(ref, key[depth + prefix_diff], l);
ref_old.decrement_refcount();
return true;
}
}
// Clone self if necessary
ArtNode this_writable = do_clone ? (ArtNode) this.n_clone() : this;
if (do_clone) {
ref.change(this_writable);
}
// Do the insert, either in a child (if a matching child already exists) or in self
ChildPtr child = this_writable.find_child(key[depth]);
if (child != null) {
return Node.insert(child.get(), child, key, value, depth + 1, force_clone);
} else {
// No child, node goes within us
Leaf l = new Leaf(key, value);
this_writable.add_child(ref, key[depth], l);
// If `this` was full and `do_clone` is true, we will clone a full node
// and then immediately delete the clone in favor of a larger node.
// TODO: avoid this
return true;
}
}
@Override
public boolean delete(ChildPtr ref, final byte[] key, int depth,
boolean force_clone) {
// Bail if the prefix does not match
if (partial_len > 0) {
int prefix_len = check_prefix(key, depth);
if (prefix_len != Math.min(MAX_PREFIX_LEN, partial_len)) {
return false;
}
depth += partial_len;
}
boolean do_clone = force_clone || this.refcount > 1;
// Clone self if necessary. Note: this allocation will be wasted if the
// key does not exist in the child's subtree
ArtNode this_writable = do_clone ? (ArtNode) this.n_clone() : this;
// Find child node
ChildPtr child = this_writable.find_child(key[depth]);
if (child == null) return false; // when translating to C++, make sure to delete this_writable
if (do_clone) {
ref.change(this_writable);
}
boolean child_is_leaf = child.get() instanceof Leaf;
boolean do_delete = child.get().delete(child, key, depth + 1, do_clone);
if (do_delete && child_is_leaf) {
// The leaf to delete is our child, so we must remove it
this_writable.remove_child(ref, key[depth]);
}
return do_delete;
}
}
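
insert() above does copy-on-write path-compression splitting: when the key diverges inside a compressed prefix, a new ArtNode4 takes over the shared part and the old node keeps the remainder; nodes visible to other versions (refcount > 1) are cloned rather than mutated. A worked sketch of the split (names and layout illustrative):

/*
Inserting key bytes "fob" at depth 0 into a node whose compressed prefix is "foo":

  prefix_mismatch("fob", 0) == 2     // "fo" is shared, then 'o' vs 'b' differs

  before:                     after:
  [node, partial = "foo"]     [ArtNode4, partial = "fo"]
                                +- 'o' -> [old node, partial shortened by prefix_diff + 1 = 3]
                                +- 'b' -> [Leaf "fob"]

If the old node's refcount > 1, it is cloned first (do_clone), so snapshots that
still reference the previous version of the tree are left untouched.
*/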

View file

@ -0,0 +1,168 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
class ArtNode16 extends ArtNode {
public static int count;
byte[] keys = new byte[16];
Node[] children = new Node[16];
public ArtNode16() {
super();
count++;
}
public ArtNode16(final ArtNode16 other) {
super(other);
System.arraycopy(other.keys, 0, keys, 0, other.num_children);
for (int i = 0; i < other.num_children; i++) {
children[i] = other.children[i];
children[i].refcount++;
}
count++;
}
public ArtNode16(final ArtNode4 other) {
this();
// ArtNode
this.num_children = other.num_children;
this.partial_len = other.partial_len;
System.arraycopy(other.partial, 0,
this.partial, 0,
Math.min(MAX_PREFIX_LEN, this.partial_len));
// ArtNode16 from ArtNode4
System.arraycopy(other.keys, 0, keys, 0, this.num_children);
for (int i = 0; i < this.num_children; i++) {
children[i] = other.children[i];
children[i].refcount++;
}
}
public ArtNode16(final ArtNode48 other) {
this();
assert (other.num_children <= 16);
// ArtNode
this.num_children = other.num_children;
this.partial_len = other.partial_len;
System.arraycopy(other.partial, 0,
this.partial, 0,
Math.min(MAX_PREFIX_LEN, this.partial_len));
// ArtNode16 from ArtNode48
int child = 0;
for (int i = 0; i < 256; i++) {
int pos = to_uint(other.keys[i]);
if (pos != 0) {
keys[child] = (byte) i;
children[child] = other.children[pos - 1];
children[child].refcount++;
child++;
}
}
}
@Override
public Node n_clone() {
return new ArtNode16(this);
}
@Override
public ChildPtr find_child(byte c) {
// TODO: avoid linear search using intrinsics if available
for (int i = 0; i < this.num_children; i++) {
if (keys[i] == c) {
return new ArrayChildPtr(children, i);
}
}
return null;
}
@Override
public Leaf minimum() {
return Node.minimum(children[0]);
}
@Override
public void add_child(ChildPtr ref, byte c, Node child) {
assert (refcount <= 1);
if (this.num_children < 16) {
// TODO: avoid linear search using intrinsics if available
int idx;
for (idx = 0; idx < this.num_children; idx++) {
if (to_uint(c) < to_uint(keys[idx])) break;
}
// Shift to make room
System.arraycopy(this.keys, idx, this.keys, idx + 1, this.num_children - idx);
System.arraycopy(this.children, idx, this.children, idx + 1, this.num_children - idx);
// Insert element
this.keys[idx] = c;
this.children[idx] = child;
child.refcount++;
this.num_children++;
} else {
// Copy the node16 into a new node48
ArtNode48 result = new ArtNode48(this);
// Update the parent pointer to the node48
ref.change(result);
// Insert the element into the node48 instead
result.add_child(ref, c, child);
}
}
@Override
public void remove_child(ChildPtr ref, byte c) {
assert (refcount <= 1);
int idx;
for (idx = 0; idx < this.num_children; idx++) {
if (c == keys[idx]) break;
}
if (idx == this.num_children) return;
children[idx].decrement_refcount();
// Shift to fill the hole
System.arraycopy(this.keys, idx + 1, this.keys, idx, this.num_children - idx - 1);
System.arraycopy(this.children, idx + 1, this.children, idx, this.num_children - idx - 1);
this.num_children--;
if (num_children == 3) {
ArtNode4 result = new ArtNode4(this);
ref.change(result);
}
}
@Override
public boolean exhausted(int i) {
return i >= num_children;
}
@Override
public int nextChildAtOrAfter(int i) {
return i;
}
@Override
public Node childAt(int i) {
return children[i];
}
@Override
public int decrement_refcount() {
if (--this.refcount <= 0) {
int freed = 0;
for (int i = 0; i < this.num_children; i++) {
freed += children[i].decrement_refcount();
}
count--;
// delete this;
return freed + 232;
// object size (8) + refcount (4) +
// num_children int (4) + partial_len int (4) +
// pointer to partial array (8) + partial array size (8+4+1*MAX_PREFIX_LEN)
// pointer to key array (8) + key array size (8+4+1*16) +
// pointer to children array (8) + children array size (8+4+8*16)
}
return 0;
}
}
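// Sizing note (illustrative, not part of this commit): add_child above grows
// an ArtNode16 into an ArtNode48 on the 17th child, and remove_child shrinks
// it back to an ArtNode4 once only three children remain. A minimal sketch of
// how growth surfaces through the public ArtTree API; the class name
// NodeGrowthSketch is hypothetical, and single-byte keys are used so that no
// key is a prefix of another (prefix keys are unsupported by Leaf.insert).
class NodeGrowthSketch {
    public static void main(String[] args) {
        ArtTree tree = new ArtTree();
        // 17 distinct single-byte keys: the root leaf splits into an ArtNode4,
        // which grows into an ArtNode16 on the 5th child and into an ArtNode48
        // on the 17th.
        for (char c = 'a'; c <= 'q'; c++) {
            tree.insert(String.valueOf(c).getBytes(java.nio.charset.StandardCharsets.US_ASCII), (int) c);
        }
        System.out.println(tree.size()); // 17
        System.out.println(tree.search("q".getBytes(java.nio.charset.StandardCharsets.US_ASCII))); // 113
    }
}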

View file

@ -0,0 +1,125 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
class ArtNode256 extends ArtNode {
public static int count;
Node[] children = new Node[256];
public ArtNode256() {
super();
count++;
}
public ArtNode256(final ArtNode256 other) {
super(other);
for (int i = 0; i < 256; i++) {
children[i] = other.children[i];
if (children[i] != null) {
children[i].refcount++;
}
}
count++;
}
public ArtNode256(final ArtNode48 other) {
this();
// ArtNode
this.num_children = other.num_children;
this.partial_len = other.partial_len;
System.arraycopy(other.partial, 0, this.partial, 0, Math.min(MAX_PREFIX_LEN, this.partial_len));
// ArtNode256 from ArtNode48
for (int i = 0; i < 256; i++) {
if (other.keys[i] != 0) {
children[i] = other.children[to_uint(other.keys[i]) - 1];
children[i].refcount++;
}
}
}
@Override
public Node n_clone() {
return new ArtNode256(this);
}
@Override
public ChildPtr find_child(byte c) {
if (children[to_uint(c)] != null) return new ArrayChildPtr(children, to_uint(c));
return null;
}
@Override
public Leaf minimum() {
int idx = 0;
while (children[idx] == null) idx++;
return Node.minimum(children[idx]);
}
@Override
public void add_child(ChildPtr ref, byte c, Node child) {
assert (refcount <= 1);
this.num_children++;
this.children[to_uint(c)] = child;
child.refcount++;
}
@Override
public void remove_child(ChildPtr ref, byte c) {
assert (refcount <= 1);
children[to_uint(c)].decrement_refcount();
children[to_uint(c)] = null;
num_children--;
if (num_children == 37) {
ArtNode48 result = new ArtNode48(this);
ref.change(result);
}
}
@Override
public boolean exhausted(int c) {
for (int i = c; i < 256; i++) {
if (children[i] != null) {
return false;
}
}
return true;
}
@Override
public int nextChildAtOrAfter(int c) {
int pos = c;
for (; pos < 256; pos++) {
if (children[pos] != null) {
break;
}
}
return pos;
}
@Override
public Node childAt(int pos) {
return children[pos];
}
@Override
public int decrement_refcount() {
if (--this.refcount <= 0) {
int freed = 0;
for (int i = 0; i < 256; i++) {
if (children[i] != null) {
freed += children[i].decrement_refcount();
}
}
count--;
// delete this;
return freed + 2120;
// object size (8) + refcount (4) +
// num_children int (4) + partial_len int (4) +
// pointer to partial array (8) + partial array size (8+4+1*MAX_PREFIX_LEN)
// pointer to children array (8) + children array size (8+4+8*256) +
// padding (4)
}
return 0;
}
}

View file

@ -0,0 +1,169 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
class ArtNode4 extends ArtNode {
public static int count;
byte[] keys = new byte[4];
Node[] children = new Node[4];
public ArtNode4() {
super();
count++;
}
public ArtNode4(final ArtNode4 other) {
super(other);
System.arraycopy(other.keys, 0, keys, 0, other.num_children);
for (int i = 0; i < other.num_children; i++) {
children[i] = other.children[i];
children[i].refcount++;
}
count++;
}
public ArtNode4(final ArtNode16 other) {
this();
assert (other.num_children <= 4);
// ArtNode
this.num_children = other.num_children;
this.partial_len = other.partial_len;
System.arraycopy(other.partial, 0,
this.partial, 0,
Math.min(MAX_PREFIX_LEN, this.partial_len));
// ArtNode4 from ArtNode16
System.arraycopy(other.keys, 0, keys, 0, this.num_children);
for (int i = 0; i < this.num_children; i++) {
children[i] = other.children[i];
children[i].refcount++;
}
}
@Override
public Node n_clone() {
return new ArtNode4(this);
}
@Override
public ChildPtr find_child(byte c) {
for (int i = 0; i < this.num_children; i++) {
if (keys[i] == c) {
return new ArrayChildPtr(children, i);
}
}
return null;
}
@Override
public Leaf minimum() {
return Node.minimum(children[0]);
}
@Override
public void add_child(ChildPtr ref, byte c, Node child) {
assert (refcount <= 1);
if (this.num_children < 4) {
int idx;
for (idx = 0; idx < this.num_children; idx++) {
if (to_uint(c) < to_uint(keys[idx])) break;
}
// Shift to make room
System.arraycopy(this.keys, idx, this.keys, idx + 1, this.num_children - idx);
System.arraycopy(this.children, idx, this.children, idx + 1, this.num_children - idx);
// Insert element
this.keys[idx] = c;
this.children[idx] = child;
child.refcount++;
this.num_children++;
} else {
// Copy the node4 into a new node16
ArtNode16 result = new ArtNode16(this);
// Update the parent pointer to the node16
ref.change(result);
// Insert the element into the node16 instead
result.add_child(ref, c, child);
}
}
@Override
public void remove_child(ChildPtr ref, byte c) {
assert (refcount <= 1);
int idx;
for (idx = 0; idx < this.num_children; idx++) {
if (c == keys[idx]) break;
}
if (idx == this.num_children) return;
assert (children[idx] instanceof Leaf);
children[idx].decrement_refcount();
// Shift to fill the hole
System.arraycopy(this.keys, idx + 1, this.keys, idx, this.num_children - idx - 1);
System.arraycopy(this.children, idx + 1, this.children, idx, this.num_children - idx - 1);
this.num_children--;
// Remove nodes with only a single child
if (num_children == 1) {
Node child = children[0];
if (!(child instanceof Leaf)) {
if (child.refcount > 1) {
child = child.n_clone();
}
ArtNode an_child = (ArtNode) child;
// Concatenate the prefixes
int prefix = partial_len;
if (prefix < MAX_PREFIX_LEN) {
partial[prefix] = keys[0];
prefix++;
}
if (prefix < MAX_PREFIX_LEN) {
int sub_prefix = Math.min(an_child.partial_len, MAX_PREFIX_LEN - prefix);
System.arraycopy(an_child.partial, 0, partial, prefix, sub_prefix);
prefix += sub_prefix;
}
// Store the prefix in the child
System.arraycopy(partial, 0, an_child.partial, 0, Math.min(prefix, MAX_PREFIX_LEN));
an_child.partial_len += partial_len + 1;
}
ref.change(child);
}
}
@Override
public boolean exhausted(int i) {
return i >= num_children;
}
@Override
public int nextChildAtOrAfter(int i) {
return i;
}
@Override
public Node childAt(int i) {
return children[i];
}
@Override
public int decrement_refcount() {
if (--this.refcount <= 0) {
int freed = 0;
for (int i = 0; i < this.num_children; i++) {
freed += children[i].decrement_refcount();
}
count--;
// delete this;
return freed + 128;
// object size (8) + refcount (4) +
// num_children int (4) + partial_len int (4) +
// pointer to partial array (8) + partial array size (8+4+1*MAX_PREFIX_LEN)
// pointer to key array (8) + key array size (8+4+1*4) +
// pointer to children array (8) + children array size (8+4+8*4) +
// padding (4)
}
return 0;
}
}

View file

@ -0,0 +1,173 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
class ArtNode48 extends ArtNode {
public static int count;
byte[] keys = new byte[256];
Node[] children = new Node[48];
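// Two-level indirection: keys is indexed by the unsigned key byte and stores
// (slot + 1) into children, with 0 meaning "no child". children may contain
// holes after deletions, so free slots are found by linear scan.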
public ArtNode48() {
super();
count++;
}
public ArtNode48(final ArtNode48 other) {
super(other);
System.arraycopy(other.keys, 0, keys, 0, 256);
// Copy the children. We have to look at all elements of `children`
// rather than just the first num_children elements because `children`
// may not be contiguous due to deletion
for (int i = 0; i < 48; i++) {
children[i] = other.children[i];
if (children[i] != null) {
children[i].refcount++;
}
}
count++;
}
public ArtNode48(final ArtNode16 other) {
this();
// ArtNode
this.num_children = other.num_children;
this.partial_len = other.partial_len;
System.arraycopy(other.partial, 0, this.partial, 0,
Math.min(MAX_PREFIX_LEN, this.partial_len));
// ArtNode48 from ArtNode16
for (int i = 0; i < this.num_children; i++) {
keys[to_uint(other.keys[i])] = (byte) (i + 1);
children[i] = other.children[i];
children[i].refcount++;
}
}
public ArtNode48(final ArtNode256 other) {
this();
assert (other.num_children <= 48);
// ArtNode
this.num_children = other.num_children;
this.partial_len = other.partial_len;
System.arraycopy(other.partial, 0, this.partial, 0,
Math.min(MAX_PREFIX_LEN, this.partial_len));
// ArtNode48 from ArtNode256
int pos = 0;
for (int i = 0; i < 256; i++) {
if (other.children[i] != null) {
keys[i] = (byte) (pos + 1);
children[pos] = other.children[i];
children[pos].refcount++;
pos++;
}
}
}
@Override
public Node n_clone() {
return new ArtNode48(this);
}
@Override
public ChildPtr find_child(byte c) {
int idx = to_uint(keys[to_uint(c)]);
if (idx != 0) return new ArrayChildPtr(children, idx - 1);
return null;
}
@Override
public Leaf minimum() {
int idx = 0;
while (keys[idx] == 0) idx++;
Node child = children[to_uint(keys[idx]) - 1];
return Node.minimum(child);
}
@Override
public void add_child(ChildPtr ref, byte c, Node child) {
assert (refcount <= 1);
if (this.num_children < 48) {
// Have to do a linear scan because deletion may create holes in
// children array
int pos = 0;
while (children[pos] != null) pos++;
this.children[pos] = child;
child.refcount++;
this.keys[to_uint(c)] = (byte) (pos + 1);
this.num_children++;
} else {
// Copy the node48 into a new node256
ArtNode256 result = new ArtNode256(this);
// Update the parent pointer to the node256
ref.change(result);
// Insert the element into the node256 instead
result.add_child(ref, c, child);
}
}
@Override
public void remove_child(ChildPtr ref, byte c) {
assert (refcount <= 1);
// Delete the child, leaving a hole in children. We can't shift children
// because that would require decrementing many elements of keys
int pos = to_uint(keys[to_uint(c)]);
keys[to_uint(c)] = 0;
children[pos - 1].decrement_refcount();
children[pos - 1] = null;
num_children--;
if (num_children == 12) {
ArtNode16 result = new ArtNode16(this);
ref.change(result);
}
}
@Override
public boolean exhausted(int c) {
for (int i = c; i < 256; i++) {
if (keys[i] != 0) {
return false;
}
}
return true;
}
@Override
public int nextChildAtOrAfter(int c) {
int pos = c;
for (; pos < 256; pos++) {
if (keys[pos] != 0) {
break;
}
}
return pos;
}
@Override
public Node childAt(int c) {
return children[to_uint(keys[c]) - 1];
}
@Override
public int decrement_refcount() {
if (--this.refcount <= 0) {
int freed = 0;
for (int i = 0; i < this.num_children; i++) {
if (children[i] != null) {
freed += children[i].decrement_refcount();
}
}
count--;
// delete this;
return freed + 728;
// object size (8) + refcount (4) +
// num_children int (4) + partial_len int (4) +
// pointer to partial array (8) + partial array size (8+4+1*MAX_PREFIX_LEN)
// pointer to key array (8) + key array size (8+4+1*256) +
// pointer to children array (8) + children array size (8+4+8*48)
}
return 0;
}
}

View file

@ -0,0 +1,156 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
import java.util.Iterator;
public class ArtTree extends ChildPtr {
Node root = null;
long num_elements = 0;
public ArtTree() {
}
public ArtTree(final ArtTree other) {
root = other.root;
num_elements = other.num_elements;
}
public ArtTree snapshot() {
ArtTree b = new ArtTree();
if (root != null) {
b.root = Node.n_clone(root);
b.root.refcount++;
}
b.num_elements = num_elements;
return b;
}
@Override
Node get() {
return root;
}
@Override
void set(Node n) {
root = n;
}
public Object search(final byte[] key) {
Node n = root;
int prefix_len, depth = 0;
while (n != null) {
if (n instanceof Leaf) {
Leaf l = (Leaf) n;
// Check if the expanded path matches
if (l.matches(key)) {
return l.value;
} else {
return null;
}
} else {
ArtNode an = (ArtNode) (n);
// Bail if the prefix does not match
if (an.partial_len > 0) {
prefix_len = an.check_prefix(key, depth);
if (prefix_len != Math.min(Node.MAX_PREFIX_LEN, an.partial_len)) {
return null;
}
depth += an.partial_len;
}
if (depth >= key.length) return null;
// Recursively search
ChildPtr child = an.find_child(key[depth]);
n = (child != null) ? child.get() : null;
depth++;
}
}
return null;
}
public void insert(final byte[] key, Object value) throws UnsupportedOperationException {
if (Node.insert(root, this, key, value, 0, false)) num_elements++;
}
public void delete(final byte[] key) {
if (root != null) {
boolean child_is_leaf = root instanceof Leaf;
boolean do_delete = root.delete(this, key, 0, false);
if (do_delete) {
num_elements--;
if (child_is_leaf) {
// The leaf to delete is the root, so we must remove it
root = null;
}
}
}
}
public Iterator<Tuple2<byte[], Object>> iterator() {
return new ArtIterator(root);
}
public Iterator<Tuple2<byte[], Object>> prefixIterator(final byte[] prefix) {
// Find the root node for the prefix
Node n = root;
int prefix_len, depth = 0;
while (n != null) {
if (n instanceof Leaf) {
Leaf l = (Leaf) n;
// Check if the expanded path matches
if (l.prefix_matches(prefix)) {
return new ArtIterator(l);
} else {
return new ArtIterator(null);
}
} else {
if (depth == prefix.length) {
// If we have reached appropriate depth, return the iterator
if (n.minimum().prefix_matches(prefix)) {
return new ArtIterator(n);
} else {
return new ArtIterator(null);
}
} else {
ArtNode an = (ArtNode) (n);
// Bail if the prefix does not match
if (an.partial_len > 0) {
prefix_len = an.prefix_mismatch(prefix, depth);
if (prefix_len == 0) {
// No match, return empty
return new ArtIterator(null);
} else if (depth + prefix_len == prefix.length) {
// Prefix match, return iterator
return new ArtIterator(n);
} else {
// Full match, go deeper
depth += an.partial_len;
}
}
// Recursively search
ChildPtr child = an.find_child(prefix[depth]);
n = (child != null) ? child.get() : null;
depth++;
}
}
}
return new ArtIterator(null);
}
public long size() {
return num_elements;
}
public int destroy() {
if (root != null) {
int result = root.decrement_refcount();
root = null;
return result;
} else {
return 0;
}
}
}
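// Usage sketch (illustrative; ArtTreeSketch is a hypothetical class, not part
// of this commit). snapshot() clones the root and bumps refcounts, so later
// inserts into the original tree copy shared nodes instead of mutating them.
// Note that insert throws UnsupportedOperationException when one key is a
// prefix of another.
class ArtTreeSketch {
    public static void main(String[] args) {
        java.nio.charset.Charset ascii = java.nio.charset.StandardCharsets.US_ASCII;
        ArtTree tree = new ArtTree();
        tree.insert("foo".getBytes(ascii), 1);
        ArtTree snap = tree.snapshot();
        tree.insert("bar".getBytes(ascii), 2);
        System.out.println(tree.size());                         // 2
        System.out.println(snap.size());                         // 1, the snapshot is unaffected
        System.out.println(snap.search("bar".getBytes(ascii))); // null
        tree.iterator().forEachRemaining(kv ->
                System.out.println(new String(kv.getA(), ascii) + "=" + kv.getB()));
    }
}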

View file

@ -0,0 +1,22 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
abstract class ChildPtr {
abstract Node get();
abstract void set(Node n);
void change(Node n) {
// First increment the refcount of the new node, in case it would
// otherwise have been deleted by the decrement of the old node
n.refcount++;
if (get() != null) {
get().decrement_refcount();
}
set(n);
}
void change_no_decrement(Node n) {
n.refcount++;
set(n);
}
}

View file

@ -0,0 +1,136 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
class Leaf extends Node {
public static int count;
final byte[] key;
Object value;
public Leaf(final byte[] key, Object value) {
super();
this.key = key;
this.value = value;
count++;
}
public Leaf(final Leaf other) {
super();
this.key = other.key;
this.value = other.value;
count++;
}
@Override
public Node n_clone() {
return new Leaf(this);
}
public boolean matches(final byte[] key) {
if (this.key.length != key.length) return false;
for (int i = 0; i < key.length; i++) {
if (this.key[i] != key[i]) {
return false;
}
}
return true;
}
public boolean prefix_matches(final byte[] prefix) {
if (this.key.length < prefix.length) return false;
for (int i = 0; i < prefix.length; i++) {
if (this.key[i] != prefix[i]) {
return false;
}
}
return true;
}
@Override
public Leaf minimum() {
return this;
}
public int longest_common_prefix(Leaf other, int depth) {
int max_cmp = Math.min(key.length, other.key.length) - depth;
int idx;
for (idx = 0; idx < max_cmp; idx++) {
if (key[depth + idx] != other.key[depth + idx]) {
return idx;
}
}
return idx;
}
@Override
public boolean insert(ChildPtr ref, final byte[] key, Object value,
int depth, boolean force_clone) throws UnsupportedOperationException {
boolean clone = force_clone || this.refcount > 1;
if (matches(key)) {
if (clone) {
// Updating an existing value, but need to create a new leaf to
// reflect the change
ref.change(new Leaf(key, value));
} else {
// Updating an existing value, and safe to make the change in
// place
this.value = value;
}
return false;
} else {
// New value
// Create a new leaf
Leaf l2 = new Leaf(key, value);
// Determine longest prefix
int longest_prefix = longest_common_prefix(l2, depth);
if (depth + longest_prefix >= this.key.length ||
depth + longest_prefix >= key.length) {
throw new UnsupportedOperationException("keys cannot be prefixes of other keys");
}
// Split the current leaf into a node4
ArtNode4 result = new ArtNode4();
result.partial_len = longest_prefix;
Node ref_old = ref.get();
ref.change_no_decrement(result);
System.arraycopy(key, depth,
result.partial, 0,
Math.min(Node.MAX_PREFIX_LEN, longest_prefix));
// Add the leafs to the new node4
result.add_child(ref, this.key[depth + longest_prefix], this);
result.add_child(ref, l2.key[depth + longest_prefix], l2);
ref_old.decrement_refcount();
// TODO: avoid the increment to self immediately followed by decrement
return true;
}
}
@Override
public boolean delete(ChildPtr ref, final byte[] key, int depth,
boolean force_clone) {
return matches(key);
}
@Override
public boolean exhausted(int i) {
return i > 0;
}
@Override
public int decrement_refcount() {
if (--this.refcount <= 0) {
count--;
// delete this;
// Don't delete the actual key or value because they may be used
// elsewhere
return 32;
// object size (8) + refcount (4) + pointer to key array (8) +
// pointer to value (8) + padding (4)
}
return 0;
}
}

View file

@ -0,0 +1,54 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
abstract class Node {
static final int MAX_PREFIX_LEN = 8;
int refcount;
public Node() {
refcount = 0;
}
public static Node n_clone(Node n) {
if (n == null) return null;
else return n.n_clone();
}
public static Leaf minimum(Node n) {
if (n == null) return null;
else return n.minimum();
}
public static boolean insert(Node n, ChildPtr ref, final byte[] key, Object value, int depth,
boolean force_clone) {
// If we are at a NULL node, inject a leaf
if (n == null) {
ref.change(new Leaf(key, value));
return true;
} else {
return n.insert(ref, key, value, depth, force_clone);
}
}
public static boolean exhausted(Node n, int i) {
if (n == null) return true;
else return n.exhausted(i);
}
static int to_uint(byte b) {
return ((int) b) & 0xFF;
}
public abstract Node n_clone();
public abstract Leaf minimum();
public abstract boolean insert(ChildPtr ref, final byte[] key, Object value, int depth,
boolean force_clone) throws UnsupportedOperationException;
public abstract boolean delete(ChildPtr ref, final byte[] key, int depth,
boolean force_clone);
public abstract int decrement_refcount();
public abstract boolean exhausted(int i);
}

View file

@ -0,0 +1,33 @@
package org.xbib.datastructures.trie.radix.adaptive.persistent;
public class Tuple2<A, B> {
private A a;
private B b;
public Tuple2() {
}
public Tuple2(A a, B b) {
this.a = a;
this.b = b;
}
public A getA() {
return a;
}
public void setA(A a) {
this.a = a;
}
public B getB() {
return b;
}
public void setB(B b) {
this.b = b;
}
}

View file

@ -0,0 +1,4 @@
/**
* Taken from Ankur Dave, https://github.com/ankurdave/part
*/
package org.xbib.datastructures.trie.radix.adaptive.persistent;

View file

@ -0,0 +1,5 @@
/**
* Taken from https://code.google.com/archive/p/radixtree/
*
*/
package org.xbib.datastructures.trie.radix;

View file

@ -0,0 +1,255 @@
package org.xbib.datastructures.trie.regex;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* The RegexTrie is a trie where each _stored_ segment of the key is a regex {@link Pattern}. Thus,
* the full _stored_ key is a {@code List<Pattern>} rather than a String as in a standard trie. Note
* that the {@link #resolve(List)} method requires a {@code List<String>}, which will be matched
* against the {@link Pattern}s rather than checked for equality as in a standard trie. Since every
* level is a linear scan over compiled patterns, it will likely perform poorly for large datasets.
* <p/>
* One can also use a {@code null} entry in the {@code Pattern} sequence to serve as a wildcard. If
* a {@code null} is encountered, all subsequent entries in the sequence will be ignored.
* When the retrieval code encounters a {@code null} {@code Pattern}, it will first wait to see if a
* more-specific entry matches the sequence. If one does, that more-specific entry will proceed,
* even if it subsequently fails to match.
* <p/>
* If no more-specific entry matches, the wildcard match will add all remaining {@code String}s
* to the list of captures (if enabled) and return the value associated with the wildcard.
* <p/>
* A short sample of the wildcard functionality:
* <pre>
* List&lt;List&lt;String&gt;&gt; captures = new LinkedList&lt;&gt;();
* RegexTrie&lt;Integer&gt; trie = new RegexTrie&lt;&gt;();
* trie.put(2, Arrays.asList("a", null));
* trie.put(4, Arrays.asList("a", "b"));
* trie.resolve(captures, Arrays.asList("a", "c", "e"));
* // returns 2. captures is now [[], ["c"], ["e"]]
* trie.resolve(captures, Arrays.asList("a", "b"));
* // returns 4. captures is now [[], []]
* trie.resolve(captures, Arrays.asList("a", "b", "c"));
* // returns null. captures is now [[], []]
* </pre>
*/
public class RegexTrie<V> {
private V value;
private final Map<CompPattern, RegexTrie<V>> children;
public RegexTrie() {
children = new LinkedHashMap<>();
}
public void clear() {
value = null;
for (RegexTrie<V> child : children.values()) {
child.clear();
}
children.clear();
}
public boolean containsKey(List<String> strings) {
return resolve(strings) != null;
}
public void add(String pattern, V value) {
put(value, Arrays.stream(pattern.split("/")).collect(Collectors.toList()));
}
/**
* Add an entry to the trie.
*
* @param value The value to set
* @param patterns The sequence of {@link Pattern}s that must be sequentially matched to
* retrieve the associated {@code value}
*/
public void put(V value, List<?> patterns) {
List<CompPattern> list = new ArrayList<>(patterns.size());
for (Object object : patterns) {
CompPattern compPattern = null;
if (object instanceof Pattern) {
compPattern = new CompPattern((Pattern) object);
} else if (object instanceof String) {
compPattern = new CompPattern(Pattern.compile((String) object));
}
list.add(compPattern);
}
validateAndPut(value, list);
}
/**
* Resolve a value from the trie, by matching the provided sequence of {@link String}s to a
* sequence of {@link Pattern}s stored in the trie.
*
* @param strings A sequence of {@link String}s to match
* @return The associated value, or {@code null} if no value was found
*/
public V resolve(List<String> strings) {
return resolve(null, strings);
}
/**
* Resolve a value from the trie, by matching the provided sequence of {@link String}s to a
* sequence of {@link Pattern}s stored in the trie. This version of the method also returns
* a {@link List} of capture groups for each {@link Pattern} that was matched.
* <p/>
* Each entry in the outer List corresponds to one level of {@code Pattern} in the trie.
* For each level, the list of capture groups will be stored. If there were no captures
* for a particular level, an empty list will be stored.
* <p/>
* Note that {@code captures} will be {@link List#clear()}ed before the retrieval begins.
* Also, if the retrieval fails after a partial sequence of matches, {@code captures} will
* still reflect the capture groups from the partial match.
*
* @param captures A {@code List<List<String>>} through which capture groups will be returned.
* @param strings A sequence of {@link String}s to match
* @return The associated value, or {@code null} if no value was found
*/
public V resolve(List<List<String>> captures, List<String> strings) {
if (strings.size() == 0) {
throw new IllegalArgumentException("string list must be non-empty");
}
if (captures != null) {
captures.clear();
}
return recursiveRetrieve(captures, strings);
}
/**
* A helper method to consolidate validation before adding an entry to the trie.
*
* @param value The value to set
* @param list The sequence of {@link CompPattern}s that must be sequentially matched to
* retrieve the associated {@code value}
*/
private V validateAndPut(V value, List<CompPattern> list) {
if (list.size() == 0) {
throw new IllegalArgumentException("pattern list must be non-empty");
}
return recursivePut(value, list);
}
private V recursivePut(V value, List<CompPattern> patterns) {
// Cases:
// 1) patterns is empty -- set our value
// 2) patterns is non-empty -- recurse downward, creating a child if necessary
if (patterns.isEmpty()) {
V oldValue = this.value;
this.value = value;
return oldValue;
} else {
CompPattern curKey = patterns.get(0);
List<CompPattern> nextKeys = patterns.subList(1, patterns.size());
// Create a new child to handle
RegexTrie<V> nextChild = children.get(curKey);
if (nextChild == null) {
nextChild = new RegexTrie<V>();
children.put(curKey, nextChild);
}
return nextChild.recursivePut(value, nextKeys);
}
}
private V recursiveRetrieve(List<List<String>> captures, List<String> strings) {
// Cases:
// 1) strings is empty -- return our value
// 2) strings is non-empty -- find the first child that matches, recurse downward
if (strings.isEmpty()) {
return value;
} else {
boolean wildcardMatch = false;
V wildcardValue = null;
String curKey = strings.get(0);
List<String> nextKeys = strings.subList(1, strings.size());
for (Map.Entry<CompPattern, RegexTrie<V>> child : children.entrySet()) {
CompPattern pattern = child.getKey();
if (pattern == null) {
wildcardMatch = true;
wildcardValue = child.getValue().value;
continue;
}
Matcher matcher = pattern.matcher(curKey);
if (matcher.matches()) {
if (captures != null) {
List<String> curCaptures = new ArrayList<>(matcher.groupCount());
for (int i = 0; i < matcher.groupCount(); i++) {
// i+1 since group 0 is the entire matched string
curCaptures.add(matcher.group(i + 1));
}
captures.add(curCaptures);
}
return child.getValue().recursiveRetrieve(captures, nextKeys);
}
}
if (wildcardMatch) {
// stick the rest of the query string into the captures list and return
if (captures != null) {
for (String str : strings) {
captures.add(List.of(str));
}
}
return wildcardValue;
}
// no match
return null;
}
}
@Override
public String toString() {
return String.format("{V: %s, C: %s}", value, children);
}
/**
* Patterns aren't comparable by default, which prevents you from retrieving them from a Map.
* This is a simple stub class that makes a Pattern with a working
* {@link CompPattern#equals(Object)} method.
*/
private static class CompPattern {
protected final Pattern pattern;
CompPattern(Pattern pattern) {
Objects.requireNonNull(pattern);
this.pattern = pattern;
}
@Override
public boolean equals(Object other) {
Pattern otherPat;
if (other instanceof Pattern) {
otherPat = (Pattern) other;
} else if (other instanceof CompPattern) {
CompPattern otherCPat = (CompPattern) other;
otherPat = otherCPat.pattern;
} else {
return false;
}
return pattern.toString().equals(otherPat.toString());
}
@Override
public int hashCode() {
return pattern.toString().hashCode();
}
@Override
public String toString() {
return String.format("P(%s)", pattern);
}
public Matcher matcher(String string) {
return pattern.matcher(string);
}
}
}
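// Usage sketch (illustrative; RegexTrieSketch is a hypothetical class, not
// part of this commit), showing put/resolve with a capture group.
class RegexTrieSketch {
    public static void main(String[] args) {
        RegexTrie<String> trie = new RegexTrie<>();
        // Each key segment is a regex; the second segment captures the digits.
        trie.put("user", java.util.Arrays.asList("users", "(\\d+)"));
        java.util.List<java.util.List<String>> captures = new java.util.ArrayList<>();
        System.out.println(trie.resolve(captures, java.util.Arrays.asList("users", "42"))); // user
        System.out.println(captures);                                                       // [[], [42]]
        System.out.println(trie.resolve(java.util.Arrays.asList("users", "abc")));          // null
    }
}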

View file

@ -0,0 +1,20 @@
package org.xbib.datastructures.trie.segment;
import java.util.Map;
public interface Node<T, V> {
void setKey(TrieKeySegment<T> key);
TrieKeySegment<T> getKey();
void setValue(V value);
V getValue();
void setTerminal(boolean terminal);
boolean isTerminal();
Map<TrieKeySegment<T>, Node<T,V>> getChildren();
}

View file

@ -0,0 +1,54 @@
package org.xbib.datastructures.trie.segment;
import java.util.HashMap;
import java.util.Map;
public class NodeImpl<T,V> implements Node<T,V> {
private TrieKeySegment<T> key;
private V value;
private boolean terminal;
private final Map<TrieKeySegment<T>, Node<T,V>> children;
public NodeImpl() {
this.children = new HashMap<>();
}
@Override
public void setKey(TrieKeySegment<T> key) {
this.key = key;
}
@Override
public TrieKeySegment<T> getKey() {
return key;
}
@Override
public void setValue(V value) {
this.value = value;
}
@Override
public V getValue() {
return value;
}
@Override
public void setTerminal(boolean terminal) {
this.terminal = terminal;
}
@Override
public boolean isTerminal() {
return terminal;
}
@Override
public Map<TrieKeySegment<T>, Node<T,V>> getChildren() {
return children;
}
}

View file

@ -0,0 +1,24 @@
package org.xbib.datastructures.trie.segment;
public class StringSegment implements TrieKeySegment<String> {
private final String segment;
public StringSegment(String segment) {
this.segment = segment;
}
public static StringSegment of(String segment) {
return new StringSegment(segment);
}
@Override
public int compareTo(String o) {
return segment.compareTo(o);
}
@Override
public boolean equals(Object o) {
// value-based equality: segments are used as HashMap keys in NodeImpl
return this == o || (o instanceof StringSegment && segment.equals(((StringSegment) o).segment));
}
@Override
public int hashCode() {
return segment.hashCode();
}
@Override
public String toString() {
return segment;
}
}

View file

@ -0,0 +1,19 @@
package org.xbib.datastructures.trie.segment;
import java.util.List;
import java.util.Set;
public interface Trie<T,K extends TrieKey<T>, V> {
void add(K key, V value);
V search(K key);
List<V> startsWith(List<TrieKeySegment<T>> prefix);
boolean contains(K key);
Set<K> getAllKeys();
int size();
}

View file

@ -0,0 +1,131 @@
package org.xbib.datastructures.trie.segment;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class TrieImpl<T,V> implements Trie<T, TrieKey<T>, V> {
private final Node<T,V> node;
public TrieImpl() {
this.node = new NodeImpl<>();
}
@Override
public void add(TrieKey<T> key, V value) {
addNode(node, key, 0, value);
}
@Override
public V search(TrieKey<T> key) {
return findKey(node, key);
}
@Override
public List<V> startsWith(List<TrieKeySegment<T>> prefix) {
List<V> list = new ArrayList<>();
Node<T,V> node = this.node;
for (TrieKeySegment<T> e : prefix) {
node = node.getChildren().get(e);
if (node == null) {
break;
}
}
if (node != null) {
getValues(node, list);
}
return list;
}
@Override
public boolean contains(TrieKey<T> key) {
return hasKey(node, key);
}
@Override
public Set<TrieKey<T>> getAllKeys() {
Set<TrieKey<T>> keySet = new HashSet<>();
getKeys(node, new TrieKeyImpl<>(), keySet);
return keySet;
}
@Override
public int size() {
return getAllKeys().size();
}
private void getValues(Node<T,V> currNode, List<V> valueList) {
if (currNode.isTerminal()) {
valueList.add(currNode.getValue());
}
Map<TrieKeySegment<T>, Node<T,V>> children = currNode.getChildren();
for (Map.Entry<TrieKeySegment<T>, Node<T,V>> entry : children.entrySet()) {
getValues(entry.getValue(), valueList);
}
}
private void getKeys(Node<T,V> currNode, TrieKey<T> key, Set<TrieKey<T>> keySet) {
if (currNode.isTerminal()) {
keySet.add(key);
}
Map<TrieKeySegment<T>, Node<T,V>> children = currNode.getChildren();
for (Map.Entry<TrieKeySegment<T>, Node<T,V>> entry : children.entrySet()) {
// copy the accumulated key: append() mutates in place and would leak segments across sibling branches
TrieKey<T> k = new TrieKeyImpl<>(new ArrayList<>(key.getSegments())).append(entry.getValue().getKey());
getKeys(entry.getValue(), k, keySet);
}
}
private V findKey(Node<T,V> currNode, TrieKey<T> key) {
TrieKeySegment<T> e = key.size() > 0 ? key.get(0) : null;
if (currNode.getChildren().containsKey(e)) {
Node<T,V> nextNode = currNode.getChildren().get(e);
if (key.size() <= 1) {
if (nextNode.isTerminal()) {
return nextNode.getValue();
}
} else {
return findKey(nextNode, key.subKey(1));
}
}
return null;
}
private boolean hasKey(Node<T,V> currNode, TrieKey<T> key) {
TrieKeySegment<T> e = key.size() > 0 ? key.get(0) : null;
if (currNode.getChildren().containsKey(e)) {
Node<T,V> nextNode = currNode.getChildren().get(e);
if (key.size() <= 1) {
return nextNode.isTerminal();
} else {
return hasKey(nextNode, key.subKey(1));
}
}
return false;
}
private void addNode(Node<T,V> currNode, TrieKey<T> key, int pos, V value) {
TrieKeySegment<T> e = pos < key.size() ? key.get(pos) : null;
Node<T,V> nextNode = currNode.getChildren().get(e);
if (nextNode == null) {
nextNode = new NodeImpl<>();
nextNode.setKey(e);
if (pos < key.size() - 1) {
addNode(nextNode, key, pos + 1, value);
} else {
nextNode.setValue(value);
nextNode.setTerminal(true);
}
currNode.getChildren().put(e, nextNode);
} else {
if (pos < key.size() - 1) {
addNode(nextNode, key, pos + 1, value);
} else {
nextNode.setValue(value);
nextNode.setTerminal(true);
}
}
}
}
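// Usage sketch (illustrative; SegmentTrieSketch is a hypothetical class, not
// part of this commit), built with the stringKey helper from TrieKeyImpl.
// Note that startsWith takes the raw segment list rather than a TrieKey.
class SegmentTrieSketch {
    public static void main(String[] args) {
        Trie<String, TrieKey<String>, Integer> trie = new TrieImpl<>();
        trie.add(TrieKeyImpl.stringKey("usr", "local", "bin"), 1);
        trie.add(TrieKeyImpl.stringKey("usr", "local", "lib"), 2);
        System.out.println(trie.search(TrieKeyImpl.stringKey("usr", "local", "bin"))); // 1
        System.out.println(trie.contains(TrieKeyImpl.stringKey("usr", "local")));      // false: not a terminal node
        java.util.List<Integer> values = trie.startsWith(TrieKeyImpl.stringKey("usr", "local").getSegments());
        System.out.println(values); // [1, 2] (child map iteration order is unspecified)
    }
}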

View file

@ -0,0 +1,19 @@
package org.xbib.datastructures.trie.segment;
import java.util.Arrays;
import java.util.List;
public interface TrieKey<T> {
int size();
TrieKey<T> subKey(int i);
TrieKey<T> append(TrieKeySegment<T> trieKeySegment);
void set(int i, TrieKeySegment<T> trieKeySegment);
TrieKeySegment<T> get(int i);
List<TrieKeySegment<T>> getSegments();
}

View file

@ -0,0 +1,88 @@
package org.xbib.datastructures.trie.segment;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class TrieKeyImpl<T> implements TrieKey<T>, Comparable<TrieKeyImpl<T>> {
private final List<TrieKeySegment<T>> segments;
public TrieKeyImpl() {
this(new ArrayList<>());
}
public TrieKeyImpl(List<TrieKeySegment<T>> segments) {
this.segments = segments;
}
public TrieKeyImpl<T> add(TrieKeySegment<T> segment) {
segments.add(segment);
return this;
}
@Override
public List<TrieKeySegment<T>> getSegments() {
return segments;
}
@Override
public int size() {
return segments.size();
}
@Override
public TrieKey<T> subKey(int i) {
return new TrieKeyImpl<>(segments.subList(i, segments.size()));
}
@Override
public TrieKey<T> append(TrieKeySegment<T> trieKeySegment) {
segments.add(trieKeySegment);
return this;
}
@Override
public void set(int i, TrieKeySegment<T> trieKeySegment) {
segments.set(i, trieKeySegment);
}
@Override
public TrieKeySegment<T> get(int i) {
return segments.get(i);
}
@SuppressWarnings("unchecked")
@Override
public int compareTo(TrieKeyImpl<T> o) {
for (int i = 0; i < segments.size(); i++) {
TrieKeySegment<T> segment1 = segments.get(i);
T segment2 = i < o.segments.size() ? (T) o.segments.get(i) : null;
if (segment2 == null) {
return 1;
}
int c = segment1.compareTo(segment2);
if (c != 0) {
return c;
}
}
return 0;
}
public static TrieKey<String> stringKey(String... segments) {
TrieKey<String> trieKey = new TrieKeyImpl<>();
Arrays.stream(segments).forEach(s -> trieKey.append(new StringSegment(s)));
return trieKey;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
for (TrieKeySegment<T> segment : segments) {
sb.append(segment.toString());
}
return sb.toString();
}
}

View file

@ -0,0 +1,4 @@
package org.xbib.datastructures.trie.segment;
public interface TrieKeySegment<T> extends Comparable<T> {
}

View file

@ -1,4 +1,4 @@
package org.xbib.datastructures.trie;
package org.xbib.datastructures.trie.simple;
import java.util.HashMap;
import java.util.Map;

View file

@ -1,4 +1,4 @@
package org.xbib.datastructures.trie;
package org.xbib.datastructures.trie.simple;
import java.util.List;
import java.util.Set;

View file

@ -1,4 +1,4 @@
package org.xbib.datastructures.trie;
package org.xbib.datastructures.trie.simple;
import java.util.ArrayList;
import java.util.HashSet;

View file

@ -0,0 +1,153 @@
package org.xbib.datastructures.trie;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
public class WordTreeTest {
static class Node {
private final char ch;
private boolean leaf;
private LinkedList<Node> children;
public Node(char ch) {
this.ch = ch;
}
public void addChild(Node node) {
if (children == null) {
children = new LinkedList<>();
}
children.add(node);
}
public Node getNode(char ch) {
if (children == null) {
return null;
}
for (Node child : children) {
if (child.getChar() == ch) {
return child;
}
}
return null;
}
public char getChar() {
return ch;
}
public List<Node> getChildren() {
return Objects.requireNonNullElse(this.children, Collections.emptyList());
}
public boolean isLeaf() {
return leaf;
}
public void setLeaf(boolean leaf) {
this.leaf = leaf;
}
}
Node root = new Node(' ');
public WordTreeTest() {
}
public List<String> getWordsForPrefix(String prefix) {
if (prefix.length() == 0) {
return Collections.emptyList();
}
Node node = getNodeForPrefix(root, prefix);
if (node == null) {
return Collections.emptyList();
}
List<LinkedList<Character>> chars = collectChars(node);
List<String> words = new ArrayList<>(chars.size());
for (List<Character> charList : chars) {
words.add(combine(prefix.substring(0, prefix.length() - 1), charList));
}
return words;
}
private String combine(String prefix, List<Character> charList) {
StringBuilder sb = new StringBuilder(prefix);
for (Character character : charList) {
sb.append(character);
}
return sb.toString();
}
private Node getNodeForPrefix(Node node, String prefix) {
if (prefix.length() == 0) {
return node;
}
Node next = node.getNode(prefix.charAt(0));
if (next == null) {
return null;
}
return getNodeForPrefix(next, prefix.substring(1));
}
private List<LinkedList<Character>> collectChars(Node node) {
List<LinkedList<Character>> chars = new ArrayList<>();
if (node.getChildren().size() == 0) {
chars.add(new LinkedList<>(Collections.singletonList(node.getChar())));
} else {
if (node.isLeaf()) {
chars.add(new LinkedList<>(Collections.singletonList(node.getChar())));
}
List<Node> children = node.getChildren();
for (Node child : children) {
List<LinkedList<Character>> childList = collectChars(child);
for (LinkedList<Character> characters : childList) {
characters.push(node.getChar());
chars.add(characters);
}
}
}
return chars;
}
public void addWord(String word) {
addWord(root, word);
}
private void addWord(Node parent, String word) {
if (word.trim().length() == 0) {
return;
}
Node child = parent.getNode(word.charAt(0));
if (child == null) {
child = new Node(word.charAt(0));
parent.addChild(child);
}
if (word.length() == 1) {
child.setLeaf(true);
} else {
addWord(child, word.substring(1));
}
}
@Test
public void testWordTree() {
WordTreeTest tree = new WordTreeTest();
tree.addWord("world");
tree.addWord("work");
tree.addWord("wolf");
tree.addWord("life");
tree.addWord("love");
System.out.println(tree.getWordsForPrefix("wo"));
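// prints [world, work, wolf]: depth-first collection in insertion order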
}
}

View file

@ -0,0 +1,59 @@
package org.xbib.datastructures.trie.ahocorasick;
import org.junit.jupiter.api.Test;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class IntervalTest {
@Test
public void test_construct() {
final Interval i = new Interval(1, 3);
assertEquals(1, i.getStart());
assertEquals(3, i.getEnd());
}
@Test
public void test_size() {
Interval interval = new Interval(0, 2);
assertEquals(3, interval.getEnd() - interval.getStart() + 1);
}
@Test
public void test_intervaloverlaps() {
assertTrue(new Interval(1, 3).overlapsWith(new Interval(2, 4)));
}
@Test
public void test_intervalDoesNotOverlap() {
assertFalse(new Interval(1, 13).overlapsWith(new Interval(27, 42)));
}
@Test
public void test_pointOverlaps() {
assertTrue(new Interval(1, 3).overlapsWith(2));
}
@Test
public void test_pointDoesNotOverlap() {
assertFalse(new Interval(1, 13).overlapsWith(42));
}
@Test
public void test_comparable() {
final Set<Interval> intervals = new TreeSet<>();
intervals.add(new Interval(4, 6));
intervals.add(new Interval(2, 7));
intervals.add(new Interval(3, 4));
final Iterator<Interval> it = intervals.iterator();
assertEquals(2, it.next().getStart());
assertEquals(3, it.next().getStart());
assertEquals(4, it.next().getStart());
}
}

View file

@ -0,0 +1,49 @@
package org.xbib.datastructures.trie.ahocorasick;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class IntervalTreeTest {
@Test
public void findOverlaps() {
List<Interval> intervals = new ArrayList<>();
intervals.add(new Interval(0, 2));
intervals.add(new Interval(1, 3));
intervals.add(new Interval(2, 4));
intervals.add(new Interval(3, 5));
intervals.add(new Interval(4, 6));
intervals.add(new Interval(5, 7));
IntervalTree intervalTree = new IntervalTree(intervals);
List<Interval> overlaps = intervalTree.findOverlaps(new Interval(1, 3));
assertEquals(3, overlaps.size());
Iterator<Interval> overlapsIt = overlaps.iterator();
assertOverlap(overlapsIt.next(), 2, 4);
assertOverlap(overlapsIt.next(), 3, 5);
assertOverlap(overlapsIt.next(), 0, 2);
}
@Test
public void removeOverlaps() {
List<Interval> intervals = new ArrayList<>();
intervals.add(new Interval(0, 2));
intervals.add(new Interval(4, 5));
intervals.add(new Interval(2, 10));
intervals.add(new Interval(6, 13));
intervals.add(new Interval(9, 15));
intervals.add(new Interval(12, 16));
IntervalTree intervalTree = new IntervalTree(intervals);
intervals = intervalTree.removeOverlaps(intervals);
assertEquals(2, intervals.size());
}
protected void assertOverlap(Interval interval, int expectedStart, int expectedEnd) {
assertEquals(expectedStart, interval.getStart());
assertEquals(expectedEnd, interval.getEnd());
}
}

View file

@ -0,0 +1,567 @@
package org.xbib.datastructures.trie.ahocorasick;
import org.junit.jupiter.api.Test;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TrieTest {
private static final SecureRandom random = new SecureRandom();
private final static String[] ALPHABET = new String[] { "abc", "bcd", "cde" };
private final static String[] ALPHABET_PAYLOAD = new String[] { "alpha:abc", "alpha:bcd", "alpha:cde" };
private final static List<Entry<String>> ALPHABET_WITH_ENTRIES = Arrays.asList(
new Entry<>( ALPHABET[ 0 ], ALPHABET_PAYLOAD[ 0 ] ),
new Entry<>( ALPHABET[ 1 ], ALPHABET_PAYLOAD[ 1 ] ),
new Entry<>( ALPHABET[ 2 ], ALPHABET_PAYLOAD[ 2 ] ));
private final static String[] PRONOUNS = new String[] { "hers", "his", "she", "he" };
private final static int[] PRONOUNS_PAYLOAD_ID = new int[] { 9, 12, 4, 20 };
private final static List<Entry<Integer>> PRONOUNS_WITH_ENTRIES = Arrays.asList(
new Entry<>( PRONOUNS[ 0 ], PRONOUNS_PAYLOAD_ID[ 0 ] ),
new Entry<>( PRONOUNS[ 1 ], PRONOUNS_PAYLOAD_ID[ 1 ] ),
new Entry<>( PRONOUNS[ 2 ], PRONOUNS_PAYLOAD_ID[ 2 ] ),
new Entry<>( PRONOUNS[ 3 ], PRONOUNS_PAYLOAD_ID[ 3 ] )
);
private final static String[] FOOD = new String[] { "veal", "cauliflower", "broccoli", "tomatoes" };
private final static Food[] FOOD_PAYLOAD = new Food[] { new Food("veal"), new Food("cauliflower"), new Food("broccoli"),
new Food("tomatoes") };
private final static List<Entry<Food>> FOOD_WITH_ENTRIES = Arrays.asList(
new Entry<>( FOOD[ 0 ], FOOD_PAYLOAD[ 0 ] ),
new Entry<>( FOOD[ 1 ], FOOD_PAYLOAD[ 1 ] ),
new Entry<>( FOOD[ 2 ], FOOD_PAYLOAD[ 2 ] ),
new Entry<>( FOOD[ 3 ], FOOD_PAYLOAD[ 3 ] )
);
private final static String[] GREEK_LETTERS = new String[] { "Alpha", "Beta", "Gamma" };
private final static String[] GREEK_LETTERS_PAYLOAD = new String[] { "greek:Alpha", "greek:Beta", "greek:Gamma" };
private final static List<Entry<String>> GREEK_LETTERS_WITH_ENTRIES = Arrays.asList(
new Entry<>( GREEK_LETTERS[ 0 ], GREEK_LETTERS_PAYLOAD[ 0 ] ),
new Entry<>( GREEK_LETTERS[ 1 ], GREEK_LETTERS_PAYLOAD[ 1 ] ),
new Entry<>( GREEK_LETTERS[ 2 ], GREEK_LETTERS_PAYLOAD[ 2 ] ));
private final static String[] UNICODE = new String[] { "turning", "once", "again", "börkü" };
private final static String[] UNICODE_PAYLOAD = new String[] { "uni:turning", "uni:once", "uni:again", "uni:börkü" };
private final static List<Entry<String>> UNICODE_WITH_ENTRIES = Arrays.asList(
new Entry<>( UNICODE[ 0 ], UNICODE_PAYLOAD[ 0 ] ),
new Entry<>( UNICODE[ 1 ], UNICODE_PAYLOAD[ 1 ] ),
new Entry<>( UNICODE[ 2 ], UNICODE_PAYLOAD[ 2 ] ),
new Entry<>( UNICODE[ 3 ], UNICODE_PAYLOAD[ 3 ] ));
public static class Food {
private final String name;
public Food(String name) {
this.name = name;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((name == null) ? 0 : name.hashCode());
return result;
}
@Override
public boolean equals( Object obj ) {
if( this == obj ) {
return true;
}
if( obj == null ) {
return false;
}
if( getClass() != obj.getClass() ) {
return false;
}
Food other = (Food) obj;
if( name == null ) {
return other.name == null;
}
else {
return name.equals( other.name );
}
}
}
@Test
public void keyAndTextAreTheSame() {
Trie<String> trie = Trie.<String>builder()
.add(ALPHABET[0], ALPHABET_PAYLOAD[0])
.build();
Collection<EntryOutput<String>> outputs = trie.parse(ALPHABET[0]);
Iterator<EntryOutput<String>> iterator = outputs.iterator();
checkOutput(iterator.next(), 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]);
}
@Test
public void keyAndTextAreTheSameFirstMatch() {
Trie<String> trie = Trie.<String>builder()
.add(ALPHABET[0], ALPHABET_PAYLOAD[0])
.build();
EntryOutput<String> firstMatch = trie.firstMatch(ALPHABET[0]);
checkOutput(firstMatch, 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]);
}
@Test
public void textIsLongerThanKey() {
Trie<String> trie = Trie.<String>builder()
.add(ALPHABET[0], ALPHABET_PAYLOAD[0])
.build();
Collection<EntryOutput<String>> emits = trie.parse(" " + ALPHABET[0]);
Iterator<EntryOutput<String>> iterator = emits.iterator();
checkOutput(iterator.next(), 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]);
}
@Test
public void textIsLongerThanKeyFirstMatch() {
Trie<String> trie = Trie.<String>builder()
.add(ALPHABET[0], ALPHABET_PAYLOAD[0])
.build();
EntryOutput<String> firstMatch = trie.firstMatch(" " + ALPHABET[0]);
checkOutput(firstMatch, 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]);
}
@Test
public void variousKeysOneMatch() {
Trie<String> trie = Trie.<String>builder()
.add(ALPHABET_WITH_ENTRIES)
.build();
Collection<EntryOutput<String>> outputs = trie.parse("bcd");
Iterator<EntryOutput<String>> iterator = outputs.iterator();
checkOutput(iterator.next(), 0, 2, "bcd", "alpha:bcd");
}
@Test
public void variousKeysFirstMatch() {
Trie<String> trie = Trie.<String>builder().add(ALPHABET_WITH_ENTRIES).build();
EntryOutput<String> firstMatch = trie.firstMatch("bcd");
checkOutput(firstMatch, 0, 2, "bcd", "alpha:bcd");
}
@Test
public void ushersTestAndStopOnHit() {
Trie<Integer> trie = Trie.<Integer>builder().add(PRONOUNS_WITH_ENTRIES).stopOnHit().build();
Collection<EntryOutput<Integer>> emits = trie.parse("ushers");
assertEquals(1, emits.size()); // stopOnHit: only the first hit (he @ 3) is emitted
Iterator<EntryOutput<Integer>> iterator = emits.iterator();
checkOutput(iterator.next(), 2, 3, "he", 20);
}
@Test
public void ushersTestStopOnHitSkipOne() {
Trie<Integer> trie = Trie.<Integer>builder().add(PRONOUNS_WITH_ENTRIES).stopOnHit().build();
CollectingOutputHandler<Integer> testEmitHandler = new AbstractCollectingOutputHandler<>() {
boolean first = true;
@Override
public boolean output(final EntryOutput<Integer> emit) {
if (first) {
// return false for the first element
first = false;
return false;
}
add(emit);
return true;
}
};
trie.parse("ushers", testEmitHandler);
Collection<EntryOutput<Integer>> emits = testEmitHandler.getOutputs();
assertEquals(1, emits.size()); // the handler skips the first hit (he @ 3) and collects she @ 3
Iterator<EntryOutput<Integer>> iterator = emits.iterator();
checkOutput(iterator.next(), 1, 3, "she", 4);
}
@Test
public void ushersTest() {
Trie<Integer> trie = Trie.<Integer>builder().add(PRONOUNS_WITH_ENTRIES).build();
Collection<EntryOutput<Integer>> emits = trie.parse("ushers");
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
Iterator<EntryOutput<Integer>> iterator = emits.iterator();
checkOutput(iterator.next(), 2, 3, "he", 20);
checkOutput(iterator.next(), 1, 3, "she", 4);
checkOutput(iterator.next(), 2, 5, "hers", 9);
}
@Test
public void ushersTestWithCapitalKeywords() {
Trie<String> trie = Trie.<String>builder().ignoreCase().add("HERS", "hers").add("HIS", "his")
.add("SHE", "she").add("HE", "he").build();
Collection<EntryOutput<String>> emits = trie.parse("ushers");
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
Iterator<EntryOutput<String>> iterator = emits.iterator();
checkOutput(iterator.next(), 2, 3, "HE", "he");
checkOutput(iterator.next(), 1, 3, "SHE", "she");
checkOutput(iterator.next(), 2, 5, "HERS", "hers");
}
@Test
public void ushersTestFirstMatch() {
Trie<Integer> trie = Trie.<Integer>builder().add(PRONOUNS_WITH_ENTRIES).build();
EntryOutput<Integer> firstMatch = trie.firstMatch("ushers");
checkOutput(firstMatch, 2, 3, "he", 20);
}
@Test
public void ushersTestByCallback() {
Trie<Integer> trie = Trie.<Integer>builder().add(PRONOUNS_WITH_ENTRIES).build();
final List<EntryOutput<Integer>> emits = new LinkedList<>();
OutputHandler<Integer> emitHandler = emit -> {
emits.add(emit);
return true;
};
trie.parse("ushers", emitHandler);
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
Iterator<EntryOutput<Integer>> iterator = emits.iterator();
checkOutput(iterator.next(), 2, 3, "he", 20);
checkOutput(iterator.next(), 1, 3, "she", 4);
checkOutput(iterator.next(), 2, 5, "hers", 9);
}
@Test
public void misleadingTest() {
Trie<String> trie = Trie.<String>builder().add("hers", "pronon:hers").build();
Collection<EntryOutput<String>> emits = trie.parse("h he her hers");
Iterator<EntryOutput<String>> iterator = emits.iterator();
checkOutput(iterator.next(), 9, 12, "hers", "pronon:hers");
}
@Test
public void misleadingTestFirstMatch() {
Trie<String> trie = Trie.<String>builder().add("hers", "pronon:hers").build();
EntryOutput<String> firstMatch = trie.firstMatch("h he her hers");
checkOutput(firstMatch, 9, 12, "hers", "pronon:hers");
}
@Test
public void recipes() {
Trie<Food> trie = Trie.<Food>builder().add(FOOD_WITH_ENTRIES).build();
Collection<EntryOutput<Food>> emits = trie.parse("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
Iterator<EntryOutput<Food>> iterator = emits.iterator();
checkOutput(iterator.next(), 2, 12, "cauliflower", new Food("cauliflower"));
checkOutput(iterator.next(), 18, 25, "tomatoes", new Food("tomatoes"));
checkOutput(iterator.next(), 40, 43, "veal", new Food("veal"));
checkOutput(iterator.next(), 51, 58, "broccoli", new Food("broccoli"));
}
@Test
public void recipesFirstMatch() {
Trie<Food> trie = Trie.<Food>builder().add(FOOD_WITH_ENTRIES).build();
EntryOutput<Food> firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
checkOutput(firstMatch, 2, 12, "cauliflower", new Food("cauliflower"));
}
@Test
public void longAndShortOverlappingMatch() {
Trie<String> trie = Trie.<String>builder().add("he", "pronon:he").add("hehehehe", "garbage")
.build();
Collection<EntryOutput<String>> emits = trie.parse("hehehehehe");
Iterator<EntryOutput<String>> iterator = emits.iterator();
checkOutput(iterator.next(), 0, 1, "he", "pronon:he");
checkOutput(iterator.next(), 2, 3, "he", "pronon:he");
checkOutput(iterator.next(), 4, 5, "he", "pronon:he");
checkOutput(iterator.next(), 6, 7, "he", "pronon:he");
checkOutput(iterator.next(), 0, 7, "hehehehe", "garbage");
checkOutput(iterator.next(), 8, 9, "he", "pronon:he");
checkOutput(iterator.next(), 2, 9, "hehehehe", "garbage");
}
@Test
public void nonOverlapping() {
Trie<String> trie = Trie.<String>builder().ignoreOverlaps().add("ab", "alpha:ab")
.add("cba", "alpha:cba").add("ababc", "alpha:ababc").build();
Collection<EntryOutput<String>> emits = trie.parse("ababcbab");
assertEquals(2, emits.size());
Iterator<EntryOutput<String>> iterator = emits.iterator();
// With overlaps: ab@1, ab@3, ababc@4, cba@6, ab@7
checkOutput(iterator.next(), 0, 4, "ababc", "alpha:ababc");
checkOutput(iterator.next(), 6, 7, "ab", "alpha:ab");
}
@Test
public void nonOverlappingFirstMatch() {
Trie<String> trie = Trie.<String>builder().ignoreOverlaps().add("ab", "alpha:ab")
.add("cba", "alpha:cba").add("ababc", "alpha:ababc").build();
EntryOutput<String> firstMatch = trie.firstMatch("ababcbab");
checkOutput(firstMatch, 0, 4, "ababc", "alpha:ababc");
}
@Test
public void containsMatch() {
Trie<String> trie = Trie.<String>builder().ignoreOverlaps().add("ab", "alpha:ab")
.add("cba", "alpha:cba").add("ababc", "alpha:ababc").build();
assertTrue(trie.match("ababcbab"));
}
@Test
public void startOfChurchillSpeech() {
Trie<String> trie = Trie.<String>builder().ignoreOverlaps().add("T").add("u").add("ur")
.add("r").add("urn").add("ni").add("i").add("in").add("n")
.add("urning").build();
Collection<EntryOutput<String>> emits = trie.parse("Turning");
assertEquals(2, emits.size());
}
@Test
public void partialMatch() {
Trie<String> trie = Trie.<String>builder().onlyWholeWords().add("sugar", "food:sugar").build();
Collection<EntryOutput<String>> emits = trie.parse("sugarcane sugarcane sugar canesugar"); // left, middle, right test
assertEquals(1, emits.size()); // Match must not be made
checkOutput(emits.iterator().next(), 20, 24, "sugar", "food:sugar");
}
@Test
public void partialMatchFirstMatch() {
Trie<String> trie = Trie.<String>builder().onlyWholeWords().add("sugar", "food:sugar").build();
EntryOutput<String> firstMatch = trie.firstMatch("sugarcane sugarcane sugar canesugar"); // left, middle, right test
checkOutput(firstMatch, 20, 24, "sugar", "food:sugar");
}
@Test
public void tokenizeFullSentence() {
Trie<String> trie = Trie.<String>builder().add(GREEK_LETTERS_WITH_ENTRIES).build();
Collection<Token<String>> tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve");
assertEquals(7, tokens.size());
Iterator<Token<String>> tokensIt = tokens.iterator();
assertEquals("Hear: ", tokensIt.next().getFragment());
assertEquals("Alpha", tokensIt.next().getFragment());
assertEquals(" team first, ", tokensIt.next().getFragment());
assertEquals("Beta", tokensIt.next().getFragment());
assertEquals(" from the rear, ", tokensIt.next().getFragment());
assertEquals("Gamma", tokensIt.next().getFragment());
assertEquals(" in reserve", tokensIt.next().getFragment());
}
// @see https://github.com/robert-bor/aho-corasick/issues/5
@Test
public void testStringIndexOutOfBoundsException() {
Trie<String> trie = Trie.<String>builder().ignoreCase().onlyWholeWords().add(UNICODE_WITH_ENTRIES)
.build();
Collection<EntryOutput<String>> emits = trie.parse("TurninG OnCe AgAiN BÖRKÜ");
assertEquals(4, emits.size()); // all four keywords match despite the mixed-case input
Iterator<EntryOutput<String>> it = emits.iterator();
checkOutput(it.next(), 0, 6, "turning", "uni:turning");
checkOutput(it.next(), 8, 11, "once", "uni:once");
checkOutput(it.next(), 13, 17, "again", "uni:again");
checkOutput(it.next(), 19, 23, "börkü", "uni:börkü");
}
@Test
public void testIgnoreCase() {
Trie<String> trie = Trie.<String>builder().ignoreCase().add(UNICODE_WITH_ENTRIES).build();
Collection<EntryOutput<String>> emits = trie.parse("TurninG OnCe AgAiN BÖRKÜ");
assertEquals(4, emits.size()); // all four keywords match, case-insensitively
Iterator<EntryOutput<String>> it = emits.iterator();
checkOutput(it.next(), 0, 6, "turning", "uni:turning");
checkOutput(it.next(), 8, 11, "once", "uni:once");
checkOutput(it.next(), 13, 17, "again", "uni:again");
checkOutput(it.next(), 19, 23, "börkü", "uni:börkü");
}
@Test
public void testIgnoreCaseFirstMatch() {
Trie<String> trie = Trie.<String>builder().ignoreCase().add(UNICODE_WITH_ENTRIES).build();
EntryOutput<String> firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ");
checkOutput(firstMatch, 0, 6, "turning", "uni:turning");
}
@Test
public void tokenizeTokensInSequence() {
Trie<String> trie = Trie.<String>builder().add(GREEK_LETTERS_WITH_ENTRIES).build();
Collection<Token<String>> tokens = trie.tokenize("Alpha Beta Gamma");
assertEquals(5, tokens.size());
}
// @see https://github.com/robert-bor/aho-corasick/issues/7
@Test
public void testZeroLength() {
Trie<String> trie = Trie.<String>builder().ignoreOverlaps().onlyWholeWords().ignoreCase().add("")
.build();
trie.tokenize(
"Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel.");
}
// @see https://github.com/robert-bor/aho-corasick/issues/8
@Test
public void testUnicode1() {
String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char
assertEquals("THIS", target.substring(5, 9)); // Java does it the right way
Trie<String> trie = Trie.<String>builder().ignoreCase().onlyWholeWords().add("this", "pronon:this")
.build();
Collection<EntryOutput<String>> emits = trie.parse(target);
assertEquals(1, emits.size());
Iterator<EntryOutput<String>> it = emits.iterator();
checkOutput(it.next(), 5, 8, "this", "pronon:this");
}
// @see https://github.com/robert-bor/aho-corasick/issues/8
@Test
public void testUnicode2() {
String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char
Trie<String> trie = Trie.<String>builder().ignoreCase().onlyWholeWords().add("this", "pronon:this")
.build();
assertEquals("THIS", target.substring(5, 9)); // Java does it the right way
EntryOutput<String> firstMatch = trie.firstMatch(target);
checkOutput(firstMatch, 5, 8, "this", "pronon:this");
}
@Test
public void testPartialMatchWhiteSpaces() {
Trie<String> trie = Trie.<String>builder().onlyWholeWordsWhiteSpaceSeparated()
.add("#sugar-123", "sugar").build();
Collection<EntryOutput<String>> emits = trie.parse("#sugar-123 #sugar-1234"); // exact word, then a longer word with the same prefix
assertEquals(1, emits.size()); // the partial match inside "#sugar-1234" must not be emitted
checkOutput(emits.iterator().next(), 0, 9, "#sugar-123", "sugar");
}
@Test
public void testLargeString() {
final int interval = 100;
final int textSize = 1000000;
final String keyword = FOOD[1];
final Food payload = FOOD_PAYLOAD[1];
final StringBuilder text = randomNumbers(textSize);
injectKeyword(text, keyword, interval);
Trie<Food> trie = Trie.<Food>builder().onlyWholeWords().add(keyword, payload).build();
final Collection<EntryOutput<Food>> emits = trie.parse(text);
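// the keyword is injected every interval (100) chars over textSize (1,000,000) chars, so textSize / interval = 10,000 matches are expected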
assertEquals(textSize / interval, emits.size());
}
@Test
public void test_containsMatchWithCaseInsensitive() {
Trie<String> trie = Trie.<String>builder().ignoreCase().add("foo", "bar").build();
assertTrue(trie.match("FOOBAR"));
assertFalse(trie.match("FO!?AR"));
}
// @see https://github.com/robert-bor/aho-corasick/issues/85
@Test
public void test_wholeWords() {
Trie<String> trie = Trie.<String>builder().add("foo", "bar").onlyWholeWords().build();
// collect matches via Trie.parse(CharSequence)
Collection<EntryOutput<String>> result1 = trie.parse("foobar");
// collect matches via the Trie.parse(CharSequence, OutputHandler<String>) overload
Collection<EntryOutput<String>> result2 = new LinkedList<>();
trie.parse("foobar", result2::add);
assertTrue(result1.isEmpty());
assertEquals(result1, result2);
}
// @see https://github.com/robert-bor/aho-corasick/issues/85
@Test
public void test_wholeWordsWhiteSpaceSeparated() {
Trie<String> trie = Trie.<String>builder().add("foo", "bar").onlyWholeWordsWhiteSpaceSeparated().build();
// collect matches via Trie.parse(CharSequence)
Collection<EntryOutput<String>> result1 = trie.parse("foo#bar");
// collect matches via the Trie.parse(CharSequence, OutputHandler<String>) overload
Collection<EntryOutput<String>> result2 = new LinkedList<>();
trie.parse("foo#bar", result2::add);
assertTrue(result1.isEmpty());
assertEquals(result1, result2);
}
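// For contrast, a minimal sketch (not part of the original suite): without
// onlyWholeWords() the default matcher should emit "foo" as a substring of "foobar".
@Test
public void test_substringMatchWithoutWholeWords() {
Trie<String> trie = Trie.<String>builder().add("foo", "bar").build();
Collection<EntryOutput<String>> result = trie.parse("foobar");
assertEquals(1, result.size());
checkOutput(result.iterator().next(), 0, 2, "foo", "bar");
}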
/**
 * Generates a random sequence of ASCII digits.
 *
 * @param count The number of digits to generate.
 * @return A character sequence filled with {@code count} random digits.
 */
private StringBuilder randomNumbers(int count) {
final StringBuilder sb = new StringBuilder(count);
while (count-- > 0) { // count-- (not --count), so exactly count digits are produced
sb.append(randomInt(10));
}
return sb;
}
/**
* Injects keywords into a string builder.
*
* @param source Should contain a bunch of random data that cannot match any
* keyword.
* @param keyword A keyword to inject repeatedly in the text.
* @param interval How often to inject the keyword.
*/
private void injectKeyword(final StringBuilder source, final String keyword, final int interval) {
final int length = source.length();
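// replace() overwrites a same-length span in place, so the text length never changes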
for (int i = 0; i < length; i += interval) {
source.replace(i, i + keyword.length(), keyword);
}
}
private int randomInt(final int bound) {
return random.nextInt(bound);
}
// one generic helper replaces the three identical per-type overloads
private <T> void checkOutput(EntryOutput<T> next, int expectedStart, int expectedEnd, String expectedKeyword,
T expectedPayload) {
assertEquals(expectedStart, next.getStart(), "Start of emit should have been " + expectedStart);
assertEquals(expectedEnd, next.getEnd(), "End of emit should have been " + expectedEnd);
assertEquals(expectedKeyword, next.getKey(), "Keyword of emit should be " + expectedKeyword);
assertEquals(expectedPayload, next.getValue(), "Payload of emit should be " + expectedPayload);
}
static abstract class AbstractCollectingOutputHandler<T> implements CollectingOutputHandler<T> {
private final List<EntryOutput<T>> outputs = new ArrayList<>();
public void add(final EntryOutput<T> emit) {
outputs.add(emit);
}
@Override
public List<EntryOutput<T>> getOutputs() {
return this.outputs;
}
}
}


@ -61,10 +61,8 @@ public class StringKeyAnalyzer extends AbstractKeyAnalyzer<String> {
if (bitIndex >= lengthInBits(key)) {
return false;
}
int index = bitIndex / size;
int bit = bitIndex % size;
return (key.charAt(index) & mask(bit)) != 0;
}
@ -75,15 +73,11 @@ public class StringKeyAnalyzer extends AbstractKeyAnalyzer<String> {
@Override
public int bitIndex(String key, String otherKey) {
boolean allNull = true;
int length = Math.max(key.length(), otherKey.length());
for (int i = 0; i < length; i++) {
char ch1 = valueAt(key, i);
char ch2 = valueAt(otherKey, i);
if (ch1 != ch2) {
int xor = ch1 ^ ch2;
for (int j = 0; j < size; j++) {
@ -92,17 +86,14 @@ public class StringKeyAnalyzer extends AbstractKeyAnalyzer<String> {
}
}
}
if (ch1 != 0) {
allNull = false;
}
}
// All bits are 0
if (allNull) {
return KeyAnalyzer.NULL_BIT_KEY;
}
// Both keys are equal
return KeyAnalyzer.EQUAL_BIT_KEY;
}


@ -0,0 +1,401 @@
package org.xbib.datastructures.trie.radix.adaptive;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
public abstract class InnerNodeUnitTest {
protected static class Pair implements Comparable<Pair> {
final byte partialKey;
final Node child;
Pair(byte partialKey, Node child) {
this.partialKey = partialKey;
this.child = child;
}
@Override
public int compareTo(Pair o) {
return compare(partialKey, o.partialKey);
}
}
public static int compare(byte a, byte b) {
return toInt(a) - toInt(b);
}
public static int toInt(byte value) {
return value & 0xFF;
}
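// Example: toInt((byte) -2) == 254, so compare((byte) 1, (byte) -2) < 0;
// negative bytes therefore sort after positive ones in unsigned order.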
protected InnerNode node;
protected Pair[] existingData;
InnerNodeUnitTest(int nodeSize) {
InnerNode node = new Node4();
existingData = new Pair[nodeSize + 1];
for (int j = 0, i = -nodeSize / 2; j < nodeSize + 1; i++, j++) {
if (node.isFull()) {
node = node.grow();
}
Pair p = new Pair((byte) i, Mockito.spy(Node.class));
existingData[j] = p;
node.addChild(p.partialKey, p.child);
}
this.node = node;
}
@BeforeEach
public void setup() {
int i = 0;
for (; i < existingData.length; i++) {
if (existingData[i].partialKey < 0) {
break;
}
}
assertTrue(i < existingData.length, "sample key set should contain at least"
+ " one negative integer to test for unsigned lexicographic ordering");
}
// unsigned lexicographic order of the sample keys: 0, 1, -2, -1
// (signed order would be -2, -1, 0, 1)
byte[] existingKeys() {
byte[] keys = new byte[existingData.length];
for (int i = 0; i < keys.length; i++) {
keys[i] = existingData[i].partialKey;
}
return keys;
}
void verifyUnsignedLexicographicOrder() {
verifyUnsignedLexicographicOrder(node);
}
/*
work only with interface methods;
we don't care about implementation details,
for example how Node4 stores bytes as unsigned, etc.
We only care about the correct lexicographic ordering,
which is why the setup requires both negative and positive sample keys.
We don't test child mappings, since findChild already covers them
(assuming the same mappings are maintained).
What this really verifies is that negative bytes come after positive ones.
We don't require that the children are stored in sorted order;
all we want is that the order-dependent methods (first, last, greater, lesser)
answer correctly. They might do so even without sorted storage,
but as a generic test suite we don't care: we base our assertions on invariants.
*/
void verifyUnsignedLexicographicOrder(InnerNode node) {
boolean negExist = false;
byte prev = node.first().uplinkKey();
if (prev < 0) {
negExist = true;
}
for (int i = 1; i < node.size(); i++) {
byte next = node.greater(prev).uplinkKey();
assertTrue(compare(prev, next) < 0);
prev = next;
if (prev < 0) {
negExist = true;
}
}
assertTrue(negExist, "expected at least one negative byte to test lexicographic ordering");
prev = node.last().uplinkKey();
for (int i = node.size() - 2; i >= 0; i--) {
byte next = node.lesser(prev).uplinkKey();
assertTrue(compare(prev, next) > 0);
prev = next;
}
}
/*
add partial keys
all key, child mappings should exist
size increase
uplinks setup
expect keys to be in the right unsigned lexicographic order
*/
@Test
public void testAddAndFindChild() {
List<Pair> pairs = new ArrayList<>(Arrays.asList(existingData));
for (byte i = 0; !node.isFull(); i++) {
if (node.findChild(i) != null) {
continue;
}
Pair p = new Pair(i, Mockito.spy(Node.class));
pairs.add(p);
node.addChild(p.partialKey, p.child);
}
// size
assertEquals(node.size(), pairs.size());
for (Pair p : pairs) {
// uplinks setup
assertEquals(node, p.child.parent());
assertEquals(p.partialKey, p.child.uplinkKey());
// all added partial keys exist
assertEquals(p.child, node.findChild(p.partialKey));
}
verifyUnsignedLexicographicOrder();
}
/*
sort sample data and expect the smallest lexicographic byte
*/
@Test
public void testFirst() {
byte[] data = existingKeys();
sort(data);
assertEquals(node.first().uplinkKey(), data[0]);
}
/*
sort sample data and expect the largest lexicographic byte
*/
@Test
public void testLast() {
byte[] data = existingKeys();
sortDescending(data);
assertEquals(node.last().uplinkKey(), data[0]);
}
/*
nothing is greater than the greatest key;
for every other key, greater(key) must return the next key in unsigned order
*/
@Test
public void testGreater() {
Node last = node.last();
assertNull(node.greater(last.uplinkKey()));
Arrays.sort(existingData);
for (int i = 0; i < node.size() - 1; i++) {
Node greater = node.greater(existingData[i].partialKey);
assertEquals(existingData[i + 1].child, greater);
}
}
/*
nothing is lesser than the least key;
for every other key, lesser(key) must return the previous key in unsigned order
*/
@Test
public void testLesser() {
Node first = node.first();
assertNull(node.lesser(first.uplinkKey()));
Arrays.sort(existingData);
for (int i = 1; i < node.size(); i++) {
Node lesser = node.lesser(existingData[i].partialKey);
assertEquals(existingData[i - 1].child, lesser);
}
}
/*
remove child:
unsigned lexicographic order maintained,
uplink removed,
size reduced,
child no longer exists (findChild)
*/
@Test
public void testRemove() {
// we remove two children in this test, so we must not break the
// minimum-size constraint of the node (see the node-size asserts in first/last)
byte minByte = Byte.MAX_VALUE, maxByte = Byte.MIN_VALUE;
for (int i = 0; i < existingData.length; i++) {
if (existingData[i].partialKey > maxByte) {
maxByte = existingData[i].partialKey;
}
if (existingData[i].partialKey < minByte) {
minByte = existingData[i].partialKey;
}
}
Pair p = new Pair((byte) (minByte - 1), Mockito.spy(Node.class));
node.addChild(p.partialKey, p.child);
p = new Pair((byte) (maxByte + 1), Mockito.spy(Node.class));
if (!node.isFull()) { // needed for Node4, since the test setup already adds three children
node.addChild(p.partialKey, p.child);
}
int initialSize = node.size();
// remove at head
Node head = node.first();
node.removeChild(head.uplinkKey());
assertNull(node.findChild(head.uplinkKey()));
assertEquals(initialSize - 1, node.size());
assertNull(head.parent());
// remove at tail
Node tail = node.last();
node.removeChild(tail.uplinkKey());
assertNull(node.findChild(tail.uplinkKey()));
assertEquals(initialSize - 2, node.size());
assertNull(tail.parent());
verifyUnsignedLexicographicOrder();
}
/*
after growing, new node:
contains same key, child mappings in same lexicographic order but with uplinks to new grown node
same prefix key, no of children, uplink key, parent
*/
@Test
public void testGrow() {
List<Pair> pairs = new ArrayList<>(Arrays.asList(existingData));
byte i;
Pair pair;
// fill node to capacity
for (i = 0; ; i++) {
if (node.findChild(i) != null) {
continue; // find at least one non-existent child to force an add
}
pair = new Pair(i, Mockito.spy(Node.class));
if (node.isFull()) {
break;
}
pairs.add(pair);
node.addChild(pair.partialKey, pair.child);
}
// capacity reached
assertTrue(node.isFull());
// hence we need to grow
InnerNode grown = node.grow();
assertEquals(node.size(), grown.size());
assertEqualHeader(node, grown);
// add child on newly grown node
grown.addChild(pair.partialKey, pair.child);
pairs.add(pair);
// verify same key, child mappings exist
for (i = 0; i < pairs.size(); i++) {
Pair p = pairs.get(i);
// uplinks setup
assertEquals(grown, p.child.parent());
assertEquals(p.partialKey, p.child.uplinkKey());
// all added partial keys exist
assertEquals(p.child, grown.findChild(p.partialKey));
}
verifyUnsignedLexicographicOrder(grown);
}
/*
after shrinking contains same key, child mappings
lexicographic order maintained
same parent as before, prefix len, prefix keys
*/
@Test
public void testShrink() {
List<Pair> pairs = new ArrayList<>(Arrays.asList(existingData));
while (!node.shouldShrink()) {
node.removeChild(pairs.remove(0).partialKey);
}
assertTrue(node.shouldShrink());
InnerNode shrunk = node.shrink();
assertEquals(shrunk.size(), node.size());
assertEqualHeader(node, shrunk);
// verify same key, child mappings exist
for (Pair p : pairs) {
// uplinks setup
assertEquals(shrunk, p.child.parent());
assertEquals(p.partialKey, p.child.uplinkKey());
// all added partial keys exist
assertEquals(p.child, shrunk.findChild(p.partialKey));
}
verifyUnsignedLexicographicOrder(shrunk);
}
void assertEqualHeader(Node a, Node b) {
InnerNode aa = (InnerNode) a;
InnerNode bb = (InnerNode) b;
assertEquals(aa.prefixLen, bb.prefixLen);
assertArrayEquals(getValidPrefixKey(aa), getValidPrefixKey(bb));
assertEquals(aa.parent(), bb.parent());
assertEquals(aa.uplinkKey(), bb.uplinkKey());
}
static byte[] getValidPrefixKey(InnerNode innerNode) {
int limit = Math.min(InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, innerNode.prefixLen);
byte[] valid = new byte[limit];
System.arraycopy(innerNode.prefixKeys, 0, valid, 0, limit);
return valid;
}
/*
replace the child associated with a key:
assert the new child is found,
size unchanged,
lexicographic order maintained,
uplink set up for the new child.
The old child's uplink stays. Why? In the lazy leaf expansion case we first
link the current leaf node with a new Node4() and only later replace the
current down pointer to this leaf node with that new Node4() parent. If
replace removed the old child's uplink, it might clear a link to a parent
the old child has already been given. We could reset it explicitly in that
branch, but it is fine for replace not to do so.
*/
@Test
public void testReplace() {
Node first = node.first();
byte replacedKey = first.uplinkKey();
Node newChild = Mockito.spy(Node.class);
node.replace(replacedKey, newChild);
assertEquals(newChild, node.findChild(replacedKey));
assertEquals(existingData.length, node.size());
assertEquals(replacedKey, newChild.uplinkKey());
assertEquals(node, newChild.parent());
// the old child's uplink is left untouched (see comment above)
assertEquals(replacedKey, first.uplinkKey());
assertEquals(node, first.parent());
}
public static void sort(byte[] array) {
sort(array, 0, array.length);
}
public static void sort(byte[] array, int fromIndex, int toIndex) {
for (int i = fromIndex; i < toIndex; i++) {
array[i] = flip(array[i]);
}
Arrays.sort(array, fromIndex, toIndex);
for (int i = fromIndex; i < toIndex; i++) {
array[i] = flip(array[i]);
}
}
private static byte flip(byte b) {
return (byte) (b ^ 0x80);
}
public static void sortDescending(byte[] array) {
sortDescending(array, 0, array.length);
}
public static void sortDescending(byte[] array, int fromIndex, int toIndex) {
for (int i = fromIndex; i < toIndex; i++) {
array[i] ^= Byte.MAX_VALUE;
}
Arrays.sort(array, fromIndex, toIndex);
for (int i = fromIndex; i < toIndex; i++) {
array[i] ^= Byte.MAX_VALUE;
}
}
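// Minimal sanity sketch (not part of the original suite): flipping the sign
// bit maps unsigned byte order onto signed order, so 0x00, 0x01, 0x80, 0xFF
// is the expected ascending unsigned order.
@Test
public void testUnsignedSortHelpers() {
byte[] data = {1, (byte) 0xFF, 0, (byte) 0x80};
sort(data);
assertArrayEquals(new byte[]{0, 1, (byte) 0x80, (byte) 0xFF}, data);
sortDescending(data);
assertArrayEquals(new byte[]{(byte) 0xFF, (byte) 0x80, 1, 0}, data);
}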
}


@ -0,0 +1,41 @@
package org.xbib.datastructures.trie.radix.adaptive;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
public class Node4UnitTest extends InnerNodeUnitTest {
Node4UnitTest() {
super(2);
}
@Test
public void testGetOnlyChild() {
// remove until only one child
while (node.size() != 1) {
node.removeChild(node.first().uplinkKey());
}
byte[] keys = existingKeys();
sortDescending(keys);
assertEquals(keys[0], ((Node4) node).getOnlyChildKey());
}
@Override
@Test
public void testShrink() {
Assertions.assertThrows(UnsupportedOperationException.class, () -> node.shrink());
}
@Test
public void testShouldShrinkAlwaysFalse() {
// remove all
while (node.size() != 0) {
node.removeChild(node.first().uplinkKey());
}
assertFalse(node.shouldShrink());
}
}


@ -0,0 +1,61 @@
package org.xbib.datastructures.trie.segment;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
public class TrieTest {
@Test
public void testEmptyTrie() {
Trie<String, TrieKey<String>, String> trie = new TrieImpl<>();
TrieKey<String> trieKey = new TrieKeyImpl<>();
String result = trie.search(trieKey);
assertNull(result);
}
@Test
public void testEmptyKey() {
Trie<String, TrieKey<String>, Integer> trie = new TrieImpl<>();
TrieKey<String> trieKey = new TrieKeyImpl<>();
trie.add(trieKey, 100);
Integer result = trie.search(trieKey);
assertEquals((Integer) 100, result);
trie.add(trieKey, 200);
result = trie.search(trieKey);
assertEquals((Integer) 200, result);
}
@Test
public void testSingletonTrie() {
Trie<String, TrieKey<String>, String> trie = new TrieImpl<>();
TrieKey<String> trieKey = TrieKeyImpl.stringKey("key");
trie.add(trieKey, "value");
String result = trie.search(trieKey);
assertNotEquals("key", result);
}
@Test
public void testLargeInsertionAndSearch() {
Trie<String, TrieKey<String>, Long> trie = new TrieImpl<>();
List<TrieKey<String>> keys = new ArrayList<>();
Random random = new Random(); // create once; re-seeding every iteration risks duplicate keys
for (int i = 0; i < 10000; i++) {
Long value = random.nextLong();
String key = value.toString();
TrieKey<String> trieKey = TrieKeyImpl.stringKey(key);
trie.add(trieKey, value);
keys.add(trieKey);
}
for (TrieKey<String> key : keys) {
Long value = trie.search(key);
assertEquals(key.toString(), value.toString());
}
}
}


@ -1,4 +1,4 @@
-package org.xbib.datastructures.trie;
+package org.xbib.datastructures.trie.simple;
import org.junit.jupiter.api.Test;


@ -4898,7 +4898,7 @@ QUEEN GERTRUDE There is a willow grows aslant a brook,
Which time she chanted snatches of old tunes;
As one incapable of her own distress,
Or like a creature native and indued
-Unto that element: but long it could not be
+Unto that trieKeySegment: but long it could not be
Till that her garments, heavy with their drink,
Pull'd the poor wretch from her melodious lay
To muddy death.


@ -1,5 +1,5 @@
dependencies {
-api libs.jsr305.v2
+api libs.jsr305
testImplementation libs.assertj
testImplementation libs.compile.testing
}


@ -25,7 +25,7 @@
# presentation when isolated.
# • The RGI set is covered by the listed fully-qualified emoji.
# • The listed minimally-qualified and unqualified cover all cases where an
-# element of the RGI set is missing one or more emoji presentation selectors.
+# trieKeySegment of the RGI set is missing one or more emoji presentation selectors.
# • The file is in CLDR order, not codepoint order. This is recommended (but not required!) for keyboard palettes.
# • The groups and subgroups are illustrative. See the Emoji Order chart for more information.


@ -39,7 +39,7 @@ artifacts {
tasks.withType(JavaCompile) {
// commented out mostly because of jmh generated code
-// options.compilerArgs << '-Xlint:all'
+options.compilerArgs << '-Xlint:all'
}
javadoc {


@ -12,8 +12,7 @@ dependencyResolutionManagement {
library('junit4', 'junit', 'junit').version('4.13.2')
library('chronicle-core', 'net.openhft', 'chronicle-core').version('2.21ea14')
library('affinity', 'net.openhft', 'affinity').version('3.21ea0')
-library('jsr305', 'org.xbib', 'jsr-305').version('1.0.0')
-library('jsr305-v2', 'org.xbib', 'jsr-305').version('2.0.0')
+library('jsr305', 'org.xbib', 'jsr-305').version('2.0.0')
library('javassist', 'org.javassist', 'javassist').version('3.28.0-GA')
library('jackson', 'com.fasterxml.jackson.core', 'jackson-databind').versionRef('jackson')
library('gson', 'com.google.code.gson', 'gson').version('2.8.9')