diff --git a/datastructures-trie/build.gradle b/datastructures-trie/build.gradle new file mode 100644 index 0000000..97b63e6 --- /dev/null +++ b/datastructures-trie/build.gradle @@ -0,0 +1,5 @@ +dependencies { + testImplementation("org.mockito:mockito-core:${project.property('mockito.version')}") { + exclude group: 'org.hamcrest' + } +} \ No newline at end of file diff --git a/datastructures-trie/src/main/java/module-info.java b/datastructures-trie/src/main/java/module-info.java new file mode 100644 index 0000000..4402525 --- /dev/null +++ b/datastructures-trie/src/main/java/module-info.java @@ -0,0 +1,12 @@ +module org.xbib.datastructures.trie { + exports org.xbib.datastructures.trie.ahocorasick; + exports org.xbib.datastructures.trie.compact; + exports org.xbib.datastructures.trie.limewire; + exports org.xbib.datastructures.trie.patricia; + exports org.xbib.datastructures.trie.radix; + exports org.xbib.datastructures.trie.radix.adaptive; + exports org.xbib.datastructures.trie.radix.adaptive.persistent; + exports org.xbib.datastructures.trie.radix.pruning; + exports org.xbib.datastructures.trie.regex; + exports org.xbib.datastructures.trie.simple; +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/AbstractToken.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/AbstractToken.java new file mode 100644 index 0000000..0a82faa --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/AbstractToken.java @@ -0,0 +1,22 @@ +package org.xbib.datastructures.trie.ahocorasick; + +/*** + * This class holds a text ("the fragment") and emits some output. If + * {@link #isMatch()} returns {@code true}, the token matched a search. + * + * @param The Type of the emitted payloads. 
+ */ +public abstract class AbstractToken implements Token { + + private final String fragment; + + public AbstractToken(String fragment) { + this.fragment = fragment; + } + + @Override + public String getFragment() { + return this.fragment; + } + +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/CollectingOutputHandler.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/CollectingOutputHandler.java new file mode 100644 index 0000000..db66c87 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/CollectingOutputHandler.java @@ -0,0 +1,7 @@ +package org.xbib.datastructures.trie.ahocorasick; + +import java.util.List; + +public interface CollectingOutputHandler extends OutputHandler { + List> getOutputs(); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/DefaultOutputHandler.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/DefaultOutputHandler.java new file mode 100644 index 0000000..ab4e360 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/DefaultOutputHandler.java @@ -0,0 +1,20 @@ +package org.xbib.datastructures.trie.ahocorasick; + +import java.util.ArrayList; +import java.util.List; + +public class DefaultOutputHandler implements CollectingOutputHandler { + + private final List> outputs = new ArrayList<>(); + + @Override + public boolean output(EntryOutput emit) { + outputs.add(emit); + return true; + } + + @Override + public List> getOutputs() { + return outputs; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Direction.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Direction.java new file mode 100644 index 0000000..5ecc19e --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Direction.java @@ -0,0 +1,6 @@ +package org.xbib.datastructures.trie.ahocorasick; + +public enum Direction { + LEFT, + RIGHT +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Entry.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Entry.java new file mode 100644 index 0000000..34559fc --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Entry.java @@ -0,0 +1,32 @@ +package org.xbib.datastructures.trie.ahocorasick; + +/** + * An entry, a key with a value + * + * @param The type of the value. + */ +public class Entry implements Comparable> { + + private final String key; + + private final T value; + + public Entry(String key, T value) { + super(); + this.key = key; + this.value = value; + } + + public String getKey() { + return key; + } + + public T getValue() { + return value; + } + + @Override + public int compareTo(Entry other) { + return key.compareTo(other.getKey()); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/EntryOutput.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/EntryOutput.java new file mode 100644 index 0000000..ff617ec --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/EntryOutput.java @@ -0,0 +1,32 @@ +package org.xbib.datastructures.trie.ahocorasick; + +/** + * This class is a match, for output. 
+ * + * @param Type of the value + */ +public class EntryOutput extends Interval { + + private final String key; + + private final T value; + + public EntryOutput(int start, int end, String key, T value) { + super(start, end); + this.key = key; + this.value = value; + } + + public String getKey() { + return key; + } + + public T getValue() { + return value; + } + + @Override + public String toString() { + return super.toString() + "=" + key + (value != null ? "->" + value : ""); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/FragmentToken.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/FragmentToken.java new file mode 100644 index 0000000..9d3fdfe --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/FragmentToken.java @@ -0,0 +1,25 @@ +package org.xbib.datastructures.trie.ahocorasick; + +/*** + * Class for a token ("the fragment") that can emit an entry. + * This token indicates a matching search term was not found, so + * {@link #isMatch()} always returns {@code false}. + * + * @param The Type of the emitted payloads. + */ +public class FragmentToken extends AbstractToken { + + public FragmentToken(String fragment) { + super(fragment); + } + + @Override + public boolean isMatch() { + return false; + } + + @Override + public EntryOutput getOutput() { + return null; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Interval.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Interval.java new file mode 100644 index 0000000..160970d --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Interval.java @@ -0,0 +1,57 @@ +package org.xbib.datastructures.trie.ahocorasick; + +import java.util.Objects; + +/** + * Responsible for tracking the start and end bounds. + */ +public class Interval implements Comparable { + + private final int start; + + private final int end; + + public Interval(int start, int end) { + this.start = start; + this.end = end; + } + + public int getStart() { + return start; + } + + public int getEnd() { + return end; + } + + public boolean overlapsWith(final Interval other) { + return start <= other.getEnd() && end >= other.getStart(); + } + + public boolean overlapsWith(int point) { + return start <= point && point <= end; + } + + @Override + public int compareTo(Interval other) { + int comparison = start - other.getStart(); + return comparison != 0 ? 
comparison : end - other.getEnd(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Interval interval = (Interval) o; + return start == interval.start && end == interval.end; + } + + @Override + public int hashCode() { + return Objects.hash(start, end); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/IntervalNode.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/IntervalNode.java new file mode 100644 index 0000000..9114bdc --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/IntervalNode.java @@ -0,0 +1,110 @@ +package org.xbib.datastructures.trie.ahocorasick; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class IntervalNode { + + private IntervalNode left; + + private IntervalNode right; + + private final int point; + + private final List intervals; + + public IntervalNode(List intervals) { + this.intervals = new ArrayList<>(); + this.point = determineMedian(intervals); + List toLeft = new ArrayList<>(); + List toRight = new ArrayList<>(); + for (Interval interval : intervals) { + if (interval.getEnd() < point) { + toLeft.add(interval); + } else if (interval.getStart() > point) { + toRight.add(interval); + } else { + this.intervals.add(interval); + } + } + if (toLeft.size() > 0) { + left = new IntervalNode(toLeft); + } + if (toRight.size() > 0) { + right = new IntervalNode(toRight); + } + } + + public int determineMedian(List intervals) { + int start = -1; + int end = -1; + for (Interval interval : intervals) { + int currentStart = interval.getStart(); + int currentEnd = interval.getEnd(); + if (start == -1 || currentStart < start) { + start = currentStart; + } + if (end == -1 || currentEnd > end) { + end = currentEnd; + } + } + return (start + end) / 2; + } + + public List findOverlaps(Interval interval) { + List overlaps = new ArrayList<>(); + if (point < interval.getStart()) { + addToOverlaps(interval, overlaps, findOverlappingRanges(right, interval)); + addToOverlaps(interval, overlaps, checkForOverlapsToTheRight(interval)); + } else if (point > interval.getEnd()) { + addToOverlaps(interval, overlaps, findOverlappingRanges(left, interval)); + addToOverlaps(interval, overlaps, checkForOverlapsToTheLeft(interval)); + } else { + addToOverlaps(interval, overlaps, intervals); + addToOverlaps(interval, overlaps, findOverlappingRanges(left, interval)); + addToOverlaps(interval, overlaps, findOverlappingRanges(right, interval)); + } + return overlaps; + } + + protected void addToOverlaps(Interval interval, List overlaps, List newOverlaps) { + for (Interval currentInterval : newOverlaps) { + if (!currentInterval.equals(interval)) { + overlaps.add(currentInterval); + } + } + } + + protected List checkForOverlapsToTheLeft(Interval interval) { + return checkForOverlaps(interval, Direction.LEFT); + } + + protected List checkForOverlapsToTheRight(Interval interval) { + return checkForOverlaps(interval, Direction.RIGHT); + } + + protected List checkForOverlaps(Interval interval, Direction direction) { + List overlaps = new ArrayList<>(); + for (Interval currentInterval : intervals) { + switch (direction) { + case LEFT: + if (currentInterval.getStart() <= interval.getEnd()) { + overlaps.add(currentInterval); + } + break; + case RIGHT: + if (currentInterval.getEnd() >= interval.getStart()) { + 
overlaps.add(currentInterval); + } + break; + } + } + return overlaps; + } + + protected List findOverlappingRanges(IntervalNode node, Interval interval) { + return node == null ? Collections.emptyList() : node.findOverlaps(interval); + } + +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/IntervalTree.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/IntervalTree.java new file mode 100644 index 0000000..15e4b37 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/IntervalTree.java @@ -0,0 +1,42 @@ +package org.xbib.datastructures.trie.ahocorasick; + +import java.util.Comparator; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; + +public class IntervalTree { + + private final IntervalNode rootNode; + + public IntervalTree(List intervals) { + this.rootNode = new IntervalNode(intervals); + } + + public List removeOverlaps(List intervals) { + intervals.sort((i1, i2) -> { + int i = (i2.getEnd() - i2.getStart() + 1) - (i1.getEnd() - i1.getStart() + 1); + if (i == 0) { + i = i1.getStart() - i2.getStart(); + } + return i; + }); + Set removeIntervals = new TreeSet<>(); + for (final Interval interval : intervals) { + if (removeIntervals.contains(interval)) { + continue; + } + removeIntervals.addAll(findOverlaps(interval)); + } + for (final Interval removeInterval : removeIntervals) { + intervals.remove(removeInterval); + } + intervals.sort(Comparator.comparingInt(Interval::getStart)); + return intervals; + } + + public List findOverlaps(Interval interval) { + return rootNode.findOverlaps(interval); + } + +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/MatchToken.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/MatchToken.java new file mode 100644 index 0000000..e895a0f --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/MatchToken.java @@ -0,0 +1,28 @@ +package org.xbib.datastructures.trie.ahocorasick; + +/** + * Class for a token ("the fragment") that can emit an entry. + * This token indicates a match, so {@link #isMatch()} + * always returns {@code true}. + * + * @param The type of the emitted entry value. 
+ */ +public class MatchToken extends AbstractToken { + + private final EntryOutput output; + + public MatchToken(String fragment, EntryOutput output) { + super(fragment); + this.output = output; + } + + @Override + public boolean isMatch() { + return true; + } + + @Override + public EntryOutput getOutput() { + return output; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/OutputHandler.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/OutputHandler.java new file mode 100644 index 0000000..593bee7 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/OutputHandler.java @@ -0,0 +1,7 @@ +package org.xbib.datastructures.trie.ahocorasick; + +@FunctionalInterface +public interface OutputHandler { + + boolean output(EntryOutput entryOutput); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/State.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/State.java new file mode 100644 index 0000000..57f80c6 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/State.java @@ -0,0 +1,108 @@ +package org.xbib.datastructures.trie.ahocorasick; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +/** + * A state has various important tasks it must attend to: + *
+ * <ul>
+ * <li>success; when a character points to another state, it must return that
+ * state</li>
+ * <li>failure; when a character has no matching state, the algorithm must be
+ * able to fall back on a state with less depth</li>
+ * <li>emits; when this state is passed and keys have been matched, the
+ * matches and their values must be output so they can be used later
+ * on.</li>
+ * </ul>
+ * The root state is special in the sense that it has no failure state; it + * cannot fail. If it 'fails' it will still parse the next character and start + * from the root node. This ensures that the algorithm always runs. All other + * states always have a fail state. + */ +public class State { + + private final int depth; + + private final State rootState; + + private final Map> success; + + private final Set> entries; + + private State failure; + + public State() { + this(0); + } + + public State(final int depth) { + this.depth = depth; + rootState = depth == 0 ? this : null; + success = new HashMap<>(); + entries = new TreeSet<>(); + } + + private State nextState(final Character character, final boolean ignoreRootState) { + State nextState = this.success.get(character); + + if (!ignoreRootState && nextState == null && this.rootState != null) { + nextState = this.rootState; + } + + return nextState; + } + + public State nextState(final Character character) { + return nextState(character, false); + } + + public State nextStateIgnoreRootState(Character character) { + return nextState(character, true); + } + + public State addState(Character character) { + State nextState = nextStateIgnoreRootState(character); + if (nextState == null) { + nextState = new State<>(this.depth + 1); + this.success.put(character, nextState); + } + return nextState; + } + + public int getDepth() { + return this.depth; + } + + public void add(Entry entry) { + entries.add(entry); + } + + public void add(Collection> emits) { + for (Entry emit : emits) { + add(emit); + } + } + + public Collection> entries() { + return entries; + } + + public State failure() { + return this.failure; + } + + public void setFailure(State failState) { + this.failure = failState; + } + + public Collection> getStates() { + return this.success.values(); + } + + public Collection getTransitions() { + return this.success.keySet(); + } +} \ No newline at end of file diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Token.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Token.java new file mode 100644 index 0000000..7d4a587 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Token.java @@ -0,0 +1,10 @@ +package org.xbib.datastructures.trie.ahocorasick; + +public interface Token { + + String getFragment(); + + boolean isMatch(); + + EntryOutput getOutput(); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Trie.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Trie.java new file mode 100644 index 0000000..3bb68bb --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/Trie.java @@ -0,0 +1,257 @@ +package org.xbib.datastructures.trie.ahocorasick; + +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; + +/** + * A trie implementation. + * + * @param The type of the supplied of the payload. 
+ */ +public class Trie { + + private final TrieConfig trieConfig; + + private final State rootState; + + protected Trie(TrieConfig trieConfig) { + this.trieConfig = trieConfig; + this.rootState = new State<>(); + } + + public static Builder builder() { + return new Builder<>(); + } + + public Collection> tokenize(String text) { + Collection> tokens = new LinkedList<>(); + Collection> outputs = parse(text); + int lastCollectedPosition = -1; + for (EntryOutput output : outputs) { + if (output.getStart() - lastCollectedPosition > 1) { + tokens.add(createFragment(output, text, lastCollectedPosition)); + } + tokens.add(createMatch(output, text)); + lastCollectedPosition = output.getEnd(); + } + if (text.length() - lastCollectedPosition > 1) { + tokens.add(createFragment(null, text, lastCollectedPosition)); + } + return tokens; + } + + public Collection> parse(CharSequence text) { + return parse(text, new DefaultOutputHandler<>()); + } + + @SuppressWarnings("unchecked") + public Collection> parse(CharSequence text, CollectingOutputHandler handler) { + parse(text, (OutputHandler) handler); + List> outputs = handler.getOutputs(); + if (!trieConfig.isAllowOverlaps()) { + IntervalTree intervalTree = new IntervalTree((List) (List) outputs); + intervalTree.removeOverlaps((List) (List) outputs); + } + return outputs; + } + + public void parse(CharSequence text, OutputHandler outputHandler) { + State currentState = getRootState(); + for (int position = 0; position < text.length(); position++) { + char character = text.charAt(position); + if (trieConfig.isCaseInsensitive()) { + character = Character.toLowerCase(character); + } + currentState = getState(currentState, character); + Collection> entries = currentState.entries(); + if (processOutputs(text, position, entries, outputHandler) && trieConfig.isStopOnHit()) { + return; + } + } + } + + public boolean match(CharSequence text) { + return firstMatch(text) != null; + } + + public EntryOutput firstMatch(CharSequence text) { + if (!trieConfig.isAllowOverlaps()) { + Collection> parseText = parse(text); + if (parseText != null && !parseText.isEmpty()) { + return parseText.iterator().next(); + } + } else { + State currentState = getRootState(); + for (int i = 0; i < text.length(); i++) { + char character = text.charAt(i); + if (trieConfig.isCaseInsensitive()) { + character = Character.toLowerCase(character); + } + currentState = getState(currentState, character); + Collection> entries = currentState.entries(); + if (entries != null && !entries.isEmpty()) { + for (Entry entry : entries) { + EntryOutput output = + new EntryOutput<>(i - entry.getKey().length() + 1, i, entry.getKey(), entry.getValue()); + if (trieConfig.isOnlyWholeWords()) { + if (!isPartialMatch(text, output)) { + return output; + } + } else { + return output; + } + } + } + } + } + return null; + } + + private Token createFragment(EntryOutput output, String text, int lastCollectedPosition) { + return new FragmentToken<>(text.substring(lastCollectedPosition + 1, output == null ? text.length() : output.getStart())); + } + + private Token createMatch(EntryOutput output, String text) { + return new MatchToken<>(text.substring(output.getStart(), output.getEnd() + 1), output); + } + + private State addState(String key) { + State state = getRootState(); + for (Character character : key.toCharArray()) { + Character adjustedChar = trieConfig.isCaseInsensitive() ? 
Character.toLowerCase(character) : character; + state = state.addState(adjustedChar); + } + return state; + } + + private boolean isPartialMatch(CharSequence searchText, EntryOutput output) { + return (output.getStart() != 0 && Character.isAlphabetic(searchText.charAt(output.getStart() - 1))) + || (output.getEnd() + 1 != searchText.length() && Character.isAlphabetic(searchText.charAt(output.getEnd() + 1))); + } + + private boolean isPartialMatchWhiteSpaceSeparated(CharSequence searchText, EntryOutput output) { + long size = searchText.length(); + return (output.getStart() != 0 && !Character.isWhitespace(searchText.charAt(output.getStart() - 1))) + || (output.getEnd() + 1 != size && !Character.isWhitespace(searchText.charAt(output.getEnd() + 1))); + } + + private State getState(State currentState, Character character) { + State newCurrentState = currentState.nextState(character); + while (newCurrentState == null) { + currentState = currentState.failure(); + newCurrentState = currentState.nextState(character); + } + return newCurrentState; + } + + private void constructFailureStates() { + Queue> queue = new LinkedList<>(); + State startState = getRootState(); + for (State depthOneState : startState.getStates()) { + depthOneState.setFailure(startState); + queue.add(depthOneState); + } + while (!queue.isEmpty()) { + State currentState = queue.remove(); + for (Character transition : currentState.getTransitions()) { + State targetState = currentState.nextState(transition); + queue.add(targetState); + State traceFailureState = currentState.failure(); + while (traceFailureState.nextState(transition) == null) { + traceFailureState = traceFailureState.failure(); + } + State newFailureState = traceFailureState.nextState(transition); + targetState.setFailure(newFailureState); + targetState.add(newFailureState.entries()); + } + } + } + + private boolean processOutputs(CharSequence text, + int position, + Collection> entries, + OutputHandler outputHandler) { + boolean output = false; + for (Entry entry : entries) { + EntryOutput entryOutput = + new EntryOutput<>(position - entry.getKey().length() + 1, position, entry.getKey(), entry.getValue()); + if (!(trieConfig.isOnlyWholeWords() && isPartialMatch(text, entryOutput)) && + !(trieConfig.isOnlyWholeWordsWhiteSpaceSeparated() && + isPartialMatchWhiteSpaceSeparated(text, entryOutput))) { + output = outputHandler.output(entryOutput) || output; + if (output && trieConfig.isStopOnHit()) { + break; + } + } + } + return output; + } + + private State getRootState() { + return rootState; + } + + public static class Builder { + + private final TrieConfig trieConfig; + + private final Trie trie; + + private Builder() { + trieConfig = new TrieConfig(); + trie = new Trie<>(trieConfig); + } + + public Builder ignoreCase() { + trieConfig.setCaseInsensitive(true); + return this; + } + + public Builder ignoreOverlaps() { + trieConfig.setAllowOverlaps(false); + return this; + } + + public Builder onlyWholeWords() { + trieConfig.setOnlyWholeWords(true); + return this; + } + + public Builder onlyWholeWordsWhiteSpaceSeparated() { + trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true); + return this; + } + + public Builder stopOnHit() { + trie.trieConfig.setStopOnHit(true); + return this; + } + + public Builder add(String key) { + add(key, null); + return this; + } + + public Builder add(String key, T value) { + if (key == null || key.isEmpty()) { + return this; + } + trie.addState(key).add(new Entry<>(key, value)); + return this; + } + + public Builder add(Collection> 
keys) { + for (Entry entry : keys) { + add(entry.getKey(), entry.getValue()); + } + return this; + } + + public Trie build() { + trie.constructFailureStates(); + return this.trie; + } + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/TrieConfig.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/TrieConfig.java new file mode 100644 index 0000000..f709e31 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/TrieConfig.java @@ -0,0 +1,54 @@ +package org.xbib.datastructures.trie.ahocorasick; + +public class TrieConfig { + + private boolean allowOverlaps = true; + + private boolean onlyWholeWords = false; + + private boolean onlyWholeWordsWhiteSpaceSeparated = false; + + private boolean caseInsensitive = false; + + private boolean stopOnHit = false; + + public boolean isStopOnHit() { + return stopOnHit; + } + + public void setStopOnHit(boolean stopOnHit) { + this.stopOnHit = stopOnHit; + } + + public boolean isAllowOverlaps() { + return allowOverlaps; + } + + public void setAllowOverlaps(boolean allowOverlaps) { + this.allowOverlaps = allowOverlaps; + } + + public boolean isOnlyWholeWords() { + return onlyWholeWords; + } + + public void setOnlyWholeWords(boolean onlyWholeWords) { + this.onlyWholeWords = onlyWholeWords; + } + + public boolean isOnlyWholeWordsWhiteSpaceSeparated() { + return onlyWholeWordsWhiteSpaceSeparated; + } + + public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) { + this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated; + } + + public boolean isCaseInsensitive() { + return caseInsensitive; + } + + public void setCaseInsensitive(boolean caseInsensitive) { + this.caseInsensitive = caseInsensitive; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/package-info.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/package-info.java new file mode 100644 index 0000000..9cc0b2c --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/ahocorasick/package-info.java @@ -0,0 +1,10 @@ +/** + * Taken from + * + * https://github.com/robert-bor/aho-corasick + * + * Apache License + * Version 2.0, January 2004 + * http://www.apache.org/licenses/ + */ +package org.xbib.datastructures.trie.ahocorasick; \ No newline at end of file diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/compact/package-info.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/compact/package-info.java new file mode 100644 index 0000000..81859e2 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/compact/package-info.java @@ -0,0 +1,6 @@ +/** + * Taken from + * + * https://leetcode.com/problems/implement-trie-prefix-tree/discuss/467046/Java-Radix-tree-(compact-prefix-tree)-beats-99.7-runtime-and-100-memory + */ +package org.xbib.datastructures.trie.compact; diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/AbstractKeyAnalyzer.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/AbstractKeyAnalyzer.java new file mode 100644 index 0000000..34da586 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/AbstractKeyAnalyzer.java @@ -0,0 +1,13 @@ +package org.xbib.datastructures.trie.limewire; + +/** + * An abstract implementation of {@link KeyAnalyzer}. 
+ */ +public abstract class AbstractKeyAnalyzer implements KeyAnalyzer { + + @SuppressWarnings("unchecked") + @Override + public int compare(K o1, K o2) { + return ((Comparable) o1).compareTo(o2); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/Cursor.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/Cursor.java new file mode 100644 index 0000000..1185430 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/Cursor.java @@ -0,0 +1,48 @@ +package org.xbib.datastructures.trie.limewire; + +import java.util.Map; + +/** + * An interface used by a {@link Trie}. A {@link Trie} selects items by + * closeness and passes the items to the Cursor. You can then + * decide what to do with the key-value pair and the return value + * from {@link #select(java.util.Map.Entry)} tells the Trie + * what to do next. + *

+ * <p>
+ * The Cursor's selection status may be one of:
+ * <table>
+ * <tr><td>Return Value</td><td>Status</td></tr>
+ * <tr><td>EXIT</td><td>Finish the Trie operation</td></tr>
+ * <tr><td>CONTINUE</td><td>Look at the next element in the traversal</td></tr>
+ * <tr><td>REMOVE_AND_EXIT</td><td>Remove the entry and stop iterating</td></tr>
+ * <tr><td>REMOVE</td><td>Remove the entry and continue iterating</td></tr>
+ * </table>
+ *
+ * Note: {@link Trie#select(Object, Cursor)} does + * not support REMOVE. + * + * @param Key Type + * @param Key Value + */ +public interface Cursor { + + /** + * Notification that the Trie is currently looking at the given entry. + * Return EXIT to finish the Trie operation, + * CONTINUE to look at the next entry, REMOVE + * to remove the entry and continue iterating, or + * REMOVE_AND_EXIT to remove the entry and stop iterating. + * Not all operations support REMOVE. + */ + SelectStatus select(Map.Entry entry); + + /** + * The mode during selection. + */ + enum SelectStatus { + EXIT, + CONTINUE, + REMOVE, + REMOVE_AND_EXIT + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/DefaultKeyAnalyzer.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/DefaultKeyAnalyzer.java new file mode 100644 index 0000000..c819c13 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/DefaultKeyAnalyzer.java @@ -0,0 +1,40 @@ +package org.xbib.datastructures.trie.limewire; + +/** + * An implementation of {@link KeyAnalyzer} + * that assumes all keys have the {@link Key} interface implemented. + */ +public class DefaultKeyAnalyzer> extends AbstractKeyAnalyzer { + @SuppressWarnings("rawtypes") + private static final DefaultKeyAnalyzer INSTANCE = new DefaultKeyAnalyzer(); + + @SuppressWarnings("unchecked") + public static KeyAnalyzer singleton() { + return (KeyAnalyzer) INSTANCE; + } + + @Override + public int lengthInBits(K key) { + return key.lengthInBits(); + } + + @Override + public boolean isBitSet(K key, int keyLength, int bitIndex) { + return key.isBitSet(bitIndex); + } + + @Override + public int bitIndex(K key, int keyStart, int keyLength, K found, int foundStart, int foundLength) { + return key.bitIndex(found); + } + + @Override + public int bitsPerElement() { + return 16; + } + + @Override + public boolean isPrefix(K prefix, int offset, int length, K key) { + return key.isPrefixedBy(prefix); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/EmptyIterator.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/EmptyIterator.java deleted file mode 100644 index bce47b8..0000000 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/EmptyIterator.java +++ /dev/null @@ -1,44 +0,0 @@ -package org.xbib.datastructures.trie.limewire; - -import java.util.Iterator; -import java.util.NoSuchElementException; - - -/** - * Provides an unmodifiable empty iterator. EmptyIterator always - * returns that there aren't any more items and throws a - * {@link NoSuchElementException} when attempting to move to the next item. - * - *

- * <pre>
- * try{
- * EmptyIterator ei = new EmptyIterator();
- * ei.next();
- * } catch (Exception e) {
- * System.out.println("Expected to get NoSuchElementException exception: " + e.toString());
- * }
- *
- * Output:
- * Expected to get NoSuchElementException exception: java.util.NoSuchElementException
- * </pre>
- */ -public class EmptyIterator extends UnmodifiableIterator { - /** - * A constant EmptyIterator. - */ - public final static Iterator EMPTY_ITERATOR = new EmptyIterator(); - - @SuppressWarnings("unchecked") - public static Iterator emptyIterator() { - return EMPTY_ITERATOR; - } - - // inherits javadoc comment - public boolean hasNext() { - return false; - } - - // inherits javadoc comment - public Object next() { - throw new NoSuchElementException(); - } -} \ No newline at end of file diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/Key.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/Key.java new file mode 100644 index 0000000..ca27a6d --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/Key.java @@ -0,0 +1,30 @@ +package org.xbib.datastructures.trie.limewire; + +/** + * An interface that {@link PatriciaTrie} keys may implement. + * + * @see KeyAnalyzer + * @see DefaultKeyAnalyzer + */ +public interface Key { + + /** + * Returns the key's length in bits. + */ + int lengthInBits(); + + /** + * Returns {@code true} if the given bit is set. + */ + boolean isBitSet(int bitIndex); + + /** + * Returns the index of the first bit that is different in the two keys. + */ + int bitIndex(K otherKey); + + /** + * Returns {@code true} if this key is prefixed by the given key. + */ + boolean isPrefixedBy(K prefix); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/KeyAnalyzer.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/KeyAnalyzer.java new file mode 100644 index 0000000..cb8c72f --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/KeyAnalyzer.java @@ -0,0 +1,64 @@ +package org.xbib.datastructures.trie.limewire; + +import java.util.Comparator; + +/** + * Defines the interface to analyze {@link Trie} keys on a bit + * level. KeyAnalyzer's + * methods return the length of the key in bits, whether or not a bit is + * set, and bits per element in the key. + *

+ * <p>
+ * Additionally, a method determines if a key is a prefix of another key and
+ * returns the bit index where one key is different from another key (if
+ * the key and found key are equal, then the return value is EQUAL_BIT_KEY).
+ * <p>
+ * KeyAnalyzer defines:
+ * <table>
+ * <tr><td>NULL_BIT_KEY</td><td>When key's bits are all zero</td></tr>
+ * <tr><td>EQUAL_BIT_KEY</td><td>When keys are the same</td></tr>
+ * </table>
+ */ +public interface KeyAnalyzer extends Comparator { + + /** + * Returned by bitIndex if key's bits are all 0. + */ + int NULL_BIT_KEY = -1; + + /** + * Returned by bitIndex if key and found key are + * equal. This is a very very specific case and + * shouldn't happen on a regular basis. + */ + int EQUAL_BIT_KEY = -2; + + /** + * Returns the length of the Key in bits. + */ + int lengthInBits(K key); + + /** + * Returns whether or not a bit is set. + */ + boolean isBitSet(K key, int keyLength, int bitIndex); + + /** + * Returns the n-th different bit between key and found. + * This starts the comparison in key at 'keyStart' and goes + * for 'keyLength' bits, and compares to the found key + * starting at 'foundStart' and going for 'foundLength' bits. + */ + int bitIndex(K key, int keyStart, int keyLength, K found, int foundStart, int foundLength); + + /** + * Returns the number of bits per element in the key. + * This is only useful for variable-length keys, such as Strings. + */ + int bitsPerElement(); + + /** + * Determines whether or not the given prefix (from offset to length) + * is a prefix of the given key. + */ + boolean isPrefix(K prefix, int offset, int length, K key); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/PatriciaTrie.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/PatriciaTrie.java index f32d24a..a8b491b 100644 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/PatriciaTrie.java +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/PatriciaTrie.java @@ -4,11 +4,13 @@ import java.util.AbstractCollection; import java.util.AbstractMap; import java.util.AbstractSet; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; import java.util.ConcurrentModificationException; import java.util.Iterator; import java.util.Map; import java.util.NoSuchElementException; +import java.util.Objects; import java.util.Set; import java.util.SortedMap; @@ -36,7 +38,7 @@ import java.util.SortedMap; * closeness is determined by the {@link KeyAnalyzer} returning true or * false for a bit being set or not in a given key. *

- * This PATRICIA Trie supports both variable length & fixed length keys. + * This PATRICIA Trie supports both variable length and fixed length keys. * Some methods, such as getPrefixedBy(...) are suited only to * variable length keys, whereas getPrefixedByBits(...) is suited * to fixed-size keys. @@ -50,7 +52,7 @@ import java.util.SortedMap; * (and it isn't K). * *

- * PatriciaTrie<String, String> trie = new PatriciaTrie<String, String>
+ * PatriciaTrie&lt;String, String&gt; trie = new PatriciaTrie&lt;String, String&gt;
  * (new CharSequenceKeyAnalyzer());
  *
  * trie.put("Lime", "Lime");
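For orientation, here is a minimal usage sketch expanding the javadoc sample above. It is a sketch only: the CharSequenceKeyAnalyzer and the constructor shape are taken from this javadoc, not verified elsewhere in the diff.

```java
import java.util.SortedMap;

// Sketch only: assumes the CharSequenceKeyAnalyzer named in the class javadoc.
public class PatriciaTrieUsage {
    public static void main(String[] args) {
        PatriciaTrie<String, String> trie = new PatriciaTrie<>(new CharSequenceKeyAnalyzer());
        trie.put("Lime", "Lime");
        trie.put("LimeWire", "LimeWire");
        trie.put("LimeRadio", "LimeRadio");
        // select(...) returns the value whose key is bitwise-closest to the lookup key.
        String closest = trie.select("Lime");
        // getPrefixedBy(...) returns a live SortedMap view of all entries under the prefix.
        SortedMap<String, String> limes = trie.getPrefixedBy("Lime"); // Lime, LimeRadio, LimeWire
        System.out.println(closest + " " + limes.keySet());
    }
}
```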
@@ -113,7 +115,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
      * Returns true if bitIndex is a valid index
      */
     private static boolean isValidBitIndex(int bitIndex) {
-        return 0 <= bitIndex && bitIndex <= Integer.MAX_VALUE;
+        return 0 <= bitIndex;
     }
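Since every int value is at most Integer.MAX_VALUE, the dropped upper-bound clause is vacuously true and the check reduces to a non-negativity test. A sketch of the equivalence:

```java
// For any int i, (i <= Integer.MAX_VALUE) always holds, so
// "0 <= i && i <= Integer.MAX_VALUE" and "0 <= i" have the same truth table.
static boolean oldCheck(int i) { return 0 <= i && i <= Integer.MAX_VALUE; }
static boolean newCheck(int i) { return 0 <= i; }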
 
     /**
@@ -209,7 +211,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
     }
 
     /**
-     * Adds a new <key, value> pair to the Trie and if a pair already
+     * Adds a new &lt;key, value&gt; pair to the Trie and if a pair already
      * exists it will be replaced. In the latter case it will return
      * the old value.
      */
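A small jshell-style sketch of the replace-on-duplicate contract described above (the CharSequenceKeyAnalyzer is again an assumption carried over from the class javadoc):

```java
PatriciaTrie<String, String> trie = new PatriciaTrie<>(new CharSequenceKeyAnalyzer());
assert trie.put("Lime", "first") == null;          // new pair: returns null
assert "first".equals(trie.put("Lime", "second")); // existing pair: old value returned
assert "second".equals(trie.get("Lime"));
```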
@@ -394,10 +396,10 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
      * Returns the Value whose Key has the longest prefix
      * in common with our lookup key.
      */
-    @SuppressWarnings("unchecked")
+    @SuppressWarnings({"unchecked","rawtypes"})
     public V select(K key) {
         int keyLength = length(key);
-        TrieEntry<K, V>[] result = new TrieEntry[1];
+        TrieEntry[] result = new TrieEntry[1];
         if (!selectR(root.left, -1, key, keyLength, result)) {
            TrieEntry<K, V> e = result[0];
             return e.getValue();
@@ -411,8 +413,7 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
      * Entry from the Trie.
      */
     private boolean selectR(TrieEntry<K, V> h, int bitIndex,
-                            final K key, final int keyLength, final TrieEntry<K, V>[] result) {
-
+                            final K key, final int keyLength, final TrieEntry[] result) {
         if (h.bitIndex <= bitIndex) {
             // If we hit the root Node and it is empty
             // we have to look for an alternative best
@@ -423,7 +424,6 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
             }
             return true;
         }
-
         if (!isBitSet(key, keyLength, h.bitIndex)) {
             if (selectR(h.left, h.bitIndex, key, keyLength, result)) {
                 return selectR(h.right, h.bitIndex, key, keyLength, result);
@@ -436,10 +436,10 @@ public class PatriciaTrie<K, V> extends AbstractMap<K, V> implements Trie<K, V>
         return false;
     }
 
-    @SuppressWarnings("unchecked")
+    @SuppressWarnings({"unchecked","rawtypes"})
     public Map.Entry<K, V> select(K key, Cursor<? super K, ? super V> cursor) {
         int keyLength = length(key);
-        TrieEntry[] result = new TrieEntry[]{null};
+        TrieEntry[] result = new TrieEntry[] { null };
         selectR(root.left, -1, key, keyLength, cursor, result);
         return result[0];
     }
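A hedged snippet showing how a Cursor drives this method; Cursor is the single-method interface added earlier in this diff, so a lambda suffices:

```java
// Visit entries in closeness order and stop at (and return) the first one.
Map.Entry<String, String> nearest = trie.select("Lime", entry -> {
    System.out.println("visited: " + entry.getKey());
    return Cursor.SelectStatus.EXIT;
});
```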
@@ -448,8 +448,7 @@ public class PatriciaTrie extends AbstractMap implements Trie
                             final K key,
                             final int keyLength,
                            final Cursor<? super K, ? super V> cursor,
-                            final TrieEntry<K, V>[] result) {
-
+                            final TrieEntry[] result) {
         if (h.bitIndex <= bitIndex) {
             if (!h.isEmpty()) {
                 Cursor.SelectStatus ret = cursor.select(h);
@@ -470,7 +469,6 @@ public class PatriciaTrie extends AbstractMap implements Trie
             }
             return true; // continue
         }
-
         if (!isBitSet(key, keyLength, h.bitIndex)) {
             if (selectR(h.left, h.bitIndex, key, keyLength, cursor, result)) {
                 return selectR(h.right, h.bitIndex, key, keyLength, cursor, result);
@@ -480,7 +478,6 @@ public class PatriciaTrie extends AbstractMap implements Trie
                 return selectR(h.left, h.bitIndex, key, keyLength, cursor, result);
             }
         }
-
         return false;
     }
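The hunks below switch the prefix views from keyAnalyzer.length(...) to keyAnalyzer.lengthInBits(...), matching the extracted top-level KeyAnalyzer interface. A hedged sketch of that contract (the 16-bit figure follows DefaultKeyAnalyzer.bitsPerElement() from this diff; the String-keyed analyzer is illustrative):

```java
KeyAnalyzer<CharSequence> analyzer = new CharSequenceKeyAnalyzer(); // assumed analyzer
int bits = analyzer.lengthInBits("Lime"); // e.g. 4 chars * 16 bits = 64 for UTF-16 keys
```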
 
@@ -495,7 +492,7 @@ public class PatriciaTrie extends AbstractMap implements Trie
      * a lookup of 'Lime' would return 'Lime', 'LimeRadio', and 'LimeWire'.
      * <p>
* The view that this returns is optimized to have a very efficient - * Iterator. The firstKey, lastKey & size methods must iterate + * Iterator. The firstKey, lastKey & size methods must iterate * over all possible values in order to determine the results. This * information is cached until the Patricia tree changes. All other * methods (except Iterator) must compare the given key to the prefix @@ -505,7 +502,7 @@ public class PatriciaTrie extends AbstractMap implements Trie * Changing the subtree takes O(K) time. */ public SortedMap getPrefixedBy(K key) { - return getPrefixedByBits(key, 0, keyAnalyzer.length(key)); + return getPrefixedByBits(key, 0, keyAnalyzer.lengthInBits(key)); } /** @@ -521,7 +518,7 @@ public class PatriciaTrie extends AbstractMap implements Trie * return 'Lime', 'LimeRadio', and 'LimeWire'. *

* The view that this returns is optimized to have a very efficient - * Iterator. The firstKey, lastKey & size methods must iterate + * Iterator. The firstKey, lastKey & size methods must iterate * over all possible values in order to determine the results. This * information is cached until the Patricia tree changes. All other * methods (except Iterator) must compare the given key to the prefix @@ -547,7 +544,7 @@ public class PatriciaTrie extends AbstractMap implements Trie * length of 4 would return 'Lime', 'LimeRadio', and 'LimeWire'. *

* The view that this returns is optimized to have a very efficient - * Iterator. The firstKey, lastKey & size methods must iterate + * Iterator. The firstKey, lastKey & size methods must iterate * over all possible values in order to determine the results. This * information is cached until the Patricia tree changes. All other * methods (except Iterator) must compare the given key to the prefix @@ -571,7 +568,7 @@ public class PatriciaTrie extends AbstractMap implements Trie * would return all addresses that begin with '192.168'. *

* The view that this returns is optimized to have a very efficient - * Iterator. The firstKey, lastKey & size methods must iterate + * Iterator. The firstKey, lastKey & size methods must iterate * over all possible values in order to determine the results. This * information is cached until the Patricia tree changes. All other * methods (except Iterator) must compare the given key to the prefix @@ -605,9 +602,9 @@ public class PatriciaTrie extends AbstractMap implements Trie throw new IllegalArgumentException(offset + " + " + length + " > " + length(key)); } - if (offsetLength == 0) + if (offsetLength == 0) { return this; - + } return new PrefixSubMap(key, offset, length); } @@ -620,11 +617,11 @@ public class PatriciaTrie extends AbstractMap implements Trie @Override public boolean containsKey(Object k) { K key = asKey(k); - if (key == null) + if (key == null) { return false; - + } int keyLength = length(key); - TrieEntry entry = getNearestEntryForKey(key, keyLength); + TrieEntry entry = getNearestEntryForKey(key, keyLength); return !entry.isEmpty() && key.equals(entry.key); } @@ -633,9 +630,11 @@ public class PatriciaTrie extends AbstractMap implements Trie */ @Override public boolean containsValue(Object o) { - for (V v : values()) - if (valEquals(v, o)) + for (V v : values()) { + if (valEquals(v, o)) { return true; + } + } return false; } @@ -1050,7 +1049,7 @@ public class PatriciaTrie extends AbstractMap implements Trie return 0; } - return keyAnalyzer.length(key); + return keyAnalyzer.lengthInBits(key); } /** @@ -1162,7 +1161,7 @@ public class PatriciaTrie extends AbstractMap implements Trie /** * Traverses down the right path until it finds an uplink. */ - protected TrieEntry followRight(TrieEntry node) { + private TrieEntry followRight(TrieEntry node) { // if Trie is empty, no last entry. if (node.right == null) return null; @@ -1174,14 +1173,17 @@ public class PatriciaTrie extends AbstractMap implements Trie return node.right; } + @Override public K firstKey() { return firstEntry().getKey(); } + @Override public SortedMap headMap(K toKey) { return new SubMap(null, toKey); } + @Override public K lastKey() { TrieEntry entry = lastEntry(); if (entry != null) @@ -1190,10 +1192,12 @@ public class PatriciaTrie extends AbstractMap implements Trie return null; } + @Override public SortedMap subMap(K fromKey, K toKey) { return new SubMap(fromKey, toKey); } + @Override public SortedMap tailMap(K fromKey) { return new SubMap(fromKey, null); } @@ -1202,7 +1206,7 @@ public class PatriciaTrie extends AbstractMap implements Trie * Returns an entry strictly higher than the given key, * or null if no such entry exists. */ - protected TrieEntry higherEntry(K key) { + private TrieEntry higherEntry(K key) { // TODO: Cleanup so that we don't actually have to add/remove from the // tree. (We do it here because there are other well-defined // functions to perform the search.) @@ -1254,7 +1258,7 @@ public class PatriciaTrie extends AbstractMap implements Trie * Returns a key-value mapping associated with the least key greater * than or equal to the given key, or null if there is no such key. */ - protected TrieEntry ceilingEntry(K key) { + private TrieEntry ceilingEntry(K key) { // Basically: // Follow the steps of adding an entry, but instead... // @@ -1312,7 +1316,7 @@ public class PatriciaTrie extends AbstractMap implements Trie * Returns a key-value mapping associated with the greatest key * strictly less than the given key, or null if there is no such key. 
*/ - protected TrieEntry lowerEntry(K key) { + private TrieEntry lowerEntry(K key) { // Basically: // Follow the steps of adding an entry, but instead... // @@ -1363,7 +1367,7 @@ public class PatriciaTrie extends AbstractMap implements Trie * Returns a key-value mapping associated with the greatest key * less than or equal to the given key, or null if there is no such key. */ - protected TrieEntry floorEntry(K key) { + private TrieEntry floorEntry(K key) { // TODO: Cleanup so that we don't actually have to add/remove from the // tree. (We do it here because there are other well-defined // functions to perform the search.) @@ -1448,68 +1452,6 @@ public class PatriciaTrie extends AbstractMap implements Trie return entry; } - /** - * Defines the interface to analyze {@link Trie} keys on a bit - * level. KeyAnalyzer's - * methods return the length of the key in bits, whether or not a bit is - * set, and bits per element in the key. - *

- * Additionally, a method determines if a key is a prefix of another key and
- * returns the bit index where one key is different from another key (if
- * the key and found key are equal than the return value is EQUAL_BIT_KEY).
- * <p>
- * KeyAnalyzer defines:
- * <table>
- * <tr><td>NULL_BIT_KEY</td><td>When key's bits are all zero</td></tr>
- * <tr><td>EQUAL_BIT_KEY</td><td>When keys are the same</td></tr>
- * </table>
- */ - public interface KeyAnalyzer extends Comparator { - - /** - * Returned by bitIndex if key's bits are all 0. - */ - int NULL_BIT_KEY = -1; - - /** - * Returned by bitIndex if key and found key are - * equal. This is a very very specific case and - * shouldn't happen on a regular basis. - */ - int EQUAL_BIT_KEY = -2; - - /** - * Returns the length of the Key in bits. - */ - int length(K key); - - /** - * Returns whether or not a bit is set. - */ - boolean isBitSet(K key, int keyLength, int bitIndex); - - /** - * Returns the n-th different bit between key and found. - * This starts the comparison in key at 'keyStart' and goes - * for 'keyLength' bits, and compares to the found key - * starting at 'foundStart' and going for 'foundLength' bits. - */ - int bitIndex(K key, int keyStart, int keyLength, - K found, int foundStart, int foundLength); - - /** - * Returns the number of bits per element in the key. - * This is only useful for variable-length keys, such as Strings. - */ - int bitsPerElement(); - - /** - * Determines whether or not the given prefix (from offset to length) - * is a prefix of the given key. - */ - boolean isPrefix(K prefix, int offset, int length, K key); - } - /** * The actual Trie nodes. */ @@ -1540,18 +1482,17 @@ public class PatriciaTrie extends AbstractMap implements Trie */ private TrieEntry predecessor; - private TrieEntry(K key, V value, int bitIndex) { + TrieEntry(K key, V value, int bitIndex) { this.key = key; this.value = value; - this.bitIndex = bitIndex; - this.parent = null; this.left = this; this.right = null; this.predecessor = this; } + @SuppressWarnings("unchecked") @Override public boolean equals(Object o) { if (o == this) { @@ -1571,6 +1512,11 @@ public class PatriciaTrie extends AbstractMap implements Trie } } + @Override + public int hashCode() { + return Objects.hash(key, value, bitIndex, parent, left, right, predecessor); + } + /** * Whether or not the entry is storing a key. * Only the root can potentially be empty, all other @@ -1580,6 +1526,7 @@ public class PatriciaTrie extends AbstractMap implements Trie return key == null; } + @Override public K getKey() { return key; } @@ -1676,41 +1623,10 @@ public class PatriciaTrie extends AbstractMap implements Trie } } - /** - * An iterator that stores a single TrieEntry. - */ - private class SingletonIterator implements Iterator> { - private final TrieEntry entry; - private int hit = 0; - - public SingletonIterator(TrieEntry entry) { - this.entry = entry; - } - - public boolean hasNext() { - return hit == 0; - } - - public Map.Entry next() { - if (hit != 0) - throw new NoSuchElementException(); - hit++; - return entry; - } - - public void remove() { - if (hit != 1) - throw new IllegalStateException(); - hit++; - PatriciaTrie.this.removeEntry(entry); - } - - } - /** * An iterator for the entries. 
*/ - private abstract class NodeIterator implements Iterator { + abstract class NodeIterator implements Iterator { protected int expectedModCount = modCount; // For fast-fail protected TrieEntry next; // the next node to return protected TrieEntry current; // the current entry we're on @@ -1761,7 +1677,7 @@ public class PatriciaTrie extends AbstractMap implements Trie private class ValueIterator extends NodeIterator { public V next() { - return nextEntry().value; + return nextEntry().getValue(); } } @@ -1777,6 +1693,39 @@ public class PatriciaTrie extends AbstractMap implements Trie } } + class SingletonIterator implements Iterator> { + + private final PatriciaTrie patriciaTrie; + + private final TrieEntry entry; + + private int hit = 0; + + public SingletonIterator(PatriciaTrie patriciaTrie, TrieEntry entry) { + this.patriciaTrie = patriciaTrie; + this.entry = entry; + } + + public boolean hasNext() { + return hit == 0; + } + + public Map.Entry next() { + if (hit != 0) + throw new NoSuchElementException(); + hit++; + return entry; + } + + public void remove() { + if (hit != 1) + throw new IllegalStateException(); + hit++; + patriciaTrie.removeEntry(entry); + } + + } + /** * An iterator for iterating over a prefix search. */ @@ -2082,11 +2031,11 @@ public class PatriciaTrie extends AbstractMap implements Trie prefixStart = subtree(prefix, offset, length); iterModCount = modCount; } - if (prefixStart == null) { - return EmptyIterator.emptyIterator(); + Set> set = Collections.emptySet(); + return set.iterator(); } else if (length >= prefixStart.bitIndex) { - return new SingletonIterator(prefixStart); + return new SingletonIterator(PatriciaTrie.this, prefixStart); } else { return new PrefixEntryIterator(prefixStart, prefix, offset, length); } @@ -2273,10 +2222,8 @@ public class PatriciaTrie extends AbstractMap implements Trie if (size == -1 || sizeModCount != PatriciaTrie.this.modCount) { size = 0; sizeModCount = PatriciaTrie.this.modCount; - Iterator i = iterator(); - while (i.hasNext()) { + for (Entry kvEntry : this) { size++; - i.next(); } } return size; @@ -2304,12 +2251,14 @@ public class PatriciaTrie extends AbstractMap implements Trie @Override @SuppressWarnings("unchecked") public boolean remove(Object o) { - if (!(o instanceof Map.Entry)) + if (!(o instanceof Map.Entry)) { return false; + } Map.Entry entry = (Map.Entry) o; K key = entry.getKey(); - if (!inRange(key)) + if (!inRange(key)) { return false; + } TrieEntry node = getEntry(key); if (node != null && valEquals(node.getValue(), entry.getValue())) { removeEntry(node); diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/Trie.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/Trie.java index e4131dd..5c84ffd 100644 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/Trie.java +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/Trie.java @@ -73,8 +73,8 @@ public interface Trie extends SortedMap { * L = 1001100
 * <p>
* If the Trie contained 'H' and 'L', a lookup of 'D' would return 'L', - * because the XOR distance between D & L is smaller than the XOR distance - * between D & H. + * because the XOR distance between D & L is smaller than the XOR distance + * between D & H. */ V select(K key); @@ -111,47 +111,4 @@ public interface Trie extends SortedMap { * till the end. */ Map.Entry traverse(Cursor cursor); - - /** - * An interface used by a {@link Trie}. A {@link Trie} selects items by - * closeness and passes the items to the Cursor. You can then - * decide what to do with the key-value pair and the return value - * from {@link #select(java.util.Map.Entry)} tells the Trie - * what to do next. - *

- * Cursor returns status/selection status might be:
- * <table>
- * <tr><td>Return Value</td><td>Status</td></tr>
- * <tr><td>EXIT</td><td>Finish the Trie operation</td></tr>
- * <tr><td>CONTINUE</td><td>Look at the next element in the traversal</td></tr>
- * <tr><td>REMOVE_AND_EXIT</td><td>Remove the entry and stop iterating</td></tr>
- * <tr><td>REMOVE</td><td>Remove the entry and continue iterating</td></tr>
- * </table>
- *
- * Note: {@link Trie#select(Object, Trie.Cursor)} does - * not support REMOVE. - * - * @param Key Type - * @param Key Value - */ - interface Cursor { - - /** - * Notification that the Trie is currently looking at the given entry. - * Return EXIT to finish the Trie operation, - * CONTINUE to look at the next entry, REMOVE - * to remove the entry and continue iterating, or - * REMOVE_AND_EXIT to remove the entry and stop iterating. - * Not all operations support REMOVE. - */ - SelectStatus select(Map.Entry entry); - - /** - * The mode during selection. - */ - enum SelectStatus { - EXIT, CONTINUE, REMOVE, REMOVE_AND_EXIT - } - } } - diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/UnmodifiableIterator.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/UnmodifiableIterator.java deleted file mode 100644 index 7b9c2e4..0000000 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/UnmodifiableIterator.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.xbib.datastructures.trie.limewire; - -import java.util.Iterator; - -/** - * A convenience class to aid in developing iterators that cannot be modified. - */ -public abstract class UnmodifiableIterator implements Iterator { - /** - * Throws UnsupportedOperationException. - */ - public final void remove() { - throw new UnsupportedOperationException(); - } -} - - - - - - diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/package-info.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/package-info.java new file mode 100644 index 0000000..01b8619 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/limewire/package-info.java @@ -0,0 +1,6 @@ +/** + * WireShare (LimeWire "Pirate Edition") PatriciaTrie + * + * https://sourceforge.net/projects/wireshare/ + */ +package org.xbib.datastructures.trie.limewire; \ No newline at end of file diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/patricia/AbstractPatriciaTrie.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/patricia/AbstractPatriciaTrie.java deleted file mode 100644 index 36d9397..0000000 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/patricia/AbstractPatriciaTrie.java +++ /dev/null @@ -1,1105 +0,0 @@ -package org.xbib.datastructures.trie.patricia; - -import java.util.AbstractCollection; -import java.util.AbstractSet; -import java.util.Collection; -import java.util.ConcurrentModificationException; -import java.util.Iterator; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Set; - -/** - * This class implements the base PATRICIA algorithm and everything that - * is related to the {@link Map} interface. - */ -abstract class AbstractPatriciaTrie extends AbstractTrie { - - /** - * The root node of the {@link PrefixTree}. - */ - final TrieEntry root = new TrieEntry<>(null, null, -1); - /** - * The number of times this {@link PrefixTree} has been modified. - * It's used to detect concurrent modifications and fail-fast - * the {@link Iterator}s. - */ - transient int modCount = 0; - /** - * Each of these fields are initialized to contain an instance of the - * appropriate view the first time this view is requested. The views are - * stateless, so there's no reason to create more than one of each. 
- */ - private transient volatile Set keySet; - - private transient volatile Collection values; - - private transient volatile Set> entrySet; - - /** - * The current size of the {@link PrefixTree} - */ - private int size = 0; - - public AbstractPatriciaTrie() { - super(); - } - - public AbstractPatriciaTrie(KeyAnalyzer keyAnalyzer) { - super(keyAnalyzer); - } - - public AbstractPatriciaTrie(Map m) { - super(); - putAll(m); - } - - public AbstractPatriciaTrie(KeyAnalyzer keyAnalyzer, - Map m) { - super(keyAnalyzer); - putAll(m); - } - - /** - * Returns true if 'next' is a valid uplink coming from 'from'. - */ - static boolean isValidUplink(TrieEntry next, TrieEntry from) { - return next != null && next.bitIndex <= from.bitIndex && !next.isEmpty(); - } - - @Override - public void clear() { - root.key = null; - root.bitIndex = -1; - root.value = null; - root.parent = null; - root.left = root; - root.right = null; - root.predecessor = root; - size = 0; - incrementModCount(); - } - - @Override - public int size() { - return size; - } - - /** - * A helper method to increment the {@link PrefixTree} size - * and the modification counter. - */ - void incrementSize() { - size++; - incrementModCount(); - } - - /** - * A helper method to decrement the {@link PrefixTree} size - * and increment the modification counter. - */ - void decrementSize() { - size--; - incrementModCount(); - } - - /** - * A helper method to increment the modification counter. - */ - private void incrementModCount() { - ++modCount; - } - - @Override - public V put(K key, V value) { - if (key == null) { - throw new NullPointerException("Key cannot be null"); - } - int lengthInBits = lengthInBits(key); - // The only place to store a key with a length - // of zero bits is the root node - if (lengthInBits == 0) { - if (root.isEmpty()) { - incrementSize(); - } else { - incrementModCount(); - } - return root.setKeyValue(key, value); - } - TrieEntry found = getNearestEntryForKey(key); - if (compareKeys(key, found.key)) { - if (found.isEmpty()) { // <- must be the root - incrementSize(); - } else { - incrementModCount(); - } - return found.setKeyValue(key, value); - } - - int bitIndex = bitIndex(key, found.key); - if (bitIndex != KeyAnalyzer.OUT_OF_BOUNDS_BIT_KEY) { - if (0 <= bitIndex) { // in 99.999...9% the case - /* NEW KEY+VALUE TUPLE */ - TrieEntry t = new TrieEntry<>(key, value, bitIndex); - addEntry(t); - incrementSize(); - return null; - } else if (bitIndex == KeyAnalyzer.NULL_BIT_KEY) { - // A bits of the Key are zero. The only place to - // store such a Key is the root Node! - /* NULL BIT KEY */ - if (root.isEmpty()) { - incrementSize(); - } else { - incrementModCount(); - } - return root.setKeyValue(key, value); - } else if (bitIndex == KeyAnalyzer.EQUAL_BIT_KEY) { - // This is a very special and rare case. 
- /* REPLACE OLD KEY+VALUE */ - if (found != root) { - incrementModCount(); - return found.setKeyValue(key, value); - } - } - } - throw new IndexOutOfBoundsException("Failed to put: " + key + " -> " + value + ", " + bitIndex); - } - - /** - * Adds the given {@link TrieEntry} to the {@link PrefixTree} - */ - TrieEntry addEntry(TrieEntry entry) { - TrieEntry current = root.left; - TrieEntry path = root; - while (true) { - if (current.bitIndex >= entry.bitIndex - || current.bitIndex <= path.bitIndex) { - entry.predecessor = entry; - - if (!isBitSet(entry.key, entry.bitIndex)) { - entry.left = entry; - entry.right = current; - } else { - entry.left = current; - entry.right = entry; - } - - entry.parent = path; - if (current.bitIndex >= entry.bitIndex) { - current.parent = entry; - } - - // if we inserted an uplink, set the predecessor on it - if (current.bitIndex <= path.bitIndex) { - current.predecessor = entry; - } - - if (path == root || !isBitSet(entry.key, path.bitIndex)) { - path.left = entry; - } else { - path.right = entry; - } - - return entry; - } - - path = current; - - if (!isBitSet(entry.key, current.bitIndex)) { - current = current.left; - } else { - current = current.right; - } - } - } - - @Override - public V get(Object k) { - TrieEntry entry = getEntry(k); - return entry != null ? entry.getValue() : null; - } - - /** - * Returns the entry associated with the specified key in the - * AbstractPatriciaTrie. Returns null if the map contains no mapping - * for this key. - *

- * This may throw ClassCastException if the object is not of type K. - */ - @SuppressWarnings("unchecked") - TrieEntry getEntry(Object k) { - K key = (K) k; - if (key == null) { - return null; - } - - TrieEntry entry = getNearestEntryForKey(key); - return !entry.isEmpty() && compareKeys(key, entry.key) ? entry : null; - } - - @Override - public Map.Entry select(K key) { - Reference> reference - = new Reference>(); - if (!selectR(root.left, -1, key, reference)) { - return reference.get(); - } - return null; - } - - @Override - public Map.Entry select(K key, Cursor cursor) { - Reference> reference - = new Reference>(); - selectR(root.left, -1, key, cursor, reference); - return reference.get(); - } - - private boolean selectR(TrieEntry h, int bitIndex, - final K key, final Reference> reference) { - if (h.bitIndex <= bitIndex) { - // If we hit the root Node and it is empty - // we have to look for an alternative best - // matching node. - if (!h.isEmpty()) { - reference.set(h); - return false; - } - return true; - } - - if (!isBitSet(key, h.bitIndex)) { - if (selectR(h.left, h.bitIndex, key, reference)) { - return selectR(h.right, h.bitIndex, key, reference); - } - } else { - if (selectR(h.right, h.bitIndex, key, reference)) { - return selectR(h.left, h.bitIndex, key, reference); - } - } - return false; - } - - /** - * - */ - private boolean selectR(TrieEntry h, int bitIndex, - final K key, final Cursor cursor, - final Reference> reference) { - - if (h.bitIndex <= bitIndex) { - if (!h.isEmpty()) { - Cursor.Decision decision = cursor.select(h); - switch (decision) { - case REMOVE: - throw new UnsupportedOperationException( - "Cannot remove during select"); - case EXIT: - reference.set(h); - return false; // exit - case REMOVE_AND_EXIT: - TrieEntry entry = new TrieEntry( - h.getKey(), h.getValue(), -1); - reference.set(entry); - removeEntry(h); - return false; - case CONTINUE: - // fall through. - } - } - return true; // continue - } - - if (!isBitSet(key, h.bitIndex)) { - if (selectR(h.left, h.bitIndex, key, cursor, reference)) { - return selectR(h.right, h.bitIndex, key, cursor, reference); - } - } else { - if (selectR(h.right, h.bitIndex, key, cursor, reference)) { - return selectR(h.left, h.bitIndex, key, cursor, reference); - } - } - - return false; - } - - @Override - public Map.Entry traverse(Cursor cursor) { - TrieEntry entry = nextEntry(null); - while (entry != null) { - TrieEntry current = entry; - Cursor.Decision decision = cursor.select(current); - entry = nextEntry(current); - switch (decision) { - case EXIT: - return current; - case REMOVE: - removeEntry(current); - break; // out of switch, stay in while loop - case REMOVE_AND_EXIT: - Map.Entry value = new TrieEntry( - current.getKey(), current.getValue(), -1); - removeEntry(current); - return value; - case CONTINUE: // do nothing. 
- } - } - - return null; - } - - @SuppressWarnings("unchecked") - @Override - public boolean containsKey(Object k) { - if (k == null) { - return false; - } - K key = (K) k; - TrieEntry entry = getNearestEntryForKey(key); - return !entry.isEmpty() && compareKeys(key, entry.key); - } - - @Override - public Set> entrySet() { - if (entrySet == null) { - entrySet = new EntrySet(); - } - return entrySet; - } - - @Override - public Set keySet() { - if (keySet == null) { - keySet = new KeySet(); - } - return keySet; - } - - @Override - public Collection values() { - if (values == null) { - values = new Values(); - } - return values; - } - - @SuppressWarnings("unchecked") - @Override - public V remove(Object k) { - if (k == null) { - return null; - } - K key = (K) k; - TrieEntry current = root.left; - TrieEntry path = root; - while (true) { - if (current.bitIndex <= path.bitIndex) { - if (!current.isEmpty() && compareKeys(key, current.key)) { - return removeEntry(current); - } else { - return null; - } - } - - path = current; - - if (!isBitSet(key, current.bitIndex)) { - current = current.left; - } else { - current = current.right; - } - } - } - - /** - * Returns the nearest entry for a given key. This is useful - * for finding knowing if a given key exists (and finding the value - * for it), or for inserting the key. - *

- * The actual get implementation. This is very similar to - * selectR but with the exception that it might return the - * root Entry even if it's empty. - */ - TrieEntry getNearestEntryForKey(K key) { - TrieEntry current = root.left; - TrieEntry path = root; - while (true) { - if (current.bitIndex <= path.bitIndex) { - return current; - } - - path = current; - if (!isBitSet(key, current.bitIndex)) { - current = current.left; - } else { - current = current.right; - } - } - } - - /** - * Removes a single entry from the {@link PrefixTree}. - *

- * If we found a Key (Entry h) then figure out if it's - * an internal (hard to remove) or external Entry (easy - * to remove) - */ - V removeEntry(TrieEntry h) { - if (h != root) { - if (h.isInternalNode()) { - removeInternalEntry(h); - } else { - removeExternalEntry(h); - } - } - - decrementSize(); - return h.setKeyValue(null, null); - } - - /** - * Removes an external entry from the {@link PrefixTree}. - *

- * If it's an external Entry then just remove it. - * This is very easy and straight forward. - */ - private void removeExternalEntry(TrieEntry h) { - if (h == root) { - throw new IllegalArgumentException("Cannot delete root Entry!"); - } else if (!h.isExternalNode()) { - throw new IllegalArgumentException(h + " is not an external Entry!"); - } - - TrieEntry parent = h.parent; - TrieEntry child = (h.left == h) ? h.right : h.left; - - if (parent.left == h) { - parent.left = child; - } else { - parent.right = child; - } - - // either the parent is changing, or the predecessor is changing. - if (child.bitIndex > parent.bitIndex) { - child.parent = parent; - } else { - child.predecessor = parent; - } - - } - - /** - * Removes an internal entry from the {@link PrefixTree}. - *

- * If it's an internal Entry then "good luck" with understanding - * this code. The Idea is essentially that Entry p takes Entry h's - * place in the trie which requires some re-wiring. - */ - private void removeInternalEntry(TrieEntry h) { - if (h == root) { - throw new IllegalArgumentException("Cannot delete root Entry!"); - } else if (!h.isInternalNode()) { - throw new IllegalArgumentException(h + " is not an internal Entry!"); - } - - TrieEntry p = h.predecessor; - - // Set P's bitIndex - p.bitIndex = h.bitIndex; - - // Fix P's parent, predecessor and child Nodes - { - TrieEntry parent = p.parent; - TrieEntry child = (p.left == h) ? p.right : p.left; - - // if it was looping to itself previously, - // it will now be pointed from it's parent - // (if we aren't removing it's parent -- - // in that case, it remains looping to itself). - // otherwise, it will continue to have the same - // predecessor. - if (p.predecessor == p && p.parent != h) { - p.predecessor = p.parent; - } - - if (parent.left == p) { - parent.left = child; - } else { - parent.right = child; - } - - if (child.bitIndex > parent.bitIndex) { - child.parent = parent; - } - } - - // Fix H's parent and child Nodes - { - // If H is a parent of its left and right child - // then change them to P - if (h.left.parent == h) { - h.left.parent = p; - } - - if (h.right.parent == h) { - h.right.parent = p; - } - - // Change H's parent - if (h.parent.left == h) { - h.parent.left = p; - } else { - h.parent.right = p; - } - } - - // Copy the remaining fields from H to P - //p.bitIndex = h.bitIndex; - p.parent = h.parent; - p.left = h.left; - p.right = h.right; - - // Make sure that if h was pointing to any uplinks, - // p now points to them. - if (isValidUplink(p.left, p)) { - p.left.predecessor = p; - } - - if (isValidUplink(p.right, p)) { - p.right.predecessor = p; - } - } - - /** - * Returns the entry lexicographically after the given entry. - * If the given entry is null, returns the first node. - */ - TrieEntry nextEntry(TrieEntry node) { - if (node == null) { - return firstEntry(); - } else { - return nextEntryImpl(node.predecessor, node, null); - } - } - - /** - * Scans for the next node, starting at the specified point, and using 'previous' - * as a hint that the last node we returned was 'previous' (so we know not to return - * it again). If 'tree' is non-null, this will limit the search to the given tree. - *

- * The basic premise is that each iteration follows these steps:
- *
- * 1) Scan all the way to the left.
- *    a) If we already started from this node last time, proceed to Step 2.
- *    b) If a valid uplink is found, use it.
- *    c) If the result is an empty node (root not set), break the scan.
- *    d) If we already returned the left node, break the scan.
- *
- * 2) Check the right.
- *    a) If we already returned the right node, proceed to Step 3.
- *    b) If it is a valid uplink, use it.
- *    c) Do Step 1 from the right node.
- *
- * 3) Back up through the parents until we find a parent
- *    that we're not the right child of.
- *
- * 4) If there's no right child of that parent, the iteration is finished.
- *    Otherwise continue to Step 5.
- *
- * 5) Check to see if the right child is a valid uplink.
- *    a) If we already returned that child, proceed to Step 6.
- *       Otherwise, use it.
- *
- * 6) If the right child of the parent is the parent itself, we've
- *    already found & returned the end of the Trie, so exit.
- *

- * 7) Do Step 1 on the parent's right child. - */ - TrieEntry nextEntryImpl(TrieEntry start, - TrieEntry previous, TrieEntry tree) { - - TrieEntry current = start; - - // Only look at the left if this was a recursive or - // the first check, otherwise we know we've already looked - // at the left. - if (previous == null || start != previous.predecessor) { - while (!current.left.isEmpty()) { - // stop traversing if we've already - // returned the left of this node. - if (previous == current.left) { - break; - } - - if (isValidUplink(current.left, current)) { - return current.left; - } - - current = current.left; - } - } - - // If there's no data at all, exit. - if (current.isEmpty()) { - return null; - } - - // If we've already returned the left, - // and the immediate right is null, - // there's only one entry in the Trie - // which is stored at the root. - // - // / ("") <-- root - // \_/ \ - // null <-- 'current' - // - if (current.right == null) { - return null; - } - - // If nothing valid on the left, try the right. - if (previous != current.right) { - // See if it immediately is valid. - if (isValidUplink(current.right, current)) { - return current.right; - } - - // Must search on the right's side if it wasn't initially valid. - return nextEntryImpl(current.right, previous, tree); - } - - // Neither left nor right are valid, find the first parent - // whose child did not come from the right & traverse it. - while (current == current.parent.right) { - // If we're going to traverse to above the subtree, stop. - if (current == tree) { - return null; - } - - current = current.parent; - } - - // If we're on the top of the subtree, we can't go any higher. - if (current == tree) { - return null; - } - - // If there's no right, the parent must be root, so we're done. - if (current.parent.right == null) { - return null; - } - - // If the parent's right points to itself, we've found one. - if (previous != current.parent.right - && isValidUplink(current.parent.right, current.parent)) { - return current.parent.right; - } - - // If the parent's right is itself, there can't be any more nodes. - if (current.parent.right == current.parent) { - return null; - } - - // We need to traverse down the parent's right's path. - return nextEntryImpl(current.parent.right, previous, tree); - } - - /** - * Returns the first entry the {@link PrefixTree} is storing. - *

- * This is implemented by going always to the left until - * we encounter a valid uplink. That uplink is the first key. - */ - TrieEntry firstEntry() { - // if Trie is empty, no first node. - if (isEmpty()) { - return null; - } - - return followLeft(root); - } - - /** - * Goes left through the tree until it finds a valid node. - */ - TrieEntry followLeft(TrieEntry node) { - while (true) { - TrieEntry child = node.left; - // if we hit root and it didn't have a node, go right instead. - if (child.isEmpty()) { - child = node.right; - } - - if (child.bitIndex <= node.bitIndex) { - return child; - } - - node = child; - } - } - - /** - * A {@link Reference} allows us to return something through a Method's - * argument list. An alternative would be to an Array with a length of - * one (1) but that leads to compiler warnings. Computationally and memory - * wise there's no difference (except for the need to load the - * {@link Reference} Class but that happens only once). - */ - private static class Reference { - - private E item; - - public void set(E item) { - this.item = item; - } - - public E get() { - return item; - } - } - - /** - * A {@link PrefixTree} is a set of {@link TrieEntry} nodes - */ - static class TrieEntry extends BasicEntry { - - private static final long serialVersionUID = 4596023148184140013L; - - /** - * The index this entry is comparing. - */ - protected int bitIndex; - - /** - * The parent of this entry. - */ - protected TrieEntry parent; - - /** - * The left child of this entry. - */ - protected TrieEntry left; - - /** - * The right child of this entry. - */ - protected TrieEntry right; - - /** - * The entry who uplinks to this entry. - */ - protected TrieEntry predecessor; - - public TrieEntry(K key, V value, int bitIndex) { - super(key, value); - - this.bitIndex = bitIndex; - - this.parent = null; - this.left = this; - this.right = null; - this.predecessor = this; - } - - /** - * Whether or not the entry is storing a key. - * Only the root can potentially be empty, all other - * nodes must have a key. 
- */ - public boolean isEmpty() { - return key == null; - } - - /** - * Neither the left nor right child is a loopback - */ - public boolean isInternalNode() { - return left != this && right != this; - } - - /** - * Either the left or right child is a loopback - */ - public boolean isExternalNode() { - return !isInternalNode(); - } - - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - - if (bitIndex == -1) { - buffer.append("RootEntry("); - } else { - buffer.append("Entry("); - } - - buffer.append("key=").append(getKey()).append(" [").append(bitIndex).append("], "); - buffer.append("value=").append(getValue()).append(", "); - //buffer.append("bitIndex=").append(bitIndex).append(", "); - - if (parent != null) { - if (parent.bitIndex == -1) { - buffer.append("parent=").append("ROOT"); - } else { - buffer.append("parent=").append(parent.getKey()).append(" [").append(parent.bitIndex).append("]"); - } - } else { - buffer.append("parent=").append("null"); - } - buffer.append(", "); - - if (left != null) { - if (left.bitIndex == -1) { - buffer.append("left=").append("ROOT"); - } else { - buffer.append("left=").append(left.getKey()).append(" [").append(left.bitIndex).append("]"); - } - } else { - buffer.append("left=").append("null"); - } - buffer.append(", "); - - if (right != null) { - if (right.bitIndex == -1) { - buffer.append("right=").append("ROOT"); - } else { - buffer.append("right=").append(right.getKey()).append(" [").append(right.bitIndex).append("]"); - } - } else { - buffer.append("right=").append("null"); - } - buffer.append(", "); - - if (predecessor != null) { - if (predecessor.bitIndex == -1) { - buffer.append("predecessor=").append("ROOT"); - } else { - buffer.append("predecessor=").append(predecessor.getKey()).append(" [").append(predecessor.bitIndex).append("]"); - } - } - - buffer.append(")"); - return buffer.toString(); - } - } - - - /** - * This is a entry set view of the {@link PrefixTree} as returned - * by {@link Map#entrySet()} - */ - private class EntrySet extends AbstractSet> { - - @Override - public Iterator> iterator() { - return new EntryIterator(); - } - - @Override - public boolean contains(Object o) { - if (!(o instanceof Map.Entry)) { - return false; - } - - TrieEntry candidate = getEntry(((Map.Entry) o).getKey()); - return candidate != null && candidate.equals(o); - } - - @Override - public boolean remove(Object o) { - int size = size(); - AbstractPatriciaTrie.this.remove(o); - return size != size(); - } - - @Override - public int size() { - return AbstractPatriciaTrie.this.size(); - } - - @Override - public void clear() { - AbstractPatriciaTrie.this.clear(); - } - - /** - * An {@link Iterator} that returns {@link Entry} Objects - */ - private class EntryIterator extends TrieIterator> { - @Override - public Map.Entry next() { - return nextEntry(); - } - } - } - - /** - * This is a key set view of the {@link PrefixTree} as returned - * by {@link Map#keySet()} - */ - private class KeySet extends AbstractSet { - - @Override - public Iterator iterator() { - return new KeyIterator(); - } - - @Override - public int size() { - return AbstractPatriciaTrie.this.size(); - } - - @Override - public boolean contains(Object o) { - return containsKey(o); - } - - @Override - public boolean remove(Object o) { - int size = size(); - AbstractPatriciaTrie.this.remove(o); - return size != size(); - } - - @Override - public void clear() { - AbstractPatriciaTrie.this.clear(); - } - - /** - * An {@link Iterator} that returns Key Objects - */ - 
private class KeyIterator extends TrieIterator { - @Override - public K next() { - return nextEntry().getKey(); - } - } - } - - /** - * This is a value view of the {@link PrefixTree} as returned - * by {@link Map#values()} - */ - private class Values extends AbstractCollection { - - @Override - public Iterator iterator() { - return new ValueIterator(); - } - - @Override - public int size() { - return AbstractPatriciaTrie.this.size(); - } - - @Override - public boolean contains(Object o) { - return containsValue(o); - } - - @Override - public void clear() { - AbstractPatriciaTrie.this.clear(); - } - - @Override - public boolean remove(Object o) { - for (Iterator it = iterator(); it.hasNext(); ) { - V value = it.next(); - if ((value == null ? o == null : value.equals(o))) { - it.remove(); - return true; - } - } - return false; - } - - /** - * An {@link Iterator} that returns Value Objects - */ - private class ValueIterator extends TrieIterator { - @Override - public V next() { - return nextEntry().getValue(); - } - } - } - - /** - * An iterator for the entries. - */ - abstract class TrieIterator implements Iterator { - - /** - * For fast-fail - */ - protected int expectedModCount = AbstractPatriciaTrie.this.modCount; - - protected TrieEntry next; // the next node to return - protected TrieEntry current; // the current entry we're on - - /** - * Starts iteration from the root - */ - protected TrieIterator() { - next = AbstractPatriciaTrie.this.nextEntry(null); - } - - /** - * Starts iteration at the given entry - */ - protected TrieIterator(TrieEntry firstEntry) { - next = firstEntry; - } - - /** - * Returns the next {@link TrieEntry} - */ - protected TrieEntry nextEntry() { - if (expectedModCount != AbstractPatriciaTrie.this.modCount) { - throw new ConcurrentModificationException(); - } - - TrieEntry e = next; - if (e == null) { - throw new NoSuchElementException(); - } - - next = findNext(e); - current = e; - return e; - } - - /** - * @see PatriciaTrie#nextEntry(TrieEntry) - */ - protected TrieEntry findNext(TrieEntry prior) { - return AbstractPatriciaTrie.this.nextEntry(prior); - } - - @Override - public boolean hasNext() { - return next != null; - } - - @Override - public void remove() { - if (current == null) { - throw new IllegalStateException(); - } - - if (expectedModCount != AbstractPatriciaTrie.this.modCount) { - throw new ConcurrentModificationException(); - } - - TrieEntry node = current; - current = null; - AbstractPatriciaTrie.this.removeEntry(node); - - expectedModCount = AbstractPatriciaTrie.this.modCount; - } - } -} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/patricia/PatriciaTrie.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/patricia/PatriciaTrie.java index 785fbd5..ae0d3ce 100644 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/patricia/PatriciaTrie.java +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/patricia/PatriciaTrie.java @@ -1,9 +1,12 @@ package org.xbib.datastructures.trie.patricia; +import java.util.AbstractCollection; import java.util.AbstractMap; import java.util.AbstractSet; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.ConcurrentModificationException; import java.util.Iterator; import java.util.Map; import java.util.NoSuchElementException; @@ -44,69 +47,1139 @@ import java.util.SortedMap; * @see PATRICIA * @see Crit-Bit Tree */ -public class PatriciaTrie extends AbstractPatriciaTrie { +public class 
PatriciaTrie extends AbstractTrie { public PatriciaTrie() { super(); } public PatriciaTrie(Map m) { - super(m); + super(); + putAll(m); + } + + public PatriciaTrie(KeyAnalyzer keyAnalyzer) { + super(keyAnalyzer); + } + + public PatriciaTrie(KeyAnalyzer keyAnalyzer, + Map m) { + super(keyAnalyzer); + putAll(m); + } + + @Override + public Comparator comparator() { + return keyAnalyzer; + } + + @Override + public SortedMap prefixMap(K prefix) { + int lengthInBits = lengthInBits(prefix); + if (lengthInBits == 0) { + return this; + } + + return new PrefixRangeMap(prefix); + } + + @Override + public K firstKey() { + return firstEntry().getKey(); + } + + @Override + public K lastKey() { + TrieEntry entry = lastEntry(); + if (entry != null) { + return entry.getKey(); + } + return null; + } + + @Override + public SortedMap headMap(K toKey) { + return new RangeEntryMap(null, toKey); + } + + @Override + public SortedMap subMap(K fromKey, K toKey) { + return new RangeEntryMap(fromKey, toKey); + } + + @Override + public SortedMap tailMap(K fromKey) { + return new RangeEntryMap(fromKey, null); + } + + /** + * The root node of the {@link PrefixTree}. + */ + final TrieEntry root = new TrieEntry<>(null, null, -1); + /** + * The number of times this {@link PrefixTree} has been modified. + * It's used to detect concurrent modifications and fail-fast + * the {@link Iterator}s. + */ + transient int modCount = 0; + /** + * Each of these fields are initialized to contain an instance of the + * appropriate view the first time this view is requested. The views are + * stateless, so there's no reason to create more than one of each. + */ + private transient volatile Set keySet; + + private transient volatile Collection values; + + private transient volatile Set> entrySet; + + /** + * The current size of the {@link PrefixTree} + */ + private int size = 0; + + /** + * Returns true if 'next' is a valid uplink coming from 'from'. + */ + static boolean isValidUplink(TrieEntry next, TrieEntry from) { + return next != null && next.bitIndex <= from.bitIndex && !next.isEmpty(); + } + + @Override + public void clear() { + root.key = null; + root.bitIndex = -1; + root.value = null; + root.parent = null; + root.left = root; + root.right = null; + root.predecessor = root; + size = 0; + incrementModCount(); + } + + @Override + public int size() { + return size; + } + + /** + * A helper method to increment the {@link PrefixTree} size + * and the modification counter. + */ + void incrementSize() { + size++; + incrementModCount(); + } + + /** + * A helper method to decrement the {@link PrefixTree} size + * and increment the modification counter. + */ + void decrementSize() { + size--; + incrementModCount(); + } + + /** + * A helper method to increment the modification counter. 
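With the `SortedMap` surface now implemented directly on `PatriciaTrie`, a brief usage sketch; `stringKeyAnalyzer` is a placeholder for whatever `KeyAnalyzer<String>` this package provides (name hypothetical):

```java
// Hedged usage sketch, e.g. inside a test method.
PatriciaTrie<String, Integer> trie = new PatriciaTrie<>(stringKeyAnalyzer);
trie.put("romane", 1);
trie.put("romanus", 2);
trie.put("rubens", 3);
trie.get("romanus");   // 2
trie.firstKey();       // "romane" -- smallest key in bitwise order
```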
+ */ + private void incrementModCount() { + ++modCount; } - public PatriciaTrie(KeyAnalyzer keyAnalyzer) { - super(keyAnalyzer); + @Override + public V put(K key, V value) { + if (key == null) { + throw new NullPointerException("Key cannot be null"); + } + int lengthInBits = lengthInBits(key); + // The only place to store a key with a length + // of zero bits is the root node + if (lengthInBits == 0) { + if (root.isEmpty()) { + incrementSize(); + } else { + incrementModCount(); + } + return root.setKeyValue(key, value); + } + TrieEntry found = getNearestEntryForKey(key); + if (compareKeys(key, found.key)) { + if (found.isEmpty()) { // <- must be the root + incrementSize(); + } else { + incrementModCount(); + } + return found.setKeyValue(key, value); + } + + int bitIndex = bitIndex(key, found.key); + if (bitIndex != KeyAnalyzer.OUT_OF_BOUNDS_BIT_KEY) { + if (0 <= bitIndex) { // in 99.999...9% the case + /* NEW KEY+VALUE TUPLE */ + TrieEntry t = new TrieEntry<>(key, value, bitIndex); + addEntry(t); + incrementSize(); + return null; + } else if (bitIndex == KeyAnalyzer.NULL_BIT_KEY) { + // A bits of the Key are zero. The only place to + // store such a Key is the root Node! + /* NULL BIT KEY */ + if (root.isEmpty()) { + incrementSize(); + } else { + incrementModCount(); + } + return root.setKeyValue(key, value); + } else if (bitIndex == KeyAnalyzer.EQUAL_BIT_KEY) { + // This is a very special and rare case. + /* REPLACE OLD KEY+VALUE */ + if (found != root) { + incrementModCount(); + return found.setKeyValue(key, value); + } + } + } + throw new IndexOutOfBoundsException("Failed to put: " + key + " -> " + value + ", " + bitIndex); + } + + /** + * Adds the given {@link TrieEntry} to the {@link PrefixTree} + */ + TrieEntry addEntry(TrieEntry entry) { + TrieEntry current = root.left; + TrieEntry path = root; + while (true) { + if (current.bitIndex >= entry.bitIndex + || current.bitIndex <= path.bitIndex) { + entry.predecessor = entry; + + if (!isBitSet(entry.key, entry.bitIndex)) { + entry.left = entry; + entry.right = current; + } else { + entry.left = current; + entry.right = entry; + } + + entry.parent = path; + if (current.bitIndex >= entry.bitIndex) { + current.parent = entry; + } + + // if we inserted an uplink, set the predecessor on it + if (current.bitIndex <= path.bitIndex) { + current.predecessor = entry; + } + + if (path == root || !isBitSet(entry.key, path.bitIndex)) { + path.left = entry; + } else { + path.right = entry; + } + + return entry; + } + + path = current; + + if (!isBitSet(entry.key, current.bitIndex)) { + current = current.left; + } else { + current = current.right; + } + } + } + + @Override + public V get(Object k) { + TrieEntry entry = getEntry(k); + return entry != null ? entry.getValue() : null; + } + + /** + * Returns the entry associated with the specified key in the + * AbstractPatriciaTrie. Returns null if the map contains no mapping + * for this key. + *

+ * This may throw ClassCastException if the object is not of type K. + */ + @SuppressWarnings("unchecked") + TrieEntry getEntry(Object k) { + K key = (K) k; + if (key == null) { + return null; + } + + TrieEntry entry = getNearestEntryForKey(key); + return !entry.isEmpty() && compareKeys(key, entry.key) ? entry : null; + } + + @Override + public Map.Entry select(K key) { + Reference> reference + = new Reference>(); + if (!selectR(root.left, -1, key, reference)) { + return reference.get(); + } + return null; + } + + @Override + public Map.Entry select(K key, Cursor cursor) { + Reference> reference + = new Reference>(); + selectR(root.left, -1, key, cursor, reference); + return reference.get(); + } + + private boolean selectR(TrieEntry h, int bitIndex, + final K key, final Reference> reference) { + if (h.bitIndex <= bitIndex) { + // If we hit the root Node and it is empty + // we have to look for an alternative best + // matching node. + if (!h.isEmpty()) { + reference.set(h); + return false; + } + return true; + } + + if (!isBitSet(key, h.bitIndex)) { + if (selectR(h.left, h.bitIndex, key, reference)) { + return selectR(h.right, h.bitIndex, key, reference); + } + } else { + if (selectR(h.right, h.bitIndex, key, reference)) { + return selectR(h.left, h.bitIndex, key, reference); + } + } + return false; + } + + /** + * + */ + private boolean selectR(TrieEntry h, int bitIndex, + final K key, final Cursor cursor, + final Reference> reference) { + + if (h.bitIndex <= bitIndex) { + if (!h.isEmpty()) { + Cursor.Decision decision = cursor.select(h); + switch (decision) { + case REMOVE: + throw new UnsupportedOperationException( + "Cannot remove during select"); + case EXIT: + reference.set(h); + return false; // exit + case REMOVE_AND_EXIT: + TrieEntry entry = new TrieEntry( + h.getKey(), h.getValue(), -1); + reference.set(entry); + removeEntry(h); + return false; + case CONTINUE: + // fall through. + } + } + return true; // continue + } + + if (!isBitSet(key, h.bitIndex)) { + if (selectR(h.left, h.bitIndex, key, cursor, reference)) { + return selectR(h.right, h.bitIndex, key, cursor, reference); + } + } else { + if (selectR(h.right, h.bitIndex, key, cursor, reference)) { + return selectR(h.left, h.bitIndex, key, cursor, reference); + } + } + + return false; + } + + @Override + public Map.Entry traverse(Cursor cursor) { + TrieEntry entry = nextEntry(null); + while (entry != null) { + TrieEntry current = entry; + Cursor.Decision decision = cursor.select(current); + entry = nextEntry(current); + switch (decision) { + case EXIT: + return current; + case REMOVE: + removeEntry(current); + break; // out of switch, stay in while loop + case REMOVE_AND_EXIT: + Map.Entry value = new TrieEntry( + current.getKey(), current.getValue(), -1); + removeEntry(current); + return value; + case CONTINUE: // do nothing. 
+ } + } + + return null; + } + + @SuppressWarnings("unchecked") + @Override + public boolean containsKey(Object k) { + if (k == null) { + return false; + } + K key = (K) k; + TrieEntry entry = getNearestEntryForKey(key); + return !entry.isEmpty() && compareKeys(key, entry.key); + } + + @Override + public Set> entrySet() { + if (entrySet == null) { + entrySet = new EntrySet(); + } + return entrySet; + } + + @Override + public Set keySet() { + if (keySet == null) { + keySet = new KeySet(); + } + return keySet; + } + + @Override + public Collection values() { + if (values == null) { + values = new Values(); + } + return values; + } + + @SuppressWarnings("unchecked") + @Override + public V remove(Object k) { + if (k == null) { + return null; + } + K key = (K) k; + TrieEntry current = root.left; + TrieEntry path = root; + while (true) { + if (current.bitIndex <= path.bitIndex) { + if (!current.isEmpty() && compareKeys(key, current.key)) { + return removeEntry(current); + } else { + return null; + } + } + + path = current; + + if (!isBitSet(key, current.bitIndex)) { + current = current.left; + } else { + current = current.right; + } + } + } + + /** + * Returns the nearest entry for a given key. This is useful + * for finding knowing if a given key exists (and finding the value + * for it), or for inserting the key. + *

+ * The actual get implementation. This is very similar to + * selectR but with the exception that it might return the + * root Entry even if it's empty. + */ + TrieEntry getNearestEntryForKey(K key) { + TrieEntry current = root.left; + TrieEntry path = root; + while (true) { + if (current.bitIndex <= path.bitIndex) { + return current; + } + + path = current; + if (!isBitSet(key, current.bitIndex)) { + current = current.left; + } else { + current = current.right; + } + } + } + + /** + * Removes a single entry from the {@link PrefixTree}. + *

+ * If we found a Key (Entry h) then figure out if it's + * an internal (hard to remove) or external Entry (easy + * to remove) + */ + V removeEntry(TrieEntry h) { + if (h != root) { + if (h.isInternalNode()) { + removeInternalEntry(h); + } else { + removeExternalEntry(h); + } + } + + decrementSize(); + return h.setKeyValue(null, null); + } + + /** + * Removes an external entry from the {@link PrefixTree}. + *

+ * If it's an external Entry then just remove it. + * This is very easy and straight forward. + */ + private void removeExternalEntry(TrieEntry h) { + if (h == root) { + throw new IllegalArgumentException("Cannot delete root Entry!"); + } else if (!h.isExternalNode()) { + throw new IllegalArgumentException(h + " is not an external Entry!"); + } + + TrieEntry parent = h.parent; + TrieEntry child = (h.left == h) ? h.right : h.left; + + if (parent.left == h) { + parent.left = child; + } else { + parent.right = child; + } + + // either the parent is changing, or the predecessor is changing. + if (child.bitIndex > parent.bitIndex) { + child.parent = parent; + } else { + child.predecessor = parent; + } + + } + + /** + * Removes an internal entry from the {@link PrefixTree}. + *

+ * If it's an internal Entry then "good luck" with understanding + * this code. The Idea is essentially that Entry p takes Entry h's + * place in the trie which requires some re-wiring. + */ + private void removeInternalEntry(TrieEntry h) { + if (h == root) { + throw new IllegalArgumentException("Cannot delete root Entry!"); + } else if (!h.isInternalNode()) { + throw new IllegalArgumentException(h + " is not an internal Entry!"); + } + + TrieEntry p = h.predecessor; + + // Set P's bitIndex + p.bitIndex = h.bitIndex; + + // Fix P's parent, predecessor and child Nodes + { + TrieEntry parent = p.parent; + TrieEntry child = (p.left == h) ? p.right : p.left; + + // if it was looping to itself previously, + // it will now be pointed from it's parent + // (if we aren't removing it's parent -- + // in that case, it remains looping to itself). + // otherwise, it will continue to have the same + // predecessor. + if (p.predecessor == p && p.parent != h) { + p.predecessor = p.parent; + } + + if (parent.left == p) { + parent.left = child; + } else { + parent.right = child; + } + + if (child.bitIndex > parent.bitIndex) { + child.parent = parent; + } + } + + // Fix H's parent and child Nodes + { + // If H is a parent of its left and right child + // then change them to P + if (h.left.parent == h) { + h.left.parent = p; + } + + if (h.right.parent == h) { + h.right.parent = p; + } + + // Change H's parent + if (h.parent.left == h) { + h.parent.left = p; + } else { + h.parent.right = p; + } + } + + // Copy the remaining fields from H to P + //p.bitIndex = h.bitIndex; + p.parent = h.parent; + p.left = h.left; + p.right = h.right; + + // Make sure that if h was pointing to any uplinks, + // p now points to them. + if (isValidUplink(p.left, p)) { + p.left.predecessor = p; + } + + if (isValidUplink(p.right, p)) { + p.right.predecessor = p; + } + } + + /** + * Returns the entry lexicographically after the given entry. + * If the given entry is null, returns the first node. + */ + TrieEntry nextEntry(TrieEntry node) { + if (node == null) { + return firstEntry(); + } else { + return nextEntryImpl(node.predecessor, node, null); + } + } + + /** + * Scans for the next node, starting at the specified point, and using 'previous' + * as a hint that the last node we returned was 'previous' (so we know not to return + * it again). If 'tree' is non-null, this will limit the search to the given tree. + *

+ * The basic premise is that each iteration follows these steps:
+ *
+ * 1) Scan all the way to the left.
+ *    a) If we already started from this node last time, proceed to Step 2.
+ *    b) If a valid uplink is found, use it.
+ *    c) If the result is an empty node (root not set), break the scan.
+ *    d) If we already returned the left node, break the scan.
+ *
+ * 2) Check the right.
+ *    a) If we already returned the right node, proceed to Step 3.
+ *    b) If it is a valid uplink, use it.
+ *    c) Do Step 1 from the right node.
+ *
+ * 3) Back up through the parents until we find a parent
+ *    that we're not the right child of.
+ *
+ * 4) If there's no right child of that parent, the iteration is finished.
+ *    Otherwise continue to Step 5.
+ *
+ * 5) Check to see if the right child is a valid uplink.
+ *    a) If we already returned that child, proceed to Step 6.
+ *       Otherwise, use it.
+ *
+ * 6) If the right child of the parent is the parent itself, we've
+ *    already found & returned the end of the Trie, so exit.
+ *

+ * 7) Do Step 1 on the parent's right child. + */ + TrieEntry nextEntryImpl(TrieEntry start, + TrieEntry previous, TrieEntry tree) { + + TrieEntry current = start; + + // Only look at the left if this was a recursive or + // the first check, otherwise we know we've already looked + // at the left. + if (previous == null || start != previous.predecessor) { + while (!current.left.isEmpty()) { + // stop traversing if we've already + // returned the left of this node. + if (previous == current.left) { + break; + } + + if (isValidUplink(current.left, current)) { + return current.left; + } + + current = current.left; + } + } + + // If there's no data at all, exit. + if (current.isEmpty()) { + return null; + } + + // If we've already returned the left, + // and the immediate right is null, + // there's only one entry in the Trie + // which is stored at the root. + // + // / ("") <-- root + // \_/ \ + // null <-- 'current' + // + if (current.right == null) { + return null; + } + + // If nothing valid on the left, try the right. + if (previous != current.right) { + // See if it immediately is valid. + if (isValidUplink(current.right, current)) { + return current.right; + } + + // Must search on the right's side if it wasn't initially valid. + return nextEntryImpl(current.right, previous, tree); + } + + // Neither left nor right are valid, find the first parent + // whose child did not come from the right & traverse it. + while (current == current.parent.right) { + // If we're going to traverse to above the subtree, stop. + if (current == tree) { + return null; + } + + current = current.parent; + } + + // If we're on the top of the subtree, we can't go any higher. + if (current == tree) { + return null; + } + + // If there's no right, the parent must be root, so we're done. + if (current.parent.right == null) { + return null; + } + + // If the parent's right points to itself, we've found one. + if (previous != current.parent.right + && isValidUplink(current.parent.right, current.parent)) { + return current.parent.right; + } + + // If the parent's right is itself, there can't be any more nodes. + if (current.parent.right == current.parent) { + return null; + } + + // We need to traverse down the parent's right's path. + return nextEntryImpl(current.parent.right, previous, tree); } - public PatriciaTrie(KeyAnalyzer keyAnalyzer, - Map m) { - super(keyAnalyzer, m); + /** + * Returns the first entry the {@link PrefixTree} is storing. + *

+ * This is implemented by going always to the left until + * we encounter a valid uplink. That uplink is the first key. + */ + TrieEntry firstEntry() { + // if Trie is empty, no first node. + if (isEmpty()) { + return null; + } + + return followLeft(root); } - @Override - public Comparator comparator() { - return keyAnalyzer; + /** + * Goes left through the tree until it finds a valid node. + */ + TrieEntry followLeft(TrieEntry node) { + while (true) { + TrieEntry child = node.left; + // if we hit root and it didn't have a node, go right instead. + if (child.isEmpty()) { + child = node.right; + } + + if (child.bitIndex <= node.bitIndex) { + return child; + } + + node = child; + } } - @Override - public SortedMap prefixMap(K prefix) { - int lengthInBits = lengthInBits(prefix); - if (lengthInBits == 0) { - return this; + /** + * A {@link Reference} allows us to return something through a Method's + * argument list. An alternative would be to an Array with a length of + * one (1) but that leads to compiler warnings. Computationally and memory + * wise there's no difference (except for the need to load the + * {@link Reference} Class but that happens only once). + */ + private static class Reference { + + private E item; + + public void set(E item) { + this.item = item; } - return new PrefixRangeMap(prefix); + public E get() { + return item; + } } - @Override - public K firstKey() { - return firstEntry().getKey(); - } + /** + * A {@link PrefixTree} is a set of {@link TrieEntry} nodes + */ + static class TrieEntry extends BasicEntry { - @Override - public K lastKey() { - TrieEntry entry = lastEntry(); - if (entry != null) { - return entry.getKey(); + private static final long serialVersionUID = 4596023148184140013L; + + /** + * The index this entry is comparing. + */ + protected int bitIndex; + + /** + * The parent of this entry. + */ + protected TrieEntry parent; + + /** + * The left child of this entry. + */ + protected TrieEntry left; + + /** + * The right child of this entry. + */ + protected TrieEntry right; + + /** + * The entry who uplinks to this entry. + */ + protected TrieEntry predecessor; + + public TrieEntry(K key, V value, int bitIndex) { + super(key, value); + + this.bitIndex = bitIndex; + + this.parent = null; + this.left = this; + this.right = null; + this.predecessor = this; + } + + /** + * Whether or not the entry is storing a key. + * Only the root can potentially be empty, all other + * nodes must have a key. 
+ */ + public boolean isEmpty() { + return key == null; + } + + /** + * Neither the left nor right child is a loopback + */ + public boolean isInternalNode() { + return left != this && right != this; + } + + /** + * Either the left or right child is a loopback + */ + public boolean isExternalNode() { + return !isInternalNode(); + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + + if (bitIndex == -1) { + buffer.append("RootEntry("); + } else { + buffer.append("Entry("); + } + + buffer.append("key=").append(getKey()).append(" [").append(bitIndex).append("], "); + buffer.append("value=").append(getValue()).append(", "); + //buffer.append("bitIndex=").append(bitIndex).append(", "); + + if (parent != null) { + if (parent.bitIndex == -1) { + buffer.append("parent=").append("ROOT"); + } else { + buffer.append("parent=").append(parent.getKey()).append(" [").append(parent.bitIndex).append("]"); + } + } else { + buffer.append("parent=").append("null"); + } + buffer.append(", "); + + if (left != null) { + if (left.bitIndex == -1) { + buffer.append("left=").append("ROOT"); + } else { + buffer.append("left=").append(left.getKey()).append(" [").append(left.bitIndex).append("]"); + } + } else { + buffer.append("left=").append("null"); + } + buffer.append(", "); + + if (right != null) { + if (right.bitIndex == -1) { + buffer.append("right=").append("ROOT"); + } else { + buffer.append("right=").append(right.getKey()).append(" [").append(right.bitIndex).append("]"); + } + } else { + buffer.append("right=").append("null"); + } + buffer.append(", "); + + if (predecessor != null) { + if (predecessor.bitIndex == -1) { + buffer.append("predecessor=").append("ROOT"); + } else { + buffer.append("predecessor=").append(predecessor.getKey()).append(" [").append(predecessor.bitIndex).append("]"); + } + } + + buffer.append(")"); + return buffer.toString(); } - return null; } - @Override - public SortedMap headMap(K toKey) { - return new RangeEntryMap(null, toKey); + + /** + * This is a entry set view of the {@link PrefixTree} as returned + * by {@link Map#entrySet()} + */ + private class EntrySet extends AbstractSet> { + + @Override + public Iterator> iterator() { + return new EntryIterator(); + } + + @Override + public boolean contains(Object o) { + if (!(o instanceof Map.Entry)) { + return false; + } + + TrieEntry candidate = getEntry(((Map.Entry) o).getKey()); + return candidate != null && candidate.equals(o); + } + + @Override + public boolean remove(Object o) { + int size = size(); + PatriciaTrie.this.remove(o); + return size != size(); + } + + @Override + public int size() { + return PatriciaTrie.this.size(); + } + + @Override + public void clear() { + PatriciaTrie.this.clear(); + } + + /** + * An {@link Iterator} that returns {@link Entry} Objects + */ + private class EntryIterator extends TrieIterator> { + @Override + public Map.Entry next() { + return nextEntry(); + } + } } - @Override - public SortedMap subMap(K fromKey, K toKey) { - return new RangeEntryMap(fromKey, toKey); + /** + * This is a key set view of the {@link PrefixTree} as returned + * by {@link Map#keySet()} + */ + private class KeySet extends AbstractSet { + + @Override + public Iterator iterator() { + return new KeyIterator(); + } + + @Override + public int size() { + return PatriciaTrie.this.size(); + } + + @Override + public boolean contains(Object o) { + return containsKey(o); + } + + @Override + public boolean remove(Object o) { + int size = size(); + PatriciaTrie.this.remove(o); + return size != 
size(); + } + + @Override + public void clear() { + PatriciaTrie.this.clear(); + } + + /** + * An {@link Iterator} that returns Key Objects + */ + private class KeyIterator extends TrieIterator { + @Override + public K next() { + return nextEntry().getKey(); + } + } } - @Override - public SortedMap tailMap(K fromKey) { - return new RangeEntryMap(fromKey, null); + /** + * This is a value view of the {@link PrefixTree} as returned + * by {@link Map#values()} + */ + private class Values extends AbstractCollection { + + @Override + public Iterator iterator() { + return new ValueIterator(); + } + + @Override + public int size() { + return PatriciaTrie.this.size(); + } + + @Override + public boolean contains(Object o) { + return containsValue(o); + } + + @Override + public void clear() { + PatriciaTrie.this.clear(); + } + + @Override + public boolean remove(Object o) { + for (Iterator it = iterator(); it.hasNext(); ) { + V value = it.next(); + if ((value == null ? o == null : value.equals(o))) { + it.remove(); + return true; + } + } + return false; + } + + /** + * An {@link Iterator} that returns Value Objects + */ + private class ValueIterator extends TrieIterator { + @Override + public V next() { + return nextEntry().getValue(); + } + } } + /** + * An iterator for the entries. + */ + abstract class TrieIterator implements Iterator { + + /** + * For fast-fail + */ + protected int expectedModCount = PatriciaTrie.this.modCount; + + protected TrieEntry next; // the next node to return + protected TrieEntry current; // the current entry we're on + + /** + * Starts iteration from the root + */ + protected TrieIterator() { + next = PatriciaTrie.this.nextEntry(null); + } + + /** + * Starts iteration at the given entry + */ + protected TrieIterator(TrieEntry firstEntry) { + next = firstEntry; + } + + /** + * Returns the next {@link TrieEntry} + */ + protected TrieEntry nextEntry() { + if (expectedModCount != PatriciaTrie.this.modCount) { + throw new ConcurrentModificationException(); + } + + TrieEntry e = next; + if (e == null) { + throw new NoSuchElementException(); + } + + next = findNext(e); + current = e; + return e; + } + + /** + * @see PatriciaTrie#nextEntry(TrieEntry) + */ + protected TrieEntry findNext(TrieEntry prior) { + return PatriciaTrie.this.nextEntry(prior); + } + + @Override + public boolean hasNext() { + return next != null; + } + + @Override + public void remove() { + if (current == null) { + throw new IllegalStateException(); + } + + if (expectedModCount != PatriciaTrie.this.modCount) { + throw new ConcurrentModificationException(); + } + + TrieEntry node = current; + current = null; + PatriciaTrie.this.removeEntry(node); + + expectedModCount = PatriciaTrie.this.modCount; + } + } /** * Returns an entry strictly higher than the given key, * or null if no such entry exists. 
@@ -464,7 +1537,7 @@ public class PatriciaTrie extends AbstractPatriciaTrie { /** * A range view of the {@link PrefixTree} */ - private abstract class RangeMap extends AbstractMap + abstract class RangeMap extends AbstractMap implements SortedMap { /** @@ -504,6 +1577,7 @@ public class PatriciaTrie extends AbstractPatriciaTrie { return PatriciaTrie.this.comparator(); } + @SuppressWarnings("unchecked") @Override public boolean containsKey(Object key) { if (!inRange((K) key)) { @@ -513,6 +1587,7 @@ public class PatriciaTrie extends AbstractPatriciaTrie { return PatriciaTrie.this.containsKey(key); } + @SuppressWarnings("unchecked") @Override public V remove(Object key) { if (!inRange((K) key)) { @@ -521,6 +1596,7 @@ public class PatriciaTrie extends AbstractPatriciaTrie { return PatriciaTrie.this.remove(key); } + @SuppressWarnings("unchecked") @Override public V get(Object key) { if (!inRange((K) key)) { @@ -669,16 +1745,13 @@ public class PatriciaTrie extends AbstractPatriciaTrie { */ protected RangeEntryMap(K fromKey, boolean fromInclusive, K toKey, boolean toInclusive) { - if (fromKey == null && toKey == null) { throw new IllegalArgumentException("must have a from or to!"); } - if (fromKey != null && toKey != null && keyAnalyzer.compare(fromKey, toKey) > 0) { throw new IllegalArgumentException("fromKey > toKey"); } - this.fromKey = fromKey; this.fromInclusive = fromInclusive; this.toKey = toKey; @@ -688,7 +1761,7 @@ public class PatriciaTrie extends AbstractPatriciaTrie { @Override public K firstKey() { - Map.Entry e = null; + Map.Entry e; if (fromKey == null) { e = firstEntry(); } else { @@ -698,7 +1771,6 @@ public class PatriciaTrie extends AbstractPatriciaTrie { e = higherEntry(fromKey); } } - K first = e != null ? e.getKey() : null; if (e == null || toKey != null && !inToRange(first, false)) { throw new NoSuchElementException(); @@ -719,7 +1791,6 @@ public class PatriciaTrie extends AbstractPatriciaTrie { e = lowerEntry(toKey); } } - K last = e != null ? e.getKey() : null; if (e == null || fromKey != null && !inFromRange(last, false)) { throw new NoSuchElementException(); @@ -944,48 +2015,40 @@ public class PatriciaTrie extends AbstractPatriciaTrie { entry = nextEntry((TrieEntry) entry); toKey = entry == null ? null : entry.getKey(); } - expectedModCount = PatriciaTrie.this.modCount; } - return size; } @Override public K firstKey() { fixup(); - - Map.Entry e = null; + Map.Entry e; if (fromKey == null) { e = firstEntry(); } else { e = higherEntry(fromKey); } - K first = e != null ? e.getKey() : null; if (e == null || !isPrefix(first, prefix)) { throw new NoSuchElementException(); } - return first; } @Override public K lastKey() { fixup(); - - Map.Entry e = null; + Map.Entry e; if (toKey == null) { e = lastEntry(); } else { e = lowerEntry(toKey); } - K last = e != null ? 
e.getKey() : null; if (e == null || !isPrefix(last, prefix)) { throw new NoSuchElementException(); } - return last; } @@ -1050,8 +2113,7 @@ public class PatriciaTrie extends AbstractPatriciaTrie { } @Override - protected SortedMap createRangeMap( - K fromKey, boolean fromInclusive, + protected SortedMap createRangeMap(K fromKey, boolean fromInclusive, K toKey, boolean toInclusive) { return new RangeEntryMap(fromKey, fromInclusive, toKey, toInclusive); } diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/patricia/package-info.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/patricia/package-info.java new file mode 100644 index 0000000..a2bd997 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/patricia/package-info.java @@ -0,0 +1,5 @@ +/** + * Taken from + * https://github.com/rkapsi/patricia-trie + */ +package org.xbib.datastructures.trie.patricia; \ No newline at end of file diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/DuplicateKeyException.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/DuplicateKeyException.java index d57f3a3..8e09e74 100644 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/DuplicateKeyException.java +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/DuplicateKeyException.java @@ -3,8 +3,10 @@ package org.xbib.datastructures.trie.radix; /** * Exception thrown if a duplicate key is inserted in a {@link RadixTree} */ +@SuppressWarnings("serial") public class DuplicateKeyException extends RuntimeException { + public DuplicateKeyException(String msg) { super(msg); } -} \ No newline at end of file +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/Node.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/Node.java index 1875a62..3b2dfc8 100644 --- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/Node.java +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/Node.java @@ -8,7 +8,7 @@ import java.util.List; * * @param */ -class Node { +public class Node { private String key; diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/AdaptiveRadixTree.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/AdaptiveRadixTree.java new file mode 100644 index 0000000..8b0c956 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/AdaptiveRadixTree.java @@ -0,0 +1,1129 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.AbstractMap; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.ConcurrentModificationException; +import java.util.Iterator; +import java.util.Map; +import java.util.NavigableMap; +import java.util.NavigableSet; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.Set; +import java.util.SortedMap; + +/** + * An Adaptive Radix tree based {@link NavigableMap} implementation. + * The map is sorted according to the {@linkplain BinaryComparable} provided at map + * creation time. + * + *
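The `BinaryComparable` mentioned here maps a key to its byte representation. A sketch for `String` keys, assuming the interface is the single method `byte[] get(K key)` (inferred, not shown in this hunk):

```java
import java.nio.charset.StandardCharsets;

// Assumed shape of BinaryComparable: byte[] get(K key).
class Utf8StringComparable implements BinaryComparable<String> {
    @Override
    public byte[] get(String key) {
        // ART compares keys byte by byte, so the encoding defines the order.
        return key.getBytes(StandardCharsets.UTF_8);
    }
}
```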

This implementation provides log(k) time cost for the + * {@code containsKey}, {@code get}, {@code put} and {@code remove} + * operations where k is the length of the key. + * Algorithms are adaptations of those as described in the + * paper + * "The Adaptive Radix Tree: ARTful Indexing for Main-Memory Databases" + * by Dr. Viktor Leis. + * + *

+ * <p>
+ * Note that this implementation is not synchronized.
+ * If multiple threads access a map concurrently, and at least one of the
+ * threads modifies the map structurally, it must be synchronized
+ * externally. (A structural modification is any operation that adds or
+ * deletes one or more mappings; merely changing the value associated
+ * with an existing key is not a structural modification.)
+ * <p>
+ * The iterators returned by the {@code iterator} method of the collections
+ * returned by all of this class's "collection view methods" are
+ * fail-fast: if the map is structurally modified at any time after
+ * the iterator is created, in any way except through the iterator's own
+ * {@code remove} method, the iterator will throw a
+ * {@link ConcurrentModificationException}. Thus, in the face of concurrent
+ * modification, the iterator fails quickly and cleanly, rather than risking
+ * arbitrary, non-deterministic behavior at an undetermined time in the future.
+ * <p>
+ * Note that the fail-fast behavior of an iterator cannot be guaranteed,
+ * as it is, generally speaking, impossible to make any hard guarantees in the
+ * presence of unsynchronized concurrent modification. Fail-fast iterators
+ * throw {@code ConcurrentModificationException} on a best-effort basis.
+ * Therefore, it would be wrong to write a program that depended on this
+ * exception for its correctness: the fail-fast behavior of iterators
+ * should be used only to detect bugs.
+ * <p>
+ * Note that null keys are not permitted.
All {@code Map.Entry} pairs returned by methods in this class + * and its views represent snapshots of mappings at the time they were + * produced. They do not support the {@code Entry.setValue} + * method. (Note however that it is possible to change mappings in the + * associated map using {@code put}.) + * + * @param the type of keys maintained by this map + * @param the type of mapped values + * @author Rohan Suri + * @see NavigableMap + * @see BinaryComparable + */ +public class AdaptiveRadixTree extends AbstractMap implements NavigableMap { + // 2^7 = 128 + private static final int BYTE_SHIFT = 1 << Byte.SIZE - 1; + private final BinaryComparable binaryComparable; + private transient EntrySet entrySet; + private transient NavigableMap descendingMap; + private transient KeySet navigableKeySet; + private transient Collection values; + private transient int size = 0; + /** + * The number of structural modifications to the tree. + * To be touched where ever size changes. + */ + private transient int modCount = 0; + + // TODO: offer a bulk create constructor + private Node root; + + public AdaptiveRadixTree(BinaryComparable binaryComparable) { + Objects.requireNonNull(binaryComparable, "Specifying a BinaryComparable is necessary"); + this.binaryComparable = binaryComparable; + } + + /* + updates given node's only child's compressed path to: + given node's compressed path + partialKey to child + child's own compressed path) + */ + static void updateCompressedPathOfOnlyChild(Node4 toCompress, Node onlyChild) { + assert onlyChild != null; + if (!(onlyChild instanceof LeafNode)) { + byte partialKeyToOnlyChild = toCompress.getOnlyChildKey();// toCompress.getKeys()[0]; // R + InnerNode oc = (InnerNode) onlyChild; + // update nextNode's compressed path with toCompress' + int toCopy = Math.min(InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, toCompress.prefixLen + 1); + int leftForMe = InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT - toCopy; + int iHave = Math.min(InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, oc.prefixLen); + + // make space + System.arraycopy(oc.prefixKeys, 0, oc.prefixKeys, toCopy, Math.min(leftForMe, iHave)); + + int toCopyFromToCompress = Math.min(InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, toCompress.prefixLen); + System.arraycopy(toCompress.prefixKeys, 0, oc.prefixKeys, 0, toCopyFromToCompress); + if (toCopyFromToCompress < InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT) { + // we got space left for the partialKey to only child + oc.prefixKeys[toCopyFromToCompress] = partialKeyToOnlyChild; + } + oc.prefixLen += toCompress.prefixLen + 1; + } + } + + // is compressed path equal/more/lesser (0, 1, -1) than key + static int comparePessimisticCompressedPath(InnerNode node, byte[] key, int depth) { + byte[] prefix = node.prefixKeys; + int upperLimitForPessimisticMatch = Math.min(InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, node.prefixLen); + // limit key because if key length greater than compressed path + // and all byte comparisons are same, then also we consider + // compressed path == key length + return compare(prefix, 0, upperLimitForPessimisticMatch, key, depth, Math + .min(depth + upperLimitForPessimisticMatch, key.length)); + } + + private static int compareOptimisticCompressedPath(InnerNode node, byte[] key, int depth) { + int result = comparePessimisticCompressedPath(node, key, depth); + if (result != 0 || node.prefixLen <= InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT) { + return result; + } + // expand optimistic path and compare + byte[] leafBytes = 
getFirstEntry(node).getKeyBytes(); + // limit key because if key length greater than compressed path + // and all byte comparisons are same, then also we consider + // compressed path == key length + return compare(leafBytes, depth + InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, depth + node.prefixLen, + key, depth + InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, Math + .min(depth + node.prefixLen, key.length)); + } + + /* + we reached a lazy expanded leaf node, we gotta expand it now. + but how much should we expand? + since we reached depth X, it means till now both leaf node and new node have same bytes. + now what has been stored lazily is leaf node's key(depth, end). + that's the part over which we need to compute longest common prefix. + that's the part we can path compress. + */ + private static Node lazyExpansion(LeafNode leaf, byte[] keyBytes, K key, V value, int depth) { + + // find LCP + int lcp = 0; + byte[] leafKey = leaf.getKeyBytes(); // loadKey in paper + int end = Math.min(leafKey.length, keyBytes.length); + for (; depth < end && leafKey[depth] == keyBytes[depth]; depth++, lcp++) ; + if (depth == keyBytes.length && depth == leafKey.length) { + // we're referring to a key that already exists, replace value and return current + return leaf; + } + + // create new node with LCP + Node4 pathCompressedNode = new Node4(); + pathCompressedNode.prefixLen = lcp; + int pessimisticLcp = Math.min(lcp, InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT); + System.arraycopy(keyBytes, depth - lcp, pathCompressedNode.prefixKeys, 0, pessimisticLcp); + + // add new key and old leaf as children + LeafNode newLeaf = new LeafNode<>(keyBytes, key, value); + if (depth == keyBytes.length) { + // barca to be inserted, barcalona already exists + // set barca's parent to be this path compressed node + // setup uplink whenever we set downlink + pathCompressedNode.setLeaf(newLeaf); + pathCompressedNode.addChild(leafKey[depth], leaf); // l + } else if (depth == leafKey.length) { + // barcalona to be inserted, barca already exists + pathCompressedNode.setLeaf(leaf); + pathCompressedNode.addChild(keyBytes[depth], newLeaf); // l + } else { + pathCompressedNode.addChild(leafKey[depth], leaf); + pathCompressedNode.addChild(keyBytes[depth], newLeaf); + } + + return pathCompressedNode; + } + + static void removeOptimisticLCPFromCompressedPath(InnerNode node, int depth, int lcp, byte[] leafBytes) { + // lcp cannot be equal to node.prefixLen + // it has to be less, else it'd mean the compressed path matches completely + assert lcp < node.prefixLen && lcp >= InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT : lcp; + + // since there's more compressed path left + // we need to "bring up" more of it what we can take + node.prefixLen = node.prefixLen - lcp - 1; + int end = Math.min(InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, node.prefixLen); + System.arraycopy(leafBytes, depth + 1, node.prefixKeys, 0, end); + } + + static void removePessimisticLCPFromCompressedPath(InnerNode node, int depth, int lcp) { + // lcp cannot be equal to Math.min(InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, node.prefixLen) + // it has to be less, else it'd mean the compressed path matches completely + assert lcp < Math.min(InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, node.prefixLen); + if (node.prefixLen <= InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT) { + node.prefixLen = node.prefixLen - lcp - 1; + System.arraycopy(node.prefixKeys, lcp + 1, node.prefixKeys, 0, node.prefixLen); + } else { + // since there's more compressed path left + // we need 
to "bring up" more of it what we can take + node.prefixLen = node.prefixLen - lcp - 1; + byte[] leafBytes = getFirstEntry(node).getKeyBytes(); + int end = Math.min(InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, node.prefixLen); + System.arraycopy(leafBytes, depth + 1, node.prefixKeys, 0, end); + } + } + + // called when lcp has become more than InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT + static InnerNode branchOutOptimistic(InnerNode node, byte[] keyBytes, K key, V value, int lcp, int depth, + byte[] leafBytes) { + // prefix doesn't match entirely, we have to branch + //assert lcp < node.prefixLen && lcp >= InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT : lcp + ", " + node.prefixLen; + int initialDepth = depth - lcp; + LeafNode leafNode = new LeafNode<>(keyBytes, key, value); + + // new node with updated prefix len, compressed path + Node4 branchOut = new Node4(); + branchOut.prefixLen = lcp; + // note: depth is the updated depth (initialDepth = depth - lcp) + System.arraycopy(keyBytes, initialDepth, branchOut.prefixKeys, 0, InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT); + if (depth == keyBytes.length) { + branchOut.setLeaf(leafNode); + } else { + branchOut.addChild(keyBytes[depth], leafNode); + } + branchOut.addChild(leafBytes[depth], node); // reusing "this" node + + // remove lcp common prefix key from "this" node + removeOptimisticLCPFromCompressedPath(node, depth, lcp, leafBytes); + return branchOut; + } + + static InnerNode branchOutPessimistic(InnerNode node, byte[] keyBytes, K key, V value, int lcp, int depth) { + // pessimistic prefix doesn't match entirely, we have to branch + // BAR, BAZ inserted, now inserting BOZ + //assert lcp < node.prefixLen && lcp < InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT; + + int initialDepth = depth - lcp; + + // create new lazy leaf node for unmatched key? + LeafNode leafNode = new LeafNode<>(keyBytes, key, value); + + // new node with updated prefix len, compressed path + Node4 branchOut = new Node4(); + branchOut.prefixLen = lcp; + // note: depth is the updated depth (initialDepth = depth - lcp) + System.arraycopy(keyBytes, initialDepth, branchOut.prefixKeys, 0, lcp); + if (depth == keyBytes.length) { // key ended it means it is a prefix + branchOut.setLeaf(leafNode); + } else { + branchOut.addChild(keyBytes[depth], leafNode); + } + branchOut.addChild(node.prefixKeys[lcp], node); // reusing "this" node + + // remove lcp common prefix key from "this" node + removePessimisticLCPFromCompressedPath(node, depth, lcp); + return branchOut; + } + + @SuppressWarnings("unchecked") + private static LeafNode getFirstEntry(Node startFrom) { + Node node = startFrom; + Node next = node.firstOrLeaf(); + while (next != null) { + node = next; + next = node.firstOrLeaf(); + } + return (LeafNode) node; + } + + @SuppressWarnings("unchecked") + private static LeafNode getLastEntry(Node startFrom) { + Node node = startFrom; + Node next = node.last(); + while (next != null) { + node = next; + next = node.last(); + } + return (LeafNode) node; + } + + // 0 if a == b + // -1 if a < b + // 1 if a > b + // note: aFrom, bFrom are exclusive bounds + static int compare(byte[] a, int aFrom, int aTo, byte[] b, int bFrom, int bTo) { + int i = aFrom, j = bFrom; + for (; i < aTo && j < bTo && a[i] == b[j]; i++, j++) ; + if (i == aTo && j == bTo) { + return 0; + } else if (i == aTo) { + return -1; + } else if (j == bTo) { + return 1; + } else { + return unsigned(a[i]) < unsigned(b[j]) ? 
-1 : 1; + } + } + + /** + * For Node4, Node16 to interpret every byte as unsigned when storing partial keys. + * Node 48, Node256 simply use {@link Byte#toUnsignedInt(byte)} + * to index into their key arrays. + */ + static byte unsigned(byte b) { + return (byte) (b ^ BYTE_SHIFT); + } + + /** + * Return key for entry, or null if null + * Note: taken from TreeMap + */ + static K keyOrNull(Entry e) { + return (e == null) ? null : e.getKey(); + } + + /** + * Return SimpleImmutableEntry for entry, or null if null + * Note: taken from TreeMap + */ + static Map.Entry exportEntry(Entry e) { + return (e == null) ? null : + new AbstractMap.SimpleImmutableEntry<>(e); + } + + /** + * Returns the key corresponding to the specified Entry. + * + * @throws NoSuchElementException if the Entry is null + * Note: taken from TreeMap + */ + static K key(Entry e) { + if (e == null) + throw new NoSuchElementException(); + return e.getKey(); + } + + static LeafNode successor(Node node) { + InnerNode uplink; + while ((uplink = node.parent()) != null) { + if (uplink.getLeaf() == node) { + // we surely have a first node + return getFirstEntry(uplink.first()); + } + Node greater = uplink.greater(node.uplinkKey()); + if (greater != null) { + return getFirstEntry(greater); + } + node = uplink; + } + return null; + } + + @SuppressWarnings("unchecked") + static LeafNode predecessor(Node node) { + InnerNode uplink; + while ((uplink = node.parent()) != null) { + if (uplink.getLeaf() == node) { // least node, go up + node = uplink; + continue; + } + Node lesser = uplink.lesser(node.uplinkKey()); + if (lesser != null) { + return getLastEntry(lesser); + } else if (uplink.hasLeaf()) { + return (LeafNode) uplink.getLeaf(); + } + node = uplink; + } + return null; + } + + /** + * Test two values for equality. Differs from o1.equals(o2) only in + * that it copes with {@code null} o1 properly. + * Note: Taken from TreeMap + */ + static boolean valEquals(Object o1, Object o2) { + return (o1 == null ? o2 == null : o1.equals(o2)); + } + + int getModCount() { + return modCount; + } + + public V put(K key, V value) { + if (key == null) { + throw new NullPointerException(); + } + byte[] bytes = binaryComparable.get(key); + if (root == null) { + // create leaf node and set root to that + root = new LeafNode<>(bytes, key, value); + size = 1; + modCount++; + return null; + } + return put(bytes, key, value); + } + + // note: taken from TreeMap + @Override + public boolean containsKey(Object key) { + return getEntry(key) != null; + } + + // note: taken from TreeMap + // why doesn't TreeMap use AbstractMap's provided impl? + // the only difference is default impl requires an iterator to be created, + // but it ultimately uses the successor calls to iterate. + @Override + public boolean containsValue(Object value) { + for (LeafNode e = getFirstEntry(); e != null; e = successor(e)) { + if (valEquals(value, e.getValue())) { + return true; + } + } + return false; + } + + // Note: taken from TreeMap + public Map.Entry pollFirstEntry() { + LeafNode p = getFirstEntry(); + Map.Entry result = exportEntry(p); + if (p != null) { + deleteEntry(p); + } + return result; + } + + // Note: taken from TreeMap + public Map.Entry pollLastEntry() { + LeafNode p = getLastEntry(); + Map.Entry result = exportEntry(p); + if (p != null) { + deleteEntry(p); + } + return result; + } + + @Override + public void clear() { + size = 0; + root = null; + modCount++; + } + + @Override + public Set> entrySet() { + EntrySet es = entrySet; + return (es != null) ? 
es : (entrySet = new EntrySet<>(this)); + } + + @Override + public Collection values() { + Collection c = values; + return (c != null) ? c : (values = new Values<>(this)); + } + + @Override + public V get(Object key) { + LeafNode entry = getEntry(key); + return (entry == null ? null : entry.getValue()); + } + + /** + * Returns this map's entry for the given key, or {@code null} if the map + * does not contain an entry for the key. + * + * @return this map's entry for the given key, or {@code null} if the map + * does not contain an entry for the key + * @throws ClassCastException if the specified key cannot be compared + * with the keys currently in the map + * @throws NullPointerException if the specified key is null + */ + LeafNode getEntry(Object key) { + if (key == null) + throw new NullPointerException(); + if (root == null) { // empty tree + return null; + } + @SuppressWarnings("unchecked") + K k = (K) key; + byte[] bytes = binaryComparable.get(k); + return getEntry(root, bytes); + } + + @Override + public V remove(Object key) { + LeafNode p = getEntry(key); + if (p == null) + return null; + V oldValue = p.getValue(); + deleteEntry(p); + return oldValue; + } + + /* + given node only has one child and has a parent. + we eliminate this node and pull up it's only child, + linking it with the parent. + + transform: parent --> partial key to this node --> partialKey to only child + to: parent --> same partial key to this node, but now directly to only child + + also update child's compressed path updated to: + this node's compressed path + partialKey to child + child's own compressed path) + */ + private void pathCompressOnlyChild(Node4 toCompress) { + Node onlyChild = toCompress.getChild()[0]; + updateCompressedPathOfOnlyChild(toCompress, onlyChild); + replace(toCompress.uplinkKey(), toCompress.parent(), onlyChild); + } + + @SuppressWarnings("unchecked") + private LeafNode getEntry(Node node, byte[] key) { + int depth = 0; + boolean skippedPrefix = false; + while (true) { + if (node instanceof LeafNode) { + LeafNode leaf = (LeafNode) node; + byte[] leafBytes = leaf.getKeyBytes(); + int startFrom = skippedPrefix ? 
0 : depth; + if (Arrays.equals(leafBytes, startFrom, leafBytes.length, key, startFrom, key.length)) { + return leaf; + } + return null; + } + + InnerNode innerNode = (InnerNode) node; + + if (key.length < depth + innerNode.prefixLen) { + return null; + } + + if (innerNode.prefixLen <= InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT) { + // match pessimistic compressed path completely + for (int i = 0; i < innerNode.prefixLen; i++) { + if (innerNode.prefixKeys[i] != key[depth + i]) + return null; + } + } else { + // else take optimistic jump + skippedPrefix = true; + } + + // took pessimistic match or optimistic jump, continue search + depth = depth + innerNode.prefixLen; + Node nextNode; + if (depth == key.length) { + nextNode = innerNode.getLeaf(); + if (!skippedPrefix) { + return (LeafNode) nextNode; + } + } else { + nextNode = innerNode.findChild(key[depth]); + depth++; + } + if (nextNode == null) { + return null; + } + // set fields for next iteration + node = nextNode; + } + } + + void replace(int depth, byte[] key, InnerNode prevDepth, Node replaceWith) { + if (prevDepth == null) { + assert depth == 0; + root = replaceWith; + Node.replaceUplink(null, root); + } else { + assert depth > 0; + prevDepth.replace(key[depth - 1], replaceWith); + } + } + + // replace down link + private void replace(byte partialKey, InnerNode prevDepth, Node replaceWith) { + if (prevDepth == null) { + root = replaceWith; + Node.replaceUplink(null, root); + } else { + prevDepth.replace(partialKey, replaceWith); + } + } + + @SuppressWarnings("unchecked") + private V put(byte[] keyBytes, K key, V value) { + int depth = 0; + InnerNode prevDepth = null; + Node node = root; + while (true) { + if (node instanceof LeafNode) { + @SuppressWarnings("unchecked") + LeafNode leaf = (LeafNode) node; + Node pathCompressedNode = lazyExpansion(leaf, keyBytes, key, value, depth); + if (pathCompressedNode == node) { + // key already exists + V oldValue = leaf.getValue(); + leaf.setValue(value); + return oldValue; + } + // we gotta replace the prevDepth's child pointer to this new node + replace(depth, keyBytes, prevDepth, pathCompressedNode); + size++; + modCount++; + return null; + } + // compare with compressed path + InnerNode innerNode = (InnerNode) node; + int newDepth = matchCompressedPath(innerNode, keyBytes, key, value, depth, prevDepth); + if (newDepth == -1) { // matchCompressedPath already inserted the leaf node for us + size++; + modCount++; + return null; + } + + if (keyBytes.length == newDepth) { + LeafNode leaf = (LeafNode) innerNode.getLeaf(); + V oldValue = leaf.getValue(); + leaf.setValue(value); + return oldValue; + } + + // we're now at line 26 in paper + byte partialKey = keyBytes[newDepth]; + Node child = innerNode.findChild(partialKey); + if (child != null) { + // set fields for next iteration + prevDepth = innerNode; + depth = newDepth + 1; + node = child; + continue; + } + + // add this key as child + Node leaf = new LeafNode<>(keyBytes, key, value); + if (innerNode.isFull()) { + innerNode = innerNode.grow(); + replace(depth, keyBytes, prevDepth, innerNode); + } + innerNode.addChild(partialKey, leaf); + size++; + modCount++; + return null; + } + } + + /* + 1) pessimistic path matched entirely + + case 1: key has nothing left (can't happen, else they'd be prefixes and our key transformations + must ensure that it is not possible) + case 2: prefixLen <= InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT + we're done here, we can do a findChild for next partial key (caller's depth + lcp + 1) + case 3: prefixLen is 
more i.e. an optimistic path is left to match. + traverse down and get leaf to match remaining optimistic prefix path. + case 3a: optimistic path matches, we can do findChild for next partial key + case 3b: have to split + + 2) pessimistic path did not match, we have to split + */ + private int matchCompressedPath(InnerNode node, byte[] keyBytes, K key, V value, int depth, InnerNode prevDepth) { + int lcp = 0; + int end = Math.min(keyBytes.length - depth, Math.min(node.prefixLen, InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT)); + // match pessimistic compressed path + while (lcp < end && keyBytes[depth] == node.prefixKeys[lcp]) { + lcp++; + depth++; + } + if (lcp == node.prefixLen) { + if (depth == keyBytes.length && !node.hasLeaf()) { // key ended, it means it is a prefix + LeafNode leafNode = new LeafNode<>(keyBytes, key, value); + node.setLeaf(leafNode); + return -1; + } else { + return depth; + } + } + + InnerNode newNode; + if (lcp == InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT) { + // match remaining optimistic path + byte[] leafBytes = getFirstEntry(node).getKeyBytes(); + int leftToMatch = node.prefixLen - InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT; + end = Math.min(keyBytes.length, depth + leftToMatch); + /* + match remaining optimistic path + if we match entirely we return with new depth and caller can proceed with findChild (depth + lcp + 1) + if we don't match entirely, then we split + */ + while (depth < end && keyBytes[depth] == leafBytes[depth]) { + depth++; + lcp++; + } + if (lcp == node.prefixLen) { + if (depth == keyBytes.length && !node.hasLeaf()) { // key ended, it means it is a prefix + LeafNode leafNode = new LeafNode<>(keyBytes, key, value); + node.setLeaf(leafNode); + return -1; + } else { + // matched entirely, but key is left + return depth; + } + } else { + newNode = branchOutOptimistic(node, keyBytes, key, value, lcp, depth, leafBytes); + } + } else { + newNode = branchOutPessimistic(node, keyBytes, key, value, lcp, depth); + } + // replace "this" node with newNode + // initialDepth can be zero even if prefixLen is not zero. + // the root node could have a prefix too, for example after insertions of + // BAR, BAZ? 
prefix would be BA kept in the root node itself + replace(depth - lcp, keyBytes, prevDepth, newNode); + return -1; // we've already inserted the leaf node, caller needs to do nothing more + } + + /* + Returns null if the ART is empty + */ + @SuppressWarnings("unchecked") + LeafNode getFirstEntry() { + if (isEmpty()) { + return null; + } + return getFirstEntry(root); + } + + /* + Returns null if the ART is empty + */ + @SuppressWarnings("unchecked") + LeafNode getLastEntry() { + if (isEmpty()) { + return null; + } + return getLastEntry(root); + } + + @Override + public Entry lowerEntry(K key) { + return exportEntry(getLowerEntry(key)); + } + + @Override + public K lowerKey(K key) { + return keyOrNull(getLowerEntry(key)); + } + + @Override + public Entry floorEntry(K key) { + return exportEntry(getFloorEntry(key)); + } + + @Override + public K floorKey(K key) { + return keyOrNull(getFloorEntry(key)); + } + + LeafNode getLowerEntry(K k) { + return getLowerOrFloorEntry(true, k); + } + + LeafNode getLowerEntry(byte[] k) { + if (isEmpty()) { + return null; + } + return getLowerOrFloorEntry(true, k); + } + + LeafNode getFloorEntry(K k) { + return getLowerOrFloorEntry(false, k); + } + + LeafNode getFloorEntry(byte[] k) { + if (isEmpty()) { + return null; + } + return getLowerOrFloorEntry(false, k); + } + + // note: caller needs to check if map is empty + @SuppressWarnings("unchecked") + private LeafNode getLowerOrFloorEntry(boolean lower, byte[] key) { + int depth = 0; + Node node = root; + while (true) { + if (node instanceof LeafNode) { + // binary comparable comparison + @SuppressWarnings("unchecked") + LeafNode leafNode = (LeafNode) node; + byte[] leafKey = leafNode.getKeyBytes(); + if (compare(key, depth, key.length, leafKey, depth, leafKey.length) >= (lower ? 
1 : 0)) { + return leafNode; + } + return predecessor(leafNode); + } + InnerNode innerNode = (InnerNode) node; + // compare compressed path + int compare = compareOptimisticCompressedPath((InnerNode) node, key, depth); + if (compare < 0) { // lesser + return getLastEntry(node); + } else if (compare > 0) { // greater, that means all children of this node will be greater than key + return predecessor(node); + } + // compressed path matches completely + depth += innerNode.prefixLen; + if (depth == key.length) { + if (!lower && innerNode.hasLeaf()) { + return (LeafNode) innerNode.getLeaf(); + } + return predecessor(innerNode); + } + Node child = innerNode.floor(key[depth]); + if (child == null) { + return leafOrPredecessor(innerNode); + } else if (child.uplinkKey() != key[depth]) { + return getLastEntry(child); + } + depth++; + node = child; + } + } + + private LeafNode getLowerOrFloorEntry(boolean lower, K k) { + if (isEmpty()) { + return null; + } + byte[] key = binaryComparable.get(k); + return getLowerOrFloorEntry(lower, key); + } + + @SuppressWarnings("unchecked") + private LeafNode leafOrPredecessor(InnerNode innerNode) { + if (innerNode.hasLeaf()) { + return (LeafNode) innerNode.getLeaf(); + } + return predecessor(innerNode); + } + + @Override + public Entry ceilingEntry(K key) { + return exportEntry(getCeilingEntry(key)); + } + + int compare(K k1, byte[] k2Bytes) { + byte[] k1Bytes = binaryComparable.get(k1); + return compare(k1Bytes, 0, k1Bytes.length, k2Bytes, 0, k2Bytes.length); + } + + @Override + public K ceilingKey(K key) { + return keyOrNull(getCeilingEntry(key)); + } + + LeafNode getHigherEntry(K k) { + return getHigherOrCeilEntry(false, k); + } + + LeafNode getHigherEntry(byte[] key) { + if (isEmpty()) { + return null; + } + return getHigherOrCeilEntry(false, key); + } + + LeafNode getCeilingEntry(K k) { + return getHigherOrCeilEntry(true, k); + } + + LeafNode getCeilingEntry(byte[] key) { + if (isEmpty()) { + return null; + } + return getHigherOrCeilEntry(true, key); + } + + /* + On level X match compressed path of "this" node + if matches, then take follow on pointer and continue matching + if doesn't, see if compressed path greater/smaller than key + if greater, return the first node of the this level i.e. call first on this node and return. + if lesser, go one level up (using parent link) + and find the next partialKey greater than the uplinking partialKey on level X-1. + if you got one, simply take the first child nodes at each down level and return + the leaf (left most traversal) + if not, then we got to go on level X-2 and find the next greater + and keep going level ups until we either find a next greater partialKey + or we find root (which will have parent null and hence search ends). + + What if all compressed paths matched, then when taking the next follow on pointer, + we reach a leafNode? or a null? + if leafNode then it means, uptil now the leafNode has the same prefix as the provided key. + if leafNode >= given key, then return leafNode + if leafNode < given key, then take leafNode's parent uplink and find next + greater partialKey than the uplinking partialKey on level leaf-1. + if you reach a null, then it means key doesn't exist, + but before taking this previous partialKey, the entire path did exist. + Hence we come up a level from where we got the null. + Find the next higher partialKey than which we took for null + (no uplink from the null node, so we do it before the recursive call itself). 
+ + so it seems the uplinking traversal is same in all cases + */ + // note: caller needs to check if map is empty + private LeafNode getHigherOrCeilEntry(boolean ceil, byte[] key) { + int depth = 0; + Node node = root; + while (true) { + if (node instanceof LeafNode) { + // binary comparable comparison + @SuppressWarnings("unchecked") + LeafNode leafNode = (LeafNode) node; + byte[] leafKey = leafNode.getKeyBytes(); + if (compare(key, depth, key.length, leafKey, depth, leafKey.length) < (ceil ? 1 : 0)) { + return leafNode; + } + return successor(leafNode); + } + InnerNode innerNode = (InnerNode) node; + // compare compressed path + int compare = compareOptimisticCompressedPath(innerNode, key, depth); + if (compare > 0) { // greater + return getFirstEntry(node); + } else if (compare < 0) { // lesser, that means all children of this node will be lesser than key + return successor(node); + } + + // compressed path matches completely + depth += innerNode.prefixLen; + if (depth == key.length) { + // if ceil is true, then we are allowed to return the prefix ending here (leaf of this node) + // if ceil is false, then we need something higher and not the prefix, hence we start traversal + // from first() + return ceil ? getFirstEntry(innerNode) : getFirstEntry(innerNode.first()); + } + Node child = innerNode.ceil(key[depth]); + if (child == null) { // on this level, no child is greater or equal + return successor(node); + } else if (child.uplinkKey() != key[depth]) { // ceil returned a greater child + return getFirstEntry(child); + } + depth++; + node = child; + } + } + + private LeafNode getHigherOrCeilEntry(boolean ceil, K k) { + if (isEmpty()) { + return null; + } + byte[] key = binaryComparable.get(k); + return getHigherOrCeilEntry(ceil, key); + } + + @Override + public Entry higherEntry(K key) { + return exportEntry(getHigherEntry(key)); + } + + @Override + public K higherKey(K key) { + return keyOrNull(getHigherOrCeilEntry(false, key)); + } + + @Override + public Entry firstEntry() { + // we need a snapshot (i.e. immutable entry) as per NavigableMap's docs + // also see Doug Lea's reply: + // http://jsr166-concurrency.10961.n7.nabble.com/Immutable-Entry-objects-in-j-u-TreeMap-td3384.html + // but why do we need a snapshot? + return exportEntry(getFirstEntry()); + } + + @Override + public Entry lastEntry() { + return exportEntry(getLastEntry()); + } + + @Override + public NavigableMap descendingMap() { + NavigableMap km = descendingMap; + return (km != null) ? km : + (descendingMap = new DescendingSubMap<>(this, + true, null, true, + true, null, true)); + } + + @Override + public NavigableSet navigableKeySet() { + KeySet nks = navigableKeySet; + return (nks != null) ? nks : (navigableKeySet = new KeySet<>(this)); + } + + @Override + public Set keySet() { + return navigableKeySet(); + } + + @Override + public NavigableSet descendingKeySet() { + return descendingMap().navigableKeySet(); + } + + @Override + public NavigableMap subMap(K fromKey, boolean fromInclusive, + K toKey, boolean toInclusive) { + return new AscendingSubMap<>(this, + false, fromKey, fromInclusive, + false, toKey, toInclusive); + } + + @Override + public NavigableMap headMap(K toKey, boolean inclusive) { + return new AscendingSubMap<>(this, + true, null, true, + false, toKey, inclusive); + } + + @Override + public NavigableMap tailMap(K fromKey, boolean inclusive) { + return new AscendingSubMap<>(this, + false, fromKey, inclusive, + true, null, true); + } + + // QUES: why does comparator return ? super K? 
+ @Override + public Comparator comparator() { + return null; + } + + public BinaryComparable binaryComparable() { + return binaryComparable; + } + + @Override + public SortedMap subMap(K fromKey, K toKey) { + return subMap(fromKey, true, toKey, false); + } + + @Override + + public SortedMap headMap(K toKey) { + return headMap(toKey, false); + } + + @Override + + public SortedMap tailMap(K fromKey) { + return tailMap(fromKey, true); + } + + @Override + public K firstKey() { + return key(getFirstEntry()); + } + + @Override + public K lastKey() { + return key(getLastEntry()); + } + + @Override + public int size() { + return size; + } + + // leaf should not be null + // neither should tree be empty when calling this + void deleteEntry(LeafNode leaf) { + size--; + modCount++; + InnerNode parent = leaf.parent(); + if (parent == null) { + // means root == leaf + root = null; + return; + } + + if (parent.getLeaf() == leaf) { + parent.removeLeaf(); + } else { + parent.removeChild(leaf.uplinkKey()); + } + + if (parent.shouldShrink()) { + InnerNode newParent = parent.shrink(); + // newParent should have copied the uplink to same grandParent of oldParent + InnerNode grandParent = newParent.parent(); + replace(newParent.uplinkKey(), grandParent, newParent); + } else if (parent.size() == 1 && !parent.hasLeaf()) { + pathCompressOnlyChild((Node4) parent); + } else if (parent.size() == 0) { + assert parent.hasLeaf(); + replace(parent.uplinkKey(), parent.parent(), parent.getLeaf()); + } + } + + Iterator> entryIterator() { + return new EntryIterator<>(this, getFirstEntry()); + } + + Iterator valueIterator() { + return new ValueIterator<>(this, getFirstEntry()); + } + + Iterator keyIterator() { + return new KeyIterator<>(this, getFirstEntry()); + } + + Iterator descendingKeyIterator() { + return new DescendingKeyIterator<>(this, getLastEntry()); + } + +} \ No newline at end of file diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/AscendingSubMap.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/AscendingSubMap.java new file mode 100644 index 0000000..6407f7a --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/AscendingSubMap.java @@ -0,0 +1,121 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.Comparator; +import java.util.Iterator; +import java.util.NavigableMap; +import java.util.Set; +import java.util.Spliterator; + +final class AscendingSubMap extends NavigableSubMap { + + AscendingSubMap(AdaptiveRadixTree m, + boolean fromStart, K lo, boolean loInclusive, + boolean toEnd, K hi, boolean hiInclusive) { + super(m, fromStart, lo, loInclusive, toEnd, hi, hiInclusive); + } + + @Override + public Comparator comparator() { + return m.comparator(); + } + + @Override + public NavigableMap subMap(K fromKey, boolean fromInclusive, + K toKey, boolean toInclusive) { + if (!inRange(fromKey, fromInclusive)) + throw new IllegalArgumentException("fromKey out of range"); + if (!inRange(toKey, toInclusive)) + throw new IllegalArgumentException("toKey out of range"); + return new AscendingSubMap<>(m, + false, fromKey, fromInclusive, + false, toKey, toInclusive); + } + + // TODO: offer another ctor to take in loBytes + @Override + public NavigableMap headMap(K toKey, boolean inclusive) { + if (!inRange(toKey, inclusive)) + throw new IllegalArgumentException("toKey out of range"); + return new AscendingSubMap<>(m, + fromStart, lo, loInclusive, + false, toKey, inclusive); + } + + 
// TODO: offer another ctor to take in hiBytes + @Override + public NavigableMap tailMap(K fromKey, boolean inclusive) { + if (!inRange(fromKey, inclusive)) + throw new IllegalArgumentException("fromKey out of range"); + return new AscendingSubMap<>(m, + false, fromKey, inclusive, + toEnd, hi, hiInclusive); + } + + @Override + public NavigableMap descendingMap() { + NavigableMap mv = descendingMapView; + return (mv != null) ? mv : + (descendingMapView = + new DescendingSubMap<>(m, + fromStart, lo, loInclusive, + toEnd, hi, hiInclusive)); + } + + @Override + Iterator keyIterator() { + return new SubMapKeyIterator(absLowest(), absHighFence()); + } + + @Override + Spliterator keySpliterator() { + return new SubMapKeyIterator(absLowest(), absHighFence()); + } + + @Override + Iterator descendingKeyIterator() { + return new DescendingSubMapKeyIterator(absHighest(), absLowFence()); + } + + @Override + public Set> entrySet() { + EntrySetView es = entrySetView; + return (es != null) ? es : (entrySetView = new AscendingEntrySetView()); + } + + @Override + LeafNode subLowest() { + return absLowest(); + } + + @Override + LeafNode subHighest() { + return absHighest(); + } + + @Override + LeafNode subCeiling(K key) { + return absCeiling(key); + } + + @Override + LeafNode subHigher(K key) { + return absHigher(key); + } + + @Override + LeafNode subFloor(K key) { + return absFloor(key); + } + + @Override + LeafNode subLower(K key) { + return absLower(key); + } + + final class AscendingEntrySetView extends EntrySetView { + @Override + public Iterator> iterator() { + return new SubMapEntryIterator(absLowest(), absHighFence()); + } + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/BinaryComparable.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/BinaryComparable.java new file mode 100644 index 0000000..b43c30b --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/BinaryComparable.java @@ -0,0 +1,54 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +/** + * For using {@link AdaptiveRadixTree}, the keys need to be transformed into binary comparable keys + * which are the byte array representation of your keys such that the result of doing + * lexicographic comparison over them is the same as doing the key comparison. + * + *

Example of key transformation

+ *

Signed integers

+ * Signed integers are stored in two's complement notation. + * This means that negative integers always have their MSB set and hence are + * bitwise lexicographically greater than positive integers. + *

+ * For example -1 in 2's complement form is 1111 1111 1111 1111 1111 1111 1111 1111, + * whereas +1 is 0000 0000 0000 0000 0000 0000 0000 0001. + *

+ * This is not the correct binary comparable transformation since + * +1 > -1 but the above transformation lexicographically orders +1 before -1. + *

+ * In this case, the right transformation is obtained by flipping the sign bit. + *

+ * Therefore -1 will be 0111 1111 1111 1111 1111 1111 1111 1111 and +1 as 1000 0000 0000 0000 0000 0000 0000 0001. + * + *
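+ * <p>
+ * A sketch of that transformation (illustrative; XOR with {@code Integer.MIN_VALUE}
+ * flips the sign bit, and {@code ByteBuffer} writes big-endian by default):
+ * <pre>{@code
+ * BinaryComparable<Integer> signedInt = key ->
+ *         ByteBuffer.allocate(Integer.BYTES).putInt(key ^ Integer.MIN_VALUE).array();
+ * }</pre>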

ASCII encoded character strings

+ * Naturally yield the expected order as 'a' < 'b' and their respective byte values 97 < 98 obey the order. + * + *

IPv4 addresses

+ * Naturally yield the expected order since each octet is an unsigned byte and unsigned types in binary have the expected lexicographic ordering. + *

+ * For example, 12.10.192.0 < 12.10.199.255 and their respective binary representation 00001100.00001010.11000000.00000000 is lexicographically smaller than 00001100.00001010.11000111.11111111. + * + *

+ *
+ * <h2>Implementing the interface</h2>
+ *
+ * <h3>Simple keys based on primitives and String</h3>
+ *
+ * <h3>Compound keys</h3>
+ *
+ * <h4>With only fixed length attributes</h4>
+ * Transform each attribute separately and concatenate the results.
+ * <p>
+ * This example, sketched below, shows the transformation for a compound key
+ * made up of two integers.
+ *
+ * <h4>With variable length attributes</h4>
+ * A variable length attribute that is succeeded by another attribute must have
+ * its transformation end with a byte 0. Without it, the compound keys ("a", "bc")
+ * and ("ab", "c") would incorrectly compare as equal. Note that this only works
+ * if byte 0 is not part of the variable length attribute's key space; otherwise
+ * ("a\0", "b") would incorrectly be ordered before ("a", "b").
+ * <p>
+ * If byte 0 is part of the key space, then the transformation must remap every
+ * byte 0 to byte 0 followed by byte 1, and terminate with two byte 0s. This is
+ * described in section IV.B (e).
+ *
+ * <h2>Further reading</h2>
+ * Section IV of the paper. + * + * @param the key type to be used in {@link AdaptiveRadixTree} + */ +public interface BinaryComparable { + byte[] get(K key); +} + diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/DescendingKeyIterator.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/DescendingKeyIterator.java new file mode 100644 index 0000000..9dcf059 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/DescendingKeyIterator.java @@ -0,0 +1,12 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +final class DescendingKeyIterator extends PrivateEntryIterator { + DescendingKeyIterator(AdaptiveRadixTree m, LeafNode last) { + super(m, last); + } + + @Override + public K next() { + return prevEntry().getKey(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/DescendingSubMap.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/DescendingSubMap.java new file mode 100644 index 0000000..23ba29e --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/DescendingSubMap.java @@ -0,0 +1,121 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.Comparator; +import java.util.Iterator; +import java.util.NavigableMap; +import java.util.Set; +import java.util.Spliterator; + +final class DescendingSubMap extends NavigableSubMap { + + DescendingSubMap(AdaptiveRadixTree m, + boolean fromStart, K lo, boolean loInclusive, + boolean toEnd, K hi, boolean hiInclusive) { + super(m, fromStart, lo, loInclusive, toEnd, hi, hiInclusive); + } + + @Override + public Comparator comparator() { + return m.comparator(); + } + + // create a new submap out of a submap. + // the new bounds should be within the current submap's bounds + @Override + public NavigableMap subMap(K fromKey, boolean fromInclusive, + K toKey, boolean toInclusive) { + if (!inRange(fromKey, fromInclusive)) + throw new IllegalArgumentException("fromKey out of range"); + if (!inRange(toKey, toInclusive)) + throw new IllegalArgumentException("toKey out of range"); + return new DescendingSubMap<>(m, + false, toKey, toInclusive, + false, fromKey, fromInclusive); + } + + @Override + public NavigableMap headMap(K toKey, boolean inclusive) { + if (!inRange(toKey, inclusive)) + throw new IllegalArgumentException("toKey out of range"); + return new DescendingSubMap<>(m, + false, toKey, inclusive, + toEnd, hi, hiInclusive); + } + + @Override + public NavigableMap tailMap(K fromKey, boolean inclusive) { + if (!inRange(fromKey, inclusive)) + throw new IllegalArgumentException("fromKey out of range"); + return new DescendingSubMap<>(m, + fromStart, lo, loInclusive, + false, fromKey, inclusive); + } + + @Override + public NavigableMap descendingMap() { + NavigableMap mv = descendingMapView; + return (mv != null) ? mv : + (descendingMapView = + new AscendingSubMap<>(m, + fromStart, lo, loInclusive, + toEnd, hi, hiInclusive)); + } + + @Override + Iterator keyIterator() { + return new DescendingSubMapKeyIterator(absHighest(), absLowFence()); + } + + @Override + Spliterator keySpliterator() { + return new DescendingSubMapKeyIterator(absHighest(), absLowFence()); + } + + @Override + Iterator descendingKeyIterator() { + return new SubMapKeyIterator(absLowest(), absHighFence()); + } + + @Override + public Set> entrySet() { + EntrySetView es = entrySetView; + return (es != null) ? 
es : (entrySetView = new DescendingEntrySetView()); + } + + @Override + LeafNode subLowest() { + return absHighest(); + } + + @Override + LeafNode subHighest() { + return absLowest(); + } + + @Override + LeafNode subCeiling(K key) { + return absFloor(key); + } + + @Override + LeafNode subHigher(K key) { + return absLower(key); + } + + @Override + LeafNode subFloor(K key) { + return absCeiling(key); + } + + @Override + LeafNode subLower(K key) { + return absHigher(key); + } + + final class DescendingEntrySetView extends EntrySetView { + @Override + public Iterator> iterator() { + return new DescendingSubMapEntryIterator(absHighest(), absLowFence()); + } + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/EntryIterator.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/EntryIterator.java new file mode 100644 index 0000000..8ddbb21 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/EntryIterator.java @@ -0,0 +1,14 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.Map; + +final class EntryIterator extends PrivateEntryIterator> { + EntryIterator(AdaptiveRadixTree m, LeafNode first) { + super(m, first); + } + + @Override + public Map.Entry next() { + return nextEntry(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/EntrySet.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/EntrySet.java new file mode 100644 index 0000000..b2004e2 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/EntrySet.java @@ -0,0 +1,54 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.AbstractSet; +import java.util.Iterator; +import java.util.Map; + +class EntrySet extends AbstractSet> { + private final AdaptiveRadixTree m; + + EntrySet(AdaptiveRadixTree m) { + this.m = m; + } + + @Override + public Iterator> iterator() { + return m.entryIterator(); + } + + @Override + public boolean contains(Object o) { + if (!(o instanceof Map.Entry)) + return false; + Map.Entry entry = (Map.Entry) o; + Object value = entry.getValue(); + LeafNode p = m.getEntry(entry.getKey()); + return p != null && AdaptiveRadixTree.valEquals(p.getValue(), value); + } + + @Override + public boolean remove(Object o) { + if (!(o instanceof Map.Entry)) + return false; + Map.Entry entry = (Map.Entry) o; + Object value = entry.getValue(); + LeafNode p = m.getEntry(entry.getKey()); + if (p != null && AdaptiveRadixTree.valEquals(p.getValue(), value)) { + m.deleteEntry(p); + return true; + } + return false; + } + + @Override + public int size() { + return m.size(); + } + + @Override + public void clear() { + m.clear(); + } + + // TODO: implement Spliterator +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/InnerNode.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/InnerNode.java new file mode 100644 index 0000000..d18cd5a --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/InnerNode.java @@ -0,0 +1,159 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +/* + These are internal contracts/interfaces + They've been written with only what they're used for internally + For example InnerNode#remove could have returned a false indicative of a failed remove + due to partialKey entry not actually existing, but the return value is 
of no use in code till now + and is sure to be called from places where it'll surely exist. + since they're internal, we could change them later if a better contract makes more sense. + + The impls have assert conditions all around to make sure the methods are called being in the right + state. For example you should not call shrink() if the Node is not ready to shrink, etc. + Or for example when calling last() on Node16 or higher, we're sure we'll have at least + X amount of children hence safe to return child[noOfChildren-1], without worrying about bounds. + + */ +abstract class InnerNode extends Node { + + static final int PESSIMISTIC_PATH_COMPRESSION_LIMIT = 8; + + // max limit of 8 bytes (Pessimistic) + final byte[] prefixKeys; + final Node[] child; + // Optimistic + int prefixLen; // 4 bytes + // TODO: we could save space by making this a byte and returning + // Byte.toUnsignedInt wherever comparison with it is done. + short noOfChildren; + + InnerNode(int size) { + prefixKeys = new byte[PESSIMISTIC_PATH_COMPRESSION_LIMIT]; + child = new Node[size + 1]; + } + + // copy ctor. called when growing/shrinking + InnerNode(InnerNode node, int size) { + super(node); + child = new Node[size + 1]; + // copy header + this.noOfChildren = node.noOfChildren; + this.prefixLen = node.prefixLen; + this.prefixKeys = node.prefixKeys; + + // copy leaf & replace uplink + child[size] = node.getLeaf(); + if (child[size] != null) { + replaceUplink(this, child[size]); + } + } + + public void removeLeaf() { + removeUplink(child[child.length - 1]); + child[child.length - 1] = null; + } + + public boolean hasLeaf() { + return child[child.length - 1] != null; + } + + public LeafNode getLeaf() { + return (LeafNode) child[child.length - 1]; + } + + public void setLeaf(LeafNode leaf) { + child[child.length - 1] = leaf; + createUplink(this, leaf); + } + + @Override + public Node firstOrLeaf() { + if (hasLeaf()) { + return getLeaf(); + } + return first(); + } + + Node[] getChild() { + return child; + } + + /** + * @return no of children this Node has + */ + public short size() { + return noOfChildren; + } + + /** + * @param partialKey search if this node has an entry for given partialKey + * @return if it does, then return the following child pointer. + * Returns null if there is no corresponding entry. + */ + abstract Node findChild(byte partialKey); + + /** + * @param partialKey + * @return a child which is equal or greater than given partial key, or null if there is no such child + */ + abstract Node ceil(byte partialKey); + + /** + * @param partialKey + * @return a child which is equal or lesser than given partial key, or null if there is no such child + */ + abstract Node floor(byte partialKey); + + /** + * Note: caller needs to check if {@link InnerNode} {@link #isFull()} before calling this. + * If it is full then call {@link #grow()} followed by {@link #addChild(byte, Node)} on the new node. 
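+ * <p>
+ * A caller-side sketch of that pattern, mirroring what {@code AdaptiveRadixTree#put}
+ * does (illustrative; {@code replace} re-links the grown node into its parent):
+ * <pre>{@code
+ * if (innerNode.isFull()) {
+ *     innerNode = innerNode.grow();
+ *     replace(depth, keyBytes, prevDepth, innerNode);
+ * }
+ * innerNode.addChild(partialKey, leaf);
+ * }</pre>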
+ * + * @param partialKey partialKey to be mapped + * @param child the child node to be added + */ + abstract void addChild(byte partialKey, Node child); + + /** + * @param partialKey for which the child pointer mapping is to be updated + * @param newChild the new mapping to be added for given partialKey + */ + abstract void replace(byte partialKey, Node newChild); + + /** + * @param partialKey for which the child pointer mapping is to be removed + */ + abstract void removeChild(byte partialKey); + + /** + * creates and returns the next larger node type with the same mappings as this node + * + * @return a new node with the same mappings + */ + abstract InnerNode grow(); + + abstract boolean shouldShrink(); + + /** + * creates and returns the a smaller node type with the same mappings as this node + * + * @return a smaller node with the same mappings + */ + abstract InnerNode shrink(); + + /** + * @return true if Node has reached it's capacity + */ + abstract boolean isFull(); + + /** + * @return returns the smallest child node for the partialKey strictly greater than the partialKey passed. + * Returns null if no such child. + */ + abstract Node greater(byte partialKey); + + /** + * @return returns the greatest child node for the partialKey strictly lesser than the partialKey passed. + * Returns null if no such child. + */ + abstract Node lesser(byte partialKey); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/KeyIterator.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/KeyIterator.java new file mode 100644 index 0000000..9601cb8 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/KeyIterator.java @@ -0,0 +1,12 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +final class KeyIterator extends PrivateEntryIterator { + KeyIterator(AdaptiveRadixTree m, LeafNode first) { + super(m, first); + } + + @Override + public K next() { + return nextEntry().getKey(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/KeySet.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/KeySet.java new file mode 100644 index 0000000..4a59731 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/KeySet.java @@ -0,0 +1,152 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.AbstractSet; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.NavigableMap; +import java.util.NavigableSet; +import java.util.SortedSet; + +// implementation simply relays/delegates calls to backing map's methods +final class KeySet extends AbstractSet implements NavigableSet { + private final NavigableMap m; + + KeySet(NavigableMap map) { + m = map; + } + + // this KeySet can only be created either on ART or on one of it's subMaps + @Override + @SuppressWarnings("unchecked") + public Iterator iterator() { + if (m instanceof AdaptiveRadixTree) + + return ((AdaptiveRadixTree) m).keyIterator(); + else + return ((NavigableSubMap) m).keyIterator(); + } + + // this KeySet can only be created either on ART or on one of it's subMaps + @Override + @SuppressWarnings("unchecked") + public Iterator descendingIterator() { + if (m instanceof AdaptiveRadixTree) + return ((AdaptiveRadixTree) m).descendingKeyIterator(); + else + return ((NavigableSubMap) m).descendingKeyIterator(); + } + + @Override + public int size() { + return 
m.size(); + } + + @Override + public boolean isEmpty() { + return m.isEmpty(); + } + + @Override + public boolean contains(Object o) { + return m.containsKey(o); + } + + @Override + public void clear() { + m.clear(); + } + + @Override + public E lower(E e) { + return m.lowerKey(e); + } + + @Override + public E floor(E e) { + return m.floorKey(e); + } + + @Override + public E ceiling(E e) { + return m.ceilingKey(e); + } + + @Override + public E higher(E e) { + return m.higherKey(e); + } + + @Override + public E first() { + return m.firstKey(); + } + + @Override + public E last() { + return m.lastKey(); + } + + @Override + public Comparator comparator() { + return m.comparator(); + } + + @Override + public E pollFirst() { + Map.Entry e = m.pollFirstEntry(); + return (e == null) ? null : e.getKey(); + } + + @Override + public E pollLast() { + Map.Entry e = m.pollLastEntry(); + return (e == null) ? null : e.getKey(); + } + + @Override + public boolean remove(Object o) { + int oldSize = size(); + m.remove(o); + return size() != oldSize; + } + + @Override + public NavigableSet subSet(E fromElement, boolean fromInclusive, + E toElement, boolean toInclusive) { + return new KeySet<>(m.subMap(fromElement, fromInclusive, + toElement, toInclusive)); + } + + @Override + public NavigableSet headSet(E toElement, boolean inclusive) { + return new KeySet<>(m.headMap(toElement, inclusive)); + } + + @Override + public NavigableSet tailSet(E fromElement, boolean inclusive) { + return new KeySet<>(m.tailMap(fromElement, inclusive)); + } + + @Override + public SortedSet subSet(E fromElement, E toElement) { + return subSet(fromElement, true, toElement, false); + } + + @Override + public SortedSet headSet(E toElement) { + return headSet(toElement, false); + } + + @Override + public SortedSet tailSet(E fromElement) { + return tailSet(fromElement, true); + } + + @Override + public NavigableSet descendingSet() { + return new KeySet<>(m.descendingMap()); + } + + // TODO: implement Spliterator +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/LeafNode.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/LeafNode.java new file mode 100644 index 0000000..b326483 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/LeafNode.java @@ -0,0 +1,92 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.Arrays; +import java.util.Map; + +/* + currently we use what the paper mentions as "Single-value" leaves + */ +class LeafNode extends Node implements Map.Entry { + // we have to save the keyBytes, because leaves are lazy expanded at times + private final byte[] keyBytes; + private final K key; + private V value; + + LeafNode(byte[] keyBytes, K key, V value) { + this.value = value; + // defensive copy + this.keyBytes = Arrays.copyOf(keyBytes, keyBytes.length); + this.key = key; + } + + public V setValue(V value) { + V oldValue = this.value; + this.value = value; + return oldValue; + } + + public V getValue() { + return value; + } + + byte[] getKeyBytes() { + return keyBytes; + } + + public K getKey() { + return key; + } + + /** + * Dev note: first() is implemented to detect end of the SortedMap.firstKey() + */ + @Override + public Node first() { + return null; + } + + @Override + public Node firstOrLeaf() { + return null; + } + + /** + * Dev note: last() is implemented to detect end of the SortedMap.lastKey() + */ + @Override + public Node last() { + return null; + } + + /** + * Compares 
this Map.Entry with another Map.Entry.
+     * <p>
+ * Implemented per API documentation of {@link java.util.Map.Entry#equals(Object)} + * + * @param obj the object to compare to + * @return true if equal key and value + */ + @Override + public boolean equals(final Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof Map.Entry)) { + return false; + } + final Map.Entry other = (Map.Entry) obj; + return (getKey() == null ? other.getKey() == null : getKey().equals(other.getKey())) && + (getValue() == null ? other.getValue() == null : getValue().equals(other.getValue())); + } + + @Override + public int hashCode() { + return (getKey() == null ? 0 : getKey().hashCode()) ^ + (getValue() == null ? 0 : getValue().hashCode()); + } + + @Override + public String toString() { + return key + "=" + value; + } +} \ No newline at end of file diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/NavigableSubMap.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/NavigableSubMap.java new file mode 100644 index 0000000..79cf3cb --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/NavigableSubMap.java @@ -0,0 +1,596 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.Comparator; +import java.util.ConcurrentModificationException; +import java.util.Iterator; +import java.util.Map; +import java.util.NavigableMap; +import java.util.NavigableSet; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.SortedMap; +import java.util.Spliterator; +import java.util.function.Consumer; + +/** + * A NavigableMap that adds range checking (if passed in key is within lower and upper bound) + * for all the map methods and then relays the call + * into the backing map + */ +abstract class NavigableSubMap extends AbstractMap + implements NavigableMap { + + // Dummy value serving as unmatchable fence key for unbounded SubMapIterators + private static final Object UNBOUNDED = new Object(); + final AdaptiveRadixTree m; + /** + * Endpoints are represented as triples (fromStart, lo, + * loInclusive) and (toEnd, hi, hiInclusive). If fromStart is + * true, then the low (absolute) bound is the start of the + * backing map, and the other values are ignored. Otherwise, + * if loInclusive is true, lo is the inclusive bound, else lo + * is the exclusive bound. Similarly for the upper bound. + */ + + final K lo, hi; + final byte[] loBytes, hiBytes; + final boolean fromStart, toEnd; + final boolean loInclusive, hiInclusive; + + transient NavigableMap descendingMapView; + transient NavigableSubMap.EntrySetView entrySetView; + transient KeySet navigableKeySetView; + + NavigableSubMap(AdaptiveRadixTree m, + boolean fromStart, K lo, boolean loInclusive, + boolean toEnd, K hi, boolean hiInclusive) { + this.loBytes = fromStart ? null : m.binaryComparable().get(lo); + this.hiBytes = toEnd ? 
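LeafNode implements the java.util.Map.Entry equality contract cited above, so a leaf compares equal to any other Map.Entry with an equal key and value, regardless of the entry's concrete class. A small illustration of that contract against the JDK's own entry implementations (a sketch, not this codebase's API):

import java.util.AbstractMap;
import java.util.Map;

// Any two Map.Entry instances with equal key and value are equal,
// and hash as keyHash ^ valueHash, across implementations.
public class EntryContractDemo {
    public static void main(String[] args) {
        Map.Entry<String, Integer> a = new AbstractMap.SimpleEntry<>("k", 1);
        Map.Entry<String, Integer> b = new AbstractMap.SimpleImmutableEntry<>("k", 1);
        System.out.println(a.equals(b));                   // true: cross-implementation equality
        System.out.println(a.hashCode() == b.hashCode());  // true: same contract as LeafNode.hashCode()
    }
}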
null : m.binaryComparable().get(hi);
+        if (!fromStart && !toEnd) {
+            if (AdaptiveRadixTree.compare(loBytes, 0, loBytes.length, hiBytes, 0, hiBytes.length) > 0)
+                throw new IllegalArgumentException("fromKey > toKey");
+        }
+        this.m = m;
+        this.fromStart = fromStart;
+        this.lo = lo;
+        this.loInclusive = loInclusive;
+        this.toEnd = toEnd;
+        this.hi = hi;
+        this.hiInclusive = hiInclusive;
+    }
+
+    final boolean tooLow(K key) {
+        if (!fromStart) {
+            int c = m.compare(key, loBytes);
+            // if c == 0 and the lower bound is exclusive,
+            // then this key is too low;
+            // else it is not, since it is as low as our lower bound
+            return c < 0 || (c == 0 && !loInclusive);
+        }
+        // we don't have a lower bound
+        return false;
+    }
+
+
+    /*
+     * Absolute versions of relation operations.
+     * Subclasses map to these using like-named "sub"
+     * versions that invert senses for descending maps
+     */
+
+    final boolean tooHigh(K key) {
+        if (!toEnd) {
+            int c = m.compare(key, hiBytes);
+            // if c == 0 and the upper bound is exclusive,
+            // then this key is too high;
+            // else it is not, since it is no greater than our upper bound
+            return c > 0 || (c == 0 && !hiInclusive);
+        }
+        // we don't have an upper bound
+        return false;
+    }
+
+    final boolean inRange(K key) {
+        return !tooLow(key) && !tooHigh(key);
+    }
+
+    final boolean inClosedRange(K key) {
+        // if we have neither an upper nor a lower bound, then all keys are always in range.
+        // if we have a lower bound, then this key ought to be higher than our lower bound (closed, hence including).
+        // if we have an upper bound, then this key ought to be lower than our upper bound (closed, hence including).
+        return (fromStart || m.compare(key, loBytes) >= 0)
+                && (toEnd || m.compare(key, hiBytes) <= 0);
+    }
+
+    final boolean inRange(K key, boolean inclusive) {
+        return inclusive ? inRange(key) : inClosedRange(key);
+    }
+
+    final LeafNode<K, V> absLowest() {
+        LeafNode<K, V> e =
+                (fromStart ? m.getFirstEntry() :
+                        (loInclusive ? m.getCeilingEntry(loBytes) :
+                                m.getHigherEntry(loBytes)));
+        return (e == null || tooHigh(e.getKey())) ? null : e;
+    }
+
+    final LeafNode<K, V> absHighest() {
+        LeafNode<K, V> e =
+                (toEnd ? m.getLastEntry() :
+                        (hiInclusive ? m.getFloorEntry(hiBytes) :
+                                m.getLowerEntry(hiBytes)));
+        return (e == null || tooLow(e.getKey())) ? null : e;
+    }
+
+    final LeafNode<K, V> absCeiling(K key) {
+        if (tooLow(key))
+            return absLowest();
+        LeafNode<K, V> e = m.getCeilingEntry(key);
+        return (e == null || tooHigh(e.getKey())) ? null : e;
+    }
+
+    final LeafNode<K, V> absHigher(K key) {
+        if (tooLow(key))
+            return absLowest();
+        LeafNode<K, V> e = m.getHigherEntry(key);
+        return (e == null || tooHigh(e.getKey())) ? null : e;
+    }
+
+    // Abstract methods defined in ascending vs descending classes
+    // These relay to the appropriate absolute versions
+
+    final LeafNode<K, V> absFloor(K key) {
+        if (tooHigh(key))
+            return absHighest();
+        LeafNode<K, V> e = m.getFloorEntry(key);
+        return (e == null || tooLow(e.getKey())) ? null : e;
+    }
+
+    final LeafNode<K, V> absLower(K key) {
+        if (tooHigh(key))
+            return absHighest();
+        LeafNode<K, V> e = m.getLowerEntry(key);
+        return (e == null || tooLow(e.getKey())) ? null : e;
+    }
+
+    /**
+     * Returns the absolute high fence for ascending traversal
+     */
+    final LeafNode<K, V> absHighFence() {
+        return (toEnd ? null : (hiInclusive ?
+                m.getHigherEntry(hiBytes) :
+                m.getCeilingEntry(hiBytes))); // then hi itself (but we want the entry, hence traversal is required)
+    }
+
+    /**
+     * Return the absolute low fence for descending traversal
+     */
+    final LeafNode<K, V> absLowFence() {
+        return (fromStart ? null : (loInclusive ?
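The predicates above are the whole range-checking story: tooLow and tooHigh handle the open/closed distinction exactly at the bounds, and inRange composes them. A standalone restatement over Comparable keys, for testing the edge cases (the real code compares serialized key bytes through m.compare; a null bound here stands for an unbounded side):

// tooLow/tooHigh restated over Comparable keys; null bound = unbounded side.
public class RangeCheckDemo {
    static <K extends Comparable<K>> boolean inRange(
            K key, K lo, boolean loInclusive, K hi, boolean hiInclusive) {
        if (lo != null) {
            int c = key.compareTo(lo);
            if (c < 0 || (c == 0 && !loInclusive)) {
                return false;  // tooLow
            }
        }
        if (hi != null) {
            int c = key.compareTo(hi);
            if (c > 0 || (c == 0 && !hiInclusive)) {
                return false;  // tooHigh
            }
        }
        return true;
    }

    public static void main(String[] args) {
        // submap ("b", exclusive) .. ("f", inclusive)
        System.out.println(inRange("b", "b", false, "f", true));  // false: on the exclusive bound
        System.out.println(inRange("c", "b", false, "f", true));  // true
        System.out.println(inRange("f", "b", false, "f", true));  // true: on the inclusive bound
    }
}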
+ m.getLowerEntry(loBytes) : + m.getFloorEntry(loBytes))); // then lo itself (but we want the entry, hence traversal is required) + } + + abstract LeafNode subLowest(); + + abstract LeafNode subHighest(); + + + /* Returns ascending iterator from the perspective of this submap */ + + abstract LeafNode subCeiling(K key); + + abstract LeafNode subHigher(K key); + + + /* Returns descending iterator from the perspective of this submap*/ + + abstract LeafNode subFloor(K key); + + abstract LeafNode subLower(K key); + + abstract Iterator keyIterator(); + + abstract Spliterator keySpliterator(); + + abstract Iterator descendingKeyIterator(); + + // public methods + @Override + public boolean isEmpty() { + return (fromStart && toEnd) ? m.isEmpty() : entrySet().isEmpty(); + } + + @Override + public int size() { + return (fromStart && toEnd) ? m.size() : entrySet().size(); + } + + @SuppressWarnings("unchecked") + @Override + public final boolean containsKey(Object key) { + return inRange((K) key) && m.containsKey(key); + } + + @Override + public final V put(K key, V value) { + if (!inRange(key)) + throw new IllegalArgumentException("key out of range"); + return m.put(key, value); + } + + @SuppressWarnings("unchecked") + @Override + public final V get(Object key) { + return !inRange((K) key) ? null : m.get(key); + } + + @SuppressWarnings("unchecked") + @Override + public final V remove(Object key) { + return !inRange((K) key) ? null : m.remove(key); + } + + @Override + public final Map.Entry ceilingEntry(K key) { + return AdaptiveRadixTree.exportEntry(subCeiling(key)); + } + + @Override + public final K ceilingKey(K key) { + return AdaptiveRadixTree.keyOrNull(subCeiling(key)); + } + + @Override + public final Map.Entry higherEntry(K key) { + return AdaptiveRadixTree.exportEntry(subHigher(key)); + } + + @Override + public final K higherKey(K key) { + return AdaptiveRadixTree.keyOrNull(subHigher(key)); + } + + @Override + public final Map.Entry floorEntry(K key) { + return AdaptiveRadixTree.exportEntry(subFloor(key)); + } + + @Override + public final K floorKey(K key) { + return AdaptiveRadixTree.keyOrNull(subFloor(key)); + } + + @Override + public final Map.Entry lowerEntry(K key) { + return AdaptiveRadixTree.exportEntry(subLower(key)); + } + + @Override + public final K lowerKey(K key) { + return AdaptiveRadixTree.keyOrNull(subLower(key)); + } + + @Override + public final K firstKey() { + return AdaptiveRadixTree.key(subLowest()); + } + + @Override + public final K lastKey() { + return AdaptiveRadixTree.key(subHighest()); + } + + @Override + public final Map.Entry firstEntry() { + return AdaptiveRadixTree.exportEntry(subLowest()); + } + + @Override + public final Map.Entry lastEntry() { + return AdaptiveRadixTree.exportEntry(subHighest()); + } + + @Override + public final Map.Entry pollFirstEntry() { + LeafNode e = subLowest(); + Map.Entry result = AdaptiveRadixTree.exportEntry(e); + if (e != null) + m.deleteEntry(e); + return result; + } + + @Override + public final Map.Entry pollLastEntry() { + LeafNode e = subHighest(); + Map.Entry result = AdaptiveRadixTree.exportEntry(e); + if (e != null) + m.deleteEntry(e); + return result; + } + + @Override + public final NavigableSet navigableKeySet() { + KeySet nksv = navigableKeySetView; + return (nksv != null) ? 
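All of these public methods resolve through the sub*/abs* chain and export the resulting leaf. One observable consequence worth spelling out: navigableKeySet() returns a live view, so mutating the set mutates the backing map. A sketch of that semantics, using TreeMap as a stand-in since it honors the same NavigableMap contract this class implements:

import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.TreeMap;

// Live-view semantics of navigableKeySet(); TreeMap stands in for the ART.
public class KeySetViewDemo {
    public static void main(String[] args) {
        NavigableMap<String, Integer> m = new TreeMap<>();
        m.put("a", 1);
        m.put("b", 2);
        m.put("c", 3);
        NavigableSet<String> keys = m.navigableKeySet();
        keys.pollFirst();                          // removes "a" from the backing map
        System.out.println(m.containsKey("a"));    // false: the view is live
        System.out.println(keys.descendingSet());  // [c, b]
    }
}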
nksv : + (navigableKeySetView = new KeySet<>(this)); + } + + @Override + public final Set keySet() { + return navigableKeySet(); + } + + @Override + public NavigableSet descendingKeySet() { + return descendingMap().navigableKeySet(); + } + + @Override + public final SortedMap subMap(K fromKey, K toKey) { + return subMap(fromKey, true, toKey, false); + } + + @Override + public final SortedMap headMap(K toKey) { + return headMap(toKey, false); + } + + // View classes + + @Override + public final SortedMap tailMap(K fromKey) { + return tailMap(fromKey, true); + } + + // entry set views for submaps + abstract class EntrySetView extends AbstractSet> { + private transient int size = -1, sizeModCount; + + // if the submap does not define any upper and lower bounds + // i.e. it is the same view as the original map (very unlikely) + // then no need to explicitly calculate the size. + @Override + public int size() { + if (fromStart && toEnd) + return m.size(); + // if size == -1, it is the first time we're calculating the size + // if sizeModCount != m.getModCount(), the map has had modification operations + // so it's size must've changed, recalculate. + if (size == -1 || sizeModCount != m.getModCount()) { + sizeModCount = m.getModCount(); + size = 0; + Iterator i = iterator(); + while (i.hasNext()) { + size++; + i.next(); + } + } + return size; + } + + @Override + public boolean isEmpty() { + LeafNode n = absLowest(); + return n == null || tooHigh(n.getKey()); + } + + @SuppressWarnings("unchecked") + @Override + public boolean contains(Object o) { + if (!(o instanceof Map.Entry)) + return false; + Map.Entry entry = (Map.Entry) o; + Object key = entry.getKey(); + if (!inRange((K) key)) + return false; + LeafNode node = m.getEntry(key); + return node != null && + AdaptiveRadixTree.valEquals(node.getValue(), entry.getValue()); + } + + @SuppressWarnings("unchecked") + @Override + public boolean remove(Object o) { + if (!(o instanceof Map.Entry)) + return false; + Map.Entry entry = (Map.Entry) o; + Object key = entry.getKey(); + if (!inRange((K) key)) + return false; + LeafNode node = m.getEntry(key); + if (node != null && AdaptiveRadixTree.valEquals(node.getValue(), + entry.getValue())) { + m.deleteEntry(node); + return true; + } + return false; + } + } + + /* + * Iterators for SubMaps + * that understand the submap's upper and lower bound while iterating. + * Fence is one of the bounds depending on the kind of iterator (ascending, descending) + * and first becomes the other one to start from. + */ + abstract class SubMapIterator implements Iterator { + final Object fenceKey; + LeafNode lastReturned; + LeafNode next; + int expectedModCount; + + SubMapIterator(LeafNode first, + LeafNode fence) { + expectedModCount = m.getModCount(); + lastReturned = null; + next = first; + fenceKey = fence == null ? 
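EntrySetView.size() memoizes the expensive linear count and revalidates it against the map's modCount, so repeated calls on an unmodified submap cost O(1) after the first O(n) pass. The pattern in isolation (an illustrative class, not this codebase's API):

// The memoization pattern behind EntrySetView.size(), in isolation.
class CachedSizeView {
    private int size = -1;     // -1: never counted yet
    private int sizeModCount;  // modCount observed at the last count

    int size(Iterable<?> elements, int currentModCount) {
        if (size == -1 || sizeModCount != currentModCount) {
            sizeModCount = currentModCount;
            size = 0;
            for (Object ignored : elements) {
                size++;        // O(n) recount, only after a modification
            }
        }
        return size;           // O(1) on an unmodified map
    }
}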
UNBOUNDED : fence.getKey(); + } + + @Override + public final boolean hasNext() { + return next != null && next.getKey() != fenceKey; + } + + final LeafNode nextEntry() { + LeafNode e = next; + if (e == null || e.getKey() == fenceKey) + throw new NoSuchElementException(); + if (m.getModCount() != expectedModCount) + throw new ConcurrentModificationException(); + next = AdaptiveRadixTree.successor(e); + lastReturned = e; + return e; + } + + final LeafNode prevEntry() { + LeafNode e = next; + if (e == null || e.getKey() == fenceKey) + throw new NoSuchElementException(); + if (m.getModCount() != expectedModCount) + throw new ConcurrentModificationException(); + next = AdaptiveRadixTree.predecessor(e); + lastReturned = e; + return e; + } + + @Override + public void remove() { + if (lastReturned == null) + throw new IllegalStateException(); + if (m.getModCount() != expectedModCount) + throw new ConcurrentModificationException(); + // deleted entries are replaced by their successors + // if (lastReturned.left != null && lastReturned.right != null) + // next = lastReturned; + m.deleteEntry(lastReturned); + lastReturned = null; + expectedModCount = m.getModCount(); + } + } + + final class SubMapEntryIterator extends SubMapIterator> { + SubMapEntryIterator(LeafNode first, + LeafNode fence) { + super(first, fence); + } + + @Override + public Map.Entry next() { + return nextEntry(); + } + } + + final class DescendingSubMapEntryIterator extends SubMapIterator> { + DescendingSubMapEntryIterator(LeafNode last, + LeafNode fence) { + super(last, fence); + } + + @Override + public Map.Entry next() { + return prevEntry(); + } + } + + // Implement minimal Spliterator as KeySpliterator backup + final class SubMapKeyIterator extends SubMapIterator + implements Spliterator { + SubMapKeyIterator(LeafNode first, + LeafNode fence) { + super(first, fence); + } + + @Override + public K next() { + return nextEntry().getKey(); + } + + @Override + public Spliterator trySplit() { + return null; + } + + @Override + public void forEachRemaining(Consumer action) { + while (hasNext()) + action.accept(next()); + } + + @Override + public boolean tryAdvance(Consumer action) { + if (hasNext()) { + action.accept(next()); + return true; + } + return false; + } + + // estimating size of submap would be expensive + // since we'd have to traverse from lower bound to upper bound + // for this submap + @Override + public long estimateSize() { + return Long.MAX_VALUE; + } + + @Override + public int characteristics() { + return Spliterator.DISTINCT | Spliterator.ORDERED | + Spliterator.SORTED; + } + + @Override + public final Comparator getComparator() { + return NavigableSubMap.this.comparator(); + } + } + + final class DescendingSubMapKeyIterator extends SubMapIterator + implements Spliterator { + DescendingSubMapKeyIterator(LeafNode last, + LeafNode fence) { + super(last, fence); + } + + @Override + public K next() { + return prevEntry().getKey(); + } + + @Override + public Spliterator trySplit() { + return null; + } + + @Override + public void forEachRemaining(Consumer action) { + while (hasNext()) + action.accept(next()); + } + + @Override + public boolean tryAdvance(Consumer action) { + if (hasNext()) { + action.accept(next()); + return true; + } + return false; + } + + @Override + public long estimateSize() { + return Long.MAX_VALUE; + } + + @Override + public int characteristics() { + return Spliterator.DISTINCT | Spliterator.ORDERED; + } + } +} + + diff --git 
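SubMapIterator stops at a fence leaf rather than at null, and the hasNext()/nextEntry() checks compare key references, not values: fenceKey is either the fence leaf's own key object or the private UNBOUNDED sentinel, which no real key can be identical to. A compact demonstration of why reference comparison suffices:

// Why an Object sentinel plus reference comparison is a safe fence:
// no user key can be identical (==) to a freshly allocated sentinel.
public class FenceKeyDemo {
    private static final Object UNBOUNDED = new Object();

    static boolean reachedFence(Object key, Object fenceKey) {
        return key == fenceKey;  // the same test hasNext()/nextEntry() use
    }

    public static void main(String[] args) {
        System.out.println(reachedFence("k1", UNBOUNDED));  // false: unbounded side never stops
        Object fence = "k9";                                // the fence leaf's own key object
        System.out.println(reachedFence(fence, fence));     // true: stop at the fence entry
    }
}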
a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node.java new file mode 100644 index 0000000..148cf52 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node.java @@ -0,0 +1,65 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +abstract class Node { + // for upwards traversal + // dev note: wherever you setup downlinks, you setup uplinks as well + private InnerNode parent; + private byte partialKey; + + Node() { + } + + // copy ctor. called when growing/shrinking + Node(Node node) { + this.partialKey = node.partialKey; + this.parent = node.parent; + } + + // do we need partial key for leaf nodes? we'll find out + static void createUplink(InnerNode parent, LeafNode child) { + Node c = child; + c.parent = parent; + } + + static void createUplink(InnerNode parent, Node child, byte partialKey) { + child.parent = parent; + child.partialKey = partialKey; + } + + // called when growing/shrinking and all children now have a new parent + static void replaceUplink(InnerNode parent, Node child) { + child.parent = parent; + } + + static void removeUplink(Node child) { + child.parent = null; + } + + /** + * @return child pointer for the smallest partialKey stored in this Node. + * Returns null if this node has no children. + */ + abstract Node first(); + + abstract Node firstOrLeaf(); + + /** + * @return child pointer for the largest partialKey stored in this Node. + * Returns null if this node has no children. + */ + abstract Node last(); + + /** + * @return the parent of this node. Returns null for root node. + */ + public InnerNode parent() { + return parent; + } + + /** + * @return the uplinking partial key to parent + */ + public byte uplinkKey() { + return partialKey; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node16.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node16.java new file mode 100644 index 0000000..de2c47e --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node16.java @@ -0,0 +1,186 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.Arrays; + +class Node16 extends InnerNode { + static final int NODE_SIZE = 16; + private static final int BYTE_SHIFT = 1 << Byte.SIZE - 1; + private final byte[] keys = new byte[NODE_SIZE]; + + Node16(Node4 node) { + super(node, NODE_SIZE); + assert node.isFull(); + byte[] keys = node.getKeys(); + Node[] child = node.getChild(); + System.arraycopy(keys, 0, this.keys, 0, node.noOfChildren); + System.arraycopy(child, 0, this.child, 0, node.noOfChildren); + + // update up links + for (int i = 0; i < noOfChildren; i++) { + replaceUplink(this, this.child[i]); + } + } + + Node16(Node48 node48) { + super(node48, NODE_SIZE); + assert node48.shouldShrink(); + byte[] keyIndex = node48.getKeyIndex(); + Node[] children = node48.getChild(); + + // keyIndex by virtue of being "array indexed" is already sorted + // so we can iterate and keep adding into Node16 + for (int i = 0, j = 0; i < Node48.KEY_INDEX_SIZE; i++) { + if (keyIndex[i] != Node48.ABSENT) { + child[j] = children[keyIndex[i]]; + keys[j] = unsigned(child[j].uplinkKey()); + replaceUplink(this, child[j]); + j++; + } + } + } + + static byte unsigned(byte b) { + return (byte) (b ^ BYTE_SHIFT); + } + + @Override + public Node findChild(byte partialKey) { + // TODO: use simple 
loop to see if -XX:+SuperWord applies SIMD JVM instrinsics + partialKey = unsigned(partialKey); + for (int i = 0; i < noOfChildren; i++) { + if (keys[i] == partialKey) { + return child[i]; + } + } + return null; + } + + @Override + public void addChild(byte partialKey, Node child) { + assert !isFull(); + byte unsignedPartialKey = unsigned(partialKey); + + int index = Arrays.binarySearch(keys, 0, noOfChildren, unsignedPartialKey); + // the partialKey should not exist + assert index < 0; + int insertionPoint = -(index + 1); + // shift elements from this point to right by one place + assert insertionPoint <= noOfChildren; + for (int i = noOfChildren; i > insertionPoint; i--) { + keys[i] = keys[i - 1]; + this.child[i] = this.child[i - 1]; + } + keys[insertionPoint] = unsignedPartialKey; + this.child[insertionPoint] = child; + noOfChildren++; + createUplink(this, child, partialKey); + } + + @Override + public void replace(byte partialKey, Node newChild) { + byte unsignedPartialKey = unsigned(partialKey); + int index = Arrays.binarySearch(keys, 0, noOfChildren, unsignedPartialKey); + assert index >= 0; + child[index] = newChild; + createUplink(this, newChild, partialKey); + } + + @Override + public void removeChild(byte partialKey) { + assert !shouldShrink(); + byte unsignedPartialKey = unsigned(partialKey); + int index = Arrays.binarySearch(keys, 0, noOfChildren, unsignedPartialKey); + // if this fails, the question is, how could you reach the leaf node? + // this node must've been your follow on pointer holding the partialKey + assert index >= 0; + removeUplink(child[index]); + for (int i = index; i < noOfChildren - 1; i++) { + keys[i] = keys[i + 1]; + child[i] = child[i + 1]; + } + child[noOfChildren - 1] = null; + noOfChildren--; + } + + @Override + public InnerNode grow() { + assert isFull(); + return new Node48(this); + } + + @Override + public boolean shouldShrink() { + return noOfChildren == Node4.NODE_SIZE; + } + + @Override + public InnerNode shrink() { + assert shouldShrink() : "Haven't crossed shrinking threshold yet"; + return new Node4(this); + } + + @Override + public Node first() { + assert noOfChildren > Node4.NODE_SIZE; + return child[0]; + } + + @Override + public Node last() { + assert noOfChildren > Node4.NODE_SIZE; + return child[noOfChildren - 1]; + } + + @Override + public Node ceil(byte partialKey) { + partialKey = unsigned(partialKey); + for (int i = 0; i < noOfChildren; i++) { + if (keys[i] >= partialKey) { + return child[i]; + } + } + return null; + } + + @Override + public Node greater(byte partialKey) { + partialKey = unsigned(partialKey); + for (int i = 0; i < noOfChildren; i++) { + if (keys[i] > partialKey) { + return child[i]; + } + } + return null; + } + + @Override + public Node lesser(byte partialKey) { + partialKey = unsigned(partialKey); + for (int i = noOfChildren - 1; i >= 0; i--) { + if (keys[i] < partialKey) { + return child[i]; + } + } + return null; + } + + @Override + public Node floor(byte partialKey) { + partialKey = unsigned(partialKey); + for (int i = noOfChildren - 1; i >= 0; i--) { + if (keys[i] <= partialKey) { + return child[i]; + } + } + return null; + } + + @Override + public boolean isFull() { + return noOfChildren == NODE_SIZE; + } + + byte[] getKeys() { + return keys; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node256.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node256.java new file mode 100644 index 0000000..0978487 --- /dev/null +++ 
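Node4 and Node16 keep their key arrays sorted in unsigned order while Java compares bytes as signed; XOR-ing with 0x80 (the BYTE_SHIFT constant above) flips the sign bit, mapping unsigned order 0..255 monotonically onto signed order -128..127, so Arrays.binarySearch and plain < comparisons work unmodified. The transform is its own inverse, which is why signed() simply delegates to unsigned(). A self-contained check:

// Checks that b -> (byte) (b ^ 0x80) maps unsigned byte order onto signed order.
public class UnsignedOrderDemo {
    static byte unsigned(byte b) {
        return (byte) (b ^ 0x80);  // same transform as Node16.unsigned
    }

    public static void main(String[] args) {
        byte five = (byte) 0x05;   // 5 unsigned, 5 signed
        byte max = (byte) 0xFF;    // 255 unsigned, but -1 signed
        System.out.println(max < five);                      // true: raw signed order is wrong
        System.out.println(unsigned(max) < unsigned(five));  // false: transformed order matches unsigned order
        System.out.println(unsigned(unsigned(max)) == max);  // true: involution, hence signed() == unsigned()
    }
}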
b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node256.java @@ -0,0 +1,140 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +class Node256 extends InnerNode { + static final int NODE_SIZE = 256; + + Node256(Node48 node) { + super(node, NODE_SIZE); + assert node.isFull(); + + byte[] keyIndex = node.getKeyIndex(); + Node[] child = node.getChild(); + + for (int i = 0; i < Node48.KEY_INDEX_SIZE; i++) { + byte index = keyIndex[i]; + if (index == Node48.ABSENT) { + continue; + } + assert index >= 0 && index <= 47; + // index is byte, but gets type promoted + // https://docs.oracle.com/javase/specs/jls/se7/html/jls-10.html#jls-10.4-120 + this.child[i] = child[index]; + // update up link + replaceUplink(this, this.child[i]); + } + } + + @Override + public Node findChild(byte partialKey) { + // We treat the 8 bits as unsigned int since we've got 256 slots + int index = Byte.toUnsignedInt(partialKey); + return child[index]; + } + + @Override + public void addChild(byte partialKey, Node child) { + // addChild would never be called on a full Node256 + // since the corresponding findChild for any byte key + // would always find the byte since the Node is full. + assert !isFull(); + int index = Byte.toUnsignedInt(partialKey); + assert this.child[index] == null; + createUplink(this, child, partialKey); + this.child[index] = child; + noOfChildren++; + } + + @Override + public void replace(byte partialKey, Node newChild) { + int index = Byte.toUnsignedInt(partialKey); + assert child[index] != null; + child[index] = newChild; + createUplink(this, newChild, partialKey); + } + + @Override + public void removeChild(byte partialKey) { + int index = Byte.toUnsignedInt(partialKey); + assert child[index] != null; + removeUplink(child[index]); + child[index] = null; + noOfChildren--; + } + + @Override + public InnerNode grow() { + throw new UnsupportedOperationException("Span of ART is 8 bits, so Node256 is the largest node type."); + } + + @Override + public boolean shouldShrink() { + return noOfChildren == Node48.NODE_SIZE; + } + + @Override + public InnerNode shrink() { + assert shouldShrink(); + return new Node48(this); + } + + @Override + public Node first() { + assert noOfChildren > Node48.NODE_SIZE; + int i = 0; + while (child[i] == null) i++; + return child[i]; + } + + @Override + public Node last() { + assert noOfChildren > Node48.NODE_SIZE; + int i = NODE_SIZE - 1; + while (child[i] == null) i--; + return child[i]; + } + + @Override + public Node ceil(byte partialKey) { + for (int i = Byte.toUnsignedInt(partialKey); i < NODE_SIZE; i++) { + if (child[i] != null) { + return child[i]; + } + } + return null; + } + + @Override + public Node greater(byte partialKey) { + for (int i = Byte.toUnsignedInt(partialKey) + 1; i < NODE_SIZE; i++) { + if (child[i] != null) { + return child[i]; + } + } + return null; + } + + @Override + public Node lesser(byte partialKey) { + for (int i = Byte.toUnsignedInt(partialKey) - 1; i >= 0; i--) { + if (child[i] != null) { + return child[i]; + } + } + return null; + } + + @Override + public Node floor(byte partialKey) { + for (int i = Byte.toUnsignedInt(partialKey); i >= 0; i--) { + if (child[i] != null) { + return child[i]; + } + } + return null; + } + + @Override + public boolean isFull() { + return noOfChildren == NODE_SIZE; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node4.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node4.java new file mode 
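Node256 is the degenerate end of the spectrum: the partial key itself indexes the child array, so find, add, and remove are single array accesses with no search at all. The lookup in miniature:

// Node256-style child lookup: the key byte is the array index, no search.
public class DirectIndexDemo {
    static Object findChild(Object[] child, byte partialKey) {
        return child[Byte.toUnsignedInt(partialKey)];  // O(1)
    }

    public static void main(String[] args) {
        Object[] children = new Object[256];
        children[255] = "leaf";
        System.out.println(findChild(children, (byte) 0xFF));  // leaf (index 255, not -1)
        System.out.println(findChild(children, (byte) 0x00));  // null: no child mapped
    }
}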
100644 index 0000000..212ae5c --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node4.java @@ -0,0 +1,195 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +class Node4 extends InnerNode { + + static final int NODE_SIZE = 4; + // 2^7 = 128 + private static final int BYTE_SHIFT = 1 << Byte.SIZE - 1; + // each array element would contain the partial byte key to match + // if key matches then take up the same index from the child pointer array + private final byte[] keys = new byte[NODE_SIZE]; + + Node4() { + super(NODE_SIZE); + } + + Node4(Node16 node16) { + super(node16, NODE_SIZE); + assert node16.shouldShrink(); + byte[] keys = node16.getKeys(); + Node[] child = node16.getChild(); + System.arraycopy(keys, 0, this.keys, 0, node16.noOfChildren); + System.arraycopy(child, 0, this.child, 0, node16.noOfChildren); + + // update up links + for (int i = 0; i < noOfChildren; i++) { + replaceUplink(this, this.child[i]); + } + } + + /** + * For Node4, Node16 to interpret every byte as unsigned when storing partial keys. + * Node 48, Node256 simply use {@link Byte#toUnsignedInt(byte)} + * to index into their key arrays. + */ + static byte unsigned(byte b) { + return (byte) (b ^ BYTE_SHIFT); + } + + // passed b must have been interpreted as unsigned already + // this is the reverse of unsigned + static byte signed(byte b) { + return unsigned(b); + } + + @Override + public Node findChild(byte partialKey) { + partialKey = unsigned(partialKey); + // paper does simple loop over because it's a tiny array of size 4 + for (int i = 0; i < noOfChildren; i++) { + if (keys[i] == partialKey) { + return child[i]; + } + } + return null; + } + + @Override + public void addChild(byte partialKey, Node child) { + assert !isFull(); + byte unsignedPartialKey = unsigned(partialKey); + // shift elements from this point to right by one place + // noOfChildren here would never be == Node_SIZE (since we have isFull() check) + int i = noOfChildren; + for (; i > 0 && unsignedPartialKey < keys[i - 1]; i--) { + keys[i] = keys[i - 1]; + this.child[i] = this.child[i - 1]; + } + keys[i] = unsignedPartialKey; + this.child[i] = child; + noOfChildren++; + createUplink(this, child, partialKey); + } + + @Override + public void replace(byte partialKey, Node newChild) { + byte unsignedPartialKey = unsigned(partialKey); + + int index = 0; + for (; index < noOfChildren; index++) { + if (keys[index] == unsignedPartialKey) { + break; + } + } + // replace will be called from in a state where you know partialKey entry surely exists + assert index < noOfChildren : "Partial key does not exist"; + child[index] = newChild; + createUplink(this, newChild, partialKey); + } + + @Override + public void removeChild(byte partialKey) { + partialKey = unsigned(partialKey); + int index = 0; + for (; index < noOfChildren; index++) { + if (keys[index] == partialKey) { + break; + } + } + // if this fails, the question is, how could you reach the leaf node? 
+ // this node must've been your follow on pointer holding the partialKey + assert index < noOfChildren : "Partial key does not exist"; + removeUplink(child[index]); + for (int i = index; i < noOfChildren - 1; i++) { + keys[i] = keys[i + 1]; + child[i] = child[i + 1]; + } + child[noOfChildren - 1] = null; + noOfChildren--; + } + + @Override + public InnerNode grow() { + assert isFull(); + // grow from Node4 to Node16 + return new Node16(this); + } + + @Override + public boolean shouldShrink() { + return false; + } + + @Override + public InnerNode shrink() { + throw new UnsupportedOperationException("Node4 is smallest node type"); + } + + @Override + public Node first() { + return child[0]; + } + + @Override + public Node last() { + return child[Math.max(0, noOfChildren - 1)]; + } + + @Override + public Node ceil(byte partialKey) { + partialKey = unsigned(partialKey); + for (int i = 0; i < noOfChildren; i++) { + if (keys[i] >= partialKey) { + return child[i]; + } + } + return null; + } + + @Override + public Node greater(byte partialKey) { + partialKey = unsigned(partialKey); + for (int i = 0; i < noOfChildren; i++) { + if (keys[i] > partialKey) { + return child[i]; + } + } + return null; + } + + @Override + public Node lesser(byte partialKey) { + partialKey = unsigned(partialKey); + for (int i = noOfChildren - 1; i >= 0; i--) { + if (keys[i] < partialKey) { + return child[i]; + } + } + return null; + } + + @Override + public Node floor(byte partialKey) { + partialKey = unsigned(partialKey); + for (int i = noOfChildren - 1; i >= 0; i--) { + if (keys[i] <= partialKey) { + return child[i]; + } + } + return null; + } + + @Override + public boolean isFull() { + return noOfChildren == NODE_SIZE; + } + + byte[] getKeys() { + return keys; + } + + byte getOnlyChildKey() { + assert noOfChildren == 1; + return signed(keys[0]); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node48.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node48.java new file mode 100644 index 0000000..a10af43 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Node48.java @@ -0,0 +1,194 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.Arrays; + +class Node48 extends InnerNode { + /* + 48 * 8 (child pointers) + 256 = 640 bytes + */ + + static final int NODE_SIZE = 48; + static final int KEY_INDEX_SIZE = 256; + // so that when you use the partial key to index into keyIndex + // and you see a -1, you know there's no mapping for this key + static final byte ABSENT = -1; + private static final int BYTE_SHIFT = 1 << Byte.SIZE - 1; + // for partial keys of one byte size, you index directly into this array to find the + // array index of the child pointer array + // the index value can only be between 0 to 47 (to index into the child pointer array) + private final byte[] keyIndex = new byte[KEY_INDEX_SIZE]; + + Node48(Node16 node) { + super(node, NODE_SIZE); + assert node.isFull(); + + Arrays.fill(keyIndex, ABSENT); + + byte[] keys = node.getKeys(); + Node[] child = node.getChild(); + + for (int i = 0; i < Node16.NODE_SIZE; i++) { + byte key = signed(keys[i]); + int index = Byte.toUnsignedInt(key); + keyIndex[index] = (byte) i; + this.child[i] = child[i]; + // update up link + replaceUplink(this, this.child[i]); + } + } + + Node48(Node256 node256) { + super(node256, NODE_SIZE); + assert node256.shouldShrink(); + Arrays.fill(keyIndex, ABSENT); + + Node[] children = 
node256.getChild(); + byte j = 0; + for (int i = 0; i < Node256.NODE_SIZE; i++) { + if (children[i] != null) { + keyIndex[i] = j; + child[j] = children[i]; + replaceUplink(this, child[j]); + j++; + } + } + assert j == NODE_SIZE; + } + + static byte signed(byte b) { + return unsigned(b); + } + + static byte unsigned(byte b) { + return (byte) (b ^ BYTE_SHIFT); + } + + @Override + public Node findChild(byte partialKey) { + byte index = keyIndex[Byte.toUnsignedInt(partialKey)]; + if (index == ABSENT) { + return null; + } + + assert index >= 0 && index <= 47; + return child[index]; + } + + @Override + public void addChild(byte partialKey, Node child) { + assert !isFull(); + int index = Byte.toUnsignedInt(partialKey); + assert keyIndex[index] == ABSENT; + // find a null place, left fragmented by a removeChild or has always been null + byte insertPosition = 0; + for (; this.child[insertPosition] != null && insertPosition < NODE_SIZE; insertPosition++) ; + + this.child[insertPosition] = child; + keyIndex[index] = insertPosition; + noOfChildren++; + createUplink(this, child, partialKey); + } + + @Override + public void replace(byte partialKey, Node newChild) { + byte index = keyIndex[Byte.toUnsignedInt(partialKey)]; + assert index >= 0 && index <= 47; + child[index] = newChild; + createUplink(this, newChild, partialKey); + } + + @Override + public void removeChild(byte partialKey) { + assert !shouldShrink(); + int index = Byte.toUnsignedInt(partialKey); + int pos = keyIndex[index]; + assert pos != ABSENT; + removeUplink(child[pos]); + child[pos] = null; // fragment + keyIndex[index] = ABSENT; + noOfChildren--; + } + + @Override + public InnerNode grow() { + assert isFull(); + return new Node256(this); + } + + @Override + public boolean shouldShrink() { + return noOfChildren == Node16.NODE_SIZE; + } + + @Override + public InnerNode shrink() { + assert shouldShrink(); + return new Node16(this); + } + + @Override + public Node first() { + assert noOfChildren > Node16.NODE_SIZE; + int i = 0; + while (keyIndex[i] == ABSENT) i++; + return child[keyIndex[i]]; + } + + @Override + public Node last() { + assert noOfChildren > Node16.NODE_SIZE; + int i = KEY_INDEX_SIZE - 1; + while (keyIndex[i] == ABSENT) i--; + return child[keyIndex[i]]; + } + + @Override + public boolean isFull() { + return noOfChildren == NODE_SIZE; + } + + @Override + public Node ceil(byte partialKey) { + for (int i = Byte.toUnsignedInt(partialKey); i < KEY_INDEX_SIZE; i++) { + if (keyIndex[i] != ABSENT) { + return child[keyIndex[i]]; + } + } + return null; + } + + @Override + public Node greater(byte partialKey) { + for (int i = Byte.toUnsignedInt(partialKey) + 1; i < KEY_INDEX_SIZE; i++) { + if (keyIndex[i] != ABSENT) { + return child[keyIndex[i]]; + } + } + return null; + } + + @Override + public Node lesser(byte partialKey) { + for (int i = Byte.toUnsignedInt(partialKey) - 1; i >= 0; i--) { + if (keyIndex[i] != ABSENT) { + return child[keyIndex[i]]; + } + } + return null; + } + + @Override + public Node floor(byte partialKey) { + for (int i = Byte.toUnsignedInt(partialKey); i >= 0; i--) { + if (keyIndex[i] != ABSENT) { + return child[keyIndex[i]]; + } + } + return null; + } + + + byte[] getKeyIndex() { + return keyIndex; + } +} \ No newline at end of file diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/PrivateEntryIterator.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/PrivateEntryIterator.java new file mode 100644 index 0000000..fdecd44 --- /dev/null +++ 
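Node48 sits between the linear-scan nodes and Node256: a 256-byte keyIndex maps each unsigned key byte either to ABSENT or to a slot in the dense 48-entry child array, making lookup two array reads. Because the index array is ordered by construction, Node16(Node48) can rebuild its sorted keys in a single pass. A minimal two-level lookup in the same shape:

import java.util.Arrays;

// Node48-style two-level lookup: 256-byte index array -> dense 48-slot child array.
public class TwoLevelLookupDemo {
    static final byte ABSENT = -1;

    static Object findChild(byte[] keyIndex, Object[] child, byte partialKey) {
        byte slot = keyIndex[Byte.toUnsignedInt(partialKey)];
        return slot == ABSENT ? null : child[slot];  // two array reads, no search
    }

    public static void main(String[] args) {
        byte[] keyIndex = new byte[256];
        Arrays.fill(keyIndex, ABSENT);
        Object[] child = new Object[48];
        keyIndex[Byte.toUnsignedInt((byte) 'a')] = 0;  // byte 'a' -> slot 0
        child[0] = "leaf-a";
        System.out.println(findChild(keyIndex, child, (byte) 'a'));  // leaf-a
        System.out.println(findChild(keyIndex, child, (byte) 'b'));  // null
    }
}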
b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/PrivateEntryIterator.java
@@ -0,0 +1,72 @@
+package org.xbib.datastructures.trie.radix.adaptive;
+
+import java.util.ConcurrentModificationException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Base class for AdaptiveRadixTree iterators
+ * note: taken from TreeMap
+ */
+abstract class PrivateEntryIterator<K, V, T> implements Iterator<T> {
+    private final AdaptiveRadixTree<K, V> m;
+    private LeafNode<K, V> next;
+    private LeafNode<K, V> lastReturned;
+    private int expectedModCount;
+
+    PrivateEntryIterator(AdaptiveRadixTree<K, V> m, LeafNode<K, V> first) {
+        expectedModCount = m.getModCount();
+        lastReturned = null;
+        next = first;
+        this.m = m;
+    }
+
+    public final boolean hasNext() {
+        return next != null;
+    }
+
+    final LeafNode<K, V> nextEntry() {
+        LeafNode<K, V> e = next;
+        if (e == null)
+            throw new NoSuchElementException();
+        if (m.getModCount() != expectedModCount)
+            throw new ConcurrentModificationException();
+        next = AdaptiveRadixTree.successor(e);
+        lastReturned = e;
+        return e;
+    }
+
+    final LeafNode<K, V> prevEntry() {
+        LeafNode<K, V> e = next;
+        if (e == null)
+            throw new NoSuchElementException();
+        if (m.getModCount() != expectedModCount)
+            throw new ConcurrentModificationException();
+        next = AdaptiveRadixTree.predecessor(e);
+        lastReturned = e;
+        return e;
+    }
+
+    public void remove() {
+        if (lastReturned == null)
+            throw new IllegalStateException();
+        if (m.getModCount() != expectedModCount)
+            throw new ConcurrentModificationException();
+        /*
+          next already points to the next leaf node (which might be a sibling of lastReturned).
+          If next is the only sibling left, then the parent gets path compressed.
+          BUT the reference that next holds to the sibling leaf node remains the same, only its parent changes.
+          Therefore at all times, next is a valid reference to be simply returned on the
+          next call to next().
+          Is there any scenario in which the next leaf pointer gets changed and the iterator's next
+          points to a stale leaf?
+          No.
+          In fact the LeafNode ctor is only ever called in a put, and only for the new leaf
+          to be created/entered.
+          So references to an existing LeafNode won't get stale.
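This iterator follows TreeMap's fail-fast protocol: capture modCount at construction, compare before each step, and resynchronize only after the iterator's own remove(). The observable behavior, demonstrated with TreeMap since it implements the identical contract:

import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.TreeMap;

// The fail-fast contract implemented above, observed via TreeMap.
public class FailFastDemo {
    public static void main(String[] args) {
        TreeMap<String, Integer> m = new TreeMap<>();
        m.put("a", 1);
        m.put("b", 2);
        Iterator<String> it = m.keySet().iterator();
        it.next();
        m.put("c", 3);  // structural modification behind the iterator's back
        try {
            it.next();
        } catch (ConcurrentModificationException e) {
            System.out.println("fail-fast triggered");  // expectedModCount mismatch
        }
    }
}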
+ */ + m.deleteEntry(lastReturned); + expectedModCount = m.getModCount(); + lastReturned = null; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/ValueIterator.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/ValueIterator.java new file mode 100644 index 0000000..8faaddf --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/ValueIterator.java @@ -0,0 +1,12 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +final class ValueIterator extends PrivateEntryIterator { + ValueIterator(AdaptiveRadixTree m, LeafNode first) { + super(m, first); + } + + @Override + public V next() { + return nextEntry().getValue(); + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Values.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Values.java new file mode 100644 index 0000000..02222e5 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/Values.java @@ -0,0 +1,51 @@ +package org.xbib.datastructures.trie.radix.adaptive; + +import java.util.AbstractCollection; +import java.util.Iterator; + +/** + * Contains all stuff borrowed from TreeMap + * such methods/utilities should be taken out and made a library of their own + * so any implementation of NavigableMap can reuse it, while the implementation + * provides certain primitive methods (getEntry, successor, predecessor, etc) + */ +class Values extends AbstractCollection { + private final AdaptiveRadixTree m; + + Values(AdaptiveRadixTree m) { + this.m = m; + } + + @Override + public Iterator iterator() { + return m.valueIterator(); + } + + @Override + public int size() { + return m.size(); + } + + @SuppressWarnings("unchecked") + @Override + public boolean contains(Object o) { + return m.containsValue((V) o); + } + + @Override + public boolean remove(Object o) { + for (LeafNode e = m.getFirstEntry(); e != null; e = AdaptiveRadixTree.successor(e)) { + if (AdaptiveRadixTree.valEquals(e.getValue(), o)) { + m.deleteEntry(e); + return true; + } + } + return false; + } + + @Override + public void clear() { + m.clear(); + } + +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArrayChildPtr.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArrayChildPtr.java new file mode 100644 index 0000000..80a3bde --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArrayChildPtr.java @@ -0,0 +1,21 @@ +package org.xbib.datastructures.trie.radix.adaptive.persistent; + +class ArrayChildPtr extends ChildPtr { + private final int i; + private final Node[] children; + + public ArrayChildPtr(Node[] children, int i) { + this.children = children; + this.i = i; + } + + @Override + public Node get() { + return children[i]; + } + + @Override + public void set(Node n) { + children[i] = n; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtIterator.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtIterator.java new file mode 100644 index 0000000..5fabaa8 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtIterator.java @@ -0,0 +1,79 @@ +package org.xbib.datastructures.trie.radix.adaptive.persistent; + +import 
java.util.ArrayDeque; +import java.util.Deque; +import java.util.Iterator; +import java.util.NoSuchElementException; + +class ArtIterator implements Iterator> { + private final Deque elemStack = new ArrayDeque(); + private final Deque idxStack = new ArrayDeque(); + + public ArtIterator(Node root) { + if (root != null) { + elemStack.push(root); + idxStack.push(0); + maybeAdvance(); + } + } + + @Override + public boolean hasNext() { + return !elemStack.isEmpty(); + } + + @Override + public Tuple2 next() { + if (hasNext()) { + Leaf leaf = (Leaf) elemStack.peek(); + byte[] key = leaf.key; + Object value = leaf.value; + + // Mark the leaf as consumed + idxStack.push(idxStack.pop() + 1); + + maybeAdvance(); + return new Tuple2(key, value); + } else { + throw new NoSuchElementException("end of iterator"); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + // Postcondition: if the stack is nonempty, the top of the stack must contain a leaf + private void maybeAdvance() { + // Pop exhausted nodes + while (!elemStack.isEmpty() && elemStack.peek().exhausted(idxStack.peek())) { + elemStack.pop(); + idxStack.pop(); + + if (!elemStack.isEmpty()) { + // Move on by advancing the exhausted node's parent + idxStack.push(idxStack.pop() + 1); + } + } + + if (!elemStack.isEmpty()) { + // Descend to the next leaf node element + while (true) { + if (elemStack.peek() instanceof Leaf) { + // Done - reached the next element + break; + } else { + // Advance to the next child of this node + ArtNode cur = (ArtNode) elemStack.peek(); + idxStack.push(cur.nextChildAtOrAfter(idxStack.pop())); + Node child = cur.childAt(idxStack.peek()); + + // Push it onto the stack + elemStack.push(child); + idxStack.push(0); + } + } + } + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode.java new file mode 100644 index 0000000..bae44c4 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode.java @@ -0,0 +1,174 @@ +package org.xbib.datastructures.trie.radix.adaptive.persistent; + +abstract class ArtNode extends Node { + final byte[] partial = new byte[Node.MAX_PREFIX_LEN]; + int num_children = 0; + int partial_len = 0; + + public ArtNode() { + super(); + } + + public ArtNode(final ArtNode other) { + super(); + this.num_children = other.num_children; + this.partial_len = other.partial_len; + System.arraycopy(other.partial, 0, + partial, 0, + Math.min(Node.MAX_PREFIX_LEN, partial_len)); + } + + /** + * Returns the number of prefix characters shared between + * the key and node. 
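ArtIterator performs an iterative depth-first traversal with two parallel stacks, nodes and per-node child indices, and maintains the invariant that after maybeAdvance() either the stacks are empty or a not-yet-consumed Leaf is on top. A minimal sketch of the same invariant over a toy n-ary tree (illustrative types, Java 17):

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

// Same two-stack, leaf-on-top invariant as ArtIterator, over a toy n-ary tree.
public class LeafIterator implements Iterator<String> {
    sealed interface TNode permits Leaf, Inner {}
    record Leaf(String value) implements TNode {}
    record Inner(List<TNode> children) implements TNode {}

    private final Deque<TNode> elem = new ArrayDeque<>();
    private final Deque<Integer> idx = new ArrayDeque<>();

    LeafIterator(TNode root) {
        elem.push(root);
        idx.push(0);
        advance();
    }

    @Override
    public boolean hasNext() {
        return !elem.isEmpty();
    }

    @Override
    public String next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        String v = ((Leaf) elem.peek()).value();
        idx.push(idx.pop() + 1);  // mark the leaf consumed, as ArtIterator does
        advance();
        return v;
    }

    // Postcondition: stacks empty, or an unconsumed Leaf on top (mirrors maybeAdvance()).
    private void advance() {
        while (!elem.isEmpty()) {
            TNode top = elem.peek();
            if (top instanceof Leaf && idx.peek() == 0) {
                return;  // next element found
            }
            if (top instanceof Inner in && idx.peek() < in.children().size()) {
                elem.push(in.children().get(idx.peek()));  // descend to the current child
                idx.push(0);
            } else {
                elem.pop();  // exhausted: pop and advance the parent's index
                idx.pop();
                if (!idx.isEmpty()) {
                    idx.push(idx.pop() + 1);
                }
            }
        }
    }
}

Iterating a LeafIterator over Inner(List.of(new Leaf("a"), new Inner(List.of(new Leaf("b"))))) yields "a" then "b", mirroring how ArtIterator surfaces leaf tuples in key order.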
+ */ + public int check_prefix(final byte[] key, int depth) { + int max_cmp = Math.min(Math.min(partial_len, Node.MAX_PREFIX_LEN), key.length - depth); + int idx; + for (idx = 0; idx < max_cmp; idx++) { + if (partial[idx] != key[depth + idx]) + return idx; + } + return idx; + } + + /** + * Calculates the index at which the prefixes mismatch + */ + public int prefix_mismatch(final byte[] key, int depth) { + int max_cmp = Math.min(Math.min(Node.MAX_PREFIX_LEN, partial_len), key.length - depth); + int idx; + for (idx = 0; idx < max_cmp; idx++) { + if (partial[idx] != key[depth + idx]) + return idx; + } + + // If the prefix is short we can avoid finding a leaf + if (partial_len > Node.MAX_PREFIX_LEN) { + // Prefix is longer than what we've checked, find a leaf + final Leaf l = this.minimum(); + max_cmp = Math.min(l.key.length, key.length) - depth; + for (; idx < max_cmp; idx++) { + if (l.key[idx + depth] != key[depth + idx]) + return idx; + } + } + return idx; + } + + public abstract ChildPtr find_child(byte c); + + public abstract void add_child(ChildPtr ref, byte c, Node child); + + public abstract void remove_child(ChildPtr ref, byte c); + + // Precondition: isLastChild(i) == false + public abstract int nextChildAtOrAfter(int i); + + public abstract Node childAt(int i); + + @Override + public boolean insert(ChildPtr ref, final byte[] key, Object value, + int depth, boolean force_clone) { + boolean do_clone = force_clone || this.refcount > 1; + + // Check if given node has a prefix + if (partial_len > 0) { + // Determine if the prefixes differ, since we need to split + int prefix_diff = prefix_mismatch(key, depth); + if (prefix_diff >= partial_len) { + depth += partial_len; + } else { + // Create a new node + ArtNode4 result = new ArtNode4(); + Node ref_old = ref.get(); + ref.change_no_decrement(result); // don't decrement yet, because doing so might destroy self + result.partial_len = prefix_diff; + System.arraycopy(partial, 0, + result.partial, 0, + Math.min(Node.MAX_PREFIX_LEN, prefix_diff)); + + // Adjust the prefix of the old node + ArtNode this_writable = do_clone ? (ArtNode) this.n_clone() : this; + if (partial_len <= Node.MAX_PREFIX_LEN) { + result.add_child(ref, this_writable.partial[prefix_diff], this_writable); + this_writable.partial_len -= (prefix_diff + 1); + System.arraycopy(this_writable.partial, prefix_diff + 1, + this_writable.partial, 0, + Math.min(Node.MAX_PREFIX_LEN, this_writable.partial_len)); + } else { + this_writable.partial_len -= (prefix_diff + 1); + final Leaf l = this.minimum(); + result.add_child(ref, l.key[depth + prefix_diff], this_writable); + System.arraycopy(l.key, depth + prefix_diff + 1, + this_writable.partial, 0, + Math.min(Node.MAX_PREFIX_LEN, this_writable.partial_len)); + } + + // Insert the new leaf + Leaf l = new Leaf(key, value); + result.add_child(ref, key[depth + prefix_diff], l); + + ref_old.decrement_refcount(); + + return true; + } + } + + // Clone self if necessary + ArtNode this_writable = do_clone ? 
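check_prefix and prefix_mismatch implement pessimistic path compression: a node stores at most Node.MAX_PREFIX_LEN bytes of its compressed path (the constant's value is not shown in this excerpt; 10 below is illustrative), and when partial_len exceeds that window, prefix_mismatch recovers the missing bytes from a descendant leaf via minimum(). The core comparison in isolation:

// The core of check_prefix: count shared bytes between the stored partial
// and the search key, capped by the stored window.
public class PrefixCheckDemo {
    static int checkPrefix(byte[] partial, int partialLen, int maxPrefixLen,
                           byte[] key, int depth) {
        int maxCmp = Math.min(Math.min(partialLen, maxPrefixLen), key.length - depth);
        int idx = 0;
        while (idx < maxCmp && partial[idx] == key[depth + idx]) {
            idx++;
        }
        return idx;  // index of the first mismatch
    }

    public static void main(String[] args) {
        byte[] partial = "romul".getBytes();  // compressed path stored in a node
        byte[] key = "romanus".getBytes();    // key being inserted or looked up
        // shared prefix "rom": mismatch at index 3 ('u' vs 'a')
        System.out.println(checkPrefix(partial, 5, 10, key, 0));  // 3
    }
}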
(ArtNode) this.n_clone() : this; + if (do_clone) { + ref.change(this_writable); + } + // Do the insert, either in a child (if a matching child already exists) or in self + ChildPtr child = this_writable.find_child(key[depth]); + if (child != null) { + return Node.insert(child.get(), child, key, value, depth + 1, force_clone); + } else { + // No child, node goes within us + Leaf l = new Leaf(key, value); + this_writable.add_child(ref, key[depth], l); + // If `this` was full and `do_clone` is true, we will clone a full node + // and then immediately delete the clone in favor of a larger node. + // TODO: avoid this + return true; + } + } + + @Override + public boolean delete(ChildPtr ref, final byte[] key, int depth, + boolean force_clone) { + // Bail if the prefix does not match + if (partial_len > 0) { + int prefix_len = check_prefix(key, depth); + if (prefix_len != Math.min(MAX_PREFIX_LEN, partial_len)) { + return false; + } + depth += partial_len; + } + + boolean do_clone = force_clone || this.refcount > 1; + + // Clone self if necessary. Note: this allocation will be wasted if the + // key does not exist in the child's subtree + ArtNode this_writable = do_clone ? (ArtNode) this.n_clone() : this; + + // Find child node + ChildPtr child = this_writable.find_child(key[depth]); + if (child == null) return false; // when translating to C++, make sure to delete this_writable + + if (do_clone) { + ref.change(this_writable); + } + + boolean child_is_leaf = child.get() instanceof Leaf; + boolean do_delete = child.get().delete(child, key, depth + 1, do_clone); + + if (do_delete && child_is_leaf) { + // The leaf to delete is our child, so we must remove it + this_writable.remove_child(ref, key[depth]); + } + + return do_delete; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode16.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode16.java new file mode 100644 index 0000000..db9640e --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode16.java @@ -0,0 +1,168 @@ +package org.xbib.datastructures.trie.radix.adaptive.persistent; + +class ArtNode16 extends ArtNode { + public static int count; + byte[] keys = new byte[16]; + Node[] children = new Node[16]; + + public ArtNode16() { + super(); + count++; + } + + public ArtNode16(final ArtNode16 other) { + super(other); + System.arraycopy(other.keys, 0, keys, 0, other.num_children); + for (int i = 0; i < other.num_children; i++) { + children[i] = other.children[i]; + children[i].refcount++; + } + count++; + } + + public ArtNode16(final ArtNode4 other) { + this(); + // ArtNode + this.num_children = other.num_children; + this.partial_len = other.partial_len; + System.arraycopy(other.partial, 0, + this.partial, 0, + Math.min(MAX_PREFIX_LEN, this.partial_len)); + // ArtNode16 from ArtNode4 + System.arraycopy(other.keys, 0, keys, 0, this.num_children); + for (int i = 0; i < this.num_children; i++) { + children[i] = other.children[i]; + children[i].refcount++; + } + } + + public ArtNode16(final ArtNode48 other) { + this(); + assert (other.num_children <= 16); + // ArtNode + this.num_children = other.num_children; + this.partial_len = other.partial_len; + System.arraycopy(other.partial, 0, + this.partial, 0, + Math.min(MAX_PREFIX_LEN, this.partial_len)); + // ArtNode16 from ArtNode48 + int child = 0; + for (int i = 0; i < 256; i++) { + int pos = to_uint(other.keys[i]); + if (pos != 0) { + 
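The do_clone test above is the heart of the persistence story: a node is mutated in place only while it is uniquely referenced (refcount <= 1); once a snapshot shares it, insert and delete copy the node, so the old root keeps seeing the untouched version while unshared subtrees are modified directly. The rule in miniature (an illustrative class, not this codebase's API):

// Minimal restatement of the copy-on-write rule used by insert() and delete().
class CowNode {
    int refcount = 1;
    int payload;  // stand-in for real node contents

    CowNode writable(boolean forceClone) {
        if (!forceClone && refcount <= 1) {
            return this;               // sole owner: mutate in place
        }
        CowNode copy = new CowNode();  // shared with a snapshot: copy the node,
        copy.payload = this.payload;   // leaving the original untouched
        return copy;
    }
}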
keys[child] = (byte) i; + children[child] = other.children[pos - 1]; + children[child].refcount++; + child++; + } + } + } + + @Override + public Node n_clone() { + return new ArtNode16(this); + } + + @Override + public ChildPtr find_child(byte c) { + // TODO: avoid linear search using intrinsics if available + for (int i = 0; i < this.num_children; i++) { + if (keys[i] == c) { + return new ArrayChildPtr(children, i); + } + } + return null; + } + + @Override + public Leaf minimum() { + return Node.minimum(children[0]); + } + + @Override + public void add_child(ChildPtr ref, byte c, Node child) { + assert (refcount <= 1); + + if (this.num_children < 16) { + // TODO: avoid linear search using intrinsics if available + int idx; + for (idx = 0; idx < this.num_children; idx++) { + if (to_uint(c) < to_uint(keys[idx])) break; + } + + // Shift to make room + System.arraycopy(this.keys, idx, this.keys, idx + 1, this.num_children - idx); + System.arraycopy(this.children, idx, this.children, idx + 1, this.num_children - idx); + + // Insert element + this.keys[idx] = c; + this.children[idx] = child; + child.refcount++; + this.num_children++; + } else { + // Copy the node16 into a new node48 + ArtNode48 result = new ArtNode48(this); + // Update the parent pointer to the node48 + ref.change(result); + // Insert the element into the node48 instead + result.add_child(ref, c, child); + } + } + + @Override + public void remove_child(ChildPtr ref, byte c) { + assert (refcount <= 1); + + int idx; + for (idx = 0; idx < this.num_children; idx++) { + if (c == keys[idx]) break; + } + if (idx == this.num_children) return; + + children[idx].decrement_refcount(); + + // Shift to fill the hole + System.arraycopy(this.keys, idx + 1, this.keys, idx, this.num_children - idx - 1); + System.arraycopy(this.children, idx + 1, this.children, idx, this.num_children - idx - 1); + this.num_children--; + + if (num_children == 3) { + ArtNode4 result = new ArtNode4(this); + ref.change(result); + } + } + + @Override + public boolean exhausted(int i) { + return i >= num_children; + } + + @Override + public int nextChildAtOrAfter(int i) { + return i; + } + + @Override + public Node childAt(int i) { + return children[i]; + } + + @Override + public int decrement_refcount() { + if (--this.refcount <= 0) { + int freed = 0; + for (int i = 0; i < this.num_children; i++) { + freed += children[i].decrement_refcount(); + } + count--; + // delete this; + return freed + 232; + // object size (8) + refcount (4) + + // num_children int (4) + partial_len int (4) + + // pointer to partial array (8) + partial array size (8+4+1*MAX_PREFIX_LEN) + // pointer to key array (8) + key array size (8+4+1*16) + + // pointer to children array (8) + children array size (8+4+8*16) + } + return 0; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode256.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode256.java new file mode 100644 index 0000000..bdc432d --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode256.java @@ -0,0 +1,125 @@ +package org.xbib.datastructures.trie.radix.adaptive.persistent; + +class ArtNode256 extends ArtNode { + public static int count; + Node[] children = new Node[256]; + + public ArtNode256() { + super(); + count++; + } + + public ArtNode256(final ArtNode256 other) { + super(other); + for (int i = 0; i < 256; i++) { + children[i] = other.children[i]; + if 
(children[i] != null) { + children[i].refcount++; + } + } + count++; + } + + public ArtNode256(final ArtNode48 other) { + this(); + // ArtNode + this.num_children = other.num_children; + this.partial_len = other.partial_len; + System.arraycopy(other.partial, 0, this.partial, 0, Math.min(MAX_PREFIX_LEN, this.partial_len)); + // ArtNode256 from ArtNode48 + for (int i = 0; i < 256; i++) { + if (other.keys[i] != 0) { + children[i] = other.children[to_uint(other.keys[i]) - 1]; + children[i].refcount++; + } + } + } + + @Override + public Node n_clone() { + return new ArtNode256(this); + } + + @Override + public ChildPtr find_child(byte c) { + if (children[to_uint(c)] != null) return new ArrayChildPtr(children, to_uint(c)); + return null; + } + + @Override + public Leaf minimum() { + int idx = 0; + while (children[idx] == null) idx++; + return Node.minimum(children[idx]); + } + + @Override + public void add_child(ChildPtr ref, byte c, Node child) { + assert (refcount <= 1); + + this.num_children++; + this.children[to_uint(c)] = child; + child.refcount++; + } + + @Override + public void remove_child(ChildPtr ref, byte c) { + assert (refcount <= 1); + + children[to_uint(c)].decrement_refcount(); + children[to_uint(c)] = null; + num_children--; + + if (num_children == 37) { + ArtNode48 result = new ArtNode48(this); + ref.change(result); + } + } + + @Override + public boolean exhausted(int c) { + for (int i = c; i < 256; i++) { + if (children[i] != null) { + return false; + } + } + return true; + } + + @Override + public int nextChildAtOrAfter(int c) { + int pos = c; + for (; pos < 256; pos++) { + if (children[pos] != null) { + break; + } + } + return pos; + } + + @Override + public Node childAt(int pos) { + return children[pos]; + } + + @Override + public int decrement_refcount() { + if (--this.refcount <= 0) { + int freed = 0; + for (int i = 0; i < 256; i++) { + if (children[i] != null) { + freed += children[i].decrement_refcount(); + } + } + count--; + // delete this; + return freed + 2120; + // object size (8) + refcount (4) + + // num_children int (4) + partial_len int (4) + + // pointer to partial array (8) + partial array size (8+4+1*MAX_PREFIX_LEN) + // pointer to children array (8) + children array size (8+4+8*256) + + // padding (4) + } + return 0; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode4.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode4.java new file mode 100644 index 0000000..e95b7f5 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode4.java @@ -0,0 +1,169 @@ +package org.xbib.datastructures.trie.radix.adaptive.persistent; + +class ArtNode4 extends ArtNode { + public static int count; + byte[] keys = new byte[4]; + Node[] children = new Node[4]; + + public ArtNode4() { + super(); + count++; + } + + public ArtNode4(final ArtNode4 other) { + super(other); + System.arraycopy(other.keys, 0, keys, 0, other.num_children); + for (int i = 0; i < other.num_children; i++) { + children[i] = other.children[i]; + children[i].refcount++; + } + count++; + } + + public ArtNode4(final ArtNode16 other) { + this(); + assert (other.num_children <= 4); + // ArtNode + this.num_children = other.num_children; + this.partial_len = other.partial_len; + System.arraycopy(other.partial, 0, + this.partial, 0, + Math.min(MAX_PREFIX_LEN, this.partial_len)); + // ArtNode4 from ArtNode16 + System.arraycopy(other.keys, 0, keys, 0, 
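Note the deliberately asymmetric thresholds: ArtNode4 grows on its fifth child, but ArtNode16 only collapses back at three children (see remove_child above), and ArtNode256 collapses to ArtNode48 at 37. Shrinking strictly below the smaller node's capacity adds hysteresis, so a workload oscillating around a boundary does not re-allocate nodes on every step. A tiny simulation of the Node4/Node16 boundary (thresholds taken from the code above):

// Simulates an add/remove cycle at the ArtNode4/ArtNode16 boundary:
// grow happens at the 5th child, shrink only at 3, so the node settles.
public class HysteresisDemo {
    public static void main(String[] args) {
        String node = "ArtNode4";
        int children = 4;  // a full ArtNode4
        int resizes = 0;
        for (int i = 0; i < 1000; i++) {
            children++;    // add_child
            if (node.equals("ArtNode4") && children > 4) {
                node = "ArtNode16";
                resizes++;
            }
            children--;    // remove_child
            if (node.equals("ArtNode16") && children == 3) {
                node = "ArtNode4";
                resizes++;
            }
        }
        // prints: 1 resize(s) over 1000 cycles; settled as ArtNode16
        System.out.println(resizes + " resize(s) over 1000 cycles; settled as " + node);
    }
}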
this.num_children); + for (int i = 0; i < this.num_children; i++) { + children[i] = other.children[i]; + children[i].refcount++; + } + } + + @Override + public Node n_clone() { + return new ArtNode4(this); + } + + @Override + public ChildPtr find_child(byte c) { + for (int i = 0; i < this.num_children; i++) { + if (keys[i] == c) { + return new ArrayChildPtr(children, i); + } + } + return null; + } + + @Override + public Leaf minimum() { + return Node.minimum(children[0]); + } + + @Override + public void add_child(ChildPtr ref, byte c, Node child) { + assert (refcount <= 1); + + if (this.num_children < 4) { + int idx; + for (idx = 0; idx < this.num_children; idx++) { + if (to_uint(c) < to_uint(keys[idx])) break; + } + + // Shift to make room + System.arraycopy(this.keys, idx, this.keys, idx + 1, this.num_children - idx); + System.arraycopy(this.children, idx, this.children, idx + 1, this.num_children - idx); + + // Insert element + this.keys[idx] = c; + this.children[idx] = child; + child.refcount++; + this.num_children++; + } else { + // Copy the node4 into a new node16 + ArtNode16 result = new ArtNode16(this); + // Update the parent pointer to the node16 + ref.change(result); + // Insert the element into the node16 instead + result.add_child(ref, c, child); + } + } + + @Override + public void remove_child(ChildPtr ref, byte c) { + assert (refcount <= 1); + + int idx; + for (idx = 0; idx < this.num_children; idx++) { + if (c == keys[idx]) break; + } + if (idx == this.num_children) return; + + assert (children[idx] instanceof Leaf); + children[idx].decrement_refcount(); + + // Shift to fill the hole + System.arraycopy(this.keys, idx + 1, this.keys, idx, this.num_children - idx - 1); + System.arraycopy(this.children, idx + 1, this.children, idx, this.num_children - idx - 1); + this.num_children--; + + // Remove nodes with only a single child + if (num_children == 1) { + Node child = children[0]; + if (!(child instanceof Leaf)) { + if (child.refcount > 1) { + child = child.n_clone(); + } + ArtNode an_child = (ArtNode) child; + // Concatenate the prefixes + int prefix = partial_len; + if (prefix < MAX_PREFIX_LEN) { + partial[prefix] = keys[0]; + prefix++; + } + if (prefix < MAX_PREFIX_LEN) { + int sub_prefix = Math.min(an_child.partial_len, MAX_PREFIX_LEN - prefix); + System.arraycopy(an_child.partial, 0, partial, prefix, sub_prefix); + prefix += sub_prefix; + } + + // Store the prefix in the child + System.arraycopy(partial, 0, an_child.partial, 0, Math.min(prefix, MAX_PREFIX_LEN)); + an_child.partial_len += partial_len + 1; + } + ref.change(child); + } + } + + @Override + public boolean exhausted(int i) { + return i >= num_children; + } + + @Override + public int nextChildAtOrAfter(int i) { + return i; + } + + @Override + public Node childAt(int i) { + return children[i]; + } + + @Override + public int decrement_refcount() { + if (--this.refcount <= 0) { + int freed = 0; + for (int i = 0; i < this.num_children; i++) { + freed += children[i].decrement_refcount(); + } + count--; + // delete this; + return freed + 128; + // object size (8) + refcount (4) + + // num_children int (4) + partial_len int (4) + + // pointer to partial array (8) + partial array size (8+4+1*MAX_PREFIX_LEN) + // pointer to key array (8) + key array size (8+4+1*4) + + // pointer to children array (8) + children array size (8+4+8*4) + + // padding (4) + } + return 0; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode48.java 
b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode48.java new file mode 100644 index 0000000..9930e63 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtNode48.java @@ -0,0 +1,173 @@ +package org.xbib.datastructures.trie.radix.adaptive.persistent; + +class ArtNode48 extends ArtNode { + public static int count; + byte[] keys = new byte[256]; + Node[] children = new Node[48]; + + public ArtNode48() { + super(); + count++; + } + + public ArtNode48(final ArtNode48 other) { + super(other); + System.arraycopy(other.keys, 0, keys, 0, 256); + // Copy the children. We have to look at all elements of `children` + // rather than just the first num_children elements because `children` + // may not be contiguous due to deletion + for (int i = 0; i < 48; i++) { + children[i] = other.children[i]; + if (children[i] != null) { + children[i].refcount++; + } + } + count++; + } + + public ArtNode48(final ArtNode16 other) { + this(); + // ArtNode + this.num_children = other.num_children; + this.partial_len = other.partial_len; + System.arraycopy(other.partial, 0, this.partial, 0, + Math.min(MAX_PREFIX_LEN, this.partial_len)); + + // ArtNode48 from ArtNode16 + for (int i = 0; i < this.num_children; i++) { + keys[to_uint(other.keys[i])] = (byte) (i + 1); + children[i] = other.children[i]; + children[i].refcount++; + } + } + + public ArtNode48(final ArtNode256 other) { + this(); + assert (other.num_children <= 48); + // ArtNode + this.num_children = other.num_children; + this.partial_len = other.partial_len; + System.arraycopy(other.partial, 0, this.partial, 0, + Math.min(MAX_PREFIX_LEN, this.partial_len)); + + // ArtNode48 from ArtNode256 + int pos = 0; + for (int i = 0; i < 256; i++) { + if (other.children[i] != null) { + keys[i] = (byte) (pos + 1); + children[pos] = other.children[i]; + children[pos].refcount++; + pos++; + } + } + } + + @Override + public Node n_clone() { + return new ArtNode48(this); + } + + @Override + public ChildPtr find_child(byte c) { + int idx = to_uint(keys[to_uint(c)]); + if (idx != 0) return new ArrayChildPtr(children, idx - 1); + return null; + } + + @Override + public Leaf minimum() { + int idx = 0; + while (keys[idx] == 0) idx++; + Node child = children[to_uint(keys[idx]) - 1]; + return Node.minimum(child); + } + + @Override + public void add_child(ChildPtr ref, byte c, Node child) { + assert (refcount <= 1); + + if (this.num_children < 48) { + // Have to do a linear scan because deletion may create holes in + // children array + int pos = 0; + while (children[pos] != null) pos++; + + this.children[pos] = child; + child.refcount++; + this.keys[to_uint(c)] = (byte) (pos + 1); + this.num_children++; + } else { + // Copy the node48 into a new node256 + ArtNode256 result = new ArtNode256(this); + // Update the parent pointer to the node256 + ref.change(result); + // Insert the element into the node256 instead + result.add_child(ref, c, child); + } + } + + @Override + public void remove_child(ChildPtr ref, byte c) { + assert (refcount <= 1); + + // Delete the child, leaving a hole in children. 
We can't shift children + // because that would require decrementing many elements of keys + int pos = to_uint(keys[to_uint(c)]); + keys[to_uint(c)] = 0; + children[pos - 1].decrement_refcount(); + children[pos - 1] = null; + num_children--; + + if (num_children == 12) { + ArtNode16 result = new ArtNode16(this); + ref.change(result); + } + } + + @Override + public boolean exhausted(int c) { + for (int i = c; i < 256; i++) { + if (keys[i] != 0) { + return false; + } + } + return true; + } + + @Override + public int nextChildAtOrAfter(int c) { + int pos = c; + for (; pos < 256; pos++) { + if (keys[pos] != 0) { + break; + } + } + return pos; + } + + @Override + public Node childAt(int c) { + return children[to_uint(keys[c]) - 1]; + } + + @Override + public int decrement_refcount() { + if (--this.refcount <= 0) { + int freed = 0; + for (int i = 0; i < this.num_children; i++) { + if (children[i] != null) { + freed += children[i].decrement_refcount(); + } + } + count--; + // delete this; + return freed + 728; + // object size (8) + refcount (4) + + // num_children int (4) + partial_len int (4) + + // pointer to partial array (8) + partial array size (8+4+1*MAX_PREFIX_LEN) + // pointer to key array (8) + key array size (8+4+1*256) + + // pointer to children array (8) + children array size (8+4+8*48) + } + return 0; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtTree.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtTree.java new file mode 100644 index 0000000..83d1b93 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ArtTree.java @@ -0,0 +1,156 @@ +package org.xbib.datastructures.trie.radix.adaptive.persistent; + +import java.util.Iterator; + +public class ArtTree extends ChildPtr { + Node root = null; + long num_elements = 0; + + public ArtTree() { + } + + public ArtTree(final ArtTree other) { + root = other.root; + num_elements = other.num_elements; + } + + public ArtTree snapshot() { + ArtTree b = new ArtTree(); + if (root != null) { + b.root = Node.n_clone(root); + b.root.refcount++; + } + b.num_elements = num_elements; + return b; + } + + @Override + Node get() { + return root; + } + + @Override + void set(Node n) { + root = n; + } + + public Object search(final byte[] key) { + Node n = root; + int prefix_len, depth = 0; + while (n != null) { + if (n instanceof Leaf) { + Leaf l = (Leaf) n; + // Check if the expanded path matches + if (l.matches(key)) { + return l.value; + } else { + return null; + } + } else { + ArtNode an = (ArtNode) (n); + + // Bail if the prefix does not match + if (an.partial_len > 0) { + prefix_len = an.check_prefix(key, depth); + if (prefix_len != Math.min(Node.MAX_PREFIX_LEN, an.partial_len)) { + return null; + } + depth += an.partial_len; + } + + if (depth >= key.length) return null; + + // Recursively search + ChildPtr child = an.find_child(key[depth]); + n = (child != null) ? 
child.get() : null;
+                depth++;
+            }
+        }
+        return null;
+    }
+
+    public void insert(final byte[] key, Object value) throws UnsupportedOperationException {
+        if (Node.insert(root, this, key, value, 0, false)) num_elements++;
+    }
+
+    public void delete(final byte[] key) {
+        if (root != null) {
+            boolean child_is_leaf = root instanceof Leaf;
+            boolean do_delete = root.delete(this, key, 0, false);
+            if (do_delete) {
+                num_elements--;
+                if (child_is_leaf) {
+                    // The leaf to delete is the root, so we must remove it
+                    root = null;
+                }
+            }
+        }
+    }
+
+    public Iterator<Tuple2<byte[], Object>> iterator() {
+        return new ArtIterator(root);
+    }
+
+    public Iterator<Tuple2<byte[], Object>> prefixIterator(final byte[] prefix) {
+        // Find the root node for the prefix
+        Node n = root;
+        int prefix_len, depth = 0;
+        while (n != null) {
+            if (n instanceof Leaf) {
+                Leaf l = (Leaf) n;
+                // Check if the expanded path matches
+                if (l.prefix_matches(prefix)) {
+                    return new ArtIterator(l);
+                } else {
+                    return new ArtIterator(null);
+                }
+            } else {
+                if (depth == prefix.length) {
+                    // If we have reached appropriate depth, return the iterator
+                    if (n.minimum().prefix_matches(prefix)) {
+                        return new ArtIterator(n);
+                    } else {
+                        return new ArtIterator(null);
+                    }
+                } else {
+                    ArtNode an = (ArtNode) (n);
+
+                    // Bail if the prefix does not match
+                    if (an.partial_len > 0) {
+                        prefix_len = an.prefix_mismatch(prefix, depth);
+                        if (prefix_len == 0) {
+                            // No match, return empty
+                            return new ArtIterator(null);
+                        } else if (depth + prefix_len == prefix.length) {
+                            // Prefix match, return iterator
+                            return new ArtIterator(n);
+                        } else {
+                            // Full match, go deeper
+                            depth += an.partial_len;
+                        }
+                    }
+
+                    // Recursively search
+                    ChildPtr child = an.find_child(prefix[depth]);
+                    n = (child != null) ? child.get() : null;
+                    depth++;
+                }
+            }
+        }
+        return new ArtIterator(null);
+    }
+
+    public long size() {
+        return num_elements;
+    }
+
+    public int destroy() {
+        if (root != null) {
+            int result = root.decrement_refcount();
+            root = null;
+            return result;
+        } else {
+            return 0;
+        }
+    }
+}
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ChildPtr.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ChildPtr.java
new file mode 100644
index 0000000..7c90409
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/ChildPtr.java
@@ -0,0 +1,22 @@
+package org.xbib.datastructures.trie.radix.adaptive.persistent;
+
+abstract class ChildPtr {
+    abstract Node get();
+
+    abstract void set(Node n);
+
+    void change(Node n) {
+        // First increment the refcount of the new node, in case it would
+        // otherwise have been deleted by the decrement of the old node
+        n.refcount++;
+        if (get() != null) {
+            get().decrement_refcount();
+        }
+        set(n);
+    }
+
+    void change_no_decrement(Node n) {
+        n.refcount++;
+        set(n);
+    }
+}
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/Leaf.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/Leaf.java
new file mode 100644
index 0000000..30590aa
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/Leaf.java
@@ -0,0 +1,136 @@
+package org.xbib.datastructures.trie.radix.adaptive.persistent;
+
+class Leaf extends Node {
+    public static int count;
+    final byte[] key;
+    Object value;
+
+    public Leaf(final byte[] key, Object value) {
+        super();
+        this.key = key;
+        this.value = value;
+        count++;
+    }
+
+    public Leaf(final Leaf other) {
+        super();
+        this.key = other.key;
+        this.value = other.value;
+        count++;
+    }
+
+    @Override
+    public Node n_clone() {
+        return new Leaf(this);
+    }
+
+    public boolean matches(final byte[] key) {
+        if (this.key.length != key.length) return false;
+        for (int i = 0; i < key.length; i++) {
+            if (this.key[i] != key[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    public boolean prefix_matches(final byte[] prefix) {
+        if (this.key.length < prefix.length) return false;
+        for (int i = 0; i < prefix.length; i++) {
+            if (this.key[i] != prefix[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    @Override
+    public Leaf minimum() {
+        return this;
+    }
+
+    public int longest_common_prefix(Leaf other, int depth) {
+        int max_cmp = Math.min(key.length, other.key.length) - depth;
+        int idx;
+        for (idx = 0; idx < max_cmp; idx++) {
+            if (key[depth + idx] != other.key[depth + idx]) {
+                return idx;
+            }
+        }
+        return idx;
+    }
+
+    @Override
+    public boolean insert(ChildPtr ref, final byte[] key, Object value,
+                          int depth, boolean force_clone) throws UnsupportedOperationException {
+        boolean clone = force_clone || this.refcount > 1;
+        if (matches(key)) {
+            if (clone) {
+                // Updating an existing value, but need to create a new leaf to
+                // reflect the change
+                ref.change(new Leaf(key, value));
+            } else {
+                // Updating an existing value, and safe to make the change in
+                // place
+                this.value = value;
+            }
+            return false;
+        } else {
+            // New value
+
+            // Create a new leaf
+            Leaf l2 = new Leaf(key, value);
+
+            // Determine longest prefix
+            int longest_prefix = longest_common_prefix(l2, depth);
+            if (depth + longest_prefix >= this.key.length ||
+                    depth + longest_prefix >= key.length) {
+                throw new UnsupportedOperationException("keys cannot be prefixes of other keys");
+            }
+
+            // Split the current leaf into a node4
+            ArtNode4 result = new ArtNode4();
+            result.partial_len = longest_prefix;
+            Node ref_old = ref.get();
+            ref.change_no_decrement(result);
+
+            System.arraycopy(key, depth,
+                    result.partial, 0,
+                    Math.min(Node.MAX_PREFIX_LEN, longest_prefix));
+            // Add the leaves to the new node4
+            result.add_child(ref, this.key[depth + longest_prefix], this);
+            result.add_child(ref, l2.key[depth + longest_prefix], l2);
+
+            ref_old.decrement_refcount();
+
+            // TODO: avoid the increment to self immediately followed by decrement
+
+            return true;
+        }
+    }
+
+    @Override
+    public boolean delete(ChildPtr ref, final byte[] key, int depth,
+                          boolean force_clone) {
+        return matches(key);
+    }
+
+    @Override
+    public boolean exhausted(int i) {
+        return i > 0;
+    }
+
+    @Override
+    public int decrement_refcount() {
+        if (--this.refcount <= 0) {
+            count--;
+            // delete this;
+            // Don't delete the actual key or value because they may be used
+            // elsewhere
+            return 32;
+            // object size (8) + refcount (4) + pointer to key array (8) +
+            // pointer to value (8) + padding (4)
+        }
+        return 0;
+    }
+}
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/Node.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/Node.java
new file mode 100644
index 0000000..dd5d936
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/Node.java
@@ -0,0 +1,54 @@
+package org.xbib.datastructures.trie.radix.adaptive.persistent;
+
+abstract class Node {
+    static final int MAX_PREFIX_LEN = 8;
+    int refcount;
+
+    public Node() {
+        refcount = 0;
+    }
+
+    public static Node n_clone(Node n) {
+        if (n == null) return null;
+        else return n.n_clone();
+    }
+
+    public static Leaf minimum(Node n) {
+        if (n == null) return null;
+        else return n.minimum();
+    }
+
+    public static boolean insert(Node n, ChildPtr ref, final byte[] key, Object value, int depth,
+                                 boolean force_clone) {
+        // If we are at a NULL node, inject a leaf
+        if (n == null) {
+            ref.change(new Leaf(key, value));
+            return true;
+        } else {
+            return n.insert(ref, key, value, depth, force_clone);
+        }
+    }
+
+    public static boolean exhausted(Node n, int i) {
+        if (n == null) return true;
+        else return n.exhausted(i);
+    }
+
+    static int to_uint(byte b) {
+        return ((int) b) & 0xFF;
+    }
+
+    public abstract Node n_clone();
+
+    public abstract Leaf minimum();
+
+    public abstract boolean insert(ChildPtr ref, final byte[] key, Object value, int depth,
+                                   boolean force_clone) throws UnsupportedOperationException;
+
+    public abstract boolean delete(ChildPtr ref, final byte[] key, int depth,
+                                   boolean force_clone);
+
+    public abstract int decrement_refcount();
+
+    public abstract boolean exhausted(int i);
+}
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/Tuple2.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/Tuple2.java
new file mode 100644
index 0000000..f5ef68c
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/Tuple2.java
@@ -0,0 +1,33 @@
+package org.xbib.datastructures.trie.radix.adaptive.persistent;
+
+public class Tuple2<A, B> {
+
+    private A a;
+
+    private B b;
+
+    public Tuple2() {
+    }
+
+    public Tuple2(A a, B b) {
+        this.a = a;
+        this.b = b;
+    }
+
+    public A getA() {
+        return a;
+    }
+
+    public void setA(A a) {
+        this.a = a;
+    }
+
+    public B getB() {
+        return b;
+    }
+
+    public void setB(B b) {
+        this.b = b;
+    }
+
+}
\ No newline at end of file
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/package-info.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/package-info.java
new file mode 100644
index 0000000..2fc4f50
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/adaptive/persistent/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Taken from Ankur Dave, https://github.com/ankurdave/part
+ */
+package org.xbib.datastructures.trie.radix.adaptive.persistent;
\ No newline at end of file
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/package-info.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/package-info.java
new file mode 100644
index 0000000..7a700b1
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/radix/package-info.java
@@ -0,0 +1,5 @@
+/**
+ * Taken from https://code.google.com/archive/p/radixtree/
+ *
+ */
+package org.xbib.datastructures.trie.radix;
\ No newline at end of file
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/regex/RegexTrie.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/regex/RegexTrie.java
new file mode 100644
index 0000000..4263bb1
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/regex/RegexTrie.java
@@ -0,0 +1,255 @@
+package org.xbib.datastructures.trie.regex;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * The RegexTrie is a trie where each _stored_ segment of the key is a regex {@link Pattern}. Thus,
+ * the full _stored_ key is a List<Pattern> rather than a String as in a standard trie. Note that
+ * the resolve methods require a List<String>, which will be matched against the
+ * {@link Pattern}s rather than checked for equality as in a standard trie. Because of this, it
+ * will likely perform poorly for large datasets.
+ * <p>
+ * One can also use a {@code null} entry in the {@code Pattern} sequence to serve as a wildcard. If
+ * a {@code null} is encountered, all subsequent entries in the sequence will be ignored.
+ * When the retrieval code encounters a {@code null} {@code Pattern}, it will first wait to see if a
+ * more-specific entry matches the sequence. If one does, that more-specific entry will proceed,
+ * even if it subsequently fails to match.
+ * <p>
+ * If no more-specific entry matches, the wildcard match will add all remaining {@code String}s
+ * to the list of captures (if enabled) and return the value associated with the wildcard.
+ * <p>
+ * A short sample of the wildcard functionality:
+ * <pre>
+ * List<List<String>> captures = new LinkedList<>();
+ * RegexTrie<Integer> trie = new RegexTrie<>();
+ * trie.put(2, Arrays.asList("a", null));
+ * trie.put(4, Arrays.asList("a", "b"));
+ * trie.resolve(captures, Arrays.asList("a", "c", "e"));
+ * // returns 2.  captures is now [[], ["c"], ["e"]]
+ * trie.resolve(captures, Arrays.asList("a", "b"));
+ * // returns 4.  captures is now [[], []]
+ * trie.resolve(captures, Arrays.asList("a", "b", "c"));
+ * // returns null.  captures is now [[], []]
+ * </pre>
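+ * <p>
+ * The convenience method {@code add(String, V)} splits its pattern argument on {@code "/"} and
+ * stores each segment as one trie level; note that duplicate segments are dropped by the
+ * {@code distinct()} call before insertion. An illustrative call:
+ * <pre>
+ * trie.add("foo/b.*r", 6);
+ * trie.resolve(Arrays.asList("foo", "bar"));
+ * // returns 6: "foo" matches literally, "bar" matches the pattern "b.*r"
+ * </pre>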
+ */
+public class RegexTrie<V> {
+
+    private V value;
+
+    private final Map<CompPattern, RegexTrie<V>> children;
+
+    public RegexTrie() {
+        children = new LinkedHashMap<>();
+    }
+
+    public void clear() {
+        value = null;
+        for (RegexTrie<V> child : children.values()) {
+            child.clear();
+        }
+        children.clear();
+    }
+
+    public boolean containsKey(List<String> strings) {
+        return resolve(strings) != null;
+    }
+
+    public void add(String pattern, V value) {
+        put(value, Arrays.stream(pattern.split("/")).distinct().collect(Collectors.toList()));
+    }
+
+    /**
+     * Add an entry to the trie.
+     *
+     * @param value The value to set
+     * @param patterns The sequence of {@link Pattern}s that must be sequentially matched to
+     *                 retrieve the associated {@code value}
+     */
+    public void put(V value, List<?> patterns) {
+        List<CompPattern> list = new ArrayList<>(patterns.size());
+        for (Object object : patterns) {
+            CompPattern compPattern = null;
+            if (object instanceof Pattern) {
+                compPattern = new CompPattern((Pattern) object);
+            } else if (object instanceof String) {
+                compPattern = new CompPattern(Pattern.compile((String) object));
+            }
+            list.add(compPattern);
+        }
+        validateAndPut(value, list);
+    }
+
+    /**
+     * Resolve a value from the trie, by matching the provided sequence of {@link String}s to a
+     * sequence of {@link Pattern}s stored in the trie.
+     *
+     * @param strings A sequence of {@link String}s to match
+     * @return The associated value, or {@code null} if no value was found
+     */
+    public V resolve(List<String> strings) {
+        return resolve(null, strings);
+    }
+
+    /**
+     * Resolve a value from the trie, by matching the provided sequence of {@link String}s to a
+     * sequence of {@link Pattern}s stored in the trie. This version of the method also returns
+     * a {@link List} of capture groups for each {@link Pattern} that was matched.
+     * <p>
+     * Each entry in the outer List corresponds to one level of {@code Pattern} in the trie.
+     * For each level, the list of capture groups will be stored. If there were no captures
+     * for a particular level, an empty list will be stored.
+     * <p>
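+     * For example (an illustrative entry, not taken from the sample above): if the stored
+     * pattern sequence is {@code ["a(\d)", "b(\d)(\d)"]}, resolving {@code ["a1", "b23"]}
+     * returns the stored value and leaves {@code captures} as {@code [["1"], ["2", "3"]]}.
+     * <p>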
+     * Note that {@code captures} will be {@link List#clear()}ed before the retrieval begins.
+     * Also, if the retrieval fails after a partial sequence of matches, {@code captures} will
+     * still reflect the capture groups from the partial match.
+     *
+     * @param captures A {@code List<List<String>>} through which capture groups will be returned.
+     * @param strings A sequence of {@link String}s to match
+     * @return The associated value, or {@code null} if no value was found
+     */
+    public V resolve(List<List<String>> captures, List<String> strings) {
+        if (strings.size() == 0) {
+            throw new IllegalArgumentException("string list must be non-empty");
+        }
+        if (captures != null) {
+            captures.clear();
+        }
+        return recursiveRetrieve(captures, strings);
+    }
+
+    /**
+     * A helper method to consolidate validation before adding an entry to the trie.
+     *
+     * @param value The value to set
+     * @param list The sequence of {@link CompPattern}s that must be sequentially matched to
+     *             retrieve the associated {@code value}
+     */
+    private V validateAndPut(V value, List<CompPattern> list) {
+        if (list.size() == 0) {
+            throw new IllegalArgumentException("pattern list must be non-empty");
+        }
+        return recursivePut(value, list);
+    }
+
+    private V recursivePut(V value, List<CompPattern> patterns) {
+        // Cases:
+        // 1) patterns is empty -- set our value
+        // 2) patterns is non-empty -- recurse downward, creating a child if necessary
+        if (patterns.isEmpty()) {
+            V oldValue = this.value;
+            this.value = value;
+            return oldValue;
+        } else {
+            CompPattern curKey = patterns.get(0);
+            List<CompPattern> nextKeys = patterns.subList(1, patterns.size());
+            // Create a new child to handle the remaining patterns, if necessary
+            RegexTrie<V> nextChild = children.get(curKey);
+            if (nextChild == null) {
+                nextChild = new RegexTrie<>();
+                children.put(curKey, nextChild);
+            }
+            return nextChild.recursivePut(value, nextKeys);
+        }
+    }
+
+    private V recursiveRetrieve(List<List<String>> captures, List<String> strings) {
+        // Cases:
+        // 1) strings is empty -- return our value
+        // 2) strings is non-empty -- find the first child that matches, recurse downward
+        if (strings.isEmpty()) {
+            return value;
+        } else {
+            boolean wildcardMatch = false;
+            V wildcardValue = null;
+            String curKey = strings.get(0);
+            List<String> nextKeys = strings.subList(1, strings.size());
+            for (Map.Entry<CompPattern, RegexTrie<V>> child : children.entrySet()) {
+                CompPattern pattern = child.getKey();
+                if (pattern == null) {
+                    wildcardMatch = true;
+                    wildcardValue = child.getValue().value;
+                    continue;
+                }
+                Matcher matcher = pattern.matcher(curKey);
+                if (matcher.matches()) {
+                    if (captures != null) {
+                        List<String> curCaptures = new ArrayList<>(matcher.groupCount());
+                        for (int i = 0; i < matcher.groupCount(); i++) {
+                            // i+1 since group 0 is the entire matched string
+                            curCaptures.add(matcher.group(i + 1));
+                        }
+                        captures.add(curCaptures);
+                    }
+                    return child.getValue().recursiveRetrieve(captures, nextKeys);
+                }
+            }
+            if (wildcardMatch) {
+                // stick the rest of the query string into the captures list and return
+                if (captures != null) {
+                    for (String str : strings) {
+                        captures.add(List.of(str));
+                    }
+                }
+                return wildcardValue;
+            }
+            // no match
+            return null;
+        }
+    }
+
+    @Override
+    public String toString() {
+        return String.format("{V: %s, C: %s}", value, children);
+    }
+
+    /**
+     * Patterns aren't comparable by default, which prevents you from retrieving them from a Map.
+     * This is a simple stub class that makes a Pattern with a working
+     * {@link CompPattern#equals(Object)} method.
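+     * Equality and hashing delegate to the {@code String} representation of the wrapped
+     * {@link Pattern}: a {@code CompPattern} reports itself equal to any {@code CompPattern}
+     * (or raw {@code Pattern}) whose regex text is identical.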
+ */ + private static class CompPattern { + + protected final Pattern pattern; + + CompPattern(Pattern pattern) { + Objects.requireNonNull(pattern); + this.pattern = pattern; + } + + @Override + public boolean equals(Object other) { + Pattern otherPat; + if (other instanceof Pattern) { + otherPat = (Pattern) other; + } else if (other instanceof CompPattern) { + CompPattern otherCPat = (CompPattern) other; + otherPat = otherCPat.pattern; + } else { + return false; + } + return pattern.toString().equals(otherPat.toString()); + } + + @Override + public int hashCode() { + return pattern.toString().hashCode(); + } + + @Override + public String toString() { + return String.format("P(%s)", pattern); + } + + public Matcher matcher(String string) { + return pattern.matcher(string); + } + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/Node.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/Node.java new file mode 100644 index 0000000..7a4121c --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/Node.java @@ -0,0 +1,20 @@ +package org.xbib.datastructures.trie.segment; + +import java.util.Map; + +public interface Node { + + void setKey(TrieKeySegment key); + + TrieKeySegment getKey(); + + void setValue(V value); + + V getValue(); + + void setTerminal(boolean terminal); + + boolean isTerminal(); + + Map, Node> getChildren(); +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/NodeImpl.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/NodeImpl.java new file mode 100644 index 0000000..6a8fa45 --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/NodeImpl.java @@ -0,0 +1,54 @@ +package org.xbib.datastructures.trie.segment; + +import java.util.HashMap; +import java.util.Map; + +public class NodeImpl implements Node { + + private TrieKeySegment key; + + private V value; + + private boolean terminal; + + private final Map, Node> children; + + public NodeImpl() { + this.children = new HashMap<>(); + } + + @Override + public void setKey(TrieKeySegment key) { + this.key = key; + } + + @Override + public TrieKeySegment getKey() { + return key; + } + + @Override + public void setValue(V value) { + this.value = value; + } + + @Override + public V getValue() { + return value; + } + + @Override + public void setTerminal(boolean terminal) { + this.terminal = terminal; + } + + @Override + public boolean isTerminal() { + return terminal; + } + + @Override + public Map, Node> getChildren() { + return children; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/StringSegment.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/StringSegment.java new file mode 100644 index 0000000..4778a7a --- /dev/null +++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/StringSegment.java @@ -0,0 +1,24 @@ +package org.xbib.datastructures.trie.segment; + +public class StringSegment implements TrieKeySegment { + + private final String segment; + + public StringSegment(String segment) { + this.segment = segment; + } + + public static StringSegment of(String segment) { + return new StringSegment(segment); + } + + @Override + public int compareTo(String o) { + return segment.compareTo(o); + } + + @Override + public String toString() { + return segment; + } +} diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/Trie.java 
new file mode 100644
index 0000000..76c2631
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/Trie.java
@@ -0,0 +1,19 @@
+package org.xbib.datastructures.trie.segment;
+
+import java.util.List;
+import java.util.Set;
+
+public interface Trie<T, K extends TrieKey<T>, V> {
+
+    void add(K key, V value);
+
+    V search(K key);
+
+    List<V> startsWith(List<TrieKeySegment<T>> prefix);
+
+    boolean contains(K key);
+
+    Set<K> getAllKeys();
+
+    int size();
+}
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieImpl.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieImpl.java
new file mode 100644
index 0000000..b1fff64
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieImpl.java
@@ -0,0 +1,131 @@
+package org.xbib.datastructures.trie.segment;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class TrieImpl<T, V> implements Trie<T, TrieKey<T>, V> {
+
+    private final Node<T, V> node;
+
+    public TrieImpl() {
+        this.node = new NodeImpl<>();
+    }
+
+    @Override
+    public void add(TrieKey<T> key, V value) {
+        addNode(node, key, 0, value);
+    }
+
+    @Override
+    public V search(TrieKey<T> key) {
+        return findKey(node, key);
+    }
+
+    @Override
+    public List<V> startsWith(List<TrieKeySegment<T>> prefix) {
+        List<V> list = new ArrayList<>();
+        Node<T, V> node = this.node;
+        for (TrieKeySegment<T> e : prefix) {
+            node = node.getChildren().get(e);
+            if (node == null) {
+                break;
+            }
+        }
+        if (node != null) {
+            getValues(node, list);
+        }
+        return list;
+    }
+
+    @Override
+    public boolean contains(TrieKey<T> key) {
+        return hasKey(node, key);
+    }
+
+    @Override
+    public Set<TrieKey<T>> getAllKeys() {
+        Set<TrieKey<T>> keySet = new HashSet<>();
+        getKeys(node, new TrieKeyImpl<>(), keySet);
+        return keySet;
+    }
+
+    @Override
+    public int size() {
+        return getAllKeys().size();
+    }
+
+    private void getValues(Node<T, V> currNode, List<V> valueList) {
+        if (currNode.isTerminal()) {
+            valueList.add(currNode.getValue());
+        }
+        Map<TrieKeySegment<T>, Node<T, V>> children = currNode.getChildren();
+        for (Map.Entry<TrieKeySegment<T>, Node<T, V>> entry : children.entrySet()) {
+            getValues(entry.getValue(), valueList);
+        }
+    }
+
+    private void getKeys(Node<T, V> currNode, TrieKey<T> key, Set<TrieKey<T>> keySet) {
+        if (currNode.isTerminal()) {
+            keySet.add(key);
+        }
+        Map<TrieKeySegment<T>, Node<T, V>> children = currNode.getChildren();
+        for (Map.Entry<TrieKeySegment<T>, Node<T, V>> entry : children.entrySet()) {
+            TrieKey<T> k = key.append(entry.getValue().getKey());
+            getKeys(entry.getValue(), k, keySet);
+        }
+    }
+
+    private V findKey(Node<T, V> currNode, TrieKey<T> key) {
+        TrieKeySegment<T> e = key.size() > 0 ? key.get(0) : null;
+        if (currNode.getChildren().containsKey(e)) {
+            Node<T, V> nextNode = currNode.getChildren().get(e);
+            if (key.size() <= 1) {
+                if (nextNode.isTerminal()) {
+                    return nextNode.getValue();
+                }
+            } else {
+                return findKey(nextNode, key.subKey(1));
+            }
+        }
+        return null;
+    }
+
+    private boolean hasKey(Node<T, V> currNode, TrieKey<T> key) {
+        TrieKeySegment<T> e = key.size() > 0 ? key.get(0) : null;
+        if (currNode.getChildren().containsKey(e)) {
+            Node<T, V> nextNode = currNode.getChildren().get(e);
+            if (key.size() <= 1) {
+                return nextNode.isTerminal();
+            } else {
+                return hasKey(nextNode, key.subKey(1));
+            }
+        }
+        return false;
+    }
+
+    private void addNode(Node<T, V> currNode, TrieKey<T> key, int pos, V value) {
+        TrieKeySegment<T> e = pos < key.size() ? key.get(pos) : null;
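+        // e is the key segment at this depth, or null once the key is exhausted;
+        // we either descend into the existing child for e or create a new one,
+        // marking the node terminal when the last segment has been placed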
+        Node<T, V> nextNode = currNode.getChildren().get(e);
+        if (nextNode == null) {
+            nextNode = new NodeImpl<>();
+            nextNode.setKey(e);
+            if (pos < key.size() - 1) {
+                addNode(nextNode, key, pos + 1, value);
+            } else {
+                nextNode.setValue(value);
+                nextNode.setTerminal(true);
+            }
+            currNode.getChildren().put(e, nextNode);
+        } else {
+            if (pos < key.size() - 1) {
+                addNode(nextNode, key, pos + 1, value);
+            } else {
+                nextNode.setValue(value);
+                nextNode.setTerminal(true);
+            }
+        }
+    }
+}
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKey.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKey.java
new file mode 100644
index 0000000..254e307
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKey.java
@@ -0,0 +1,19 @@
+package org.xbib.datastructures.trie.segment;
+
+import java.util.Arrays;
+import java.util.List;
+
+public interface TrieKey<T> {
+
+    int size();
+
+    TrieKey<T> subKey(int i);
+
+    TrieKey<T> append(TrieKeySegment<T> trieKeySegment);
+
+    void set(int i, TrieKeySegment<T> trieKeySegment);
+
+    TrieKeySegment<T> get(int i);
+
+    List<TrieKeySegment<T>> getSegments();
+}
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKeyImpl.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKeyImpl.java
new file mode 100644
index 0000000..4643b3e
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKeyImpl.java
@@ -0,0 +1,88 @@
+package org.xbib.datastructures.trie.segment;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class TrieKeyImpl<T> implements TrieKey<T>, Comparable<TrieKeyImpl<T>> {
+
+    private final List<TrieKeySegment<T>> segments;
+
+    public TrieKeyImpl() {
+        this(new ArrayList<>());
+    }
+
+    public TrieKeyImpl(List<TrieKeySegment<T>> segments) {
+        this.segments = segments;
+    }
+
+    public TrieKeyImpl<T> add(TrieKeySegment<T> segment) {
+        segments.add(segment);
+        return this;
+    }
+
+    @Override
+    public List<TrieKeySegment<T>> getSegments() {
+        return segments;
+    }
+
+    @Override
+    public int size() {
+        return segments.size();
+    }
+
+    @Override
+    public TrieKey<T> subKey(int i) {
+        return new TrieKeyImpl<>(segments.subList(1, segments.size()));
+    }
+
+    @Override
+    public TrieKey<T> append(TrieKeySegment<T> trieKeySegment) {
+        segments.add(trieKeySegment);
+        return this;
+    }
+
+    @Override
+    public void set(int i, TrieKeySegment<T> trieKeySegment) {
+        segments.set(i, trieKeySegment);
+    }
+
+    @Override
+    public TrieKeySegment<T> get(int i) {
+        return segments.get(i);
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public int compareTo(TrieKeyImpl<T> o) {
+        for (int i = 0; i < segments.size(); i++) {
+            TrieKeySegment<T> segment1 = segments.get(i);
+            T segment2 = i < o.segments.size() ? (T) o.segments.get(i) : null;
+            if (segment2 == null) {
+                return 1;
+            }
+            int c = segment1.compareTo(segment2);
+            if (c != 0) {
+                return c;
+            }
+        }
+        return 0;
+    }
+
+    public static TrieKey<String> stringKey(String... segments) {
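+        // wrap each raw string in a StringSegment and append it to a fresh key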
+        TrieKey<String> trieKey = new TrieKeyImpl<>();
+        Arrays.stream(segments).forEach(s -> {
+            trieKey.append(new StringSegment(s));
+        });
+        return trieKey;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        for (TrieKeySegment<T> segment : segments) {
+            sb.append(segment.toString());
+        }
+        return sb.toString();
+    }
+}
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKeySegment.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKeySegment.java
new file mode 100644
index 0000000..307ff14
--- /dev/null
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/segment/TrieKeySegment.java
@@ -0,0 +1,4 @@
+package org.xbib.datastructures.trie.segment;
+
+public interface TrieKeySegment<T> extends Comparable<T> {
+}
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/Node.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/simple/Node.java
similarity index 94%
rename from datastructures-trie/src/main/java/org/xbib/datastructures/trie/Node.java
rename to datastructures-trie/src/main/java/org/xbib/datastructures/trie/simple/Node.java
index e4625f6..8080342 100644
--- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/Node.java
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/simple/Node.java
@@ -1,4 +1,4 @@
-package org.xbib.datastructures.trie;
+package org.xbib.datastructures.trie.simple;
 
 import java.util.HashMap;
 import java.util.Map;
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/Trie.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/simple/Trie.java
similarity index 88%
rename from datastructures-trie/src/main/java/org/xbib/datastructures/trie/Trie.java
rename to datastructures-trie/src/main/java/org/xbib/datastructures/trie/simple/Trie.java
index b8b04aa..5b0380b 100644
--- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/Trie.java
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/simple/Trie.java
@@ -1,4 +1,4 @@
-package org.xbib.datastructures.trie;
+package org.xbib.datastructures.trie.simple;
 
 import java.util.List;
 import java.util.Set;
diff --git a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/TrieImpl.java b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/simple/TrieImpl.java
similarity index 98%
rename from datastructures-trie/src/main/java/org/xbib/datastructures/trie/TrieImpl.java
rename to datastructures-trie/src/main/java/org/xbib/datastructures/trie/simple/TrieImpl.java
index 8d5c9d1..6e9e6f1 100644
--- a/datastructures-trie/src/main/java/org/xbib/datastructures/trie/TrieImpl.java
+++ b/datastructures-trie/src/main/java/org/xbib/datastructures/trie/simple/TrieImpl.java
@@ -1,4 +1,4 @@
-package org.xbib.datastructures.trie;
+package org.xbib.datastructures.trie.simple;
 
 import java.util.ArrayList;
 import java.util.HashSet;
diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/WordTreeTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/WordTreeTest.java
new file mode 100644
index 0000000..5a461d0
--- /dev/null
+++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/WordTreeTest.java
@@ -0,0 +1,153 @@
+package org.xbib.datastructures.trie;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Objects;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+public class WordTreeTest {
+
+    static class Node {
+
+        private final char ch;
+
+        private boolean leaf;
+
+        private LinkedList<Node> children;
+
+        public Node(char ch) {
+            this.ch = ch;
+        }
+
+        public void addChild(Node node) {
+            if (children == null) {
+                children = new LinkedList<>();
+            }
+            children.add(node);
+        }
+
+        public Node getNode(char ch) {
+            if (children == null) {
+                return null;
+            }
+            for (Node child : children) {
+                if (child.getChar() == ch) {
+                    return child;
+                }
+            }
+            return null;
+        }
+
+        public char getChar() {
+            return ch;
+        }
+
+        public List<Node> getChildren() {
+            return Objects.requireNonNullElse(this.children, Collections.emptyList());
+        }
+
+        public boolean isLeaf() {
+            return leaf;
+        }
+
+        public void setLeaf(boolean leaf) {
+            this.leaf = leaf;
+        }
+    }
+
+    Node root = new Node(' ');
+
+    public WordTreeTest() {
+    }
+
+    public List<String> getWordsForPrefix(String prefix) {
+        if (prefix.length() == 0) {
+            return Collections.emptyList();
+        }
+        Node node = getNodeForPrefix(root, prefix);
+        if (node == null) {
+            return Collections.emptyList();
+        }
+        List<LinkedList<Character>> chars = collectChars(node);
+        List<String> words = new ArrayList<>(chars.size());
+        for (List<Character> charList : chars) {
+            words.add(combine(prefix.substring(0, prefix.length() - 1), charList));
+        }
+        return words;
+    }
+
+    private String combine(String prefix, List<Character> charList) {
+        StringBuilder sb = new StringBuilder(prefix);
+        for (Character character : charList) {
+            sb.append(character);
+        }
+        return sb.toString();
+    }
+
+    private Node getNodeForPrefix(Node node, String prefix) {
+        if (prefix.length() == 0) {
+            return node;
+        }
+        Node next = node.getNode(prefix.charAt(0));
+        if (next == null) {
+            return null;
+        }
+        return getNodeForPrefix(next, prefix.substring(1));
+    }
+
+    private List<LinkedList<Character>> collectChars(Node node) {
+        List<LinkedList<Character>> chars = new ArrayList<>();
+        if (node.getChildren().size() == 0) {
+            chars.add(new LinkedList<>(Collections.singletonList(node.getChar())));
+        } else {
+            if (node.isLeaf()) {
+                chars.add(new LinkedList<>(Collections.singletonList(node.getChar())));
+            }
+            List<Node> children = node.getChildren();
+            for (Node child : children) {
+                List<LinkedList<Character>> childList = collectChars(child);
+                for (LinkedList<Character> characters : childList) {
+                    characters.push(node.getChar());
+                    chars.add(characters);
+                }
+            }
+        }
+        return chars;
+    }
+
+    public void addWord(String word) {
+        addWord(root, word);
+    }
+
+    private void addWord(Node parent, String word) {
+        if (word.trim().length() == 0) {
+            return;
+        }
+        Node child = parent.getNode(word.charAt(0));
+        if (child == null) {
+            child = new Node(word.charAt(0));
+            parent.addChild(child);
+        }
+        if (word.length() == 1) {
+            child.setLeaf(true);
+        } else {
+            addWord(child, word.substring(1));
+        }
+    }
+
+    @Test
+    public void testWordTree() {
+        WordTreeTest tree = new WordTreeTest();
+        tree.addWord("world");
+        tree.addWord("work");
+        tree.addWord("wolf");
+        tree.addWord("life");
+        tree.addWord("love");
+        assertEquals(List.of("world", "work", "wolf"), tree.getWordsForPrefix("wo"));
+    }
+}
diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/ahocorasick/IntervalTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/ahocorasick/IntervalTest.java
new file mode 100644
index 0000000..b0d4a7a
--- /dev/null
+++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/ahocorasick/IntervalTest.java
@@ -0,0 +1,59 @@
+package org.xbib.datastructures.trie.ahocorasick;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class IntervalTest {
+
+    @Test
+    public void test_construct() {
+        final Interval i = new Interval(1, 3);
+        assertEquals(1, i.getStart());
+        assertEquals(3, i.getEnd());
+    }
+
+    @Test
+    public void test_size() {
+        Interval interval = new Interval(0, 2);
+        assertEquals(3, interval.getEnd() - interval.getStart() + 1);
+    }
+
+    @Test
+    public void test_intervaloverlaps() {
+        assertTrue(new Interval(1, 3).overlapsWith(new Interval(2, 4)));
+    }
+
+    @Test
+    public void test_intervalDoesNotOverlap() {
+        assertFalse(new Interval(1, 13).overlapsWith(new Interval(27, 42)));
+    }
+
+    @Test
+    public void test_pointOverlaps() {
+        assertTrue(new Interval(1, 3).overlapsWith(2));
+    }
+
+    @Test
+    public void test_pointDoesNotOverlap() {
+        assertFalse(new Interval(1, 13).overlapsWith(42));
+    }
+
+    @Test
+    public void test_comparable() {
+        final Set<Interval> intervals = new TreeSet<>();
+        intervals.add(new Interval(4, 6));
+        intervals.add(new Interval(2, 7));
+        intervals.add(new Interval(3, 4));
+        final Iterator<Interval> it = intervals.iterator();
+        assertEquals(2, it.next().getStart());
+        assertEquals(3, it.next().getStart());
+        assertEquals(4, it.next().getStart());
+    }
+}
diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/ahocorasick/IntervalTreeTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/ahocorasick/IntervalTreeTest.java
new file mode 100644
index 0000000..ea20c83
--- /dev/null
+++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/ahocorasick/IntervalTreeTest.java
@@ -0,0 +1,49 @@
+package org.xbib.datastructures.trie.ahocorasick;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+public class IntervalTreeTest {
+
+    @Test
+    public void findOverlaps() {
+        List<Interval> intervals = new ArrayList<>();
+        intervals.add(new Interval(0, 2));
+        intervals.add(new Interval(1, 3));
+        intervals.add(new Interval(2, 4));
+        intervals.add(new Interval(3, 5));
+        intervals.add(new Interval(4, 6));
+        intervals.add(new Interval(5, 7));
+        IntervalTree intervalTree = new IntervalTree(intervals);
+        List<Interval> overlaps = intervalTree.findOverlaps(new Interval(1, 3));
+        assertEquals(3, overlaps.size());
+        Iterator<Interval> overlapsIt = overlaps.iterator();
+        assertOverlap(overlapsIt.next(), 2, 4);
+        assertOverlap(overlapsIt.next(), 3, 5);
+        assertOverlap(overlapsIt.next(), 0, 2);
+    }
+
+    @Test
+    public void removeOverlaps() {
+        List<Interval> intervals = new ArrayList<>();
+        intervals.add(new Interval(0, 2));
+        intervals.add(new Interval(4, 5));
+        intervals.add(new Interval(2, 10));
+        intervals.add(new Interval(6, 13));
+        intervals.add(new Interval(9, 15));
+        intervals.add(new Interval(12, 16));
+        IntervalTree intervalTree = new IntervalTree(intervals);
+        intervals = intervalTree.removeOverlaps(intervals);
+        assertEquals(2, intervals.size());
+    }
+
+    protected void assertOverlap(Interval interval, int expectedStart, int expectedEnd) {
+        assertEquals(expectedStart, interval.getStart());
+        assertEquals(expectedEnd, interval.getEnd());
+    }
+}
diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/ahocorasick/TrieTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/ahocorasick/TrieTest.java
new file mode 100644
index 0000000..1dd254a
--- /dev/null
+++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/ahocorasick/TrieTest.java
@@ -0,0 +1,567 @@
+package org.xbib.datastructures.trie.ahocorasick;
+
+import org.junit.jupiter.api.Test;
+
+import java.security.SecureRandom;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TrieTest {
+
+    private static final SecureRandom random = new SecureRandom();
+
+    private final static String[] ALPHABET = new String[] { "abc", "bcd", "cde" };
+    private final static String[] ALPHABET_PAYLOAD = new String[] { "alpha:abc", "alpha:bcd", "alpha:cde" };
+
+    private final static List<Entry<String>> ALPHABET_WITH_ENTRIES = Arrays.asList(
+            new Entry<>(ALPHABET[0], ALPHABET_PAYLOAD[0]),
+            new Entry<>(ALPHABET[1], ALPHABET_PAYLOAD[1]),
+            new Entry<>(ALPHABET[2], ALPHABET_PAYLOAD[2]));
+
+    private final static String[] PRONOUNS = new String[] { "hers", "his", "she", "he" };
+    private final static int[] PRONOUNS_PAYLOAD_ID = new int[] { 9, 12, 4, 20 };
+
+    private final static List<Entry<Integer>> PRONOUNS_WITH_ENTRIES = Arrays.asList(
+            new Entry<>(PRONOUNS[0], PRONOUNS_PAYLOAD_ID[0]),
+            new Entry<>(PRONOUNS[1], PRONOUNS_PAYLOAD_ID[1]),
+            new Entry<>(PRONOUNS[2], PRONOUNS_PAYLOAD_ID[2]),
+            new Entry<>(PRONOUNS[3], PRONOUNS_PAYLOAD_ID[3])
+    );
+
+    private final static String[] FOOD = new String[] { "veal", "cauliflower", "broccoli", "tomatoes" };
+    private final static Food[] FOOD_PAYLOAD = new Food[] { new Food("veal"), new Food("cauliflower"), new Food("broccoli"),
+            new Food("tomatoes") };
+
+    private final static List<Entry<Food>> FOOD_WITH_ENTRIES = Arrays.asList(
+            new Entry<>(FOOD[0], FOOD_PAYLOAD[0]),
+            new Entry<>(FOOD[1], FOOD_PAYLOAD[1]),
+            new Entry<>(FOOD[2], FOOD_PAYLOAD[2]),
+            new Entry<>(FOOD[3], FOOD_PAYLOAD[3])
+    );
+
+    private final static String[] GREEK_LETTERS = new String[] { "Alpha", "Beta", "Gamma" };
+    private final static String[] GREEK_LETTERS_PAYLOAD = new String[] { "greek:Alpha", "greek:Beta", "greek:Gamma" };
+
+    private final static List<Entry<String>> GREEK_LETTERS_WITH_ENTRIES = Arrays.asList(
+            new Entry<>(GREEK_LETTERS[0], GREEK_LETTERS_PAYLOAD[0]),
+            new Entry<>(GREEK_LETTERS[1], GREEK_LETTERS_PAYLOAD[1]),
+            new Entry<>(GREEK_LETTERS[2], GREEK_LETTERS_PAYLOAD[2]));
+
+    private final static String[] UNICODE = new String[] { "turning", "once", "again", "börkü" };
+    private final static String[] UNICODE_PAYLOAD = new String[] { "uni:turning", "uni:once", "uni:again", "uni:börkü" };
+
+    private final static List<Entry<String>> UNICODE_WITH_ENTRIES = Arrays.asList(
+            new Entry<>(UNICODE[0], UNICODE_PAYLOAD[0]),
+            new Entry<>(UNICODE[1], UNICODE_PAYLOAD[1]),
+            new Entry<>(UNICODE[2], UNICODE_PAYLOAD[2]),
+            new Entry<>(UNICODE[3], UNICODE_PAYLOAD[3]));
+
+    public static class Food {
+        private final String name;
+
+        public Food(String name) {
+            this.name = name;
+        }
+
+        @Override
+        public int hashCode() {
+            final int prime = 31;
+            int result = 1;
+            result = prime * result + ((name == null) ? 0 : name.hashCode());
+            return result;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (this == obj) {
+                return true;
+            }
+            if (obj == null) {
+                return false;
+            }
+            if (getClass() != obj.getClass()) {
+                return false;
+            }
+            Food other = (Food) obj;
+            if (name == null) {
+                return other.name == null;
+            } else {
+                return name.equals(other.name);
+            }
+        }
+    }
+
+    @Test
+    public void keyAndTextAreTheSame() {
+        Trie<String> trie = Trie.<String>builder()
+                .add(ALPHABET[0], ALPHABET_PAYLOAD[0])
+                .build();
+        Collection<EntryOutput<String>> outputs = trie.parse(ALPHABET[0]);
+        Iterator<EntryOutput<String>> iterator = outputs.iterator();
+        checkOutput(iterator.next(), 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]);
+    }
+
+    @Test
+    public void keyAndTextAreTheSameFirstMatch() {
+        Trie<String> trie = Trie.<String>builder()
+                .add(ALPHABET[0], ALPHABET_PAYLOAD[0])
+                .build();
+        EntryOutput<String> firstMatch = trie.firstMatch(ALPHABET[0]);
+        checkOutput(firstMatch, 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]);
+    }
+
+    @Test
+    public void textIsLongerThanKey() {
+        Trie<String> trie = Trie.<String>builder()
+                .add(ALPHABET[0], ALPHABET_PAYLOAD[0])
+                .build();
+        Collection<EntryOutput<String>> emits = trie.parse(" " + ALPHABET[0]);
+        Iterator<EntryOutput<String>> iterator = emits.iterator();
+        checkOutput(iterator.next(), 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]);
+    }
+
+    @Test
+    public void textIsLongerThanKeyFirstMatch() {
+        Trie<String> trie = Trie.<String>builder()
+                .add(ALPHABET[0], ALPHABET_PAYLOAD[0])
+                .build();
+        EntryOutput<String> firstMatch = trie.firstMatch(" " + ALPHABET[0]);
+        checkOutput(firstMatch, 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]);
+    }
+
+    @Test
+    public void variousKeysOneMatch() {
+        Trie<String> trie = Trie.<String>builder()
+                .add(ALPHABET_WITH_ENTRIES)
+                .build();
+        Collection<EntryOutput<String>> outputs = trie.parse("bcd");
+        Iterator<EntryOutput<String>> iterator = outputs.iterator();
+        checkOutput(iterator.next(), 0, 2, "bcd", "alpha:bcd");
+    }
+
+    @Test
+    public void variousKeysFirstMatch() {
+        Trie<String> trie = Trie.<String>builder().add(ALPHABET_WITH_ENTRIES).build();
+        EntryOutput<String> firstMatch = trie.firstMatch("bcd");
+        checkOutput(firstMatch, 0, 2, "bcd", "alpha:bcd");
+    }
+
+    @Test
+    public void ushersTestAndStopOnHit() {
+        Trie<Integer> trie = Trie.<Integer>builder().add(PRONOUNS_WITH_ENTRIES).stopOnHit().build();
+        Collection<EntryOutput<Integer>> emits = trie.parse("ushers");
+        assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5
+        Iterator<EntryOutput<Integer>> iterator = emits.iterator();
+        checkOutput(iterator.next(), 2, 3, "he", 20);
+    }
+
+    @Test
+    public void ushersTestStopOnHitSkipOne() {
+        Trie<Integer> trie = Trie.<Integer>builder().add(PRONOUNS_WITH_ENTRIES).stopOnHit().build();
+
+        CollectingOutputHandler<Integer> testEmitHandler = new AbstractCollectingOutputHandler<>() {
+            boolean first = true;
+
+            @Override
+            public boolean output(final EntryOutput<Integer> emit) {
+                if (first) {
+                    // return false for the first element
+                    first = false;
+                    return false;
+                }
+                add(emit);
+                return true;
+            }
+
+        };
+
+        trie.parse("ushers", testEmitHandler);
+        Collection<EntryOutput<Integer>> emits = testEmitHandler.getOutputs();
+        assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5
+        Iterator<EntryOutput<Integer>> iterator = emits.iterator();
+        checkOutput(iterator.next(), 1, 3, "she", 4);
+    }
+
+    @Test
+    public void ushersTest() {
+        Trie<Integer> trie = Trie.<Integer>builder().add(PRONOUNS_WITH_ENTRIES).build();
+        Collection<EntryOutput<Integer>> emits = trie.parse("ushers");
+        assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
+        Iterator<EntryOutput<Integer>> iterator = emits.iterator();
+
+        checkOutput(iterator.next(), 2, 3, "he", 20);
+        checkOutput(iterator.next(), 1, 3, "she", 4);
+        checkOutput(iterator.next(), 2, 5, "hers", 9);
+    }
+
+    @Test
+    public void ushersTestWithCapitalKeywords() {
+        Trie<String> trie = Trie.<String>builder().ignoreCase().add("HERS", "hers").add("HIS", "his")
"hers").add("HIS", "his") + .add("SHE", "she").add("HE", "he").build(); + Collection> emits = trie.parse("ushers"); + assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5 + Iterator> iterator = emits.iterator(); + checkOutput(iterator.next(), 2, 3, "HE", "he"); + checkOutput(iterator.next(), 1, 3, "SHE", "she"); + checkOutput(iterator.next(), 2, 5, "HERS", "hers"); + } + + @Test + public void ushersTestFirstMatch() { + Trie trie = Trie.builder().add(PRONOUNS_WITH_ENTRIES).build(); + EntryOutput firstMatch = trie.firstMatch("ushers"); + checkOutput(firstMatch, 2, 3, "he", 20); + } + + @Test + public void ushersTestByCallback() { + Trie trie = Trie.builder().add(PRONOUNS_WITH_ENTRIES).build(); + + final List> emits = new LinkedList<>(); + OutputHandler emitHandler = emit -> { + emits.add(emit); + return true; + }; + trie.parse("ushers", emitHandler); + assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5 + Iterator> iterator = emits.iterator(); + + checkOutput(iterator.next(), 2, 3, "he", 20); + checkOutput(iterator.next(), 1, 3, "she", 4); + checkOutput(iterator.next(), 2, 5, "hers", 9); + } + + @Test + public void misleadingTest() { + Trie trie = Trie.builder().add("hers", "pronon:hers").build(); + Collection> emits = trie.parse("h he her hers"); + Iterator> iterator = emits.iterator(); + checkOutput(iterator.next(), 9, 12, "hers", "pronon:hers"); + } + + @Test + public void misleadingTestFirstMatch() { + Trie trie = Trie.builder().add("hers", "pronon:hers").build(); + EntryOutput firstMatch = trie.firstMatch("h he her hers"); + checkOutput(firstMatch, 9, 12, "hers", "pronon:hers"); + } + + @Test + public void recipes() { + Trie trie = Trie.builder().add(FOOD_WITH_ENTRIES).build(); + Collection> emits = trie.parse("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); + Iterator> iterator = emits.iterator(); + checkOutput(iterator.next(), 2, 12, "cauliflower", new Food("cauliflower")); + checkOutput(iterator.next(), 18, 25, "tomatoes", new Food("tomatoes")); + checkOutput(iterator.next(), 40, 43, "veal", new Food("veal")); + checkOutput(iterator.next(), 51, 58, "broccoli", new Food("broccoli")); + } + + @Test + public void recipesFirstMatch() { + Trie trie = Trie.builder().add(FOOD_WITH_ENTRIES).build(); + EntryOutput firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); + checkOutput(firstMatch, 2, 12, "cauliflower", new Food("cauliflower")); + } + + @Test + public void longAndShortOverlappingMatch() { + Trie trie = Trie.builder().add("he", "pronon:he").add("hehehehe", "garbage") + .build(); + Collection> emits = trie.parse("hehehehehe"); + Iterator> iterator = emits.iterator(); + checkOutput(iterator.next(), 0, 1, "he", "pronon:he"); + checkOutput(iterator.next(), 2, 3, "he", "pronon:he"); + checkOutput(iterator.next(), 4, 5, "he", "pronon:he"); + checkOutput(iterator.next(), 6, 7, "he", "pronon:he"); + checkOutput(iterator.next(), 0, 7, "hehehehe", "garbage"); + checkOutput(iterator.next(), 8, 9, "he", "pronon:he"); + checkOutput(iterator.next(), 2, 9, "hehehehe", "garbage"); + } + + @Test + public void nonOverlapping() { + Trie trie = Trie.builder().ignoreOverlaps().add("ab", "alpha:ab") + .add("cba", "alpha:cba").add("ababc", "alpha:ababc").build(); + Collection> emits = trie.parse("ababcbab"); + assertEquals(2, emits.size()); + Iterator> iterator = emits.iterator(); + // With overlaps: ab@1, ab@3, ababc@4, cba@6, ab@7 + checkOutput(iterator.next(), 0, 4, "ababc", "alpha:ababc"); + checkOutput(iterator.next(), 6, 7, "ab", 
"alpha:ab"); + } + + @Test + public void nonOverlappingFirstMatch() { + Trie trie = Trie.builder().ignoreOverlaps().add("ab", "alpha:ab") + .add("cba", "alpha:cba").add("ababc", "alpha:ababc").build(); + EntryOutput firstMatch = trie.firstMatch("ababcbab"); + + checkOutput(firstMatch, 0, 4, "ababc", "alpha:ababc"); + } + + @Test + public void containsMatch() { + Trie trie = Trie.builder().ignoreOverlaps().add("ab", "alpha:ab") + .add("cba", "alpha:cba").add("ababc", "alpha:ababc").build(); + assertTrue(trie.match("ababcbab")); + } + + @Test + public void startOfChurchillSpeech() { + Trie trie = Trie.builder().ignoreOverlaps().add("T").add("u").add("ur") + .add("r").add("urn").add("ni").add("i").add("in").add("n") + .add("urning").build(); + Collection> emits = trie.parse("Turning"); + assertEquals(2, emits.size()); + } + + @Test + public void partialMatch() { + Trie trie = Trie.builder().onlyWholeWords().add("sugar", "food:sugar").build(); + Collection> emits = trie.parse("sugarcane sugarcane sugar canesugar"); // left, middle, right test + assertEquals(1, emits.size()); // Match must not be made + checkOutput(emits.iterator().next(), 20, 24, "sugar", "food:sugar"); + } + + @Test + public void partialMatchFirstMatch() { + Trie trie = Trie.builder().onlyWholeWords().add("sugar", "food:sugar").build(); + EntryOutput firstMatch = trie.firstMatch("sugarcane sugarcane sugar canesugar"); // left, middle, right test + + checkOutput(firstMatch, 20, 24, "sugar", "food:sugar"); + } + + @Test + public void tokenizeFullSentence() { + Trie trie = Trie.builder().add(GREEK_LETTERS_WITH_ENTRIES).build(); + Collection> tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve"); + assertEquals(7, tokens.size()); + Iterator> tokensIt = tokens.iterator(); + assertEquals("Hear: ", tokensIt.next().getFragment()); + assertEquals("Alpha", tokensIt.next().getFragment()); + assertEquals(" team first, ", tokensIt.next().getFragment()); + assertEquals("Beta", tokensIt.next().getFragment()); + assertEquals(" from the rear, ", tokensIt.next().getFragment()); + assertEquals("Gamma", tokensIt.next().getFragment()); + assertEquals(" in reserve", tokensIt.next().getFragment()); + } + + // @see https://github.com/robert-bor/aho-corasick/issues/5 + @Test + public void testStringIndexOutOfBoundsException() { + Trie trie = Trie.builder().ignoreCase().onlyWholeWords().add(UNICODE_WITH_ENTRIES) + .build(); + Collection> emits = trie.parse("TurninG OnCe AgAiN BÖRKÜ"); + assertEquals(4, emits.size()); // Match must not be made + Iterator> it = emits.iterator(); + + checkOutput(it.next(), 0, 6, "turning", "uni:turning"); + checkOutput(it.next(), 8, 11, "once", "uni:once"); + checkOutput(it.next(), 13, 17, "again", "uni:again"); + checkOutput(it.next(), 19, 23, "börkü", "uni:börkü"); + } + + @Test + public void testIgnoreCase() { + Trie trie = Trie.builder().ignoreCase().add(UNICODE_WITH_ENTRIES).build(); + Collection> emits = trie.parse("TurninG OnCe AgAiN BÖRKÜ"); + assertEquals(4, emits.size()); // Match must not be made + Iterator> it = emits.iterator(); + + checkOutput(it.next(), 0, 6, "turning", "uni:turning"); + checkOutput(it.next(), 8, 11, "once", "uni:once"); + checkOutput(it.next(), 13, 17, "again", "uni:again"); + checkOutput(it.next(), 19, 23, "börkü", "uni:börkü"); + } + + @Test + public void testIgnoreCaseFirstMatch() { + Trie trie = Trie.builder().ignoreCase().add(UNICODE_WITH_ENTRIES).build(); + EntryOutput firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ"); + + 
+
+    // @see https://github.com/robert-bor/aho-corasick/issues/5
+    @Test
+    public void testStringIndexOutOfBoundsException() {
+        Trie<String> trie = Trie.<String>builder().ignoreCase().onlyWholeWords().add(UNICODE_WITH_ENTRIES)
+                .build();
+        Collection<EntryOutput<String>> emits = trie.parse("TurninG OnCe AgAiN BÖRKÜ");
+        assertEquals(4, emits.size()); // all four keywords must match
+        Iterator<EntryOutput<String>> it = emits.iterator();
+
+        checkOutput(it.next(), 0, 6, "turning", "uni:turning");
+        checkOutput(it.next(), 8, 11, "once", "uni:once");
+        checkOutput(it.next(), 13, 17, "again", "uni:again");
+        checkOutput(it.next(), 19, 23, "börkü", "uni:börkü");
+    }
+
+    @Test
+    public void testIgnoreCase() {
+        Trie<String> trie = Trie.<String>builder().ignoreCase().add(UNICODE_WITH_ENTRIES).build();
+        Collection<EntryOutput<String>> emits = trie.parse("TurninG OnCe AgAiN BÖRKÜ");
+        assertEquals(4, emits.size()); // all four keywords must match
+        Iterator<EntryOutput<String>> it = emits.iterator();
+
+        checkOutput(it.next(), 0, 6, "turning", "uni:turning");
+        checkOutput(it.next(), 8, 11, "once", "uni:once");
+        checkOutput(it.next(), 13, 17, "again", "uni:again");
+        checkOutput(it.next(), 19, 23, "börkü", "uni:börkü");
+    }
+
+    @Test
+    public void testIgnoreCaseFirstMatch() {
+        Trie<String> trie = Trie.<String>builder().ignoreCase().add(UNICODE_WITH_ENTRIES).build();
+        EntryOutput<String> firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ");
+
+        checkOutput(firstMatch, 0, 6, "turning", "uni:turning");
+    }
+
+    @Test
+    public void tokenizeTokensInSequence() {
+        Trie<String> trie = Trie.<String>builder().add(GREEK_LETTERS_WITH_ENTRIES).build();
+        Collection<Token<String>> tokens = trie.tokenize("Alpha Beta Gamma");
+        assertEquals(5, tokens.size());
+    }
+
+    // @see https://github.com/robert-bor/aho-corasick/issues/7
+    @Test
+    public void testZeroLength() {
+        Trie<String> trie = Trie.<String>builder().ignoreOverlaps().onlyWholeWords().ignoreCase().add("")
+                .build();
+        trie.tokenize(
+                "Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel.");
+    }
+
+    // @see https://github.com/robert-bor/aho-corasick/issues/8
+    @Test
+    public void testUnicode1() {
+        String target = "LİKE THIS"; // the second character ('İ') is non-ASCII; the old implementation read it as a 2-byte char
+        assertEquals("THIS", target.substring(5, 9)); // Java does it the right way
+        Trie<String> trie = Trie.<String>builder().ignoreCase().onlyWholeWords().add("this", "pronon:this")
+                .build();
+        Collection<EntryOutput<String>> emits = trie.parse(target);
+        assertEquals(1, emits.size());
+        Iterator<EntryOutput<String>> it = emits.iterator();
+        checkOutput(it.next(), 5, 8, "this", "pronon:this");
+    }
+
+    // @see https://github.com/robert-bor/aho-corasick/issues/8
+    @Test
+    public void testUnicode2() {
+        String target = "LİKE THIS"; // the second character ('İ') is non-ASCII; the old implementation read it as a 2-byte char
+        Trie<String> trie = Trie.<String>builder().ignoreCase().onlyWholeWords().add("this", "pronon:this")
+                .build();
+        assertEquals("THIS", target.substring(5, 9)); // Java does it the right way
+        EntryOutput<String> firstMatch = trie.firstMatch(target);
+        checkOutput(firstMatch, 5, 8, "this", "pronon:this");
+    }
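+
+    /*
+        Minimal sketch of the pitfall behind the two Unicode tests above,
+        assuming only java.lang: lower-casing 'İ' (U+0130) in the root locale
+        yields two chars ("i" plus combining dot above, U+0307), so a naive
+        toLowerCase() shifts every later match position by one.
+     */
+    @Test
+    public void lowerCasingCanChangeStringLength() {
+        String target = "LİKE THIS";
+        assertEquals(9, target.length());
+        assertEquals(10, target.toLowerCase(java.util.Locale.ROOT).length());
+    }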
"bar").onlyWholeWordsWhiteSpaceSeparated().build(); + // access via PayloadTrie.parseText(CharSequence) + Collection> result1 = trie.parse("foo#bar"); + // access via PayloadTrie.parseText(CharSequence, PayloadEmitHandler) + Collection> result2 = new LinkedList<>(); + trie.parse("foo#bar", result2::add); + + assertTrue(result1.isEmpty()); + assertEquals(result1, result2); + } + + /** + * Generates a random sequence of ASCII numbers. + * + * @param count The number of numbers to generate. + * @return A character sequence filled with random digits. + */ + private StringBuilder randomNumbers(int count) { + final StringBuilder sb = new StringBuilder(count); + while (--count > 0) { + sb.append(randomInt(10)); + } + return sb; + } + + /** + * Injects keywords into a string builder. + * + * @param source Should contain a bunch of random data that cannot match any + * keyword. + * @param keyword A keyword to inject repeatedly in the text. + * @param interval How often to inject the keyword. + */ + private void injectKeyword(final StringBuilder source, final String keyword, final int interval) { + final int length = source.length(); + for (int i = 0; i < length; i += interval) { + source.replace(i, i + keyword.length(), keyword); + } + } + + private int randomInt(final int bound) { + return random.nextInt(bound); + } + + private void checkOutput(EntryOutput next, int expectedStart, int expectedEnd, String expectedKeyword, + Food expectedPayload) { + assertEquals(expectedStart, next.getStart(), "Start of emit should have been " + expectedStart); + assertEquals(expectedEnd, next.getEnd(), "End of emit should have been " + expectedEnd); + assertEquals(expectedKeyword, next.getKey(), "Keyword of emit shoud be " + expectedKeyword); + assertEquals(expectedPayload, next.getValue(), "Payload of emit shoud be " + expectedPayload); + } + + private void checkOutput(EntryOutput next, int expectedStart, int expectedEnd, String expectedKeyword, + Integer expectedPayload) { + assertEquals(expectedStart, next.getStart(), "Start of emit should have been " + expectedStart); + assertEquals(expectedEnd, next.getEnd(), "End of emit should have been " + expectedEnd); + assertEquals(expectedKeyword, next.getKey(), "Keyword of emit shoud be " + expectedKeyword); + assertEquals(expectedPayload, next.getValue(), "Payload of emit shoud be " + expectedPayload); + } + + private void checkOutput(EntryOutput next, int expectedStart, int expectedEnd, String expectedKeyword, + String expectedPayload) { + assertEquals(expectedStart, next.getStart(), "Start of emit should have been " + expectedStart); + assertEquals(expectedEnd, next.getEnd(), "End of emit should have been " + expectedEnd); + assertEquals(expectedKeyword, next.getKey(), "Keyword of emit shoud be " + expectedKeyword); + assertEquals(expectedPayload, next.getValue(), "Payload of emit shoud be " + expectedPayload); + } + + static abstract class AbstractCollectingOutputHandler implements CollectingOutputHandler { + + private final List> outputs = new ArrayList<>(); + + public void add(final EntryOutput emit) { + outputs.add(emit); + } + + @Override + public List> getOutputs() { + return this.outputs; + } + } +} diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/patricia/StringKeyAnalyzer.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/patricia/StringKeyAnalyzer.java index da0e187..208a29a 100644 --- a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/patricia/StringKeyAnalyzer.java +++ 
+
+    private void checkOutput(EntryOutput<Food> next, int expectedStart, int expectedEnd, String expectedKeyword,
+                             Food expectedPayload) {
+        assertEquals(expectedStart, next.getStart(), "Start of emit should have been " + expectedStart);
+        assertEquals(expectedEnd, next.getEnd(), "End of emit should have been " + expectedEnd);
+        assertEquals(expectedKeyword, next.getKey(), "Keyword of emit should be " + expectedKeyword);
+        assertEquals(expectedPayload, next.getValue(), "Payload of emit should be " + expectedPayload);
+    }
+
+    private void checkOutput(EntryOutput<Integer> next, int expectedStart, int expectedEnd, String expectedKeyword,
+                             Integer expectedPayload) {
+        assertEquals(expectedStart, next.getStart(), "Start of emit should have been " + expectedStart);
+        assertEquals(expectedEnd, next.getEnd(), "End of emit should have been " + expectedEnd);
+        assertEquals(expectedKeyword, next.getKey(), "Keyword of emit should be " + expectedKeyword);
+        assertEquals(expectedPayload, next.getValue(), "Payload of emit should be " + expectedPayload);
+    }
+
+    private void checkOutput(EntryOutput<String> next, int expectedStart, int expectedEnd, String expectedKeyword,
+                             String expectedPayload) {
+        assertEquals(expectedStart, next.getStart(), "Start of emit should have been " + expectedStart);
+        assertEquals(expectedEnd, next.getEnd(), "End of emit should have been " + expectedEnd);
+        assertEquals(expectedKeyword, next.getKey(), "Keyword of emit should be " + expectedKeyword);
+        assertEquals(expectedPayload, next.getValue(), "Payload of emit should be " + expectedPayload);
+    }
+
+    static abstract class AbstractCollectingOutputHandler<T> implements CollectingOutputHandler<T> {
+
+        private final List<EntryOutput<T>> outputs = new ArrayList<>();
+
+        public void add(final EntryOutput<T> emit) {
+            outputs.add(emit);
+        }
+
+        @Override
+        public List<EntryOutput<T>> getOutputs() {
+            return this.outputs;
+        }
+    }
+}
diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/patricia/StringKeyAnalyzer.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/patricia/StringKeyAnalyzer.java
index da0e187..208a29a 100644
--- a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/patricia/StringKeyAnalyzer.java
+++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/patricia/StringKeyAnalyzer.java
@@ -61,10 +61,8 @@ public class StringKeyAnalyzer extends AbstractKeyAnalyzer {
         if (bitIndex >= lengthInBits(key)) {
             return false;
         }
-
         int index = bitIndex / size;
         int bit = bitIndex % size;
-
         return (key.charAt(index) & mask(bit)) != 0;
     }
 
@@ -75,15 +73,11 @@ public class StringKeyAnalyzer extends AbstractKeyAnalyzer {
 
     @Override
     public int bitIndex(String key, String otherKey) {
-
         boolean allNull = true;
         int length = Math.max(key.length(), otherKey.length());
-
         for (int i = 0; i < length; i++) {
-
             char ch1 = valueAt(key, i);
             char ch2 = valueAt(otherKey, i);
-
             if (ch1 != ch2) {
                 int xor = ch1 ^ ch2;
                 for (int j = 0; j < size; j++) {
@@ -92,17 +86,14 @@
                 }
             }
-
             if (ch1 != 0) {
                 allNull = false;
             }
         }
-
         // All bits are 0
         if (allNull) {
             return KeyAnalyzer.NULL_BIT_KEY;
         }
-
         // Both keys are equal
         return KeyAnalyzer.EQUAL_BIT_KEY;
     }
diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/radix/adaptive/InnerNodeUnitTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/radix/adaptive/InnerNodeUnitTest.java
new file mode 100644
index 0000000..97d2845
--- /dev/null
+++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/radix/adaptive/InnerNodeUnitTest.java
@@ -0,0 +1,401 @@
+package org.xbib.datastructures.trie.radix.adaptive;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+public abstract class InnerNodeUnitTest {
+
+    protected static class Pair implements Comparable<Pair> {
+
+        final byte partialKey;
+
+        final Node child;
+
+        Pair(byte partialKey, Node child) {
+            this.partialKey = partialKey;
+            this.child = child;
+        }
+
+        @Override
+        public int compareTo(Pair o) {
+            return compare(partialKey, o.partialKey);
+        }
+    }
+
+    public static int compare(byte a, byte b) {
+        return toInt(a) - toInt(b);
+    }
+
+    // treat the byte as an unsigned value in [0, 255]
+    public static int toInt(byte value) {
+        return value & 0xFF;
+    }
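+
+    /*
+        Minimal sketch of the unsigned ordering that compare() above
+        implements, with a few concrete values; assumes nothing beyond
+        java.lang.
+     */
+    @Test
+    public void unsignedCompareTreatsNegativeBytesAsLarge() {
+        assertTrue(compare((byte) 1, (byte) -1) < 0);      // 1 < 255 unsigned
+        assertTrue(compare((byte) -2, (byte) -1) < 0);     // 254 < 255 unsigned
+        assertTrue(compare((byte) 0x7F, (byte) 0x80) < 0); // 127 < 128 unsigned
+    }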
+
+    protected InnerNode node;
+
+    protected Pair[] existingData;
+
+    InnerNodeUnitTest(int nodeSize) {
+        InnerNode node = new Node4();
+        existingData = new Pair[nodeSize + 1];
+        for (int j = 0, i = -nodeSize / 2; j < nodeSize + 1; i++, j++) {
+            if (node.isFull()) {
+                node = node.grow();
+            }
+            Pair p = new Pair((byte) i, Mockito.spy(Node.class));
+            existingData[j] = p;
+            node.addChild(p.partialKey, p.child);
+        }
+        this.node = node;
+    }
+
+    @BeforeEach
+    public void setup() {
+        int i = 0;
+        for (; i < existingData.length; i++) {
+            if (existingData[i].partialKey < 0) {
+                break;
+            }
+        }
+        assertTrue(i < existingData.length, "sample key set should contain at least"
+                + " one negative integer to test for unsigned lexicographic ordering");
+    }
+
+    // for sample keys -2, -1, 0, 1 the unsigned lexicographic order is 0, 1, -2, -1
+    byte[] existingKeys() {
+        byte[] keys = new byte[existingData.length];
+        for (int i = 0; i < keys.length; i++) {
+            keys[i] = existingData[i].partialKey;
+        }
+        return keys;
+    }
+
+    void verifyUnsignedLexicographicOrder() {
+        verifyUnsignedLexicographicOrder(node);
+    }
+
+    /*
+        Work only with interface methods; we don't care about implementation
+        details (for example, how Node4 stores bytes as unsigned), only about
+        the right lexicographic ordering. Of course this requires testing with
+        negative as well as positive keys, hence the check in the test setup.
+        We don't verify child mappings here, since findChild already covers
+        them. This really is about making sure negative bytes come after
+        positive ones. We don't require that the children storage itself is
+        sorted either; all we want is that the order-dependent methods (first,
+        last, greater, lesser) answer correctly. They might do so even without
+        storing the children in sorted order, and as a generic test suite we
+        don't care: we base our assertions on invariants.
+     */
+    void verifyUnsignedLexicographicOrder(InnerNode node) {
+        boolean negExist = false;
+        byte prev = node.first().uplinkKey();
+        if (prev < 0) {
+            negExist = true;
+        }
+        for (int i = 1; i < node.size(); i++) {
+            byte next = node.greater(prev).uplinkKey();
+            assertTrue(compare(prev, next) < 0);
+            prev = next;
+            if (prev < 0) {
+                negExist = true;
+            }
+        }
+        assertTrue(negExist, "expected at least one negative byte to test lexicographic ordering");
+
+        prev = node.last().uplinkKey();
+        for (int i = node.size() - 2; i >= 0; i--) {
+            byte next = node.lesser(prev).uplinkKey();
+            assertTrue(compare(prev, next) > 0);
+            prev = next;
+        }
+    }
+
+    /*
+        add partial keys;
+        all key, child mappings should exist;
+        size increases;
+        uplinks are set up;
+        keys end up in the right unsigned lexicographic order
+     */
+    @Test
+    public void testAddAndFindChild() {
+        List<Pair> pairs = new ArrayList<>(Arrays.asList(existingData));
+        for (byte i = 0; !node.isFull(); i++) {
+            if (node.findChild(i) != null) {
+                continue;
+            }
+            Pair p = new Pair(i, Mockito.spy(Node.class));
+            pairs.add(p);
+            node.addChild(p.partialKey, p.child);
+        }
+
+        // size
+        assertEquals(node.size(), pairs.size());
+
+        for (Pair p : pairs) {
+            // uplinks setup
+            assertEquals(node, p.child.parent());
+            assertEquals(p.partialKey, p.child.uplinkKey());
+            // all added partial keys exist
+            assertEquals(p.child, node.findChild(p.partialKey));
+        }
+
+        verifyUnsignedLexicographicOrder();
+    }
+
+    /*
+        sort the sample data and expect the smallest lexicographic byte
+     */
+    @Test
+    public void testFirst() {
+        byte[] data = existingKeys();
+        sort(data);
+        assertEquals(node.first().uplinkKey(), data[0]);
+    }
+
+    /*
+        sort the sample data and expect the largest lexicographic byte
+     */
+    @Test
+    public void testLast() {
+        byte[] data = existingKeys();
+        sortDescending(data);
+        assertEquals(node.last().uplinkKey(), data[0]);
+    }
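+
+    /*
+        Minimal sketch of the growth path this harness drives: a full node
+        grows into the next larger type. Only Node4 is referenced by these
+        tests; the exact chain of larger node types is an assumption here
+        (a classic adaptive radix tree grows 4 -> 16 -> 48 -> 256).
+     */
+    @Test
+    public void growUntilAllByteKeysFit() {
+        InnerNode n = new Node4();
+        for (int i = 0; i < 256; i++) {
+            if (n.isFull()) {
+                n = n.grow(); // assumed to preserve mappings, as testGrow verifies
+            }
+            n.addChild((byte) i, Mockito.spy(Node.class));
+        }
+        assertEquals(256, n.size());
+    }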
+
+    /*
+        nothing is greater than the greatest key;
+        every other key is followed by its in-order successor
+        (the smallest unsigned byte is 0, bit pattern 0000 0000)
+     */
+    @Test
+    public void testGreater() {
+        Node last = node.last();
+        assertNull(node.greater(last.uplinkKey()));
+        Arrays.sort(existingData);
+        for (int i = 0; i < node.size() - 1; i++) {
+            Node greater = node.greater(existingData[i].partialKey);
+            assertEquals(existingData[i + 1].child, greater);
+        }
+    }
+
+    /*
+        nothing is lesser than the least key;
+        every other key is preceded by its in-order predecessor
+        (the largest unsigned byte is -1, bit pattern 1111 1111)
+     */
+    @Test
+    public void testLesser() {
+        Node first = node.first();
+        assertNull(node.lesser(first.uplinkKey()));
+        Arrays.sort(existingData);
+        for (int i = 1; i < node.size(); i++) {
+            Node lesser = node.lesser(existingData[i].partialKey);
+            assertEquals(existingData[i - 1].child, lesser);
+        }
+    }
+
+    /*
+        remove child:
+        unsigned lexicographic order is maintained,
+        the uplink is removed, the size is reduced,
+        and the child no longer exists (findChild)
+     */
+    @Test
+    public void testRemove() {
+        // since we remove two children in this test, we must not violate the
+        // minimum-size constraint of the node, so add two extra children up front
+        byte minByte = Byte.MAX_VALUE, maxByte = Byte.MIN_VALUE;
+        for (int i = 0; i < existingData.length; i++) {
+            if (existingData[i].partialKey > maxByte) {
+                maxByte = existingData[i].partialKey;
+            }
+            if (existingData[i].partialKey < minByte) {
+                minByte = existingData[i].partialKey;
+            }
+        }
+        Pair p = new Pair((byte) (minByte - 1), Mockito.spy(Node.class));
+        node.addChild(p.partialKey, p.child);
+        p = new Pair((byte) (maxByte + 1), Mockito.spy(Node.class));
+        if (!node.isFull()) { // needed for Node4, since the test setup already adds three children
+            node.addChild(p.partialKey, p.child);
+        }
+
+        int initialSize = node.size();
+
+        // remove at head
+        Node head = node.first();
+        node.removeChild(head.uplinkKey());
+        assertNull(node.findChild(head.uplinkKey()));
+        assertEquals(initialSize - 1, node.size());
+        assertNull(head.parent());
+
+        // remove at tail
+        Node tail = node.last();
+        node.removeChild(tail.uplinkKey());
+        assertNull(node.findChild(tail.uplinkKey()));
+        assertEquals(initialSize - 2, node.size());
+        assertNull(tail.parent());
+
+        verifyUnsignedLexicographicOrder();
+    }
+
+    /*
+        after growing, the new node contains the same key, child mappings
+        in the same lexicographic order, but with uplinks to the grown node,
+        and the same prefix key, number of children, uplink key and parent
+     */
+    @Test
+    public void testGrow() {
+        List<Pair> pairs = new ArrayList<>(Arrays.asList(existingData));
+        byte i;
+        Pair pair;
+        // fill node to capacity
+        for (i = 0; ; i++) {
+            if (node.findChild(i) != null) {
+                continue; // find at least one non-existent child to force an add
+            }
+            pair = new Pair(i, Mockito.spy(Node.class));
+            if (node.isFull()) {
+                break;
+            }
+            pairs.add(pair);
+            node.addChild(pair.partialKey, pair.child);
+        }
+
+        // capacity reached
+        assertTrue(node.isFull());
+
+        // hence we need to grow
+        InnerNode grown = node.grow();
+        assertEquals(node.size(), grown.size());
+        assertEqualHeader(node, grown);
+
+        // add a child on the newly grown node
+        grown.addChild(pair.partialKey, pair.child);
+        pairs.add(pair);
+
+        // verify the same key, child mappings exist
+        for (i = 0; i < pairs.size(); i++) {
+            Pair p = pairs.get(i);
+            // uplinks setup
+            assertEquals(grown, p.child.parent());
+            assertEquals(p.partialKey, p.child.uplinkKey());
+            // all added partial keys exist
+            assertEquals(p.child, grown.findChild(p.partialKey));
+        }
+        verifyUnsignedLexicographicOrder(grown);
+    }
+
+    /*
+        after shrinking, the node contains the same key, child mappings,
+        the lexicographic order is maintained, and the parent, prefix length
+        and prefix keys stay the same
+     */
+    @Test
+    public void testShrink() {
+        List<Pair> pairs = new ArrayList<>(Arrays.asList(existingData));
+        while (!node.shouldShrink()) {
+            node.removeChild(pairs.remove(0).partialKey);
+        }
+        assertTrue(node.shouldShrink());
+        InnerNode shrunk = node.shrink();
+
+        assertEquals(shrunk.size(), node.size());
+        assertEqualHeader(node, shrunk);
+
+        // verify the same key, child mappings exist
+        for (Pair p : pairs) {
+            // uplinks setup
+            assertEquals(shrunk, p.child.parent());
+            assertEquals(p.partialKey, p.child.uplinkKey());
+            // all added partial keys exist
+            assertEquals(p.child, shrunk.findChild(p.partialKey));
+        }
+        verifyUnsignedLexicographicOrder(shrunk);
+    }
+
+    void assertEqualHeader(Node a, Node b) {
+        InnerNode aa = (InnerNode) a;
+        InnerNode bb = (InnerNode) b;
+        assertEquals(aa.prefixLen, bb.prefixLen);
+        assertArrayEquals(getValidPrefixKey(aa), getValidPrefixKey(bb));
+        assertEquals(aa.parent(), bb.parent());
+        assertEquals(aa.uplinkKey(), bb.uplinkKey());
+    }
+
+    static byte[] getValidPrefixKey(InnerNode innerNode) {
+        int limit = Math.min(InnerNode.PESSIMISTIC_PATH_COMPRESSION_LIMIT, innerNode.prefixLen);
+        byte[] valid = new byte[limit];
+        System.arraycopy(innerNode.prefixKeys, 0, valid, 0, limit);
+        return valid;
+    }
+
+    /*
+        replace the child associated with a key:
+        the new child can be found, the size stays the same, the
+        lexicographic order is maintained, and the uplink is set up for the
+        new child. The old child's uplink stays. Why? In the lazy leaf
+        expansion case we first link the current leaf node with a new Node4()
+        and only later replace the current down pointer to this leaf with
+        that new Node4() parent. If replace removed the old child's uplink,
+        the old child might already have been linked with a new parent. We
+        could ensure that explicitly in that branch, but it is fine not to
+        do it in replace.
+     */
+    @Test
+    public void testReplace() {
+        Node first = node.first();
+        byte oldUplinkKey = first.uplinkKey(); // captured before the replace
+        Node newChild = Mockito.spy(Node.class);
+        node.replace(oldUplinkKey, newChild);
+        assertEquals(newChild, node.findChild(oldUplinkKey));
+        assertEquals(existingData.length, node.size());
+        assertEquals(oldUplinkKey, newChild.uplinkKey());
+        assertEquals(node, newChild.parent());
+        // the old child's uplink stays intact
+        assertEquals(oldUplinkKey, first.uplinkKey());
+        assertEquals(node, first.parent());
+    }
+
+    public static void sort(byte[] array) {
+        sort(array, 0, array.length);
+    }
+
+    // flip the sign bit, sort signed, flip back: ascending unsigned order
+    public static void sort(byte[] array, int fromIndex, int toIndex) {
+        for (int i = fromIndex; i < toIndex; i++) {
+            array[i] = flip(array[i]);
+        }
+        Arrays.sort(array, fromIndex, toIndex);
+        for (int i = fromIndex; i < toIndex; i++) {
+            array[i] = flip(array[i]);
+        }
+    }
+
+    private static byte flip(byte b) {
+        return (byte) (b ^ 0x80);
+    }
+
+    public static void sortDescending(byte[] array) {
+        sortDescending(array, 0, array.length);
+    }
+
+    // XOR with 0x7F reverses the unsigned order, so a signed sort plus a
+    // second XOR yields descending unsigned order
+    public static void sortDescending(byte[] array, int fromIndex, int toIndex) {
+        for (int i = fromIndex; i < toIndex; i++) {
+            array[i] ^= Byte.MAX_VALUE;
+        }
+        Arrays.sort(array, fromIndex, toIndex);
+        for (int i = fromIndex; i < toIndex; i++) {
+            array[i] ^= Byte.MAX_VALUE;
+        }
+    }
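+
+    /*
+        Minimal sketch of the sign-bit trick used by sort() above, assuming
+        only java.lang: XOR with 0x80 maps unsigned order onto signed order,
+        so a signed sort plus a second flip yields ascending unsigned order.
+     */
+    @Test
+    public void signBitFlipSortsUnsigned() {
+        byte[] bytes = {(byte) 0xFF, (byte) 0x80, (byte) 0x7F, (byte) 0x00}; // 255, 128, 127, 0 unsigned
+        sort(bytes);
+        assertArrayEquals(new byte[]{(byte) 0x00, (byte) 0x7F, (byte) 0x80, (byte) 0xFF}, bytes);
+    }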
+}
diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/radix/adaptive/Node4UnitTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/radix/adaptive/Node4UnitTest.java
new file mode 100644
index 0000000..e56c761
--- /dev/null
+++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/radix/adaptive/Node4UnitTest.java
@@ -0,0 +1,41 @@
+package org.xbib.datastructures.trie.radix.adaptive;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+public class Node4UnitTest extends InnerNodeUnitTest {
+
+    Node4UnitTest() {
+        super(2);
+    }
+
+    @Test
+    public void testGetOnlyChild() {
+        // remove children until only one is left
+        while (node.size() != 1) {
+            node.removeChild(node.first().uplinkKey());
+        }
+        // we always removed the smallest key, so the survivor is the largest one
+        byte[] keys = existingKeys();
+        sortDescending(keys);
+        assertEquals(keys[0], ((Node4) node).getOnlyChildKey());
+    }
+
+    @Override
+    @Test
+    public void testShrink() {
+        Assertions.assertThrows(UnsupportedOperationException.class, () -> node.shrink());
+    }
+
+    @Test
+    public void testShouldShrinkAlwaysFalse() {
+        // remove all children
+        while (node.size() != 0) {
+            node.removeChild(node.first().uplinkKey());
+        }
+        assertFalse(node.shouldShrink());
+    }
+}
diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/segment/TrieTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/segment/TrieTest.java
new file mode 100644
index 0000000..1577a59
--- /dev/null
+++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/segment/TrieTest.java
@@ -0,0 +1,61 @@
+package org.xbib.datastructures.trie.segment;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+public class TrieTest {
+
+    @Test
+    public void testEmptyTrie() {
+        Trie<String, TrieKey<String>, String> trie = new TrieImpl<>();
+        TrieKey<String> trieKey = new TrieKeyImpl<>();
+        String result = trie.search(trieKey);
+        assertNull(result);
+    }
+
+    @Test
+    public void testEmptyKey() {
+        Trie<Integer, TrieKey<Integer>, Integer> trie = new TrieImpl<>();
+        TrieKey<Integer> trieKey = new TrieKeyImpl<>();
+        trie.add(trieKey, 100);
+        Integer result = trie.search(trieKey);
+        assertEquals(result, (Integer) 100);
+        trie.add(trieKey, 200);
+        result = trie.search(trieKey);
+        assertEquals(result, (Integer) 200);
+    }
+
+    @Test
+    public void testSingletonTrie() {
+        Trie<String, TrieKey<String>, String> trie = new TrieImpl<>();
+        TrieKey<String> trieKey = TrieKeyImpl.stringKey("key");
+        trie.add(trieKey, "value");
+        String result = trie.search(trieKey);
+        assertNotEquals(result, "key");
+    }
+
+    @Test
+    public void testLargeInsertionAndSearch() {
+        Trie<String, TrieKey<String>, Long> trie = new TrieImpl<>();
+        List<TrieKey<String>> keys = new ArrayList<>();
+        Random random = new Random(); // one generator for the whole run
+        for (int i = 0; i < 10000; i++) {
+            Long value = random.nextLong();
+            String key = value.toString();
+            TrieKey<String> trieKey = TrieKeyImpl.stringKey(key);
+            trie.add(trieKey, value);
+            keys.add(trieKey);
+        }
+        for (TrieKey<String> key : keys) {
+            Long value = trie.search(key);
+            assertEquals(key.toString(), value.toString());
+        }
+    }
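+
+    /*
+        Minimal sketch of prefix behaviour, assuming the
+        Trie<T, K extends TrieKey<T>, V> shape used above and that searching
+        an absent key returns null (as testEmptyTrie suggests).
+     */
+    @Test
+    public void testSharedPrefixKeys() {
+        Trie<String, TrieKey<String>, Integer> trie = new TrieImpl<>();
+        trie.add(TrieKeyImpl.stringKey("rome"), 1);
+        trie.add(TrieKeyImpl.stringKey("romane"), 2);
+        assertEquals((Integer) 1, trie.search(TrieKeyImpl.stringKey("rome")));
+        assertEquals((Integer) 2, trie.search(TrieKeyImpl.stringKey("romane")));
+        assertNull(trie.search(TrieKeyImpl.stringKey("rom")));
+    }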
+}
diff --git a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/TrieTest.java b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/simple/TrieTest.java
similarity index 97%
rename from datastructures-trie/src/test/java/org/xbib/datastructures/trie/TrieTest.java
rename to datastructures-trie/src/test/java/org/xbib/datastructures/trie/simple/TrieTest.java
index 71877ef..b4f50d3 100644
--- a/datastructures-trie/src/test/java/org/xbib/datastructures/trie/TrieTest.java
+++ b/datastructures-trie/src/test/java/org/xbib/datastructures/trie/simple/TrieTest.java
@@ -1,4 +1,4 @@
-package org.xbib.datastructures.trie;
+package org.xbib.datastructures.trie.simple;
 
 import org.junit.jupiter.api.Test;
 
diff --git a/gradle/compile/java.gradle b/gradle/compile/java.gradle
index 7b7c7e7..de7e0b3 100644
--- a/gradle/compile/java.gradle
+++ b/gradle/compile/java.gradle
@@ -39,7 +39,6 @@ artifacts {
 
 tasks.withType(JavaCompile) {
-    // commented out mostly because of jmh generated code
-    // options.compilerArgs << '-Xlint:all'
+    options.compilerArgs << '-Xlint:all'
 }
 
 javadoc {