update to gradle 7.3.2, add bgzf (MIT License)
This commit is contained in:
parent
9ca8990bf0
commit
337e1c19c0
24 changed files with 2588 additions and 113 deletions
|
@ -1,5 +1,5 @@
|
|||
group = org.xbib
|
||||
name = archive
|
||||
version = 1.0.1
|
||||
version = 1.1.0
|
||||
|
||||
gradle.wrapper.version = 6.6.1
|
||||
gradle.wrapper.version = 7.3.2
|
||||
|
|
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Binary file not shown.
2
gradle/wrapper/gradle-wrapper.properties
vendored
2
gradle/wrapper/gradle-wrapper.properties
vendored
|
@ -1,5 +1,5 @@
|
|||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-6.6.1-all.zip
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-all.zip
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
|
|
269
gradlew
vendored
269
gradlew
vendored
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env sh
|
||||
#!/bin/sh
|
||||
|
||||
#
|
||||
# Copyright 2015 the original author or authors.
|
||||
# Copyright © 2015-2021 the original authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -17,67 +17,101 @@
|
|||
#
|
||||
|
||||
##############################################################################
|
||||
##
|
||||
## Gradle start up script for UN*X
|
||||
##
|
||||
#
|
||||
# Gradle start up script for POSIX generated by Gradle.
|
||||
#
|
||||
# Important for running:
|
||||
#
|
||||
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
||||
# noncompliant, but you have some other compliant shell such as ksh or
|
||||
# bash, then to run this script, type that shell name before the whole
|
||||
# command line, like:
|
||||
#
|
||||
# ksh Gradle
|
||||
#
|
||||
# Busybox and similar reduced shells will NOT work, because this script
|
||||
# requires all of these POSIX shell features:
|
||||
# * functions;
|
||||
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
||||
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
||||
# * compound commands having a testable exit status, especially «case»;
|
||||
# * various built-in commands including «command», «set», and «ulimit».
|
||||
#
|
||||
# Important for patching:
|
||||
#
|
||||
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
||||
# by Bash, Ksh, etc; in particular arrays are avoided.
|
||||
#
|
||||
# The "traditional" practice of packing multiple parameters into a
|
||||
# space-separated string is a well documented source of bugs and security
|
||||
# problems, so this is (mostly) avoided, by progressively accumulating
|
||||
# options in "$@", and eventually passing that to Java.
|
||||
#
|
||||
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
||||
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
||||
# see the in-line comments for details.
|
||||
#
|
||||
# There are tweaks for specific operating systems such as AIX, CygWin,
|
||||
# Darwin, MinGW, and NonStop.
|
||||
#
|
||||
# (3) This script is generated from the Groovy template
|
||||
# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||
# within the Gradle project.
|
||||
#
|
||||
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||
#
|
||||
##############################################################################
|
||||
|
||||
# Attempt to set APP_HOME
|
||||
|
||||
# Resolve links: $0 may be a link
|
||||
PRG="$0"
|
||||
# Need this for relative symlinks.
|
||||
while [ -h "$PRG" ] ; do
|
||||
ls=`ls -ld "$PRG"`
|
||||
link=`expr "$ls" : '.*-> \(.*\)$'`
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
PRG="$link"
|
||||
else
|
||||
PRG=`dirname "$PRG"`"/$link"
|
||||
fi
|
||||
app_path=$0
|
||||
|
||||
# Need this for daisy-chained symlinks.
|
||||
while
|
||||
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
||||
[ -h "$app_path" ]
|
||||
do
|
||||
ls=$( ls -ld "$app_path" )
|
||||
link=${ls#*' -> '}
|
||||
case $link in #(
|
||||
/*) app_path=$link ;; #(
|
||||
*) app_path=$APP_HOME$link ;;
|
||||
esac
|
||||
done
|
||||
SAVED="`pwd`"
|
||||
cd "`dirname \"$PRG\"`/" >/dev/null
|
||||
APP_HOME="`pwd -P`"
|
||||
cd "$SAVED" >/dev/null
|
||||
|
||||
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
|
||||
|
||||
APP_NAME="Gradle"
|
||||
APP_BASE_NAME=`basename "$0"`
|
||||
APP_BASE_NAME=${0##*/}
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD="maximum"
|
||||
MAX_FD=maximum
|
||||
|
||||
warn () {
|
||||
echo "$*"
|
||||
}
|
||||
} >&2
|
||||
|
||||
die () {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
exit 1
|
||||
}
|
||||
} >&2
|
||||
|
||||
# OS specific support (must be 'true' or 'false').
|
||||
cygwin=false
|
||||
msys=false
|
||||
darwin=false
|
||||
nonstop=false
|
||||
case "`uname`" in
|
||||
CYGWIN* )
|
||||
cygwin=true
|
||||
;;
|
||||
Darwin* )
|
||||
darwin=true
|
||||
;;
|
||||
MINGW* )
|
||||
msys=true
|
||||
;;
|
||||
NONSTOP* )
|
||||
nonstop=true
|
||||
;;
|
||||
case "$( uname )" in #(
|
||||
CYGWIN* ) cygwin=true ;; #(
|
||||
Darwin* ) darwin=true ;; #(
|
||||
MSYS* | MINGW* ) msys=true ;; #(
|
||||
NONSTOP* ) nonstop=true ;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
|
@ -87,9 +121,9 @@ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
|||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD="$JAVA_HOME/jre/sh/java"
|
||||
JAVACMD=$JAVA_HOME/jre/sh/java
|
||||
else
|
||||
JAVACMD="$JAVA_HOME/bin/java"
|
||||
JAVACMD=$JAVA_HOME/bin/java
|
||||
fi
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||
|
@ -98,7 +132,7 @@ Please set the JAVA_HOME variable in your environment to match the
|
|||
location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD="java"
|
||||
JAVACMD=java
|
||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
|
@ -106,80 +140,95 @@ location of your Java installation."
|
|||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
||||
MAX_FD_LIMIT=`ulimit -H -n`
|
||||
if [ $? -eq 0 ] ; then
|
||||
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
||||
MAX_FD="$MAX_FD_LIMIT"
|
||||
fi
|
||||
ulimit -n $MAX_FD
|
||||
if [ $? -ne 0 ] ; then
|
||||
warn "Could not set maximum file descriptor limit: $MAX_FD"
|
||||
fi
|
||||
else
|
||||
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
|
||||
fi
|
||||
fi
|
||||
|
||||
# For Darwin, add options to specify how the application appears in the dock
|
||||
if $darwin; then
|
||||
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
|
||||
fi
|
||||
|
||||
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
|
||||
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
||||
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
||||
|
||||
JAVACMD=`cygpath --unix "$JAVACMD"`
|
||||
|
||||
# We build the pattern for arguments to be converted via cygpath
|
||||
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
||||
SEP=""
|
||||
for dir in $ROOTDIRSRAW ; do
|
||||
ROOTDIRS="$ROOTDIRS$SEP$dir"
|
||||
SEP="|"
|
||||
done
|
||||
OURCYGPATTERN="(^($ROOTDIRS))"
|
||||
# Add a user-defined pattern to the cygpath arguments
|
||||
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
|
||||
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
|
||||
fi
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
i=0
|
||||
for arg in "$@" ; do
|
||||
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
|
||||
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
|
||||
|
||||
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
|
||||
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
|
||||
else
|
||||
eval `echo args$i`="\"$arg\""
|
||||
fi
|
||||
i=`expr $i + 1`
|
||||
done
|
||||
case $i in
|
||||
0) set -- ;;
|
||||
1) set -- "$args0" ;;
|
||||
2) set -- "$args0" "$args1" ;;
|
||||
3) set -- "$args0" "$args1" "$args2" ;;
|
||||
4) set -- "$args0" "$args1" "$args2" "$args3" ;;
|
||||
5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
|
||||
6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
|
||||
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
|
||||
8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
|
||||
9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
|
||||
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||
case $MAX_FD in #(
|
||||
max*)
|
||||
MAX_FD=$( ulimit -H -n ) ||
|
||||
warn "Could not query maximum file descriptor limit"
|
||||
esac
|
||||
case $MAX_FD in #(
|
||||
'' | soft) :;; #(
|
||||
*)
|
||||
ulimit -n "$MAX_FD" ||
|
||||
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||
esac
|
||||
fi
|
||||
|
||||
# Escape application args
|
||||
save () {
|
||||
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
|
||||
echo " "
|
||||
}
|
||||
APP_ARGS=`save "$@"`
|
||||
# Collect all arguments for the java command, stacking in reverse order:
|
||||
# * args from the command line
|
||||
# * the main class name
|
||||
# * -classpath
|
||||
# * -D...appname settings
|
||||
# * --module-path (only if needed)
|
||||
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
||||
|
||||
# Collect all arguments for the java command, following the shell quoting and substitution rules
|
||||
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
|
||||
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||
if "$cygwin" || "$msys" ; then
|
||||
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
||||
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
||||
|
||||
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
||||
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
for arg do
|
||||
if
|
||||
case $arg in #(
|
||||
-*) false ;; # don't mess with options #(
|
||||
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
||||
[ -e "$t" ] ;; #(
|
||||
*) false ;;
|
||||
esac
|
||||
then
|
||||
arg=$( cygpath --path --ignore --mixed "$arg" )
|
||||
fi
|
||||
# Roll the args list around exactly as many times as the number of
|
||||
# args, so each arg winds up back in the position where it started, but
|
||||
# possibly modified.
|
||||
#
|
||||
# NB: a `for` loop captures its iteration list before it begins, so
|
||||
# changing the positional parameters here affects neither the number of
|
||||
# iterations, nor the values presented in `arg`.
|
||||
shift # remove old arg
|
||||
set -- "$@" "$arg" # push replacement arg
|
||||
done
|
||||
fi
|
||||
|
||||
# Collect all arguments for the java command;
|
||||
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
|
||||
# shell script including quotes and variable substitutions, so put them in
|
||||
# double quotes to make sure that they get re-expanded; and
|
||||
# * put everything else in single quotes, so that it's not re-expanded.
|
||||
|
||||
set -- \
|
||||
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||
-classpath "$CLASSPATH" \
|
||||
org.gradle.wrapper.GradleWrapperMain \
|
||||
"$@"
|
||||
|
||||
# Use "xargs" to parse quoted args.
|
||||
#
|
||||
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
||||
#
|
||||
# In Bash we could simply go:
|
||||
#
|
||||
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
||||
# set -- "${ARGS[@]}" "$@"
|
||||
#
|
||||
# but POSIX shell has neither arrays nor command substitution, so instead we
|
||||
# post-process each arg (as a line of input to sed) to backslash-escape any
|
||||
# character that might be a shell metacharacter, then use eval to reverse
|
||||
# that process (while maintaining the separation between arguments), and wrap
|
||||
# the whole thing up as a single "set" statement.
|
||||
#
|
||||
# This will of course break if any of these variables contains a newline or
|
||||
# an unmatched quote.
|
||||
#
|
||||
|
||||
eval "set -- $(
|
||||
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
||||
xargs -n1 |
|
||||
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
||||
tr '\n' ' '
|
||||
)" '"$@"'
|
||||
|
||||
exec "$JAVACMD" "$@"
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
dependencies {
|
||||
api project(':io-archive')
|
||||
implementation project(':io-compress-bgzf')
|
||||
implementation project(':io-compress-bzip2')
|
||||
implementation project(':io-compress-lzf')
|
||||
implementation project(':io-compress-xz')
|
||||
|
|
|
@ -2,11 +2,13 @@ module org.xbib.io.codec {
|
|||
uses org.xbib.io.codec.StreamCodec;
|
||||
exports org.xbib.io.codec;
|
||||
exports org.xbib.io.codec.ar;
|
||||
exports org.xbib.io.codec.bgzf;
|
||||
exports org.xbib.io.codec.cpio;
|
||||
exports org.xbib.io.codec.file;
|
||||
exports org.xbib.io.codec.jar;
|
||||
exports org.xbib.io.codec.tar;
|
||||
exports org.xbib.io.codec.zip;
|
||||
requires transitive org.xbib.io.compress.bgzf;
|
||||
requires org.xbib.io.compress.bzip;
|
||||
requires org.xbib.io.compress.lzf;
|
||||
requires org.xbib.io.compress.xz;
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
package org.xbib.io.codec.bgzf;
|
||||
|
||||
import org.xbib.io.codec.StreamCodec;
|
||||
import org.xbib.io.compress.bgzf.BlockCompressedInputStream;
|
||||
import org.xbib.io.compress.bgzf.BlockCompressedOutputStream;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
public class BzgfStreamCodec implements StreamCodec<BlockCompressedInputStream, BlockCompressedOutputStream> {
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "bgzf";
|
||||
}
|
||||
|
||||
@Override
|
||||
public BlockCompressedInputStream decode(InputStream in) throws IOException {
|
||||
return new BlockCompressedInputStream(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BlockCompressedInputStream decode(InputStream in, int bufsize) throws IOException {
|
||||
return new BlockCompressedInputStream(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BlockCompressedOutputStream encode(OutputStream out) throws IOException {
|
||||
return new BlockCompressedOutputStream(out);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BlockCompressedOutputStream encode(OutputStream out, int bufsize) throws IOException {
|
||||
return new BlockCompressedOutputStream(out);
|
||||
}
|
||||
}
|
21
io-compress-bgzf/LICENSE.txt
Normal file
21
io-compress-bgzf/LICENSE.txt
Normal file
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
3
io-compress-bgzf/src/main/java/module-info.java
Normal file
3
io-compress-bgzf/src/main/java/module-info.java
Normal file
|
@ -0,0 +1,3 @@
|
|||
module org.xbib.io.compress.bgzf {
|
||||
exports org.xbib.io.compress.bgzf;
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
public class BGZFException extends RuntimeException {
|
||||
|
||||
public BGZFException() {}
|
||||
|
||||
public BGZFException(final String s) {
|
||||
super(s);
|
||||
}
|
||||
|
||||
public BGZFException(final String s, final Throwable throwable) {
|
||||
super(s, throwable);
|
||||
}
|
||||
|
||||
public BGZFException(final Throwable throwable) {
|
||||
super(throwable);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
|
||||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
public class BGZFFilePointerUtil {
|
||||
|
||||
private static final int SHIFT_AMOUNT = 16;
|
||||
private static final int OFFSET_MASK = 0xffff;
|
||||
private static final long ADDRESS_MASK = 0xFFFFFFFFFFFFL;
|
||||
|
||||
public static final long MAX_BLOCK_ADDRESS = ADDRESS_MASK;
|
||||
public static final int MAX_OFFSET = OFFSET_MASK;
|
||||
|
||||
public static int compare(final long vfp1, final long vfp2) {
|
||||
if (vfp1 == vfp2) return 0;
|
||||
// When treating as unsigned, negative number is > positive.
|
||||
if (vfp1 < 0 && vfp2 >= 0) return 1;
|
||||
if (vfp1 >= 0 && vfp2 < 0) return -1;
|
||||
// Either both negative or both non-negative, so regular comparison works.
|
||||
if (vfp1 < vfp2) return -1;
|
||||
return 1; // vfp1 > vfp2
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if vfp2 points to somewhere in the same BGZF block, or the one immediately
|
||||
* following vfp1's BGZF block.
|
||||
*/
|
||||
public static boolean areInSameOrAdjacentBlocks(final long vfp1, final long vfp2) {
|
||||
final long block1 = getBlockAddress(vfp1);
|
||||
final long block2 = getBlockAddress(vfp2);
|
||||
return (block1 == block2 || block1 + 1 == block2);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param blockAddress File offset of start of BGZF block.
|
||||
* @param blockOffset Offset into uncompressed block.
|
||||
* @return Virtual file pointer that embodies the input parameters.
|
||||
*/
|
||||
static long makeFilePointer(final long blockAddress, final int blockOffset) {
|
||||
if (blockOffset < 0) {
|
||||
throw new IllegalArgumentException("Negative blockOffset " + blockOffset
|
||||
+ " not allowed.");
|
||||
}
|
||||
if (blockAddress < 0) {
|
||||
throw new IllegalArgumentException("Negative blockAddress " + blockAddress
|
||||
+ " not allowed.");
|
||||
}
|
||||
if (blockOffset > MAX_OFFSET) {
|
||||
throw new IllegalArgumentException("blockOffset " + blockOffset + " too large.");
|
||||
}
|
||||
if (blockAddress > MAX_BLOCK_ADDRESS) {
|
||||
throw new IllegalArgumentException("blockAddress " + blockAddress + " too large.");
|
||||
}
|
||||
return blockAddress << SHIFT_AMOUNT | blockOffset;
|
||||
}
|
||||
|
||||
public static long getBlockAddress(final long virtualFilePointer) {
|
||||
return (virtualFilePointer >> SHIFT_AMOUNT) & ADDRESS_MASK;
|
||||
}
|
||||
|
||||
public static int getBlockOffset(final long virtualFilePointer) {
|
||||
return (int)(virtualFilePointer & OFFSET_MASK);
|
||||
}
|
||||
|
||||
public static String asString(final long vfp) {
|
||||
return String.format("%d(0x%x): (block address: %d, offset: %d)", vfp, vfp, getBlockAddress(vfp), getBlockOffset(vfp));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
public class BGZFFormatException extends BGZFException {
|
||||
|
||||
public BGZFFormatException() {}
|
||||
|
||||
public BGZFFormatException(final String s) {
|
||||
super(s);
|
||||
}
|
||||
|
||||
public BGZFFormatException(final String s, final Throwable throwable) {
|
||||
super(s, throwable);
|
||||
}
|
||||
|
||||
public BGZFFormatException(final Throwable throwable) {
|
||||
super(throwable);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
/**
|
||||
* Constants shared by BlockCompressed{Input,Output}Stream classes
|
||||
*/
|
||||
public class BGZFStreamConstants {
|
||||
|
||||
// Number of bytes in the gzip block before the deflated data.
|
||||
// This is not the standard header size, because we include one optional subfield,
|
||||
// but it is the standard for us.
|
||||
public static final int BLOCK_HEADER_LENGTH = 18;
|
||||
|
||||
// Location in the gzip block of the total block size (actually total block size - 1)
|
||||
public static final int BLOCK_LENGTH_OFFSET = 16;
|
||||
|
||||
// Number of bytes that follow the deflated data
|
||||
public static final int BLOCK_FOOTER_LENGTH = 8;
|
||||
|
||||
// We require that a compressed block (including header and footer, be <= this)
|
||||
public static final int MAX_COMPRESSED_BLOCK_SIZE = 64 * 1024;
|
||||
|
||||
// Gzip overhead is the header, the footer, and the block size (encoded as a short).
|
||||
public static final int GZIP_OVERHEAD = BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH + 2;
|
||||
|
||||
// If Deflater has compression level == NO_COMPRESSION, 10 bytes of overhead (determined experimentally).
|
||||
public static final int NO_COMPRESSION_OVERHEAD = 10;
|
||||
|
||||
// Push out a gzip block when this many uncompressed bytes have been accumulated.
|
||||
// This size is selected so that if data is not compressible, if Deflater is given
|
||||
// compression level == NO_COMPRESSION, compressed size is guaranteed to be <= MAX_COMPRESSED_BLOCK_SIZE.
|
||||
public static final int DEFAULT_UNCOMPRESSED_BLOCK_SIZE = 64 * 1024 - (GZIP_OVERHEAD + NO_COMPRESSION_OVERHEAD);
|
||||
|
||||
// Magic numbers
|
||||
public static final byte GZIP_ID1 = 31;
|
||||
public static final int GZIP_ID2 = 139;
|
||||
|
||||
// FEXTRA flag means there are optional fields
|
||||
public static final int GZIP_FLG = 4;
|
||||
|
||||
// extra flags
|
||||
public static final int GZIP_XFL = 0;
|
||||
|
||||
// length of extra subfield
|
||||
public static final short GZIP_XLEN = 6;
|
||||
|
||||
// The deflate compression, which is customarily used by gzip
|
||||
public static final byte GZIP_CM_DEFLATE = 8;
|
||||
|
||||
public static final int DEFAULT_COMPRESSION_LEVEL = 5;
|
||||
|
||||
// We don't care about OS because we're not doing line terminator translation
|
||||
public static final int GZIP_OS_UNKNOWN = 255;
|
||||
|
||||
// The subfield ID
|
||||
public static final byte BGZF_ID1 = 66;
|
||||
public static final byte BGZF_ID2 = 67;
|
||||
|
||||
// subfield length in bytes
|
||||
public static final byte BGZF_LEN = 2;
|
||||
|
||||
public static final byte[] EMPTY_GZIP_BLOCK = {
|
||||
BGZFStreamConstants.GZIP_ID1,
|
||||
(byte)BGZFStreamConstants.GZIP_ID2,
|
||||
BGZFStreamConstants.GZIP_CM_DEFLATE,
|
||||
BGZFStreamConstants.GZIP_FLG,
|
||||
0, 0, 0, 0, // Modification time
|
||||
BGZFStreamConstants.GZIP_XFL,
|
||||
(byte)BGZFStreamConstants.GZIP_OS_UNKNOWN,
|
||||
BGZFStreamConstants.GZIP_XLEN, 0, // Little-endian short
|
||||
BGZFStreamConstants.BGZF_ID1,
|
||||
BGZFStreamConstants.BGZF_ID2,
|
||||
BGZFStreamConstants.BGZF_LEN, 0, // Little-endian short
|
||||
// Total block size - 1
|
||||
BGZFStreamConstants.BLOCK_HEADER_LENGTH +
|
||||
BGZFStreamConstants.BLOCK_FOOTER_LENGTH - 1 + 2, 0, // Little-endian short
|
||||
// Dummy payload?
|
||||
3, 0,
|
||||
0, 0, 0, 0, // crc
|
||||
0, 0, 0, 0, // uncompressedSize
|
||||
};
|
||||
|
||||
public static final byte[] GZIP_BLOCK_PREAMBLE = {
|
||||
BGZFStreamConstants.GZIP_ID1,
|
||||
(byte)BGZFStreamConstants.GZIP_ID2,
|
||||
BGZFStreamConstants.GZIP_CM_DEFLATE,
|
||||
BGZFStreamConstants.GZIP_FLG,
|
||||
0, 0, 0, 0, // Modification time
|
||||
BGZFStreamConstants.GZIP_XFL,
|
||||
(byte)BGZFStreamConstants.GZIP_OS_UNKNOWN,
|
||||
BGZFStreamConstants.GZIP_XLEN, 0, // Little-endian short
|
||||
BGZFStreamConstants.BGZF_ID1,
|
||||
BGZFStreamConstants.BGZF_ID2,
|
||||
BGZFStreamConstants.BGZF_LEN, 0, // Little-endian short
|
||||
};
|
||||
}
|
|
@ -0,0 +1,666 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.EOFException;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.SyncFailedException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* Encapsulates file representation of various primitive data types. Forces little-endian disk
|
||||
* representation. Note that this class is currently not very efficient. There are plans to increase
|
||||
* the size of the ByteBuffer, and move data between the ByteBuffer and the underlying input or
|
||||
* output stream in larger chunks.
|
||||
*
|
||||
* All the read methods throw EOFException if the input stream is exhausted before the
|
||||
* required number of bytes are read.
|
||||
*/
|
||||
public class BinaryCodec {
|
||||
|
||||
// Outstream to write to
|
||||
private OutputStream outputStream;
|
||||
// If a file or filename was given it will be stored here. Used for error reporting.
|
||||
private String outputFileName;
|
||||
|
||||
// Input stream to read from
|
||||
private InputStream inputStream;
|
||||
// If a file or filename was give to read from it will be stored here. Used for error reporting.
|
||||
private String inputFileName;
|
||||
|
||||
/*
|
||||
* Mode that the BinaryCodec is in. It is either writing to a binary file or reading from. This
|
||||
* is set to true if it is writing to a binary file Right now we don't support reading and
|
||||
* writing to the same file with the same BinaryCodec instance
|
||||
*/
|
||||
private boolean isWriting;
|
||||
|
||||
/**
|
||||
* For byte swapping.
|
||||
*/
|
||||
private ByteBuffer byteBuffer;
|
||||
|
||||
/**
|
||||
* For reading Strings of known length, this can reduce object creation
|
||||
*/
|
||||
private final byte[] scratchBuffer = new byte[16];
|
||||
|
||||
// Byte order used in BAM files.
|
||||
private static final ByteOrder LITTLE_ENDIAN = ByteOrder.LITTLE_ENDIAN;
|
||||
private static final byte[] NULL_BYTE = {0};
|
||||
|
||||
private static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1;
|
||||
private static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1;
|
||||
private static final long MAX_UINT = ((long)Integer.MAX_VALUE * 2) + 1;
|
||||
|
||||
// We never serialize more than this much at a time (except for Strings)
|
||||
private static final int MAX_BYTE_BUFFER = 8;
|
||||
/**
|
||||
* Constructs BinaryCodec from a file and set it's mode to writing or not
|
||||
*
|
||||
* @param file file to be written to or read from
|
||||
* @param writing whether the file is being written to
|
||||
* @throws FileNotFoundException
|
||||
*/
|
||||
public BinaryCodec(final File file, final boolean writing) throws FileNotFoundException {
|
||||
this();
|
||||
|
||||
this.isWriting = writing;
|
||||
if (this.isWriting) {
|
||||
this.outputStream = new FileOutputStream(file);
|
||||
this.outputFileName = file.getName();
|
||||
} else {
|
||||
this.inputStream = new FileInputStream(file);
|
||||
this.inputFileName = file.getName();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs BinaryCodec from a file name and set it's mode to writing or not
|
||||
*
|
||||
* @param fileName name of the file to be written to or read from
|
||||
* @param writing writing whether the file is being written to
|
||||
* @throws FileNotFoundException
|
||||
*/
|
||||
public BinaryCodec(final String fileName, final boolean writing) throws FileNotFoundException {
|
||||
this(new File(fileName), writing);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs BinaryCodec from an output stream
|
||||
*
|
||||
* @param outputStream Stream to write to, since it's an output stream we know that isWriting
|
||||
* should be set to true
|
||||
*/
|
||||
public BinaryCodec(final OutputStream outputStream) {
|
||||
this();
|
||||
setOutputStream(outputStream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs BinaryCodec from an input stream
|
||||
*
|
||||
* @param inputStream Stream to read from, since we are reading isWriting is set to false
|
||||
*/
|
||||
public BinaryCodec(final InputStream inputStream) {
|
||||
this();
|
||||
setInputStream(inputStream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Ambiguous whether reading or writing until set{In,Out}putStream is called
|
||||
*/
|
||||
public BinaryCodec() {
|
||||
initByteBuffer();
|
||||
}
|
||||
|
||||
/**
|
||||
* Shared among ctors. Note that if endianness is changed, all the unsigned methods must also be
|
||||
* changed.
|
||||
*/
|
||||
private void initByteBuffer() {
|
||||
byteBuffer = ByteBuffer.allocate(MAX_BYTE_BUFFER);
|
||||
byteBuffer.order(LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Write whatever has been put into the byte buffer
|
||||
*
|
||||
* @param numBytes -- how much to write. Note that in case of writing an unsigned value, more
|
||||
* bytes were put into the ByteBuffer than will get written out.
|
||||
* @throws IOException
|
||||
*/
|
||||
private void writeByteBuffer(final int numBytes) throws IOException {
|
||||
assert (numBytes <= byteBuffer.limit());
|
||||
writeBytes(byteBuffer.array(), 0, numBytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a byte to the output buffer
|
||||
*
|
||||
* @param bite byte array to write
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeByte(final byte bite) throws IOException {
|
||||
byteBuffer.clear();
|
||||
byteBuffer.put(bite);
|
||||
writeByteBuffer(1);
|
||||
}
|
||||
|
||||
public void writeByte(final int b) throws IOException {
|
||||
writeByte((byte)b);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a byte array to the output buffer
|
||||
*
|
||||
* @param bytes value to write
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeBytes(final byte[] bytes) throws IOException {
|
||||
writeBytes(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
public void writeBytes(final byte[] bytes, final int startOffset, final int numBytes) throws IOException {
|
||||
if (!isWriting) {
|
||||
throw new IllegalStateException("Calling write method on BinaryCodec open for read.");
|
||||
}
|
||||
|
||||
outputStream.write(bytes, startOffset, numBytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a 32-bit int to the output stream
|
||||
*
|
||||
* @param value int to write
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeInt(final int value) throws IOException {
|
||||
byteBuffer.clear();
|
||||
byteBuffer.putInt(value);
|
||||
writeByteBuffer(4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a double (8 bytes) to the output stream
|
||||
*
|
||||
* @param value double to write
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeDouble(final double value) throws IOException {
|
||||
byteBuffer.clear();
|
||||
byteBuffer.putDouble(value);
|
||||
writeByteBuffer(8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a 64-bit long to the output stream
|
||||
*
|
||||
* @param value long to write
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeLong(final long value) throws IOException {
|
||||
byteBuffer.clear();
|
||||
byteBuffer.putLong(value);
|
||||
writeByteBuffer(8);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Write a 16-bit short to output stream
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeShort(final short value) throws IOException {
|
||||
byteBuffer.clear();
|
||||
byteBuffer.putShort(value);
|
||||
writeByteBuffer(2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a float (4 bytes) to the output stream
|
||||
*
|
||||
* @param value float to write
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeFloat(final float value) throws IOException {
|
||||
byteBuffer.clear();
|
||||
byteBuffer.putFloat(value);
|
||||
writeByteBuffer(4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a boolean (1 byte) to the output buffer
|
||||
*
|
||||
* @param value boolean to write
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeBoolean(final boolean value) throws IOException {
|
||||
byteBuffer.clear();
|
||||
byteBuffer.put(value ? (byte)1 : (byte)0);
|
||||
writeByteBuffer(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a string to the buffer as ASCII bytes
|
||||
*
|
||||
* @param value string to write to buffer
|
||||
* @param writeLength prefix the string with the length as a 32-bit int
|
||||
* @param appendNull add a null byte to the end of the string
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeString(final String value, final boolean writeLength, final boolean appendNull) throws IOException {
|
||||
if (writeLength) {
|
||||
int lengthToWrite = value.length();
|
||||
if (appendNull) lengthToWrite++;
|
||||
writeInt(lengthToWrite);
|
||||
}
|
||||
|
||||
// Actually writes the string to a buffer
|
||||
writeString(value);
|
||||
|
||||
if (appendNull) writeBytes(NULL_BYTE);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Write a string to the buffer as ASCII bytes
|
||||
*
|
||||
* @param value string to write
|
||||
* @throws IOException
|
||||
*/
|
||||
private void writeString(final String value) throws IOException {
|
||||
final byte[] byteBuffer = new byte[value.length()];
|
||||
final char[] charBuffer = value.toCharArray();
|
||||
for (int i = 0; i < charBuffer.length; ++i) {
|
||||
byteBuffer[i] = (byte)(charBuffer[i] & 0xff);
|
||||
}
|
||||
writeBytes(byteBuffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write an 8-bit unsigned byte. NOTE: This method will break if we change to big-endian.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeUByte(final short val) throws IOException {
|
||||
if (val < 0) {
|
||||
throw new IllegalArgumentException("Negative value (" + val
|
||||
+ ") passed to unsigned writing method.");
|
||||
}
|
||||
if (val > MAX_UBYTE) {
|
||||
throw new IllegalArgumentException("Value (" + val
|
||||
+ ") to large to be written as ubyte.");
|
||||
}
|
||||
byteBuffer.clear();
|
||||
byteBuffer.putShort(val);
|
||||
writeByteBuffer(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a 16-bit unsigned short. NOTE: This method will break if we change to big-endian.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeUShort(final int val) throws IOException {
|
||||
if (val < 0) {
|
||||
throw new IllegalArgumentException("Negative value (" + val
|
||||
+ ") passed to unsigned writing method.");
|
||||
}
|
||||
if (val > MAX_USHORT) {
|
||||
throw new IllegalArgumentException("Value (" + val
|
||||
+ ") to large to be written as ushort.");
|
||||
}
|
||||
byteBuffer.clear();
|
||||
byteBuffer.putInt(val);
|
||||
writeByteBuffer(2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a 32-bit unsigned int. NOTE: This method will break if we change to big-endian.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public void writeUInt(final long val) throws IOException {
|
||||
if (val < 0) {
|
||||
throw new IllegalArgumentException("Negative value (" + val
|
||||
+ ") passed to unsigned writing method.");
|
||||
}
|
||||
if (val > MAX_UINT) {
|
||||
throw new IllegalArgumentException("Value (" + val
|
||||
+ ") to large to be written as uint.");
|
||||
}
|
||||
byteBuffer.clear();
|
||||
byteBuffer.putLong(val);
|
||||
writeByteBuffer(4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a byte array from the input stream.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public void readBytes(final byte[] buffer) throws IOException {
|
||||
readBytes(buffer, 0, buffer.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a byte array from the input stream
|
||||
*
|
||||
* @param buffer where to put bytes read
|
||||
* @param offset offset to start putting bytes into buffer
|
||||
* @param length number of bytes to read
|
||||
* @throws IOException
|
||||
*/
|
||||
public void readBytes(final byte[] buffer, final int offset, final int length) throws IOException {
|
||||
int totalNumRead = 0;
|
||||
do {
|
||||
final int numRead =
|
||||
readBytesOrFewer(buffer, offset + totalNumRead, length - totalNumRead);
|
||||
if (numRead < 0) {
|
||||
throw new EOFException(constructErrorMessage("Premature EOF"));
|
||||
} else {
|
||||
totalNumRead += numRead;
|
||||
}
|
||||
} while (totalNumRead < length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a byte array from the input stream.
|
||||
*
|
||||
* @param buffer where to put bytes read
|
||||
* @param offset offset to start putting bytes into buffer
|
||||
* @param length number of bytes to read. Fewer bytes may be read if EOF is reached before
|
||||
* length bytes have been read.
|
||||
* @return the total number of bytes read into the buffer, or -1 if there is no more data
|
||||
* because the end of the stream has been reached.
|
||||
* @throws IOException
|
||||
*/
|
||||
public int readBytesOrFewer(final byte[] buffer, final int offset, final int length) throws IOException {
|
||||
if (isWriting) {
|
||||
throw new IllegalStateException("Calling read method on BinaryCodec open for write.");
|
||||
}
|
||||
|
||||
return inputStream.read(buffer, offset, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a single byte read from the input stream.
|
||||
* @throws IOException
|
||||
*/
|
||||
public byte readByte() throws IOException {
|
||||
if (isWriting) {
|
||||
throw new IllegalStateException("Calling read method on BinaryCodec open for write.");
|
||||
}
|
||||
|
||||
final int ret = inputStream.read();
|
||||
if (ret == -1) {
|
||||
throw new EOFException(constructErrorMessage("Premature EOF"));
|
||||
}
|
||||
return (byte)ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if it is possible to know for sure if at EOF, and it is known for sure. If the
|
||||
* input stream is a ByteArrayInputStream, this is faster than causing a
|
||||
* RuntimeEOFException to be thrown.
|
||||
* @throws IOException
|
||||
*/
|
||||
public boolean knownAtEof() throws IOException {
|
||||
if (isWriting) {
|
||||
throw new IllegalStateException(
|
||||
"Calling knownAtEof method on BinaryCodec open for write.");
|
||||
}
|
||||
|
||||
return inputStream instanceof ByteArrayInputStream && inputStream.available() == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a string off the input stream, as ASCII bytes
|
||||
*
|
||||
* @param length length of string to read
|
||||
* @return String read from stream
|
||||
* @throws IOException
|
||||
*/
|
||||
public String readString(final int length) throws IOException {
|
||||
final byte[] buffer;
|
||||
// Recycle single buffer if possible
|
||||
if (length <= scratchBuffer.length) {
|
||||
buffer = scratchBuffer;
|
||||
} else {
|
||||
buffer = new byte[length];
|
||||
|
||||
}
|
||||
readBytes(buffer, 0, length);
|
||||
|
||||
final char[] charBuffer = new char[length];
|
||||
for (int i = 0; i < length; ++i) {
|
||||
charBuffer[i] = (char)buffer[i];
|
||||
}
|
||||
return new String(charBuffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read ASCII bytes from the input stream until a null byte is read
|
||||
*
|
||||
* @return String constructed from the ASCII bytes read
|
||||
* @throws IOException
|
||||
*/
|
||||
public String readNullTerminatedString() throws IOException {
|
||||
final StringBuilder ret = new StringBuilder();
|
||||
for (byte b = this.readByte(); b != 0; b = this.readByte()) {
|
||||
ret.append((char)(b & 0xff));
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Read an int length, and then a String of that length
|
||||
*
|
||||
* @param devourNull if true, the length include a null terminator, which is read and discarded
|
||||
* @throws IOException
|
||||
*/
|
||||
public String readLengthAndString(final boolean devourNull) throws IOException {
|
||||
int length = readInt();
|
||||
if (devourNull) {
|
||||
--length;
|
||||
}
|
||||
final String ret = readString(length);
|
||||
if (devourNull) {
|
||||
readByte();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
private void readByteBuffer(final int numBytes) throws IOException {
|
||||
assert (numBytes <= byteBuffer.capacity());
|
||||
readBytes(byteBuffer.array(), 0, numBytes);
|
||||
byteBuffer.limit(byteBuffer.capacity());
|
||||
byteBuffer.position(numBytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read an int off the input stream
|
||||
*
|
||||
* @return int from input stream
|
||||
* @throws IOException
|
||||
*/
|
||||
public int readInt() throws IOException {
|
||||
readByteBuffer(4);
|
||||
byteBuffer.flip();
|
||||
return byteBuffer.getInt();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a double off the input stream
|
||||
*
|
||||
* @return double
|
||||
* @throws IOException
|
||||
*/
|
||||
public double readDouble() throws IOException {
|
||||
readByteBuffer(8);
|
||||
byteBuffer.flip();
|
||||
return byteBuffer.getDouble();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a long off the input stream
|
||||
*
|
||||
* @return long
|
||||
* @throws IOException
|
||||
*/
|
||||
public long readLong() throws IOException {
|
||||
readByteBuffer(8);
|
||||
byteBuffer.flip();
|
||||
return byteBuffer.getLong();
|
||||
}
|
||||
|
||||
public short readShort() throws IOException {
|
||||
readByteBuffer(2);
|
||||
byteBuffer.flip();
|
||||
return byteBuffer.getShort();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a float off the input stream
|
||||
*
|
||||
* @return float
|
||||
* @throws IOException
|
||||
*/
|
||||
public float readFloat() throws IOException {
|
||||
readByteBuffer(4);
|
||||
byteBuffer.flip();
|
||||
return byteBuffer.getFloat();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a boolean off the input stream, represented as a byte with value 1 or 0
|
||||
*
|
||||
* @return boolean
|
||||
* @throws IOException
|
||||
*/
|
||||
public boolean readBoolean() throws IOException {
|
||||
return ((readByte()) == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads an 8-bit unsigned byte from the input stream. This method assumes little-endianness.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public short readUByte() throws IOException {
|
||||
readByteBuffer(1);
|
||||
byteBuffer.put((byte)0);
|
||||
byteBuffer.flip();
|
||||
return byteBuffer.getShort();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a 16-bit unsigned short from the input stream. This method assumes little-endianness.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public int readUShort() throws IOException {
|
||||
readByteBuffer(2);
|
||||
byteBuffer.putShort((short)0);
|
||||
byteBuffer.flip();
|
||||
return byteBuffer.getInt();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a 32-bit unsigned int from the input stream. This method assumes little-endianness.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public long readUInt() throws IOException {
|
||||
readByteBuffer(4);
|
||||
byteBuffer.putInt(0);
|
||||
byteBuffer.flip();
|
||||
return byteBuffer.getLong();
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the appropriate stream
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
if (this.isWriting) {
|
||||
// To the degree possible, make sure the bytes get forced to the file system,
|
||||
// or else cause an exception to be thrown.
|
||||
if (this.outputStream instanceof FileOutputStream) {
|
||||
this.outputStream.flush();
|
||||
FileOutputStream fos = (FileOutputStream)this.outputStream;
|
||||
try {
|
||||
fos.getFD().sync();
|
||||
} catch (SyncFailedException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
this.outputStream.close();
|
||||
} else {
|
||||
this.inputStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
private String constructErrorMessage(final String msg) {
|
||||
final StringBuilder sb = new StringBuilder(msg);
|
||||
sb.append("; BinaryCodec in ");
|
||||
sb.append(isWriting ? "write" : "read");
|
||||
sb.append("mode; ");
|
||||
final String filename = isWriting ? outputFileName : inputFileName;
|
||||
if (filename != null) {
|
||||
sb.append("file: ");
|
||||
sb.append(filename);
|
||||
} else {
|
||||
sb.append("streamed file (filename not available)");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public String getInputFileName() {
|
||||
return inputFileName;
|
||||
}
|
||||
|
||||
public String getOutputFileName() {
|
||||
return outputFileName;
|
||||
}
|
||||
|
||||
public void setOutputFileName(final String outputFileName) {
|
||||
this.outputFileName = outputFileName;
|
||||
}
|
||||
|
||||
public void setInputFileName(final String inputFileName) {
|
||||
this.inputFileName = inputFileName;
|
||||
}
|
||||
|
||||
public boolean isWriting() {
|
||||
return isWriting;
|
||||
}
|
||||
|
||||
public OutputStream getOutputStream() {
|
||||
return outputStream;
|
||||
}
|
||||
|
||||
public InputStream getInputStream() {
|
||||
return inputStream;
|
||||
}
|
||||
|
||||
public void setInputStream(final InputStream is) {
|
||||
isWriting = false;
|
||||
this.inputStream = is;
|
||||
}
|
||||
|
||||
public void setOutputStream(final OutputStream os) {
|
||||
isWriting = true;
|
||||
this.outputStream = os;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,709 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.EOFException;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.SeekableByteChannel;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Stream class for reading BGZF block compressed files. The caller can treat this file like any other InputStream.
|
||||
* It probably is not necessary to wrap this stream in a buffering stream, because there is internal buffering.
|
||||
* The advantage of BGZF over conventional GZip format is that BGZF allows for seeking without having to read the
|
||||
* entire file up to the location being sought. Note that seeking is only possible if the input stream is seekable.
|
||||
*
|
||||
* Note that this implementation is not synchronized. If multiple threads access an instance concurrently, it must be synchronized externally.
|
||||
*
|
||||
* @see <a href="http://samtools.sourceforge.net/SAM1.pdf">http://samtools.sourceforge.net/SAM1.pdf</a> for details of BGZF file format.
|
||||
*/
|
||||
public class BlockCompressedInputStream extends InputStream {
|
||||
|
||||
public final static String INCORRECT_HEADER_SIZE_MSG = "Incorrect header size for file: ";
|
||||
public final static String UNEXPECTED_BLOCK_LENGTH_MSG = "Unexpected compressed block length: ";
|
||||
public final static String PREMATURE_END_MSG = "Premature end of file: ";
|
||||
public final static String CANNOT_SEEK_STREAM_MSG = "Cannot seek a position for a non-file stream";
|
||||
public final static String CANNOT_SEEK_CLOSED_STREAM_MSG = "Cannot seek a position for a closed stream";
|
||||
public final static String INVALID_FILE_PTR_MSG = "Invalid file pointer: ";
|
||||
|
||||
private InputStream mStream;
|
||||
private boolean mIsClosed = false;
|
||||
private SeekableStream mFile;
|
||||
private byte[] mFileBuffer = null;
|
||||
private DecompressedBlock mCurrentBlock = null;
|
||||
private int mCurrentOffset = 0;
|
||||
private long mStreamOffset = 0;
|
||||
private final BlockGunzipper blockGunzipper;
|
||||
|
||||
private volatile ByteArrayOutputStream buf = null;
|
||||
private static final byte eol = '\n';
|
||||
private static final byte eolCr = '\r';
|
||||
|
||||
/**
|
||||
* Note that seek() is not supported if this ctor is used.
|
||||
* @param stream source of bytes
|
||||
*/
|
||||
public BlockCompressedInputStream(final InputStream stream) {
|
||||
this(stream, true, BlockGunzipper.getDefaultInflaterFactory());
|
||||
}
|
||||
|
||||
/**
|
||||
* Note that seek() is not supported if this ctor is used.
|
||||
* @param stream source of bytes
|
||||
* @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
|
||||
*/
|
||||
public BlockCompressedInputStream(final InputStream stream, final InflaterFactory inflaterFactory) {
|
||||
this(stream, true, inflaterFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Note that seek() is not supported if this ctor is used.
|
||||
* @param stream source of bytes
|
||||
* @param allowBuffering if true, allow buffering
|
||||
*/
|
||||
public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering) {
|
||||
this(stream, allowBuffering, BlockGunzipper.getDefaultInflaterFactory());
|
||||
}
|
||||
|
||||
/**
|
||||
* Note that seek() is not supported if this ctor is used.
|
||||
* @param stream source of bytes
|
||||
* @param allowBuffering if true, allow buffering
|
||||
* @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
|
||||
*/
|
||||
public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering, final InflaterFactory inflaterFactory) {
|
||||
if (allowBuffering) {
|
||||
mStream = new BufferedInputStream(stream);
|
||||
}
|
||||
else {
|
||||
mStream = stream;
|
||||
}
|
||||
|
||||
mFile = null;
|
||||
blockGunzipper = new BlockGunzipper(inflaterFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this ctor if you wish to call seek()
|
||||
* @param file source of bytes
|
||||
* @throws IOException
|
||||
*/
|
||||
public BlockCompressedInputStream(final File file) throws IOException {
|
||||
this(file, BlockGunzipper.getDefaultInflaterFactory());
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this ctor if you wish to call seek()
|
||||
* @param file source of bytes
|
||||
* @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
|
||||
* @throws IOException
|
||||
*/
|
||||
public BlockCompressedInputStream(final File file, final InflaterFactory inflaterFactory) throws IOException {
|
||||
mFile = new SeekableFileStream(file);
|
||||
mStream = null;
|
||||
blockGunzipper = new BlockGunzipper(inflaterFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
* For providing some arbitrary data source. No additional buffering is
|
||||
* provided, so if the underlying source is not buffered, wrap it in a
|
||||
* SeekableBufferedStream before passing to this ctor.
|
||||
* @param strm source of bytes
|
||||
*/
|
||||
public BlockCompressedInputStream(final SeekableStream strm) {
|
||||
this(strm, BlockGunzipper.getDefaultInflaterFactory());
|
||||
}
|
||||
|
||||
/**
|
||||
* For providing some arbitrary data source. No additional buffering is
|
||||
* provided, so if the underlying source is not buffered, wrap it in a
|
||||
* SeekableBufferedStream before passing to this ctor.
|
||||
* @param strm source of bytes
|
||||
* @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
|
||||
*/
|
||||
public BlockCompressedInputStream(final SeekableStream strm, final InflaterFactory inflaterFactory) {
|
||||
mFile = strm;
|
||||
mStream = null;
|
||||
blockGunzipper = new BlockGunzipper(inflaterFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether or not the inflater will re-calculated the CRC on the decompressed data
|
||||
* and check it against the value stored in the GZIP header. CRC checking is an expensive
|
||||
* operation and should be used accordingly.
|
||||
*/
|
||||
public void setCheckCrcs(final boolean check) {
|
||||
this.blockGunzipper.setCheckCrcs(check);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of bytes that can be read (or skipped over) from this input stream without blocking by the
|
||||
* next caller of a method for this input stream. The next caller might be the same thread or another thread.
|
||||
* Note that although the next caller can read this many bytes without blocking, the available() method call itself
|
||||
* may block in order to fill an internal buffer if it has been exhausted.
|
||||
*/
|
||||
@Override
|
||||
public int available() throws IOException {
|
||||
if (mCurrentBlock == null || mCurrentOffset == mCurrentBlock.mBlock.length) {
|
||||
readBlock();
|
||||
}
|
||||
if (mCurrentBlock == null) {
|
||||
return 0;
|
||||
}
|
||||
return mCurrentBlock.mBlock.length - mCurrentOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return <code>true</code> if the stream is at the end of a BGZF block,
|
||||
* <code>false</code> otherwise.
|
||||
*/
|
||||
public boolean endOfBlock() {
|
||||
return (mCurrentBlock != null && mCurrentOffset == mCurrentBlock.mBlock.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the underlying InputStream or RandomAccessFile
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (mFile != null) {
|
||||
mFile.close();
|
||||
mFile = null;
|
||||
} else if (mStream != null) {
|
||||
mStream.close();
|
||||
mStream = null;
|
||||
}
|
||||
// Encourage garbage collection
|
||||
mFileBuffer = null;
|
||||
mCurrentBlock = null;
|
||||
|
||||
// Mark as closed
|
||||
mIsClosed = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the next byte of data from the input stream. The value byte is returned as an int in the range 0 to 255.
|
||||
* If no byte is available because the end of the stream has been reached, the value -1 is returned.
|
||||
* This method blocks until input data is available, the end of the stream is detected, or an exception is thrown.
|
||||
|
||||
* @return the next byte of data, or -1 if the end of the stream is reached.
|
||||
*/
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
return (available() > 0) ? (mCurrentBlock.mBlock[mCurrentOffset++] & 0xFF) : -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads some number of bytes from the input stream and stores them into the buffer array b. The number of bytes
|
||||
* actually read is returned as an integer. This method blocks until input data is available, end of file is detected,
|
||||
* or an exception is thrown.
|
||||
*
|
||||
* read(buf) has the same effect as read(buf, 0, buf.length).
|
||||
*
|
||||
* @param buffer the buffer into which the data is read.
|
||||
* @return the total number of bytes read into the buffer, or -1 is there is no more data because the end of
|
||||
* the stream has been reached.
|
||||
*/
|
||||
@Override
|
||||
public int read(final byte[] buffer) throws IOException {
|
||||
return read(buffer, 0, buffer.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a whole line. A line is considered to be terminated by either a line feed ('\n'),
|
||||
* carriage return ('\r') or carriage return followed by a line feed ("\r\n").
|
||||
*
|
||||
* @return A String containing the contents of the line, excluding the line terminating
|
||||
* character, or null if the end of the stream has been reached
|
||||
*
|
||||
* @exception IOException If an I/O error occurs
|
||||
*/
|
||||
public String readLine() throws IOException {
|
||||
int available = available();
|
||||
if (available == 0) {
|
||||
return null;
|
||||
}
|
||||
if(null == buf){ // lazy initialisation
|
||||
buf = new ByteArrayOutputStream(8192);
|
||||
}
|
||||
buf.reset();
|
||||
boolean done = false;
|
||||
boolean foundCr = false; // \r found flag
|
||||
while (!done) {
|
||||
int linetmpPos = mCurrentOffset;
|
||||
int bCnt = 0;
|
||||
while((available-- > 0)){
|
||||
final byte c = mCurrentBlock.mBlock[linetmpPos++];
|
||||
if(c == eol){ // found \n
|
||||
done = true;
|
||||
break;
|
||||
} else if(foundCr){ // previous char was \r
|
||||
--linetmpPos; // current char is not \n so put it back
|
||||
done = true;
|
||||
break;
|
||||
} else if(c == eolCr){ // found \r
|
||||
foundCr = true;
|
||||
continue; // no ++bCnt
|
||||
}
|
||||
++bCnt;
|
||||
}
|
||||
if(mCurrentOffset < linetmpPos) {
|
||||
buf.write(mCurrentBlock.mBlock, mCurrentOffset, bCnt);
|
||||
mCurrentOffset = linetmpPos;
|
||||
}
|
||||
available = available();
|
||||
if(available == 0) {
|
||||
// EOF
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads up to len bytes of data from the input stream into an array of bytes. An attempt is made to read
|
||||
* as many as len bytes, but a smaller number may be read. The number of bytes actually read is returned as an integer.
|
||||
*
|
||||
* This method blocks until input data is available, end of file is detected, or an exception is thrown.
|
||||
*
|
||||
* @param buffer buffer into which data is read.
|
||||
* @param offset the start offset in array b at which the data is written.
|
||||
* @param length the maximum number of bytes to read.
|
||||
* @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of
|
||||
* the stream has been reached.
|
||||
*/
|
||||
@Override
|
||||
public int read(final byte[] buffer, int offset, int length) throws IOException {
|
||||
final int originalLength = length;
|
||||
while (length > 0) {
|
||||
final int available = available();
|
||||
if (available == 0) {
|
||||
// Signal EOF to caller
|
||||
if (originalLength == length) {
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
final int copyLength = Math.min(length, available);
|
||||
System.arraycopy(mCurrentBlock.mBlock, mCurrentOffset, buffer, offset, copyLength);
|
||||
mCurrentOffset += copyLength;
|
||||
offset += copyLength;
|
||||
length -= copyLength;
|
||||
}
|
||||
return originalLength - length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Seek to the given position in the file. Note that pos is a special virtual file pointer,
|
||||
* not an actual byte offset.
|
||||
*
|
||||
* @param pos virtual file pointer position
|
||||
* @throws IOException if stream is closed or not a file based stream
|
||||
*/
|
||||
public void seek(final long pos) throws IOException {
|
||||
// Must be before the mFile == null check because mFile == null for closed files and streams
|
||||
if (mIsClosed) {
|
||||
throw new IOException(CANNOT_SEEK_CLOSED_STREAM_MSG);
|
||||
}
|
||||
|
||||
// Cannot seek on streams that are not file based
|
||||
if (mFile == null) {
|
||||
throw new IOException(CANNOT_SEEK_STREAM_MSG);
|
||||
}
|
||||
|
||||
// Decode virtual file pointer
|
||||
// Upper 48 bits is the byte offset into the compressed stream of a
|
||||
// block.
|
||||
// Lower 16 bits is the byte offset into the uncompressed stream inside
|
||||
// the block.
|
||||
final long compressedOffset = BGZFFilePointerUtil.getBlockAddress(pos);
|
||||
final int uncompressedOffset = BGZFFilePointerUtil.getBlockOffset(pos);
|
||||
final int available;
|
||||
if (mCurrentBlock != null && mCurrentBlock.mBlockAddress == compressedOffset) {
|
||||
available = mCurrentBlock.mBlock.length;
|
||||
} else {
|
||||
prepareForSeek();
|
||||
mFile.seek(compressedOffset);
|
||||
mStreamOffset = compressedOffset;
|
||||
mCurrentBlock = nextBlock(getBufferForReuse(mCurrentBlock));
|
||||
mCurrentOffset = 0;
|
||||
available = available();
|
||||
}
|
||||
if (uncompressedOffset > available || (uncompressedOffset == available && !eof())) {
|
||||
throw new IOException(INVALID_FILE_PTR_MSG + pos + " for " + getSource());
|
||||
}
|
||||
mCurrentOffset = uncompressedOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs cleanup required before seek is called on the underlying stream
|
||||
*/
|
||||
protected void prepareForSeek() {
|
||||
}
|
||||
|
||||
private boolean eof() throws IOException {
|
||||
if (mFile.eof()) {
|
||||
return true;
|
||||
}
|
||||
// If the last remaining block is the size of the EMPTY_GZIP_BLOCK, this is the same as being at EOF.
|
||||
return (mFile.length() - (mCurrentBlock.mBlockAddress
|
||||
+ mCurrentBlock.mBlockCompressedSize) == BGZFStreamConstants.EMPTY_GZIP_BLOCK.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return virtual file pointer that can be passed to seek() to return to the current position. This is
|
||||
* not an actual byte offset, so arithmetic on file pointers cannot be done to determine the distance between
|
||||
* the two.
|
||||
*/
|
||||
public long getFilePointer() {
|
||||
if (mCurrentBlock == null) {
|
||||
// Haven't read anything yet = at start of stream
|
||||
return BGZFFilePointerUtil.makeFilePointer(0, 0);
|
||||
}
|
||||
if (mCurrentOffset > 0 && mCurrentOffset == mCurrentBlock.mBlock.length) {
|
||||
// If current offset is at the end of the current block, file
|
||||
// pointer should point
|
||||
// to the beginning of the next block.
|
||||
return BGZFFilePointerUtil.makeFilePointer(mCurrentBlock.mBlockAddress + mCurrentBlock.mBlockCompressedSize, 0);
|
||||
}
|
||||
return BGZFFilePointerUtil.makeFilePointer(mCurrentBlock.mBlockAddress, mCurrentOffset);
|
||||
}
|
||||
|
||||
public long getPosition() {
|
||||
return getFilePointer();
|
||||
}
|
||||
|
||||
public static long getFileBlock(final long bgzfOffset) {
|
||||
return BGZFFilePointerUtil.getBlockAddress(bgzfOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param stream Must be at start of file. Throws RuntimeException if !stream.markSupported().
|
||||
* @return true if the given file looks like a valid BGZF file.
|
||||
*/
|
||||
public static boolean isValidFile(final InputStream stream) throws IOException {
|
||||
if (!stream.markSupported()) {
|
||||
throw new RuntimeException("Cannot test non-buffered stream");
|
||||
}
|
||||
stream.mark(BGZFStreamConstants.BLOCK_HEADER_LENGTH);
|
||||
final byte[] buffer = new byte[BGZFStreamConstants.BLOCK_HEADER_LENGTH];
|
||||
final int count = readBytes(stream, buffer, 0, BGZFStreamConstants.BLOCK_HEADER_LENGTH);
|
||||
stream.reset();
|
||||
return count == BGZFStreamConstants.BLOCK_HEADER_LENGTH && isValidBlockHeader(buffer);
|
||||
}
|
||||
|
||||
private static boolean isValidBlockHeader(final byte[] buffer) {
|
||||
return (buffer[0] == BGZFStreamConstants.GZIP_ID1 &&
|
||||
(buffer[1] & 0xFF) == BGZFStreamConstants.GZIP_ID2 &&
|
||||
(buffer[3] & BGZFStreamConstants.GZIP_FLG) != 0 &&
|
||||
buffer[10] == BGZFStreamConstants.GZIP_XLEN &&
|
||||
buffer[12] == BGZFStreamConstants.BGZF_ID1 &&
|
||||
buffer[13] == BGZFStreamConstants.BGZF_ID2);
|
||||
}
|
||||
|
||||
private void readBlock() throws IOException {
|
||||
mCurrentBlock = nextBlock(getBufferForReuse(mCurrentBlock));
|
||||
mCurrentOffset = 0;
|
||||
checkAndRethrowDecompressionException();
|
||||
}
|
||||
/**
|
||||
* Reads and decompresses the next block
|
||||
* @param bufferAvailableForReuse decompression buffer available for reuse
|
||||
* @return next block in the decompressed stream
|
||||
*/
|
||||
protected DecompressedBlock nextBlock(byte[] bufferAvailableForReuse) {
|
||||
return processNextBlock(bufferAvailableForReuse);
|
||||
}
|
||||
/**
|
||||
* Rethrows an exception encountered during decompression
|
||||
* @throws IOException
|
||||
*/
|
||||
private void checkAndRethrowDecompressionException() throws IOException {
|
||||
if (mCurrentBlock.mException != null) {
|
||||
if (mCurrentBlock.mException instanceof IOException) {
|
||||
throw (IOException) mCurrentBlock.mException;
|
||||
} else if (mCurrentBlock.mException instanceof RuntimeException) {
|
||||
throw (RuntimeException) mCurrentBlock.mException;
|
||||
} else {
|
||||
throw new RuntimeException(mCurrentBlock.mException);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to reuse the buffer of the given block
|
||||
* @param block owning block
|
||||
* @return null decompressing buffer to reuse, null if no buffer is available
|
||||
*/
|
||||
private byte[] getBufferForReuse(DecompressedBlock block) {
|
||||
if (block == null) return null;
|
||||
return block.mBlock;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decompress the next block from the input stream. When using asynchronous
|
||||
* IO, this will be called by the background thread.
|
||||
* @param bufferAvailableForReuse buffer in which to place decompressed block. A null or
|
||||
* incorrectly sized buffer will result in the buffer being ignored and
|
||||
* a new buffer allocated for decompression.
|
||||
* @return next block in input stream
|
||||
*/
|
||||
protected DecompressedBlock processNextBlock(byte[] bufferAvailableForReuse) {
|
||||
if (mFileBuffer == null) {
|
||||
mFileBuffer = new byte[BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE];
|
||||
}
|
||||
long blockAddress = mStreamOffset;
|
||||
try {
|
||||
final int headerByteCount = readBytes(mFileBuffer, 0, BGZFStreamConstants.BLOCK_HEADER_LENGTH);
|
||||
mStreamOffset += headerByteCount;
|
||||
if (headerByteCount == 0) {
|
||||
// Handle case where there is no empty gzip block at end.
|
||||
return new DecompressedBlock(blockAddress, new byte[0], 0);
|
||||
}
|
||||
if (headerByteCount != BGZFStreamConstants.BLOCK_HEADER_LENGTH) {
|
||||
return new DecompressedBlock(blockAddress, headerByteCount, new IOException(INCORRECT_HEADER_SIZE_MSG + getSource()));
|
||||
}
|
||||
final int blockLength = unpackInt16(mFileBuffer, BGZFStreamConstants.BLOCK_LENGTH_OFFSET) + 1;
|
||||
if (blockLength < BGZFStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) {
|
||||
return new DecompressedBlock(blockAddress, blockLength,
|
||||
new IOException(UNEXPECTED_BLOCK_LENGTH_MSG + blockLength + " for " + getSource()));
|
||||
}
|
||||
final int remaining = blockLength - BGZFStreamConstants.BLOCK_HEADER_LENGTH;
|
||||
final int dataByteCount = readBytes(mFileBuffer, BGZFStreamConstants.BLOCK_HEADER_LENGTH,
|
||||
remaining);
|
||||
mStreamOffset += dataByteCount;
|
||||
if (dataByteCount != remaining) {
|
||||
return new DecompressedBlock(blockAddress, blockLength,
|
||||
new BGZFException(PREMATURE_END_MSG + getSource()));
|
||||
}
|
||||
final byte[] decompressed = inflateBlock(mFileBuffer, blockLength, bufferAvailableForReuse);
|
||||
return new DecompressedBlock(blockAddress, decompressed, blockLength);
|
||||
} catch (IOException e) {
|
||||
return new DecompressedBlock(blockAddress, 0, e);
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] inflateBlock(final byte[] compressedBlock, final int compressedLength,
|
||||
final byte[] bufferAvailableForReuse) throws IOException {
|
||||
final int uncompressedLength = unpackInt32(compressedBlock, compressedLength - 4);
|
||||
if (uncompressedLength < 0) {
|
||||
throw new BGZFException(getSource() + " has invalid uncompressedLength: " + uncompressedLength);
|
||||
}
|
||||
byte[] buffer = bufferAvailableForReuse;
|
||||
if (buffer == null || uncompressedLength != buffer.length) {
|
||||
// can't reuse the buffer since the size is incorrect
|
||||
buffer = new byte[uncompressedLength];
|
||||
}
|
||||
blockGunzipper.unzipBlock(buffer, compressedBlock, compressedLength);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
private String getSource() {
|
||||
return mFile == null ? "data stream" : mFile.getSource();
|
||||
}
|
||||
|
||||
private int readBytes(final byte[] buffer, final int offset, final int length) throws IOException {
|
||||
if (mFile != null) {
|
||||
return readBytes(mFile, buffer, offset, length);
|
||||
} else if (mStream != null) {
|
||||
return readBytes(mStream, buffer, offset, length);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length) throws IOException {
|
||||
int bytesRead = 0;
|
||||
while (bytesRead < length) {
|
||||
final int count = file.read(buffer, offset + bytesRead, length - bytesRead);
|
||||
if (count <= 0) {
|
||||
break;
|
||||
}
|
||||
bytesRead += count;
|
||||
}
|
||||
return bytesRead;
|
||||
}
|
||||
|
||||
private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length) throws IOException {
|
||||
int bytesRead = 0;
|
||||
while (bytesRead < length) {
|
||||
final int count = stream.read(buffer, offset + bytesRead, length - bytesRead);
|
||||
if (count <= 0) {
|
||||
break;
|
||||
}
|
||||
bytesRead += count;
|
||||
}
|
||||
return bytesRead;
|
||||
}
|
||||
|
||||
private int unpackInt16(final byte[] buffer, final int offset) {
|
||||
return ((buffer[offset] & 0xFF) |
|
||||
((buffer[offset+1] & 0xFF) << 8));
|
||||
}
|
||||
|
||||
private int unpackInt32(final byte[] buffer, final int offset) {
|
||||
return ((buffer[offset] & 0xFF) |
|
||||
((buffer[offset+1] & 0xFF) << 8) |
|
||||
((buffer[offset+2] & 0xFF) << 16) |
|
||||
((buffer[offset+3] & 0xFF) << 24));
|
||||
}
|
||||
|
||||
public enum FileTermination {HAS_TERMINATOR_BLOCK, HAS_HEALTHY_LAST_BLOCK, DEFECTIVE}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param file the file to check
|
||||
* @return status of the last compressed block
|
||||
* @throws IOException
|
||||
*/
|
||||
public static FileTermination checkTermination(final File file) throws IOException {
|
||||
return checkTermination(file.toPath());
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param path to the file to check
|
||||
* @return status of the last compressed block
|
||||
* @throws IOException
|
||||
*/
|
||||
public static FileTermination checkTermination(final Path path) throws IOException {
|
||||
try( final SeekableByteChannel channel = Files.newByteChannel(path, StandardOpenOption.READ) ){
|
||||
return checkTermination(channel);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* check the status of the final bzgipped block for the given bgzipped resource
|
||||
*
|
||||
* @param channel an open channel to read from,
|
||||
* the channel will remain open and the initial position will be restored when the operation completes
|
||||
* this makes no guarantee about the state of the channel if an exception is thrown during reading
|
||||
*
|
||||
* @return the status of the last compressed black
|
||||
* @throws IOException
|
||||
*/
|
||||
public static FileTermination checkTermination(SeekableByteChannel channel) throws IOException {
|
||||
final long fileSize = channel.size();
|
||||
if (fileSize < BGZFStreamConstants.EMPTY_GZIP_BLOCK.length) {
|
||||
return FileTermination.DEFECTIVE;
|
||||
}
|
||||
final long initialPosition = channel.position();
|
||||
boolean exceptionThrown = false;
|
||||
try {
|
||||
channel.position(fileSize - BGZFStreamConstants.EMPTY_GZIP_BLOCK.length);
|
||||
|
||||
//Check if the end of the file is an empty gzip block which is used as the terminator for a bgzipped file
|
||||
final ByteBuffer lastBlockBuffer = ByteBuffer.allocate(BGZFStreamConstants.EMPTY_GZIP_BLOCK.length);
|
||||
readFully(channel, lastBlockBuffer);
|
||||
if (Arrays.equals(lastBlockBuffer.array(), BGZFStreamConstants.EMPTY_GZIP_BLOCK)) {
|
||||
return FileTermination.HAS_TERMINATOR_BLOCK;
|
||||
}
|
||||
|
||||
//if the last block isn't an empty gzip block, check to see if it is a healthy compressed block or if it's corrupted
|
||||
final int bufsize = (int) Math.min(fileSize, BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
|
||||
final byte[] bufferArray = new byte[bufsize];
|
||||
channel.position(fileSize - bufsize);
|
||||
readFully(channel, ByteBuffer.wrap(bufferArray));
|
||||
for (int i = bufferArray.length - BGZFStreamConstants.EMPTY_GZIP_BLOCK.length;
|
||||
i >= 0; --i) {
|
||||
if (!preambleEqual(BGZFStreamConstants.GZIP_BLOCK_PREAMBLE,
|
||||
bufferArray, i, BGZFStreamConstants.GZIP_BLOCK_PREAMBLE.length)) {
|
||||
continue;
|
||||
}
|
||||
final ByteBuffer byteBuffer = ByteBuffer.wrap(bufferArray,
|
||||
i + BGZFStreamConstants.GZIP_BLOCK_PREAMBLE.length,
|
||||
4);
|
||||
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
final int totalBlockSizeMinusOne = byteBuffer.getShort() & 0xFFFF;
|
||||
if (bufferArray.length - i == totalBlockSizeMinusOne + 1) {
|
||||
return FileTermination.HAS_HEALTHY_LAST_BLOCK;
|
||||
} else {
|
||||
return FileTermination.DEFECTIVE;
|
||||
}
|
||||
}
|
||||
return FileTermination.DEFECTIVE;
|
||||
} catch (final Throwable e) {
|
||||
exceptionThrown = true;
|
||||
throw e;
|
||||
} finally {
|
||||
//if an exception was thrown we don't want to reset the position because that would be likely to throw again
|
||||
//and suppress the initial exception
|
||||
if(!exceptionThrown) {
|
||||
channel.position(initialPosition);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* read as many bytes as dst's capacity into dst or throw if that's not possible
|
||||
*
|
||||
* @throws EOFException if channel has fewer bytes available than dst's capacity
|
||||
*/
|
||||
static void readFully(SeekableByteChannel channel, ByteBuffer dst) throws IOException {
|
||||
int totalBytesRead = 0;
|
||||
final int capacity = dst.capacity();
|
||||
while (totalBytesRead < capacity) {
|
||||
final int bytesRead = channel.read(dst);
|
||||
if (bytesRead == -1) {
|
||||
throw new EOFException();
|
||||
}
|
||||
totalBytesRead += bytesRead;
|
||||
}
|
||||
}
|
||||
|
||||
public static void assertNonDefectiveFile(final File file) throws IOException {
|
||||
if (checkTermination(file) == FileTermination.DEFECTIVE) {
|
||||
throw new BGZFException(file.getAbsolutePath() + " does not have a valid GZIP block at the end of the file.");
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean preambleEqual(final byte[] preamble, final byte[] buf, final int startOffset, final int length) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
if (preamble[i] != buf[i + startOffset]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
protected static class DecompressedBlock {
|
||||
/**
|
||||
* Decompressed block
|
||||
*/
|
||||
private final byte[] mBlock;
|
||||
/**
|
||||
* Compressed size of block (the uncompressed size can be found using
|
||||
* mBlock.length)
|
||||
*/
|
||||
private final int mBlockCompressedSize;
|
||||
/**
|
||||
* Stream offset of start of block
|
||||
*/
|
||||
private final long mBlockAddress;
|
||||
/**
|
||||
* Exception thrown (if any) when attempting to decompress block
|
||||
*/
|
||||
private final Exception mException;
|
||||
|
||||
public DecompressedBlock(long blockAddress, byte[] block, int compressedSize) {
|
||||
mBlock = block;
|
||||
mBlockAddress = blockAddress;
|
||||
mBlockCompressedSize = compressedSize;
|
||||
mException = null;
|
||||
}
|
||||
|
||||
public DecompressedBlock(long blockAddress, int compressedSize, Exception exception) {
|
||||
mBlock = new byte[0];
|
||||
mBlockAddress = blockAddress;
|
||||
mBlockCompressedSize = compressedSize;
|
||||
mException = exception;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,358 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.zip.CRC32;
|
||||
import java.util.zip.Deflater;
|
||||
|
||||
/**
|
||||
* Stream class for a file that is a series of gzip blocks (BGZF format). The caller just treats it as an
|
||||
* OutputStream, and under the covers a gzip block is written when the amount of uncompressed as-yet-unwritten
|
||||
* bytes reaches a threshold.
|
||||
*
|
||||
* The advantage of BGZF over conventional gzip is that BGZF allows for seeking without having to scan through
|
||||
* the entire file up to the position being sought.
|
||||
*
|
||||
* Note that the flush() method should not be called by client
|
||||
* unless you know what you're doing, because it forces a gzip block to be written even if the
|
||||
* number of buffered bytes has not reached threshold. close(), on the other hand, must be called
|
||||
* when done writing in order to force the last gzip block to be written.
|
||||
*
|
||||
* @see <a href="http://samtools.sourceforge.net/SAM1.pdf">http://samtools.sourceforge.net/SAM1.pdf</a> for details of BGZF file format.
|
||||
*/
|
||||
public class BlockCompressedOutputStream extends OutputStream {
|
||||
|
||||
private static int defaultCompressionLevel = BGZFStreamConstants.DEFAULT_COMPRESSION_LEVEL;
|
||||
private static DeflaterFactory defaultDeflaterFactory = new DeflaterFactory();
|
||||
|
||||
public static void setDefaultCompressionLevel(final int compressionLevel) {
|
||||
if (compressionLevel < Deflater.NO_COMPRESSION || compressionLevel > Deflater.BEST_COMPRESSION) {
|
||||
throw new IllegalArgumentException("Invalid compression level: " + compressionLevel);
|
||||
}
|
||||
defaultCompressionLevel = compressionLevel;
|
||||
}
|
||||
|
||||
public static int getDefaultCompressionLevel() {
|
||||
return defaultCompressionLevel;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the default {@link DeflaterFactory} that will be used for all instances unless specified otherwise in the constructor.
|
||||
* If this method is not called the default is a factory that will create the JDK {@link Deflater}.
|
||||
* @param deflaterFactory non-null default factory.
|
||||
*/
|
||||
public static void setDefaultDeflaterFactory(final DeflaterFactory deflaterFactory) {
|
||||
if (deflaterFactory == null) {
|
||||
throw new IllegalArgumentException("null deflaterFactory");
|
||||
}
|
||||
defaultDeflaterFactory = deflaterFactory;
|
||||
}
|
||||
|
||||
public static DeflaterFactory getDefaultDeflaterFactory() {
|
||||
return defaultDeflaterFactory;
|
||||
}
|
||||
|
||||
private final BinaryCodec codec;
|
||||
private final byte[] uncompressedBuffer = new byte[BGZFStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE];
|
||||
private int numUncompressedBytes = 0;
|
||||
private final byte[] compressedBuffer =
|
||||
new byte[BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE -
|
||||
BGZFStreamConstants.BLOCK_HEADER_LENGTH];
|
||||
private final Deflater deflater;
|
||||
|
||||
// A second deflater is created for the very unlikely case where the regular deflation actually makes
|
||||
// things bigger, and the compressed block is too big. It should be possible to downshift the
|
||||
// primary deflater to NO_COMPRESSION level, recompress, and then restore it to its original setting,
|
||||
// but in practice that doesn't work.
|
||||
// The motivation for deflating at NO_COMPRESSION level is that it will predictably produce compressed
|
||||
// output that is 10 bytes larger than the input, and the threshold at which a block is generated is such that
|
||||
// the size of tbe final gzip block will always be <= 64K. This is preferred over the previous method,
|
||||
// which would attempt to compress up to 64K bytes, and if the resulting compressed block was too large,
|
||||
// try compressing fewer input bytes (aka "downshifting'). The problem with downshifting is that
|
||||
// getFilePointer might return an inaccurate value.
|
||||
// I assume (AW 29-Oct-2013) that there is no value in using hardware-assisted deflater for no-compression mode,
|
||||
// so just use JDK standard.
|
||||
private final Deflater noCompressionDeflater = new Deflater(Deflater.NO_COMPRESSION, true);
|
||||
private final CRC32 crc32 = new CRC32();
|
||||
private Path file = null;
|
||||
private long mBlockAddress = 0;
|
||||
|
||||
/**
|
||||
* Uses default compression level, which is 5 unless changed by setCompressionLevel
|
||||
* Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
|
||||
* Use {@link #BlockCompressedOutputStream(File, int, DeflaterFactory)} to specify a custom factory.
|
||||
*/
|
||||
public BlockCompressedOutputStream(final String filename) throws FileNotFoundException {
|
||||
this(filename, defaultCompressionLevel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses default compression level, which is 5 unless changed by setCompressionLevel
|
||||
* Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
|
||||
* Use {@link #BlockCompressedOutputStream(File, int, DeflaterFactory)} to specify a custom factory.
|
||||
*/
|
||||
public BlockCompressedOutputStream(final File file) throws FileNotFoundException {
|
||||
this(file, defaultCompressionLevel);
|
||||
}
|
||||
|
||||
public BlockCompressedOutputStream(final String filename, final int compressionLevel) throws FileNotFoundException {
|
||||
this(new File(filename), compressionLevel);
|
||||
}
|
||||
|
||||
public BlockCompressedOutputStream(final File file, final int compressionLevel) throws FileNotFoundException {
|
||||
this(file, compressionLevel, defaultDeflaterFactory);
|
||||
}
|
||||
|
||||
public BlockCompressedOutputStream(final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) throws FileNotFoundException {
|
||||
this.file = file.toPath();
|
||||
codec = new BinaryCodec(file, true);
|
||||
deflater = deflaterFactory.makeDeflater(compressionLevel, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses default compression level, which is 5 unless changed by setCompressionLevel
|
||||
* Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
|
||||
* Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
|
||||
*/
|
||||
public BlockCompressedOutputStream(final OutputStream os) {
|
||||
this(os, (File)null, defaultCompressionLevel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses default compression level, which is 5 unless changed by setCompressionLevel
|
||||
* Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
|
||||
* Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
|
||||
*
|
||||
* @param file may be null
|
||||
*/
|
||||
public BlockCompressedOutputStream(final OutputStream os, final Path file) {
|
||||
this(os, file, defaultCompressionLevel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
|
||||
* Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
|
||||
*/
|
||||
public BlockCompressedOutputStream(final OutputStream os, final File file, final int compressionLevel) {
|
||||
this(os, file, compressionLevel, defaultDeflaterFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
|
||||
* Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
|
||||
*/
|
||||
public BlockCompressedOutputStream(final OutputStream os, final Path file, final int compressionLevel) {
|
||||
this(os, file, compressionLevel, defaultDeflaterFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the output stream.
|
||||
* @param os output stream to create a BlockCompressedOutputStream from
|
||||
* @param file file to which to write the output or null if not available
|
||||
* @param compressionLevel the compression level (0-9)
|
||||
* @param deflaterFactory custom factory to create deflaters (overrides the default)
|
||||
*/
|
||||
public BlockCompressedOutputStream(final OutputStream os, final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) {
|
||||
this(os, file != null ? file.toPath() : null, compressionLevel, deflaterFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the output stream.
|
||||
* @param os output stream to create a BlockCompressedOutputStream from
|
||||
* @param file file to which to write the output or null if not available
|
||||
* @param compressionLevel the compression level (0-9)
|
||||
* @param deflaterFactory custom factory to create deflaters (overrides the default)
|
||||
*/
|
||||
public BlockCompressedOutputStream(final OutputStream os, final Path file, final int compressionLevel, final DeflaterFactory deflaterFactory) {
|
||||
this.file = file;
|
||||
codec = new BinaryCodec(os);
|
||||
if (file != null) {
|
||||
codec.setOutputFileName(file.toAbsolutePath().toUri().toString());
|
||||
}
|
||||
deflater = deflaterFactory.makeDeflater(compressionLevel, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param output May or not already be a BlockCompressedOutputStream.
|
||||
* @return A BlockCompressedOutputStream, either by wrapping the given OutputStream, or by casting if it already
|
||||
* is a BCOS.
|
||||
*/
|
||||
public static BlockCompressedOutputStream maybeBgzfWrapOutputStream(OutputStream output) {
|
||||
if (!(output instanceof BlockCompressedOutputStream)) {
|
||||
return new BlockCompressedOutputStream(output);
|
||||
} else {
|
||||
return (BlockCompressedOutputStream)output;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes b.length bytes from the specified byte array to this output stream. The general contract for write(b)
|
||||
* is that it should have exactly the same effect as the call write(b, 0, b.length).
|
||||
* @param bytes the data
|
||||
*/
|
||||
@Override
|
||||
public void write(final byte[] bytes) throws IOException {
|
||||
write(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes len bytes from the specified byte array starting at offset off to this output stream. The general
|
||||
* contract for write(b, off, len) is that some of the bytes in the array b are written to the output stream in order;
|
||||
* element b[off] is the first byte written and b[off+len-1] is the last byte written by this operation.
|
||||
*
|
||||
* @param bytes the data
|
||||
* @param startIndex the start offset in the data
|
||||
* @param numBytes the number of bytes to write
|
||||
*/
|
||||
@Override
|
||||
public void write(final byte[] bytes, int startIndex, int numBytes) throws IOException {
|
||||
while (numBytes > 0) {
|
||||
final int bytesToWrite = Math.min(uncompressedBuffer.length - numUncompressedBytes, numBytes);
|
||||
System.arraycopy(bytes, startIndex, uncompressedBuffer, numUncompressedBytes, bytesToWrite);
|
||||
numUncompressedBytes += bytesToWrite;
|
||||
startIndex += bytesToWrite;
|
||||
numBytes -= bytesToWrite;
|
||||
if (numUncompressedBytes == uncompressedBuffer.length) {
|
||||
deflateBlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(final int b) throws IOException {
|
||||
uncompressedBuffer[numUncompressedBytes++] = (byte) b;
|
||||
if (numUncompressedBytes == uncompressedBuffer.length) deflateBlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* WARNING: flush() affects the output format, because it causes the current contents of uncompressedBuffer
|
||||
* to be compressed and written, even if it isn't full. Unless you know what you're doing, don't call flush().
|
||||
* Instead, call close(), which will flush any unwritten data before closing the underlying stream.
|
||||
*
|
||||
*/
|
||||
@Override
|
||||
public void flush() throws IOException {
|
||||
while (numUncompressedBytes > 0) {
|
||||
deflateBlock();
|
||||
}
|
||||
codec.getOutputStream().flush();
|
||||
}
|
||||
|
||||
/**
|
||||
* close() must be called in order to flush any remaining buffered bytes. An unclosed file will likely be
|
||||
* defective.
|
||||
*
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
close(true);
|
||||
}
|
||||
|
||||
public void close(final boolean writeTerminatorBlock) throws IOException {
|
||||
flush();
|
||||
// For debugging...
|
||||
// if (numberOfThrottleBacks > 0) {
|
||||
// System.err.println("In BlockCompressedOutputStream, had to throttle back " + numberOfThrottleBacks +
|
||||
// " times for file " + codec.getOutputFileName());
|
||||
// }
|
||||
if (writeTerminatorBlock) {
|
||||
codec.writeBytes(BGZFStreamConstants.EMPTY_GZIP_BLOCK);
|
||||
}
|
||||
codec.close();
|
||||
|
||||
// If a terminator block was written, ensure that it's there and valid
|
||||
if (writeTerminatorBlock) {
|
||||
// Can't re-open something that is not a regular file, e.g. a named pipe or an output stream
|
||||
if (this.file == null || !Files.isRegularFile(this.file)) return;
|
||||
if (BlockCompressedInputStream.checkTermination(this.file) !=
|
||||
BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) {
|
||||
throw new IOException("Terminator block not found after closing BGZF file " + this.file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Encode virtual file pointer
|
||||
* Upper 48 bits is the byte offset into the compressed stream of a block.
|
||||
* Lower 16 bits is the byte offset into the uncompressed stream inside the block.
|
||||
*/
|
||||
public long getFilePointer(){
|
||||
return BGZFFilePointerUtil.makeFilePointer(mBlockAddress, numUncompressedBytes);
|
||||
}
|
||||
|
||||
public long getPosition() {
|
||||
return getFilePointer();
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to write the data in uncompressedBuffer to the underlying file in a gzip block.
|
||||
* If the entire uncompressedBuffer does not fit in the maximum allowed size, reduce the amount
|
||||
* of data to be compressed, and slide the excess down in uncompressedBuffer so it can be picked
|
||||
* up in the next deflate event.
|
||||
* @return size of gzip block that was written.
|
||||
*/
|
||||
private int deflateBlock() throws IOException {
|
||||
if (numUncompressedBytes == 0) {
|
||||
return 0;
|
||||
}
|
||||
final int bytesToCompress = numUncompressedBytes;
|
||||
// Compress the input
|
||||
deflater.reset();
|
||||
deflater.setInput(uncompressedBuffer, 0, bytesToCompress);
|
||||
deflater.finish();
|
||||
int compressedSize = deflater.deflate(compressedBuffer, 0, compressedBuffer.length);
|
||||
|
||||
// If it didn't all fit in compressedBuffer.length, set compression level to NO_COMPRESSION
|
||||
// and try again. This should always fit.
|
||||
if (!deflater.finished()) {
|
||||
noCompressionDeflater.reset();
|
||||
noCompressionDeflater.setInput(uncompressedBuffer, 0, bytesToCompress);
|
||||
noCompressionDeflater.finish();
|
||||
compressedSize = noCompressionDeflater.deflate(compressedBuffer, 0, compressedBuffer.length);
|
||||
if (!noCompressionDeflater.finished()) {
|
||||
throw new IllegalStateException("unpossible");
|
||||
}
|
||||
}
|
||||
// Data compressed small enough, so write it out.
|
||||
crc32.reset();
|
||||
crc32.update(uncompressedBuffer, 0, bytesToCompress);
|
||||
|
||||
final int totalBlockSize = writeGzipBlock(compressedSize, bytesToCompress, crc32.getValue());
|
||||
|
||||
// Clear out from uncompressedBuffer the data that was written
|
||||
numUncompressedBytes = 0;
|
||||
mBlockAddress += totalBlockSize;
|
||||
return totalBlockSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the entire gzip block, assuming the compressed data is stored in compressedBuffer
|
||||
* @return size of gzip block that was written.
|
||||
*/
|
||||
private int writeGzipBlock(final int compressedSize, final int uncompressedSize, final long crc) throws IOException {
|
||||
// Init gzip header
|
||||
codec.writeByte(BGZFStreamConstants.GZIP_ID1);
|
||||
codec.writeByte(BGZFStreamConstants.GZIP_ID2);
|
||||
codec.writeByte(BGZFStreamConstants.GZIP_CM_DEFLATE);
|
||||
codec.writeByte(BGZFStreamConstants.GZIP_FLG);
|
||||
codec.writeInt(0); // Modification time
|
||||
codec.writeByte(BGZFStreamConstants.GZIP_XFL);
|
||||
codec.writeByte(BGZFStreamConstants.GZIP_OS_UNKNOWN);
|
||||
codec.writeShort(BGZFStreamConstants.GZIP_XLEN);
|
||||
codec.writeByte(BGZFStreamConstants.BGZF_ID1);
|
||||
codec.writeByte(BGZFStreamConstants.BGZF_ID2);
|
||||
codec.writeShort(BGZFStreamConstants.BGZF_LEN);
|
||||
final int totalBlockSize = compressedSize + BGZFStreamConstants.BLOCK_HEADER_LENGTH +
|
||||
BGZFStreamConstants.BLOCK_FOOTER_LENGTH;
|
||||
|
||||
// I don't know why we store block size - 1, but that is what the spec says
|
||||
codec.writeShort((short)(totalBlockSize - 1));
|
||||
codec.writeBytes(compressedBuffer, 0, compressedSize);
|
||||
codec.writeInt((int)crc);
|
||||
codec.writeInt(uncompressedSize);
|
||||
return totalBlockSize;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.zip.CRC32;
|
||||
import java.util.zip.DataFormatException;
|
||||
import java.util.zip.Inflater;
|
||||
|
||||
/**
|
||||
* For decompressing GZIP blocks that are already loaded into a byte[].
|
||||
* The main advantage is that this object can be used over and over again to decompress many blocks,
|
||||
* whereas a new GZIPInputStream and ByteArrayInputStream would otherwise need to be created for each
|
||||
* block to be decompressed.
|
||||
*
|
||||
* This code requires that the GZIP header conform to the GZIP blocks written to BAM files, with
|
||||
* a specific subfield and no other optional stuff.
|
||||
*/
|
||||
public class BlockGunzipper {
|
||||
private static InflaterFactory defaultInflaterFactory = new InflaterFactory();
|
||||
private final Inflater inflater;
|
||||
private final CRC32 crc32 = new CRC32();
|
||||
private boolean checkCrcs = false;
|
||||
BlockGunzipper() {
|
||||
inflater = defaultInflaterFactory.makeInflater(true); // GZIP mode
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a BlockGunzipper using the provided inflaterFactory
|
||||
* @param inflaterFactory
|
||||
*/
|
||||
BlockGunzipper(InflaterFactory inflaterFactory) {
|
||||
inflater = inflaterFactory.makeInflater(true); // GZIP mode
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the default {@link InflaterFactory} that will be used for all instances unless specified otherwise in the constructor.
|
||||
* If this method is not called the default is a factory that will create the JDK {@link Inflater}.
|
||||
* @param inflaterFactory non-null default factory.
|
||||
*/
|
||||
public static void setDefaultInflaterFactory(final InflaterFactory inflaterFactory) {
|
||||
if (inflaterFactory == null) {
|
||||
throw new IllegalArgumentException("null inflaterFactory");
|
||||
}
|
||||
defaultInflaterFactory = inflaterFactory;
|
||||
}
|
||||
|
||||
public static InflaterFactory getDefaultInflaterFactory() {
|
||||
return defaultInflaterFactory;
|
||||
}
|
||||
/** Allows the caller to decide whether or not to check CRCs on when uncompressing blocks. */
|
||||
public void setCheckCrcs(final boolean check) {
|
||||
this.checkCrcs = check;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decompress GZIP-compressed data
|
||||
* @param uncompressedBlock must be big enough to hold decompressed output.
|
||||
* @param compressedBlock compressed data starting at offset 0
|
||||
* @param compressedLength size of compressed data, possibly less than the size of the buffer.
|
||||
*/
|
||||
void unzipBlock(byte[] uncompressedBlock, byte[] compressedBlock, int compressedLength) {
|
||||
try {
|
||||
ByteBuffer byteBuffer = ByteBuffer.wrap(compressedBlock, 0, compressedLength);
|
||||
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
// Validate GZIP header
|
||||
if (byteBuffer.get() != BGZFStreamConstants.GZIP_ID1 ||
|
||||
byteBuffer.get() != (byte)BGZFStreamConstants.GZIP_ID2 ||
|
||||
byteBuffer.get() != BGZFStreamConstants.GZIP_CM_DEFLATE ||
|
||||
byteBuffer.get() != BGZFStreamConstants.GZIP_FLG
|
||||
) {
|
||||
throw new BGZFFormatException("Invalid GZIP header");
|
||||
}
|
||||
// Skip MTIME, XFL, OS fields
|
||||
byteBuffer.position(byteBuffer.position() + 6);
|
||||
if (byteBuffer.getShort() != BGZFStreamConstants.GZIP_XLEN) {
|
||||
throw new BGZFFormatException("Invalid GZIP header");
|
||||
}
|
||||
// Skip blocksize subfield intro
|
||||
byteBuffer.position(byteBuffer.position() + 4);
|
||||
// Read ushort
|
||||
final int totalBlockSize = (byteBuffer.getShort() & 0xffff) + 1;
|
||||
if (totalBlockSize != compressedLength) {
|
||||
throw new BGZFFormatException("GZIP blocksize disagreement");
|
||||
}
|
||||
|
||||
// Read expected size and CRD from end of GZIP block
|
||||
final int deflatedSize = compressedLength - BGZFStreamConstants.BLOCK_HEADER_LENGTH - BGZFStreamConstants.BLOCK_FOOTER_LENGTH;
|
||||
byteBuffer.position(byteBuffer.position() + deflatedSize);
|
||||
int expectedCrc = byteBuffer.getInt();
|
||||
int uncompressedSize = byteBuffer.getInt();
|
||||
inflater.reset();
|
||||
|
||||
// Decompress
|
||||
inflater.setInput(compressedBlock, BGZFStreamConstants.BLOCK_HEADER_LENGTH, deflatedSize);
|
||||
final int inflatedBytes = inflater.inflate(uncompressedBlock, 0, uncompressedSize);
|
||||
if (inflatedBytes != uncompressedSize) {
|
||||
throw new BGZFFormatException("Did not inflate expected amount");
|
||||
}
|
||||
|
||||
// Validate CRC if so desired
|
||||
if (this.checkCrcs) {
|
||||
crc32.reset();
|
||||
crc32.update(uncompressedBlock, 0, uncompressedSize);
|
||||
final long crc = crc32.getValue();
|
||||
if ((int)crc != expectedCrc) {
|
||||
throw new BGZFFormatException("CRC mismatch");
|
||||
}
|
||||
}
|
||||
} catch (DataFormatException e) {
|
||||
throw new BGZFException(e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
import java.util.zip.Deflater;
|
||||
|
||||
/**
|
||||
* Factory for {@link Deflater} objects used by {@link BlockCompressedOutputStream}.
|
||||
* This class may be extended to provide alternative deflaters (e.g., for improved performance).
|
||||
*/
|
||||
public class DeflaterFactory {
|
||||
|
||||
public DeflaterFactory() {
|
||||
//Note: made explicit constructor to make searching for references easier
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a deflater object that will be used when writing BAM files.
|
||||
* Subclasses may override to provide their own deflater implementation.
|
||||
* @param compressionLevel the compression level (0-9)
|
||||
* @param gzipCompatible if true then use GZIP compatible compression
|
||||
*/
|
||||
public Deflater makeDeflater(final int compressionLevel, final boolean gzipCompatible) {
|
||||
return new Deflater(compressionLevel, gzipCompatible);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
import java.util.zip.Inflater;
|
||||
|
||||
/**
|
||||
* Factory for {@link Inflater} objects used by {@link BlockGunzipper}.
|
||||
* This class may be extended to provide alternative inflaters (e.g., for improved performance).
|
||||
* The default implementation returns a JDK {@link Inflater}
|
||||
*/
|
||||
public class InflaterFactory {
|
||||
/**
|
||||
* Returns an inflater object that will be used when reading DEFLATE compressed files.
|
||||
* Subclasses may override to provide their own inflater implementation.
|
||||
* The default implementation returns a JDK {@link Inflater}
|
||||
* @param gzipCompatible if true then use GZIP compatible compression
|
||||
*/
|
||||
public Inflater makeInflater(final boolean gzipCompatible) {
|
||||
return new Inflater(gzipCompatible);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* A wrapper class to provide buffered read access to a SeekableStream. Just wrapping such a stream with
|
||||
* a BufferedInputStream will not work as it does not support seeking. In this implementation a
|
||||
* seek call is delegated to the wrapped stream, and the buffer reset.
|
||||
*/
|
||||
public class SeekableBufferedStream extends SeekableStream {
|
||||
|
||||
/** Little extension to buffered input stream to give access to the available bytes in the buffer. */
|
||||
private static class ExtBufferedInputStream extends BufferedInputStream {
|
||||
private ExtBufferedInputStream(final InputStream inputStream, final int i) {
|
||||
super(inputStream, i);
|
||||
}
|
||||
|
||||
/** Returns the number of bytes that can be read from the buffer without reading more into the buffer. */
|
||||
int getBytesInBufferAvailable() {
|
||||
if (this.count == this.pos) return 0; // documented test for "is buffer empty"
|
||||
else return this.buf.length - this.pos;
|
||||
}
|
||||
}
|
||||
|
||||
public static final int DEFAULT_BUFFER_SIZE = 512000;
|
||||
|
||||
final private int bufferSize;
|
||||
final SeekableStream wrappedStream;
|
||||
ExtBufferedInputStream bufferedStream;
|
||||
long position;
|
||||
|
||||
public SeekableBufferedStream(final SeekableStream stream, final int bufferSize) {
|
||||
this.bufferSize = bufferSize;
|
||||
this.wrappedStream = stream;
|
||||
this.position = 0;
|
||||
bufferedStream = new ExtBufferedInputStream(wrappedStream, bufferSize);
|
||||
}
|
||||
|
||||
public SeekableBufferedStream(final SeekableStream stream) {
|
||||
this(stream, DEFAULT_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long length() {
|
||||
return wrappedStream.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long skip(final long skipLength) throws IOException {
|
||||
if (skipLength < this.bufferedStream.getBytesInBufferAvailable()) {
|
||||
final long retval = this.bufferedStream.skip(skipLength);
|
||||
this.position += retval;
|
||||
return retval;
|
||||
} else {
|
||||
final long position = this.position + skipLength;
|
||||
seek(position);
|
||||
return skipLength;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seek(final long position) throws IOException {
|
||||
this.position = position;
|
||||
wrappedStream.seek(position);
|
||||
bufferedStream = new ExtBufferedInputStream(wrappedStream, bufferSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
int b = bufferedStream.read();
|
||||
position++;
|
||||
return b;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(final byte[] buffer, final int offset, final int length) throws IOException {
|
||||
final int nBytesRead = bufferedStream.read(buffer, offset, length);
|
||||
if (nBytesRead > 0) {
|
||||
position += nBytesRead;
|
||||
}
|
||||
return nBytesRead;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
wrappedStream.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean eof() throws IOException {
|
||||
return position >= wrappedStream.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSource() {
|
||||
return wrappedStream.getSource();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return position;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,110 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
|
||||
public class SeekableFileStream extends SeekableStream {
|
||||
|
||||
/**
|
||||
* Collection of all open instances. SeekableFileStream objects are usually open and kept open for the
|
||||
* duration of a session. This collection supports a method to close them all.
|
||||
*/
|
||||
private static final Collection<SeekableFileStream> allInstances = Collections.synchronizedCollection(new HashSet<>());
|
||||
|
||||
File file;
|
||||
RandomAccessFile fis;
|
||||
|
||||
public SeekableFileStream(final File file) throws FileNotFoundException {
|
||||
this.file = file;
|
||||
fis = new RandomAccessFile(file, "r");
|
||||
allInstances.add(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long length() {
|
||||
return file.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean eof() throws IOException {
|
||||
return fis.length() == fis.getFilePointer();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seek(final long position) throws IOException {
|
||||
fis.seek(position);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return fis.getChannel().position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long skip(long n) throws IOException {
|
||||
long initPos = position();
|
||||
fis.getChannel().position(initPos + n);
|
||||
return position() - initPos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(final byte[] buffer, final int offset, final int length) throws IOException {
|
||||
if (length < 0) {
|
||||
throw new IndexOutOfBoundsException();
|
||||
}
|
||||
int n = 0;
|
||||
while (n < length) {
|
||||
final int count = fis.read(buffer, offset + n, length - n);
|
||||
if (count < 0) {
|
||||
if (n > 0) {
|
||||
return n;
|
||||
} else {
|
||||
return count;
|
||||
}
|
||||
}
|
||||
n += count;
|
||||
}
|
||||
return n;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
return fis.read();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(byte[] b) throws IOException {
|
||||
return fis.read(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSource() {
|
||||
return file.getAbsolutePath();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
allInstances.remove(this);
|
||||
fis.close();
|
||||
|
||||
}
|
||||
|
||||
public static synchronized void closeAllInstances() {
|
||||
Collection<SeekableFileStream> clonedInstances = new HashSet<>(allInstances);
|
||||
for (SeekableFileStream sfs : clonedInstances) {
|
||||
try {
|
||||
sfs.close();
|
||||
} catch (IOException e) {
|
||||
//
|
||||
}
|
||||
}
|
||||
allInstances.clear();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
public abstract class SeekableStream extends InputStream {
|
||||
|
||||
public abstract long length();
|
||||
|
||||
public abstract long position() throws IOException;
|
||||
|
||||
public abstract void seek(long position) throws IOException;
|
||||
|
||||
@Override
|
||||
public abstract int read(byte[] buffer, int offset, int length) throws IOException;
|
||||
|
||||
@Override
|
||||
public abstract void close() throws IOException;
|
||||
|
||||
public abstract boolean eof() throws IOException;
|
||||
|
||||
/**
|
||||
* @return String representation of source (e.g. URL, file path, etc.), or null if not available.
|
||||
*/
|
||||
public abstract String getSource();
|
||||
|
||||
/**
|
||||
* Read enough bytes to fill the input buffer.
|
||||
* @param b byte array
|
||||
* @throws EOFException If EOF is reached before buffer is filled
|
||||
*/
|
||||
public void readFully(byte[] b) throws IOException {
|
||||
int len = b.length;
|
||||
int n = 0;
|
||||
while (n < len) {
|
||||
int count = read(b, n, len - n);
|
||||
if (count < 0){
|
||||
throw new EOFException();
|
||||
}
|
||||
n += count;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
package org.xbib.io.compress.bgzf;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
public class SeekableStreamFactory {
|
||||
|
||||
public static SeekableStream getStreamFor(String path) throws IOException {
|
||||
return new SeekableFileStream(new File(path));
|
||||
}
|
||||
}
|
|
@ -1,3 +1,4 @@
|
|||
include 'io-compress-bgzf'
|
||||
include 'io-compress-bzip2'
|
||||
include 'io-compress-lzf'
|
||||
include 'io-compress-xz'
|
||||
|
|
Loading…
Reference in a new issue