diff --git a/gradle.properties b/gradle.properties index 8a3e593..496f169 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,5 +1,5 @@ group = org.xbib name = archive -version = 1.0.1 +version = 1.1.0 -gradle.wrapper.version = 6.6.1 +gradle.wrapper.version = 7.3.2 diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index e708b1c..7454180 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 33682bb..ac0b842 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-6.6.1-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-all.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew index 4f906e0..1b6c787 100755 --- a/gradlew +++ b/gradlew @@ -1,7 +1,7 @@ -#!/usr/bin/env sh +#!/bin/sh # -# Copyright 2015 the original author or authors. +# Copyright © 2015-2021 the original authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,67 +17,101 @@ # ############################################################################## -## -## Gradle start up script for UN*X -## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# ############################################################################## # Attempt to set APP_HOME + # Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. 
-while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` +APP_BASE_NAME=${0##*/} # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' # Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" +MAX_FD=maximum warn () { echo "$*" -} +} >&2 die () { echo echo "$*" echo exit 1 -} +} >&2 # OS specific support (must be 'true' or 'false'). cygwin=false msys=false darwin=false nonstop=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; - NONSTOP* ) - nonstop=true - ;; +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; esac CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar @@ -87,9 +121,9 @@ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACMD=$JAVA_HOME/jre/sh/java else - JAVACMD="$JAVA_HOME/bin/java" + JAVACMD=$JAVA_HOME/bin/java fi if [ ! -x "$JAVACMD" ] ; then die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME @@ -98,7 +132,7 @@ Please set the JAVA_HOME variable in your environment to match the location of your Java installation." fi else - JAVACMD="java" + JAVACMD=java which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the @@ -106,80 +140,95 @@ location of your Java installation." fi # Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? 
-ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi -fi - -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi - -# For Cygwin or MSYS, switch paths to Windows format before running java -if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - - JAVACMD=`cygpath --unix "$JAVACMD"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi - # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` - else - eval `echo args$i`="\"$arg\"" - fi - i=`expr $i + 1` - done - case $i in - 0) set -- ;; - 1) set -- "$args0" ;; - 2) set -- "$args0" "$args1" ;; - 3) set -- "$args0" "$args1" "$args2" ;; - 4) set -- "$args0" "$args1" "$args2" "$args3" ;; - 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; - 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" esac fi -# Escape application args -save () { - for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done - echo " " -} -APP_ARGS=`save "$@"` +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
-# Collect all arguments for the java command, following the shell quoting and substitution rules -eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. 
+# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' exec "$JAVACMD" "$@" diff --git a/io-codec/build.gradle b/io-codec/build.gradle index bdf594f..45bca87 100644 --- a/io-codec/build.gradle +++ b/io-codec/build.gradle @@ -1,5 +1,6 @@ dependencies { api project(':io-archive') + implementation project(':io-compress-bgzf') implementation project(':io-compress-bzip2') implementation project(':io-compress-lzf') implementation project(':io-compress-xz') diff --git a/io-codec/src/main/java/module-info.java b/io-codec/src/main/java/module-info.java index 0231a06..1ece307 100644 --- a/io-codec/src/main/java/module-info.java +++ b/io-codec/src/main/java/module-info.java @@ -2,11 +2,13 @@ module org.xbib.io.codec { uses org.xbib.io.codec.StreamCodec; exports org.xbib.io.codec; exports org.xbib.io.codec.ar; + exports org.xbib.io.codec.bgzf; exports org.xbib.io.codec.cpio; exports org.xbib.io.codec.file; exports org.xbib.io.codec.jar; exports org.xbib.io.codec.tar; exports org.xbib.io.codec.zip; + requires transitive org.xbib.io.compress.bgzf; requires org.xbib.io.compress.bzip; requires org.xbib.io.compress.lzf; requires org.xbib.io.compress.xz; diff --git a/io-codec/src/main/java/org/xbib/io/codec/bgzf/BzgfStreamCodec.java b/io-codec/src/main/java/org/xbib/io/codec/bgzf/BzgfStreamCodec.java new file mode 100644 index 0000000..fa0f65c --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/bgzf/BzgfStreamCodec.java @@ -0,0 +1,37 @@ +package org.xbib.io.codec.bgzf; + +import org.xbib.io.codec.StreamCodec; +import org.xbib.io.compress.bgzf.BlockCompressedInputStream; +import org.xbib.io.compress.bgzf.BlockCompressedOutputStream; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +public class BzgfStreamCodec implements StreamCodec { + + @Override + public String getName() { + return "bgzf"; + } + + @Override + public BlockCompressedInputStream decode(InputStream in) throws IOException { + return new BlockCompressedInputStream(in); + } + + @Override + public BlockCompressedInputStream decode(InputStream in, int bufsize) throws IOException { + return new BlockCompressedInputStream(in); + } + + @Override + public BlockCompressedOutputStream encode(OutputStream out) throws IOException { + return new BlockCompressedOutputStream(out); + } + + @Override + public BlockCompressedOutputStream encode(OutputStream out, int bufsize) throws IOException { + return new BlockCompressedOutputStream(out); + } +} diff --git a/io-compress-bgzf/LICENSE.txt b/io-compress-bgzf/LICENSE.txt new file mode 100644 index 0000000..b149f1b --- /dev/null +++ b/io-compress-bgzf/LICENSE.txt @@ -0,0 +1,21 @@ +/* + * The MIT License + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ \ No newline at end of file diff --git a/io-compress-bgzf/src/main/java/module-info.java b/io-compress-bgzf/src/main/java/module-info.java new file mode 100644 index 0000000..80b1d35 --- /dev/null +++ b/io-compress-bgzf/src/main/java/module-info.java @@ -0,0 +1,3 @@ +module org.xbib.io.compress.bgzf { + exports org.xbib.io.compress.bgzf; +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFException.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFException.java new file mode 100644 index 0000000..1dc4d25 --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFException.java @@ -0,0 +1,19 @@ +package org.xbib.io.compress.bgzf; + +@SuppressWarnings("serial") +public class BGZFException extends RuntimeException { + + public BGZFException() {} + + public BGZFException(final String s) { + super(s); + } + + public BGZFException(final String s, final Throwable throwable) { + super(s, throwable); + } + + public BGZFException(final Throwable throwable) { + super(throwable); + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFFilePointerUtil.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFFilePointerUtil.java new file mode 100644 index 0000000..ebc5935 --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFFilePointerUtil.java @@ -0,0 +1,67 @@ + +package org.xbib.io.compress.bgzf; + +public class BGZFFilePointerUtil { + + private static final int SHIFT_AMOUNT = 16; + private static final int OFFSET_MASK = 0xffff; + private static final long ADDRESS_MASK = 0xFFFFFFFFFFFFL; + + public static final long MAX_BLOCK_ADDRESS = ADDRESS_MASK; + public static final int MAX_OFFSET = OFFSET_MASK; + + public static int compare(final long vfp1, final long vfp2) { + if (vfp1 == vfp2) return 0; + // When treating as unsigned, negative number is > positive. + if (vfp1 < 0 && vfp2 >= 0) return 1; + if (vfp1 >= 0 && vfp2 < 0) return -1; + // Either both negative or both non-negative, so regular comparison works. + if (vfp1 < vfp2) return -1; + return 1; // vfp1 > vfp2 + } + + /** + * @return true if vfp2 points to somewhere in the same BGZF block, or the one immediately + * following vfp1's BGZF block. + */ + public static boolean areInSameOrAdjacentBlocks(final long vfp1, final long vfp2) { + final long block1 = getBlockAddress(vfp1); + final long block2 = getBlockAddress(vfp2); + return (block1 == block2 || block1 + 1 == block2); + } + + /** + * @param blockAddress File offset of start of BGZF block. + * @param blockOffset Offset into uncompressed block. + * @return Virtual file pointer that embodies the input parameters. 
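     *
     * Worked example: blockAddress = 1000 and blockOffset = 36 pack to
     * (1000 << 16) | 36 == 65_536_036, and getBlockAddress()/getBlockOffset()
     * recover 1000 and 36 from that value.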
+ */ + static long makeFilePointer(final long blockAddress, final int blockOffset) { + if (blockOffset < 0) { + throw new IllegalArgumentException("Negative blockOffset " + blockOffset + + " not allowed."); + } + if (blockAddress < 0) { + throw new IllegalArgumentException("Negative blockAddress " + blockAddress + + " not allowed."); + } + if (blockOffset > MAX_OFFSET) { + throw new IllegalArgumentException("blockOffset " + blockOffset + " too large."); + } + if (blockAddress > MAX_BLOCK_ADDRESS) { + throw new IllegalArgumentException("blockAddress " + blockAddress + " too large."); + } + return blockAddress << SHIFT_AMOUNT | blockOffset; + } + + public static long getBlockAddress(final long virtualFilePointer) { + return (virtualFilePointer >> SHIFT_AMOUNT) & ADDRESS_MASK; + } + + public static int getBlockOffset(final long virtualFilePointer) { + return (int)(virtualFilePointer & OFFSET_MASK); + } + + public static String asString(final long vfp) { + return String.format("%d(0x%x): (block address: %d, offset: %d)", vfp, vfp, getBlockAddress(vfp), getBlockOffset(vfp)); + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFFormatException.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFFormatException.java new file mode 100644 index 0000000..3212fc5 --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFFormatException.java @@ -0,0 +1,19 @@ +package org.xbib.io.compress.bgzf; + +@SuppressWarnings("serial") +public class BGZFFormatException extends BGZFException { + + public BGZFFormatException() {} + + public BGZFFormatException(final String s) { + super(s); + } + + public BGZFFormatException(final String s, final Throwable throwable) { + super(s, throwable); + } + + public BGZFFormatException(final Throwable throwable) { + super(throwable); + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFStreamConstants.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFStreamConstants.java new file mode 100644 index 0000000..298aa8a --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BGZFStreamConstants.java @@ -0,0 +1,95 @@ +package org.xbib.io.compress.bgzf; + +/** + * Constants shared by BlockCompressed{Input,Output}Stream classes + */ +public class BGZFStreamConstants { + + // Number of bytes in the gzip block before the deflated data. + // This is not the standard header size, because we include one optional subfield, + // but it is the standard for us. + public static final int BLOCK_HEADER_LENGTH = 18; + + // Location in the gzip block of the total block size (actually total block size - 1) + public static final int BLOCK_LENGTH_OFFSET = 16; + + // Number of bytes that follow the deflated data + public static final int BLOCK_FOOTER_LENGTH = 8; + + // We require that a compressed block (including header and footer, be <= this) + public static final int MAX_COMPRESSED_BLOCK_SIZE = 64 * 1024; + + // Gzip overhead is the header, the footer, and the block size (encoded as a short). + public static final int GZIP_OVERHEAD = BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH + 2; + + // If Deflater has compression level == NO_COMPRESSION, 10 bytes of overhead (determined experimentally). + public static final int NO_COMPRESSION_OVERHEAD = 10; + + // Push out a gzip block when this many uncompressed bytes have been accumulated. 
+ // This size is selected so that if data is not compressible, if Deflater is given + // compression level == NO_COMPRESSION, compressed size is guaranteed to be <= MAX_COMPRESSED_BLOCK_SIZE. + public static final int DEFAULT_UNCOMPRESSED_BLOCK_SIZE = 64 * 1024 - (GZIP_OVERHEAD + NO_COMPRESSION_OVERHEAD); + + // Magic numbers + public static final byte GZIP_ID1 = 31; + public static final int GZIP_ID2 = 139; + + // FEXTRA flag means there are optional fields + public static final int GZIP_FLG = 4; + + // extra flags + public static final int GZIP_XFL = 0; + + // length of extra subfield + public static final short GZIP_XLEN = 6; + + // The deflate compression, which is customarily used by gzip + public static final byte GZIP_CM_DEFLATE = 8; + + public static final int DEFAULT_COMPRESSION_LEVEL = 5; + + // We don't care about OS because we're not doing line terminator translation + public static final int GZIP_OS_UNKNOWN = 255; + + // The subfield ID + public static final byte BGZF_ID1 = 66; + public static final byte BGZF_ID2 = 67; + + // subfield length in bytes + public static final byte BGZF_LEN = 2; + + public static final byte[] EMPTY_GZIP_BLOCK = { + BGZFStreamConstants.GZIP_ID1, + (byte)BGZFStreamConstants.GZIP_ID2, + BGZFStreamConstants.GZIP_CM_DEFLATE, + BGZFStreamConstants.GZIP_FLG, + 0, 0, 0, 0, // Modification time + BGZFStreamConstants.GZIP_XFL, + (byte)BGZFStreamConstants.GZIP_OS_UNKNOWN, + BGZFStreamConstants.GZIP_XLEN, 0, // Little-endian short + BGZFStreamConstants.BGZF_ID1, + BGZFStreamConstants.BGZF_ID2, + BGZFStreamConstants.BGZF_LEN, 0, // Little-endian short + // Total block size - 1 + BGZFStreamConstants.BLOCK_HEADER_LENGTH + + BGZFStreamConstants.BLOCK_FOOTER_LENGTH - 1 + 2, 0, // Little-endian short + // Dummy payload? + 3, 0, + 0, 0, 0, 0, // crc + 0, 0, 0, 0, // uncompressedSize + }; + + public static final byte[] GZIP_BLOCK_PREAMBLE = { + BGZFStreamConstants.GZIP_ID1, + (byte)BGZFStreamConstants.GZIP_ID2, + BGZFStreamConstants.GZIP_CM_DEFLATE, + BGZFStreamConstants.GZIP_FLG, + 0, 0, 0, 0, // Modification time + BGZFStreamConstants.GZIP_XFL, + (byte)BGZFStreamConstants.GZIP_OS_UNKNOWN, + BGZFStreamConstants.GZIP_XLEN, 0, // Little-endian short + BGZFStreamConstants.BGZF_ID1, + BGZFStreamConstants.BGZF_ID2, + BGZFStreamConstants.BGZF_LEN, 0, // Little-endian short + }; +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BinaryCodec.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BinaryCodec.java new file mode 100644 index 0000000..9466fba --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BinaryCodec.java @@ -0,0 +1,666 @@ +package org.xbib.io.compress.bgzf; + +import java.io.ByteArrayInputStream; +import java.io.EOFException; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.SyncFailedException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * Encapsulates file representation of various primitive data types. Forces little-endian disk + * representation. Note that this class is currently not very efficient. There are plans to increase + * the size of the ByteBuffer, and move data between the ByteBuffer and the underlying input or + * output stream in larger chunks. + * + * All the read methods throw EOFException if the input stream is exhausted before the + * required number of bytes are read. 
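 *
 * Illustrative sketch (uses only the constructors and methods declared below): write a few
 * little-endian values and close the codec, e.g.
 *   BinaryCodec codec = new BinaryCodec(outputStream);
 *   codec.writeInt(42);
 *   codec.writeUShort(65535);
 *   codec.close();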
+ */ +public class BinaryCodec { + + // Outstream to write to + private OutputStream outputStream; + // If a file or filename was given it will be stored here. Used for error reporting. + private String outputFileName; + + // Input stream to read from + private InputStream inputStream; + // If a file or filename was give to read from it will be stored here. Used for error reporting. + private String inputFileName; + + /* + * Mode that the BinaryCodec is in. It is either writing to a binary file or reading from. This + * is set to true if it is writing to a binary file Right now we don't support reading and + * writing to the same file with the same BinaryCodec instance + */ + private boolean isWriting; + + /** + * For byte swapping. + */ + private ByteBuffer byteBuffer; + + /** + * For reading Strings of known length, this can reduce object creation + */ + private final byte[] scratchBuffer = new byte[16]; + + // Byte order used in BAM files. + private static final ByteOrder LITTLE_ENDIAN = ByteOrder.LITTLE_ENDIAN; + private static final byte[] NULL_BYTE = {0}; + + private static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1; + private static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1; + private static final long MAX_UINT = ((long)Integer.MAX_VALUE * 2) + 1; + + // We never serialize more than this much at a time (except for Strings) + private static final int MAX_BYTE_BUFFER = 8; + /** + * Constructs BinaryCodec from a file and set it's mode to writing or not + * + * @param file file to be written to or read from + * @param writing whether the file is being written to + * @throws FileNotFoundException + */ + public BinaryCodec(final File file, final boolean writing) throws FileNotFoundException { + this(); + + this.isWriting = writing; + if (this.isWriting) { + this.outputStream = new FileOutputStream(file); + this.outputFileName = file.getName(); + } else { + this.inputStream = new FileInputStream(file); + this.inputFileName = file.getName(); + } + } + + /** + * Constructs BinaryCodec from a file name and set it's mode to writing or not + * + * @param fileName name of the file to be written to or read from + * @param writing writing whether the file is being written to + * @throws FileNotFoundException + */ + public BinaryCodec(final String fileName, final boolean writing) throws FileNotFoundException { + this(new File(fileName), writing); + } + + /** + * Constructs BinaryCodec from an output stream + * + * @param outputStream Stream to write to, since it's an output stream we know that isWriting + * should be set to true + */ + public BinaryCodec(final OutputStream outputStream) { + this(); + setOutputStream(outputStream); + } + + /** + * Constructs BinaryCodec from an input stream + * + * @param inputStream Stream to read from, since we are reading isWriting is set to false + */ + public BinaryCodec(final InputStream inputStream) { + this(); + setInputStream(inputStream); + } + + /** + * Ambiguous whether reading or writing until set{In,Out}putStream is called + */ + public BinaryCodec() { + initByteBuffer(); + } + + /** + * Shared among ctors. Note that if endianness is changed, all the unsigned methods must also be + * changed. + */ + private void initByteBuffer() { + byteBuffer = ByteBuffer.allocate(MAX_BYTE_BUFFER); + byteBuffer.order(LITTLE_ENDIAN); + } + + + /** + * Write whatever has been put into the byte buffer + * + * @param numBytes -- how much to write. 
Note that in case of writing an unsigned value, more + * bytes were put into the ByteBuffer than will get written out. + * @throws IOException + */ + private void writeByteBuffer(final int numBytes) throws IOException { + assert (numBytes <= byteBuffer.limit()); + writeBytes(byteBuffer.array(), 0, numBytes); + } + + /** + * Writes a byte to the output buffer + * + * @param bite byte array to write + * @throws IOException + */ + public void writeByte(final byte bite) throws IOException { + byteBuffer.clear(); + byteBuffer.put(bite); + writeByteBuffer(1); + } + + public void writeByte(final int b) throws IOException { + writeByte((byte)b); + } + + /** + * Writes a byte array to the output buffer + * + * @param bytes value to write + * @throws IOException + */ + public void writeBytes(final byte[] bytes) throws IOException { + writeBytes(bytes, 0, bytes.length); + } + + public void writeBytes(final byte[] bytes, final int startOffset, final int numBytes) throws IOException { + if (!isWriting) { + throw new IllegalStateException("Calling write method on BinaryCodec open for read."); + } + + outputStream.write(bytes, startOffset, numBytes); + } + + /** + * Write a 32-bit int to the output stream + * + * @param value int to write + * @throws IOException + */ + public void writeInt(final int value) throws IOException { + byteBuffer.clear(); + byteBuffer.putInt(value); + writeByteBuffer(4); + } + + /** + * Write a double (8 bytes) to the output stream + * + * @param value double to write + * @throws IOException + */ + public void writeDouble(final double value) throws IOException { + byteBuffer.clear(); + byteBuffer.putDouble(value); + writeByteBuffer(8); + } + + /** + * Write a 64-bit long to the output stream + * + * @param value long to write + * @throws IOException + */ + public void writeLong(final long value) throws IOException { + byteBuffer.clear(); + byteBuffer.putLong(value); + writeByteBuffer(8); + } + + + /** + * Write a 16-bit short to output stream + * + * @throws IOException + */ + public void writeShort(final short value) throws IOException { + byteBuffer.clear(); + byteBuffer.putShort(value); + writeByteBuffer(2); + } + + /** + * Write a float (4 bytes) to the output stream + * + * @param value float to write + * @throws IOException + */ + public void writeFloat(final float value) throws IOException { + byteBuffer.clear(); + byteBuffer.putFloat(value); + writeByteBuffer(4); + } + + /** + * Writes a boolean (1 byte) to the output buffer + * + * @param value boolean to write + * @throws IOException + */ + public void writeBoolean(final boolean value) throws IOException { + byteBuffer.clear(); + byteBuffer.put(value ? 
(byte)1 : (byte)0); + writeByteBuffer(1); + } + + /** + * Writes a string to the buffer as ASCII bytes + * + * @param value string to write to buffer + * @param writeLength prefix the string with the length as a 32-bit int + * @param appendNull add a null byte to the end of the string + * @throws IOException + */ + public void writeString(final String value, final boolean writeLength, final boolean appendNull) throws IOException { + if (writeLength) { + int lengthToWrite = value.length(); + if (appendNull) lengthToWrite++; + writeInt(lengthToWrite); + } + + // Actually writes the string to a buffer + writeString(value); + + if (appendNull) writeBytes(NULL_BYTE); + + } + + + /** + * Write a string to the buffer as ASCII bytes + * + * @param value string to write + * @throws IOException + */ + private void writeString(final String value) throws IOException { + final byte[] byteBuffer = new byte[value.length()]; + final char[] charBuffer = value.toCharArray(); + for (int i = 0; i < charBuffer.length; ++i) { + byteBuffer[i] = (byte)(charBuffer[i] & 0xff); + } + writeBytes(byteBuffer); + } + + /** + * Write an 8-bit unsigned byte. NOTE: This method will break if we change to big-endian. + * + * @throws IOException + */ + public void writeUByte(final short val) throws IOException { + if (val < 0) { + throw new IllegalArgumentException("Negative value (" + val + + ") passed to unsigned writing method."); + } + if (val > MAX_UBYTE) { + throw new IllegalArgumentException("Value (" + val + + ") to large to be written as ubyte."); + } + byteBuffer.clear(); + byteBuffer.putShort(val); + writeByteBuffer(1); + } + + /** + * Write a 16-bit unsigned short. NOTE: This method will break if we change to big-endian. + * + * @throws IOException + */ + public void writeUShort(final int val) throws IOException { + if (val < 0) { + throw new IllegalArgumentException("Negative value (" + val + + ") passed to unsigned writing method."); + } + if (val > MAX_USHORT) { + throw new IllegalArgumentException("Value (" + val + + ") to large to be written as ushort."); + } + byteBuffer.clear(); + byteBuffer.putInt(val); + writeByteBuffer(2); + } + + /** + * Write a 32-bit unsigned int. NOTE: This method will break if we change to big-endian. + * + * @throws IOException + */ + public void writeUInt(final long val) throws IOException { + if (val < 0) { + throw new IllegalArgumentException("Negative value (" + val + + ") passed to unsigned writing method."); + } + if (val > MAX_UINT) { + throw new IllegalArgumentException("Value (" + val + + ") to large to be written as uint."); + } + byteBuffer.clear(); + byteBuffer.putLong(val); + writeByteBuffer(4); + } + + /** + * Read a byte array from the input stream. + * + * @throws IOException + */ + public void readBytes(final byte[] buffer) throws IOException { + readBytes(buffer, 0, buffer.length); + } + + /** + * Read a byte array from the input stream + * + * @param buffer where to put bytes read + * @param offset offset to start putting bytes into buffer + * @param length number of bytes to read + * @throws IOException + */ + public void readBytes(final byte[] buffer, final int offset, final int length) throws IOException { + int totalNumRead = 0; + do { + final int numRead = + readBytesOrFewer(buffer, offset + totalNumRead, length - totalNumRead); + if (numRead < 0) { + throw new EOFException(constructErrorMessage("Premature EOF")); + } else { + totalNumRead += numRead; + } + } while (totalNumRead < length); + } + + /** + * Reads a byte array from the input stream. 
+ * + * @param buffer where to put bytes read + * @param offset offset to start putting bytes into buffer + * @param length number of bytes to read. Fewer bytes may be read if EOF is reached before + * length bytes have been read. + * @return the total number of bytes read into the buffer, or -1 if there is no more data + * because the end of the stream has been reached. + * @throws IOException + */ + public int readBytesOrFewer(final byte[] buffer, final int offset, final int length) throws IOException { + if (isWriting) { + throw new IllegalStateException("Calling read method on BinaryCodec open for write."); + } + + return inputStream.read(buffer, offset, length); + } + + /** + * @return a single byte read from the input stream. + * @throws IOException + */ + public byte readByte() throws IOException { + if (isWriting) { + throw new IllegalStateException("Calling read method on BinaryCodec open for write."); + } + + final int ret = inputStream.read(); + if (ret == -1) { + throw new EOFException(constructErrorMessage("Premature EOF")); + } + return (byte)ret; + } + + /** + * @return true if it is possible to know for sure if at EOF, and it is known for sure. If the + * input stream is a ByteArrayInputStream, this is faster than causing a + * RuntimeEOFException to be thrown. + * @throws IOException + */ + public boolean knownAtEof() throws IOException { + if (isWriting) { + throw new IllegalStateException( + "Calling knownAtEof method on BinaryCodec open for write."); + } + + return inputStream instanceof ByteArrayInputStream && inputStream.available() == 0; + } + + /** + * Read a string off the input stream, as ASCII bytes + * + * @param length length of string to read + * @return String read from stream + * @throws IOException + */ + public String readString(final int length) throws IOException { + final byte[] buffer; + // Recycle single buffer if possible + if (length <= scratchBuffer.length) { + buffer = scratchBuffer; + } else { + buffer = new byte[length]; + + } + readBytes(buffer, 0, length); + + final char[] charBuffer = new char[length]; + for (int i = 0; i < length; ++i) { + charBuffer[i] = (char)buffer[i]; + } + return new String(charBuffer); + } + + /** + * Read ASCII bytes from the input stream until a null byte is read + * + * @return String constructed from the ASCII bytes read + * @throws IOException + */ + public String readNullTerminatedString() throws IOException { + final StringBuilder ret = new StringBuilder(); + for (byte b = this.readByte(); b != 0; b = this.readByte()) { + ret.append((char)(b & 0xff)); + } + return ret.toString(); + } + + /** + * Read an int length, and then a String of that length + * + * @param devourNull if true, the length include a null terminator, which is read and discarded + * @throws IOException + */ + public String readLengthAndString(final boolean devourNull) throws IOException { + int length = readInt(); + if (devourNull) { + --length; + } + final String ret = readString(length); + if (devourNull) { + readByte(); + } + return ret; + } + + private void readByteBuffer(final int numBytes) throws IOException { + assert (numBytes <= byteBuffer.capacity()); + readBytes(byteBuffer.array(), 0, numBytes); + byteBuffer.limit(byteBuffer.capacity()); + byteBuffer.position(numBytes); + } + + /** + * Read an int off the input stream + * + * @return int from input stream + * @throws IOException + */ + public int readInt() throws IOException { + readByteBuffer(4); + byteBuffer.flip(); + return byteBuffer.getInt(); + } + + /** + * Reads a double off 
the input stream + * + * @return double + * @throws IOException + */ + public double readDouble() throws IOException { + readByteBuffer(8); + byteBuffer.flip(); + return byteBuffer.getDouble(); + } + + /** + * Reads a long off the input stream + * + * @return long + * @throws IOException + */ + public long readLong() throws IOException { + readByteBuffer(8); + byteBuffer.flip(); + return byteBuffer.getLong(); + } + + public short readShort() throws IOException { + readByteBuffer(2); + byteBuffer.flip(); + return byteBuffer.getShort(); + } + + /** + * Reads a float off the input stream + * + * @return float + * @throws IOException + */ + public float readFloat() throws IOException { + readByteBuffer(4); + byteBuffer.flip(); + return byteBuffer.getFloat(); + } + + /** + * Reads a boolean off the input stream, represented as a byte with value 1 or 0 + * + * @return boolean + * @throws IOException + */ + public boolean readBoolean() throws IOException { + return ((readByte()) == 1); + } + + /** + * Reads an 8-bit unsigned byte from the input stream. This method assumes little-endianness. + * + * @throws IOException + */ + public short readUByte() throws IOException { + readByteBuffer(1); + byteBuffer.put((byte)0); + byteBuffer.flip(); + return byteBuffer.getShort(); + } + + /** + * Reads a 16-bit unsigned short from the input stream. This method assumes little-endianness. + * + * @throws IOException + */ + public int readUShort() throws IOException { + readByteBuffer(2); + byteBuffer.putShort((short)0); + byteBuffer.flip(); + return byteBuffer.getInt(); + } + + /** + * Reads a 32-bit unsigned int from the input stream. This method assumes little-endianness. + * + * @throws IOException + */ + public long readUInt() throws IOException { + readByteBuffer(4); + byteBuffer.putInt(0); + byteBuffer.flip(); + return byteBuffer.getLong(); + } + + /** + * Close the appropriate stream + * + * @throws IOException + */ + public void close() throws IOException { + if (this.isWriting) { + // To the degree possible, make sure the bytes get forced to the file system, + // or else cause an exception to be thrown. + if (this.outputStream instanceof FileOutputStream) { + this.outputStream.flush(); + FileOutputStream fos = (FileOutputStream)this.outputStream; + try { + fos.getFD().sync(); + } catch (SyncFailedException e) { + // ignore + } + } + this.outputStream.close(); + } else { + this.inputStream.close(); + } + } + + private String constructErrorMessage(final String msg) { + final StringBuilder sb = new StringBuilder(msg); + sb.append("; BinaryCodec in "); + sb.append(isWriting ? "write" : "read"); + sb.append("mode; "); + final String filename = isWriting ? 
outputFileName : inputFileName; + if (filename != null) { + sb.append("file: "); + sb.append(filename); + } else { + sb.append("streamed file (filename not available)"); + } + return sb.toString(); + } + + public String getInputFileName() { + return inputFileName; + } + + public String getOutputFileName() { + return outputFileName; + } + + public void setOutputFileName(final String outputFileName) { + this.outputFileName = outputFileName; + } + + public void setInputFileName(final String inputFileName) { + this.inputFileName = inputFileName; + } + + public boolean isWriting() { + return isWriting; + } + + public OutputStream getOutputStream() { + return outputStream; + } + + public InputStream getInputStream() { + return inputStream; + } + + public void setInputStream(final InputStream is) { + isWriting = false; + this.inputStream = is; + } + + public void setOutputStream(final OutputStream os) { + isWriting = true; + this.outputStream = os; + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BlockCompressedInputStream.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BlockCompressedInputStream.java new file mode 100644 index 0000000..b7f40a4 --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BlockCompressedInputStream.java @@ -0,0 +1,709 @@ +package org.xbib.io.compress.bgzf; + +import java.io.BufferedInputStream; +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; + +/** + * Stream class for reading BGZF block compressed files. The caller can treat this file like any other InputStream. + * It probably is not necessary to wrap this stream in a buffering stream, because there is internal buffering. + * The advantage of BGZF over conventional GZip format is that BGZF allows for seeking without having to read the + * entire file up to the location being sought. Note that seeking is only possible if the input stream is seekable. + * + * Note that this implementation is not synchronized. If multiple threads access an instance concurrently, it must be synchronized externally. + * + * @see http://samtools.sourceforge.net/SAM1.pdf for details of BGZF file format. 
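 *
 * Illustrative use (a sketch relying only on constructors and methods declared in this class):
 *
 *   BlockCompressedInputStream in = new BlockCompressedInputStream(new File("data.bgzf"));
 *   long vfp = in.getFilePointer();   // virtual file pointer, not a byte offset
 *   String line = in.readLine();
 *   in.seek(vfp);                     // return to the remembered position
 *   in.close();
 *
 * seek() requires one of the File- or SeekableStream-based constructors; the InputStream-based
 * constructors do not support it.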
+ */ +public class BlockCompressedInputStream extends InputStream { + + public final static String INCORRECT_HEADER_SIZE_MSG = "Incorrect header size for file: "; + public final static String UNEXPECTED_BLOCK_LENGTH_MSG = "Unexpected compressed block length: "; + public final static String PREMATURE_END_MSG = "Premature end of file: "; + public final static String CANNOT_SEEK_STREAM_MSG = "Cannot seek a position for a non-file stream"; + public final static String CANNOT_SEEK_CLOSED_STREAM_MSG = "Cannot seek a position for a closed stream"; + public final static String INVALID_FILE_PTR_MSG = "Invalid file pointer: "; + + private InputStream mStream; + private boolean mIsClosed = false; + private SeekableStream mFile; + private byte[] mFileBuffer = null; + private DecompressedBlock mCurrentBlock = null; + private int mCurrentOffset = 0; + private long mStreamOffset = 0; + private final BlockGunzipper blockGunzipper; + + private volatile ByteArrayOutputStream buf = null; + private static final byte eol = '\n'; + private static final byte eolCr = '\r'; + + /** + * Note that seek() is not supported if this ctor is used. + * @param stream source of bytes + */ + public BlockCompressedInputStream(final InputStream stream) { + this(stream, true, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Note that seek() is not supported if this ctor is used. + * @param stream source of bytes + * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper} + */ + public BlockCompressedInputStream(final InputStream stream, final InflaterFactory inflaterFactory) { + this(stream, true, inflaterFactory); + } + + /** + * Note that seek() is not supported if this ctor is used. + * @param stream source of bytes + * @param allowBuffering if true, allow buffering + */ + public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering) { + this(stream, allowBuffering, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Note that seek() is not supported if this ctor is used. + * @param stream source of bytes + * @param allowBuffering if true, allow buffering + * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper} + */ + public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering, final InflaterFactory inflaterFactory) { + if (allowBuffering) { + mStream = new BufferedInputStream(stream); + } + else { + mStream = stream; + } + + mFile = null; + blockGunzipper = new BlockGunzipper(inflaterFactory); + } + + /** + * Use this ctor if you wish to call seek() + * @param file source of bytes + * @throws IOException + */ + public BlockCompressedInputStream(final File file) throws IOException { + this(file, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Use this ctor if you wish to call seek() + * @param file source of bytes + * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper} + * @throws IOException + */ + public BlockCompressedInputStream(final File file, final InflaterFactory inflaterFactory) throws IOException { + mFile = new SeekableFileStream(file); + mStream = null; + blockGunzipper = new BlockGunzipper(inflaterFactory); + } + + /** + * For providing some arbitrary data source. No additional buffering is + * provided, so if the underlying source is not buffered, wrap it in a + * SeekableBufferedStream before passing to this ctor. 
+ * @param strm source of bytes + */ + public BlockCompressedInputStream(final SeekableStream strm) { + this(strm, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * For providing some arbitrary data source. No additional buffering is + * provided, so if the underlying source is not buffered, wrap it in a + * SeekableBufferedStream before passing to this ctor. + * @param strm source of bytes + * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper} + */ + public BlockCompressedInputStream(final SeekableStream strm, final InflaterFactory inflaterFactory) { + mFile = strm; + mStream = null; + blockGunzipper = new BlockGunzipper(inflaterFactory); + } + + /** + * Determines whether or not the inflater will re-calculated the CRC on the decompressed data + * and check it against the value stored in the GZIP header. CRC checking is an expensive + * operation and should be used accordingly. + */ + public void setCheckCrcs(final boolean check) { + this.blockGunzipper.setCheckCrcs(check); + } + + /** + * @return the number of bytes that can be read (or skipped over) from this input stream without blocking by the + * next caller of a method for this input stream. The next caller might be the same thread or another thread. + * Note that although the next caller can read this many bytes without blocking, the available() method call itself + * may block in order to fill an internal buffer if it has been exhausted. + */ + @Override + public int available() throws IOException { + if (mCurrentBlock == null || mCurrentOffset == mCurrentBlock.mBlock.length) { + readBlock(); + } + if (mCurrentBlock == null) { + return 0; + } + return mCurrentBlock.mBlock.length - mCurrentOffset; + } + + /** + * @return true if the stream is at the end of a BGZF block, + * false otherwise. + */ + public boolean endOfBlock() { + return (mCurrentBlock != null && mCurrentOffset == mCurrentBlock.mBlock.length); + } + + /** + * Closes the underlying InputStream or RandomAccessFile + */ + @Override + public void close() throws IOException { + if (mFile != null) { + mFile.close(); + mFile = null; + } else if (mStream != null) { + mStream.close(); + mStream = null; + } + // Encourage garbage collection + mFileBuffer = null; + mCurrentBlock = null; + + // Mark as closed + mIsClosed = true; + } + + /** + * Reads the next byte of data from the input stream. The value byte is returned as an int in the range 0 to 255. + * If no byte is available because the end of the stream has been reached, the value -1 is returned. + * This method blocks until input data is available, the end of the stream is detected, or an exception is thrown. + + * @return the next byte of data, or -1 if the end of the stream is reached. + */ + @Override + public int read() throws IOException { + return (available() > 0) ? (mCurrentBlock.mBlock[mCurrentOffset++] & 0xFF) : -1; + } + + /** + * Reads some number of bytes from the input stream and stores them into the buffer array b. The number of bytes + * actually read is returned as an integer. This method blocks until input data is available, end of file is detected, + * or an exception is thrown. + * + * read(buf) has the same effect as read(buf, 0, buf.length). + * + * @param buffer the buffer into which the data is read. + * @return the total number of bytes read into the buffer, or -1 is there is no more data because the end of + * the stream has been reached. 
+ */ + @Override + public int read(final byte[] buffer) throws IOException { + return read(buffer, 0, buffer.length); + } + + /** + * Reads a whole line. A line is considered to be terminated by either a line feed ('\n'), + * carriage return ('\r') or carriage return followed by a line feed ("\r\n"). + * + * @return A String containing the contents of the line, excluding the line terminating + * character, or null if the end of the stream has been reached + * + * @exception IOException If an I/O error occurs + */ + public String readLine() throws IOException { + int available = available(); + if (available == 0) { + return null; + } + if(null == buf){ // lazy initialisation + buf = new ByteArrayOutputStream(8192); + } + buf.reset(); + boolean done = false; + boolean foundCr = false; // \r found flag + while (!done) { + int linetmpPos = mCurrentOffset; + int bCnt = 0; + while((available-- > 0)){ + final byte c = mCurrentBlock.mBlock[linetmpPos++]; + if(c == eol){ // found \n + done = true; + break; + } else if(foundCr){ // previous char was \r + --linetmpPos; // current char is not \n so put it back + done = true; + break; + } else if(c == eolCr){ // found \r + foundCr = true; + continue; // no ++bCnt + } + ++bCnt; + } + if(mCurrentOffset < linetmpPos) { + buf.write(mCurrentBlock.mBlock, mCurrentOffset, bCnt); + mCurrentOffset = linetmpPos; + } + available = available(); + if(available == 0) { + // EOF + done = true; + } + } + return buf.toString(); + } + + /** + * Reads up to len bytes of data from the input stream into an array of bytes. An attempt is made to read + * as many as len bytes, but a smaller number may be read. The number of bytes actually read is returned as an integer. + * + * This method blocks until input data is available, end of file is detected, or an exception is thrown. + * + * @param buffer buffer into which data is read. + * @param offset the start offset in array b at which the data is written. + * @param length the maximum number of bytes to read. + * @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of + * the stream has been reached. + */ + @Override + public int read(final byte[] buffer, int offset, int length) throws IOException { + final int originalLength = length; + while (length > 0) { + final int available = available(); + if (available == 0) { + // Signal EOF to caller + if (originalLength == length) { + return -1; + } + break; + } + final int copyLength = Math.min(length, available); + System.arraycopy(mCurrentBlock.mBlock, mCurrentOffset, buffer, offset, copyLength); + mCurrentOffset += copyLength; + offset += copyLength; + length -= copyLength; + } + return originalLength - length; + } + + /** + * Seek to the given position in the file. Note that pos is a special virtual file pointer, + * not an actual byte offset. + * + * @param pos virtual file pointer position + * @throws IOException if stream is closed or not a file based stream + */ + public void seek(final long pos) throws IOException { + // Must be before the mFile == null check because mFile == null for closed files and streams + if (mIsClosed) { + throw new IOException(CANNOT_SEEK_CLOSED_STREAM_MSG); + } + + // Cannot seek on streams that are not file based + if (mFile == null) { + throw new IOException(CANNOT_SEEK_STREAM_MSG); + } + + // Decode virtual file pointer + // Upper 48 bits is the byte offset into the compressed stream of a + // block. + // Lower 16 bits is the byte offset into the uncompressed stream inside + // the block. 
+ final long compressedOffset = BGZFFilePointerUtil.getBlockAddress(pos); + final int uncompressedOffset = BGZFFilePointerUtil.getBlockOffset(pos); + final int available; + if (mCurrentBlock != null && mCurrentBlock.mBlockAddress == compressedOffset) { + available = mCurrentBlock.mBlock.length; + } else { + prepareForSeek(); + mFile.seek(compressedOffset); + mStreamOffset = compressedOffset; + mCurrentBlock = nextBlock(getBufferForReuse(mCurrentBlock)); + mCurrentOffset = 0; + available = available(); + } + if (uncompressedOffset > available || (uncompressedOffset == available && !eof())) { + throw new IOException(INVALID_FILE_PTR_MSG + pos + " for " + getSource()); + } + mCurrentOffset = uncompressedOffset; + } + + /** + * Performs cleanup required before seek is called on the underlying stream + */ + protected void prepareForSeek() { + } + + private boolean eof() throws IOException { + if (mFile.eof()) { + return true; + } + // If the last remaining block is the size of the EMPTY_GZIP_BLOCK, this is the same as being at EOF. + return (mFile.length() - (mCurrentBlock.mBlockAddress + + mCurrentBlock.mBlockCompressedSize) == BGZFStreamConstants.EMPTY_GZIP_BLOCK.length); + } + + /** + * @return virtual file pointer that can be passed to seek() to return to the current position. This is + * not an actual byte offset, so arithmetic on file pointers cannot be done to determine the distance between + * the two. + */ + public long getFilePointer() { + if (mCurrentBlock == null) { + // Haven't read anything yet = at start of stream + return BGZFFilePointerUtil.makeFilePointer(0, 0); + } + if (mCurrentOffset > 0 && mCurrentOffset == mCurrentBlock.mBlock.length) { + // If current offset is at the end of the current block, file + // pointer should point + // to the beginning of the next block. + return BGZFFilePointerUtil.makeFilePointer(mCurrentBlock.mBlockAddress + mCurrentBlock.mBlockCompressedSize, 0); + } + return BGZFFilePointerUtil.makeFilePointer(mCurrentBlock.mBlockAddress, mCurrentOffset); + } + + public long getPosition() { + return getFilePointer(); + } + + public static long getFileBlock(final long bgzfOffset) { + return BGZFFilePointerUtil.getBlockAddress(bgzfOffset); + } + + /** + * @param stream Must be at start of file. Throws RuntimeException if !stream.markSupported(). + * @return true if the given file looks like a valid BGZF file. 
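     *
     * Illustrative use (assumes only the API shown in this class): probe a mark-supporting
     * stream before deciding how to read it, e.g.
     *   try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
     *       boolean bgzf = BlockCompressedInputStream.isValidFile(in);
     *   }
     * The header bytes are read under mark()/reset(), so the stream is left at its starting
     * position and can still be handed to a reader afterwards.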
+ */ + public static boolean isValidFile(final InputStream stream) throws IOException { + if (!stream.markSupported()) { + throw new RuntimeException("Cannot test non-buffered stream"); + } + stream.mark(BGZFStreamConstants.BLOCK_HEADER_LENGTH); + final byte[] buffer = new byte[BGZFStreamConstants.BLOCK_HEADER_LENGTH]; + final int count = readBytes(stream, buffer, 0, BGZFStreamConstants.BLOCK_HEADER_LENGTH); + stream.reset(); + return count == BGZFStreamConstants.BLOCK_HEADER_LENGTH && isValidBlockHeader(buffer); + } + + private static boolean isValidBlockHeader(final byte[] buffer) { + return (buffer[0] == BGZFStreamConstants.GZIP_ID1 && + (buffer[1] & 0xFF) == BGZFStreamConstants.GZIP_ID2 && + (buffer[3] & BGZFStreamConstants.GZIP_FLG) != 0 && + buffer[10] == BGZFStreamConstants.GZIP_XLEN && + buffer[12] == BGZFStreamConstants.BGZF_ID1 && + buffer[13] == BGZFStreamConstants.BGZF_ID2); + } + + private void readBlock() throws IOException { + mCurrentBlock = nextBlock(getBufferForReuse(mCurrentBlock)); + mCurrentOffset = 0; + checkAndRethrowDecompressionException(); + } + /** + * Reads and decompresses the next block + * @param bufferAvailableForReuse decompression buffer available for reuse + * @return next block in the decompressed stream + */ + protected DecompressedBlock nextBlock(byte[] bufferAvailableForReuse) { + return processNextBlock(bufferAvailableForReuse); + } + /** + * Rethrows an exception encountered during decompression + * @throws IOException + */ + private void checkAndRethrowDecompressionException() throws IOException { + if (mCurrentBlock.mException != null) { + if (mCurrentBlock.mException instanceof IOException) { + throw (IOException) mCurrentBlock.mException; + } else if (mCurrentBlock.mException instanceof RuntimeException) { + throw (RuntimeException) mCurrentBlock.mException; + } else { + throw new RuntimeException(mCurrentBlock.mException); + } + } + } + + /** + * Attempt to reuse the buffer of the given block + * @param block owning block + * @return null decompressing buffer to reuse, null if no buffer is available + */ + private byte[] getBufferForReuse(DecompressedBlock block) { + if (block == null) return null; + return block.mBlock; + } + + /** + * Decompress the next block from the input stream. When using asynchronous + * IO, this will be called by the background thread. + * @param bufferAvailableForReuse buffer in which to place decompressed block. A null or + * incorrectly sized buffer will result in the buffer being ignored and + * a new buffer allocated for decompression. + * @return next block in input stream + */ + protected DecompressedBlock processNextBlock(byte[] bufferAvailableForReuse) { + if (mFileBuffer == null) { + mFileBuffer = new byte[BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE]; + } + long blockAddress = mStreamOffset; + try { + final int headerByteCount = readBytes(mFileBuffer, 0, BGZFStreamConstants.BLOCK_HEADER_LENGTH); + mStreamOffset += headerByteCount; + if (headerByteCount == 0) { + // Handle case where there is no empty gzip block at end. 
+ return new DecompressedBlock(blockAddress, new byte[0], 0); + } + if (headerByteCount != BGZFStreamConstants.BLOCK_HEADER_LENGTH) { + return new DecompressedBlock(blockAddress, headerByteCount, new IOException(INCORRECT_HEADER_SIZE_MSG + getSource())); + } + final int blockLength = unpackInt16(mFileBuffer, BGZFStreamConstants.BLOCK_LENGTH_OFFSET) + 1; + if (blockLength < BGZFStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) { + return new DecompressedBlock(blockAddress, blockLength, + new IOException(UNEXPECTED_BLOCK_LENGTH_MSG + blockLength + " for " + getSource())); + } + final int remaining = blockLength - BGZFStreamConstants.BLOCK_HEADER_LENGTH; + final int dataByteCount = readBytes(mFileBuffer, BGZFStreamConstants.BLOCK_HEADER_LENGTH, + remaining); + mStreamOffset += dataByteCount; + if (dataByteCount != remaining) { + return new DecompressedBlock(blockAddress, blockLength, + new BGZFException(PREMATURE_END_MSG + getSource())); + } + final byte[] decompressed = inflateBlock(mFileBuffer, blockLength, bufferAvailableForReuse); + return new DecompressedBlock(blockAddress, decompressed, blockLength); + } catch (IOException e) { + return new DecompressedBlock(blockAddress, 0, e); + } + } + + private byte[] inflateBlock(final byte[] compressedBlock, final int compressedLength, + final byte[] bufferAvailableForReuse) throws IOException { + final int uncompressedLength = unpackInt32(compressedBlock, compressedLength - 4); + if (uncompressedLength < 0) { + throw new BGZFException(getSource() + " has invalid uncompressedLength: " + uncompressedLength); + } + byte[] buffer = bufferAvailableForReuse; + if (buffer == null || uncompressedLength != buffer.length) { + // can't reuse the buffer since the size is incorrect + buffer = new byte[uncompressedLength]; + } + blockGunzipper.unzipBlock(buffer, compressedBlock, compressedLength); + return buffer; + } + + private String getSource() { + return mFile == null ? 
"data stream" : mFile.getSource(); + } + + private int readBytes(final byte[] buffer, final int offset, final int length) throws IOException { + if (mFile != null) { + return readBytes(mFile, buffer, offset, length); + } else if (mStream != null) { + return readBytes(mStream, buffer, offset, length); + } else { + return 0; + } + } + + private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length) throws IOException { + int bytesRead = 0; + while (bytesRead < length) { + final int count = file.read(buffer, offset + bytesRead, length - bytesRead); + if (count <= 0) { + break; + } + bytesRead += count; + } + return bytesRead; + } + + private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length) throws IOException { + int bytesRead = 0; + while (bytesRead < length) { + final int count = stream.read(buffer, offset + bytesRead, length - bytesRead); + if (count <= 0) { + break; + } + bytesRead += count; + } + return bytesRead; + } + + private int unpackInt16(final byte[] buffer, final int offset) { + return ((buffer[offset] & 0xFF) | + ((buffer[offset+1] & 0xFF) << 8)); + } + + private int unpackInt32(final byte[] buffer, final int offset) { + return ((buffer[offset] & 0xFF) | + ((buffer[offset+1] & 0xFF) << 8) | + ((buffer[offset+2] & 0xFF) << 16) | + ((buffer[offset+3] & 0xFF) << 24)); + } + + public enum FileTermination {HAS_TERMINATOR_BLOCK, HAS_HEALTHY_LAST_BLOCK, DEFECTIVE} + + /** + * + * @param file the file to check + * @return status of the last compressed block + * @throws IOException + */ + public static FileTermination checkTermination(final File file) throws IOException { + return checkTermination(file.toPath()); + } + + /** + * + * @param path to the file to check + * @return status of the last compressed block + * @throws IOException + */ + public static FileTermination checkTermination(final Path path) throws IOException { + try( final SeekableByteChannel channel = Files.newByteChannel(path, StandardOpenOption.READ) ){ + return checkTermination(channel); + } + } + + /** + * check the status of the final bzgipped block for the given bgzipped resource + * + * @param channel an open channel to read from, + * the channel will remain open and the initial position will be restored when the operation completes + * this makes no guarantee about the state of the channel if an exception is thrown during reading + * + * @return the status of the last compressed black + * @throws IOException + */ + public static FileTermination checkTermination(SeekableByteChannel channel) throws IOException { + final long fileSize = channel.size(); + if (fileSize < BGZFStreamConstants.EMPTY_GZIP_BLOCK.length) { + return FileTermination.DEFECTIVE; + } + final long initialPosition = channel.position(); + boolean exceptionThrown = false; + try { + channel.position(fileSize - BGZFStreamConstants.EMPTY_GZIP_BLOCK.length); + + //Check if the end of the file is an empty gzip block which is used as the terminator for a bgzipped file + final ByteBuffer lastBlockBuffer = ByteBuffer.allocate(BGZFStreamConstants.EMPTY_GZIP_BLOCK.length); + readFully(channel, lastBlockBuffer); + if (Arrays.equals(lastBlockBuffer.array(), BGZFStreamConstants.EMPTY_GZIP_BLOCK)) { + return FileTermination.HAS_TERMINATOR_BLOCK; + } + + //if the last block isn't an empty gzip block, check to see if it is a healthy compressed block or if it's corrupted + final int bufsize = (int) Math.min(fileSize, 
BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE); + final byte[] bufferArray = new byte[bufsize]; + channel.position(fileSize - bufsize); + readFully(channel, ByteBuffer.wrap(bufferArray)); + for (int i = bufferArray.length - BGZFStreamConstants.EMPTY_GZIP_BLOCK.length; + i >= 0; --i) { + if (!preambleEqual(BGZFStreamConstants.GZIP_BLOCK_PREAMBLE, + bufferArray, i, BGZFStreamConstants.GZIP_BLOCK_PREAMBLE.length)) { + continue; + } + final ByteBuffer byteBuffer = ByteBuffer.wrap(bufferArray, + i + BGZFStreamConstants.GZIP_BLOCK_PREAMBLE.length, + 4); + byteBuffer.order(ByteOrder.LITTLE_ENDIAN); + final int totalBlockSizeMinusOne = byteBuffer.getShort() & 0xFFFF; + if (bufferArray.length - i == totalBlockSizeMinusOne + 1) { + return FileTermination.HAS_HEALTHY_LAST_BLOCK; + } else { + return FileTermination.DEFECTIVE; + } + } + return FileTermination.DEFECTIVE; + } catch (final Throwable e) { + exceptionThrown = true; + throw e; + } finally { + //if an exception was thrown we don't want to reset the position because that would be likely to throw again + //and suppress the initial exception + if(!exceptionThrown) { + channel.position(initialPosition); + } + } + } + + /** + * read as many bytes as dst's capacity into dst or throw if that's not possible + * + * @throws EOFException if channel has fewer bytes available than dst's capacity + */ + static void readFully(SeekableByteChannel channel, ByteBuffer dst) throws IOException { + int totalBytesRead = 0; + final int capacity = dst.capacity(); + while (totalBytesRead < capacity) { + final int bytesRead = channel.read(dst); + if (bytesRead == -1) { + throw new EOFException(); + } + totalBytesRead += bytesRead; + } + } + + public static void assertNonDefectiveFile(final File file) throws IOException { + if (checkTermination(file) == FileTermination.DEFECTIVE) { + throw new BGZFException(file.getAbsolutePath() + " does not have a valid GZIP block at the end of the file."); + } + } + + private static boolean preambleEqual(final byte[] preamble, final byte[] buf, final int startOffset, final int length) { + for (int i = 0; i < length; ++i) { + if (preamble[i] != buf[i + startOffset]) { + return false; + } + } + return true; + } + + protected static class DecompressedBlock { + /** + * Decompressed block + */ + private final byte[] mBlock; + /** + * Compressed size of block (the uncompressed size can be found using + * mBlock.length) + */ + private final int mBlockCompressedSize; + /** + * Stream offset of start of block + */ + private final long mBlockAddress; + /** + * Exception thrown (if any) when attempting to decompress block + */ + private final Exception mException; + + public DecompressedBlock(long blockAddress, byte[] block, int compressedSize) { + mBlock = block; + mBlockAddress = blockAddress; + mBlockCompressedSize = compressedSize; + mException = null; + } + + public DecompressedBlock(long blockAddress, int compressedSize, Exception exception) { + mBlock = new byte[0]; + mBlockAddress = blockAddress; + mBlockCompressedSize = compressedSize; + mException = exception; + } + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BlockCompressedOutputStream.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BlockCompressedOutputStream.java new file mode 100644 index 0000000..8453f71 --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BlockCompressedOutputStream.java @@ -0,0 +1,358 @@ +package org.xbib.io.compress.bgzf; + +import java.io.File; +import java.io.FileNotFoundException; 
+import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.zip.CRC32; +import java.util.zip.Deflater; + +/** + * Stream class for a file that is a series of gzip blocks (BGZF format). The caller just treats it as an + * OutputStream, and under the covers a gzip block is written when the amount of uncompressed as-yet-unwritten + * bytes reaches a threshold. + * + * The advantage of BGZF over conventional gzip is that BGZF allows for seeking without having to scan through + * the entire file up to the position being sought. + * + * Note that the flush() method should not be called by client + * unless you know what you're doing, because it forces a gzip block to be written even if the + * number of buffered bytes has not reached threshold. close(), on the other hand, must be called + * when done writing in order to force the last gzip block to be written. + * + * @see http://samtools.sourceforge.net/SAM1.pdf for details of BGZF file format. + */ +public class BlockCompressedOutputStream extends OutputStream { + + private static int defaultCompressionLevel = BGZFStreamConstants.DEFAULT_COMPRESSION_LEVEL; + private static DeflaterFactory defaultDeflaterFactory = new DeflaterFactory(); + + public static void setDefaultCompressionLevel(final int compressionLevel) { + if (compressionLevel < Deflater.NO_COMPRESSION || compressionLevel > Deflater.BEST_COMPRESSION) { + throw new IllegalArgumentException("Invalid compression level: " + compressionLevel); + } + defaultCompressionLevel = compressionLevel; + } + + public static int getDefaultCompressionLevel() { + return defaultCompressionLevel; + } + + /** + * Sets the default {@link DeflaterFactory} that will be used for all instances unless specified otherwise in the constructor. + * If this method is not called the default is a factory that will create the JDK {@link Deflater}. + * @param deflaterFactory non-null default factory. + */ + public static void setDefaultDeflaterFactory(final DeflaterFactory deflaterFactory) { + if (deflaterFactory == null) { + throw new IllegalArgumentException("null deflaterFactory"); + } + defaultDeflaterFactory = deflaterFactory; + } + + public static DeflaterFactory getDefaultDeflaterFactory() { + return defaultDeflaterFactory; + } + + private final BinaryCodec codec; + private final byte[] uncompressedBuffer = new byte[BGZFStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE]; + private int numUncompressedBytes = 0; + private final byte[] compressedBuffer = + new byte[BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE - + BGZFStreamConstants.BLOCK_HEADER_LENGTH]; + private final Deflater deflater; + + // A second deflater is created for the very unlikely case where the regular deflation actually makes + // things bigger, and the compressed block is too big. It should be possible to downshift the + // primary deflater to NO_COMPRESSION level, recompress, and then restore it to its original setting, + // but in practice that doesn't work. + // The motivation for deflating at NO_COMPRESSION level is that it will predictably produce compressed + // output that is 10 bytes larger than the input, and the threshold at which a block is generated is such that + // the size of tbe final gzip block will always be <= 64K. This is preferred over the previous method, + // which would attempt to compress up to 64K bytes, and if the resulting compressed block was too large, + // try compressing fewer input bytes (aka "downshifting'). 
The problem with downshifting is that + // getFilePointer might return an inaccurate value. + // I assume (AW 29-Oct-2013) that there is no value in using hardware-assisted deflater for no-compression mode, + // so just use JDK standard. + private final Deflater noCompressionDeflater = new Deflater(Deflater.NO_COMPRESSION, true); + private final CRC32 crc32 = new CRC32(); + private Path file = null; + private long mBlockAddress = 0; + + /** + * Uses default compression level, which is 5 unless changed by setCompressionLevel + * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}. + * Use {@link #BlockCompressedOutputStream(File, int, DeflaterFactory)} to specify a custom factory. + */ + public BlockCompressedOutputStream(final String filename) throws FileNotFoundException { + this(filename, defaultCompressionLevel); + } + + /** + * Uses default compression level, which is 5 unless changed by setCompressionLevel + * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}. + * Use {@link #BlockCompressedOutputStream(File, int, DeflaterFactory)} to specify a custom factory. + */ + public BlockCompressedOutputStream(final File file) throws FileNotFoundException { + this(file, defaultCompressionLevel); + } + + public BlockCompressedOutputStream(final String filename, final int compressionLevel) throws FileNotFoundException { + this(new File(filename), compressionLevel); + } + + public BlockCompressedOutputStream(final File file, final int compressionLevel) throws FileNotFoundException { + this(file, compressionLevel, defaultDeflaterFactory); + } + + public BlockCompressedOutputStream(final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) throws FileNotFoundException { + this.file = file.toPath(); + codec = new BinaryCodec(file, true); + deflater = deflaterFactory.makeDeflater(compressionLevel, true); + } + + /** + * Uses default compression level, which is 5 unless changed by setCompressionLevel + * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}. + * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory. + */ + public BlockCompressedOutputStream(final OutputStream os) { + this(os, (File)null, defaultCompressionLevel); + } + + /** + * Uses default compression level, which is 5 unless changed by setCompressionLevel + * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}. + * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory. + * + * @param file may be null + */ + public BlockCompressedOutputStream(final OutputStream os, final Path file) { + this(os, file, defaultCompressionLevel); + } + + /** + * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}. + * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory. + */ + public BlockCompressedOutputStream(final OutputStream os, final File file, final int compressionLevel) { + this(os, file, compressionLevel, defaultDeflaterFactory); + } + + /** + * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}. + * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory. 
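+     * The {@code file} argument may be null; when present it is only used for error reporting and for the
+     * terminator-block check performed by {@link #close()}. A sketch of typical use (path and payload are
+     * illustrative):
+     * <pre>{@code
+     * Path out = Paths.get("example.vcf.gz");
+     * byte[] payload = "example data".getBytes();
+     * try (BlockCompressedOutputStream bcos =
+     *          new BlockCompressedOutputStream(Files.newOutputStream(out), out, 5)) {
+     *     bcos.write(payload);
+     * }
+     * }</pre>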
+ */ + public BlockCompressedOutputStream(final OutputStream os, final Path file, final int compressionLevel) { + this(os, file, compressionLevel, defaultDeflaterFactory); + } + + /** + * Creates the output stream. + * @param os output stream to create a BlockCompressedOutputStream from + * @param file file to which to write the output or null if not available + * @param compressionLevel the compression level (0-9) + * @param deflaterFactory custom factory to create deflaters (overrides the default) + */ + public BlockCompressedOutputStream(final OutputStream os, final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) { + this(os, file != null ? file.toPath() : null, compressionLevel, deflaterFactory); + } + + /** + * Creates the output stream. + * @param os output stream to create a BlockCompressedOutputStream from + * @param file file to which to write the output or null if not available + * @param compressionLevel the compression level (0-9) + * @param deflaterFactory custom factory to create deflaters (overrides the default) + */ + public BlockCompressedOutputStream(final OutputStream os, final Path file, final int compressionLevel, final DeflaterFactory deflaterFactory) { + this.file = file; + codec = new BinaryCodec(os); + if (file != null) { + codec.setOutputFileName(file.toAbsolutePath().toUri().toString()); + } + deflater = deflaterFactory.makeDeflater(compressionLevel, true); + } + + /** + * @param output May or not already be a BlockCompressedOutputStream. + * @return A BlockCompressedOutputStream, either by wrapping the given OutputStream, or by casting if it already + * is a BCOS. + */ + public static BlockCompressedOutputStream maybeBgzfWrapOutputStream(OutputStream output) { + if (!(output instanceof BlockCompressedOutputStream)) { + return new BlockCompressedOutputStream(output); + } else { + return (BlockCompressedOutputStream)output; + } + } + + /** + * Writes b.length bytes from the specified byte array to this output stream. The general contract for write(b) + * is that it should have exactly the same effect as the call write(b, 0, b.length). + * @param bytes the data + */ + @Override + public void write(final byte[] bytes) throws IOException { + write(bytes, 0, bytes.length); + } + + /** + * Writes len bytes from the specified byte array starting at offset off to this output stream. The general + * contract for write(b, off, len) is that some of the bytes in the array b are written to the output stream in order; + * element b[off] is the first byte written and b[off+len-1] is the last byte written by this operation. 
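+     * Bytes are buffered internally; whenever the uncompressed buffer fills, a complete BGZF block is
+     * compressed and written before the remaining bytes are accepted.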
+ * + * @param bytes the data + * @param startIndex the start offset in the data + * @param numBytes the number of bytes to write + */ + @Override + public void write(final byte[] bytes, int startIndex, int numBytes) throws IOException { + while (numBytes > 0) { + final int bytesToWrite = Math.min(uncompressedBuffer.length - numUncompressedBytes, numBytes); + System.arraycopy(bytes, startIndex, uncompressedBuffer, numUncompressedBytes, bytesToWrite); + numUncompressedBytes += bytesToWrite; + startIndex += bytesToWrite; + numBytes -= bytesToWrite; + if (numUncompressedBytes == uncompressedBuffer.length) { + deflateBlock(); + } + } + } + + @Override + public void write(final int b) throws IOException { + uncompressedBuffer[numUncompressedBytes++] = (byte) b; + if (numUncompressedBytes == uncompressedBuffer.length) deflateBlock(); + } + + /** + * WARNING: flush() affects the output format, because it causes the current contents of uncompressedBuffer + * to be compressed and written, even if it isn't full. Unless you know what you're doing, don't call flush(). + * Instead, call close(), which will flush any unwritten data before closing the underlying stream. + * + */ + @Override + public void flush() throws IOException { + while (numUncompressedBytes > 0) { + deflateBlock(); + } + codec.getOutputStream().flush(); + } + + /** + * close() must be called in order to flush any remaining buffered bytes. An unclosed file will likely be + * defective. + * + */ + @Override + public void close() throws IOException { + close(true); + } + + public void close(final boolean writeTerminatorBlock) throws IOException { + flush(); + // For debugging... + // if (numberOfThrottleBacks > 0) { + // System.err.println("In BlockCompressedOutputStream, had to throttle back " + numberOfThrottleBacks + + // " times for file " + codec.getOutputFileName()); + // } + if (writeTerminatorBlock) { + codec.writeBytes(BGZFStreamConstants.EMPTY_GZIP_BLOCK); + } + codec.close(); + + // If a terminator block was written, ensure that it's there and valid + if (writeTerminatorBlock) { + // Can't re-open something that is not a regular file, e.g. a named pipe or an output stream + if (this.file == null || !Files.isRegularFile(this.file)) return; + if (BlockCompressedInputStream.checkTermination(this.file) != + BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) { + throw new IOException("Terminator block not found after closing BGZF file " + this.file); + } + } + } + + /** Encode virtual file pointer + * Upper 48 bits is the byte offset into the compressed stream of a block. + * Lower 16 bits is the byte offset into the uncompressed stream inside the block. + */ + public long getFilePointer(){ + return BGZFFilePointerUtil.makeFilePointer(mBlockAddress, numUncompressedBytes); + } + + public long getPosition() { + return getFilePointer(); + } + + /** + * Attempt to write the data in uncompressedBuffer to the underlying file in a gzip block. + * If the entire uncompressedBuffer does not fit in the maximum allowed size, reduce the amount + * of data to be compressed, and slide the excess down in uncompressedBuffer so it can be picked + * up in the next deflate event. + * @return size of gzip block that was written. 
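+     * (Note: in this implementation an oversized compressed result is handled by recompressing the same
+     * input with {@code noCompressionDeflater}, rather than by shrinking the input as described above.)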
+ */ + private int deflateBlock() throws IOException { + if (numUncompressedBytes == 0) { + return 0; + } + final int bytesToCompress = numUncompressedBytes; + // Compress the input + deflater.reset(); + deflater.setInput(uncompressedBuffer, 0, bytesToCompress); + deflater.finish(); + int compressedSize = deflater.deflate(compressedBuffer, 0, compressedBuffer.length); + + // If it didn't all fit in compressedBuffer.length, set compression level to NO_COMPRESSION + // and try again. This should always fit. + if (!deflater.finished()) { + noCompressionDeflater.reset(); + noCompressionDeflater.setInput(uncompressedBuffer, 0, bytesToCompress); + noCompressionDeflater.finish(); + compressedSize = noCompressionDeflater.deflate(compressedBuffer, 0, compressedBuffer.length); + if (!noCompressionDeflater.finished()) { + throw new IllegalStateException("unpossible"); + } + } + // Data compressed small enough, so write it out. + crc32.reset(); + crc32.update(uncompressedBuffer, 0, bytesToCompress); + + final int totalBlockSize = writeGzipBlock(compressedSize, bytesToCompress, crc32.getValue()); + + // Clear out from uncompressedBuffer the data that was written + numUncompressedBytes = 0; + mBlockAddress += totalBlockSize; + return totalBlockSize; + } + + /** + * Writes the entire gzip block, assuming the compressed data is stored in compressedBuffer + * @return size of gzip block that was written. + */ + private int writeGzipBlock(final int compressedSize, final int uncompressedSize, final long crc) throws IOException { + // Init gzip header + codec.writeByte(BGZFStreamConstants.GZIP_ID1); + codec.writeByte(BGZFStreamConstants.GZIP_ID2); + codec.writeByte(BGZFStreamConstants.GZIP_CM_DEFLATE); + codec.writeByte(BGZFStreamConstants.GZIP_FLG); + codec.writeInt(0); // Modification time + codec.writeByte(BGZFStreamConstants.GZIP_XFL); + codec.writeByte(BGZFStreamConstants.GZIP_OS_UNKNOWN); + codec.writeShort(BGZFStreamConstants.GZIP_XLEN); + codec.writeByte(BGZFStreamConstants.BGZF_ID1); + codec.writeByte(BGZFStreamConstants.BGZF_ID2); + codec.writeShort(BGZFStreamConstants.BGZF_LEN); + final int totalBlockSize = compressedSize + BGZFStreamConstants.BLOCK_HEADER_LENGTH + + BGZFStreamConstants.BLOCK_FOOTER_LENGTH; + + // I don't know why we store block size - 1, but that is what the spec says + codec.writeShort((short)(totalBlockSize - 1)); + codec.writeBytes(compressedBuffer, 0, compressedSize); + codec.writeInt((int)crc); + codec.writeInt(uncompressedSize); + return totalBlockSize; + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BlockGunzipper.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BlockGunzipper.java new file mode 100644 index 0000000..e67fca8 --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/BlockGunzipper.java @@ -0,0 +1,114 @@ +package org.xbib.io.compress.bgzf; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.zip.CRC32; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; + +/** + * For decompressing GZIP blocks that are already loaded into a byte[]. + * The main advantage is that this object can be used over and over again to decompress many blocks, + * whereas a new GZIPInputStream and ByteArrayInputStream would otherwise need to be created for each + * block to be decompressed. + * + * This code requires that the GZIP header conform to the GZIP blocks written to BAM files, with + * a specific subfield and no other optional stuff. 
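+ * CRC validation of each decompressed block is disabled by default and can be enabled via
+ * {@link #setCheckCrcs(boolean)}.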
+ */ +public class BlockGunzipper { + private static InflaterFactory defaultInflaterFactory = new InflaterFactory(); + private final Inflater inflater; + private final CRC32 crc32 = new CRC32(); + private boolean checkCrcs = false; + BlockGunzipper() { + inflater = defaultInflaterFactory.makeInflater(true); // GZIP mode + } + + /** + * Create a BlockGunzipper using the provided inflaterFactory + * @param inflaterFactory + */ + BlockGunzipper(InflaterFactory inflaterFactory) { + inflater = inflaterFactory.makeInflater(true); // GZIP mode + } + + /** + * Sets the default {@link InflaterFactory} that will be used for all instances unless specified otherwise in the constructor. + * If this method is not called the default is a factory that will create the JDK {@link Inflater}. + * @param inflaterFactory non-null default factory. + */ + public static void setDefaultInflaterFactory(final InflaterFactory inflaterFactory) { + if (inflaterFactory == null) { + throw new IllegalArgumentException("null inflaterFactory"); + } + defaultInflaterFactory = inflaterFactory; + } + + public static InflaterFactory getDefaultInflaterFactory() { + return defaultInflaterFactory; + } + /** Allows the caller to decide whether or not to check CRCs on when uncompressing blocks. */ + public void setCheckCrcs(final boolean check) { + this.checkCrcs = check; + } + + /** + * Decompress GZIP-compressed data + * @param uncompressedBlock must be big enough to hold decompressed output. + * @param compressedBlock compressed data starting at offset 0 + * @param compressedLength size of compressed data, possibly less than the size of the buffer. + */ + void unzipBlock(byte[] uncompressedBlock, byte[] compressedBlock, int compressedLength) { + try { + ByteBuffer byteBuffer = ByteBuffer.wrap(compressedBlock, 0, compressedLength); + byteBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Validate GZIP header + if (byteBuffer.get() != BGZFStreamConstants.GZIP_ID1 || + byteBuffer.get() != (byte)BGZFStreamConstants.GZIP_ID2 || + byteBuffer.get() != BGZFStreamConstants.GZIP_CM_DEFLATE || + byteBuffer.get() != BGZFStreamConstants.GZIP_FLG + ) { + throw new BGZFFormatException("Invalid GZIP header"); + } + // Skip MTIME, XFL, OS fields + byteBuffer.position(byteBuffer.position() + 6); + if (byteBuffer.getShort() != BGZFStreamConstants.GZIP_XLEN) { + throw new BGZFFormatException("Invalid GZIP header"); + } + // Skip blocksize subfield intro + byteBuffer.position(byteBuffer.position() + 4); + // Read ushort + final int totalBlockSize = (byteBuffer.getShort() & 0xffff) + 1; + if (totalBlockSize != compressedLength) { + throw new BGZFFormatException("GZIP blocksize disagreement"); + } + + // Read expected size and CRD from end of GZIP block + final int deflatedSize = compressedLength - BGZFStreamConstants.BLOCK_HEADER_LENGTH - BGZFStreamConstants.BLOCK_FOOTER_LENGTH; + byteBuffer.position(byteBuffer.position() + deflatedSize); + int expectedCrc = byteBuffer.getInt(); + int uncompressedSize = byteBuffer.getInt(); + inflater.reset(); + + // Decompress + inflater.setInput(compressedBlock, BGZFStreamConstants.BLOCK_HEADER_LENGTH, deflatedSize); + final int inflatedBytes = inflater.inflate(uncompressedBlock, 0, uncompressedSize); + if (inflatedBytes != uncompressedSize) { + throw new BGZFFormatException("Did not inflate expected amount"); + } + + // Validate CRC if so desired + if (this.checkCrcs) { + crc32.reset(); + crc32.update(uncompressedBlock, 0, uncompressedSize); + final long crc = crc32.getValue(); + if ((int)crc != expectedCrc) { + throw 
new BGZFFormatException("CRC mismatch"); + } + } + } catch (DataFormatException e) { + throw new BGZFException(e); + } + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/DeflaterFactory.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/DeflaterFactory.java new file mode 100644 index 0000000..da1605d --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/DeflaterFactory.java @@ -0,0 +1,24 @@ +package org.xbib.io.compress.bgzf; + +import java.util.zip.Deflater; + +/** + * Factory for {@link Deflater} objects used by {@link BlockCompressedOutputStream}. + * This class may be extended to provide alternative deflaters (e.g., for improved performance). + */ +public class DeflaterFactory { + + public DeflaterFactory() { + //Note: made explicit constructor to make searching for references easier + } + + /** + * Returns a deflater object that will be used when writing BAM files. + * Subclasses may override to provide their own deflater implementation. + * @param compressionLevel the compression level (0-9) + * @param gzipCompatible if true then use GZIP compatible compression + */ + public Deflater makeDeflater(final int compressionLevel, final boolean gzipCompatible) { + return new Deflater(compressionLevel, gzipCompatible); + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/InflaterFactory.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/InflaterFactory.java new file mode 100644 index 0000000..1cdc4da --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/InflaterFactory.java @@ -0,0 +1,20 @@ +package org.xbib.io.compress.bgzf; + +import java.util.zip.Inflater; + +/** + * Factory for {@link Inflater} objects used by {@link BlockGunzipper}. + * This class may be extended to provide alternative inflaters (e.g., for improved performance). + * The default implementation returns a JDK {@link Inflater} + */ +public class InflaterFactory { + /** + * Returns an inflater object that will be used when reading DEFLATE compressed files. + * Subclasses may override to provide their own inflater implementation. + * The default implementation returns a JDK {@link Inflater} + * @param gzipCompatible if true then use GZIP compatible compression + */ + public Inflater makeInflater(final boolean gzipCompatible) { + return new Inflater(gzipCompatible); + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableBufferedStream.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableBufferedStream.java new file mode 100644 index 0000000..0f13d85 --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableBufferedStream.java @@ -0,0 +1,105 @@ +package org.xbib.io.compress.bgzf; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * A wrapper class to provide buffered read access to a SeekableStream. Just wrapping such a stream with + * a BufferedInputStream will not work as it does not support seeking. In this implementation a + * seek call is delegated to the wrapped stream, and the buffer reset. + */ +public class SeekableBufferedStream extends SeekableStream { + + /** Little extension to buffered input stream to give access to the available bytes in the buffer. 
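+     * The exposed byte count lets {@code skip} stay inside the buffer when the requested
+     * distance fits entirely within it.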
*/ + private static class ExtBufferedInputStream extends BufferedInputStream { + private ExtBufferedInputStream(final InputStream inputStream, final int i) { + super(inputStream, i); + } + + /** Returns the number of bytes that can be read from the buffer without reading more into the buffer. */ + int getBytesInBufferAvailable() { + if (this.count == this.pos) return 0; // documented test for "is buffer empty" + else return this.buf.length - this.pos; + } + } + + public static final int DEFAULT_BUFFER_SIZE = 512000; + + final private int bufferSize; + final SeekableStream wrappedStream; + ExtBufferedInputStream bufferedStream; + long position; + + public SeekableBufferedStream(final SeekableStream stream, final int bufferSize) { + this.bufferSize = bufferSize; + this.wrappedStream = stream; + this.position = 0; + bufferedStream = new ExtBufferedInputStream(wrappedStream, bufferSize); + } + + public SeekableBufferedStream(final SeekableStream stream) { + this(stream, DEFAULT_BUFFER_SIZE); + } + + @Override + public long length() { + return wrappedStream.length(); + } + + @Override + public long skip(final long skipLength) throws IOException { + if (skipLength < this.bufferedStream.getBytesInBufferAvailable()) { + final long retval = this.bufferedStream.skip(skipLength); + this.position += retval; + return retval; + } else { + final long position = this.position + skipLength; + seek(position); + return skipLength; + } + } + + @Override + public void seek(final long position) throws IOException { + this.position = position; + wrappedStream.seek(position); + bufferedStream = new ExtBufferedInputStream(wrappedStream, bufferSize); + } + + @Override + public int read() throws IOException { + int b = bufferedStream.read(); + position++; + return b; + } + + @Override + public int read(final byte[] buffer, final int offset, final int length) throws IOException { + final int nBytesRead = bufferedStream.read(buffer, offset, length); + if (nBytesRead > 0) { + position += nBytesRead; + } + return nBytesRead; + } + + @Override + public void close() throws IOException { + wrappedStream.close(); + } + + @Override + public boolean eof() throws IOException { + return position >= wrappedStream.length(); + } + + @Override + public String getSource() { + return wrappedStream.getSource(); + } + + @Override + public long position() throws IOException { + return position; + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableFileStream.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableFileStream.java new file mode 100644 index 0000000..8a80470 --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableFileStream.java @@ -0,0 +1,110 @@ +package org.xbib.io.compress.bgzf; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; + +public class SeekableFileStream extends SeekableStream { + + /** + * Collection of all open instances. SeekableFileStream objects are usually open and kept open for the + * duration of a session. This collection supports a method to close them all. 
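+     * Instances add themselves to this collection in the constructor and remove themselves in
+     * {@link #close()}; {@link #closeAllInstances()} closes and clears whatever remains.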
+ */ + private static final Collection allInstances = Collections.synchronizedCollection(new HashSet<>()); + + File file; + RandomAccessFile fis; + + public SeekableFileStream(final File file) throws FileNotFoundException { + this.file = file; + fis = new RandomAccessFile(file, "r"); + allInstances.add(this); + } + + @Override + public long length() { + return file.length(); + } + + @Override + public boolean eof() throws IOException { + return fis.length() == fis.getFilePointer(); + } + + @Override + public void seek(final long position) throws IOException { + fis.seek(position); + } + + @Override + public long position() throws IOException { + return fis.getChannel().position(); + } + + @Override + public long skip(long n) throws IOException { + long initPos = position(); + fis.getChannel().position(initPos + n); + return position() - initPos; + } + + @Override + public int read(final byte[] buffer, final int offset, final int length) throws IOException { + if (length < 0) { + throw new IndexOutOfBoundsException(); + } + int n = 0; + while (n < length) { + final int count = fis.read(buffer, offset + n, length - n); + if (count < 0) { + if (n > 0) { + return n; + } else { + return count; + } + } + n += count; + } + return n; + + } + + @Override + public int read() throws IOException { + return fis.read(); + } + + @Override + public int read(byte[] b) throws IOException { + return fis.read(b); + } + + @Override + public String getSource() { + return file.getAbsolutePath(); + } + + + @Override + public void close() throws IOException { + allInstances.remove(this); + fis.close(); + + } + + public static synchronized void closeAllInstances() { + Collection clonedInstances = new HashSet<>(allInstances); + for (SeekableFileStream sfs : clonedInstances) { + try { + sfs.close(); + } catch (IOException e) { + // + } + } + allInstances.clear(); + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableStream.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableStream.java new file mode 100644 index 0000000..8af3c4c --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableStream.java @@ -0,0 +1,44 @@ +package org.xbib.io.compress.bgzf; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +public abstract class SeekableStream extends InputStream { + + public abstract long length(); + + public abstract long position() throws IOException; + + public abstract void seek(long position) throws IOException; + + @Override + public abstract int read(byte[] buffer, int offset, int length) throws IOException; + + @Override + public abstract void close() throws IOException; + + public abstract boolean eof() throws IOException; + + /** + * @return String representation of source (e.g. URL, file path, etc.), or null if not available. + */ + public abstract String getSource(); + + /** + * Read enough bytes to fill the input buffer. 
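+     * Short reads from {@link #read(byte[], int, int)} are retried until the buffer is full.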
+ * @param b byte array + * @throws EOFException If EOF is reached before buffer is filled + */ + public void readFully(byte[] b) throws IOException { + int len = b.length; + int n = 0; + while (n < len) { + int count = read(b, n, len - n); + if (count < 0){ + throw new EOFException(); + } + n += count; + } + } +} diff --git a/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableStreamFactory.java b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableStreamFactory.java new file mode 100644 index 0000000..4acb8b4 --- /dev/null +++ b/io-compress-bgzf/src/main/java/org/xbib/io/compress/bgzf/SeekableStreamFactory.java @@ -0,0 +1,11 @@ +package org.xbib.io.compress.bgzf; + +import java.io.File; +import java.io.IOException; + +public class SeekableStreamFactory { + + public static SeekableStream getStreamFor(String path) throws IOException { + return new SeekableFileStream(new File(path)); + } +} diff --git a/settings.gradle b/settings.gradle index 6d59c8e..3166302 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,3 +1,4 @@ +include 'io-compress-bgzf' include 'io-compress-bzip2' include 'io-compress-lzf' include 'io-compress-xz'