update to gradle 7.3.2, add bgzf (MIT License)
This commit is contained in:
parent
9ca8990bf0
commit
337e1c19c0
24 changed files with 2588 additions and 113 deletions
|
@ -1,5 +1,5 @@
|
||||||
group = org.xbib
|
group = org.xbib
|
||||||
name = archive
|
name = archive
|
||||||
version = 1.0.1
|
version = 1.1.0
|
||||||
|
|
||||||
gradle.wrapper.version = 6.6.1
|
gradle.wrapper.version = 7.3.2
|
||||||
|
|
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Binary file not shown.
2
gradle/wrapper/gradle-wrapper.properties
vendored
2
gradle/wrapper/gradle-wrapper.properties
vendored
|
@ -1,5 +1,5 @@
|
||||||
distributionBase=GRADLE_USER_HOME
|
distributionBase=GRADLE_USER_HOME
|
||||||
distributionPath=wrapper/dists
|
distributionPath=wrapper/dists
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-6.6.1-all.zip
|
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-all.zip
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
zipStorePath=wrapper/dists
|
zipStorePath=wrapper/dists
|
||||||
|
|
269
gradlew
vendored
269
gradlew
vendored
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env sh
|
#!/bin/sh
|
||||||
|
|
||||||
#
|
#
|
||||||
# Copyright 2015 the original author or authors.
|
# Copyright © 2015-2021 the original authors.
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
@ -17,67 +17,101 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
##############################################################################
|
##############################################################################
|
||||||
##
|
#
|
||||||
## Gradle start up script for UN*X
|
# Gradle start up script for POSIX generated by Gradle.
|
||||||
##
|
#
|
||||||
|
# Important for running:
|
||||||
|
#
|
||||||
|
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
||||||
|
# noncompliant, but you have some other compliant shell such as ksh or
|
||||||
|
# bash, then to run this script, type that shell name before the whole
|
||||||
|
# command line, like:
|
||||||
|
#
|
||||||
|
# ksh Gradle
|
||||||
|
#
|
||||||
|
# Busybox and similar reduced shells will NOT work, because this script
|
||||||
|
# requires all of these POSIX shell features:
|
||||||
|
# * functions;
|
||||||
|
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
||||||
|
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
||||||
|
# * compound commands having a testable exit status, especially «case»;
|
||||||
|
# * various built-in commands including «command», «set», and «ulimit».
|
||||||
|
#
|
||||||
|
# Important for patching:
|
||||||
|
#
|
||||||
|
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
||||||
|
# by Bash, Ksh, etc; in particular arrays are avoided.
|
||||||
|
#
|
||||||
|
# The "traditional" practice of packing multiple parameters into a
|
||||||
|
# space-separated string is a well documented source of bugs and security
|
||||||
|
# problems, so this is (mostly) avoided, by progressively accumulating
|
||||||
|
# options in "$@", and eventually passing that to Java.
|
||||||
|
#
|
||||||
|
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
||||||
|
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
||||||
|
# see the in-line comments for details.
|
||||||
|
#
|
||||||
|
# There are tweaks for specific operating systems such as AIX, CygWin,
|
||||||
|
# Darwin, MinGW, and NonStop.
|
||||||
|
#
|
||||||
|
# (3) This script is generated from the Groovy template
|
||||||
|
# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||||
|
# within the Gradle project.
|
||||||
|
#
|
||||||
|
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||||
|
#
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
||||||
# Attempt to set APP_HOME
|
# Attempt to set APP_HOME
|
||||||
|
|
||||||
# Resolve links: $0 may be a link
|
# Resolve links: $0 may be a link
|
||||||
PRG="$0"
|
app_path=$0
|
||||||
# Need this for relative symlinks.
|
|
||||||
while [ -h "$PRG" ] ; do
|
# Need this for daisy-chained symlinks.
|
||||||
ls=`ls -ld "$PRG"`
|
while
|
||||||
link=`expr "$ls" : '.*-> \(.*\)$'`
|
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
||||||
if expr "$link" : '/.*' > /dev/null; then
|
[ -h "$app_path" ]
|
||||||
PRG="$link"
|
do
|
||||||
else
|
ls=$( ls -ld "$app_path" )
|
||||||
PRG=`dirname "$PRG"`"/$link"
|
link=${ls#*' -> '}
|
||||||
fi
|
case $link in #(
|
||||||
|
/*) app_path=$link ;; #(
|
||||||
|
*) app_path=$APP_HOME$link ;;
|
||||||
|
esac
|
||||||
done
|
done
|
||||||
SAVED="`pwd`"
|
|
||||||
cd "`dirname \"$PRG\"`/" >/dev/null
|
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
|
||||||
APP_HOME="`pwd -P`"
|
|
||||||
cd "$SAVED" >/dev/null
|
|
||||||
|
|
||||||
APP_NAME="Gradle"
|
APP_NAME="Gradle"
|
||||||
APP_BASE_NAME=`basename "$0"`
|
APP_BASE_NAME=${0##*/}
|
||||||
|
|
||||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||||
|
|
||||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||||
MAX_FD="maximum"
|
MAX_FD=maximum
|
||||||
|
|
||||||
warn () {
|
warn () {
|
||||||
echo "$*"
|
echo "$*"
|
||||||
}
|
} >&2
|
||||||
|
|
||||||
die () {
|
die () {
|
||||||
echo
|
echo
|
||||||
echo "$*"
|
echo "$*"
|
||||||
echo
|
echo
|
||||||
exit 1
|
exit 1
|
||||||
}
|
} >&2
|
||||||
|
|
||||||
# OS specific support (must be 'true' or 'false').
|
# OS specific support (must be 'true' or 'false').
|
||||||
cygwin=false
|
cygwin=false
|
||||||
msys=false
|
msys=false
|
||||||
darwin=false
|
darwin=false
|
||||||
nonstop=false
|
nonstop=false
|
||||||
case "`uname`" in
|
case "$( uname )" in #(
|
||||||
CYGWIN* )
|
CYGWIN* ) cygwin=true ;; #(
|
||||||
cygwin=true
|
Darwin* ) darwin=true ;; #(
|
||||||
;;
|
MSYS* | MINGW* ) msys=true ;; #(
|
||||||
Darwin* )
|
NONSTOP* ) nonstop=true ;;
|
||||||
darwin=true
|
|
||||||
;;
|
|
||||||
MINGW* )
|
|
||||||
msys=true
|
|
||||||
;;
|
|
||||||
NONSTOP* )
|
|
||||||
nonstop=true
|
|
||||||
;;
|
|
||||||
esac
|
esac
|
||||||
|
|
||||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||||
|
@ -87,9 +121,9 @@ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||||
if [ -n "$JAVA_HOME" ] ; then
|
if [ -n "$JAVA_HOME" ] ; then
|
||||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||||
# IBM's JDK on AIX uses strange locations for the executables
|
# IBM's JDK on AIX uses strange locations for the executables
|
||||||
JAVACMD="$JAVA_HOME/jre/sh/java"
|
JAVACMD=$JAVA_HOME/jre/sh/java
|
||||||
else
|
else
|
||||||
JAVACMD="$JAVA_HOME/bin/java"
|
JAVACMD=$JAVA_HOME/bin/java
|
||||||
fi
|
fi
|
||||||
if [ ! -x "$JAVACMD" ] ; then
|
if [ ! -x "$JAVACMD" ] ; then
|
||||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||||
|
@ -98,7 +132,7 @@ Please set the JAVA_HOME variable in your environment to match the
|
||||||
location of your Java installation."
|
location of your Java installation."
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
JAVACMD="java"
|
JAVACMD=java
|
||||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||||
|
|
||||||
Please set the JAVA_HOME variable in your environment to match the
|
Please set the JAVA_HOME variable in your environment to match the
|
||||||
|
@ -106,80 +140,95 @@ location of your Java installation."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Increase the maximum file descriptors if we can.
|
# Increase the maximum file descriptors if we can.
|
||||||
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||||
MAX_FD_LIMIT=`ulimit -H -n`
|
case $MAX_FD in #(
|
||||||
if [ $? -eq 0 ] ; then
|
max*)
|
||||||
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
MAX_FD=$( ulimit -H -n ) ||
|
||||||
MAX_FD="$MAX_FD_LIMIT"
|
warn "Could not query maximum file descriptor limit"
|
||||||
fi
|
esac
|
||||||
ulimit -n $MAX_FD
|
case $MAX_FD in #(
|
||||||
if [ $? -ne 0 ] ; then
|
'' | soft) :;; #(
|
||||||
warn "Could not set maximum file descriptor limit: $MAX_FD"
|
*)
|
||||||
fi
|
ulimit -n "$MAX_FD" ||
|
||||||
else
|
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||||
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# For Darwin, add options to specify how the application appears in the dock
|
|
||||||
if $darwin; then
|
|
||||||
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
|
|
||||||
fi
|
|
||||||
|
|
||||||
# For Cygwin or MSYS, switch paths to Windows format before running java
|
|
||||||
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
|
|
||||||
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
|
||||||
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
|
||||||
|
|
||||||
JAVACMD=`cygpath --unix "$JAVACMD"`
|
|
||||||
|
|
||||||
# We build the pattern for arguments to be converted via cygpath
|
|
||||||
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
|
||||||
SEP=""
|
|
||||||
for dir in $ROOTDIRSRAW ; do
|
|
||||||
ROOTDIRS="$ROOTDIRS$SEP$dir"
|
|
||||||
SEP="|"
|
|
||||||
done
|
|
||||||
OURCYGPATTERN="(^($ROOTDIRS))"
|
|
||||||
# Add a user-defined pattern to the cygpath arguments
|
|
||||||
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
|
|
||||||
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
|
|
||||||
fi
|
|
||||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
|
||||||
i=0
|
|
||||||
for arg in "$@" ; do
|
|
||||||
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
|
|
||||||
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
|
|
||||||
|
|
||||||
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
|
|
||||||
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
|
|
||||||
else
|
|
||||||
eval `echo args$i`="\"$arg\""
|
|
||||||
fi
|
|
||||||
i=`expr $i + 1`
|
|
||||||
done
|
|
||||||
case $i in
|
|
||||||
0) set -- ;;
|
|
||||||
1) set -- "$args0" ;;
|
|
||||||
2) set -- "$args0" "$args1" ;;
|
|
||||||
3) set -- "$args0" "$args1" "$args2" ;;
|
|
||||||
4) set -- "$args0" "$args1" "$args2" "$args3" ;;
|
|
||||||
5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
|
|
||||||
6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
|
|
||||||
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
|
|
||||||
8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
|
|
||||||
9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
|
|
||||||
esac
|
esac
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Escape application args
|
# Collect all arguments for the java command, stacking in reverse order:
|
||||||
save () {
|
# * args from the command line
|
||||||
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
|
# * the main class name
|
||||||
echo " "
|
# * -classpath
|
||||||
}
|
# * -D...appname settings
|
||||||
APP_ARGS=`save "$@"`
|
# * --module-path (only if needed)
|
||||||
|
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
||||||
|
|
||||||
# Collect all arguments for the java command, following the shell quoting and substitution rules
|
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||||
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
|
if "$cygwin" || "$msys" ; then
|
||||||
|
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
||||||
|
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
||||||
|
|
||||||
|
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
||||||
|
|
||||||
|
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||||
|
for arg do
|
||||||
|
if
|
||||||
|
case $arg in #(
|
||||||
|
-*) false ;; # don't mess with options #(
|
||||||
|
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
||||||
|
[ -e "$t" ] ;; #(
|
||||||
|
*) false ;;
|
||||||
|
esac
|
||||||
|
then
|
||||||
|
arg=$( cygpath --path --ignore --mixed "$arg" )
|
||||||
|
fi
|
||||||
|
# Roll the args list around exactly as many times as the number of
|
||||||
|
# args, so each arg winds up back in the position where it started, but
|
||||||
|
# possibly modified.
|
||||||
|
#
|
||||||
|
# NB: a `for` loop captures its iteration list before it begins, so
|
||||||
|
# changing the positional parameters here affects neither the number of
|
||||||
|
# iterations, nor the values presented in `arg`.
|
||||||
|
shift # remove old arg
|
||||||
|
set -- "$@" "$arg" # push replacement arg
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Collect all arguments for the java command;
|
||||||
|
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
|
||||||
|
# shell script including quotes and variable substitutions, so put them in
|
||||||
|
# double quotes to make sure that they get re-expanded; and
|
||||||
|
# * put everything else in single quotes, so that it's not re-expanded.
|
||||||
|
|
||||||
|
set -- \
|
||||||
|
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||||
|
-classpath "$CLASSPATH" \
|
||||||
|
org.gradle.wrapper.GradleWrapperMain \
|
||||||
|
"$@"
|
||||||
|
|
||||||
|
# Use "xargs" to parse quoted args.
|
||||||
|
#
|
||||||
|
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
||||||
|
#
|
||||||
|
# In Bash we could simply go:
|
||||||
|
#
|
||||||
|
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
||||||
|
# set -- "${ARGS[@]}" "$@"
|
||||||
|
#
|
||||||
|
# but POSIX shell has neither arrays nor process substitution, so instead we
|
||||||
|
# post-process each arg (as a line of input to sed) to backslash-escape any
|
||||||
|
# character that might be a shell metacharacter, then use eval to reverse
|
||||||
|
# that process (while maintaining the separation between arguments), and wrap
|
||||||
|
# the whole thing up as a single "set" statement.
|
||||||
|
#
|
||||||
|
# This will of course break if any of these variables contains a newline or
|
||||||
|
# an unmatched quote.
|
||||||
|
#
|
||||||
|
|
||||||
|
eval "set -- $(
|
||||||
|
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
||||||
|
xargs -n1 |
|
||||||
|
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
||||||
|
tr '\n' ' '
|
||||||
|
)" '"$@"'
|
||||||
|
|
||||||
exec "$JAVACMD" "$@"
|
exec "$JAVACMD" "$@"
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
dependencies {
|
dependencies {
|
||||||
api project(':io-archive')
|
api project(':io-archive')
|
||||||
|
implementation project(':io-compress-bgzf')
|
||||||
implementation project(':io-compress-bzip2')
|
implementation project(':io-compress-bzip2')
|
||||||
implementation project(':io-compress-lzf')
|
implementation project(':io-compress-lzf')
|
||||||
implementation project(':io-compress-xz')
|
implementation project(':io-compress-xz')
|
||||||
|
|
|
@ -2,11 +2,13 @@ module org.xbib.io.codec {
|
||||||
uses org.xbib.io.codec.StreamCodec;
|
uses org.xbib.io.codec.StreamCodec;
|
||||||
exports org.xbib.io.codec;
|
exports org.xbib.io.codec;
|
||||||
exports org.xbib.io.codec.ar;
|
exports org.xbib.io.codec.ar;
|
||||||
|
exports org.xbib.io.codec.bgzf;
|
||||||
exports org.xbib.io.codec.cpio;
|
exports org.xbib.io.codec.cpio;
|
||||||
exports org.xbib.io.codec.file;
|
exports org.xbib.io.codec.file;
|
||||||
exports org.xbib.io.codec.jar;
|
exports org.xbib.io.codec.jar;
|
||||||
exports org.xbib.io.codec.tar;
|
exports org.xbib.io.codec.tar;
|
||||||
exports org.xbib.io.codec.zip;
|
exports org.xbib.io.codec.zip;
|
||||||
|
requires transitive org.xbib.io.compress.bgzf;
|
||||||
requires org.xbib.io.compress.bzip;
|
requires org.xbib.io.compress.bzip;
|
||||||
requires org.xbib.io.compress.lzf;
|
requires org.xbib.io.compress.lzf;
|
||||||
requires org.xbib.io.compress.xz;
|
requires org.xbib.io.compress.xz;
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
package org.xbib.io.codec.bgzf;
|
||||||
|
|
||||||
|
import org.xbib.io.codec.StreamCodec;
|
||||||
|
import org.xbib.io.compress.bgzf.BlockCompressedInputStream;
|
||||||
|
import org.xbib.io.compress.bgzf.BlockCompressedOutputStream;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
|
||||||
|
public class BzgfStreamCodec implements StreamCodec<BlockCompressedInputStream, BlockCompressedOutputStream> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getName() {
|
||||||
|
return "bgzf";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BlockCompressedInputStream decode(InputStream in) throws IOException {
|
||||||
|
return new BlockCompressedInputStream(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BlockCompressedInputStream decode(InputStream in, int bufsize) throws IOException {
|
||||||
|
return new BlockCompressedInputStream(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BlockCompressedOutputStream encode(OutputStream out) throws IOException {
|
||||||
|
return new BlockCompressedOutputStream(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BlockCompressedOutputStream encode(OutputStream out, int bufsize) throws IOException {
|
||||||
|
return new BlockCompressedOutputStream(out);
|
||||||
|
}
|
||||||
|
}
|
21
io-compress-bgzf/LICENSE.txt
Normal file
21
io-compress-bgzf/LICENSE.txt
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
/*
|
||||||
|
* The MIT License
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
3
io-compress-bgzf/src/main/java/module-info.java
Normal file
3
io-compress-bgzf/src/main/java/module-info.java
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
module org.xbib.io.compress.bgzf {
|
||||||
|
exports org.xbib.io.compress.bgzf;
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
/**
 * Unchecked base exception of the BGZF package.
 *
 * <p>Each constructor forwards its arguments unchanged to the matching
 * {@link RuntimeException} constructor.
 */
@SuppressWarnings("serial")
public class BGZFException extends RuntimeException {

    /** Creates an exception with neither a detail message nor a cause. */
    public BGZFException() {
        super();
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message the detail message
     */
    public BGZFException(final String message) {
        super(message);
    }

    /**
     * Creates an exception with a detail message and a cause.
     *
     * @param message the detail message
     * @param cause the underlying cause
     */
    public BGZFException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception that wraps a cause.
     *
     * @param cause the underlying cause
     */
    public BGZFException(final Throwable cause) {
        super(cause);
    }
}
|
|
@ -0,0 +1,67 @@
|
||||||
|
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
public class BGZFFilePointerUtil {
|
||||||
|
|
||||||
|
private static final int SHIFT_AMOUNT = 16;
|
||||||
|
private static final int OFFSET_MASK = 0xffff;
|
||||||
|
private static final long ADDRESS_MASK = 0xFFFFFFFFFFFFL;
|
||||||
|
|
||||||
|
public static final long MAX_BLOCK_ADDRESS = ADDRESS_MASK;
|
||||||
|
public static final int MAX_OFFSET = OFFSET_MASK;
|
||||||
|
|
||||||
|
public static int compare(final long vfp1, final long vfp2) {
|
||||||
|
if (vfp1 == vfp2) return 0;
|
||||||
|
// When treating as unsigned, negative number is > positive.
|
||||||
|
if (vfp1 < 0 && vfp2 >= 0) return 1;
|
||||||
|
if (vfp1 >= 0 && vfp2 < 0) return -1;
|
||||||
|
// Either both negative or both non-negative, so regular comparison works.
|
||||||
|
if (vfp1 < vfp2) return -1;
|
||||||
|
return 1; // vfp1 > vfp2
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if vfp2 points to somewhere in the same BGZF block, or the one immediately
|
||||||
|
* following vfp1's BGZF block.
|
||||||
|
*/
|
||||||
|
public static boolean areInSameOrAdjacentBlocks(final long vfp1, final long vfp2) {
|
||||||
|
final long block1 = getBlockAddress(vfp1);
|
||||||
|
final long block2 = getBlockAddress(vfp2);
|
||||||
|
return (block1 == block2 || block1 + 1 == block2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param blockAddress File offset of start of BGZF block.
|
||||||
|
* @param blockOffset Offset into uncompressed block.
|
||||||
|
* @return Virtual file pointer that embodies the input parameters.
|
||||||
|
*/
|
||||||
|
static long makeFilePointer(final long blockAddress, final int blockOffset) {
|
||||||
|
if (blockOffset < 0) {
|
||||||
|
throw new IllegalArgumentException("Negative blockOffset " + blockOffset
|
||||||
|
+ " not allowed.");
|
||||||
|
}
|
||||||
|
if (blockAddress < 0) {
|
||||||
|
throw new IllegalArgumentException("Negative blockAddress " + blockAddress
|
||||||
|
+ " not allowed.");
|
||||||
|
}
|
||||||
|
if (blockOffset > MAX_OFFSET) {
|
||||||
|
throw new IllegalArgumentException("blockOffset " + blockOffset + " too large.");
|
||||||
|
}
|
||||||
|
if (blockAddress > MAX_BLOCK_ADDRESS) {
|
||||||
|
throw new IllegalArgumentException("blockAddress " + blockAddress + " too large.");
|
||||||
|
}
|
||||||
|
return blockAddress << SHIFT_AMOUNT | blockOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static long getBlockAddress(final long virtualFilePointer) {
|
||||||
|
return (virtualFilePointer >> SHIFT_AMOUNT) & ADDRESS_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int getBlockOffset(final long virtualFilePointer) {
|
||||||
|
return (int)(virtualFilePointer & OFFSET_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String asString(final long vfp) {
|
||||||
|
return String.format("%d(0x%x): (block address: %d, offset: %d)", vfp, vfp, getBlockAddress(vfp), getBlockOffset(vfp));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
@SuppressWarnings("serial")
|
||||||
|
public class BGZFFormatException extends BGZFException {
|
||||||
|
|
||||||
|
public BGZFFormatException() {}
|
||||||
|
|
||||||
|
public BGZFFormatException(final String s) {
|
||||||
|
super(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BGZFFormatException(final String s, final Throwable throwable) {
|
||||||
|
super(s, throwable);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BGZFFormatException(final Throwable throwable) {
|
||||||
|
super(throwable);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,95 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
/**
 * Constants shared by the BlockCompressed{Input,Output}Stream classes.
 */
public class BGZFStreamConstants {

    /**
     * Number of bytes in the gzip block before the deflated data. This is not the standard gzip
     * header size because one optional subfield is included, but it is the standard here.
     */
    public static final int BLOCK_HEADER_LENGTH = 18;

    /** Location in the gzip block of the total block size (actually total block size - 1). */
    public static final int BLOCK_LENGTH_OFFSET = 16;

    /** Number of bytes that follow the deflated data. */
    public static final int BLOCK_FOOTER_LENGTH = 8;

    /** A compressed block, including header and footer, must be no larger than this. */
    public static final int MAX_COMPRESSED_BLOCK_SIZE = 0x10000;

    /** Gzip overhead: the header, the footer, and the block size (encoded as a short). */
    public static final int GZIP_OVERHEAD = BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH + 2;

    /** Overhead in bytes when Deflater runs with NO_COMPRESSION (determined experimentally). */
    public static final int NO_COMPRESSION_OVERHEAD = 10;

    /**
     * Push out a gzip block when this many uncompressed bytes have been accumulated. The size is
     * chosen so that, for incompressible data deflated with NO_COMPRESSION, the compressed size
     * is guaranteed to be at most {@link #MAX_COMPRESSED_BLOCK_SIZE}.
     */
    public static final int DEFAULT_UNCOMPRESSED_BLOCK_SIZE =
            0x10000 - (GZIP_OVERHEAD + NO_COMPRESSION_OVERHEAD);

    /** First gzip magic number. */
    public static final byte GZIP_ID1 = 0x1f;
    /** Second gzip magic number (kept as int because it does not fit a signed byte). */
    public static final int GZIP_ID2 = 0x8b;

    /** FEXTRA flag: there are optional fields. */
    public static final int GZIP_FLG = 4;

    /** Extra flags. */
    public static final int GZIP_XFL = 0;

    /** Length of the extra subfield. */
    public static final short GZIP_XLEN = 6;

    /** The deflate compression method, which is customarily used by gzip. */
    public static final byte GZIP_CM_DEFLATE = 8;

    /** Compression level used when none is specified. */
    public static final int DEFAULT_COMPRESSION_LEVEL = 5;

    /** OS is irrelevant because no line terminator translation is done. */
    public static final int GZIP_OS_UNKNOWN = 0xff;

    /** First byte of the subfield ID. */
    public static final byte BGZF_ID1 = 'B';
    /** Second byte of the subfield ID. */
    public static final byte BGZF_ID2 = 'C';

    /** Subfield length in bytes. */
    public static final byte BGZF_LEN = 2;

    /** A complete gzip block with an empty payload. */
    public static final byte[] EMPTY_GZIP_BLOCK = {
            GZIP_ID1,
            (byte) GZIP_ID2,
            GZIP_CM_DEFLATE,
            GZIP_FLG,
            0, 0, 0, 0, // Modification time
            GZIP_XFL,
            (byte) GZIP_OS_UNKNOWN,
            GZIP_XLEN, 0, // Little-endian short
            BGZF_ID1,
            BGZF_ID2,
            BGZF_LEN, 0, // Little-endian short
            // Total block size - 1
            BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH - 1 + 2, 0, // Little-endian short
            // Dummy payload?
            3, 0,
            0, 0, 0, 0, // crc
            0, 0, 0, 0, // uncompressedSize
    };

    /** The fixed leading bytes of a gzip block, up to and including the subfield length. */
    public static final byte[] GZIP_BLOCK_PREAMBLE = {
            GZIP_ID1,
            (byte) GZIP_ID2,
            GZIP_CM_DEFLATE,
            GZIP_FLG,
            0, 0, 0, 0, // Modification time
            GZIP_XFL,
            (byte) GZIP_OS_UNKNOWN,
            GZIP_XLEN, 0, // Little-endian short
            BGZF_ID1,
            BGZF_ID2,
            BGZF_LEN, 0, // Little-endian short
    };
}
|
|
@ -0,0 +1,666 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.EOFException;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.SyncFailedException;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encapsulates file representation of various primitive data types. Forces little-endian disk
|
||||||
|
* representation. Note that this class is currently not very efficient. There are plans to increase
|
||||||
|
* the size of the ByteBuffer, and move data between the ByteBuffer and the underlying input or
|
||||||
|
* output stream in larger chunks.
|
||||||
|
*
|
||||||
|
* All the read methods throw EOFException if the input stream is exhausted before the
|
||||||
|
* required number of bytes are read.
|
||||||
|
*/
|
||||||
|
public class BinaryCodec {
|
||||||
|
|
||||||
|
// Outstream to write to
|
||||||
|
private OutputStream outputStream;
|
||||||
|
// If a file or filename was given it will be stored here. Used for error reporting.
|
||||||
|
private String outputFileName;
|
||||||
|
|
||||||
|
// Input stream to read from
|
||||||
|
private InputStream inputStream;
|
||||||
|
// If a file or filename was given to read from, it will be stored here. Used for error reporting.
|
||||||
|
private String inputFileName;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mode that the BinaryCodec is in. It is either writing to a binary file or reading from. This
|
||||||
|
* is set to true if it is writing to a binary file Right now we don't support reading and
|
||||||
|
* writing to the same file with the same BinaryCodec instance
|
||||||
|
*/
|
||||||
|
private boolean isWriting;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For byte swapping.
|
||||||
|
*/
|
||||||
|
private ByteBuffer byteBuffer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For reading Strings of known length, this can reduce object creation
|
||||||
|
*/
|
||||||
|
private final byte[] scratchBuffer = new byte[16];
|
||||||
|
|
||||||
|
// Byte order used in BAM files.
|
||||||
|
private static final ByteOrder LITTLE_ENDIAN = ByteOrder.LITTLE_ENDIAN;
|
||||||
|
private static final byte[] NULL_BYTE = {0};
|
||||||
|
|
||||||
|
private static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1;
|
||||||
|
private static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1;
|
||||||
|
private static final long MAX_UINT = ((long)Integer.MAX_VALUE * 2) + 1;
|
||||||
|
|
||||||
|
// We never serialize more than this much at a time (except for Strings)
|
||||||
|
private static final int MAX_BYTE_BUFFER = 8;
|
||||||
|
/**
|
||||||
|
* Constructs BinaryCodec from a file and set it's mode to writing or not
|
||||||
|
*
|
||||||
|
* @param file file to be written to or read from
|
||||||
|
* @param writing whether the file is being written to
|
||||||
|
* @throws FileNotFoundException
|
||||||
|
*/
|
||||||
|
public BinaryCodec(final File file, final boolean writing) throws FileNotFoundException {
|
||||||
|
this();
|
||||||
|
|
||||||
|
this.isWriting = writing;
|
||||||
|
if (this.isWriting) {
|
||||||
|
this.outputStream = new FileOutputStream(file);
|
||||||
|
this.outputFileName = file.getName();
|
||||||
|
} else {
|
||||||
|
this.inputStream = new FileInputStream(file);
|
||||||
|
this.inputFileName = file.getName();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs BinaryCodec from a file name and set it's mode to writing or not
|
||||||
|
*
|
||||||
|
* @param fileName name of the file to be written to or read from
|
||||||
|
* @param writing writing whether the file is being written to
|
||||||
|
* @throws FileNotFoundException
|
||||||
|
*/
|
||||||
|
public BinaryCodec(final String fileName, final boolean writing) throws FileNotFoundException {
|
||||||
|
this(new File(fileName), writing);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs BinaryCodec from an output stream
|
||||||
|
*
|
||||||
|
* @param outputStream Stream to write to, since it's an output stream we know that isWriting
|
||||||
|
* should be set to true
|
||||||
|
*/
|
||||||
|
public BinaryCodec(final OutputStream outputStream) {
|
||||||
|
this();
|
||||||
|
setOutputStream(outputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs BinaryCodec from an input stream
|
||||||
|
*
|
||||||
|
* @param inputStream Stream to read from, since we are reading isWriting is set to false
|
||||||
|
*/
|
||||||
|
public BinaryCodec(final InputStream inputStream) {
|
||||||
|
this();
|
||||||
|
setInputStream(inputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ambiguous whether reading or writing until set{In,Out}putStream is called
|
||||||
|
*/
|
||||||
|
public BinaryCodec() {
|
||||||
|
initByteBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shared among ctors. Note that if endianness is changed, all the unsigned methods must also be
|
||||||
|
* changed.
|
||||||
|
*/
|
||||||
|
private void initByteBuffer() {
|
||||||
|
byteBuffer = ByteBuffer.allocate(MAX_BYTE_BUFFER);
|
||||||
|
byteBuffer.order(LITTLE_ENDIAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write whatever has been put into the byte buffer
|
||||||
|
*
|
||||||
|
* @param numBytes -- how much to write. Note that in case of writing an unsigned value, more
|
||||||
|
* bytes were put into the ByteBuffer than will get written out.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private void writeByteBuffer(final int numBytes) throws IOException {
|
||||||
|
assert (numBytes <= byteBuffer.limit());
|
||||||
|
writeBytes(byteBuffer.array(), 0, numBytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a byte to the output buffer
|
||||||
|
*
|
||||||
|
* @param bite byte array to write
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeByte(final byte bite) throws IOException {
|
||||||
|
byteBuffer.clear();
|
||||||
|
byteBuffer.put(bite);
|
||||||
|
writeByteBuffer(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void writeByte(final int b) throws IOException {
|
||||||
|
writeByte((byte)b);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a byte array to the output buffer
|
||||||
|
*
|
||||||
|
* @param bytes value to write
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeBytes(final byte[] bytes) throws IOException {
|
||||||
|
writeBytes(bytes, 0, bytes.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void writeBytes(final byte[] bytes, final int startOffset, final int numBytes) throws IOException {
|
||||||
|
if (!isWriting) {
|
||||||
|
throw new IllegalStateException("Calling write method on BinaryCodec open for read.");
|
||||||
|
}
|
||||||
|
|
||||||
|
outputStream.write(bytes, startOffset, numBytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a 32-bit int to the output stream
|
||||||
|
*
|
||||||
|
* @param value int to write
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeInt(final int value) throws IOException {
|
||||||
|
byteBuffer.clear();
|
||||||
|
byteBuffer.putInt(value);
|
||||||
|
writeByteBuffer(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a double (8 bytes) to the output stream
|
||||||
|
*
|
||||||
|
* @param value double to write
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeDouble(final double value) throws IOException {
|
||||||
|
byteBuffer.clear();
|
||||||
|
byteBuffer.putDouble(value);
|
||||||
|
writeByteBuffer(8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a 64-bit long to the output stream
|
||||||
|
*
|
||||||
|
* @param value long to write
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeLong(final long value) throws IOException {
|
||||||
|
byteBuffer.clear();
|
||||||
|
byteBuffer.putLong(value);
|
||||||
|
writeByteBuffer(8);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a 16-bit short to output stream
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeShort(final short value) throws IOException {
|
||||||
|
byteBuffer.clear();
|
||||||
|
byteBuffer.putShort(value);
|
||||||
|
writeByteBuffer(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a float (4 bytes) to the output stream
|
||||||
|
*
|
||||||
|
* @param value float to write
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeFloat(final float value) throws IOException {
|
||||||
|
byteBuffer.clear();
|
||||||
|
byteBuffer.putFloat(value);
|
||||||
|
writeByteBuffer(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a boolean (1 byte) to the output buffer
|
||||||
|
*
|
||||||
|
* @param value boolean to write
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeBoolean(final boolean value) throws IOException {
|
||||||
|
byteBuffer.clear();
|
||||||
|
byteBuffer.put(value ? (byte)1 : (byte)0);
|
||||||
|
writeByteBuffer(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a string to the buffer as ASCII bytes
|
||||||
|
*
|
||||||
|
* @param value string to write to buffer
|
||||||
|
* @param writeLength prefix the string with the length as a 32-bit int
|
||||||
|
* @param appendNull add a null byte to the end of the string
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeString(final String value, final boolean writeLength, final boolean appendNull) throws IOException {
|
||||||
|
if (writeLength) {
|
||||||
|
int lengthToWrite = value.length();
|
||||||
|
if (appendNull) lengthToWrite++;
|
||||||
|
writeInt(lengthToWrite);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Actually writes the string to a buffer
|
||||||
|
writeString(value);
|
||||||
|
|
||||||
|
if (appendNull) writeBytes(NULL_BYTE);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a string to the buffer as ASCII bytes
|
||||||
|
*
|
||||||
|
* @param value string to write
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private void writeString(final String value) throws IOException {
|
||||||
|
final byte[] byteBuffer = new byte[value.length()];
|
||||||
|
final char[] charBuffer = value.toCharArray();
|
||||||
|
for (int i = 0; i < charBuffer.length; ++i) {
|
||||||
|
byteBuffer[i] = (byte)(charBuffer[i] & 0xff);
|
||||||
|
}
|
||||||
|
writeBytes(byteBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write an 8-bit unsigned byte. NOTE: This method will break if we change to big-endian.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeUByte(final short val) throws IOException {
|
||||||
|
if (val < 0) {
|
||||||
|
throw new IllegalArgumentException("Negative value (" + val
|
||||||
|
+ ") passed to unsigned writing method.");
|
||||||
|
}
|
||||||
|
if (val > MAX_UBYTE) {
|
||||||
|
throw new IllegalArgumentException("Value (" + val
|
||||||
|
+ ") to large to be written as ubyte.");
|
||||||
|
}
|
||||||
|
byteBuffer.clear();
|
||||||
|
byteBuffer.putShort(val);
|
||||||
|
writeByteBuffer(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a 16-bit unsigned short. NOTE: This method will break if we change to big-endian.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeUShort(final int val) throws IOException {
|
||||||
|
if (val < 0) {
|
||||||
|
throw new IllegalArgumentException("Negative value (" + val
|
||||||
|
+ ") passed to unsigned writing method.");
|
||||||
|
}
|
||||||
|
if (val > MAX_USHORT) {
|
||||||
|
throw new IllegalArgumentException("Value (" + val
|
||||||
|
+ ") to large to be written as ushort.");
|
||||||
|
}
|
||||||
|
byteBuffer.clear();
|
||||||
|
byteBuffer.putInt(val);
|
||||||
|
writeByteBuffer(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a 32-bit unsigned int. NOTE: This method will break if we change to big-endian.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void writeUInt(final long val) throws IOException {
|
||||||
|
if (val < 0) {
|
||||||
|
throw new IllegalArgumentException("Negative value (" + val
|
||||||
|
+ ") passed to unsigned writing method.");
|
||||||
|
}
|
||||||
|
if (val > MAX_UINT) {
|
||||||
|
throw new IllegalArgumentException("Value (" + val
|
||||||
|
+ ") to large to be written as uint.");
|
||||||
|
}
|
||||||
|
byteBuffer.clear();
|
||||||
|
byteBuffer.putLong(val);
|
||||||
|
writeByteBuffer(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a byte array from the input stream.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void readBytes(final byte[] buffer) throws IOException {
|
||||||
|
readBytes(buffer, 0, buffer.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a byte array from the input stream
|
||||||
|
*
|
||||||
|
* @param buffer where to put bytes read
|
||||||
|
* @param offset offset to start putting bytes into buffer
|
||||||
|
* @param length number of bytes to read
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void readBytes(final byte[] buffer, final int offset, final int length) throws IOException {
|
||||||
|
int totalNumRead = 0;
|
||||||
|
do {
|
||||||
|
final int numRead =
|
||||||
|
readBytesOrFewer(buffer, offset + totalNumRead, length - totalNumRead);
|
||||||
|
if (numRead < 0) {
|
||||||
|
throw new EOFException(constructErrorMessage("Premature EOF"));
|
||||||
|
} else {
|
||||||
|
totalNumRead += numRead;
|
||||||
|
}
|
||||||
|
} while (totalNumRead < length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a byte array from the input stream.
|
||||||
|
*
|
||||||
|
* @param buffer where to put bytes read
|
||||||
|
* @param offset offset to start putting bytes into buffer
|
||||||
|
* @param length number of bytes to read. Fewer bytes may be read if EOF is reached before
|
||||||
|
* length bytes have been read.
|
||||||
|
* @return the total number of bytes read into the buffer, or -1 if there is no more data
|
||||||
|
* because the end of the stream has been reached.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public int readBytesOrFewer(final byte[] buffer, final int offset, final int length) throws IOException {
|
||||||
|
if (isWriting) {
|
||||||
|
throw new IllegalStateException("Calling read method on BinaryCodec open for write.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return inputStream.read(buffer, offset, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return a single byte read from the input stream.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public byte readByte() throws IOException {
|
||||||
|
if (isWriting) {
|
||||||
|
throw new IllegalStateException("Calling read method on BinaryCodec open for write.");
|
||||||
|
}
|
||||||
|
|
||||||
|
final int ret = inputStream.read();
|
||||||
|
if (ret == -1) {
|
||||||
|
throw new EOFException(constructErrorMessage("Premature EOF"));
|
||||||
|
}
|
||||||
|
return (byte)ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if it is possible to know for sure if at EOF, and it is known for sure. If the
|
||||||
|
* input stream is a ByteArrayInputStream, this is faster than causing a
|
||||||
|
* RuntimeEOFException to be thrown.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public boolean knownAtEof() throws IOException {
|
||||||
|
if (isWriting) {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"Calling knownAtEof method on BinaryCodec open for write.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return inputStream instanceof ByteArrayInputStream && inputStream.available() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a string off the input stream, as ASCII bytes
|
||||||
|
*
|
||||||
|
* @param length length of string to read
|
||||||
|
* @return String read from stream
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public String readString(final int length) throws IOException {
|
||||||
|
final byte[] buffer;
|
||||||
|
// Recycle single buffer if possible
|
||||||
|
if (length <= scratchBuffer.length) {
|
||||||
|
buffer = scratchBuffer;
|
||||||
|
} else {
|
||||||
|
buffer = new byte[length];
|
||||||
|
|
||||||
|
}
|
||||||
|
readBytes(buffer, 0, length);
|
||||||
|
|
||||||
|
final char[] charBuffer = new char[length];
|
||||||
|
for (int i = 0; i < length; ++i) {
|
||||||
|
charBuffer[i] = (char)buffer[i];
|
||||||
|
}
|
||||||
|
return new String(charBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read ASCII bytes from the input stream until a null byte is read
|
||||||
|
*
|
||||||
|
* @return String constructed from the ASCII bytes read
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public String readNullTerminatedString() throws IOException {
|
||||||
|
final StringBuilder ret = new StringBuilder();
|
||||||
|
for (byte b = this.readByte(); b != 0; b = this.readByte()) {
|
||||||
|
ret.append((char)(b & 0xff));
|
||||||
|
}
|
||||||
|
return ret.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read an int length, and then a String of that length
|
||||||
|
*
|
||||||
|
* @param devourNull if true, the length include a null terminator, which is read and discarded
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public String readLengthAndString(final boolean devourNull) throws IOException {
|
||||||
|
int length = readInt();
|
||||||
|
if (devourNull) {
|
||||||
|
--length;
|
||||||
|
}
|
||||||
|
final String ret = readString(length);
|
||||||
|
if (devourNull) {
|
||||||
|
readByte();
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void readByteBuffer(final int numBytes) throws IOException {
|
||||||
|
assert (numBytes <= byteBuffer.capacity());
|
||||||
|
readBytes(byteBuffer.array(), 0, numBytes);
|
||||||
|
byteBuffer.limit(byteBuffer.capacity());
|
||||||
|
byteBuffer.position(numBytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read an int off the input stream
|
||||||
|
*
|
||||||
|
* @return int from input stream
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public int readInt() throws IOException {
|
||||||
|
readByteBuffer(4);
|
||||||
|
byteBuffer.flip();
|
||||||
|
return byteBuffer.getInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a double off the input stream
|
||||||
|
*
|
||||||
|
* @return double
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public double readDouble() throws IOException {
|
||||||
|
readByteBuffer(8);
|
||||||
|
byteBuffer.flip();
|
||||||
|
return byteBuffer.getDouble();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a long off the input stream
|
||||||
|
*
|
||||||
|
* @return long
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public long readLong() throws IOException {
|
||||||
|
readByteBuffer(8);
|
||||||
|
byteBuffer.flip();
|
||||||
|
return byteBuffer.getLong();
|
||||||
|
}
|
||||||
|
|
||||||
|
public short readShort() throws IOException {
|
||||||
|
readByteBuffer(2);
|
||||||
|
byteBuffer.flip();
|
||||||
|
return byteBuffer.getShort();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a float off the input stream
|
||||||
|
*
|
||||||
|
* @return float
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public float readFloat() throws IOException {
|
||||||
|
readByteBuffer(4);
|
||||||
|
byteBuffer.flip();
|
||||||
|
return byteBuffer.getFloat();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a boolean off the input stream, represented as a byte with value 1 or 0
|
||||||
|
*
|
||||||
|
* @return boolean
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public boolean readBoolean() throws IOException {
|
||||||
|
return ((readByte()) == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads an 8-bit unsigned byte from the input stream. This method assumes little-endianness.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public short readUByte() throws IOException {
|
||||||
|
readByteBuffer(1);
|
||||||
|
byteBuffer.put((byte)0);
|
||||||
|
byteBuffer.flip();
|
||||||
|
return byteBuffer.getShort();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a 16-bit unsigned short from the input stream. This method assumes little-endianness.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public int readUShort() throws IOException {
|
||||||
|
readByteBuffer(2);
|
||||||
|
byteBuffer.putShort((short)0);
|
||||||
|
byteBuffer.flip();
|
||||||
|
return byteBuffer.getInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a 32-bit unsigned int from the input stream. This method assumes little-endianness.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public long readUInt() throws IOException {
|
||||||
|
readByteBuffer(4);
|
||||||
|
byteBuffer.putInt(0);
|
||||||
|
byteBuffer.flip();
|
||||||
|
return byteBuffer.getLong();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the appropriate stream
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void close() throws IOException {
|
||||||
|
if (this.isWriting) {
|
||||||
|
// To the degree possible, make sure the bytes get forced to the file system,
|
||||||
|
// or else cause an exception to be thrown.
|
||||||
|
if (this.outputStream instanceof FileOutputStream) {
|
||||||
|
this.outputStream.flush();
|
||||||
|
FileOutputStream fos = (FileOutputStream)this.outputStream;
|
||||||
|
try {
|
||||||
|
fos.getFD().sync();
|
||||||
|
} catch (SyncFailedException e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.outputStream.close();
|
||||||
|
} else {
|
||||||
|
this.inputStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String constructErrorMessage(final String msg) {
|
||||||
|
final StringBuilder sb = new StringBuilder(msg);
|
||||||
|
sb.append("; BinaryCodec in ");
|
||||||
|
sb.append(isWriting ? "write" : "read");
|
||||||
|
sb.append("mode; ");
|
||||||
|
final String filename = isWriting ? outputFileName : inputFileName;
|
||||||
|
if (filename != null) {
|
||||||
|
sb.append("file: ");
|
||||||
|
sb.append(filename);
|
||||||
|
} else {
|
||||||
|
sb.append("streamed file (filename not available)");
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getInputFileName() {
|
||||||
|
return inputFileName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOutputFileName() {
|
||||||
|
return outputFileName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOutputFileName(final String outputFileName) {
|
||||||
|
this.outputFileName = outputFileName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setInputFileName(final String inputFileName) {
|
||||||
|
this.inputFileName = inputFileName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isWriting() {
|
||||||
|
return isWriting;
|
||||||
|
}
|
||||||
|
|
||||||
|
public OutputStream getOutputStream() {
|
||||||
|
return outputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
public InputStream getInputStream() {
|
||||||
|
return inputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setInputStream(final InputStream is) {
|
||||||
|
isWriting = false;
|
||||||
|
this.inputStream = is;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOutputStream(final OutputStream os) {
|
||||||
|
isWriting = true;
|
||||||
|
this.outputStream = os;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,709 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.EOFException;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.nio.channels.SeekableByteChannel;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.StandardOpenOption;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stream class for reading BGZF block compressed files. The caller can treat this file like any other InputStream.
|
||||||
|
* It probably is not necessary to wrap this stream in a buffering stream, because there is internal buffering.
|
||||||
|
* The advantage of BGZF over conventional GZip format is that BGZF allows for seeking without having to read the
|
||||||
|
* entire file up to the location being sought. Note that seeking is only possible if the input stream is seekable.
|
||||||
|
*
|
||||||
|
* Note that this implementation is not synchronized. If multiple threads access an instance concurrently, it must be synchronized externally.
|
||||||
|
*
|
||||||
|
* @see <a href="http://samtools.sourceforge.net/SAM1.pdf">http://samtools.sourceforge.net/SAM1.pdf</a> for details of BGZF file format.
|
||||||
|
*/
|
||||||
|
public class BlockCompressedInputStream extends InputStream {
|
||||||
|
|
||||||
|
public final static String INCORRECT_HEADER_SIZE_MSG = "Incorrect header size for file: ";
|
||||||
|
public final static String UNEXPECTED_BLOCK_LENGTH_MSG = "Unexpected compressed block length: ";
|
||||||
|
public final static String PREMATURE_END_MSG = "Premature end of file: ";
|
||||||
|
public final static String CANNOT_SEEK_STREAM_MSG = "Cannot seek a position for a non-file stream";
|
||||||
|
public final static String CANNOT_SEEK_CLOSED_STREAM_MSG = "Cannot seek a position for a closed stream";
|
||||||
|
public final static String INVALID_FILE_PTR_MSG = "Invalid file pointer: ";
|
||||||
|
|
||||||
|
private InputStream mStream;
|
||||||
|
private boolean mIsClosed = false;
|
||||||
|
private SeekableStream mFile;
|
||||||
|
private byte[] mFileBuffer = null;
|
||||||
|
private DecompressedBlock mCurrentBlock = null;
|
||||||
|
private int mCurrentOffset = 0;
|
||||||
|
private long mStreamOffset = 0;
|
||||||
|
private final BlockGunzipper blockGunzipper;
|
||||||
|
|
||||||
|
private volatile ByteArrayOutputStream buf = null;
|
||||||
|
private static final byte eol = '\n';
|
||||||
|
private static final byte eolCr = '\r';
|
||||||
|
|
||||||
|
/**
 * Creates a stream over a plain InputStream with buffering enabled and the default
 * inflater factory. seek() is not supported on a stream built this way.
 *
 * @param stream source of bytes
 */
public BlockCompressedInputStream(final InputStream stream) {
    this(stream, true, BlockGunzipper.getDefaultInflaterFactory());
}
|
||||||
|
|
||||||
|
/**
 * Creates a stream over a plain InputStream with buffering enabled and a caller-supplied
 * inflater factory. seek() is not supported on a stream built this way.
 *
 * @param stream          source of bytes
 * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
 */
public BlockCompressedInputStream(final InputStream stream, final InflaterFactory inflaterFactory) {
    this(stream, true, inflaterFactory);
}
|
||||||
|
|
||||||
|
/**
 * Creates a stream over a plain InputStream using the default inflater factory.
 * seek() is not supported on a stream built this way.
 *
 * @param stream         source of bytes
 * @param allowBuffering if true, allow buffering
 */
public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering) {
    this(stream, allowBuffering, BlockGunzipper.getDefaultInflaterFactory());
}
|
||||||
|
|
||||||
|
/**
 * Creates a stream over a plain InputStream. seek() is not supported on a stream built
 * this way.
 *
 * @param stream          source of bytes
 * @param allowBuffering  if true, the source is wrapped in a {@link BufferedInputStream};
 *                        otherwise it is used as given
 * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
 */
public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering, final InflaterFactory inflaterFactory) {
    mStream = allowBuffering ? new BufferedInputStream(stream) : stream;
    // No seekable source in this mode
    mFile = null;
    blockGunzipper = new BlockGunzipper(inflaterFactory);
}
|
||||||
|
|
||||||
|
/**
 * Creates a seekable stream over a file using the default inflater factory. Use this
 * ctor if you wish to call seek().
 *
 * @param file source of bytes
 * @throws IOException if the file cannot be opened
 */
public BlockCompressedInputStream(final File file) throws IOException {
    this(file, BlockGunzipper.getDefaultInflaterFactory());
}
|
||||||
|
|
||||||
|
/**
 * Creates a seekable stream over a file. Use this ctor if you wish to call seek().
 *
 * @param file            source of bytes
 * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
 * @throws IOException if the file cannot be opened
 */
public BlockCompressedInputStream(final File file, final InflaterFactory inflaterFactory) throws IOException {
    // No plain InputStream in this mode; all reads go through the seekable file
    mStream = null;
    mFile = new SeekableFileStream(file);
    blockGunzipper = new BlockGunzipper(inflaterFactory);
}
|
||||||
|
|
||||||
|
/**
 * Creates a stream over an arbitrary seekable data source using the default inflater
 * factory. No additional buffering is provided, so if the underlying source is not
 * buffered, wrap it in a SeekableBufferedStream before passing it to this ctor.
 *
 * @param strm source of bytes
 */
public BlockCompressedInputStream(final SeekableStream strm) {
    this(strm, BlockGunzipper.getDefaultInflaterFactory());
}
|
||||||
|
|
||||||
|
/**
 * Creates a stream over an arbitrary seekable data source. No additional buffering is
 * provided, so if the underlying source is not buffered, wrap it in a
 * SeekableBufferedStream before passing it to this ctor.
 *
 * @param strm            source of bytes
 * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
 */
public BlockCompressedInputStream(final SeekableStream strm, final InflaterFactory inflaterFactory) {
    // No plain InputStream in this mode; all reads go through the seekable source
    mStream = null;
    mFile = strm;
    blockGunzipper = new BlockGunzipper(inflaterFactory);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines whether or not the inflater will re-calculated the CRC on the decompressed data
|
||||||
|
* and check it against the value stored in the GZIP header. CRC checking is an expensive
|
||||||
|
* operation and should be used accordingly.
|
||||||
|
*/
|
||||||
|
public void setCheckCrcs(final boolean check) {
|
||||||
|
this.blockGunzipper.setCheckCrcs(check);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the number of bytes that can be read (or skipped over) from this input stream without blocking by the
|
||||||
|
* next caller of a method for this input stream. The next caller might be the same thread or another thread.
|
||||||
|
* Note that although the next caller can read this many bytes without blocking, the available() method call itself
|
||||||
|
* may block in order to fill an internal buffer if it has been exhausted.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int available() throws IOException {
|
||||||
|
if (mCurrentBlock == null || mCurrentOffset == mCurrentBlock.mBlock.length) {
|
||||||
|
readBlock();
|
||||||
|
}
|
||||||
|
if (mCurrentBlock == null) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return mCurrentBlock.mBlock.length - mCurrentOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return <code>true</code> if the stream is at the end of a BGZF block,
|
||||||
|
* <code>false</code> otherwise.
|
||||||
|
*/
|
||||||
|
public boolean endOfBlock() {
|
||||||
|
return (mCurrentBlock != null && mCurrentOffset == mCurrentBlock.mBlock.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the underlying InputStream or RandomAccessFile
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
if (mFile != null) {
|
||||||
|
mFile.close();
|
||||||
|
mFile = null;
|
||||||
|
} else if (mStream != null) {
|
||||||
|
mStream.close();
|
||||||
|
mStream = null;
|
||||||
|
}
|
||||||
|
// Encourage garbage collection
|
||||||
|
mFileBuffer = null;
|
||||||
|
mCurrentBlock = null;
|
||||||
|
|
||||||
|
// Mark as closed
|
||||||
|
mIsClosed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads the next byte of data from the input stream. The value byte is returned as an int in the range 0 to 255.
|
||||||
|
* If no byte is available because the end of the stream has been reached, the value -1 is returned.
|
||||||
|
* This method blocks until input data is available, the end of the stream is detected, or an exception is thrown.
|
||||||
|
|
||||||
|
* @return the next byte of data, or -1 if the end of the stream is reached.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int read() throws IOException {
|
||||||
|
return (available() > 0) ? (mCurrentBlock.mBlock[mCurrentOffset++] & 0xFF) : -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads some number of bytes from the input stream and stores them into the buffer array b. The number of bytes
|
||||||
|
* actually read is returned as an integer. This method blocks until input data is available, end of file is detected,
|
||||||
|
* or an exception is thrown.
|
||||||
|
*
|
||||||
|
* read(buf) has the same effect as read(buf, 0, buf.length).
|
||||||
|
*
|
||||||
|
* @param buffer the buffer into which the data is read.
|
||||||
|
* @return the total number of bytes read into the buffer, or -1 is there is no more data because the end of
|
||||||
|
* the stream has been reached.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int read(final byte[] buffer) throws IOException {
|
||||||
|
return read(buffer, 0, buffer.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reads a whole line. A line is considered to be terminated by either a line feed ('\n'),
 * carriage return ('\r') or carriage return followed by a line feed ("\r\n").
 *
 * The line may span several decompressed blocks; its bytes are collected in a reusable
 * internal buffer and converted with {@code ByteArrayOutputStream.toString()}, i.e. using
 * the platform default charset.
 *
 * @return A String containing the contents of the line, excluding the line terminating
 *         character, or null if the end of the stream has been reached
 *
 * @exception IOException If an I/O error occurs
 */
public String readLine() throws IOException {
    int available = available();
    if (available == 0) {
        // Nothing left to read at all: signal end of stream
        return null;
    }
    if(null == buf){ // lazy initialisation
        buf = new ByteArrayOutputStream(8192);
    }
    buf.reset();
    boolean done = false;
    boolean foundCr = false; // \r found flag; kept across blocks so "\r\n" split over two blocks is still one terminator
    while (!done) {
        // Scan the unread part of the current block
        int linetmpPos = mCurrentOffset;
        int bCnt = 0; // number of line-content bytes seen in this block (terminators excluded)
        while((available-- > 0)){
            final byte c = mCurrentBlock.mBlock[linetmpPos++];
            if(c == eol){ // found \n
                done = true;
                break;
            } else if(foundCr){ // previous char was \r
                --linetmpPos; // current char is not \n so put it back
                done = true;
                break;
            } else if(c == eolCr){ // found \r
                foundCr = true;
                continue; // no ++bCnt
            }
            ++bCnt;
        }
        if(mCurrentOffset < linetmpPos) {
            // Copy the content bytes and advance past everything consumed, terminators included
            buf.write(mCurrentBlock.mBlock, mCurrentOffset, bCnt);
            mCurrentOffset = linetmpPos;
        }
        // May load the next block when the current one is exhausted
        available = available();
        if(available == 0) {
            // EOF
            done = true;
        }
    }
    return buf.toString();
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads up to len bytes of data from the input stream into an array of bytes. An attempt is made to read
|
||||||
|
* as many as len bytes, but a smaller number may be read. The number of bytes actually read is returned as an integer.
|
||||||
|
*
|
||||||
|
* This method blocks until input data is available, end of file is detected, or an exception is thrown.
|
||||||
|
*
|
||||||
|
* @param buffer buffer into which data is read.
|
||||||
|
* @param offset the start offset in array b at which the data is written.
|
||||||
|
* @param length the maximum number of bytes to read.
|
||||||
|
* @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of
|
||||||
|
* the stream has been reached.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int read(final byte[] buffer, int offset, int length) throws IOException {
|
||||||
|
final int originalLength = length;
|
||||||
|
while (length > 0) {
|
||||||
|
final int available = available();
|
||||||
|
if (available == 0) {
|
||||||
|
// Signal EOF to caller
|
||||||
|
if (originalLength == length) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
final int copyLength = Math.min(length, available);
|
||||||
|
System.arraycopy(mCurrentBlock.mBlock, mCurrentOffset, buffer, offset, copyLength);
|
||||||
|
mCurrentOffset += copyLength;
|
||||||
|
offset += copyLength;
|
||||||
|
length -= copyLength;
|
||||||
|
}
|
||||||
|
return originalLength - length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Seek to the given position in the file. Note that pos is a special virtual file pointer,
|
||||||
|
* not an actual byte offset.
|
||||||
|
*
|
||||||
|
* @param pos virtual file pointer position
|
||||||
|
* @throws IOException if stream is closed or not a file based stream
|
||||||
|
*/
|
||||||
|
public void seek(final long pos) throws IOException {
|
||||||
|
// Must be before the mFile == null check because mFile == null for closed files and streams
|
||||||
|
if (mIsClosed) {
|
||||||
|
throw new IOException(CANNOT_SEEK_CLOSED_STREAM_MSG);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cannot seek on streams that are not file based
|
||||||
|
if (mFile == null) {
|
||||||
|
throw new IOException(CANNOT_SEEK_STREAM_MSG);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decode virtual file pointer
|
||||||
|
// Upper 48 bits is the byte offset into the compressed stream of a
|
||||||
|
// block.
|
||||||
|
// Lower 16 bits is the byte offset into the uncompressed stream inside
|
||||||
|
// the block.
|
||||||
|
final long compressedOffset = BGZFFilePointerUtil.getBlockAddress(pos);
|
||||||
|
final int uncompressedOffset = BGZFFilePointerUtil.getBlockOffset(pos);
|
||||||
|
final int available;
|
||||||
|
if (mCurrentBlock != null && mCurrentBlock.mBlockAddress == compressedOffset) {
|
||||||
|
available = mCurrentBlock.mBlock.length;
|
||||||
|
} else {
|
||||||
|
prepareForSeek();
|
||||||
|
mFile.seek(compressedOffset);
|
||||||
|
mStreamOffset = compressedOffset;
|
||||||
|
mCurrentBlock = nextBlock(getBufferForReuse(mCurrentBlock));
|
||||||
|
mCurrentOffset = 0;
|
||||||
|
available = available();
|
||||||
|
}
|
||||||
|
if (uncompressedOffset > available || (uncompressedOffset == available && !eof())) {
|
||||||
|
throw new IOException(INVALID_FILE_PTR_MSG + pos + " for " + getSource());
|
||||||
|
}
|
||||||
|
mCurrentOffset = uncompressedOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs cleanup required before seek is called on the underlying stream
|
||||||
|
*/
|
||||||
|
protected void prepareForSeek() {
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean eof() throws IOException {
|
||||||
|
if (mFile.eof()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// If the last remaining block is the size of the EMPTY_GZIP_BLOCK, this is the same as being at EOF.
|
||||||
|
return (mFile.length() - (mCurrentBlock.mBlockAddress
|
||||||
|
+ mCurrentBlock.mBlockCompressedSize) == BGZFStreamConstants.EMPTY_GZIP_BLOCK.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return virtual file pointer that can be passed to seek() to return to the current position. This is
|
||||||
|
* not an actual byte offset, so arithmetic on file pointers cannot be done to determine the distance between
|
||||||
|
* the two.
|
||||||
|
*/
|
||||||
|
public long getFilePointer() {
|
||||||
|
if (mCurrentBlock == null) {
|
||||||
|
// Haven't read anything yet = at start of stream
|
||||||
|
return BGZFFilePointerUtil.makeFilePointer(0, 0);
|
||||||
|
}
|
||||||
|
if (mCurrentOffset > 0 && mCurrentOffset == mCurrentBlock.mBlock.length) {
|
||||||
|
// If current offset is at the end of the current block, file
|
||||||
|
// pointer should point
|
||||||
|
// to the beginning of the next block.
|
||||||
|
return BGZFFilePointerUtil.makeFilePointer(mCurrentBlock.mBlockAddress + mCurrentBlock.mBlockCompressedSize, 0);
|
||||||
|
}
|
||||||
|
return BGZFFilePointerUtil.makeFilePointer(mCurrentBlock.mBlockAddress, mCurrentOffset);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getPosition() {
|
||||||
|
return getFilePointer();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static long getFileBlock(final long bgzfOffset) {
|
||||||
|
return BGZFFilePointerUtil.getBlockAddress(bgzfOffset);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param stream Must be at start of file. Throws RuntimeException if !stream.markSupported().
|
||||||
|
* @return true if the given file looks like a valid BGZF file.
|
||||||
|
*/
|
||||||
|
public static boolean isValidFile(final InputStream stream) throws IOException {
|
||||||
|
if (!stream.markSupported()) {
|
||||||
|
throw new RuntimeException("Cannot test non-buffered stream");
|
||||||
|
}
|
||||||
|
stream.mark(BGZFStreamConstants.BLOCK_HEADER_LENGTH);
|
||||||
|
final byte[] buffer = new byte[BGZFStreamConstants.BLOCK_HEADER_LENGTH];
|
||||||
|
final int count = readBytes(stream, buffer, 0, BGZFStreamConstants.BLOCK_HEADER_LENGTH);
|
||||||
|
stream.reset();
|
||||||
|
return count == BGZFStreamConstants.BLOCK_HEADER_LENGTH && isValidBlockHeader(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isValidBlockHeader(final byte[] buffer) {
|
||||||
|
return (buffer[0] == BGZFStreamConstants.GZIP_ID1 &&
|
||||||
|
(buffer[1] & 0xFF) == BGZFStreamConstants.GZIP_ID2 &&
|
||||||
|
(buffer[3] & BGZFStreamConstants.GZIP_FLG) != 0 &&
|
||||||
|
buffer[10] == BGZFStreamConstants.GZIP_XLEN &&
|
||||||
|
buffer[12] == BGZFStreamConstants.BGZF_ID1 &&
|
||||||
|
buffer[13] == BGZFStreamConstants.BGZF_ID2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void readBlock() throws IOException {
|
||||||
|
mCurrentBlock = nextBlock(getBufferForReuse(mCurrentBlock));
|
||||||
|
mCurrentOffset = 0;
|
||||||
|
checkAndRethrowDecompressionException();
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Reads and decompresses the next block
|
||||||
|
* @param bufferAvailableForReuse decompression buffer available for reuse
|
||||||
|
* @return next block in the decompressed stream
|
||||||
|
*/
|
||||||
|
protected DecompressedBlock nextBlock(byte[] bufferAvailableForReuse) {
|
||||||
|
return processNextBlock(bufferAvailableForReuse);
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Rethrows an exception encountered during decompression
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private void checkAndRethrowDecompressionException() throws IOException {
|
||||||
|
if (mCurrentBlock.mException != null) {
|
||||||
|
if (mCurrentBlock.mException instanceof IOException) {
|
||||||
|
throw (IOException) mCurrentBlock.mException;
|
||||||
|
} else if (mCurrentBlock.mException instanceof RuntimeException) {
|
||||||
|
throw (RuntimeException) mCurrentBlock.mException;
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException(mCurrentBlock.mException);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempt to reuse the buffer of the given block
|
||||||
|
* @param block owning block
|
||||||
|
* @return null decompressing buffer to reuse, null if no buffer is available
|
||||||
|
*/
|
||||||
|
private byte[] getBufferForReuse(DecompressedBlock block) {
|
||||||
|
if (block == null) return null;
|
||||||
|
return block.mBlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decompress the next block from the input stream. When using asynchronous
|
||||||
|
* IO, this will be called by the background thread.
|
||||||
|
* @param bufferAvailableForReuse buffer in which to place decompressed block. A null or
|
||||||
|
* incorrectly sized buffer will result in the buffer being ignored and
|
||||||
|
* a new buffer allocated for decompression.
|
||||||
|
* @return next block in input stream
|
||||||
|
*/
|
||||||
|
protected DecompressedBlock processNextBlock(byte[] bufferAvailableForReuse) {
|
||||||
|
if (mFileBuffer == null) {
|
||||||
|
mFileBuffer = new byte[BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE];
|
||||||
|
}
|
||||||
|
long blockAddress = mStreamOffset;
|
||||||
|
try {
|
||||||
|
final int headerByteCount = readBytes(mFileBuffer, 0, BGZFStreamConstants.BLOCK_HEADER_LENGTH);
|
||||||
|
mStreamOffset += headerByteCount;
|
||||||
|
if (headerByteCount == 0) {
|
||||||
|
// Handle case where there is no empty gzip block at end.
|
||||||
|
return new DecompressedBlock(blockAddress, new byte[0], 0);
|
||||||
|
}
|
||||||
|
if (headerByteCount != BGZFStreamConstants.BLOCK_HEADER_LENGTH) {
|
||||||
|
return new DecompressedBlock(blockAddress, headerByteCount, new IOException(INCORRECT_HEADER_SIZE_MSG + getSource()));
|
||||||
|
}
|
||||||
|
final int blockLength = unpackInt16(mFileBuffer, BGZFStreamConstants.BLOCK_LENGTH_OFFSET) + 1;
|
||||||
|
if (blockLength < BGZFStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) {
|
||||||
|
return new DecompressedBlock(blockAddress, blockLength,
|
||||||
|
new IOException(UNEXPECTED_BLOCK_LENGTH_MSG + blockLength + " for " + getSource()));
|
||||||
|
}
|
||||||
|
final int remaining = blockLength - BGZFStreamConstants.BLOCK_HEADER_LENGTH;
|
||||||
|
final int dataByteCount = readBytes(mFileBuffer, BGZFStreamConstants.BLOCK_HEADER_LENGTH,
|
||||||
|
remaining);
|
||||||
|
mStreamOffset += dataByteCount;
|
||||||
|
if (dataByteCount != remaining) {
|
||||||
|
return new DecompressedBlock(blockAddress, blockLength,
|
||||||
|
new BGZFException(PREMATURE_END_MSG + getSource()));
|
||||||
|
}
|
||||||
|
final byte[] decompressed = inflateBlock(mFileBuffer, blockLength, bufferAvailableForReuse);
|
||||||
|
return new DecompressedBlock(blockAddress, decompressed, blockLength);
|
||||||
|
} catch (IOException e) {
|
||||||
|
return new DecompressedBlock(blockAddress, 0, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] inflateBlock(final byte[] compressedBlock, final int compressedLength,
|
||||||
|
final byte[] bufferAvailableForReuse) throws IOException {
|
||||||
|
final int uncompressedLength = unpackInt32(compressedBlock, compressedLength - 4);
|
||||||
|
if (uncompressedLength < 0) {
|
||||||
|
throw new BGZFException(getSource() + " has invalid uncompressedLength: " + uncompressedLength);
|
||||||
|
}
|
||||||
|
byte[] buffer = bufferAvailableForReuse;
|
||||||
|
if (buffer == null || uncompressedLength != buffer.length) {
|
||||||
|
// can't reuse the buffer since the size is incorrect
|
||||||
|
buffer = new byte[uncompressedLength];
|
||||||
|
}
|
||||||
|
blockGunzipper.unzipBlock(buffer, compressedBlock, compressedLength);
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getSource() {
|
||||||
|
return mFile == null ? "data stream" : mFile.getSource();
|
||||||
|
}
|
||||||
|
|
||||||
|
private int readBytes(final byte[] buffer, final int offset, final int length) throws IOException {
|
||||||
|
if (mFile != null) {
|
||||||
|
return readBytes(mFile, buffer, offset, length);
|
||||||
|
} else if (mStream != null) {
|
||||||
|
return readBytes(mStream, buffer, offset, length);
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length) throws IOException {
|
||||||
|
int bytesRead = 0;
|
||||||
|
while (bytesRead < length) {
|
||||||
|
final int count = file.read(buffer, offset + bytesRead, length - bytesRead);
|
||||||
|
if (count <= 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bytesRead += count;
|
||||||
|
}
|
||||||
|
return bytesRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length) throws IOException {
|
||||||
|
int bytesRead = 0;
|
||||||
|
while (bytesRead < length) {
|
||||||
|
final int count = stream.read(buffer, offset + bytesRead, length - bytesRead);
|
||||||
|
if (count <= 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bytesRead += count;
|
||||||
|
}
|
||||||
|
return bytesRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int unpackInt16(final byte[] buffer, final int offset) {
|
||||||
|
return ((buffer[offset] & 0xFF) |
|
||||||
|
((buffer[offset+1] & 0xFF) << 8));
|
||||||
|
}
|
||||||
|
|
||||||
|
private int unpackInt32(final byte[] buffer, final int offset) {
|
||||||
|
return ((buffer[offset] & 0xFF) |
|
||||||
|
((buffer[offset+1] & 0xFF) << 8) |
|
||||||
|
((buffer[offset+2] & 0xFF) << 16) |
|
||||||
|
((buffer[offset+3] & 0xFF) << 24));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Outcome of inspecting the end of a BGZF file, as reported by {@code checkTermination}.
 */
public enum FileTermination {
    /** The file ends with the empty gzip block. */
    HAS_TERMINATOR_BLOCK,
    /** No empty block at the end, but the last block's recorded size matches the bytes present. */
    HAS_HEALTHY_LAST_BLOCK,
    /** Neither of the above could be established. */
    DEFECTIVE
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param file the file to check
|
||||||
|
* @return status of the last compressed block
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static FileTermination checkTermination(final File file) throws IOException {
|
||||||
|
return checkTermination(file.toPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param path to the file to check
|
||||||
|
* @return status of the last compressed block
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static FileTermination checkTermination(final Path path) throws IOException {
|
||||||
|
try( final SeekableByteChannel channel = Files.newByteChannel(path, StandardOpenOption.READ) ){
|
||||||
|
return checkTermination(channel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* check the status of the final bzgipped block for the given bgzipped resource
|
||||||
|
*
|
||||||
|
* @param channel an open channel to read from,
|
||||||
|
* the channel will remain open and the initial position will be restored when the operation completes
|
||||||
|
* this makes no guarantee about the state of the channel if an exception is thrown during reading
|
||||||
|
*
|
||||||
|
* @return the status of the last compressed black
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static FileTermination checkTermination(SeekableByteChannel channel) throws IOException {
|
||||||
|
final long fileSize = channel.size();
|
||||||
|
if (fileSize < BGZFStreamConstants.EMPTY_GZIP_BLOCK.length) {
|
||||||
|
return FileTermination.DEFECTIVE;
|
||||||
|
}
|
||||||
|
final long initialPosition = channel.position();
|
||||||
|
boolean exceptionThrown = false;
|
||||||
|
try {
|
||||||
|
channel.position(fileSize - BGZFStreamConstants.EMPTY_GZIP_BLOCK.length);
|
||||||
|
|
||||||
|
//Check if the end of the file is an empty gzip block which is used as the terminator for a bgzipped file
|
||||||
|
final ByteBuffer lastBlockBuffer = ByteBuffer.allocate(BGZFStreamConstants.EMPTY_GZIP_BLOCK.length);
|
||||||
|
readFully(channel, lastBlockBuffer);
|
||||||
|
if (Arrays.equals(lastBlockBuffer.array(), BGZFStreamConstants.EMPTY_GZIP_BLOCK)) {
|
||||||
|
return FileTermination.HAS_TERMINATOR_BLOCK;
|
||||||
|
}
|
||||||
|
|
||||||
|
//if the last block isn't an empty gzip block, check to see if it is a healthy compressed block or if it's corrupted
|
||||||
|
final int bufsize = (int) Math.min(fileSize, BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
|
||||||
|
final byte[] bufferArray = new byte[bufsize];
|
||||||
|
channel.position(fileSize - bufsize);
|
||||||
|
readFully(channel, ByteBuffer.wrap(bufferArray));
|
||||||
|
for (int i = bufferArray.length - BGZFStreamConstants.EMPTY_GZIP_BLOCK.length;
|
||||||
|
i >= 0; --i) {
|
||||||
|
if (!preambleEqual(BGZFStreamConstants.GZIP_BLOCK_PREAMBLE,
|
||||||
|
bufferArray, i, BGZFStreamConstants.GZIP_BLOCK_PREAMBLE.length)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
final ByteBuffer byteBuffer = ByteBuffer.wrap(bufferArray,
|
||||||
|
i + BGZFStreamConstants.GZIP_BLOCK_PREAMBLE.length,
|
||||||
|
4);
|
||||||
|
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||||
|
final int totalBlockSizeMinusOne = byteBuffer.getShort() & 0xFFFF;
|
||||||
|
if (bufferArray.length - i == totalBlockSizeMinusOne + 1) {
|
||||||
|
return FileTermination.HAS_HEALTHY_LAST_BLOCK;
|
||||||
|
} else {
|
||||||
|
return FileTermination.DEFECTIVE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return FileTermination.DEFECTIVE;
|
||||||
|
} catch (final Throwable e) {
|
||||||
|
exceptionThrown = true;
|
||||||
|
throw e;
|
||||||
|
} finally {
|
||||||
|
//if an exception was thrown we don't want to reset the position because that would be likely to throw again
|
||||||
|
//and suppress the initial exception
|
||||||
|
if(!exceptionThrown) {
|
||||||
|
channel.position(initialPosition);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* read as many bytes as dst's capacity into dst or throw if that's not possible
|
||||||
|
*
|
||||||
|
* @throws EOFException if channel has fewer bytes available than dst's capacity
|
||||||
|
*/
|
||||||
|
static void readFully(SeekableByteChannel channel, ByteBuffer dst) throws IOException {
|
||||||
|
int totalBytesRead = 0;
|
||||||
|
final int capacity = dst.capacity();
|
||||||
|
while (totalBytesRead < capacity) {
|
||||||
|
final int bytesRead = channel.read(dst);
|
||||||
|
if (bytesRead == -1) {
|
||||||
|
throw new EOFException();
|
||||||
|
}
|
||||||
|
totalBytesRead += bytesRead;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void assertNonDefectiveFile(final File file) throws IOException {
|
||||||
|
if (checkTermination(file) == FileTermination.DEFECTIVE) {
|
||||||
|
throw new BGZFException(file.getAbsolutePath() + " does not have a valid GZIP block at the end of the file.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean preambleEqual(final byte[] preamble, final byte[] buf, final int startOffset, final int length) {
|
||||||
|
for (int i = 0; i < length; ++i) {
|
||||||
|
if (preamble[i] != buf[i + startOffset]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static class DecompressedBlock {
|
||||||
|
/**
|
||||||
|
* Decompressed block
|
||||||
|
*/
|
||||||
|
private final byte[] mBlock;
|
||||||
|
/**
|
||||||
|
* Compressed size of block (the uncompressed size can be found using
|
||||||
|
* mBlock.length)
|
||||||
|
*/
|
||||||
|
private final int mBlockCompressedSize;
|
||||||
|
/**
|
||||||
|
* Stream offset of start of block
|
||||||
|
*/
|
||||||
|
private final long mBlockAddress;
|
||||||
|
/**
|
||||||
|
* Exception thrown (if any) when attempting to decompress block
|
||||||
|
*/
|
||||||
|
private final Exception mException;
|
||||||
|
|
||||||
|
public DecompressedBlock(long blockAddress, byte[] block, int compressedSize) {
|
||||||
|
mBlock = block;
|
||||||
|
mBlockAddress = blockAddress;
|
||||||
|
mBlockCompressedSize = compressedSize;
|
||||||
|
mException = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecompressedBlock(long blockAddress, int compressedSize, Exception exception) {
|
||||||
|
mBlock = new byte[0];
|
||||||
|
mBlockAddress = blockAddress;
|
||||||
|
mBlockCompressedSize = compressedSize;
|
||||||
|
mException = exception;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,358 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.zip.CRC32;
|
||||||
|
import java.util.zip.Deflater;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stream class for a file that is a series of gzip blocks (BGZF format). The caller just treats it as an
|
||||||
|
* OutputStream, and under the covers a gzip block is written when the amount of uncompressed as-yet-unwritten
|
||||||
|
* bytes reaches a threshold.
|
||||||
|
*
|
||||||
|
* The advantage of BGZF over conventional gzip is that BGZF allows for seeking without having to scan through
|
||||||
|
* the entire file up to the position being sought.
|
||||||
|
*
|
||||||
|
* Note that the flush() method should not be called by client
|
||||||
|
* unless you know what you're doing, because it forces a gzip block to be written even if the
|
||||||
|
* number of buffered bytes has not reached threshold. close(), on the other hand, must be called
|
||||||
|
* when done writing in order to force the last gzip block to be written.
|
||||||
|
*
|
||||||
|
* @see <a href="http://samtools.sourceforge.net/SAM1.pdf">http://samtools.sourceforge.net/SAM1.pdf</a> for details of BGZF file format.
|
||||||
|
*/
|
||||||
|
public class BlockCompressedOutputStream extends OutputStream {
|
||||||
|
|
||||||
|
private static int defaultCompressionLevel = BGZFStreamConstants.DEFAULT_COMPRESSION_LEVEL;
|
||||||
|
private static DeflaterFactory defaultDeflaterFactory = new DeflaterFactory();
|
||||||
|
|
||||||
|
public static void setDefaultCompressionLevel(final int compressionLevel) {
|
||||||
|
if (compressionLevel < Deflater.NO_COMPRESSION || compressionLevel > Deflater.BEST_COMPRESSION) {
|
||||||
|
throw new IllegalArgumentException("Invalid compression level: " + compressionLevel);
|
||||||
|
}
|
||||||
|
defaultCompressionLevel = compressionLevel;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int getDefaultCompressionLevel() {
|
||||||
|
return defaultCompressionLevel;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the default {@link DeflaterFactory} that will be used for all instances unless specified otherwise in the constructor.
|
||||||
|
* If this method is not called the default is a factory that will create the JDK {@link Deflater}.
|
||||||
|
* @param deflaterFactory non-null default factory.
|
||||||
|
*/
|
||||||
|
public static void setDefaultDeflaterFactory(final DeflaterFactory deflaterFactory) {
|
||||||
|
if (deflaterFactory == null) {
|
||||||
|
throw new IllegalArgumentException("null deflaterFactory");
|
||||||
|
}
|
||||||
|
defaultDeflaterFactory = deflaterFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DeflaterFactory getDefaultDeflaterFactory() {
|
||||||
|
return defaultDeflaterFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final BinaryCodec codec;
|
||||||
|
private final byte[] uncompressedBuffer = new byte[BGZFStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE];
|
||||||
|
private int numUncompressedBytes = 0;
|
||||||
|
private final byte[] compressedBuffer =
|
||||||
|
new byte[BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE -
|
||||||
|
BGZFStreamConstants.BLOCK_HEADER_LENGTH];
|
||||||
|
private final Deflater deflater;
|
||||||
|
|
||||||
|
// A second deflater is created for the very unlikely case where the regular deflation actually makes
|
||||||
|
// things bigger, and the compressed block is too big. It should be possible to downshift the
|
||||||
|
// primary deflater to NO_COMPRESSION level, recompress, and then restore it to its original setting,
|
||||||
|
// but in practice that doesn't work.
|
||||||
|
// The motivation for deflating at NO_COMPRESSION level is that it will predictably produce compressed
|
||||||
|
// output that is 10 bytes larger than the input, and the threshold at which a block is generated is such that
|
||||||
|
// the size of the final gzip block will always be <= 64K. This is preferred over the previous method,
|
||||||
|
// which would attempt to compress up to 64K bytes, and if the resulting compressed block was too large,
|
||||||
|
// try compressing fewer input bytes (aka "downshifting"). The problem with downshifting is that
|
||||||
|
// getFilePointer might return an inaccurate value.
|
||||||
|
// I assume (AW 29-Oct-2013) that there is no value in using hardware-assisted deflater for no-compression mode,
|
||||||
|
// so just use JDK standard.
|
||||||
|
private final Deflater noCompressionDeflater = new Deflater(Deflater.NO_COMPRESSION, true);
|
||||||
|
private final CRC32 crc32 = new CRC32();
|
||||||
|
private Path file = null;
|
||||||
|
private long mBlockAddress = 0;
|
||||||
|
|
||||||
|
/**
 * Writes to the named file using the current default compression level
 * (see {@link #setDefaultCompressionLevel(int)}).
 * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
 * Use {@link #BlockCompressedOutputStream(File, int, DeflaterFactory)} to specify a custom factory.
 *
 * @param filename name of the file to write
 */
public BlockCompressedOutputStream(final String filename) throws FileNotFoundException {
    this(filename, defaultCompressionLevel);
}
|
||||||
|
|
||||||
|
/**
 * Writes to the given file using the current default compression level
 * (see {@link #setDefaultCompressionLevel(int)}).
 * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
 * Use {@link #BlockCompressedOutputStream(File, int, DeflaterFactory)} to specify a custom factory.
 *
 * @param file the file to write
 */
public BlockCompressedOutputStream(final File file) throws FileNotFoundException {
    this(file, defaultCompressionLevel);
}
|
||||||
|
|
||||||
|
/**
 * Writes to the named file with the given compression level and the default deflater factory.
 *
 * @param filename name of the file to write
 * @param compressionLevel the compression level to use
 */
public BlockCompressedOutputStream(final String filename, final int compressionLevel) throws FileNotFoundException {
    this(new File(filename), compressionLevel);
}
|
||||||
|
|
||||||
|
/**
 * Writes to the given file with the given compression level and the default deflater factory.
 *
 * @param file the file to write
 * @param compressionLevel the compression level to use
 */
public BlockCompressedOutputStream(final File file, final int compressionLevel) throws FileNotFoundException {
    this(file, compressionLevel, defaultDeflaterFactory);
}
|
||||||
|
|
||||||
|
/**
 * Writes to the given file with the given compression level, obtaining the deflater from the
 * supplied factory.
 *
 * @param file the file to write; must not be null
 * @param compressionLevel the compression level passed to the factory
 * @param deflaterFactory factory that creates the deflater
 */
public BlockCompressedOutputStream(final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) throws FileNotFoundException {
    this.file = file.toPath();
    this.codec = new BinaryCodec(file, true);
    this.deflater = deflaterFactory.makeDeflater(compressionLevel, true);
}
|
||||||
|
|
||||||
|
/**
 * Wraps the given stream, with no associated file, using the current default compression level
 * (see {@link #setDefaultCompressionLevel(int)}).
 * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
 * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
 *
 * @param os output stream to write the compressed blocks to
 */
public BlockCompressedOutputStream(final OutputStream os) {
    // the cast selects the File overload; a bare null would be ambiguous between File and Path
    this(os, (File) null, defaultCompressionLevel);
}
|
||||||
|
|
||||||
|
/**
 * Wraps the given stream using the current default compression level
 * (see {@link #setDefaultCompressionLevel(int)}).
 * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
 * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
 *
 * @param os output stream to write the compressed blocks to
 * @param file may be null
 */
public BlockCompressedOutputStream(final OutputStream os, final Path file) {
    this(os, file, defaultCompressionLevel);
}
|
||||||
|
|
||||||
|
/**
 * Wraps the given stream with the given compression level.
 * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
 * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
 *
 * @param os output stream to write the compressed blocks to
 * @param file file associated with the output, or null
 * @param compressionLevel the compression level to use
 */
public BlockCompressedOutputStream(final OutputStream os, final File file, final int compressionLevel) {
    this(os, file, compressionLevel, defaultDeflaterFactory);
}
|
||||||
|
|
||||||
|
/**
 * Wraps the given stream with the given compression level.
 * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
 * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
 *
 * @param os output stream to write the compressed blocks to
 * @param file path associated with the output, or null
 * @param compressionLevel the compression level to use
 */
public BlockCompressedOutputStream(final OutputStream os, final Path file, final int compressionLevel) {
    this(os, file, compressionLevel, defaultDeflaterFactory);
}
|
||||||
|
|
||||||
|
/**
 * Creates the output stream; converts the file (if any) to a Path and delegates to the
 * Path based constructor.
 *
 * @param os output stream to create a BlockCompressedOutputStream from
 * @param file file to which to write the output or null if not available
 * @param compressionLevel the compression level (0-9)
 * @param deflaterFactory custom factory to create deflaters (overrides the default)
 */
public BlockCompressedOutputStream(final OutputStream os, final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) {
    this(os, file == null ? null : file.toPath(), compressionLevel, deflaterFactory);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates the output stream.
|
||||||
|
* @param os output stream to create a BlockCompressedOutputStream from
|
||||||
|
* @param file file to which to write the output or null if not available
|
||||||
|
* @param compressionLevel the compression level (0-9)
|
||||||
|
* @param deflaterFactory custom factory to create deflaters (overrides the default)
|
||||||
|
*/
|
||||||
|
public BlockCompressedOutputStream(final OutputStream os, final Path file, final int compressionLevel, final DeflaterFactory deflaterFactory) {
|
||||||
|
this.file = file;
|
||||||
|
codec = new BinaryCodec(os);
|
||||||
|
if (file != null) {
|
||||||
|
codec.setOutputFileName(file.toAbsolutePath().toUri().toString());
|
||||||
|
}
|
||||||
|
deflater = deflaterFactory.makeDeflater(compressionLevel, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param output May or not already be a BlockCompressedOutputStream.
|
||||||
|
* @return A BlockCompressedOutputStream, either by wrapping the given OutputStream, or by casting if it already
|
||||||
|
* is a BCOS.
|
||||||
|
*/
|
||||||
|
public static BlockCompressedOutputStream maybeBgzfWrapOutputStream(OutputStream output) {
|
||||||
|
if (!(output instanceof BlockCompressedOutputStream)) {
|
||||||
|
return new BlockCompressedOutputStream(output);
|
||||||
|
} else {
|
||||||
|
return (BlockCompressedOutputStream)output;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes b.length bytes from the specified byte array to this output stream. The general contract for write(b)
|
||||||
|
* is that it should have exactly the same effect as the call write(b, 0, b.length).
|
||||||
|
* @param bytes the data
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void write(final byte[] bytes) throws IOException {
|
||||||
|
write(bytes, 0, bytes.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes len bytes from the specified byte array starting at offset off to this output stream. The general
|
||||||
|
* contract for write(b, off, len) is that some of the bytes in the array b are written to the output stream in order;
|
||||||
|
* element b[off] is the first byte written and b[off+len-1] is the last byte written by this operation.
|
||||||
|
*
|
||||||
|
* @param bytes the data
|
||||||
|
* @param startIndex the start offset in the data
|
||||||
|
* @param numBytes the number of bytes to write
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void write(final byte[] bytes, int startIndex, int numBytes) throws IOException {
|
||||||
|
while (numBytes > 0) {
|
||||||
|
final int bytesToWrite = Math.min(uncompressedBuffer.length - numUncompressedBytes, numBytes);
|
||||||
|
System.arraycopy(bytes, startIndex, uncompressedBuffer, numUncompressedBytes, bytesToWrite);
|
||||||
|
numUncompressedBytes += bytesToWrite;
|
||||||
|
startIndex += bytesToWrite;
|
||||||
|
numBytes -= bytesToWrite;
|
||||||
|
if (numUncompressedBytes == uncompressedBuffer.length) {
|
||||||
|
deflateBlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(final int b) throws IOException {
|
||||||
|
uncompressedBuffer[numUncompressedBytes++] = (byte) b;
|
||||||
|
if (numUncompressedBytes == uncompressedBuffer.length) deflateBlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* WARNING: flush() affects the output format, because it causes the current contents of uncompressedBuffer
|
||||||
|
* to be compressed and written, even if it isn't full. Unless you know what you're doing, don't call flush().
|
||||||
|
* Instead, call close(), which will flush any unwritten data before closing the underlying stream.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void flush() throws IOException {
|
||||||
|
while (numUncompressedBytes > 0) {
|
||||||
|
deflateBlock();
|
||||||
|
}
|
||||||
|
codec.getOutputStream().flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* close() must be called in order to flush any remaining buffered bytes. An unclosed file will likely be
|
||||||
|
* defective.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
close(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close(final boolean writeTerminatorBlock) throws IOException {
|
||||||
|
flush();
|
||||||
|
// For debugging...
|
||||||
|
// if (numberOfThrottleBacks > 0) {
|
||||||
|
// System.err.println("In BlockCompressedOutputStream, had to throttle back " + numberOfThrottleBacks +
|
||||||
|
// " times for file " + codec.getOutputFileName());
|
||||||
|
// }
|
||||||
|
if (writeTerminatorBlock) {
|
||||||
|
codec.writeBytes(BGZFStreamConstants.EMPTY_GZIP_BLOCK);
|
||||||
|
}
|
||||||
|
codec.close();
|
||||||
|
|
||||||
|
// If a terminator block was written, ensure that it's there and valid
|
||||||
|
if (writeTerminatorBlock) {
|
||||||
|
// Can't re-open something that is not a regular file, e.g. a named pipe or an output stream
|
||||||
|
if (this.file == null || !Files.isRegularFile(this.file)) return;
|
||||||
|
if (BlockCompressedInputStream.checkTermination(this.file) !=
|
||||||
|
BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) {
|
||||||
|
throw new IOException("Terminator block not found after closing BGZF file " + this.file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Encode virtual file pointer
|
||||||
|
* Upper 48 bits is the byte offset into the compressed stream of a block.
|
||||||
|
* Lower 16 bits is the byte offset into the uncompressed stream inside the block.
|
||||||
|
*/
|
||||||
|
public long getFilePointer(){
|
||||||
|
return BGZFFilePointerUtil.makeFilePointer(mBlockAddress, numUncompressedBytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getPosition() {
|
||||||
|
return getFilePointer();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempt to write the data in uncompressedBuffer to the underlying file in a gzip block.
|
||||||
|
* If the entire uncompressedBuffer does not fit in the maximum allowed size, reduce the amount
|
||||||
|
* of data to be compressed, and slide the excess down in uncompressedBuffer so it can be picked
|
||||||
|
* up in the next deflate event.
|
||||||
|
* @return size of gzip block that was written.
|
||||||
|
*/
|
||||||
|
private int deflateBlock() throws IOException {
|
||||||
|
if (numUncompressedBytes == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
final int bytesToCompress = numUncompressedBytes;
|
||||||
|
// Compress the input
|
||||||
|
deflater.reset();
|
||||||
|
deflater.setInput(uncompressedBuffer, 0, bytesToCompress);
|
||||||
|
deflater.finish();
|
||||||
|
int compressedSize = deflater.deflate(compressedBuffer, 0, compressedBuffer.length);
|
||||||
|
|
||||||
|
// If it didn't all fit in compressedBuffer.length, set compression level to NO_COMPRESSION
|
||||||
|
// and try again. This should always fit.
|
||||||
|
if (!deflater.finished()) {
|
||||||
|
noCompressionDeflater.reset();
|
||||||
|
noCompressionDeflater.setInput(uncompressedBuffer, 0, bytesToCompress);
|
||||||
|
noCompressionDeflater.finish();
|
||||||
|
compressedSize = noCompressionDeflater.deflate(compressedBuffer, 0, compressedBuffer.length);
|
||||||
|
if (!noCompressionDeflater.finished()) {
|
||||||
|
throw new IllegalStateException("unpossible");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Data compressed small enough, so write it out.
|
||||||
|
crc32.reset();
|
||||||
|
crc32.update(uncompressedBuffer, 0, bytesToCompress);
|
||||||
|
|
||||||
|
final int totalBlockSize = writeGzipBlock(compressedSize, bytesToCompress, crc32.getValue());
|
||||||
|
|
||||||
|
// Clear out from uncompressedBuffer the data that was written
|
||||||
|
numUncompressedBytes = 0;
|
||||||
|
mBlockAddress += totalBlockSize;
|
||||||
|
return totalBlockSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes the entire gzip block, assuming the compressed data is stored in compressedBuffer
|
||||||
|
* @return size of gzip block that was written.
|
||||||
|
*/
|
||||||
|
private int writeGzipBlock(final int compressedSize, final int uncompressedSize, final long crc) throws IOException {
|
||||||
|
// Init gzip header
|
||||||
|
codec.writeByte(BGZFStreamConstants.GZIP_ID1);
|
||||||
|
codec.writeByte(BGZFStreamConstants.GZIP_ID2);
|
||||||
|
codec.writeByte(BGZFStreamConstants.GZIP_CM_DEFLATE);
|
||||||
|
codec.writeByte(BGZFStreamConstants.GZIP_FLG);
|
||||||
|
codec.writeInt(0); // Modification time
|
||||||
|
codec.writeByte(BGZFStreamConstants.GZIP_XFL);
|
||||||
|
codec.writeByte(BGZFStreamConstants.GZIP_OS_UNKNOWN);
|
||||||
|
codec.writeShort(BGZFStreamConstants.GZIP_XLEN);
|
||||||
|
codec.writeByte(BGZFStreamConstants.BGZF_ID1);
|
||||||
|
codec.writeByte(BGZFStreamConstants.BGZF_ID2);
|
||||||
|
codec.writeShort(BGZFStreamConstants.BGZF_LEN);
|
||||||
|
final int totalBlockSize = compressedSize + BGZFStreamConstants.BLOCK_HEADER_LENGTH +
|
||||||
|
BGZFStreamConstants.BLOCK_FOOTER_LENGTH;
|
||||||
|
|
||||||
|
// I don't know why we store block size - 1, but that is what the spec says
|
||||||
|
codec.writeShort((short)(totalBlockSize - 1));
|
||||||
|
codec.writeBytes(compressedBuffer, 0, compressedSize);
|
||||||
|
codec.writeInt((int)crc);
|
||||||
|
codec.writeInt(uncompressedSize);
|
||||||
|
return totalBlockSize;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,114 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.util.zip.CRC32;
|
||||||
|
import java.util.zip.DataFormatException;
|
||||||
|
import java.util.zip.Inflater;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For decompressing GZIP blocks that are already loaded into a byte[].
|
||||||
|
* The main advantage is that this object can be used over and over again to decompress many blocks,
|
||||||
|
* whereas a new GZIPInputStream and ByteArrayInputStream would otherwise need to be created for each
|
||||||
|
* block to be decompressed.
|
||||||
|
*
|
||||||
|
* This code requires that the GZIP header conform to the GZIP blocks written to BAM files, with
|
||||||
|
* a specific subfield and no other optional stuff.
|
||||||
|
*/
|
||||||
|
public class BlockGunzipper {
|
||||||
|
private static InflaterFactory defaultInflaterFactory = new InflaterFactory();
|
||||||
|
private final Inflater inflater;
|
||||||
|
private final CRC32 crc32 = new CRC32();
|
||||||
|
private boolean checkCrcs = false;
|
||||||
|
BlockGunzipper() {
|
||||||
|
inflater = defaultInflaterFactory.makeInflater(true); // GZIP mode
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a BlockGunzipper using the provided inflaterFactory
|
||||||
|
* @param inflaterFactory
|
||||||
|
*/
|
||||||
|
BlockGunzipper(InflaterFactory inflaterFactory) {
|
||||||
|
inflater = inflaterFactory.makeInflater(true); // GZIP mode
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the default {@link InflaterFactory} that will be used for all instances unless specified otherwise in the constructor.
|
||||||
|
* If this method is not called the default is a factory that will create the JDK {@link Inflater}.
|
||||||
|
* @param inflaterFactory non-null default factory.
|
||||||
|
*/
|
||||||
|
public static void setDefaultInflaterFactory(final InflaterFactory inflaterFactory) {
|
||||||
|
if (inflaterFactory == null) {
|
||||||
|
throw new IllegalArgumentException("null inflaterFactory");
|
||||||
|
}
|
||||||
|
defaultInflaterFactory = inflaterFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static InflaterFactory getDefaultInflaterFactory() {
|
||||||
|
return defaultInflaterFactory;
|
||||||
|
}
|
||||||
|
/** Allows the caller to decide whether or not to check CRCs on when uncompressing blocks. */
|
||||||
|
public void setCheckCrcs(final boolean check) {
|
||||||
|
this.checkCrcs = check;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decompress GZIP-compressed data
|
||||||
|
* @param uncompressedBlock must be big enough to hold decompressed output.
|
||||||
|
* @param compressedBlock compressed data starting at offset 0
|
||||||
|
* @param compressedLength size of compressed data, possibly less than the size of the buffer.
|
||||||
|
*/
|
||||||
|
void unzipBlock(byte[] uncompressedBlock, byte[] compressedBlock, int compressedLength) {
|
||||||
|
try {
|
||||||
|
ByteBuffer byteBuffer = ByteBuffer.wrap(compressedBlock, 0, compressedLength);
|
||||||
|
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||||
|
|
||||||
|
// Validate GZIP header
|
||||||
|
if (byteBuffer.get() != BGZFStreamConstants.GZIP_ID1 ||
|
||||||
|
byteBuffer.get() != (byte)BGZFStreamConstants.GZIP_ID2 ||
|
||||||
|
byteBuffer.get() != BGZFStreamConstants.GZIP_CM_DEFLATE ||
|
||||||
|
byteBuffer.get() != BGZFStreamConstants.GZIP_FLG
|
||||||
|
) {
|
||||||
|
throw new BGZFFormatException("Invalid GZIP header");
|
||||||
|
}
|
||||||
|
// Skip MTIME, XFL, OS fields
|
||||||
|
byteBuffer.position(byteBuffer.position() + 6);
|
||||||
|
if (byteBuffer.getShort() != BGZFStreamConstants.GZIP_XLEN) {
|
||||||
|
throw new BGZFFormatException("Invalid GZIP header");
|
||||||
|
}
|
||||||
|
// Skip blocksize subfield intro
|
||||||
|
byteBuffer.position(byteBuffer.position() + 4);
|
||||||
|
// Read ushort
|
||||||
|
final int totalBlockSize = (byteBuffer.getShort() & 0xffff) + 1;
|
||||||
|
if (totalBlockSize != compressedLength) {
|
||||||
|
throw new BGZFFormatException("GZIP blocksize disagreement");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read expected size and CRD from end of GZIP block
|
||||||
|
final int deflatedSize = compressedLength - BGZFStreamConstants.BLOCK_HEADER_LENGTH - BGZFStreamConstants.BLOCK_FOOTER_LENGTH;
|
||||||
|
byteBuffer.position(byteBuffer.position() + deflatedSize);
|
||||||
|
int expectedCrc = byteBuffer.getInt();
|
||||||
|
int uncompressedSize = byteBuffer.getInt();
|
||||||
|
inflater.reset();
|
||||||
|
|
||||||
|
// Decompress
|
||||||
|
inflater.setInput(compressedBlock, BGZFStreamConstants.BLOCK_HEADER_LENGTH, deflatedSize);
|
||||||
|
final int inflatedBytes = inflater.inflate(uncompressedBlock, 0, uncompressedSize);
|
||||||
|
if (inflatedBytes != uncompressedSize) {
|
||||||
|
throw new BGZFFormatException("Did not inflate expected amount");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate CRC if so desired
|
||||||
|
if (this.checkCrcs) {
|
||||||
|
crc32.reset();
|
||||||
|
crc32.update(uncompressedBlock, 0, uncompressedSize);
|
||||||
|
final long crc = crc32.getValue();
|
||||||
|
if ((int)crc != expectedCrc) {
|
||||||
|
throw new BGZFFormatException("CRC mismatch");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (DataFormatException e) {
|
||||||
|
throw new BGZFException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,24 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
import java.util.zip.Deflater;
|
||||||
|
|
||||||
|
/**
 * Creates the {@link Deflater} instances used by {@link BlockCompressedOutputStream}.
 * Extend this class to plug in an alternative deflater (e.g., for improved performance).
 */
public class DeflaterFactory {

    public DeflaterFactory() {
        // Explicit constructor so that references to it are easy to search for.
    }

    /**
     * Creates the deflater that will be used when writing BAM files.
     * Subclasses may override to provide their own deflater implementation.
     *
     * @param compressionLevel the compression level (0-9)
     * @param gzipCompatible if true then use GZIP compatible compression
     * @return a new JDK {@link Deflater} configured with the given arguments
     */
    public Deflater makeDeflater(final int compressionLevel, final boolean gzipCompatible) {
        final Deflater created = new Deflater(compressionLevel, gzipCompatible);
        return created;
    }
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
import java.util.zip.Inflater;
|
||||||
|
|
||||||
|
/**
 * Creates the {@link Inflater} instances used by {@link BlockGunzipper}.
 * Extend this class to plug in an alternative inflater (e.g., for improved performance).
 * Unless overridden, a JDK {@link Inflater} is returned.
 */
public class InflaterFactory {

    /**
     * Creates the inflater that will be used when reading DEFLATE compressed files.
     * Subclasses may override to provide their own inflater implementation.
     * Unless overridden, a JDK {@link Inflater} is returned.
     *
     * @param gzipCompatible if true then use GZIP compatible compression
     * @return a new inflater configured with the given flag
     */
    public Inflater makeInflater(final boolean gzipCompatible) {
        final Inflater created = new Inflater(gzipCompatible);
        return created;
    }
}
|
|
@ -0,0 +1,105 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A wrapper class to provide buffered read access to a SeekableStream. Just wrapping such a stream with
|
||||||
|
* a BufferedInputStream will not work as it does not support seeking. In this implementation a
|
||||||
|
* seek call is delegated to the wrapped stream, and the buffer reset.
|
||||||
|
*/
|
||||||
|
public class SeekableBufferedStream extends SeekableStream {
|
||||||
|
|
||||||
|
/** Little extension to buffered input stream to give access to the available bytes in the buffer. */
|
||||||
|
private static class ExtBufferedInputStream extends BufferedInputStream {
|
||||||
|
private ExtBufferedInputStream(final InputStream inputStream, final int i) {
|
||||||
|
super(inputStream, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the number of bytes that can be read from the buffer without reading more into the buffer. */
|
||||||
|
int getBytesInBufferAvailable() {
|
||||||
|
if (this.count == this.pos) return 0; // documented test for "is buffer empty"
|
||||||
|
else return this.buf.length - this.pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final int DEFAULT_BUFFER_SIZE = 512000;
|
||||||
|
|
||||||
|
final private int bufferSize;
|
||||||
|
final SeekableStream wrappedStream;
|
||||||
|
ExtBufferedInputStream bufferedStream;
|
||||||
|
long position;
|
||||||
|
|
||||||
|
public SeekableBufferedStream(final SeekableStream stream, final int bufferSize) {
|
||||||
|
this.bufferSize = bufferSize;
|
||||||
|
this.wrappedStream = stream;
|
||||||
|
this.position = 0;
|
||||||
|
bufferedStream = new ExtBufferedInputStream(wrappedStream, bufferSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SeekableBufferedStream(final SeekableStream stream) {
|
||||||
|
this(stream, DEFAULT_BUFFER_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long length() {
|
||||||
|
return wrappedStream.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long skip(final long skipLength) throws IOException {
|
||||||
|
if (skipLength < this.bufferedStream.getBytesInBufferAvailable()) {
|
||||||
|
final long retval = this.bufferedStream.skip(skipLength);
|
||||||
|
this.position += retval;
|
||||||
|
return retval;
|
||||||
|
} else {
|
||||||
|
final long position = this.position + skipLength;
|
||||||
|
seek(position);
|
||||||
|
return skipLength;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void seek(final long position) throws IOException {
|
||||||
|
this.position = position;
|
||||||
|
wrappedStream.seek(position);
|
||||||
|
bufferedStream = new ExtBufferedInputStream(wrappedStream, bufferSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read() throws IOException {
|
||||||
|
int b = bufferedStream.read();
|
||||||
|
position++;
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read(final byte[] buffer, final int offset, final int length) throws IOException {
|
||||||
|
final int nBytesRead = bufferedStream.read(buffer, offset, length);
|
||||||
|
if (nBytesRead > 0) {
|
||||||
|
position += nBytesRead;
|
||||||
|
}
|
||||||
|
return nBytesRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
wrappedStream.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean eof() throws IOException {
|
||||||
|
return position >= wrappedStream.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSource() {
|
||||||
|
return wrappedStream.getSource();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long position() throws IOException {
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,110 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
public class SeekableFileStream extends SeekableStream {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Collection of all open instances. SeekableFileStream objects are usually open and kept open for the
|
||||||
|
* duration of a session. This collection supports a method to close them all.
|
||||||
|
*/
|
||||||
|
private static final Collection<SeekableFileStream> allInstances = Collections.synchronizedCollection(new HashSet<>());
|
||||||
|
|
||||||
|
File file;
|
||||||
|
RandomAccessFile fis;
|
||||||
|
|
||||||
|
public SeekableFileStream(final File file) throws FileNotFoundException {
|
||||||
|
this.file = file;
|
||||||
|
fis = new RandomAccessFile(file, "r");
|
||||||
|
allInstances.add(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long length() {
|
||||||
|
return file.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean eof() throws IOException {
|
||||||
|
return fis.length() == fis.getFilePointer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void seek(final long position) throws IOException {
|
||||||
|
fis.seek(position);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long position() throws IOException {
|
||||||
|
return fis.getChannel().position();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long skip(long n) throws IOException {
|
||||||
|
long initPos = position();
|
||||||
|
fis.getChannel().position(initPos + n);
|
||||||
|
return position() - initPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read(final byte[] buffer, final int offset, final int length) throws IOException {
|
||||||
|
if (length < 0) {
|
||||||
|
throw new IndexOutOfBoundsException();
|
||||||
|
}
|
||||||
|
int n = 0;
|
||||||
|
while (n < length) {
|
||||||
|
final int count = fis.read(buffer, offset + n, length - n);
|
||||||
|
if (count < 0) {
|
||||||
|
if (n > 0) {
|
||||||
|
return n;
|
||||||
|
} else {
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
n += count;
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read() throws IOException {
|
||||||
|
return fis.read();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read(byte[] b) throws IOException {
|
||||||
|
return fis.read(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSource() {
|
||||||
|
return file.getAbsolutePath();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
allInstances.remove(this);
|
||||||
|
fis.close();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static synchronized void closeAllInstances() {
|
||||||
|
Collection<SeekableFileStream> clonedInstances = new HashSet<>(allInstances);
|
||||||
|
for (SeekableFileStream sfs : clonedInstances) {
|
||||||
|
try {
|
||||||
|
sfs.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
//
|
||||||
|
}
|
||||||
|
}
|
||||||
|
allInstances.clear();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
import java.io.EOFException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
 * An {@link InputStream} that additionally exposes its length and current position and can be
 * repositioned.
 */
public abstract class SeekableStream extends InputStream {

    /** @return the total length of the stream */
    public abstract long length();

    /** @return the current read position */
    public abstract long position() throws IOException;

    /** Moves the read position to the given absolute position. */
    public abstract void seek(long position) throws IOException;

    @Override
    public abstract int read(byte[] buffer, int offset, int length) throws IOException;

    @Override
    public abstract void close() throws IOException;

    /** @return whether the end of the stream has been reached */
    public abstract boolean eof() throws IOException;

    /**
     * @return String representation of source (e.g. URL, file path, etc.), or null if not available.
     */
    public abstract String getSource();

    /**
     * Keeps reading until the given array is completely filled.
     * @param b byte array
     * @throws EOFException If EOF is reached before buffer is filled
     */
    public void readFully(byte[] b) throws IOException {
        final int total = b.length;
        for (int filled = 0; filled < total; ) {
            final int got = read(b, filled, total - filled);
            if (got < 0) {
                throw new EOFException();
            }
            filled += got;
        }
    }
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
package org.xbib.io.compress.bgzf;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class SeekableStreamFactory {
|
||||||
|
|
||||||
|
public static SeekableStream getStreamFor(String path) throws IOException {
|
||||||
|
return new SeekableFileStream(new File(path));
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,3 +1,4 @@
|
||||||
|
include 'io-compress-bgzf'
|
||||||
include 'io-compress-bzip2'
|
include 'io-compress-bzip2'
|
||||||
include 'io-compress-lzf'
|
include 'io-compress-lzf'
|
||||||
include 'io-compress-xz'
|
include 'io-compress-xz'
|
||||||
|
|
Loading…
Reference in a new issue