Compare commits

..

10 commits

71 changed files with 3032 additions and 361 deletions

View file

@ -1,10 +1,12 @@
plugins {
id "de.marcphilipp.nexus-publish" version "0.4.0"
id "io.codearte.nexus-staging" version "0.21.1"
id 'maven-publish'
id 'signing'
id "io.github.gradle-nexus.publish-plugin" version "2.0.0-rc-1"
}
wrapper {
gradleVersion = "${project.property('gradle.wrapper.version')}"
gradleVersion = libs.versions.gradle.get()
distributionType = Wrapper.DistributionType.ALL
}
@ -25,9 +27,9 @@ ext {
subprojects {
apply plugin: 'java-library'
apply from: rootProject.file('gradle/ide/idea.gradle')
apply from: rootProject.file('gradle/compile/java.gradle')
apply from: rootProject.file('gradle/test/junit5.gradle')
apply from: rootProject.file('gradle/publishing/publication.gradle')
apply from: rootProject.file('gradle/publish/maven.gradle')
}
apply from: rootProject.file('gradle/publishing/sonatype.gradle')
apply from: rootProject.file('gradle/publish/sonatype.gradle')
apply from: rootProject.file('gradle/publish/forgejo.gradle')

View file

@ -1,5 +1,3 @@
group = org.xbib
name = archive
version = 1.0.0
gradle.wrapper.version = 6.4.1
version = 4.0.0

View file

@ -2,17 +2,12 @@
apply plugin: 'java-library'
java {
toolchain {
languageVersion = JavaLanguageVersion.of(21)
}
modularity.inferModulePath.set(true)
}
compileJava {
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11
}
compileTestJava {
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11
withSourcesJar()
withJavadocJar()
}
jar {
@ -21,15 +16,14 @@ jar {
}
}
task sourcesJar(type: Jar, dependsOn: classes) {
classifier 'sources'
from sourceSets.main.allSource
tasks.withType(JavaCompile) {
options.fork = true
options.forkOptions.jvmArgs += ['-Duser.language=en','-Duser.country=US']
options.compilerArgs << '-Xlint:all,-fallthrough'
options.encoding = 'UTF-8'
}
task javadocJar(type: Jar, dependsOn: javadoc) {
classifier 'javadoc'
javadoc {
options.addStringOption('Xdoclint:none', '-quiet')
options.encoding = 'UTF-8'
}
artifacts {
archives sourcesJar, javadocJar
}

View file

@ -0,0 +1,16 @@
// Adds a Maven publishing repository for the Forgejo package registry.
// The whole block is skipped unless a 'forgeJoToken' project property is present.
if (project.hasProperty('forgeJoToken')) {
publishing {
repositories {
maven {
url 'https://xbib.org/api/packages/joerg/maven'
// Authenticate with an HTTP header ("Authorization: token <forgeJoToken>")
// instead of username/password credentials.
credentials(HttpHeaderCredentials) {
name = "Authorization"
value = "token ${project.property('forgeJoToken')}"
}
// Header credentials only take effect when header authentication is enabled.
authentication {
header(HttpHeaderAuthentication)
}
}
}
}
}

27
gradle/publish/ivy.gradle Normal file
View file

@ -0,0 +1,27 @@
// Publishes the project's Java component as an Ivy module to https://xbib.org/repo.
apply plugin: 'ivy-publish'
publishing {
repositories {
ivy {
url = "https://xbib.org/repo"
}
}
publications {
ivy(IvyPublication) {
from components.java
// Extra metadata written into the generated Ivy descriptor.
descriptor {
license {
name = 'The Apache License, Version 2.0'
url = 'http://www.apache.org/licenses/LICENSE-2.0.txt'
}
author {
name = 'Jörg Prante'
// NOTE(review): this URL looks like a documentation placeholder — confirm the intended author URL.
url = 'http://example.com/users/jane'
}
// The description text is taken from the root project's 'description' extra property.
descriptor.description {
text = rootProject.ext.description
}
}
}
}
}

View file

@ -1,13 +1,10 @@
apply plugin: "de.marcphilipp.nexus-publish"
publishing {
publications {
mavenJava(MavenPublication) {
"${project.name}"(MavenPublication) {
from components.java
artifact sourcesJar
artifact javadocJar
pom {
artifactId = project.name
name = project.name
description = rootProject.ext.description
url = rootProject.ext.url
@ -49,16 +46,6 @@ publishing {
if (project.hasProperty("signing.keyId")) {
apply plugin: 'signing'
signing {
sign publishing.publications.mavenJava
}
}
nexusPublishing {
repositories {
sonatype {
username = project.property('ossrhUsername')
password = project.property('ossrhPassword')
packageGroup = "org.xbib"
}
sign publishing.publications."${project.name}"
}
}

View file

@ -0,0 +1,11 @@
// Configures the 'sonatype' repository of the nexusPublishing extension.
// Nothing is configured unless both 'ossrhUsername' and 'ossrhPassword' project properties are present.
if (project.hasProperty('ossrhUsername') && project.hasProperty('ossrhPassword')) {
nexusPublishing {
repositories {
sonatype {
username = project.property('ossrhUsername')
password = project.property('ossrhPassword')
packageGroup = "org.xbib"
}
}
}
}

View file

@ -1,11 +0,0 @@
if (project.hasProperty('ossrhUsername') && project.hasProperty('ossrhPassword')) {
apply plugin: 'io.codearte.nexus-staging'
nexusStaging {
username = project.property('ossrhUsername')
password = project.property('ossrhPassword')
packageGroup = "org.xbib"
}
}

View file

@ -1,12 +1,10 @@
def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.6.2'
def hamcrestVersion = project.hasProperty('hamcrest.version')?project.property('hamcrest.version'):'2.2'
dependencies {
testImplementation "org.junit.jupiter:junit-jupiter-api:${junitVersion}"
testImplementation "org.junit.jupiter:junit-jupiter-params:${junitVersion}"
testImplementation "org.hamcrest:hamcrest-library:${hamcrestVersion}"
testRuntimeOnly "org.junit.jupiter:junit-jupiter-engine:${junitVersion}"
testImplementation testLibs.junit.jupiter.api
testImplementation testLibs.junit.jupiter.params
testImplementation testLibs.hamcrest
testRuntimeOnly testLibs.junit.jupiter.engine
testRuntimeOnly testLibs.junit.jupiter.platform.launcher
}
test {

Binary file not shown.

View file

@ -1,5 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.4.1-all.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-all.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

294
gradlew vendored
View file

@ -1,7 +1,7 @@
#!/usr/bin/env sh
#!/bin/sh
#
# Copyright 2015 the original author or authors.
# Copyright © 2015-2021 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -17,67 +17,99 @@
#
##############################################################################
##
## Gradle start up script for UN*X
##
#
# Gradle start up script for POSIX generated by Gradle.
#
# Important for running:
#
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
# noncompliant, but you have some other compliant shell such as ksh or
# bash, then to run this script, type that shell name before the whole
# command line, like:
#
# ksh Gradle
#
# Busybox and similar reduced shells will NOT work, because this script
# requires all of these POSIX shell features:
# * functions;
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
# * compound commands having a testable exit status, especially «case»;
# * various built-in commands including «command», «set», and «ulimit».
#
# Important for patching:
#
# (2) This script targets any POSIX shell, so it avoids extensions provided
# by Bash, Ksh, etc; in particular arrays are avoided.
#
# The "traditional" practice of packing multiple parameters into a
# space-separated string is a well documented source of bugs and security
# problems, so this is (mostly) avoided, by progressively accumulating
# options in "$@", and eventually passing that to Java.
#
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
# see the in-line comments for details.
#
# There are tweaks for specific operating systems such as AIX, CygWin,
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
app_path=$0
# Need this for daisy-chained symlinks.
while
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
[ -h "$app_path" ]
do
ls=$( ls -ld "$app_path" )
link=${ls#*' -> '}
case $link in #(
/*) app_path=$link ;; #(
*) app_path=$APP_HOME$link ;;
esac
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
MAX_FD=maximum
warn () {
echo "$*"
}
} >&2
die () {
echo
echo "$*"
echo
exit 1
}
} >&2
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
case "$( uname )" in #(
CYGWIN* ) cygwin=true ;; #(
Darwin* ) darwin=true ;; #(
MSYS* | MINGW* ) msys=true ;; #(
NONSTOP* ) nonstop=true ;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
@ -87,9 +119,9 @@ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
JAVACMD=$JAVA_HOME/jre/sh/java
else
JAVACMD="$JAVA_HOME/bin/java"
JAVACMD=$JAVA_HOME/bin/java
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
@ -98,88 +130,120 @@ Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
JAVACMD=java
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=`expr $i + 1`
done
case $i in
0) set -- ;;
1) set -- "$args0" ;;
2) set -- "$args0" "$args1" ;;
3) set -- "$args0" "$args1" "$args2" ;;
4) set -- "$args0" "$args1" "$args2" "$args3" ;;
5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
fi
# Escape application args
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=`save "$@"`
# Collect all arguments for the java command, stacking in reverse order:
# * args from the command line
# * the main class name
# * -classpath
# * -D...appname settings
# * --module-path (only if needed)
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
JAVACMD=$( cygpath --unix "$JAVACMD" )
# Now convert the arguments - kludge to limit ourselves to /bin/sh
for arg do
if
case $arg in #(
-*) false ;; # don't mess with options #(
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
[ -e "$t" ] ;; #(
*) false ;;
esac
then
arg=$( cygpath --path --ignore --mixed "$arg" )
fi
# Roll the args list around exactly as many times as the number of
# args, so each arg winds up back in the position where it started, but
# possibly modified.
#
# NB: a `for` loop captures its iteration list before it begins, so
# changing the positional parameters here affects neither the number of
# iterations, nor the values presented in `arg`.
shift # remove old arg
set -- "$@" "$arg" # push replacement arg
done
fi
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
-classpath "$CLASSPATH" \
org.gradle.wrapper.GradleWrapperMain \
"$@"
# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
die "xargs is not available"
fi
# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
# set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor process substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#
eval "set -- $(
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
xargs -n1 |
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
tr '\n' ' '
)" '"$@"'
exec "$JAVACMD" "$@"

34
gradlew.bat vendored
View file

@ -14,7 +14,7 @@
@rem limitations under the License.
@rem
@if "%DEBUG%" == "" @echo off
@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@ -25,7 +25,8 @@
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@ -40,7 +41,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
if %ERRORLEVEL% equ 0 goto execute
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
@ -54,7 +55,7 @@ goto fail
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
if exist "%JAVA_EXE%" goto execute
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
@ -64,21 +65,6 @@ echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
if not "%OS%" == "Windows_NT" goto win9xME_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
:execute
@rem Setup the command line
@ -86,17 +72,19 @@ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
if %ERRORLEVEL% equ 0 goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%
:mainEnd
if "%OS%"=="Windows_NT" endlocal

View file

@ -1,4 +1,4 @@
module org.xbib.io.archive.ar {
exports org.xbib.io.archive.ar;
requires org.xbib.io.archive;
requires transitive org.xbib.io.archive;
}

View file

@ -24,7 +24,6 @@ import java.util.Date;
* can read but not write the GNU variant and doesn't support
* the BSD variant at all.
*
* <a href="http://www.freebsd.org/cgi/man.cgi?query=ar&sektion=5">ar man page</a>
*/
public class ArArchiveEntry implements ArchiveEntry {

View file

@ -1,4 +1,4 @@
module org.xbib.io.archive.cpio {
exports org.xbib.io.archive.cpio;
requires org.xbib.io.archive;
requires transitive org.xbib.io.archive;
}

View file

@ -1,6 +1,5 @@
package org.xbib.io.archive.cpio;
import org.xbib.io.archive.entry.ArchiveEntry;
import org.xbib.io.archive.stream.ArchiveInputStream;
import org.xbib.io.archive.util.ArchiveUtils;
@ -33,7 +32,7 @@ import java.io.InputStream;
* Note: This implementation should be compatible to cpio 2.5
*/
public class CpioArchiveInputStream extends ArchiveInputStream implements CpioConstants {
public class CpioArchiveInputStream extends ArchiveInputStream<CpioArchiveEntry> implements CpioConstants {
private boolean closed = false;
@ -380,7 +379,7 @@ public class CpioArchiveInputStream extends ArchiveInputStream implements CpioCo
}
@Override
public ArchiveEntry getNextEntry() throws IOException {
public CpioArchiveEntry getNextEntry() throws IOException {
return getNextCPIOEntry();
}

View file

@ -1,4 +1,4 @@
module org.xbib.io.archive.dump {
exports org.xbib.io.archive.dump;
requires org.xbib.io.archive;
requires transitive org.xbib.io.archive;
}

View file

@ -2,10 +2,10 @@ package org.xbib.io.archive.dump;
import java.io.IOException;
/**
* Dump Archive Exception
*/
@SuppressWarnings("serial")
public class DumpArchiveException extends IOException {
public DumpArchiveException(String msg) {

View file

@ -19,9 +19,9 @@ import java.util.Stack;
* the archive, and the read each entry as a normal input stream
* using read().
*/
public class DumpArchiveInputStream extends ArchiveInputStream {
public class DumpArchiveInputStream extends ArchiveInputStream<DumpArchiveEntry> {
private DumpArchiveSummary summary;
private final DumpArchiveSummary summary;
private DumpArchiveEntry active;
@ -35,7 +35,7 @@ public class DumpArchiveInputStream extends ArchiveInputStream {
private int readIdx;
private byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
private byte[] blockBuffer;
@ -46,13 +46,13 @@ public class DumpArchiveInputStream extends ArchiveInputStream {
protected TapeInputStream raw;
// map of ino -> dirent entry. We can use this to reconstruct full paths.
private Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();
private final Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();
// map of ino -> (directory) entry when we're missing one or more elements in the path.
private Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();
private final Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();
// queue of (directory) entries where we now have the full path.
private Queue<DumpArchiveEntry> queue;
private final Queue<DumpArchiveEntry> queue;
/**
* Constructor.

View file

@ -1,11 +1,10 @@
package org.xbib.io.archive.dump;
/**
* Invalid Format Exception. There was an error decoding a
* tape segment header.
*/
@SuppressWarnings("serial")
public class InvalidFormatException extends DumpArchiveException {
protected long offset;

View file

@ -8,18 +8,24 @@ import java.util.Arrays;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
/**
* Filter stream that mimics a physical tape drive capable of compressing
* the data stream
*/
class TapeInputStream extends FilterInputStream {
public class TapeInputStream extends FilterInputStream {
private byte[] blockBuffer = new byte[DumpArchiveConstants.TP_SIZE];
private int currBlkIdx = -1;
private int blockSize = DumpArchiveConstants.TP_SIZE;
private int recordSize = DumpArchiveConstants.TP_SIZE;
private int readOffset = DumpArchiveConstants.TP_SIZE;
private boolean isCompressed = false;
private long bytesRead = 0;
/**

View file

@ -1,5 +1,4 @@
module org.xbib.io.archive.jar {
exports org.xbib.io.archive.jar;
requires org.xbib.io.archive;
requires org.xbib.io.archive.zip;
requires transitive org.xbib.io.archive.zip;
}

View file

@ -1,7 +1,6 @@
package org.xbib.io.archive.jar;
import org.xbib.io.archive.entry.ArchiveEntry;
import org.xbib.io.archive.zip.ZipArchiveEntry;
import org.xbib.io.archive.zip.ZipArchiveInputStream;
@ -11,7 +10,7 @@ import java.io.InputStream;
/**
* Implements an input stream that can read entries from jar files.
*/
public class JarArchiveInputStream extends ZipArchiveInputStream {
public class JarArchiveInputStream extends ZipArchiveInputStream<JarArchiveEntry> {
public JarArchiveInputStream(final InputStream inputStream) {
super(inputStream);
@ -23,7 +22,7 @@ public class JarArchiveInputStream extends ZipArchiveInputStream {
}
@Override
public ArchiveEntry getNextEntry() throws IOException {
public JarArchiveEntry getNextEntry() throws IOException {
return getNextJarEntry();
}

View file

@ -1,4 +1,4 @@
module org.xbib.io.archive.tar {
exports org.xbib.io.archive.tar;
requires org.xbib.io.archive;
requires transitive org.xbib.io.archive;
}

View file

@ -546,6 +546,207 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry {
parseTarHeader(header, encoding, false);
}
/**
 * Serializes this entry's header fields into the supplied header buffer.
 * <p>
 * The fields are written back to back in this order: name, mode, user id,
 * group id, size, modification time, checksum, link flag, link name, magic,
 * version, user name, group name, device major and device minor. Whatever
 * remains of the buffer is zeroed. The checksum field is first filled with
 * spaces, then the checksum is computed over the complete buffer and written
 * into that field.
 *
 * @param outbuf   the tar entry header buffer to fill in
 * @param encoding the encoding used for the file name, link name, user name
 *                 and group name
 * @param starMode whether numeric fields that do not fit the standard octal
 *                 representation may use the star/GNU tar/BSD tar extension
 * @throws IOException propagated from {@code ArchiveUtils.formatNameBytes}
 */
public void writeEntryHeader(byte[] outbuf, ArchiveEntryEncoding encoding, boolean starMode) throws IOException {
    int pos = ArchiveUtils.formatNameBytes(name, outbuf, 0, NAMELEN, encoding);
    pos = writeEntryHeaderField(mode, outbuf, pos, MODELEN, starMode);
    pos = writeEntryHeaderField(userId, outbuf, pos, UIDLEN, starMode);
    pos = writeEntryHeaderField(groupId, outbuf, pos, GIDLEN, starMode);
    pos = writeEntryHeaderField(size, outbuf, pos, SIZELEN, starMode);
    pos = writeEntryHeaderField(modTime, outbuf, pos, MODTIMELEN, starMode);

    // Blank out the checksum field; the real value is filled in at the very end.
    final int checksumStart = pos;
    final int checksumEnd = checksumStart + CHKSUMLEN;
    while (pos < checksumEnd) {
        outbuf[pos] = (byte) ' ';
        pos++;
    }

    outbuf[pos] = linkFlag;
    pos++;
    pos = ArchiveUtils.formatNameBytes(linkName, outbuf, pos, NAMELEN, encoding);
    pos = ArchiveUtils.formatNameBytes(MAGIC_POSIX, outbuf, pos, MAGICLEN);
    pos = ArchiveUtils.formatNameBytes(version, outbuf, pos, VERSIONLEN);
    pos = ArchiveUtils.formatNameBytes(userName, outbuf, pos, UNAMELEN, encoding);
    pos = ArchiveUtils.formatNameBytes(groupName, outbuf, pos, GNAMELEN, encoding);
    pos = writeEntryHeaderField(devMajor, outbuf, pos, DEVLEN, starMode);
    pos = writeEntryHeaderField(devMinor, outbuf, pos, DEVLEN, starMode);

    // Zero the unused tail of the header block.
    for (; pos < outbuf.length; pos++) {
        outbuf[pos] = 0;
    }

    formatCheckSumOctalBytes(computeCheckSum(outbuf), outbuf, checksumStart, CHKSUMLEN);
}
/**
 * Writes one numeric header field.
 * <p>
 * Outside star mode, a value that is negative or too large for the octal
 * digits of the field is replaced by zero; such a value is expected to be
 * carried by a PAX header or to cause an error elsewhere. In every other
 * case the value is written as octal, or as binary when octal is too short.
 *
 * @param value    the number to write
 * @param outbuf   the header buffer
 * @param offset   the position of the field in the buffer
 * @param length   the length of the field
 * @param starMode whether the binary extension may be used for oversized values
 * @return the offset just past the field
 */
private int writeEntryHeaderField(long value, byte[] outbuf, int offset, int length, boolean starMode) {
    // One byte of the field is the trailer, each remaining byte holds three bits.
    final long octalLimit = 1L << (3 * (length - 1));
    final boolean fitsAsOctal = value >= 0 && value < octalLimit;
    if (starMode || fitsAsOctal) {
        return formatLongOctalOrBinaryBytes(value, outbuf, offset, length);
    }
    return formatLongOctalBytes(0, outbuf, offset, length);
}
/**
* Write a long integer into a buffer as an octal string if this
* will fit, or as a binary number otherwise.
* <p>
* A non-negative value that does not exceed the octal maximum chosen for the
* field length is delegated to {@link #formatLongOctalBytes}, which uses
* {@link #formatUnsignedOctalString} to format the value as an octal string
* with leading zeros, followed by a space.
* Any other value is written in binary form and the first byte of the field
* is set to a marker: 0xff for negative values, 0x80 otherwise.
*
* @param value The value to write into the buffer.
* @param buf The destination buffer.
* @param offset The starting offset into the buffer.
* @param length The length of the buffer.
* @return The updated offset.
* @throws IllegalArgumentException if the value (and trailer)
* will not fit in the buffer.
*/
private int formatLongOctalOrBinaryBytes(final long value, byte[] buf, final int offset, final int length) {
// Check whether we are dealing with UID/GID or SIZE field
final long maxAsOctalChar = length == UIDLEN ? MAXID : MAXSIZE;
final boolean negative = value < 0;
if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
return formatLongOctalBytes(value, buf, offset, length);
}
// For fields shorter than 9 bytes formatLongBinary runs first; it throws
// IllegalArgumentException when the magnitude is too large for the field.
if (length < 9) {
formatLongBinary(value, buf, offset, length, negative);
}
// NOTE(review): there is no 'else' here, so formatBigIntegerBinary also runs for
// short fields and rewrites the bytes written just above. Confirm whether an
// if/else was intended before changing it; the result currently depends on this order.
formatBigIntegerBinary(value, buf, offset, length, negative);
// The leading byte is replaced by the binary marker after the value bytes are in place.
buf[offset] = (byte) (negative ? 0xff : 0x80);
return offset + length;
}
/**
 * Writes a value into a field as a big-endian two's complement binary number.
 * <p>
 * All {@code length} bytes starting at {@code offset} are written; the
 * magnitude of the value has to fit into {@code length - 1} bytes. The caller
 * is expected to overwrite the first byte with its own marker afterwards.
 * This method is meant for fields of at most 8 bytes.
 *
 * @param value    the value to write
 * @param buf      the destination buffer
 * @param offset   the position of the field in the buffer
 * @param length   the length of the field in bytes
 * @param negative whether {@code value} is negative
 * @throws IllegalArgumentException if the magnitude of the value does not fit
 *                                  into {@code length - 1} bytes
 */
private void formatLongBinary(final long value, byte[] buf, final int offset, final int length, final boolean negative) {
    final int bits = (length - 1) * 8;
    final long max = 1L << bits;
    // Math.abs(Long.MIN_VALUE) stays negative, so it has to be rejected explicitly.
    long val = Math.abs(value);
    if (val < 0 || val >= max) {
        throw new IllegalArgumentException("Value " + value +
                " is too large for " + length + " byte field.");
    }
    if (negative) {
        // Two's complement within the low 'bits' bits, then sign-fill the top byte.
        val ^= max - 1;
        val++;
        // The mask must be a long: an int shift would wrap the shift distance
        // modulo 32 and sign-extend, clobbering value bits.
        val |= 0xffL << bits;
    }
    for (int i = offset + length - 1; i >= offset; i--) {
        buf[i] = (byte) val;
        val >>= 8;
    }
}
/**
 * Writes a value into a field using the two's complement bytes produced by
 * {@link BigInteger#toByteArray()}.
 * <p>
 * The encoded bytes are right-aligned in the field. The bytes between the
 * first byte of the field and the encoded value are padded with 0xff for
 * negative values and with zero otherwise. The first byte of the field is
 * only touched when the encoding is as long as the field.
 *
 * @param value    the value to write
 * @param buf      the destination buffer
 * @param offset   the position of the field in the buffer
 * @param length   the length of the field in bytes
 * @param negative whether {@code value} is negative
 */
private void formatBigIntegerBinary(final long value, byte[] buf,
                                    final int offset,
                                    final int length,
                                    final boolean negative) {
    final byte[] encoded = BigInteger.valueOf(value).toByteArray();
    final int start = offset + length - encoded.length;
    System.arraycopy(encoded, 0, buf, start, encoded.length);
    final byte padding = negative ? (byte) 0xff : (byte) 0;
    int i = offset + 1;
    while (i < start) {
        buf[i] = padding;
        i++;
    }
}
/**
 * Writes a checksum into a field as octal digits.
 * <p>
 * Uses {@link #formatUnsignedOctalString} to write the value with leading
 * zeros into the first {@code length - 2} bytes of the field. The digits are
 * followed by a NUL byte and then a space.
 *
 * @param value  the value to convert
 * @param buf    the destination buffer
 * @param offset the starting offset of the field in the buffer
 * @param length the size of the field
 * @return the updated offset, i.e. {@code offset + length}
 * @throws IllegalArgumentException if the value (and trailer) will not fit in the field
 */
private int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) {
    final int digits = length - 2; // room for the NUL and the space
    formatUnsignedOctalString(value, buf, offset, digits);
    final int nulPos = offset + digits;
    buf[nulPos] = 0;
    buf[nulPos + 1] = (byte) ' ';
    return offset + length;
}
/**
 * Writes a long value into a field as octal digits.
 * <p>
 * Uses {@link #formatUnsignedOctalString} to write the value with leading
 * zeros into all but the last byte of the field. The last byte is set to a
 * space.
 *
 * @param value  the value to write as octal
 * @param buf    the destination buffer
 * @param offset the starting offset of the field in the buffer
 * @param length the length of the field
 * @return the updated offset, i.e. {@code offset + length}
 * @throws IllegalArgumentException if the value (and trailer) will not fit in the field
 */
private int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) {
    final int end = offset + length;
    formatUnsignedOctalString(value, buf, offset, length - 1);
    buf[end - 1] = (byte) ' '; // trailing space
    return end;
}
/**
 * Fills part of a buffer with an unsigned octal number, right-aligned and
 * padded on the left with {@code '0'} characters.
 *
 * @param value  the number to convert to octal; treated as unsigned
 * @param buffer the destination buffer
 * @param offset the starting offset in the buffer
 * @param length the number of bytes to fill
 * @throws IllegalArgumentException if the value needs more than {@code length} octal digits
 */
private void formatUnsignedOctalString(final long value, byte[] buffer, final int offset, final int length) {
    // Digits are produced least significant first, so write from the right edge.
    int pos = length - 1;
    if (value == 0) {
        buffer[offset + pos] = (byte) '0';
        pos--;
    } else {
        long rest = value;
        while (pos >= 0 && rest != 0) {
            buffer[offset + pos] = (byte) ('0' + (int) (rest & 7));
            rest >>>= 3;
            pos--;
        }
        if (rest != 0) {
            throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length);
        }
    }
    // Whatever is left of the field becomes leading zeros.
    while (pos >= 0) {
        buffer[offset + pos] = (byte) '0';
        pos--;
    }
}
/**
 * Computes the checksum of a tar entry header by adding up all of its bytes,
 * each taken as an unsigned value.
 *
 * @param buf the tar entry's header buffer
 * @return the sum of the unsigned byte values
 */
private long computeCheckSum(final byte[] buf) {
    long total = 0;
    for (int i = 0; i < buf.length; i++) {
        total += buf[i] & 0xff;
    }
    return total;
}
private void parseTarHeader(byte[] header, ArchiveEntryEncoding encoding, final boolean oldStyle)
throws IOException {
int offset = 0;

View file

@ -10,7 +10,7 @@ import java.math.BigInteger;
import java.util.Date;
/**
* This class represents an entry in a Tar archive for output
* This class represents an entry in a Tar archive for output.
*/
public class TarArchiveOutputEntry implements TarConstants, ArchiveEntry {

View file

@ -7,7 +7,7 @@ import org.xbib.io.archive.entry.ArchiveEntryEncodingHelper;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@ -15,7 +15,7 @@ import java.util.Map;
/**
* The TarOutputStream writes a UNIX tar archive as an output stream
*/
public class TarArchiveOutputStream extends ArchiveOutputStream<TarArchiveOutputEntry> implements TarConstants {
public class TarArchiveOutputStream extends ArchiveOutputStream<TarArchiveEntry> implements TarConstants {
private static final ArchiveEntryEncoding ASCII = ArchiveEntryEncodingHelper.getEncoding("ASCII");
@ -239,8 +239,8 @@ public class TarArchiveOutputStream extends ArchiveOutputStream<TarArchiveOutput
}
@Override
public TarArchiveOutputEntry newArchiveEntry() {
return new TarArchiveOutputEntry();
public TarArchiveEntry newArchiveEntry() {
return new TarArchiveEntry();
}
/**
@ -257,7 +257,7 @@ public class TarArchiveOutputStream extends ArchiveOutputStream<TarArchiveOutput
* @throws ClassCastException if archiveEntry is not an instance of TarArchiveEntry
*/
@Override
public void putArchiveEntry(TarArchiveOutputEntry archiveEntry) throws IOException {
public void putArchiveEntry(TarArchiveEntry archiveEntry) throws IOException {
if (finished) {
throw new IOException("Stream has already been finished");
}
@ -272,7 +272,7 @@ public class TarArchiveOutputStream extends ArchiveOutputStream<TarArchiveOutput
} else if (longFileMode == LONGFILE_GNU) {
// create a TarEntry for the LongLink, the contents
// of which are the entry's name
TarArchiveOutputEntry longLinkEntry = new TarArchiveOutputEntry(GNU_LONGLINK, LF_GNUTYPE_LONGNAME);
TarArchiveEntry longLinkEntry = new TarArchiveEntry(GNU_LONGLINK, LF_GNUTYPE_LONGNAME);
longLinkEntry.setEntrySize(nameBytes.length + 1); // +1 for NUL
putArchiveEntry(longLinkEntry);
write(nameBytes);
@ -440,7 +440,7 @@ public class TarArchiveOutputStream extends ArchiveOutputStream<TarArchiveOutput
if (name.length() >= NAMELEN) {
name = name.substring(0, NAMELEN - 1);
}
TarArchiveOutputEntry pex = new TarArchiveOutputEntry(name, LF_PAX_EXTENDED_HEADER_LC);
TarArchiveEntry pex = new TarArchiveEntry(name, LF_PAX_EXTENDED_HEADER_LC);
StringWriter w = new StringWriter();
for (Map.Entry<String, String> h : headers.entrySet()) {
String key = h.getKey();
@ -449,7 +449,7 @@ public class TarArchiveOutputStream extends ArchiveOutputStream<TarArchiveOutput
+ 3 /* blank, equals and newline */
+ 2 /* guess 9 < actual length < 100 */;
String line = len + " " + key + "=" + value + "\n";
int actualLength = line.getBytes(Charset.forName("UTF-8")).length;
int actualLength = line.getBytes(StandardCharsets.UTF_8).length;
while (len != actualLength) {
// Adjust for cases where length < 10 or > 100
// or where UTF-8 encoding isn't a single octet
@ -458,11 +458,11 @@ public class TarArchiveOutputStream extends ArchiveOutputStream<TarArchiveOutput
// first pass so we'd need a second.
len = actualLength;
line = len + " " + key + "=" + value + "\n";
actualLength = line.getBytes(Charset.forName("UTF-8")).length;
actualLength = line.getBytes(StandardCharsets.UTF_8).length;
}
w.write(line);
}
byte[] data = w.toString().getBytes(Charset.forName("UTF-8"));
byte[] data = w.toString().getBytes(StandardCharsets.UTF_8);
pex.setEntrySize(data.length);
putArchiveEntry(pex);
write(data);
@ -499,7 +499,7 @@ public class TarArchiveOutputStream extends ArchiveOutputStream<TarArchiveOutput
}
private void addPaxHeadersForBigNumbers(Map<String, String> paxHeaders,
TarArchiveOutputEntry entry) {
TarArchiveEntry entry) {
addPaxHeaderForBigNumber(paxHeaders, "size", entry.getEntrySize(), MAXSIZE);
addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getGroupId(), MAXID);
addPaxHeaderForBigNumber(paxHeaders, "mtime", entry.getLastModified().getTime() / 1000, MAXSIZE);
@ -519,7 +519,7 @@ public class TarArchiveOutputStream extends ArchiveOutputStream<TarArchiveOutput
}
}
private void failForBigNumbers(TarArchiveOutputEntry entry) {
private void failForBigNumbers(TarArchiveEntry entry) {
failForBigNumber("entry size", entry.getEntrySize(), MAXSIZE);
failForBigNumber("group id", entry.getGroupId(), MAXID);
failForBigNumber("last modification time", entry.getLastModified().getTime() / 1000, MAXSIZE);

View file

@ -9,7 +9,7 @@ import org.junit.jupiter.api.Test;
public class TarTest {
@Test
public void testTar() throws IOException {
public void testReadTar() throws IOException {
InputStream in = getClass().getResourceAsStream("test.tar");
TarArchiveInputStream tarArchiveInputStream = new TarArchiveInputStream(in);
byte[] buffer = new byte[1024];

View file

@ -1,4 +1,4 @@
module org.xbib.io.archive.zip {
exports org.xbib.io.archive.zip;
requires org.xbib.io.archive;
requires transitive org.xbib.io.archive;
}

View file

@ -1,4 +1,3 @@
package org.xbib.io.archive.zip;
import java.util.ArrayList;
@ -20,7 +19,7 @@ public class ExtraFieldUtils {
private static final Map<ZipShort, Class<?>> implementations;
static {
implementations = new HashMap<ZipShort, Class<?>>();
implementations = new HashMap<>();
register(AsiExtraField.class);
register(JarMarker.class);
register(UnicodePathExtraField.class);
@ -37,14 +36,10 @@ public class ExtraFieldUtils {
*/
public static void register(Class<?> c) {
try {
ZipExtraField ze = (ZipExtraField) c.newInstance();
ZipExtraField ze = (ZipExtraField) c.getDeclaredConstructor().newInstance();
implementations.put(ze.getHeaderId(), c);
} catch (ClassCastException cc) {
throw new RuntimeException(c + " doesn\'t implement ZipExtraField");
} catch (InstantiationException ie) {
throw new RuntimeException(c + " is not a concrete class");
} catch (IllegalAccessException ie) {
throw new RuntimeException(c + "\'s no-arg constructor is not public");
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@ -54,14 +49,13 @@ public class ExtraFieldUtils {
*
* @param headerId the header identifier
* @return an instance of the appropriate ExtraField
* @throws InstantiationException if unable to instantiate the class
* @throws IllegalAccessException if not allowed to instatiate the class
* @throws Exception if unable to instantiate the class
*/
public static ZipExtraField createExtraField(ZipShort headerId)
throws InstantiationException, IllegalAccessException {
throws Exception {
Class<?> c = implementations.get(headerId);
if (c != null) {
return (ZipExtraField) c.newInstance();
return (ZipExtraField) c.getDeclaredConstructor().newInstance();
}
UnrecognizedExtraField u = new UnrecognizedExtraField();
u.setHeaderId(headerId);
@ -157,7 +151,7 @@ public class ExtraFieldUtils {
length);
}
v.add(ze);
} catch (InstantiationException | IllegalAccessException ie) {
} catch (Exception ie) {
throw new ZipException(ie.getMessage());
}
start += (length + WORD);

View file

@ -7,6 +7,7 @@ import java.util.zip.ZipException;
* Exception thrown when attempting to read or write data for a zip
* entry that uses ZIP features not supported by this library.
*/
@SuppressWarnings("serial")
public class UnsupportedZipFeatureException extends ZipException {
private final Feature reason;

View file

@ -8,6 +8,7 @@ import java.util.zip.ZipException;
* support to an archive and {@link ZipArchiveOutputStream#setUseZip64
* UseZip64} has been set to {@link Zip64Mode#Never Never}.
*/
@SuppressWarnings("serial")
public class Zip64RequiredException extends ZipException {
/**

View file

@ -1,6 +1,5 @@
package org.xbib.io.archive.zip;
import org.xbib.io.archive.entry.ArchiveEntry;
import org.xbib.io.archive.stream.ArchiveInputStream;
import org.xbib.io.archive.entry.ArchiveEntryEncoding;
import org.xbib.io.archive.entry.ArchiveEntryEncodingHelper;
@ -34,7 +33,7 @@ import static org.xbib.io.archive.zip.ZipConstants.ZIP64_MAGIC;
*
* @see ZipFile
*/
public class ZipArchiveInputStream extends ArchiveInputStream {
public class ZipArchiveInputStream<E extends ZipArchiveEntry> extends ArchiveInputStream<E> {
/**
* The zip encoding to use for filenames and the file comment.
@ -252,9 +251,10 @@ public class ZipArchiveInputStream extends ArchiveInputStream {
}
}
@SuppressWarnings("unchecked")
@Override
public ArchiveEntry getNextEntry() throws IOException {
return getNextZipEntry();
public E getNextEntry() throws IOException {
return (E) getNextZipEntry();
}
@Override

View file

@ -562,9 +562,10 @@ public class ZipArchiveOutputStream<E extends ZipArchiveEntry> extends ArchiveOu
raf.seek(save);
}
@SuppressWarnings("unchecked")
@Override
public E newArchiveEntry() {
return (E)new ZipArchiveEntry();
return (E) new ZipArchiveEntry();
}
/**

View file

@ -316,23 +316,6 @@ public class ZipFile {
}
}
/**
* Ensures that the close method of this zipfile is called when
* there are no more references to it.
*
* @see #close()
*/
@Override
protected void finalize() throws Throwable {
try {
if (!closed) {
close();
}
} finally {
super.finalize();
}
}
/**
* Length of a "central directory" entry structure without file
* name, extra fields or comment.
@ -370,8 +353,7 @@ public class ZipFile {
*/
private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
throws IOException {
HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
new HashMap<ZipArchiveEntry, NameAndComment>();
HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>();
positionAtCentralDirectory();

View file

@ -9,7 +9,7 @@ public class ZipTest {
@Test
public void testZip() throws Exception {
InputStream in = getClass().getResourceAsStream("test.zip");
ZipArchiveInputStream z = new ZipArchiveInputStream(in);
ZipArchiveInputStream<ZipArchiveEntry> z = new ZipArchiveInputStream<>(in);
byte[] buffer = new byte[1024];
long total = 0L;
while ((z.getNextEntry()) != null) {

View file

@ -1,5 +1,6 @@
dependencies {
api project(':io-archive')
implementation project(':io-compress-bgzf')
implementation project(':io-compress-bzip2')
implementation project(':io-compress-lzf')
implementation project(':io-compress-xz')

View file

@ -1,20 +1,22 @@
module org.xbib.io.codec {
uses org.xbib.io.codec.StreamCodec;
exports org.xbib.io.codec;
exports org.xbib.io.codec.ar;
exports org.xbib.io.codec.bgzf;
exports org.xbib.io.codec.cpio;
exports org.xbib.io.codec.file;
exports org.xbib.io.codec.jar;
exports org.xbib.io.codec.tar;
exports org.xbib.io.codec.zip;
requires transitive org.xbib.io.compress.bgzf;
requires org.xbib.io.compress.bzip;
requires org.xbib.io.compress.lzf;
requires org.xbib.io.compress.xz;
requires org.xbib.io.compress.zlib;
requires org.xbib.io.archive;
requires org.xbib.io.archive.ar;
requires org.xbib.io.archive.cpio;
requires org.xbib.io.archive.dump;
requires org.xbib.io.archive.jar;
requires org.xbib.io.archive.tar;
requires org.xbib.io.archive.zip;
requires transitive org.xbib.io.archive.ar;
requires transitive org.xbib.io.archive.cpio;
requires transitive org.xbib.io.archive.dump;
requires transitive org.xbib.io.archive.jar;
requires transitive org.xbib.io.archive.tar;
requires transitive org.xbib.io.archive.zip;
}

View file

@ -1,5 +1,6 @@
package org.xbib.io.codec;
import org.xbib.io.archive.entry.ArchiveEntry;
import org.xbib.io.archive.stream.ArchiveInputStream;
import org.xbib.io.archive.stream.ArchiveOutputStream;
import java.io.IOException;
@ -14,7 +15,7 @@ import java.io.OutputStream;
* @param <I> the archive input stream type
* @param <O> the archive output type
*/
public interface ArchiveCodec<S extends ArchiveSession, I extends ArchiveInputStream, O extends ArchiveOutputStream> {
public interface ArchiveCodec<E extends ArchiveEntry, I extends ArchiveInputStream<E>, O extends ArchiveOutputStream<E>, S extends ArchiveSession<E, I, O>> {
/**
* Returns the name of this archive codec ("cpio", "tar", "zip")
@ -23,14 +24,6 @@ public interface ArchiveCodec<S extends ArchiveSession, I extends ArchiveInputSt
*/
String getName();
/**
* Creates a new archive session with a progress watcher.
*
* @param watcher the progress watcher
* @return the new archive session
*/
S newSession(BytesProgressWatcher watcher);
/**
* Creates a new archive input stream
*
@ -49,4 +42,11 @@ public interface ArchiveCodec<S extends ArchiveSession, I extends ArchiveInputSt
*/
O createArchiveOutputStream(OutputStream out) throws IOException;
/**
* Creates a new archive session with a progress watcher.
*
* @param watcher the progress watcher
* @return the new archive session
*/
S newSession(BytesProgressWatcher watcher);
}

View file

@ -1,34 +1,3 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2012 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.io.codec;
import org.xbib.io.archive.entry.ArchiveEntry;
@ -45,13 +14,11 @@ import java.util.Date;
import java.util.Set;
/**
* Archive session
* Archive session.
*/
public abstract class ArchiveSession<I extends ArchiveInputStream, O extends ArchiveOutputStream>
public abstract class ArchiveSession<E extends ArchiveEntry, I extends ArchiveInputStream<E>, O extends ArchiveOutputStream<E>>
implements Session<StringPacket> {
private final static StreamCodecService codecFactory = StreamCodecService.getInstance();
private final static int DEFAULT_INPUT_BUFSIZE = 65536;
protected int bufferSize = DEFAULT_INPUT_BUFSIZE;
@ -65,15 +32,13 @@ public abstract class ArchiveSession<I extends ArchiveInputStream, O extends Arc
protected ArchiveSession() {
}
public ArchiveSession setPath(Path path, OpenOption option) {
public void setPath(Path path, OpenOption option) {
this.path = path;
this.option = option;
return this;
}
public ArchiveSession setBufferSize(int bufferSize) {
public void setBufferSize(int bufferSize) {
this.bufferSize = bufferSize;
return this;
}
@Override
@ -138,7 +103,7 @@ public abstract class ArchiveSession<I extends ArchiveInputStream, O extends Arc
byte[] buf = packet.toString().getBytes();
if (buf.length > 0) {
String name = packet.name();
ArchiveEntry entry = getOutputStream().newArchiveEntry();
E entry = getOutputStream().newArchiveEntry();
entry.setName(name);
entry.setLastModified(new Date());
entry.setEntrySize(buf.length);

View file

@ -7,11 +7,11 @@ import java.io.IOException;
* operations, and being closed. Sessions must be opened before the first
* operation and closed after the last operation.
*/
public interface Session<P extends Packet> {
public interface Session<P extends StringPacket> {
enum Mode {
READ, WRITE, APPEND,CONTROL, DELETE;
READ, WRITE, APPEND, CONTROL, DELETE;
}
/**

View file

@ -1,5 +1,7 @@
package org.xbib.io.codec;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Map;
import java.util.ServiceLoader;
import java.util.Set;
@ -10,13 +12,14 @@ import java.util.WeakHashMap;
*/
public class StreamCodecService {
private final static Map<String, StreamCodec> codecs = new WeakHashMap<>();
private final static Map<String, StreamCodec<InputStream, OutputStream>> codecs = new WeakHashMap<>();
private final static StreamCodecService instance = new StreamCodecService();
@SuppressWarnings({"rawtypes","unchecked"})
private StreamCodecService() {
ServiceLoader<StreamCodec> loader = ServiceLoader.load(StreamCodec.class);
for (StreamCodec codec : loader) {
for (StreamCodec<InputStream, OutputStream> codec : loader) {
if (!codecs.containsKey(codec.getName())) {
codecs.put(codec.getName(), codec);
}
@ -27,7 +30,7 @@ public class StreamCodecService {
return instance;
}
public StreamCodec getCodec(String suffix) {
public StreamCodec<InputStream, OutputStream> getCodec(String suffix) {
if (codecs.containsKey(suffix)) {
return codecs.get(suffix);
}

View file

@ -27,6 +27,7 @@ public class ArConnection extends URLConnection implements Connection<ArSession>
* the object referenced by the URL is not created.
*
* @param url the specified URL.
* @throws URISyntaxException if URI is invalid
*/
public ArConnection(URL url) throws URISyntaxException {
super(url);

View file

@ -1,9 +1,9 @@
package org.xbib.io.codec.ar;
import org.xbib.io.archive.ar.ArArchiveEntry;
import org.xbib.io.codec.ArchiveSession;
import org.xbib.io.archive.ar.ArArchiveInputStream;
import org.xbib.io.archive.ar.ArArchiveOutputStream;
import org.xbib.io.codec.Packet;
import org.xbib.io.codec.Session;
import org.xbib.io.codec.StringPacket;
import java.io.InputStream;
@ -12,7 +12,7 @@ import java.io.OutputStream;
/**
* Ar Session
*/
public class ArSession extends ArchiveSession<ArArchiveInputStream, ArArchiveOutputStream>
public class ArSession extends ArchiveSession<ArArchiveEntry, ArArchiveInputStream, ArArchiveOutputStream>
implements Session<StringPacket> {
private final static String SUFFIX = "ar";

View file

@ -0,0 +1,37 @@
package org.xbib.io.codec.bgzf;
import org.xbib.io.codec.StreamCodec;
import org.xbib.io.compress.bgzf.BlockCompressedInputStream;
import org.xbib.io.compress.bgzf.BlockCompressedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
* Stream codec registered under the name "bgzf". It wraps plain streams in
* {@link BlockCompressedInputStream} and {@link BlockCompressedOutputStream}.
*/
public class BzgfStreamCodec implements StreamCodec<BlockCompressedInputStream, BlockCompressedOutputStream> {
/**
* @return the codec name, always "bgzf"
*/
@Override
public String getName() {
return "bgzf";
}
/**
* Wraps the given stream for reading block-compressed data.
*
* @param in the stream to read from
* @return a new block-compressed input stream over {@code in}
* @throws IOException declared by the interface; NOTE(review): whether the wrapped constructor can throw is not visible here
*/
@Override
public BlockCompressedInputStream decode(InputStream in) throws IOException {
return new BlockCompressedInputStream(in);
}
/**
* Wraps the given stream for reading block-compressed data.
* The {@code bufsize} argument is not used; the result is built exactly as in {@link #decode(InputStream)}.
*
* @param in the stream to read from
* @param bufsize requested buffer size, ignored by this implementation
* @return a new block-compressed input stream over {@code in}
* @throws IOException declared by the interface
*/
@Override
public BlockCompressedInputStream decode(InputStream in, int bufsize) throws IOException {
return new BlockCompressedInputStream(in);
}
/**
* Wraps the given stream for writing block-compressed data.
*
* @param out the stream to write to
* @return a new block-compressed output stream over {@code out}
* @throws IOException declared by the interface
*/
@Override
public BlockCompressedOutputStream encode(OutputStream out) throws IOException {
return new BlockCompressedOutputStream(out);
}
/**
* Wraps the given stream for writing block-compressed data.
* The {@code bufsize} argument is not used; the result is built exactly as in {@link #encode(OutputStream)}.
*
* @param out the stream to write to
* @param bufsize requested buffer size, ignored by this implementation
* @return a new block-compressed output stream over {@code out}
* @throws IOException declared by the interface
*/
@Override
public BlockCompressedOutputStream encode(OutputStream out, int bufsize) throws IOException {
return new BlockCompressedOutputStream(out);
}
}

View file

@ -1,5 +1,6 @@
package org.xbib.io.codec.cpio;
import org.xbib.io.archive.cpio.CpioArchiveEntry;
import org.xbib.io.codec.ArchiveSession;
import org.xbib.io.archive.cpio.CpioArchiveInputStream;
import org.xbib.io.archive.cpio.CpioArchiveOutputStream;
@ -9,7 +10,7 @@ import java.io.OutputStream;
/**
* Cpio Session
*/
public class CpioSession extends ArchiveSession<CpioArchiveInputStream, CpioArchiveOutputStream> {
public class CpioSession extends ArchiveSession<CpioArchiveEntry, CpioArchiveInputStream, CpioArchiveOutputStream> {
private final static String SUFFIX = "cpio";

View file

@ -1,12 +1,13 @@
package org.xbib.io.codec.jar;
import org.xbib.io.archive.jar.JarArchiveEntry;
import org.xbib.io.codec.ArchiveSession;
import org.xbib.io.archive.jar.JarArchiveInputStream;
import org.xbib.io.archive.jar.JarArchiveOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
public class JarSession extends ArchiveSession<JarArchiveInputStream, JarArchiveOutputStream> {
public class JarSession extends ArchiveSession<JarArchiveEntry, JarArchiveInputStream, JarArchiveOutputStream> {
private final static String SUFFIX = "jar";

View file

@ -31,6 +31,7 @@
*/
package org.xbib.io.codec.tar;
import org.xbib.io.archive.tar.TarArchiveEntry;
import org.xbib.io.codec.ArchiveSession;
import org.xbib.io.archive.tar.TarArchiveInputStream;
import org.xbib.io.archive.tar.TarArchiveOutputStream;
@ -38,7 +39,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
public class TarSession extends ArchiveSession<TarArchiveInputStream, TarArchiveOutputStream> {
public class TarSession extends ArchiveSession<TarArchiveEntry, TarArchiveInputStream, TarArchiveOutputStream> {
private final static String SUFFIX = "tar";

View file

@ -1,37 +1,37 @@
package org.xbib.io.codec.zip;
import org.xbib.io.archive.zip.ZipArchiveEntry;
import org.xbib.io.codec.ArchiveSession;
import org.xbib.io.archive.zip.ZipArchiveInputStream;
import org.xbib.io.archive.zip.ZipArchiveOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
public class ZipSession extends ArchiveSession<ZipArchiveInputStream, ZipArchiveOutputStream> {
public class ZipSession extends ArchiveSession<ZipArchiveEntry, ZipArchiveInputStream<ZipArchiveEntry>, ZipArchiveOutputStream<ZipArchiveEntry>> {
private final static String SUFFIX = "zip";
private ZipArchiveInputStream in;
private ZipArchiveInputStream<ZipArchiveEntry> in;
private ZipArchiveOutputStream out;
private ZipArchiveOutputStream<ZipArchiveEntry> out;
protected String getSuffix() {
return SUFFIX;
}
protected void open(InputStream in) {
this.in = new ZipArchiveInputStream(in);
this.in = new ZipArchiveInputStream<>(in);
}
protected void open(OutputStream out) {
this.out = new ZipArchiveOutputStream(out);
this.out = new ZipArchiveOutputStream<>(out);
}
public ZipArchiveInputStream getInputStream() {
public ZipArchiveInputStream<ZipArchiveEntry> getInputStream() {
return in;
}
public ZipArchiveOutputStream getOutputStream() {
public ZipArchiveOutputStream<ZipArchiveEntry> getOutputStream() {
return out;
}
}

View file

@ -0,0 +1,21 @@
/*
* The MIT License
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

View file

@ -0,0 +1,3 @@
/**
* Module descriptor for the BGZF compression classes.
* It exports the single package {@code org.xbib.io.compress.bgzf} and declares no explicit dependencies.
*/
module org.xbib.io.compress.bgzf {
exports org.xbib.io.compress.bgzf;
}

View file

@ -0,0 +1,19 @@
package org.xbib.io.compress.bgzf;
/**
 * Unchecked exception type of the BGZF package. It adds no state of its own and
 * mirrors the four standard {@link RuntimeException} constructors.
 */
@SuppressWarnings("serial")
public class BGZFException extends RuntimeException {

    /**
     * Creates an exception without message or cause.
     */
    public BGZFException() {
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message the detail message
     */
    public BGZFException(final String message) {
        super(message);
    }

    /**
     * Creates an exception with a detail message and a cause.
     *
     * @param message the detail message
     * @param cause   the underlying cause
     */
    public BGZFException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception that wraps a cause.
     *
     * @param cause the underlying cause
     */
    public BGZFException(final Throwable cause) {
        super(cause);
    }
}

View file

@ -0,0 +1,67 @@
package org.xbib.io.compress.bgzf;
/**
 * Static helpers for BGZF virtual file pointers. As implemented here, a virtual
 * file pointer is a {@code long} whose low 16 bits hold an offset and whose
 * upper 48 bits hold a block address.
 */
public class BGZFFilePointerUtil {

    private static final int SHIFT_AMOUNT = 16;
    private static final int OFFSET_MASK = 0xffff;
    private static final long ADDRESS_MASK = 0xFFFFFFFFFFFFL;

    public static final long MAX_BLOCK_ADDRESS = ADDRESS_MASK;
    public static final int MAX_OFFSET = OFFSET_MASK;

    /**
     * Compares two virtual file pointers as unsigned 64-bit values.
     *
     * @param vfp1 first virtual file pointer
     * @param vfp2 second virtual file pointer
     * @return -1, 0 or 1 if {@code vfp1} is less than, equal to or greater than {@code vfp2}
     */
    public static int compare(final long vfp1, final long vfp2) {
        // signum pins the result to exactly -1/0/1
        return Integer.signum(Long.compareUnsigned(vfp1, vfp2));
    }

    /**
     * @param vfp1 first virtual file pointer
     * @param vfp2 second virtual file pointer
     * @return true if the block address of {@code vfp2} equals the block address of
     * {@code vfp1} or is exactly one greater
     */
    public static boolean areInSameOrAdjacentBlocks(final long vfp1, final long vfp2) {
        final long distance = getBlockAddress(vfp2) - getBlockAddress(vfp1);
        return distance == 0 || distance == 1;
    }

    /**
     * @param blockAddress File offset of start of BGZF block.
     * @param blockOffset  Offset into uncompressed block.
     * @return Virtual file pointer that embodies the input parameters.
     * @throws IllegalArgumentException if either argument is negative or exceeds
     *                                  {@link #MAX_OFFSET} / {@link #MAX_BLOCK_ADDRESS}
     */
    static long makeFilePointer(final long blockAddress, final int blockOffset) {
        // The order of the checks decides which message is reported when several apply.
        if (blockOffset < 0) {
            throw new IllegalArgumentException("Negative blockOffset " + blockOffset
                    + " not allowed.");
        }
        if (blockAddress < 0) {
            throw new IllegalArgumentException("Negative blockAddress " + blockAddress
                    + " not allowed.");
        }
        if (blockOffset > MAX_OFFSET) {
            throw new IllegalArgumentException("blockOffset " + blockOffset + " too large.");
        }
        if (blockAddress > MAX_BLOCK_ADDRESS) {
            throw new IllegalArgumentException("blockAddress " + blockAddress + " too large.");
        }
        final long shiftedAddress = blockAddress << SHIFT_AMOUNT;
        return shiftedAddress | blockOffset;
    }

    /**
     * @param virtualFilePointer virtual file pointer to decode
     * @return the upper 48 bits of the pointer, i.e. the block address
     */
    public static long getBlockAddress(final long virtualFilePointer) {
        final long shifted = virtualFilePointer >>> SHIFT_AMOUNT;
        return shifted & ADDRESS_MASK;
    }

    /**
     * @param virtualFilePointer virtual file pointer to decode
     * @return the low 16 bits of the pointer, i.e. the offset
     */
    public static int getBlockOffset(final long virtualFilePointer) {
        final long low = virtualFilePointer & OFFSET_MASK;
        return (int) low;
    }

    /**
     * @param vfp virtual file pointer to describe
     * @return the pointer in decimal and hexadecimal, followed by its block address and offset
     */
    public static String asString(final long vfp) {
        final long address = getBlockAddress(vfp);
        final int blockOffset = getBlockOffset(vfp);
        return String.format("%d(0x%x): (block address: %d, offset: %d)", vfp, vfp, address, blockOffset);
    }
}

View file

@ -0,0 +1,19 @@
package org.xbib.io.compress.bgzf;
/**
 * Specialisation of {@link BGZFException}. It adds no state of its own and
 * forwards every constructor to the superclass.
 */
@SuppressWarnings("serial")
public class BGZFFormatException extends BGZFException {

    /**
     * Creates an exception without message or cause.
     */
    public BGZFFormatException() {
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message the detail message
     */
    public BGZFFormatException(final String message) {
        super(message);
    }

    /**
     * Creates an exception with a detail message and a cause.
     *
     * @param message the detail message
     * @param cause   the underlying cause
     */
    public BGZFFormatException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception that wraps a cause.
     *
     * @param cause the underlying cause
     */
    public BGZFFormatException(final Throwable cause) {
        super(cause);
    }
}

View file

@ -0,0 +1,95 @@
package org.xbib.io.compress.bgzf;
/**
 * Constants shared by the BlockCompressed{Input,Output}Stream classes.
 */
public class BGZFStreamConstants {

    /**
     * Number of bytes in the gzip block before the deflated data. This is not the
     * standard header size, because one optional subfield is included, but it is
     * the standard for this format.
     */
    public static final int BLOCK_HEADER_LENGTH = 18;

    /** Location in the gzip block of the total block size (actually total block size - 1). */
    public static final int BLOCK_LENGTH_OFFSET = 16;

    /** Number of bytes that follow the deflated data. */
    public static final int BLOCK_FOOTER_LENGTH = 8;

    /** A compressed block, including header and footer, is required to be no larger than this. */
    public static final int MAX_COMPRESSED_BLOCK_SIZE = 64 * 1024;

    /** Gzip overhead is the header, the footer, and the block size (encoded as a short). */
    public static final int GZIP_OVERHEAD = BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH + 2;

    /** Overhead of a Deflater with compression level NO_COMPRESSION: 10 bytes (determined experimentally). */
    public static final int NO_COMPRESSION_OVERHEAD = 10;

    /**
     * Push out a gzip block when this many uncompressed bytes have been accumulated.
     * The size is selected so that, if the data is not compressible and the Deflater
     * is given compression level NO_COMPRESSION, the compressed size is guaranteed
     * to be no larger than {@link #MAX_COMPRESSED_BLOCK_SIZE}.
     */
    public static final int DEFAULT_UNCOMPRESSED_BLOCK_SIZE = 64 * 1024 - (GZIP_OVERHEAD + NO_COMPRESSION_OVERHEAD);

    /** First gzip magic number. */
    public static final byte GZIP_ID1 = 31;

    /** Second gzip magic number. */
    public static final int GZIP_ID2 = 139;

    /** FEXTRA flag, meaning there are optional fields. */
    public static final int GZIP_FLG = 4;

    /** Extra flags. */
    public static final int GZIP_XFL = 0;

    /** Length of the extra subfield. */
    public static final short GZIP_XLEN = 6;

    /** The deflate compression method, which is customarily used by gzip. */
    public static final byte GZIP_CM_DEFLATE = 8;

    public static final int DEFAULT_COMPRESSION_LEVEL = 5;

    /** The OS is left unknown because no line terminator translation is done. */
    public static final int GZIP_OS_UNKNOWN = 255;

    /** First byte of the subfield ID. */
    public static final byte BGZF_ID1 = 66;

    /** Second byte of the subfield ID. */
    public static final byte BGZF_ID2 = 67;

    /** Subfield length in bytes. */
    public static final byte BGZF_LEN = 2;

    /** A complete gzip block without content: header, two payload bytes and a zeroed footer. */
    public static final byte[] EMPTY_GZIP_BLOCK = {
            GZIP_ID1,
            (byte) GZIP_ID2,
            GZIP_CM_DEFLATE,
            GZIP_FLG,
            0, 0, 0, 0, // Modification time
            GZIP_XFL,
            (byte) GZIP_OS_UNKNOWN,
            GZIP_XLEN, 0, // Little-endian short
            BGZF_ID1,
            BGZF_ID2,
            BGZF_LEN, 0, // Little-endian short
            // Total block size - 1
            BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH - 1 + 2, 0, // Little-endian short
            // Dummy payload? NOTE(review): presumably an empty deflate stream - confirm
            3, 0,
            0, 0, 0, 0, // crc
            0, 0, 0, 0, // uncompressedSize
    };

    /** The first 16 bytes of every block header, up to but excluding the block size field. */
    public static final byte[] GZIP_BLOCK_PREAMBLE = {
            GZIP_ID1,
            (byte) GZIP_ID2,
            GZIP_CM_DEFLATE,
            GZIP_FLG,
            0, 0, 0, 0, // Modification time
            GZIP_XFL,
            (byte) GZIP_OS_UNKNOWN,
            GZIP_XLEN, 0, // Little-endian short
            BGZF_ID1,
            BGZF_ID2,
            BGZF_LEN, 0, // Little-endian short
    };
}

View file

@ -0,0 +1,666 @@
package org.xbib.io.compress.bgzf;
import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.SyncFailedException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
/**
* Encapsulates file representation of various primitive data types. Forces little-endian disk
* representation. Note that this class is currently not very efficient. There are plans to increase
* the size of the ByteBuffer, and move data between the ByteBuffer and the underlying input or
* output stream in larger chunks.
*
* All the read methods throw EOFException if the input stream is exhausted before the
* required number of bytes are read.
*/
public class BinaryCodec {
// Outstream to write to
private OutputStream outputStream;
// If a file or filename was given it will be stored here. Used for error reporting.
private String outputFileName;
// Input stream to read from
private InputStream inputStream;
// If a file or filename was given to read from it will be stored here. Used for error reporting.
private String inputFileName;
/*
* Mode that the BinaryCodec is in. It is either writing to a binary file or reading from. This
* is set to true if it is writing to a binary file Right now we don't support reading and
* writing to the same file with the same BinaryCodec instance
*/
private boolean isWriting;
/**
* For byte swapping.
*/
private ByteBuffer byteBuffer;
/**
* For reading Strings of known length, this can reduce object creation
*/
private final byte[] scratchBuffer = new byte[16];
// Byte order used in BAM files.
private static final ByteOrder LITTLE_ENDIAN = ByteOrder.LITTLE_ENDIAN;
private static final byte[] NULL_BYTE = {0};
private static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1;
private static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1;
private static final long MAX_UINT = ((long)Integer.MAX_VALUE * 2) + 1;
// We never serialize more than this much at a time (except for Strings)
private static final int MAX_BYTE_BUFFER = 8;
/**
 * Constructs a BinaryCodec on a file and sets its mode to writing or reading.
 * In writing mode a {@link FileOutputStream} is opened on the file, otherwise a
 * {@link FileInputStream}; in both cases the file's name is remembered.
 *
 * @param file    file to be written to or read from
 * @param writing whether the file is being written to
 * @throws FileNotFoundException if the stream on the file cannot be opened
 */
public BinaryCodec(final File file, final boolean writing) throws FileNotFoundException {
    this();
    isWriting = writing;
    if (!writing) {
        inputStream = new FileInputStream(file);
        inputFileName = file.getName();
    } else {
        outputStream = new FileOutputStream(file);
        outputFileName = file.getName();
    }
}
/**
* Constructs BinaryCodec from a file name and set it's mode to writing or not
*
* @param fileName name of the file to be written to or read from
* @param writing writing whether the file is being written to
* @throws FileNotFoundException
*/
public BinaryCodec(final String fileName, final boolean writing) throws FileNotFoundException {
this(new File(fileName), writing);
}
/**
* Constructs BinaryCodec from an output stream
*
* @param outputStream Stream to write to, since it's an output stream we know that isWriting
* should be set to true
*/
public BinaryCodec(final OutputStream outputStream) {
this();
setOutputStream(outputStream);
}
/**
* Constructs BinaryCodec from an input stream
*
* @param inputStream Stream to read from, since we are reading isWriting is set to false
*/
public BinaryCodec(final InputStream inputStream) {
this();
setInputStream(inputStream);
}
/**
* Ambiguous whether reading or writing until set{In,Out}putStream is called
*/
public BinaryCodec() {
initByteBuffer();
}
/**
* Shared among ctors. Note that if endianness is changed, all the unsigned methods must also be
* changed.
*/
private void initByteBuffer() {
byteBuffer = ByteBuffer.allocate(MAX_BYTE_BUFFER);
byteBuffer.order(LITTLE_ENDIAN);
}
/**
* Write whatever has been put into the byte buffer
*
* @param numBytes -- how much to write. Note that in case of writing an unsigned value, more
* bytes were put into the ByteBuffer than will get written out.
* @throws IOException
*/
private void writeByteBuffer(final int numBytes) throws IOException {
assert (numBytes <= byteBuffer.limit());
writeBytes(byteBuffer.array(), 0, numBytes);
}
/**
* Writes a byte to the output buffer
*
* @param bite byte array to write
* @throws IOException
*/
public void writeByte(final byte bite) throws IOException {
byteBuffer.clear();
byteBuffer.put(bite);
writeByteBuffer(1);
}
public void writeByte(final int b) throws IOException {
writeByte((byte)b);
}
/**
* Writes a byte array to the output buffer
*
* @param bytes value to write
* @throws IOException
*/
public void writeBytes(final byte[] bytes) throws IOException {
writeBytes(bytes, 0, bytes.length);
}
public void writeBytes(final byte[] bytes, final int startOffset, final int numBytes) throws IOException {
if (!isWriting) {
throw new IllegalStateException("Calling write method on BinaryCodec open for read.");
}
outputStream.write(bytes, startOffset, numBytes);
}
/**
* Write a 32-bit int to the output stream
*
* @param value int to write
* @throws IOException
*/
public void writeInt(final int value) throws IOException {
byteBuffer.clear();
byteBuffer.putInt(value);
writeByteBuffer(4);
}
/**
* Write a double (8 bytes) to the output stream
*
* @param value double to write
* @throws IOException
*/
public void writeDouble(final double value) throws IOException {
byteBuffer.clear();
byteBuffer.putDouble(value);
writeByteBuffer(8);
}
/**
* Write a 64-bit long to the output stream
*
* @param value long to write
* @throws IOException
*/
public void writeLong(final long value) throws IOException {
byteBuffer.clear();
byteBuffer.putLong(value);
writeByteBuffer(8);
}
/**
* Write a 16-bit short to output stream
*
* @throws IOException
*/
public void writeShort(final short value) throws IOException {
byteBuffer.clear();
byteBuffer.putShort(value);
writeByteBuffer(2);
}
/**
* Write a float (4 bytes) to the output stream
*
* @param value float to write
* @throws IOException
*/
public void writeFloat(final float value) throws IOException {
byteBuffer.clear();
byteBuffer.putFloat(value);
writeByteBuffer(4);
}
/**
* Writes a boolean (1 byte) to the output buffer
*
* @param value boolean to write
* @throws IOException
*/
public void writeBoolean(final boolean value) throws IOException {
byteBuffer.clear();
byteBuffer.put(value ? (byte)1 : (byte)0);
writeByteBuffer(1);
}
/**
* Writes a string to the buffer as ASCII bytes
*
* @param value string to write to buffer
* @param writeLength prefix the string with the length as a 32-bit int
* @param appendNull add a null byte to the end of the string
* @throws IOException
*/
public void writeString(final String value, final boolean writeLength, final boolean appendNull) throws IOException {
if (writeLength) {
int lengthToWrite = value.length();
if (appendNull) lengthToWrite++;
writeInt(lengthToWrite);
}
// Actually writes the string to a buffer
writeString(value);
if (appendNull) writeBytes(NULL_BYTE);
}
/**
* Write a string to the buffer as ASCII bytes
*
* @param value string to write
* @throws IOException
*/
private void writeString(final String value) throws IOException {
final byte[] byteBuffer = new byte[value.length()];
final char[] charBuffer = value.toCharArray();
for (int i = 0; i < charBuffer.length; ++i) {
byteBuffer[i] = (byte)(charBuffer[i] & 0xff);
}
writeBytes(byteBuffer);
}
/**
* Write an 8-bit unsigned byte. NOTE: This method will break if we change to big-endian.
*
* @throws IOException
*/
public void writeUByte(final short val) throws IOException {
if (val < 0) {
throw new IllegalArgumentException("Negative value (" + val
+ ") passed to unsigned writing method.");
}
if (val > MAX_UBYTE) {
throw new IllegalArgumentException("Value (" + val
+ ") to large to be written as ubyte.");
}
byteBuffer.clear();
byteBuffer.putShort(val);
writeByteBuffer(1);
}
/**
* Write a 16-bit unsigned short. NOTE: This method will break if we change to big-endian.
*
* @throws IOException
*/
public void writeUShort(final int val) throws IOException {
if (val < 0) {
throw new IllegalArgumentException("Negative value (" + val
+ ") passed to unsigned writing method.");
}
if (val > MAX_USHORT) {
throw new IllegalArgumentException("Value (" + val
+ ") to large to be written as ushort.");
}
byteBuffer.clear();
byteBuffer.putInt(val);
writeByteBuffer(2);
}
/**
* Write a 32-bit unsigned int. NOTE: This method will break if we change to big-endian.
*
* @throws IOException
*/
public void writeUInt(final long val) throws IOException {
if (val < 0) {
throw new IllegalArgumentException("Negative value (" + val
+ ") passed to unsigned writing method.");
}
if (val > MAX_UINT) {
throw new IllegalArgumentException("Value (" + val
+ ") to large to be written as uint.");
}
byteBuffer.clear();
byteBuffer.putLong(val);
writeByteBuffer(4);
}
/**
* Read a byte array from the input stream.
*
* @throws IOException
*/
public void readBytes(final byte[] buffer) throws IOException {
readBytes(buffer, 0, buffer.length);
}
/**
* Read a byte array from the input stream
*
* @param buffer where to put bytes read
* @param offset offset to start putting bytes into buffer
* @param length number of bytes to read
* @throws IOException
*/
public void readBytes(final byte[] buffer, final int offset, final int length) throws IOException {
int totalNumRead = 0;
do {
final int numRead =
readBytesOrFewer(buffer, offset + totalNumRead, length - totalNumRead);
if (numRead < 0) {
throw new EOFException(constructErrorMessage("Premature EOF"));
} else {
totalNumRead += numRead;
}
} while (totalNumRead < length);
}
/**
* Reads a byte array from the input stream.
*
* @param buffer where to put bytes read
* @param offset offset to start putting bytes into buffer
* @param length number of bytes to read. Fewer bytes may be read if EOF is reached before
* length bytes have been read.
* @return the total number of bytes read into the buffer, or -1 if there is no more data
* because the end of the stream has been reached.
* @throws IOException
*/
public int readBytesOrFewer(final byte[] buffer, final int offset, final int length) throws IOException {
if (isWriting) {
throw new IllegalStateException("Calling read method on BinaryCodec open for write.");
}
return inputStream.read(buffer, offset, length);
}
/**
* @return a single byte read from the input stream.
* @throws IOException
*/
public byte readByte() throws IOException {
if (isWriting) {
throw new IllegalStateException("Calling read method on BinaryCodec open for write.");
}
final int ret = inputStream.read();
if (ret == -1) {
throw new EOFException(constructErrorMessage("Premature EOF"));
}
return (byte)ret;
}
/**
* @return true if it is possible to know for sure if at EOF, and it is known for sure. If the
* input stream is a ByteArrayInputStream, this is faster than causing a
* RuntimeEOFException to be thrown.
* @throws IOException
*/
public boolean knownAtEof() throws IOException {
if (isWriting) {
throw new IllegalStateException(
"Calling knownAtEof method on BinaryCodec open for write.");
}
return inputStream instanceof ByteArrayInputStream && inputStream.available() == 0;
}
/**
* Read a string off the input stream, as ASCII bytes
*
* @param length length of string to read
* @return String read from stream
* @throws IOException
*/
public String readString(final int length) throws IOException {
final byte[] buffer;
// Recycle single buffer if possible
if (length <= scratchBuffer.length) {
buffer = scratchBuffer;
} else {
buffer = new byte[length];
}
readBytes(buffer, 0, length);
final char[] charBuffer = new char[length];
for (int i = 0; i < length; ++i) {
charBuffer[i] = (char)buffer[i];
}
return new String(charBuffer);
}
/**
* Read ASCII bytes from the input stream until a null byte is read
*
* @return String constructed from the ASCII bytes read
* @throws IOException
*/
public String readNullTerminatedString() throws IOException {
final StringBuilder ret = new StringBuilder();
for (byte b = this.readByte(); b != 0; b = this.readByte()) {
ret.append((char)(b & 0xff));
}
return ret.toString();
}
/**
* Read an int length, and then a String of that length
*
* @param devourNull if true, the length include a null terminator, which is read and discarded
* @throws IOException
*/
public String readLengthAndString(final boolean devourNull) throws IOException {
int length = readInt();
if (devourNull) {
--length;
}
final String ret = readString(length);
if (devourNull) {
readByte();
}
return ret;
}
private void readByteBuffer(final int numBytes) throws IOException {
assert (numBytes <= byteBuffer.capacity());
readBytes(byteBuffer.array(), 0, numBytes);
byteBuffer.limit(byteBuffer.capacity());
byteBuffer.position(numBytes);
}
/**
* Read an int off the input stream
*
* @return int from input stream
* @throws IOException
*/
public int readInt() throws IOException {
readByteBuffer(4);
byteBuffer.flip();
return byteBuffer.getInt();
}
/**
* Reads a double off the input stream
*
* @return double
* @throws IOException
*/
public double readDouble() throws IOException {
readByteBuffer(8);
byteBuffer.flip();
return byteBuffer.getDouble();
}
/**
* Reads a long off the input stream
*
* @return long
* @throws IOException
*/
public long readLong() throws IOException {
readByteBuffer(8);
byteBuffer.flip();
return byteBuffer.getLong();
}
public short readShort() throws IOException {
readByteBuffer(2);
byteBuffer.flip();
return byteBuffer.getShort();
}
/**
* Reads a float off the input stream
*
* @return float
* @throws IOException
*/
public float readFloat() throws IOException {
readByteBuffer(4);
byteBuffer.flip();
return byteBuffer.getFloat();
}
/**
* Reads a boolean off the input stream, represented as a byte with value 1 or 0
*
* @return boolean
* @throws IOException
*/
public boolean readBoolean() throws IOException {
return ((readByte()) == 1);
}
/**
* Reads an 8-bit unsigned byte from the input stream. This method assumes little-endianness.
*
* @throws IOException
*/
public short readUByte() throws IOException {
readByteBuffer(1);
byteBuffer.put((byte)0);
byteBuffer.flip();
return byteBuffer.getShort();
}
/**
* Reads a 16-bit unsigned short from the input stream. This method assumes little-endianness.
*
* @throws IOException
*/
public int readUShort() throws IOException {
readByteBuffer(2);
byteBuffer.putShort((short)0);
byteBuffer.flip();
return byteBuffer.getInt();
}
/**
* Reads a 32-bit unsigned int from the input stream. This method assumes little-endianness.
*
* @throws IOException
*/
public long readUInt() throws IOException {
readByteBuffer(4);
byteBuffer.putInt(0);
byteBuffer.flip();
return byteBuffer.getLong();
}
/**
* Close the appropriate stream
*
* @throws IOException
*/
public void close() throws IOException {
if (this.isWriting) {
// To the degree possible, make sure the bytes get forced to the file system,
// or else cause an exception to be thrown.
if (this.outputStream instanceof FileOutputStream) {
this.outputStream.flush();
FileOutputStream fos = (FileOutputStream)this.outputStream;
try {
fos.getFD().sync();
} catch (SyncFailedException e) {
// ignore
}
}
this.outputStream.close();
} else {
this.inputStream.close();
}
}
private String constructErrorMessage(final String msg) {
final StringBuilder sb = new StringBuilder(msg);
sb.append("; BinaryCodec in ");
sb.append(isWriting ? "write" : "read");
sb.append("mode; ");
final String filename = isWriting ? outputFileName : inputFileName;
if (filename != null) {
sb.append("file: ");
sb.append(filename);
} else {
sb.append("streamed file (filename not available)");
}
return sb.toString();
}
public String getInputFileName() {
return inputFileName;
}
public String getOutputFileName() {
return outputFileName;
}
public void setOutputFileName(final String outputFileName) {
this.outputFileName = outputFileName;
}
public void setInputFileName(final String inputFileName) {
this.inputFileName = inputFileName;
}
public boolean isWriting() {
return isWriting;
}
public OutputStream getOutputStream() {
return outputStream;
}
public InputStream getInputStream() {
return inputStream;
}
public void setInputStream(final InputStream is) {
isWriting = false;
this.inputStream = is;
}
public void setOutputStream(final OutputStream os) {
isWriting = true;
this.outputStream = os;
}
}

View file

@ -0,0 +1,709 @@
package org.xbib.io.compress.bgzf;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
/**
* Stream class for reading BGZF block compressed files. The caller can treat this file like any other InputStream.
* It probably is not necessary to wrap this stream in a buffering stream, because there is internal buffering.
* The advantage of BGZF over conventional GZip format is that BGZF allows for seeking without having to read the
* entire file up to the location being sought. Note that seeking is only possible if the input stream is seekable.
*
* Note that this implementation is not synchronized. If multiple threads access an instance concurrently, it must be synchronized externally.
*
* @see <a href="http://samtools.sourceforge.net/SAM1.pdf">http://samtools.sourceforge.net/SAM1.pdf</a> for details of BGZF file format.
*/
public class BlockCompressedInputStream extends InputStream {
// Message prefixes used for the exceptions raised by this class.
public final static String INCORRECT_HEADER_SIZE_MSG = "Incorrect header size for file: ";
public final static String UNEXPECTED_BLOCK_LENGTH_MSG = "Unexpected compressed block length: ";
public final static String PREMATURE_END_MSG = "Premature end of file: ";
public final static String CANNOT_SEEK_STREAM_MSG = "Cannot seek a position for a non-file stream";
public final static String CANNOT_SEEK_CLOSED_STREAM_MSG = "Cannot seek a position for a closed stream";
public final static String INVALID_FILE_PTR_MSG = "Invalid file pointer: ";
// Source of compressed bytes when constructed from a plain InputStream; null otherwise and after close().
private InputStream mStream;
// Set by close(); lets seek() tell a closed stream apart from a non-seekable one.
private boolean mIsClosed = false;
// Source of compressed bytes when constructed from a File or SeekableStream; null otherwise and after close().
private SeekableStream mFile;
// Holds the compressed bytes of the block being read; allocated lazily by processNextBlock().
private byte[] mFileBuffer = null;
// Most recently decompressed block, or null before the first read and after close().
private DecompressedBlock mCurrentBlock = null;
// Index of the next unread byte within mCurrentBlock.mBlock.
private int mCurrentOffset = 0;
// Offset in the compressed stream at which the next block will be read.
private long mStreamOffset = 0;
private final BlockGunzipper blockGunzipper;
// Accumulates the bytes of the line being assembled by readLine(); created on first use.
private volatile ByteArrayOutputStream buf = null;
// Line terminator bytes recognised by readLine().
private static final byte eol = '\n';
private static final byte eolCr = '\r';
/**
 * Creates a stream over the given source with buffering enabled and the default inflater
 * factory. seek() is not supported on instances created this way.
 *
 * @param stream source of bytes
 */
public BlockCompressedInputStream(final InputStream stream) {
    this(
            stream,
            true,
            BlockGunzipper.getDefaultInflaterFactory());
}
/**
 * Creates a stream over the given source with buffering enabled and a caller-supplied inflater
 * factory. seek() is not supported on instances created this way.
 *
 * @param stream          source of bytes
 * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
 */
public BlockCompressedInputStream(final InputStream stream, final InflaterFactory inflaterFactory) {
    this(
            stream,
            true,
            inflaterFactory);
}
/**
 * Creates a stream over the given source using the default inflater factory, with optional
 * buffering. seek() is not supported on instances created this way.
 *
 * @param stream         source of bytes
 * @param allowBuffering if true, allow buffering
 */
public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering) {
    this(
            stream,
            allowBuffering,
            BlockGunzipper.getDefaultInflaterFactory());
}
/**
 * Creates a stream over the given source. seek() is not supported on instances created this
 * way, because no seekable source is attached.
 *
 * @param stream          source of bytes
 * @param allowBuffering  if true, the source is wrapped in a BufferedInputStream
 * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
 */
public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering, final InflaterFactory inflaterFactory) {
    this.mStream = allowBuffering ? new BufferedInputStream(stream) : stream;
    this.mFile = null;
    this.blockGunzipper = new BlockGunzipper(inflaterFactory);
}
/**
 * Opens the given file with the default inflater factory. Use this ctor if you wish to call
 * seek().
 *
 * @param file source of bytes
 * @throws IOException if the file cannot be opened
 */
public BlockCompressedInputStream(final File file) throws IOException {
    this(
            file,
            BlockGunzipper.getDefaultInflaterFactory());
}
/**
 * Opens the given file as a seekable source. Use this ctor if you wish to call seek().
 *
 * @param file            source of bytes
 * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
 * @throws IOException if the file cannot be opened
 */
public BlockCompressedInputStream(final File file, final InflaterFactory inflaterFactory) throws IOException {
    this.mFile = new SeekableFileStream(file);
    this.mStream = null;
    this.blockGunzipper = new BlockGunzipper(inflaterFactory);
}
/**
 * Reads from an arbitrary seekable data source using the default inflater factory. No
 * additional buffering is provided, so if the underlying source is not buffered, wrap it in a
 * SeekableBufferedStream before passing it to this ctor.
 *
 * @param strm source of bytes
 */
public BlockCompressedInputStream(final SeekableStream strm) {
    this(
            strm,
            BlockGunzipper.getDefaultInflaterFactory());
}
/**
 * Reads from an arbitrary seekable data source. No additional buffering is provided, so if the
 * underlying source is not buffered, wrap it in a SeekableBufferedStream before passing it to
 * this ctor.
 *
 * @param strm            source of bytes
 * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper}
 */
public BlockCompressedInputStream(final SeekableStream strm, final InflaterFactory inflaterFactory) {
    this.mFile = strm;
    this.mStream = null;
    this.blockGunzipper = new BlockGunzipper(inflaterFactory);
}
/**
 * Determines whether or not the inflater will re-calculate the CRC on the decompressed data
 * and check it against the value stored in the GZIP header. CRC checking is an expensive
 * operation and should be used accordingly. The flag is handed straight to the block gunzipper.
 *
 * @param check true to enable CRC checking
 */
public void setCheckCrcs(final boolean check) {
    blockGunzipper.setCheckCrcs(check);
}
/**
 * Reports how many decompressed bytes of the current block are still unread. When no block has
 * been loaded yet, or the current one is used up, the next block is read first, so this call
 * itself may block on the underlying source.
 *
 * @return the number of bytes that can be read (or skipped over) from this input stream without
 * blocking by the next caller of a method for this input stream
 * @throws IOException if reading or decompressing the next block fails
 */
@Override
public int available() throws IOException {
    final boolean exhausted =
            mCurrentBlock == null || mCurrentOffset == mCurrentBlock.mBlock.length;
    if (exhausted) {
        readBlock();
    }
    return mCurrentBlock == null ? 0 : mCurrentBlock.mBlock.length - mCurrentOffset;
}
/**
 * Tells whether every byte of the current block has been consumed.
 *
 * @return <code>true</code> if the stream is at the end of a BGZF block,
 * <code>false</code> otherwise (including when no block has been loaded).
 */
public boolean endOfBlock() {
    if (mCurrentBlock == null) {
        return false;
    }
    return mCurrentOffset == mCurrentBlock.mBlock.length;
}
/**
 * Closes whichever underlying source is attached (the seekable stream takes precedence over the
 * plain InputStream), drops the internal buffers and marks this stream as closed.
 *
 * @throws IOException if closing the underlying source fails
 */
@Override
public void close() throws IOException {
    final SeekableStream seekable = mFile;
    final InputStream plain = mStream;
    if (seekable != null) {
        seekable.close();
        mFile = null;
    } else if (plain != null) {
        plain.close();
        mStream = null;
    }
    // Release references so the buffers can be garbage collected
    mFileBuffer = null;
    mCurrentBlock = null;
    // Remember that close() was called
    mIsClosed = true;
}
/**
 * Reads the next byte of decompressed data. The byte is returned as an int in the range 0 to
 * 255. This method blocks until input data is available, the end of the stream is detected, or
 * an exception is thrown.
 *
 * @return the next byte of data, or -1 if the end of the stream is reached.
 * @throws IOException if reading or decompressing the next block fails
 */
@Override
public int read() throws IOException {
    if (available() <= 0) {
        return -1;
    }
    final byte next = mCurrentBlock.mBlock[mCurrentOffset++];
    return next & 0xFF;
}
/**
 * Reads some number of bytes into the given array; equivalent to
 * <code>read(buffer, 0, buffer.length)</code>. This method blocks until input data is available,
 * end of file is detected, or an exception is thrown.
 *
 * @param buffer the buffer into which the data is read.
 * @return the total number of bytes read into the buffer, or -1 if there is no more data because
 * the end of the stream has been reached.
 * @throws IOException if reading or decompressing a block fails
 */
@Override
public int read(final byte[] buffer) throws IOException {
    final int capacity = buffer.length;
    return read(buffer, 0, capacity);
}
/**
* Reads a whole line. A line is considered to be terminated by either a line feed ('\n'),
* carriage return ('\r') or carriage return followed by a line feed ("\r\n"). The scan continues
* across block boundaries, so a line (or its "\r\n" terminator) may span several blocks.
*
* The collected bytes are turned into a String with ByteArrayOutputStream.toString(), i.e. they
* are decoded with the platform's default charset.
*
* @return A String containing the contents of the line, excluding the line terminating
* character, or null if the end of the stream has been reached
*
* @exception IOException If an I/O error occurs
*/
public String readLine() throws IOException {
// available() loads the next block if needed; 0 here means there is nothing left to read.
int available = available();
if (available == 0) {
return null;
}
if(null == buf){ // lazy initialisation
buf = new ByteArrayOutputStream(8192);
}
buf.reset();
boolean done = false;
boolean foundCr = false; // \r found flag
// Each pass of the outer loop scans the unread part of one block.
while (!done) {
// linetmpPos: scan position within the current block; bCnt: number of content bytes to copy.
int linetmpPos = mCurrentOffset;
int bCnt = 0;
while((available-- > 0)){
final byte c = mCurrentBlock.mBlock[linetmpPos++];
if(c == eol){ // found \n
done = true;
break;
} else if(foundCr){ // previous char was \r
--linetmpPos; // current char is not \n so put it back
done = true;
break;
} else if(c == eolCr){ // found \r
foundCr = true;
continue; // no ++bCnt
}
++bCnt;
}
// Copy the content bytes scanned in this block and consume everything scanned,
// including any terminator bytes.
if(mCurrentOffset < linetmpPos) {
buf.write(mCurrentBlock.mBlock, mCurrentOffset, bCnt);
mCurrentOffset = linetmpPos;
}
// May load the next block; foundCr is kept so that a '\n' opening that block
// is still treated as part of a "\r\n" terminator.
available = available();
if(available == 0) {
// EOF
done = true;
}
}
return buf.toString();
}
/**
 * Reads up to <code>length</code> bytes of decompressed data into the given array, crossing
 * block boundaries as needed. Fewer bytes are returned only when the end of the stream is hit.
 *
 * This method blocks until input data is available, end of file is detected, or an exception is
 * thrown.
 *
 * @param buffer buffer into which data is read.
 * @param offset the start offset in the array at which the data is written.
 * @param length the maximum number of bytes to read.
 * @return the total number of bytes read into the buffer, or -1 if there is no more data because
 * the end of the stream has been reached.
 * @throws IOException if reading or decompressing a block fails
 */
@Override
public int read(final byte[] buffer, int offset, int length) throws IOException {
    int writePos = offset;
    int remaining = length;
    while (remaining > 0) {
        final int ready = available();
        if (ready == 0) {
            if (remaining == length) {
                // Nothing was copied at all: report EOF to the caller
                return -1;
            }
            break;
        }
        final int chunk = Math.min(remaining, ready);
        System.arraycopy(mCurrentBlock.mBlock, mCurrentOffset, buffer, writePos, chunk);
        mCurrentOffset += chunk;
        writePos += chunk;
        remaining -= chunk;
    }
    return length - remaining;
}
/**
* Seek to the given position in the file. Note that pos is a special virtual file pointer,
* not an actual byte offset. If the target block is the one already loaded, only the offset
* within the block is changed; otherwise the underlying stream is repositioned and the block
* at that address is read and decompressed.
*
* @param pos virtual file pointer position
* @throws IOException if stream is closed or not a file based stream, or if pos does not
* denote a valid position within the addressed block
*/
public void seek(final long pos) throws IOException {
// Must be before the mFile == null check because mFile == null for closed files and streams
if (mIsClosed) {
throw new IOException(CANNOT_SEEK_CLOSED_STREAM_MSG);
}
// Cannot seek on streams that are not file based
if (mFile == null) {
throw new IOException(CANNOT_SEEK_STREAM_MSG);
}
// Decode virtual file pointer
// Upper 48 bits is the byte offset into the compressed stream of a
// block.
// Lower 16 bits is the byte offset into the uncompressed stream inside
// the block.
final long compressedOffset = BGZFFilePointerUtil.getBlockAddress(pos);
final int uncompressedOffset = BGZFFilePointerUtil.getBlockOffset(pos);
// Number of decompressed bytes in the target block.
final int available;
if (mCurrentBlock != null && mCurrentBlock.mBlockAddress == compressedOffset) {
// Target block is already loaded; no I/O needed.
available = mCurrentBlock.mBlock.length;
} else {
prepareForSeek();
mFile.seek(compressedOffset);
mStreamOffset = compressedOffset;
mCurrentBlock = nextBlock(getBufferForReuse(mCurrentBlock));
mCurrentOffset = 0;
available = available();
}
// An offset equal to the block length is accepted only when the block is the last one with data.
if (uncompressedOffset > available || (uncompressedOffset == available && !eof())) {
throw new IOException(INVALID_FILE_PTR_MSG + pos + " for " + getSource());
}
mCurrentOffset = uncompressedOffset;
}
/**
 * Hook invoked by seek() right before the underlying stream is repositioned. The default
 * implementation does nothing; subclasses may override it to perform cleanup.
 */
protected void prepareForSeek() {
    // intentionally empty
}
/**
 * Tells whether the seekable source has no further data blocks after the current one.
 *
 * @return true if the source reports EOF, or if exactly one empty GZIP block remains after the
 * current block
 * @throws IOException if querying the underlying source fails
 */
private boolean eof() throws IOException {
    if (mFile.eof()) {
        return true;
    }
    // A trailing block the size of EMPTY_GZIP_BLOCK is equivalent to being at EOF.
    final long fileLength = mFile.length();
    final long currentBlockEnd = mCurrentBlock.mBlockAddress + mCurrentBlock.mBlockCompressedSize;
    return fileLength - currentBlockEnd == BGZFStreamConstants.EMPTY_GZIP_BLOCK.length;
}
/**
 * Builds the virtual file pointer for the current read position.
 *
 * @return virtual file pointer that can be passed to seek() to return to the current position.
 * This is not an actual byte offset, so arithmetic on file pointers cannot be done to determine
 * the distance between the two.
 */
public long getFilePointer() {
    final DecompressedBlock block = mCurrentBlock;
    if (block == null) {
        // Nothing has been read yet, so we are at the start of the stream
        return BGZFFilePointerUtil.makeFilePointer(0, 0);
    }
    final boolean atEndOfBlock = mCurrentOffset > 0 && mCurrentOffset == block.mBlock.length;
    if (atEndOfBlock) {
        // A fully consumed block is reported as the beginning of the block that follows it
        return BGZFFilePointerUtil.makeFilePointer(block.mBlockAddress + block.mBlockCompressedSize, 0);
    }
    return BGZFFilePointerUtil.makeFilePointer(block.mBlockAddress, mCurrentOffset);
}
/**
 * Same as {@link #getFilePointer()}.
 *
 * @return the virtual file pointer of the current read position
 */
public long getPosition() {
    final long virtualPointer = getFilePointer();
    return virtualPointer;
}
/**
 * Extracts the block address from a virtual file pointer.
 *
 * @param bgzfOffset virtual file pointer
 * @return the block address encoded in the pointer
 */
public static long getFileBlock(final long bgzfOffset) {
    final long blockAddress = BGZFFilePointerUtil.getBlockAddress(bgzfOffset);
    return blockAddress;
}
/**
 * Peeks at the first block header of the stream and checks whether it looks like BGZF. The
 * stream is marked before reading and reset afterwards.
 *
 * @param stream Must be at start of file. Throws RuntimeException if !stream.markSupported().
 * @return true if the given file looks like a valid BGZF file.
 * @throws IOException if reading from or resetting the stream fails
 */
public static boolean isValidFile(final InputStream stream) throws IOException {
    if (!stream.markSupported()) {
        throw new RuntimeException("Cannot test non-buffered stream");
    }
    final int headerLength = BGZFStreamConstants.BLOCK_HEADER_LENGTH;
    stream.mark(headerLength);
    final byte[] header = new byte[headerLength];
    final int bytesRead = readBytes(stream, header, 0, headerLength);
    stream.reset();
    if (bytesRead != headerLength) {
        return false;
    }
    return isValidBlockHeader(header);
}
/**
 * Checks the GZIP identification bytes, the flag bits, the extra-field length and the BGZF
 * subfield identifiers of a block header.
 *
 * @param buffer bytes of a block header
 * @return true if all checked header fields carry the expected values
 */
private static boolean isValidBlockHeader(final byte[] buffer) {
    if (buffer[0] != BGZFStreamConstants.GZIP_ID1) {
        return false;
    }
    if ((buffer[1] & 0xFF) != BGZFStreamConstants.GZIP_ID2) {
        return false;
    }
    if ((buffer[3] & BGZFStreamConstants.GZIP_FLG) == 0) {
        return false;
    }
    if (buffer[10] != BGZFStreamConstants.GZIP_XLEN) {
        return false;
    }
    if (buffer[12] != BGZFStreamConstants.BGZF_ID1) {
        return false;
    }
    return buffer[13] == BGZFStreamConstants.BGZF_ID2;
}
/**
 * Loads the next block, offering the buffer of the current block for reuse, rewinds the read
 * offset and rethrows any exception recorded while the block was produced.
 *
 * @throws IOException if the new block carries an IOException
 */
private void readBlock() throws IOException {
    final byte[] reusable = getBufferForReuse(mCurrentBlock);
    mCurrentBlock = nextBlock(reusable);
    mCurrentOffset = 0;
    checkAndRethrowDecompressionException();
}
/**
 * Reads and decompresses the next block. This implementation simply delegates to
 * {@link #processNextBlock(byte[])}.
 *
 * @param bufferAvailableForReuse decompression buffer available for reuse
 * @return next block in the decompressed stream
 */
protected DecompressedBlock nextBlock(byte[] bufferAvailableForReuse) {
    final DecompressedBlock next = processNextBlock(bufferAvailableForReuse);
    return next;
}
/**
 * Rethrows the exception recorded on the current block, if any. IOExceptions and
 * RuntimeExceptions are rethrown as they are; anything else is wrapped in a RuntimeException.
 *
 * @throws IOException if the current block carries an IOException
 */
private void checkAndRethrowDecompressionException() throws IOException {
    final Throwable failure = mCurrentBlock.mException;
    if (failure == null) {
        return;
    }
    if (failure instanceof IOException) {
        throw (IOException) failure;
    }
    if (failure instanceof RuntimeException) {
        throw (RuntimeException) failure;
    }
    throw new RuntimeException(failure);
}
/**
 * Attempt to reuse the buffer of the given block
 *
 * @param block owning block, may be null
 * @return the block's decompression buffer, or null if no block was given
 */
private byte[] getBufferForReuse(DecompressedBlock block) {
    return block == null ? null : block.mBlock;
}
/**
* Decompress the next block from the input stream. When using asynchronous
* IO, this will be called by the background thread.
*
* Problems are not thrown from here as IOExceptions: malformed input and IOExceptions raised
* while reading or inflating are recorded on the returned block instead.
*
* @param bufferAvailableForReuse buffer in which to place decompressed block. A null or
* incorrectly sized buffer will result in the buffer being ignored and
* a new buffer allocated for decompression.
* @return next block in input stream
*/
protected DecompressedBlock processNextBlock(byte[] bufferAvailableForReuse) {
// The compressed-data buffer is allocated on first use and reused for every block.
if (mFileBuffer == null) {
mFileBuffer = new byte[BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE];
}
// Remember where this block starts before the stream offset is advanced.
long blockAddress = mStreamOffset;
try {
final int headerByteCount = readBytes(mFileBuffer, 0, BGZFStreamConstants.BLOCK_HEADER_LENGTH);
mStreamOffset += headerByteCount;
if (headerByteCount == 0) {
// Handle case where there is no empty gzip block at end.
return new DecompressedBlock(blockAddress, new byte[0], 0);
}
// A partially read header is reported as an error on the returned block.
if (headerByteCount != BGZFStreamConstants.BLOCK_HEADER_LENGTH) {
return new DecompressedBlock(blockAddress, headerByteCount, new IOException(INCORRECT_HEADER_SIZE_MSG + getSource()));
}
// The header stores the total block length minus one as a 16-bit little-endian value.
final int blockLength = unpackInt16(mFileBuffer, BGZFStreamConstants.BLOCK_LENGTH_OFFSET) + 1;
if (blockLength < BGZFStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) {
return new DecompressedBlock(blockAddress, blockLength,
new IOException(UNEXPECTED_BLOCK_LENGTH_MSG + blockLength + " for " + getSource()));
}
// Read the rest of the block right behind the header already held in mFileBuffer.
final int remaining = blockLength - BGZFStreamConstants.BLOCK_HEADER_LENGTH;
final int dataByteCount = readBytes(mFileBuffer, BGZFStreamConstants.BLOCK_HEADER_LENGTH,
remaining);
mStreamOffset += dataByteCount;
if (dataByteCount != remaining) {
return new DecompressedBlock(blockAddress, blockLength,
new BGZFException(PREMATURE_END_MSG + getSource()));
}
final byte[] decompressed = inflateBlock(mFileBuffer, blockLength, bufferAvailableForReuse);
return new DecompressedBlock(blockAddress, decompressed, blockLength);
} catch (IOException e) {
// Hand the failure to the consumer through the block rather than throwing it here.
return new DecompressedBlock(blockAddress, 0, e);
}
}
/**
 * Inflates one compressed block. The uncompressed size is taken from the last four bytes of the
 * block (little-endian); the offered buffer is reused only if it has exactly that size.
 *
 * @param compressedBlock         buffer holding the compressed block, starting at index 0
 * @param compressedLength        number of valid bytes in compressedBlock
 * @param bufferAvailableForReuse candidate output buffer, may be null
 * @return buffer holding the decompressed data
 * @throws IOException if decompression fails
 */
private byte[] inflateBlock(final byte[] compressedBlock, final int compressedLength,
                            final byte[] bufferAvailableForReuse) throws IOException {
    final int uncompressedLength = unpackInt32(compressedBlock, compressedLength - 4);
    if (uncompressedLength < 0) {
        throw new BGZFException(getSource() + " has invalid uncompressedLength: " + uncompressedLength);
    }
    final boolean reusable =
            bufferAvailableForReuse != null && uncompressedLength == bufferAvailableForReuse.length;
    // A buffer of the wrong size cannot be reused, so a fresh one is allocated instead
    final byte[] target = reusable ? bufferAvailableForReuse : new byte[uncompressedLength];
    blockGunzipper.unzipBlock(target, compressedBlock, compressedLength);
    return target;
}
/**
 * @return the source description of the underlying seekable file, or the literal
 *         "data stream" when no seekable file is set
 */
private String getSource() {
    if (mFile != null) {
        return mFile.getSource();
    }
    return "data stream";
}
/**
 * Reads up to {@code length} bytes from whichever input is configured: the seekable file
 * takes precedence over the plain stream.
 *
 * @return the number of bytes read, or 0 when neither input is set
 */
private int readBytes(final byte[] buffer, final int offset, final int length) throws IOException {
    if (mFile != null) {
        return readBytes(mFile, buffer, offset, length);
    }
    if (mStream != null) {
        return readBytes(mStream, buffer, offset, length);
    }
    return 0;
}
/**
 * Reads from a seekable stream until {@code length} bytes were obtained or the stream
 * stops delivering data (a read returning zero or a negative count).
 *
 * @return the number of bytes actually stored in {@code buffer}, possibly less than {@code length}
 */
private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length) throws IOException {
    int total = 0;
    int count;
    while (total < length
            && (count = file.read(buffer, offset + total, length - total)) > 0) {
        total += count;
    }
    return total;
}
/**
 * Reads from an input stream until {@code length} bytes were obtained or the stream
 * stops delivering data (a read returning zero or a negative count).
 *
 * @return the number of bytes actually stored in {@code buffer}, possibly less than {@code length}
 */
private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length) throws IOException {
    int total = 0;
    for (int count; total < length; total += count) {
        count = stream.read(buffer, offset + total, length - total);
        if (count <= 0) {
            break;
        }
    }
    return total;
}
/**
 * Decodes an unsigned 16-bit little-endian value.
 *
 * @param buffer source bytes
 * @param offset index of the least significant byte
 * @return the value in the range 0..65535
 */
private int unpackInt16(final byte[] buffer, final int offset) {
    final int low = buffer[offset] & 0xFF;
    final int high = buffer[offset + 1] & 0xFF;
    return (high << 8) | low;
}
/**
 * Decodes a 32-bit little-endian value.
 *
 * @param buffer source bytes
 * @param offset index of the least significant byte
 * @return the decoded int (negative when the top bit of the fourth byte is set)
 */
private int unpackInt32(final byte[] buffer, final int offset) {
    int value = 0;
    for (int i = 0; i < 4; i++) {
        // byte i contributes bits 8*i .. 8*i+7
        value |= (buffer[offset + i] & 0xFF) << (8 * i);
    }
    return value;
}
/**
 * Result of inspecting the end of a block-compressed file.
 */
public enum FileTermination {
    /** The file ends with the empty gzip block used as terminator. */
    HAS_TERMINATOR_BLOCK,
    /** No terminator, but the last block's declared size matches the bytes up to the end of the file. */
    HAS_HEALTHY_LAST_BLOCK,
    /** Neither a terminator nor a consistent last block was found. */
    DEFECTIVE
}
/**
 * Checks how the given file ends by delegating to the {@link Path} based variant.
 *
 * @param file the file to check
 * @return status of the last compressed block
 * @throws IOException if the file cannot be opened or read
 */
public static FileTermination checkTermination(final File file) throws IOException {
    final Path path = file.toPath();
    return checkTermination(path);
}
/**
 * Opens the file read-only, checks how it ends, and closes the channel again.
 *
 * @param path path to the file to check
 * @return status of the last compressed block
 * @throws IOException if the file cannot be opened or read
 */
public static FileTermination checkTermination(final Path path) throws IOException {
    try (SeekableByteChannel readChannel = Files.newByteChannel(path, StandardOpenOption.READ)) {
        final FileTermination status = checkTermination(readChannel);
        return status;
    }
}
/**
 * Checks the status of the final bgzipped block of the given bgzipped resource.
 *
 * @param channel an open channel to read from;
 *                the channel remains open and its initial position is restored when the operation
 *                completes normally. No guarantee is made about the state of the channel if an
 *                exception is thrown during reading.
 *
 * @return the status of the last compressed block
 * @throws IOException if reading from or positioning the channel fails
 */
public static FileTermination checkTermination(SeekableByteChannel channel) throws IOException {
    final int terminatorLength = BGZFStreamConstants.EMPTY_GZIP_BLOCK.length;
    final long size = channel.size();
    if (size < terminatorLength) {
        // too small to even hold the terminator block
        return FileTermination.DEFECTIVE;
    }
    final long savedPosition = channel.position();
    boolean failed = false;
    try {
        // Step 1: does the file end with the empty gzip block that terminates a bgzipped file?
        channel.position(size - terminatorLength);
        final ByteBuffer tail = ByteBuffer.allocate(terminatorLength);
        readFully(channel, tail);
        if (Arrays.equals(tail.array(), BGZFStreamConstants.EMPTY_GZIP_BLOCK)) {
            return FileTermination.HAS_TERMINATOR_BLOCK;
        }
        // Step 2: no terminator, so load the end of the file and look for the start of the last block
        final int windowSize = (int) Math.min(size, BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
        final byte[] window = new byte[windowSize];
        channel.position(size - windowSize);
        readFully(channel, ByteBuffer.wrap(window));
        final int preambleLength = BGZFStreamConstants.GZIP_BLOCK_PREAMBLE.length;
        // scan backwards for the last occurrence of the block preamble
        for (int start = window.length - terminatorLength; start >= 0; --start) {
            if (preambleEqual(BGZFStreamConstants.GZIP_BLOCK_PREAMBLE, window, start, preambleLength)) {
                final ByteBuffer sizeField = ByteBuffer.wrap(window, start + preambleLength, 4);
                sizeField.order(ByteOrder.LITTLE_ENDIAN);
                final int declaredBlockSize = (sizeField.getShort() & 0xFFFF) + 1;
                // healthy only if the declared block size reaches exactly to the end of the file
                return (window.length - start == declaredBlockSize)
                        ? FileTermination.HAS_HEALTHY_LAST_BLOCK
                        : FileTermination.DEFECTIVE;
            }
        }
        return FileTermination.DEFECTIVE;
    } catch (final Throwable e) {
        failed = true;
        throw e;
    } finally {
        // After a failure, repositioning would likely throw again and hide the original
        // exception, so the position is only restored on normal completion.
        if (!failed) {
            channel.position(savedPosition);
        }
    }
}
/**
 * Reads from the channel until {@code dst} has no remaining space, or throws if that is not possible.
 * For a freshly allocated or wrapped buffer this fills the buffer's whole capacity.
 *
 * @param channel the channel to read from, starting at its current position
 * @param dst the buffer to fill
 * @throws EOFException if the channel reaches end-of-stream before {@code dst} is full
 * @throws IOException if reading from the channel fails
 */
static void readFully(SeekableByteChannel channel, ByteBuffer dst) throws IOException {
    // Loop on the buffer's remaining space rather than counting up to its capacity: a buffer
    // whose position is not 0 (or whose limit is below its capacity) can never accept
    // "capacity" bytes, and a read into a full buffer returns 0 rather than -1.
    while (dst.hasRemaining()) {
        if (channel.read(dst) == -1) {
            throw new EOFException();
        }
    }
}
/**
 * Verifies that the file does not end in a defective block.
 *
 * @param file the file to check
 * @throws IOException if the file cannot be read
 * @throws BGZFException if the termination check reports {@link FileTermination#DEFECTIVE}
 */
public static void assertNonDefectiveFile(final File file) throws IOException {
    final FileTermination termination = checkTermination(file);
    if (termination == FileTermination.DEFECTIVE) {
        throw new BGZFException(file.getAbsolutePath() + " does not have a valid GZIP block at the end of the file.");
    }
}
/**
 * Compares the first {@code length} bytes of {@code preamble} with the bytes of {@code buf}
 * beginning at {@code startOffset}.
 *
 * @return true if all compared bytes are equal
 */
private static boolean preambleEqual(final byte[] preamble, final byte[] buf, final int startOffset, final int length) {
    int matched = 0;
    while (matched < length && preamble[matched] == buf[matched + startOffset]) {
        matched++;
    }
    // the loop only stops early on a mismatch
    return matched >= length;
}
/**
 * Holder for the outcome of reading one block: either the decompressed data or the
 * exception that prevented decompression.
 */
protected static class DecompressedBlock {
    /**
     * Decompressed block
     */
    private final byte[] mBlock;
    /**
     * Compressed size of block (the uncompressed size can be found using
     * mBlock.length)
     */
    private final int mBlockCompressedSize;
    /**
     * Stream offset of start of block
     */
    private final long mBlockAddress;
    /**
     * Exception thrown (if any) when attempting to decompress block
     */
    private final Exception mException;

    /**
     * Creates a successfully decompressed block; no exception is recorded.
     */
    public DecompressedBlock(long blockAddress, byte[] block, int compressedSize) {
        this(blockAddress, block, compressedSize, null);
    }

    /**
     * Creates a failed block: the data is an empty array and the exception is recorded.
     */
    public DecompressedBlock(long blockAddress, int compressedSize, Exception exception) {
        this(blockAddress, new byte[0], compressedSize, exception);
    }

    /** Common initialisation shared by both public constructors. */
    private DecompressedBlock(long blockAddress, byte[] block, int compressedSize, Exception exception) {
        this.mBlock = block;
        this.mBlockAddress = blockAddress;
        this.mBlockCompressedSize = compressedSize;
        this.mException = exception;
    }
}
}

View file

@ -0,0 +1,358 @@
package org.xbib.io.compress.bgzf;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
/**
 * Stream class for a file that is a series of gzip blocks (BGZF format). The caller just treats it as an
 * OutputStream, and under the covers a gzip block is written when the amount of uncompressed as-yet-unwritten
 * bytes reaches a threshold.
 *
 * The advantage of BGZF over conventional gzip is that BGZF allows for seeking without having to scan through
 * the entire file up to the position being sought.
 *
 * Note that the flush() method should not be called by client
 * unless you know what you're doing, because it forces a gzip block to be written even if the
 * number of buffered bytes has not reached threshold. close(), on the other hand, must be called
 * when done writing in order to force the last gzip block to be written.
 *
 * @see <a href="http://samtools.sourceforge.net/SAM1.pdf">http://samtools.sourceforge.net/SAM1.pdf</a> for details of BGZF file format.
 */
public class BlockCompressedOutputStream extends OutputStream {

    private static int defaultCompressionLevel = BGZFStreamConstants.DEFAULT_COMPRESSION_LEVEL;
    private static DeflaterFactory defaultDeflaterFactory = new DeflaterFactory();

    /**
     * Sets the compression level used by constructors that do not take an explicit level.
     *
     * @param compressionLevel a level between {@link Deflater#NO_COMPRESSION} and
     *                         {@link Deflater#BEST_COMPRESSION} inclusive
     * @throws IllegalArgumentException if the level is outside that range
     */
    public static void setDefaultCompressionLevel(final int compressionLevel) {
        if (compressionLevel < Deflater.NO_COMPRESSION || compressionLevel > Deflater.BEST_COMPRESSION) {
            throw new IllegalArgumentException("Invalid compression level: " + compressionLevel);
        }
        defaultCompressionLevel = compressionLevel;
    }

    /**
     * @return the compression level used by constructors that do not take an explicit level
     */
    public static int getDefaultCompressionLevel() {
        return defaultCompressionLevel;
    }

    /**
     * Sets the default {@link DeflaterFactory} that will be used for all instances unless specified otherwise in the constructor.
     * If this method is not called the default is a factory that will create the JDK {@link Deflater}.
     * @param deflaterFactory non-null default factory.
     */
    public static void setDefaultDeflaterFactory(final DeflaterFactory deflaterFactory) {
        if (deflaterFactory == null) {
            throw new IllegalArgumentException("null deflaterFactory");
        }
        defaultDeflaterFactory = deflaterFactory;
    }

    /**
     * @return the factory used by constructors that do not take an explicit {@link DeflaterFactory}
     */
    public static DeflaterFactory getDefaultDeflaterFactory() {
        return defaultDeflaterFactory;
    }

    private final BinaryCodec codec;
    private final byte[] uncompressedBuffer = new byte[BGZFStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE];
    private int numUncompressedBytes = 0;
    private final byte[] compressedBuffer =
            new byte[BGZFStreamConstants.MAX_COMPRESSED_BLOCK_SIZE -
                    BGZFStreamConstants.BLOCK_HEADER_LENGTH];
    private final Deflater deflater;

    // A second deflater is created for the very unlikely case where the regular deflation actually makes
    // things bigger, and the compressed block is too big. It should be possible to downshift the
    // primary deflater to NO_COMPRESSION level, recompress, and then restore it to its original setting,
    // but in practice that doesn't work.
    // The motivation for deflating at NO_COMPRESSION level is that it will predictably produce compressed
    // output that is 10 bytes larger than the input, and the threshold at which a block is generated is such that
    // the size of the final gzip block will always be <= 64K. This is preferred over the previous method,
    // which would attempt to compress up to 64K bytes, and if the resulting compressed block was too large,
    // try compressing fewer input bytes (aka "downshifting'). The problem with downshifting is that
    // getFilePointer might return an inaccurate value.
    // I assume (AW 29-Oct-2013) that there is no value in using hardware-assisted deflater for no-compression mode,
    // so just use JDK standard.
    private final Deflater noCompressionDeflater = new Deflater(Deflater.NO_COMPRESSION, true);
    private final CRC32 crc32 = new CRC32();
    private Path file = null;
    private long mBlockAddress = 0;

    /** Set once the stream has been closed successfully; makes further close() calls a no-op. */
    private boolean closed = false;

    /**
     * Uses the default compression level, see {@link #setDefaultCompressionLevel(int)}.
     * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
     * Use {@link #BlockCompressedOutputStream(File, int, DeflaterFactory)} to specify a custom factory.
     */
    public BlockCompressedOutputStream(final String filename) throws FileNotFoundException {
        this(filename, defaultCompressionLevel);
    }

    /**
     * Uses the default compression level, see {@link #setDefaultCompressionLevel(int)}.
     * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
     * Use {@link #BlockCompressedOutputStream(File, int, DeflaterFactory)} to specify a custom factory.
     */
    public BlockCompressedOutputStream(final File file) throws FileNotFoundException {
        this(file, defaultCompressionLevel);
    }

    /**
     * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
     *
     * @param filename name of the file to write
     * @param compressionLevel the compression level handed to the deflater factory
     */
    public BlockCompressedOutputStream(final String filename, final int compressionLevel) throws FileNotFoundException {
        this(new File(filename), compressionLevel);
    }

    /**
     * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
     *
     * @param file the file to write
     * @param compressionLevel the compression level handed to the deflater factory
     */
    public BlockCompressedOutputStream(final File file, final int compressionLevel) throws FileNotFoundException {
        this(file, compressionLevel, defaultDeflaterFactory);
    }

    /**
     * Creates the output stream writing to the given file.
     *
     * @param file the file to write
     * @param compressionLevel the compression level handed to the deflater factory
     * @param deflaterFactory custom factory to create deflaters (overrides the default)
     */
    public BlockCompressedOutputStream(final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) throws FileNotFoundException {
        this.file = file.toPath();
        codec = new BinaryCodec(file, true);
        deflater = deflaterFactory.makeDeflater(compressionLevel, true);
    }

    /**
     * Uses the default compression level, see {@link #setDefaultCompressionLevel(int)}.
     * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
     * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
     */
    public BlockCompressedOutputStream(final OutputStream os) {
        this(os, (File) null, defaultCompressionLevel);
    }

    /**
     * Uses the default compression level, see {@link #setDefaultCompressionLevel(int)}.
     * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
     * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
     *
     * @param file may be null
     */
    public BlockCompressedOutputStream(final OutputStream os, final Path file) {
        this(os, file, defaultCompressionLevel);
    }

    /**
     * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
     * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
     */
    public BlockCompressedOutputStream(final OutputStream os, final File file, final int compressionLevel) {
        this(os, file, compressionLevel, defaultDeflaterFactory);
    }

    /**
     * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}.
     * Use {@link #BlockCompressedOutputStream(OutputStream, File, int, DeflaterFactory)} to specify a custom factory.
     */
    public BlockCompressedOutputStream(final OutputStream os, final Path file, final int compressionLevel) {
        this(os, file, compressionLevel, defaultDeflaterFactory);
    }

    /**
     * Creates the output stream.
     * @param os output stream to create a BlockCompressedOutputStream from
     * @param file file to which to write the output or null if not available
     * @param compressionLevel the compression level (0-9)
     * @param deflaterFactory custom factory to create deflaters (overrides the default)
     */
    public BlockCompressedOutputStream(final OutputStream os, final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) {
        this(os, file != null ? file.toPath() : null, compressionLevel, deflaterFactory);
    }

    /**
     * Creates the output stream.
     * @param os output stream to create a BlockCompressedOutputStream from
     * @param file file to which to write the output or null if not available
     * @param compressionLevel the compression level (0-9)
     * @param deflaterFactory custom factory to create deflaters (overrides the default)
     */
    public BlockCompressedOutputStream(final OutputStream os, final Path file, final int compressionLevel, final DeflaterFactory deflaterFactory) {
        this.file = file;
        codec = new BinaryCodec(os);
        if (file != null) {
            codec.setOutputFileName(file.toAbsolutePath().toUri().toString());
        }
        deflater = deflaterFactory.makeDeflater(compressionLevel, true);
    }

    /**
     * @param output May or not already be a BlockCompressedOutputStream.
     * @return A BlockCompressedOutputStream, either by wrapping the given OutputStream, or by casting if it already
     * is a BCOS.
     */
    public static BlockCompressedOutputStream maybeBgzfWrapOutputStream(OutputStream output) {
        if (!(output instanceof BlockCompressedOutputStream)) {
            return new BlockCompressedOutputStream(output);
        } else {
            return (BlockCompressedOutputStream) output;
        }
    }

    /**
     * Writes b.length bytes from the specified byte array to this output stream. The general contract for write(b)
     * is that it should have exactly the same effect as the call write(b, 0, b.length).
     * @param bytes the data
     */
    @Override
    public void write(final byte[] bytes) throws IOException {
        write(bytes, 0, bytes.length);
    }

    /**
     * Writes len bytes from the specified byte array starting at offset off to this output stream. The general
     * contract for write(b, off, len) is that some of the bytes in the array b are written to the output stream in order;
     * element b[off] is the first byte written and b[off+len-1] is the last byte written by this operation.
     *
     * @param bytes the data
     * @param startIndex the start offset in the data
     * @param numBytes the number of bytes to write
     */
    @Override
    public void write(final byte[] bytes, int startIndex, int numBytes) throws IOException {
        while (numBytes > 0) {
            // copy as much as still fits into the block buffer, then emit a block when it is full
            final int bytesToWrite = Math.min(uncompressedBuffer.length - numUncompressedBytes, numBytes);
            System.arraycopy(bytes, startIndex, uncompressedBuffer, numUncompressedBytes, bytesToWrite);
            numUncompressedBytes += bytesToWrite;
            startIndex += bytesToWrite;
            numBytes -= bytesToWrite;
            if (numUncompressedBytes == uncompressedBuffer.length) {
                deflateBlock();
            }
        }
    }

    /**
     * Buffers a single byte (the low eight bits of {@code b}) and emits a block when the buffer is full.
     */
    @Override
    public void write(final int b) throws IOException {
        uncompressedBuffer[numUncompressedBytes++] = (byte) b;
        if (numUncompressedBytes == uncompressedBuffer.length) {
            deflateBlock();
        }
    }

    /**
     * WARNING: flush() affects the output format, because it causes the current contents of uncompressedBuffer
     * to be compressed and written, even if it isn't full. Unless you know what you're doing, don't call flush().
     * Instead, call close(), which will flush any unwritten data before closing the underlying stream.
     *
     */
    @Override
    public void flush() throws IOException {
        while (numUncompressedBytes > 0) {
            deflateBlock();
        }
        codec.getOutputStream().flush();
    }

    /**
     * close() must be called in order to flush any remaining buffered bytes. An unclosed file will likely be
     * defective. A terminator block is written. Calling close() again after a successful close has no effect.
     *
     */
    @Override
    public void close() throws IOException {
        close(true);
    }

    /**
     * Flushes buffered data, optionally writes the empty terminator block, closes the underlying codec
     * and releases the deflaters. When a terminator was written and the target is a regular file, the
     * file is re-checked for the terminator. Calling this again after a successful close has no effect.
     *
     * @param writeTerminatorBlock whether to append the empty gzip terminator block
     * @throws IOException if writing fails or the terminator cannot be found after closing
     */
    public void close(final boolean writeTerminatorBlock) throws IOException {
        if (closed) {
            // Closeable contract: closing an already closed stream has no effect
            return;
        }
        flush();
        if (writeTerminatorBlock) {
            codec.writeBytes(BGZFStreamConstants.EMPTY_GZIP_BLOCK);
        }
        codec.close();
        closed = true;
        // The stream can no longer produce blocks, so free the deflaters' native resources now
        // instead of waiting for garbage collection.
        deflater.end();
        noCompressionDeflater.end();
        // If a terminator block was written, ensure that it's there and valid
        if (writeTerminatorBlock) {
            // Can't re-open something that is not a regular file, e.g. a named pipe or an output stream
            if (this.file == null || !Files.isRegularFile(this.file)) {
                return;
            }
            if (BlockCompressedInputStream.checkTermination(this.file) !=
                    BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) {
                throw new IOException("Terminator block not found after closing BGZF file " + this.file);
            }
        }
    }

    /** Encode virtual file pointer
     * Upper 48 bits is the byte offset into the compressed stream of a block.
     * Lower 16 bits is the byte offset into the uncompressed stream inside the block.
     */
    public long getFilePointer() {
        return BGZFFilePointerUtil.makeFilePointer(mBlockAddress, numUncompressedBytes);
    }

    /**
     * @return the same value as {@link #getFilePointer()}
     */
    public long getPosition() {
        return getFilePointer();
    }

    /**
     * Compresses the current contents of uncompressedBuffer and writes them to the underlying
     * output as one gzip block. If the regular deflater's output does not fit into compressedBuffer,
     * the data is deflated again with the NO_COMPRESSION deflater. Afterwards the buffer is empty
     * and the block address is advanced by the size of the written block.
     * @return size of gzip block that was written, or 0 if there was nothing to write.
     */
    private int deflateBlock() throws IOException {
        if (numUncompressedBytes == 0) {
            return 0;
        }
        final int bytesToCompress = numUncompressedBytes;
        // Compress the input
        deflater.reset();
        deflater.setInput(uncompressedBuffer, 0, bytesToCompress);
        deflater.finish();
        int compressedSize = deflater.deflate(compressedBuffer, 0, compressedBuffer.length);
        // If it didn't all fit in compressedBuffer.length, set compression level to NO_COMPRESSION
        // and try again. This should always fit.
        if (!deflater.finished()) {
            noCompressionDeflater.reset();
            noCompressionDeflater.setInput(uncompressedBuffer, 0, bytesToCompress);
            noCompressionDeflater.finish();
            compressedSize = noCompressionDeflater.deflate(compressedBuffer, 0, compressedBuffer.length);
            if (!noCompressionDeflater.finished()) {
                throw new IllegalStateException("unpossible");
            }
        }
        // Data compressed small enough, so write it out.
        crc32.reset();
        crc32.update(uncompressedBuffer, 0, bytesToCompress);
        final int totalBlockSize = writeGzipBlock(compressedSize, bytesToCompress, crc32.getValue());
        // Clear out from uncompressedBuffer the data that was written
        numUncompressedBytes = 0;
        mBlockAddress += totalBlockSize;
        return totalBlockSize;
    }

    /**
     * Writes the entire gzip block, assuming the compressed data is stored in compressedBuffer
     * @param compressedSize number of valid bytes in compressedBuffer
     * @param uncompressedSize number of input bytes the block represents
     * @param crc CRC-32 of the uncompressed data
     * @return size of gzip block that was written.
     */
    private int writeGzipBlock(final int compressedSize, final int uncompressedSize, final long crc) throws IOException {
        // Init gzip header
        codec.writeByte(BGZFStreamConstants.GZIP_ID1);
        codec.writeByte(BGZFStreamConstants.GZIP_ID2);
        codec.writeByte(BGZFStreamConstants.GZIP_CM_DEFLATE);
        codec.writeByte(BGZFStreamConstants.GZIP_FLG);
        codec.writeInt(0); // Modification time
        codec.writeByte(BGZFStreamConstants.GZIP_XFL);
        codec.writeByte(BGZFStreamConstants.GZIP_OS_UNKNOWN);
        codec.writeShort(BGZFStreamConstants.GZIP_XLEN);
        codec.writeByte(BGZFStreamConstants.BGZF_ID1);
        codec.writeByte(BGZFStreamConstants.BGZF_ID2);
        codec.writeShort(BGZFStreamConstants.BGZF_LEN);
        final int totalBlockSize = compressedSize + BGZFStreamConstants.BLOCK_HEADER_LENGTH +
                BGZFStreamConstants.BLOCK_FOOTER_LENGTH;
        // I don't know why we store block size - 1, but that is what the spec says
        codec.writeShort((short) (totalBlockSize - 1));
        codec.writeBytes(compressedBuffer, 0, compressedSize);
        codec.writeInt((int) crc);
        codec.writeInt(uncompressedSize);
        return totalBlockSize;
    }
}

View file

@ -0,0 +1,114 @@
package org.xbib.io.compress.bgzf;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
/**
 * For decompressing GZIP blocks that are already loaded into a byte[].
 * The main advantage is that this object can be used over and over again to decompress many blocks,
 * whereas a new GZIPInputStream and ByteArrayInputStream would otherwise need to be created for each
 * block to be decompressed.
 *
 * This code requires that the GZIP header conform to the GZIP blocks written to BAM files, with
 * a specific subfield and no other optional stuff.
 */
public class BlockGunzipper {

    private static InflaterFactory defaultInflaterFactory = new InflaterFactory();
    private final Inflater inflater;
    private final CRC32 crc32 = new CRC32();
    private boolean checkCrcs = false;

    /**
     * Create a BlockGunzipper using the current default inflater factory.
     */
    BlockGunzipper() {
        inflater = defaultInflaterFactory.makeInflater(true); // GZIP mode
    }

    /**
     * Create a BlockGunzipper using the provided inflaterFactory
     * @param inflaterFactory factory that supplies the inflater used by this instance
     */
    BlockGunzipper(InflaterFactory inflaterFactory) {
        inflater = inflaterFactory.makeInflater(true); // GZIP mode
    }

    /**
     * Sets the default {@link InflaterFactory} that will be used for all instances unless specified otherwise in the constructor.
     * If this method is not called the default is a factory that will create the JDK {@link Inflater}.
     * @param inflaterFactory non-null default factory.
     */
    public static void setDefaultInflaterFactory(final InflaterFactory inflaterFactory) {
        if (inflaterFactory == null) {
            throw new IllegalArgumentException("null inflaterFactory");
        }
        defaultInflaterFactory = inflaterFactory;
    }

    /**
     * @return the factory used by the no-argument constructor
     */
    public static InflaterFactory getDefaultInflaterFactory() {
        return defaultInflaterFactory;
    }

    /** Allows the caller to decide whether or not to check CRCs when uncompressing blocks. */
    public void setCheckCrcs(final boolean check) {
        this.checkCrcs = check;
    }

    /**
     * Decompress GZIP-compressed data
     * @param uncompressedBlock must be big enough to hold decompressed output.
     * @param compressedBlock compressed data starting at offset 0
     * @param compressedLength size of compressed data, possibly less than the size of the buffer.
     * @throws BGZFFormatException if the block header, sizes or (when enabled) the CRC are inconsistent
     * @throws BGZFException if the compressed payload itself is malformed
     */
    void unzipBlock(byte[] uncompressedBlock, byte[] compressedBlock, int compressedLength) {
        // A block must at least hold the fixed header and footer. Without this check a truncated
        // block yields a negative payload size and fails later with an unrelated index exception.
        if (compressedLength < BGZFStreamConstants.BLOCK_HEADER_LENGTH + BGZFStreamConstants.BLOCK_FOOTER_LENGTH) {
            throw new BGZFFormatException("GZIP block too short: " + compressedLength);
        }
        try {
            ByteBuffer byteBuffer = ByteBuffer.wrap(compressedBlock, 0, compressedLength);
            byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
            // Validate GZIP header
            if (byteBuffer.get() != BGZFStreamConstants.GZIP_ID1 ||
                    byteBuffer.get() != (byte) BGZFStreamConstants.GZIP_ID2 ||
                    byteBuffer.get() != BGZFStreamConstants.GZIP_CM_DEFLATE ||
                    byteBuffer.get() != BGZFStreamConstants.GZIP_FLG
            ) {
                throw new BGZFFormatException("Invalid GZIP header");
            }
            // Skip MTIME, XFL, OS fields
            byteBuffer.position(byteBuffer.position() + 6);
            if (byteBuffer.getShort() != BGZFStreamConstants.GZIP_XLEN) {
                throw new BGZFFormatException("Invalid GZIP header");
            }
            // Skip blocksize subfield intro
            byteBuffer.position(byteBuffer.position() + 4);
            // Read ushort; the field stores the total block size minus one
            final int totalBlockSize = (byteBuffer.getShort() & 0xffff) + 1;
            if (totalBlockSize != compressedLength) {
                throw new BGZFFormatException("GZIP blocksize disagreement");
            }
            // Read expected size and CRC from end of GZIP block
            final int deflatedSize = compressedLength - BGZFStreamConstants.BLOCK_HEADER_LENGTH - BGZFStreamConstants.BLOCK_FOOTER_LENGTH;
            byteBuffer.position(byteBuffer.position() + deflatedSize);
            int expectedCrc = byteBuffer.getInt();
            int uncompressedSize = byteBuffer.getInt();
            // The size comes from the file, so make sure it fits the destination before inflating.
            if (uncompressedSize < 0 || uncompressedSize > uncompressedBlock.length) {
                throw new BGZFFormatException("Invalid uncompressed size in GZIP block: " + uncompressedSize);
            }
            inflater.reset();
            // Decompress
            inflater.setInput(compressedBlock, BGZFStreamConstants.BLOCK_HEADER_LENGTH, deflatedSize);
            final int inflatedBytes = inflater.inflate(uncompressedBlock, 0, uncompressedSize);
            if (inflatedBytes != uncompressedSize) {
                throw new BGZFFormatException("Did not inflate expected amount");
            }
            // Validate CRC if so desired
            if (this.checkCrcs) {
                crc32.reset();
                crc32.update(uncompressedBlock, 0, uncompressedSize);
                final long crc = crc32.getValue();
                if ((int) crc != expectedCrc) {
                    throw new BGZFFormatException("CRC mismatch");
                }
            }
        } catch (DataFormatException e) {
            throw new BGZFException(e);
        }
    }
}

View file

@ -0,0 +1,24 @@
package org.xbib.io.compress.bgzf;
import java.util.zip.Deflater;
/**
 * Creates the {@link Deflater} instances used by {@link BlockCompressedOutputStream}.
 * Subclass it to plug in a different deflater implementation (e.g., a faster one).
 */
public class DeflaterFactory {

    /**
     * Explicit no-argument constructor, kept so that references to it are easy to search for.
     */
    public DeflaterFactory() {
    }

    /**
     * Creates a new deflater. Subclasses may override this to return their own implementation.
     *
     * @param compressionLevel the compression level (0-9)
     * @param gzipCompatible if true then use GZIP compatible compression; the value is passed
     *                       as the second argument of the JDK {@link Deflater} constructor
     * @return a newly created deflater
     */
    public Deflater makeDeflater(final int compressionLevel, final boolean gzipCompatible) {
        final Deflater created = new Deflater(compressionLevel, gzipCompatible);
        return created;
    }
}

View file

@ -0,0 +1,20 @@
package org.xbib.io.compress.bgzf;
import java.util.zip.Inflater;
/**
 * Creates the {@link Inflater} instances used by {@link BlockGunzipper}.
 * Subclass it to plug in a different inflater implementation (e.g., a faster one);
 * this base implementation hands out JDK {@link Inflater} objects.
 */
public class InflaterFactory {

    /**
     * Creates a new inflater. Subclasses may override this to return their own implementation.
     *
     * @param gzipCompatible if true then use GZIP compatible compression; the value is passed
     *                       as the argument of the JDK {@link Inflater} constructor
     * @return a newly created JDK {@link Inflater}
     */
    public Inflater makeInflater(final boolean gzipCompatible) {
        final Inflater created = new Inflater(gzipCompatible);
        return created;
    }
}

View file

@ -0,0 +1,105 @@
package org.xbib.io.compress.bgzf;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * A wrapper class to provide buffered read access to a SeekableStream. Just wrapping such a stream with
 * a BufferedInputStream will not work as it does not support seeking. In this implementation a
 * seek call is delegated to the wrapped stream, and the buffer reset.
 */
public class SeekableBufferedStream extends SeekableStream {

    /** Little extension to buffered input stream to give access to the available bytes in the buffer. */
    private static class ExtBufferedInputStream extends BufferedInputStream {
        private ExtBufferedInputStream(final InputStream inputStream, final int i) {
            super(inputStream, i);
        }

        /** Returns the number of bytes that can be read from the buffer without reading more into the buffer. */
        int getBytesInBufferAvailable() {
            // count is the end of the valid data in buf, which may be less than buf.length when the
            // buffer is only partially filled; count == pos means the buffer is empty.
            return this.count - this.pos;
        }
    }

    public static final int DEFAULT_BUFFER_SIZE = 512000;

    private final int bufferSize;
    final SeekableStream wrappedStream;
    ExtBufferedInputStream bufferedStream;
    long position;

    /**
     * @param stream the stream to wrap; reading starts with the logical position set to 0
     * @param bufferSize size of the internal read buffer in bytes
     */
    public SeekableBufferedStream(final SeekableStream stream, final int bufferSize) {
        this.bufferSize = bufferSize;
        this.wrappedStream = stream;
        this.position = 0;
        bufferedStream = new ExtBufferedInputStream(wrappedStream, bufferSize);
    }

    /**
     * Wraps the stream using {@link #DEFAULT_BUFFER_SIZE}.
     */
    public SeekableBufferedStream(final SeekableStream stream) {
        this(stream, DEFAULT_BUFFER_SIZE);
    }

    @Override
    public long length() {
        return wrappedStream.length();
    }

    /**
     * Skips within the buffer when the requested amount is already buffered, otherwise seeks
     * the wrapped stream to the target position.
     */
    @Override
    public long skip(final long skipLength) throws IOException {
        if (skipLength < this.bufferedStream.getBytesInBufferAvailable()) {
            final long retval = this.bufferedStream.skip(skipLength);
            this.position += retval;
            return retval;
        } else {
            final long position = this.position + skipLength;
            seek(position);
            return skipLength;
        }
    }

    /**
     * Moves the wrapped stream to the given position and discards the buffered data by
     * creating a new buffer.
     */
    @Override
    public void seek(final long position) throws IOException {
        this.position = position;
        wrappedStream.seek(position);
        bufferedStream = new ExtBufferedInputStream(wrappedStream, bufferSize);
    }

    @Override
    public int read() throws IOException {
        int b = bufferedStream.read();
        // Only advance on an actual byte; at end of stream (-1) the position must stay put,
        // otherwise position() drifts past the real offset.
        if (b >= 0) {
            position++;
        }
        return b;
    }

    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        final int nBytesRead = bufferedStream.read(buffer, offset, length);
        if (nBytesRead > 0) {
            position += nBytesRead;
        }
        return nBytesRead;
    }

    @Override
    public void close() throws IOException {
        wrappedStream.close();
    }

    @Override
    public boolean eof() throws IOException {
        return position >= wrappedStream.length();
    }

    @Override
    public String getSource() {
        return wrappedStream.getSource();
    }

    /**
     * @return the logical read position tracked by this wrapper
     */
    @Override
    public long position() throws IOException {
        return position;
    }
}

View file

@ -0,0 +1,110 @@
package org.xbib.io.compress.bgzf;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
/**
 * A {@link SeekableStream} reading from a local file through a read-only {@link RandomAccessFile}.
 */
public class SeekableFileStream extends SeekableStream {

    /**
     * Collection of all open instances. SeekableFileStream objects are usually open and kept open for the
     * duration of a session. This collection supports a method to close them all.
     */
    private static final Collection<SeekableFileStream> allInstances = Collections.synchronizedCollection(new HashSet<>());

    File file;
    RandomAccessFile fis;

    /**
     * Opens the file for reading and registers this instance in the collection of open instances.
     *
     * @param file the file to read
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public SeekableFileStream(final File file) throws FileNotFoundException {
        this.file = file;
        fis = new RandomAccessFile(file, "r");
        allInstances.add(this);
    }

    @Override
    public long length() {
        return file.length();
    }

    @Override
    public boolean eof() throws IOException {
        return fis.length() == fis.getFilePointer();
    }

    @Override
    public void seek(final long position) throws IOException {
        fis.seek(position);
    }

    @Override
    public long position() throws IOException {
        return fis.getChannel().position();
    }

    /**
     * Moves the file position forward by {@code n} bytes.
     *
     * @return the difference between the new and the old position
     */
    @Override
    public long skip(long n) throws IOException {
        long initPos = position();
        fis.getChannel().position(initPos + n);
        return position() - initPos;
    }

    /**
     * Reads until {@code length} bytes were read or the end of the file is reached.
     *
     * @return the number of bytes read, or the negative value reported by the file when
     *         nothing could be read before the end of the file
     * @throws IndexOutOfBoundsException if {@code length} is negative
     */
    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        if (length < 0) {
            throw new IndexOutOfBoundsException();
        }
        int n = 0;
        while (n < length) {
            final int count = fis.read(buffer, offset + n, length - n);
            if (count < 0) {
                if (n > 0) {
                    return n;
                } else {
                    return count;
                }
            }
            n += count;
        }
        return n;
    }

    @Override
    public int read() throws IOException {
        return fis.read();
    }

    @Override
    public int read(byte[] b) throws IOException {
        return fis.read(b);
    }

    @Override
    public String getSource() {
        return file.getAbsolutePath();
    }

    /**
     * Removes this instance from the collection of open instances and closes the file.
     */
    @Override
    public void close() throws IOException {
        allInstances.remove(this);
        fis.close();
    }

    /**
     * Closes every instance that is currently registered as open; failures to close an
     * individual instance are ignored so that the remaining instances are still closed.
     */
    public static synchronized void closeAllInstances() {
        final Collection<SeekableFileStream> clonedInstances;
        // Copying iterates the synchronized collection, which requires holding its lock manually;
        // constructors and close() of other threads modify it under that same lock.
        synchronized (allInstances) {
            clonedInstances = new HashSet<>(allInstances);
        }
        for (SeekableFileStream sfs : clonedInstances) {
            try {
                sfs.close();
            } catch (IOException ignored) {
                // best effort: keep closing the remaining instances
            }
        }
        allInstances.clear();
    }
}

View file

@ -0,0 +1,44 @@
package org.xbib.io.compress.bgzf;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
 * An {@link InputStream} that additionally exposes its length and current position
 * and can be repositioned to an absolute offset.
 */
public abstract class SeekableStream extends InputStream {

    /**
     * @return the total length of the stream
     */
    public abstract long length();

    /**
     * @return the current position within the stream
     * @throws IOException if the position cannot be determined
     */
    public abstract long position() throws IOException;

    /**
     * Moves the stream to the given position.
     *
     * @param position the position to move to
     * @throws IOException if repositioning fails
     */
    public abstract void seek(long position) throws IOException;

    @Override
    public abstract int read(byte[] buffer, int offset, int length) throws IOException;

    @Override
    public abstract void close() throws IOException;

    /**
     * @return whether the end of the stream has been reached
     * @throws IOException if this cannot be determined
     */
    public abstract boolean eof() throws IOException;

    /**
     * @return String representation of source (e.g. URL, file path, etc.), or null if not available.
     */
    public abstract String getSource();

    /**
     * Read enough bytes to fill the input buffer.
     *
     * @param b byte array
     * @throws EOFException If EOF is reached before buffer is filled
     * @throws IOException if the underlying read fails
     */
    public void readFully(byte[] b) throws IOException {
        int remaining = b.length;
        while (remaining > 0) {
            // Next free slot is whatever has already been filled from the front.
            final int got = read(b, b.length - remaining, remaining);
            if (got < 0) {
                throw new EOFException();
            }
            remaining -= got;
        }
    }
}

View file

@ -0,0 +1,11 @@
package org.xbib.io.compress.bgzf;
import java.io.File;
import java.io.IOException;
/**
 * Factory for {@link SeekableStream} instances.
 */
public class SeekableStreamFactory {

    /**
     * Creates a seekable stream for the given path, which is interpreted as a file path.
     *
     * @param path path of the file to open
     * @return a {@link SeekableFileStream} reading from that file
     * @throws IOException if the stream cannot be created
     */
    public static SeekableStream getStreamFor(String path) throws IOException {
        final File target = new File(path);
        return new SeekableFileStream(target);
    }
}

View file

@ -30,7 +30,7 @@ public class ChunkDecoderFactory {
*/
public static ChunkDecoder optimalInstance() {
try {
return INSTANCE.implClass.newInstance();
return INSTANCE.implClass.getDeclaredConstructor().newInstance();
} catch (Exception e) {
throw new IllegalStateException("Failed to load a ChunkDecoder instance (" + e.getClass().getName() + "): "
+ e.getMessage(), e);

View file

@ -1,5 +1,5 @@
package org.xbib.io.compress.xz;
/**
 * Package-private checked exception that carries no message and no cause.
 * NOTE(review): judging by the name, it presumably signals that an index indicator
 * was encountered while reading a stream; confirm against the throw sites.
 */
@SuppressWarnings("serial")
class IndexIndicatorException extends Exception {

    // Intentionally empty: the exception type itself is the signal.

}

View file

@ -1,4 +1,3 @@
package org.xbib.io.compress.xz.index;
import org.xbib.io.compress.xz.XZIOException;
@ -7,11 +6,12 @@ import org.xbib.io.compress.xz.common.EncoderUtil;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.zip.CheckedOutputStream;
public class IndexEncoder extends IndexBase {
private final ArrayList records = new ArrayList();
private final List<IndexRecord> records = new ArrayList<>();
public IndexEncoder() {
super(new XZIOException("XZ Stream or its Index has grown too big"));
@ -34,8 +34,7 @@ public class IndexEncoder extends IndexBase {
EncoderUtil.encodeVLI(outChecked, recordCount);
// List of Records
for (Iterator i = records.iterator(); i.hasNext(); ) {
IndexRecord record = (IndexRecord) i.next();
for (IndexRecord record : records) {
EncoderUtil.encodeVLI(outChecked, record.unpadded);
EncoderUtil.encodeVLI(outChecked, record.uncompressed);
}

View file

@ -1,3 +1,21 @@
// Central version catalogs for the build: 'libs' carries build-level versions,
// 'testLibs' carries the test dependency coordinates.
dependencyResolutionManagement {
versionCatalogs {
libs {
// Gradle version; the root build's wrapper task reads it via libs.versions.gradle.get().
version('gradle', '8.5')
}
testLibs {
// Shared version for the three JUnit Jupiter artifacts below (referenced via versionRef).
version('junit', '5.10.1')
library('junit-jupiter-api', 'org.junit.jupiter', 'junit-jupiter-api').versionRef('junit')
library('junit-jupiter-params', 'org.junit.jupiter', 'junit-jupiter-params').versionRef('junit')
library('junit-jupiter-engine', 'org.junit.jupiter', 'junit-jupiter-engine').versionRef('junit')
// The platform launcher has its own hard-coded version rather than a versionRef.
// NOTE(review): presumably this must be kept in step with the 'junit' version above; confirm.
library('junit-jupiter-platform-launcher', 'org.junit.platform', 'junit-platform-launcher').version('1.10.1')
library('junit4', 'junit', 'junit').version('4.13.2')
library('hamcrest', 'org.hamcrest', 'hamcrest-library').version('2.2')
}
}
}
include 'io-compress-bgzf'
include 'io-compress-bzip2'
include 'io-compress-lzf'
include 'io-compress-xz'
@ -9,4 +27,4 @@ include 'io-archive-dump'
include 'io-archive-jar'
include 'io-archive-tar'
include 'io-archive-zip'
include 'io-codec'
include 'io-codec'