commit 9945b77f55e5cd2f527d24cfe81d8486b0eff04b Author: Jörg Prante Date: Thu May 28 12:09:56 2020 +0200 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6fa7db9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +/.idea +/target +/.settings +/.classpath +/.project +/.gradle +build +out +logs +*~ +*.iml +.DS_Store diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000..537a158 --- /dev/null +++ b/build.gradle @@ -0,0 +1,33 @@ +plugins { + id "de.marcphilipp.nexus-publish" version "0.4.0" + id "io.codearte.nexus-staging" version "0.21.1" +} + +wrapper { + gradleVersion = "${project.property('gradle.wrapper.version')}" + distributionType = Wrapper.DistributionType.ALL +} + +ext { + user = 'xbib' + name = 'archive' + description = 'Archive algorithms for Java' + inceptionYear = '2016' + url = 'https://github.com/' + user + '/' + name + scmUrl = 'https://github.com/' + user + '/' + name + scmConnection = 'scm:git:git://github.com/' + user + '/' + name + '.git' + scmDeveloperConnection = 'scm:git:ssh://git@github.com:' + user + '/' + name + '.git' + issueManagementSystem = 'Github' + issueManagementUrl = ext.scmUrl + '/issues' + licenseName = 'The Apache License, Version 2.0' + licenseUrl = 'http://www.apache.org/licenses/LICENSE-2.0.txt' +} + +subprojects { + apply plugin: 'java-library' + apply from: rootProject.file('gradle/ide/idea.gradle') + apply from: rootProject.file('gradle/compile/java.gradle') + apply from: rootProject.file('gradle/test/junit5.gradle') + apply from: rootProject.file('gradle/publishing/publication.gradle') +} +apply from: rootProject.file('gradle/publishing/sonatype.gradle') diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 0000000..2a004ed --- /dev/null +++ b/gradle.properties @@ -0,0 +1,5 @@ +group = org.xbib +name = archive +version = 1.0.0 + +gradle.wrapper.version = 6.4.1 diff --git a/gradle/compile/java.gradle b/gradle/compile/java.gradle new file mode 100644 index 0000000..a9d76ce --- /dev/null +++ b/gradle/compile/java.gradle @@ -0,0 +1,35 @@ + +apply plugin: 'java-library' + +java { + modularity.inferModulePath.set(true) +} + +compileJava { + sourceCompatibility = JavaVersion.VERSION_11 + targetCompatibility = JavaVersion.VERSION_11 +} + +compileTestJava { + sourceCompatibility = JavaVersion.VERSION_11 + targetCompatibility = JavaVersion.VERSION_11 +} + +jar { + manifest { + attributes('Implementation-Version': project.version) + } +} + +task sourcesJar(type: Jar, dependsOn: classes) { + classifier 'sources' + from sourceSets.main.allSource +} + +task javadocJar(type: Jar, dependsOn: javadoc) { + classifier 'javadoc' +} + +artifacts { + archives sourcesJar, javadocJar +} \ No newline at end of file diff --git a/gradle/documentation/asciidoc.gradle b/gradle/documentation/asciidoc.gradle new file mode 100644 index 0000000..87ba22e --- /dev/null +++ b/gradle/documentation/asciidoc.gradle @@ -0,0 +1,55 @@ +apply plugin: 'org.xbib.gradle.plugin.asciidoctor' + +configurations { + asciidoclet +} + +dependencies { + asciidoclet "org.asciidoctor:asciidoclet:${project.property('asciidoclet.version')}" +} + + +asciidoctor { + backends 'html5' + outputDir = file("${rootProject.projectDir}/docs") + separateOutputDirs = false + attributes 'source-highlighter': 'coderay', + idprefix: '', + idseparator: '-', + toc: 'left', + doctype: 'book', + icons: 'font', + encoding: 'utf-8', + sectlink: true, + sectanchors: true, + linkattrs: true, + imagesdir: 'img', + stylesheet: 
"${projectDir}/src/docs/asciidoc/css/foundation.css" +} + + +/*javadoc { +options.docletpath = configurations.asciidoclet.files.asType(List) +options.doclet = 'org.asciidoctor.Asciidoclet' +//options.overview = "src/docs/asciidoclet/overview.adoc" +options.addStringOption "-base-dir", "${projectDir}" +options.addStringOption "-attribute", + "name=${project.name},version=${project.version},title-link=https://github.com/xbib/${project.name}" +configure(options) { + noTimestamp = true +} +}*/ + + +/*javadoc { + options.docletpath = configurations.asciidoclet.files.asType(List) + options.doclet = 'org.asciidoctor.Asciidoclet' + options.overview = "${rootProject.projectDir}/src/docs/asciidoclet/overview.adoc" + options.addStringOption "-base-dir", "${projectDir}" + options.addStringOption "-attribute", + "name=${project.name},version=${project.version},title-link=https://github.com/xbib/${project.name}" + options.destinationDirectory(file("${projectDir}/docs/javadoc")) + configure(options) { + noTimestamp = true + } +}*/ diff --git a/gradle/ide/idea.gradle b/gradle/ide/idea.gradle new file mode 100644 index 0000000..64e2167 --- /dev/null +++ b/gradle/ide/idea.gradle @@ -0,0 +1,13 @@ +apply plugin: 'idea' + +idea { + module { + outputDir file('build/classes/java/main') + testOutputDir file('build/classes/java/test') + } +} + +if (project.convention.findPlugin(JavaPluginConvention)) { + //sourceSets.main.output.classesDirs = file("build/classes/java/main") + //sourceSets.test.output.classesDirs = file("build/classes/java/test") +} diff --git a/gradle/publishing/publication.gradle b/gradle/publishing/publication.gradle new file mode 100644 index 0000000..c35fcb9 --- /dev/null +++ b/gradle/publishing/publication.gradle @@ -0,0 +1,64 @@ + +apply plugin: "de.marcphilipp.nexus-publish" + +publishing { + publications { + mavenJava(MavenPublication) { + from components.java + artifact sourcesJar + artifact javadocJar + pom { + name = project.name + description = rootProject.ext.description + url = rootProject.ext.url + inceptionYear = rootProject.ext.inceptionYear + packaging = 'jar' + organization { + name = 'xbib' + url = 'https://xbib.org' + } + developers { + developer { + id = 'jprante' + name = 'Jörg Prante' + email = 'joergprante@gmail.com' + url = 'https://github.com/jprante' + } + } + scm { + url = rootProject.ext.scmUrl + connection = rootProject.ext.scmConnection + developerConnection = rootProject.ext.scmDeveloperConnection + } + issueManagement { + system = rootProject.ext.issueManagementSystem + url = rootProject.ext.issueManagementUrl + } + licenses { + license { + name = rootProject.ext.licenseName + url = rootProject.ext.licenseUrl + distribution = 'repo' + } + } + } + } + } +} + +if (project.hasProperty("signing.keyId")) { + apply plugin: 'signing' + signing { + sign publishing.publications.mavenJava + } +} + +nexusPublishing { + repositories { + sonatype { + username = project.property('ossrhUsername') + password = project.property('ossrhPassword') + packageGroup = "org.xbib" + } + } +} diff --git a/gradle/publishing/sonatype.gradle b/gradle/publishing/sonatype.gradle new file mode 100644 index 0000000..e1813f3 --- /dev/null +++ b/gradle/publishing/sonatype.gradle @@ -0,0 +1,11 @@ + +if (project.hasProperty('ossrhUsername') && project.hasProperty('ossrhPassword')) { + + apply plugin: 'io.codearte.nexus-staging' + + nexusStaging { + username = project.property('ossrhUsername') + password = project.property('ossrhPassword') + packageGroup = "org.xbib" + } +} diff --git 
a/gradle/test/junit5.gradle b/gradle/test/junit5.gradle new file mode 100644 index 0000000..cfef972 --- /dev/null +++ b/gradle/test/junit5.gradle @@ -0,0 +1,27 @@ + +def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.6.2' +def hamcrestVersion = project.hasProperty('hamcrest.version')?project.property('hamcrest.version'):'2.2' + +dependencies { + testImplementation "org.junit.jupiter:junit-jupiter-api:${junitVersion}" + testImplementation "org.junit.jupiter:junit-jupiter-params:${junitVersion}" + testImplementation "org.hamcrest:hamcrest-library:${hamcrestVersion}" + testRuntimeOnly "org.junit.jupiter:junit-jupiter-engine:${junitVersion}" +} + +test { + useJUnitPlatform() + failFast = true + testLogging { + events 'STARTED', 'PASSED', 'FAILED', 'SKIPPED' + } + afterSuite { desc, result -> + if (!desc.parent) { + println "\nTest result: ${result.resultType}" + println "Test summary: ${result.testCount} tests, " + + "${result.successfulTestCount} succeeded, " + + "${result.failedTestCount} failed, " + + "${result.skippedTestCount} skipped" + } + } +} diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..62d4c05 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..21e622d --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-6.4.1-all.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..fbd7c51 --- /dev/null +++ b/gradlew @@ -0,0 +1,185 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). 
+cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the 
java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..a9f778a --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,104 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/io-archive-ar/build.gradle b/io-archive-ar/build.gradle new file mode 100644 index 0000000..7c4e2c7 --- /dev/null +++ b/io-archive-ar/build.gradle @@ -0,0 +1,3 @@ +dependencies { + api project(':io-archive') +} diff --git a/io-archive-ar/src/main/java/module-info.java b/io-archive-ar/src/main/java/module-info.java new file mode 100644 index 0000000..b890945 --- /dev/null +++ b/io-archive-ar/src/main/java/module-info.java @@ -0,0 +1,4 @@ +module org.xbib.io.archive.ar { + exports org.xbib.io.archive.ar; + requires org.xbib.io.archive; +} diff --git a/io-archive-ar/src/main/java/org/xbib/io/archive/ar/ArArchiveEntry.java b/io-archive-ar/src/main/java/org/xbib/io/archive/ar/ArArchiveEntry.java new file mode 100644 index 0000000..419617d --- /dev/null +++ b/io-archive-ar/src/main/java/org/xbib/io/archive/ar/ArArchiveEntry.java @@ -0,0 +1,181 @@ +package org.xbib.io.archive.ar; + +import org.xbib.io.archive.entry.ArchiveEntry; +import java.io.File; +import java.util.Date; + +/** + * Represents an archive entry in the "ar" format. + * Each AR archive starts with "!<arch>" followed by a LF. After these 8 bytes + * the archive entries are listed. The format of an entry header is as it follows: + *
+ * START BYTE   END BYTE    NAME                    FORMAT      LENGTH
+ * 0            15          File name               ASCII       16
+ * 16           27          Modification timestamp  Decimal     12
+ * 28           33          Owner ID                Decimal     6
+ * 34           39          Group ID                Decimal     6
+ * 40           47          File mode               Octal       8
+ * 48           57          File size (bytes)       Decimal     10
+ * 58           59          File magic              \140\012    2
+ *
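+ * For illustration, a minimal sketch of slicing one such header into its
+ * fields (a hypothetical helper, not part of this class; offsets follow
+ * the table above):
+ * <pre>
+ * import static java.nio.charset.StandardCharsets.US_ASCII;
+ *
+ * byte[] h = new byte[60]; // filled with one entry header read from the stream
+ * String name = new String(h, 0, 16, US_ASCII).trim();
+ * long mtime  = Long.parseLong(new String(h, 16, 12, US_ASCII).trim());
+ * int uid     = Integer.parseInt(new String(h, 28, 6, US_ASCII).trim());
+ * int gid     = Integer.parseInt(new String(h, 34, 6, US_ASCII).trim());
+ * int mode    = Integer.parseInt(new String(h, 40, 8, US_ASCII).trim(), 8);
+ * long size   = Long.parseLong(new String(h, 48, 10, US_ASCII).trim());
+ * // bytes 58-59 hold the entry trailer `\n (\140\012)
+ * </pre>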
+ * This specifies that an ar archive entry header contains 60 bytes. + * Due to the limitation of the file name length to 16 bytes GNU and + * BSD has their own variants of this format. Currently this code + * can read but not write the GNU variant and doesn't support + * the BSD variant at all. + * + * ar man page + */ +public class ArArchiveEntry implements ArchiveEntry { + + /** + * The header for each entry + */ + public static final String HEADER = "!\n"; + + /** + * The trailer for each entry + */ + public static final String TRAILER = "`\012"; + + private static final int DEFAULT_MODE = 33188; // = (octal) 0100644 + + /** + * SVR4/GNU adds a trailing / to names; BSD does not. + * They also vary in how names longer than 16 characters are represented. + * (Not yet fully supported by this implementation) + */ + private String name; + + private int userId; + + private int groupId; + + private int mode; + + private long lastModified; + + private long length; + + public ArArchiveEntry() { + } + + /** + * Create a new instance using a couple of default values. + * Sets userId and groupId to 0, the octal file mode to 644 and + * the last modified time to the current time. + * + * @param name name of the entry + * @param length length of the entry in bytes + */ + public ArArchiveEntry(String name, long length) { + this(name, length, 0, 0, DEFAULT_MODE, + System.currentTimeMillis() / 1000); + } + + /** + * Create a new instance. + * + * @param name name of the entry + * @param length length of the entry in bytes + * @param userId numeric user id + * @param groupId numeric group id + * @param mode file mode + * @param lastModified last modified time in seconds since the epoch + */ + public ArArchiveEntry(String name, long length, int userId, int groupId, + int mode, long lastModified) { + this.name = name; + this.length = length; + this.userId = userId; + this.groupId = groupId; + this.mode = mode; + this.lastModified = lastModified; + } + + /** + * Create a new instance using the attributes of the given file + */ + public ArArchiveEntry(File inputFile, String entryName) { + // TODO sort out mode + this(entryName, inputFile.isFile() ? inputFile.length() : 0, + 0, 0, DEFAULT_MODE, inputFile.lastModified() / 1000); + } + + public ArArchiveEntry setEntrySize(long size) { + this.length = size; + return this; + } + + public long getEntrySize() { + return this.getLength(); + } + + public ArArchiveEntry setName(String name) { + this.name = name; + return this; + } + + public String getName() { + return name; + } + + public int getUserId() { + return userId; + } + + public int getGroupId() { + return groupId; + } + + public int getMode() { + return mode; + } + + public ArArchiveEntry setLastModified(Date date) { + this.lastModified = date.getTime() / 1000; + return this; + } + + /** + * Last modified time in seconds since the epoch. + */ + public Date getLastModified() { + return new Date(1000 * lastModified); + } + + public long getLength() { + return length; + } + + public boolean isDirectory() { + return false; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((name == null) ? 
0 : name.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + ArArchiveEntry other = (ArArchiveEntry) obj; + if (name == null) { + if (other.name != null) { + return false; + } + } else if (!name.equals(other.name)) { + return false; + } + return true; + } +} diff --git a/io-archive-ar/src/main/java/org/xbib/io/archive/ar/ArArchiveInputStream.java b/io-archive-ar/src/main/java/org/xbib/io/archive/ar/ArArchiveInputStream.java new file mode 100644 index 0000000..79b8520 --- /dev/null +++ b/io-archive-ar/src/main/java/org/xbib/io/archive/ar/ArArchiveInputStream.java @@ -0,0 +1,329 @@ +package org.xbib.io.archive.ar; + +import org.xbib.io.archive.stream.ArchiveInputStream; +import org.xbib.io.archive.util.ArchiveUtils; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +/** + * Implements the "ar" archive format as an input stream. + */ +public class ArArchiveInputStream extends ArchiveInputStream { + + static final String BSD_LONGNAME_PREFIX = "#1/"; + + private static final int BSD_LONGNAME_PREFIX_LEN = + BSD_LONGNAME_PREFIX.length(); + + private static final String BSD_LONGNAME_PATTERN = + "^" + BSD_LONGNAME_PREFIX + "\\d+"; + + private final InputStream input; + private long offset = 0; + private boolean closed; + + /* + * If getNextEnxtry has been called, the entry metadata is stored in + * currentEntry. + */ + private ArArchiveEntry currentEntry = null; + + // Storage area for extra long names (GNU ar) + private byte[] namebuffer = null; + + /* + * The offset where the current entry started. -1 if no entry has been + * called + */ + private long entryOffset = -1; + + /** + * Constructs an Ar input stream with the referenced stream + * + * @param pInput the ar input stream + */ + public ArArchiveInputStream(final InputStream pInput) { + input = pInput; + closed = false; + } + + /** + * Returns the next AR entry in this stream. + * + * @return the next AR entry. + * @throws java.io.IOException if the entry could not be read + */ + public ArArchiveEntry getNextArEntry() throws IOException { + if (currentEntry != null) { + final long entryEnd = entryOffset + currentEntry.getLength(); + while (offset < entryEnd) { + int x = read(); + if (x == -1) { + // hit EOF before previous entry was complete + // TODO: throw an exception instead? 
+ return null; + } + } + currentEntry = null; + } + + if (offset == 0) { + final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER); + final byte[] realized = new byte[expected.length]; + final int read = read(realized); + if (read != expected.length) { + throw new IOException("failed to read header"); + } + for (int i = 0; i < expected.length; i++) { + if (expected[i] != realized[i]) { + throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized)); + } + } + } + + if (offset % 2 != 0 && read() < 0) { + // hit eof + return null; + } + + if (input.available() == 0) { + return null; + } + + final byte[] name = new byte[16]; + final byte[] lastmodified = new byte[12]; + final byte[] userid = new byte[6]; + final byte[] groupid = new byte[6]; + final byte[] filemode = new byte[8]; + final byte[] length = new byte[10]; + + read(name); + read(lastmodified); + read(userid); + read(groupid); + read(filemode); + read(length); + + { + final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER); + final byte[] realized = new byte[expected.length]; + final int read = read(realized); + if (read != expected.length) { + throw new IOException("failed to read entry trailer"); + } + for (int i = 0; i < expected.length; i++) { + if (expected[i] != realized[i]) { + throw new IOException("invalid entry trailer. not read the content?"); + } + } + } + + entryOffset = offset; + +// GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename. + + // entry name is stored as ASCII string + String temp = ArchiveUtils.toAsciiString(name).trim(); + long len = asLong(length); + + if (isGNUStringTable(temp)) { // GNU extended filenames entry + currentEntry = readGNUStringTable(length); + return getNextArEntry(); + } else if (temp.endsWith("/")) { // GNU terminator + temp = temp.substring(0, temp.length() - 1); + } else if (isGNULongName(temp)) { + int offset = Integer.parseInt(temp.substring(1));// get the offset + temp = getExtendedName(offset); // convert to the long name + } else if (isBSDLongName(temp)) { + temp = getBSDLongName(temp); + // entry length contained the length of the file name in + // addition to the real length of the entry. + // assume file name was ASCII, there is no "standard" otherwise + int nameLen = temp.length(); + len -= nameLen; + entryOffset += nameLen; + } + + currentEntry = new ArArchiveEntry(temp, len, asInt(userid, true), + asInt(groupid, true), asInt(filemode, 8), + asLong(lastmodified)); + return currentEntry; + } + + /** + * Get an extended name from the GNU extended name buffer. + * + * @param offset pointer to entry within the buffer + * @return the extended file name; without trailing "/" if present. 
+ * @throws java.io.IOException if name not found or buffer not set up + */ + private String getExtendedName(int offset) throws IOException { + if (namebuffer == null) { + throw new IOException("Cannot process GNU long filename as no // record was found"); + } + for (int i = offset; i < namebuffer.length; i++) { + if (namebuffer[i] == '\012') { + if (namebuffer[i - 1] == '/') { + i--; // drop trailing / + } + return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset); + } + } + throw new IOException("Failed to read entry: " + offset); + } + + private long asLong(byte[] input) { + return Long.parseLong(ArchiveUtils.toAsciiString(input).trim()); + } + + private int asInt(byte[] input) { + return asInt(input, 10, false); + } + + private int asInt(byte[] input, boolean treatBlankAsZero) { + return asInt(input, 10, treatBlankAsZero); + } + + private int asInt(byte[] input, int base) { + return asInt(input, base, false); + } + + private int asInt(byte[] input, int base, boolean treatBlankAsZero) { + String string = ArchiveUtils.toAsciiString(input).trim(); + if (string.length() == 0 && treatBlankAsZero) { + return 0; + } + return Integer.parseInt(string, base); + } + + @Override + public ArArchiveEntry getNextEntry() throws IOException { + return getNextArEntry(); + } + + @Override + public void close() throws IOException { + if (!closed) { + closed = true; + input.close(); + } + currentEntry = null; + } + + @Override + public int read(byte[] b, final int off, final int len) throws IOException { + int toRead = len; + if (currentEntry != null) { + final long entryEnd = entryOffset + currentEntry.getLength(); + if (len > 0 && entryEnd > offset) { + toRead = (int) Math.min(len, entryEnd - offset); + } else { + return -1; + } + } + final int ret = this.input.read(b, off, toRead); + offset += (ret > 0 ? ret : 0); + return ret; + } + + + /** + * Does the name look like it is a long name (or a name containing + * spaces) as encoded by BSD ar? + *

+     * <p>
+     * From the FreeBSD ar(5) man page:
+     * <pre>
+     * BSD   In the BSD variant, names that are shorter than 16
+     *       characters and without embedded spaces are stored
+     *       directly in this field.  If a name has an embedded
+     *       space, or if it is longer than 16 characters, then
+     *       the string "#1/" followed by the decimal represen-
+     *       tation of the length of the file name is placed in
+     *       this field. The actual file name is stored immedi-
+     *       ately after the archive header.  The content of the
+     *       archive member follows the file name.  The ar_size
+     *       field of the header (see below) will then hold the
+     *       sum of the size of the file name and the size of
+     *       the member.
+     * </pre>
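+     * For example, a minimal sketch of decoding such a name field (the
+     * values are illustrative):
+     * <pre>
+     * String field = "#1/16";                             // from a header
+     * int nameLen = Integer.parseInt(field.substring(3)); // 16
+     * // the real name occupies the next nameLen bytes after the header,
+     * // and the size field includes them: payload = size - nameLen
+     * </pre>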
+ */ + private static boolean isBSDLongName(String name) { + return name != null && name.matches(BSD_LONGNAME_PATTERN); + } + + /** + * Reads the real name from the current stream assuming the very + * first bytes to be read are the real file name. + * + * @see #isBSDLongName + */ + private String getBSDLongName(String bsdLongName) throws IOException { + int nameLen = + Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN)); + byte[] name = new byte[nameLen]; + int read = 0, readNow = 0; + while ((readNow = input.read(name, read, nameLen - read)) >= 0) { + read += readNow; + if (read == nameLen) { + break; + } + } + if (read != nameLen) { + throw new EOFException(); + } + return ArchiveUtils.toAsciiString(name); + } + + private static final String GNU_STRING_TABLE_NAME = "//"; + + /** + * Is this the name of the "Archive String Table" as used by + * SVR4/GNU to store long file names? + *

+ * <p>GNU ar stores multiple extended filenames in the data section
+ * of a file with the name "//", this record is referred to by
+ * future headers.</p>
+ * <p>A header references an extended filename by storing a "/"
+ * followed by a decimal offset to the start of the filename in
+ * the extended filename data section.</p>
+ * <p>The format of the "//" file itself is simply a list of the
+ * long filenames, each separated by one or more LF
+ * characters. Note that the decimal offsets are number of
+ * characters, not line or string number within the "//" file.</p>
+ */ + private static boolean isGNUStringTable(String name) { + return GNU_STRING_TABLE_NAME.equals(name); + } + + /** + * Reads the GNU archive String Table. + * + * @see #isGNUStringTable + */ + private ArArchiveEntry readGNUStringTable(byte[] length) throws IOException { + int bufflen = asInt(length); // Assume length will fit in an int + namebuffer = new byte[bufflen]; + int read = read(namebuffer, 0, bufflen); + if (read != bufflen) { + throw new IOException("Failed to read complete // record: expected=" + + bufflen + " read=" + read); + } + return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen); + } + + private static final String GNU_LONGNAME_PATTERN = "^/\\d+"; + + /** + * Does the name look like it is a long name (or a name containing + * spaces) as encoded by SVR4/GNU ar? + * + * @see #isGNUStringTable + */ + private boolean isGNULongName(String name) { + return name != null && name.matches(GNU_LONGNAME_PATTERN); + } +} diff --git a/io-archive-ar/src/main/java/org/xbib/io/archive/ar/ArArchiveOutputStream.java b/io-archive-ar/src/main/java/org/xbib/io/archive/ar/ArArchiveOutputStream.java new file mode 100644 index 0000000..44911cd --- /dev/null +++ b/io-archive-ar/src/main/java/org/xbib/io/archive/ar/ArArchiveOutputStream.java @@ -0,0 +1,215 @@ +package org.xbib.io.archive.ar; + +import org.xbib.io.archive.stream.ArchiveOutputStream; +import org.xbib.io.archive.util.ArchiveUtils; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * Implements the "ar" archive format as an output stream. + */ +public class ArArchiveOutputStream extends ArchiveOutputStream { + /** + * Fail if a long file name is required in the archive. + */ + public static final int LONGFILE_ERROR = 0; + + /** + * BSD ar extensions are used to store long file names in the archive. + */ + public static final int LONGFILE_BSD = 1; + + private final OutputStream out; + + private long entryOffset = 0; + + private ArArchiveEntry prevEntry; + + private boolean haveUnclosedEntry = false; + + private int longFileMode = LONGFILE_ERROR; + + /** + * indicates if this archive is finished + */ + private boolean finished = false; + + public ArArchiveOutputStream(final OutputStream pOut) { + this.out = pOut; + } + + /** + * Set the long file mode. + * This can be LONGFILE_ERROR(0) or LONGFILE_BSD(1). + * This specifies the treatment of long file names (names >= 16). + * Default is LONGFILE_ERROR. 
+ * + * @param longFileMode the mode to use + */ + public void setLongFileMode(int longFileMode) { + this.longFileMode = longFileMode; + } + + private long writeArchiveHeader() throws IOException { + byte[] header = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER); + out.write(header); + return header.length; + } + + @Override + public void closeArchiveEntry() throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + if (prevEntry == null || !haveUnclosedEntry) { + throw new IOException("No current entry to close"); + } + if ((entryOffset % 2) != 0) { + out.write('\n'); // Pad byte + } + haveUnclosedEntry = false; + } + + @Override + public ArArchiveEntry newArchiveEntry() { + return new ArArchiveEntry(); + } + + @Override + public void putArchiveEntry(final ArArchiveEntry pEntry) throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + if (prevEntry == null) { + writeArchiveHeader(); + } else { + if (prevEntry.getLength() != entryOffset) { + throw new IOException("length does not match entry (" + prevEntry.getLength() + " != " + entryOffset); + } + + if (haveUnclosedEntry) { + closeArchiveEntry(); + } + } + + prevEntry = pEntry; + + writeEntryHeader(pEntry); + + entryOffset = 0; + haveUnclosedEntry = true; + } + + private long fill(final long pOffset, final long pNewOffset, final char pFill) throws IOException { + final long diff = pNewOffset - pOffset; + + if (diff > 0) { + for (int i = 0; i < diff; i++) { + write(pFill); + } + } + + return pNewOffset; + } + + private long write(final String data) throws IOException { + final byte[] bytes = data.getBytes("ascii"); + write(bytes); + return bytes.length; + } + + private long writeEntryHeader(final ArArchiveEntry pEntry) throws IOException { + + long offset = 0; + boolean mustAppendName = false; + + final String n = pEntry.getName(); + if (LONGFILE_ERROR == longFileMode && n.length() > 16) { + throw new IOException("filename too long, > 16 chars: " + n); + } + if (LONGFILE_BSD == longFileMode && + (n.length() > 16 || n.indexOf(" ") > -1)) { + mustAppendName = true; + offset += write(ArArchiveInputStream.BSD_LONGNAME_PREFIX + + String.valueOf(n.length())); + } else { + offset += write(n); + } + + offset = fill(offset, 16, ' '); + final String m = "" + (pEntry.getLastModified()); + if (m.length() > 12) { + throw new IOException("modified too long"); + } + offset += write(m); + + offset = fill(offset, 28, ' '); + final String u = "" + pEntry.getUserId(); + if (u.length() > 6) { + throw new IOException("userid too long"); + } + offset += write(u); + + offset = fill(offset, 34, ' '); + final String g = "" + pEntry.getGroupId(); + if (g.length() > 6) { + throw new IOException("groupid too long"); + } + offset += write(g); + + offset = fill(offset, 40, ' '); + final String fm = "" + Integer.toString(pEntry.getMode(), 8); + if (fm.length() > 8) { + throw new IOException("filemode too long"); + } + offset += write(fm); + + offset = fill(offset, 48, ' '); + final String s = + String.valueOf(pEntry.getLength() + + (mustAppendName ? 
n.length() : 0)); + if (s.length() > 10) { + throw new IOException("size too long"); + } + offset += write(s); + + offset = fill(offset, 58, ' '); + + offset += write(ArArchiveEntry.TRAILER); + + if (mustAppendName) { + offset += write(n); + } + + return offset; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + entryOffset += len; + } + + /** + * Calls finish if necessary, and then closes the OutputStream + */ + @Override + public void close() throws IOException { + if (!finished) { + finish(); + } + out.close(); + prevEntry = null; + } + + @Override + public void finish() throws IOException { + if (haveUnclosedEntry) { + throw new IOException("This archive contains unclosed entries."); + } else if (finished) { + throw new IOException("This archive has already been finished"); + } + finished = true; + } +} diff --git a/io-archive-cpio/build.gradle b/io-archive-cpio/build.gradle new file mode 100644 index 0000000..7c4e2c7 --- /dev/null +++ b/io-archive-cpio/build.gradle @@ -0,0 +1,3 @@ +dependencies { + api project(':io-archive') +} diff --git a/io-archive-cpio/src/main/java/module-info.java b/io-archive-cpio/src/main/java/module-info.java new file mode 100644 index 0000000..a391980 --- /dev/null +++ b/io-archive-cpio/src/main/java/module-info.java @@ -0,0 +1,4 @@ +module org.xbib.io.archive.cpio { + exports org.xbib.io.archive.cpio; + requires org.xbib.io.archive; +} diff --git a/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CountingOutputStream.java b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CountingOutputStream.java new file mode 100644 index 0000000..44b4e45 --- /dev/null +++ b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CountingOutputStream.java @@ -0,0 +1,55 @@ + +package org.xbib.io.archive.cpio; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * Stream that tracks the number of bytes read. + */ +public class CountingOutputStream extends FilterOutputStream { + private long bytesWritten = 0; + + public CountingOutputStream(final OutputStream out) { + super(out); + } + + @Override + public void write(int b) throws IOException { + out.write(b); + count(1); + } + + @Override + public void write(byte[] b) throws IOException { + write(b, 0, b.length); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + count(len); + } + + /** + * Increments the counter of already written bytes. + * Doesn't increment if the EOF has been hit (written == -1) + * + * @param written the number of bytes written + */ + protected void count(long written) { + if (written != -1) { + bytesWritten += written; + } + } + + /** + * Returns the current number of bytes written to this stream. + * + * @return the number of written bytes + */ + public long getBytesWritten() { + return bytesWritten; + } +} diff --git a/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioArchiveEntry.java b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioArchiveEntry.java new file mode 100644 index 0000000..dc23214 --- /dev/null +++ b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioArchiveEntry.java @@ -0,0 +1,805 @@ +package org.xbib.io.archive.cpio; + +import org.xbib.io.archive.entry.ArchiveEntry; + +import java.io.File; +import java.util.Date; + +/** + * A cpio archive consists of a sequence of files. There are several types of + * headers defided in two categories of new and old format. 
The headers are
+ * recognized by magic numbers:
+ * <ul>
+ * <li>"070701" ASCII for new portable format</li>
+ * <li>"070702" ASCII for new portable format with CRC format</li>
+ * <li>"070707" ASCII for old ascii (also known as Portable ASCII, odc or
+ * old character format)</li>
+ * <li>070707 binary for old binary</li>
+ * </ul>
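+ * A sketch of sniffing the flavour from the first six bytes of a stream
+ * (the variable names are illustrative):
+ * <pre>
+ * import static java.nio.charset.StandardCharsets.US_ASCII;
+ *
+ * byte[] m = new byte[6];   // first six bytes read from the archive
+ * String magic = new String(m, US_ASCII);
+ * boolean newAscii    = "070701".equals(magic);
+ * boolean newAsciiCrc = "070702".equals(magic);
+ * boolean oldAscii    = "070707".equals(magic);
+ * // old binary: the first two bytes are the 16-bit value 070707 (octal),
+ * // i.e. 0x71C7; which byte comes first reveals the writer's endianness
+ * </pre>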
+ * The old binary format is limited to 16 bits for user id, group + * id, device, and inode numbers. It is limited to 4 gigabyte file + * sizes. + * The old ASCII format is limited to 18 bits for the user id, group + * id, device, and inode numbers. It is limited to 8 gigabyte file + * sizes. + * The new ASCII format is limited to 4 gigabyte file sizes. + * CPIO 2.5 knows also about tar, but it is not recognized here. + * OLD FORMAT + * Each file has a 76 (ascii) / 26 (binary) byte header, a variable + * length, NUL terminated filename, and variable length file data. A + * header for a filename "TRAILER!!!" indicates the end of the + * archive. + * All the fields in the header are ISO 646 (approximately ASCII) + * strings of octal numbers, left padded, not NUL terminated. + *
+ * FIELDNAME        NOTES
+ * c_magic          The integer value octal 070707.  This value can be used to deter-
+ *                  mine whether this archive is written with little-endian or big-
+ *                  endian integers.
+ * c_dev            Device that contains a directory entry for this file
+ * c_ino            I-node number that identifies the input file to the file system
+ * c_mode           The mode specifies both the regular permissions and the file type.
+ * c_uid            Numeric User ID of the owner of the input file
+ * c_gid            Numeric Group ID of the owner of the input file
+ * c_nlink          Number of links that are connected to the input file
+ * c_rdev           For block special and character special entries, this field
+ *                  contains the associated device number.  For all other entry types,
+ *                  it should be set to zero by writers and ignored by readers.
+ * c_mtime[2]       Modification time of the file, indicated as the number of seconds
+ *                  since the start of the epoch, 00:00:00 UTC January 1, 1970.  The
+ *                  four-byte integer is stored with the most-significant 16 bits
+ *                  first followed by the least-significant 16 bits.  Each of the two
+ *                  16 bit values are stored in machine-native byte order.
+ * c_namesize       Length of the path name, including the terminating null byte
+ * c_filesize[2]    Length of the file in bytes. This is the length of the data
+ *                  section that follows the header structure. Must be 0 for
+ *                  FIFOs and directories
+ *
+ * All fields are unsigned short fields with 16-bit integer values
+ * apart from c_mtime and c_filesize which are 32-bit integer values
+ *
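+ * In the ASCII variant described below, such fields are rendered as
+ * left-padded octal text; a sketch:
+ * <pre>
+ * String mode = String.format("%06o", 0100644); // "100644" (6-byte field)
+ * String size = String.format("%011o", 1234L);  // "00000002322" (11-byte field)
+ * </pre>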
+ * If necessary, the filename and file data are padded with a NUL byte to an even length + * Special files, directories, and the trailer are recorded with + * the h_filesize field equal to 0. + * In the ASCII version of this format, the 16-bit entries are represented as 6-byte octal numbers, + * and the 32-bit entries are represented as 11-byte octal numbers. No padding is added. + * NEW FORMAT + * Each file has a 110 byte header, a variable length, NUL + * terminated filename, and variable length file data. A header for a + * filename "TRAILER!!!" indicates the end of the archive. All the + * fields in the header are ISO 646 (approximately ASCII) strings of + * hexadecimal numbers, left padded, not NUL terminated. + *
+ * FIELDNAME        NOTES
+ * c_magic[6]       The string 070701 for new ASCII, the string 070702 for new ASCII with CRC
+ * c_ino[8]
+ * c_mode[8]
+ * c_uid[8]
+ * c_gid[8]
+ * c_nlink[8]
+ * c_mtim[8]
+ * c_filesize[8]    must be 0 for FIFOs and directories
+ * c_maj[8]
+ * c_min[8]
+ * c_rmaj[8]        only valid for chr and blk special files
+ * c_rmin[8]        only valid for chr and blk special files
+ * c_namesize[8]    count includes terminating NUL in pathname
+ * c_check[8]       0 for "new" portable format; for CRC format
+ *                  the sum of all the bytes in the file
+ *
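+ * A sketch of one such 8-digit hexadecimal field and of the four-byte
+ * padding described below (the file name is illustrative):
+ * <pre>
+ * String filesize = String.format("%08x", 1234L);  // "000004d2"
+ * int headerSize = 110;
+ * int nameSize = "hello.txt".length() + 1;         // includes the trailing NUL
+ * int pad = (4 - (headerSize + nameSize) % 4) % 4; // NUL bytes after the pathname
+ * </pre>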
+ * New ASCII Format The "new" ASCII format uses 8-byte hexadecimal + * fields for all numbers and separates device numbers into separate + * fields for major and minor numbers. + * The pathname is followed by NUL bytes so that the total size of + * the fixed header plus pathname is a multiple of four. Likewise, the + * file data is padded to a multiple of four bytes. + * This class uses mutable fields and is not considered to be + * threadsafe. + * Based on code from the jRPM project (http://jrpm.sourceforge.net) + * The MAGIC numbers and other constants are defined in {@link CpioConstants} + * N.B. does not handle the cpio "tar" format + * + * CPIO man page + */ +public class CpioArchiveEntry implements CpioConstants, ArchiveEntry { + + /** + * See constructor documenation for possible values. + */ + private short fileFormat; + + /** + * The number of bytes in each header record; depends on the file format + */ + private int headerSize; + + /** + * The boundary to which the header and data elements are aligned: 0, 2 or 4 bytes + */ + private int alignmentBoundary; + + // Header fields + + private long chksum = 0; + + /** + * Number of bytes in the file + */ + private long filesize = 0; + + private long gid = 0; + + private long inode = 0; + + private long maj = 0; + + private long min = 0; + + private long mode = 0; + + private long mtime = 0; + + private String name; + + private long nlink = 0; + + private long rmaj = 0; + + private long rmin = 0; + + private long uid = 0; + + public CpioArchiveEntry() { + } + + /** + * Creates a CPIOArchiveEntry with a specified format. + * + * @param format The cpio format for this entry. + * Possible format values are: + * CpioConstants.FORMAT_NEW + * CpioConstants.FORMAT_NEW_CRC + * CpioConstants.FORMAT_OLD_BINARY + * CpioConstants.FORMAT_OLD_ASCII + */ + public CpioArchiveEntry(final short format) { + switch (format) { + case FORMAT_NEW: + this.headerSize = 110; + this.alignmentBoundary = 4; + break; + case FORMAT_NEW_CRC: + this.headerSize = 110; + this.alignmentBoundary = 4; + break; + case FORMAT_OLD_ASCII: + this.headerSize = 76; + this.alignmentBoundary = 0; + break; + case FORMAT_OLD_BINARY: + this.headerSize = 26; + this.alignmentBoundary = 2; + break; + default: + throw new IllegalArgumentException("Unknown header type"); + } + this.fileFormat = format; + } + + /** + * Creates a CPIOArchiveEntry with a specified name. The format of + * this entry will be the new format. + * + * @param name The name of this entry. + */ + public CpioArchiveEntry(final String name) { + this(FORMAT_NEW, name); + } + + /** + * Creates a CPIOArchiveEntry with a specified name. + * + * @param format The cpio format for this entry. + * @param name The name of this entry. + * Possible format values are: + * CpioConstants.FORMAT_NEW + * CpioConstants.FORMAT_NEW_CRC + * CpioConstants.FORMAT_OLD_BINARY + * CpioConstants.FORMAT_OLD_ASCII + */ + public CpioArchiveEntry(final short format, final String name) { + this(format); + this.name = name; + } + + /** + * Creates a CPIOArchiveEntry with a specified name. The format of + * this entry will be the new format. + * + * @param name The name of this entry. + * @param size The size of this entry + */ + public CpioArchiveEntry(final String name, final long size) { + this(name); + setEntrySize(size); + } + + /** + * Creates a CPIOArchiveEntry with a specified name. + * + * @param format The cpio format for this entry. + * @param name The name of this entry. 
+ * @param size The size of this entry + * Possible format values are: + * CpioConstants.FORMAT_NEW + * CpioConstants.FORMAT_NEW_CRC + * CpioConstants.FORMAT_OLD_BINARY + * CpioConstants.FORMAT_OLD_ASCII + */ + public CpioArchiveEntry(final short format, final String name, + final long size) { + this(format, name); + setEntrySize(size); + } + + /** + * Creates a CPIOArchiveEntry with a specified name for a + * specified file. The format of this entry will be the new + * format. + * + * @param inputFile The file to gather information from. + * @param entryName The name of this entry. + */ + public CpioArchiveEntry(File inputFile, String entryName) { + this(FORMAT_NEW, inputFile, entryName); + } + + /** + * Creates a CPIOArchiveEntry with a specified name for a + * specified file. + * + * @param format The cpio format for this entry. + * @param inputFile The file to gather information from. + * @param entryName The name of this entry. + * Possible format values are: + * CpioConstants.FORMAT_NEW + * CpioConstants.FORMAT_NEW_CRC + * CpioConstants.FORMAT_OLD_BINARY + * CpioConstants.FORMAT_OLD_ASCII + */ + public CpioArchiveEntry(final short format, File inputFile, + String entryName) { + this(format, entryName, inputFile.isFile() ? inputFile.length() : 0); + long mode = 0; + if (inputFile.isDirectory()) { + mode |= C_ISDIR; + } else if (inputFile.isFile()) { + mode |= C_ISREG; + } else { + throw new IllegalArgumentException("Cannot determine type of file " + + inputFile.getName()); + } + setMode(mode); + setTime(inputFile.lastModified() / 1000); + } + + /** + * Check if the method is allowed for the defined format. + */ + private void checkNewFormat() { + if ((this.fileFormat & FORMAT_NEW_MASK) == 0) { + throw new UnsupportedOperationException(); + } + } + + /** + * Check if the method is allowed for the defined format. + */ + private void checkOldFormat() { + if ((this.fileFormat & FORMAT_OLD_MASK) == 0) { + throw new UnsupportedOperationException(); + } + } + + /** + * Get the checksum. + * Only supported for the new formats. + * + * @return Returns the checksum. + * @throws UnsupportedOperationException if the format is not a new format + */ + public long getChksum() { + checkNewFormat(); + return this.chksum; + } + + /** + * Get the device id. + * + * @return Returns the device id. + * @throws UnsupportedOperationException if this method is called for a CPIOArchiveEntry with a new + * format. + */ + public long getDevice() { + checkOldFormat(); + return this.min; + } + + /** + * Get the major device id. + * + * @return Returns the major device id. + * @throws UnsupportedOperationException if this method is called for a CPIOArchiveEntry with an old + * format. + */ + public long getDeviceMaj() { + checkNewFormat(); + return this.maj; + } + + /** + * Get the minor device id + * + * @return Returns the minor device id. + * @throws UnsupportedOperationException if format is not a new format + */ + public long getDeviceMin() { + checkNewFormat(); + return this.min; + } + + /** + * Get the filesize. + * + * @return Returns the filesize. + */ + public long getEntrySize() { + return this.filesize; + } + + /** + * Get the format for this entry. + * + * @return Returns the format. + */ + public short getFormat() { + return this.fileFormat; + } + + /** + * Get the group id. + * + * @return Returns the group id. + */ + public long getGID() { + return this.gid; + } + + /** + * Get the header size for this CPIO format + * + * @return Returns the header size in bytes. 
+     */
+    public int getHeaderSize() {
+        return this.headerSize;
+    }
+
+    /**
+     * Get the alignment boundary for this CPIO format.
+     *
+     * @return Returns the alignment boundary (0, 2, 4) in bytes
+     */
+    public int getAlignmentBoundary() {
+        return this.alignmentBoundary;
+    }
+
+    /**
+     * Get the number of bytes needed to pad the header to the alignment boundary.
+     *
+     * @return the number of bytes needed to pad the header (0, 1, 2, 3)
+     */
+    public int getHeaderPadCount() {
+        if (this.alignmentBoundary == 0) {
+            return 0;
+        }
+        int size = this.headerSize + this.name.length() + 1; // name has terminating null
+        int remain = size % this.alignmentBoundary;
+        if (remain > 0) {
+            return this.alignmentBoundary - remain;
+        }
+        return 0;
+    }
+
+    /**
+     * Get the number of bytes needed to pad the data to the alignment boundary.
+     *
+     * @return the number of bytes needed to pad the data (0, 1, 2, 3)
+     */
+    public int getDataPadCount() {
+        if (this.alignmentBoundary == 0) {
+            return 0;
+        }
+        long size = this.filesize;
+        int remain = (int) (size % this.alignmentBoundary);
+        if (remain > 0) {
+            return this.alignmentBoundary - remain;
+        }
+        return 0;
+    }
+
+    /**
+     * Get the inode.
+     *
+     * @return Returns the inode.
+     */
+    public long getInode() {
+        return this.inode;
+    }
+
+    /**
+     * Get the mode of this entry (e.g. directory, regular file).
+     *
+     * @return Returns the mode.
+     */
+    public long getMode() {
+        return mode == 0 && !CPIO_TRAILER.equals(name) ? C_ISREG : mode;
+    }
+
+    /**
+     * Get the name.
+     *
+     * @return Returns the name.
+     */
+    public String getName() {
+        return this.name;
+    }
+
+    /**
+     * Get the number of links.
+     *
+     * @return Returns the number of links.
+     */
+    public long getNumberOfLinks() {
+        return nlink == 0 ?
+                (isDirectory() ? 2 : 1)
+                : nlink;
+    }
+
+    /**
+     * Get the remote device id.
+     *
+     * @return Returns the remote device id.
+     * @throws UnsupportedOperationException if this method is called for a CpioArchiveEntry with a new
+     *                                       format.
+     */
+    public long getRemoteDevice() {
+        checkOldFormat();
+        return this.rmin;
+    }
+
+    /**
+     * Get the remote major device id.
+     *
+     * @return Returns the remote major device id.
+     * @throws UnsupportedOperationException if this method is called for a CpioArchiveEntry with an old
+     *                                       format.
+     */
+    public long getRemoteDeviceMaj() {
+        checkNewFormat();
+        return this.rmaj;
+    }
+
+    /**
+     * Get the remote minor device id.
+     *
+     * @return Returns the remote minor device id.
+     * @throws UnsupportedOperationException if this method is called for a CpioArchiveEntry with an old
+     *                                       format.
+     */
+    public long getRemoteDeviceMin() {
+        checkNewFormat();
+        return this.rmin;
+    }
+
+    /**
+     * Get the time in seconds.
+     *
+     * @return Returns the time.
+     */
+    public long getTime() {
+        return this.mtime;
+    }
+
+    public CpioArchiveEntry setLastModified(Date date) {
+        setTime(date.getTime() / 1000);
+        return this;
+    }
+
+    public Date getLastModified() {
+        return new Date(1000 * getTime());
+    }
+
+    /**
+     * Get the user id.
+     *
+     * @return Returns the user id.
+     */
+    public long getUID() {
+        return this.uid;
+    }
+
+    /**
+     * Check if this entry represents a block device.
+     *
+     * @return TRUE if this entry is a block device.
+     */
+    public boolean isBlockDevice() {
+        return (this.mode & S_IFMT) == C_ISBLK;
+    }
+
+    /**
+     * Check if this entry represents a character device.
+     *
+     * @return TRUE if this entry is a character device.
+     */
+    public boolean isCharacterDevice() {
+        return (this.mode & S_IFMT) == C_ISCHR;
+    }
+
+    /**
+     * Check if this entry represents a directory.
+     *
+     * @return TRUE if this entry is a directory.
+     */
+    public boolean isDirectory() {
+        return (this.mode & S_IFMT) == C_ISDIR;
+    }
+
+    /**
+     * Check if this entry represents a network device.
+     *
+     * @return TRUE if this entry is a network device.
+     */
+    public boolean isNetwork() {
+        return (this.mode & S_IFMT) == C_ISNWK;
+    }
+
+    /**
+     * Check if this entry represents a pipe.
+     *
+     * @return TRUE if this entry is a pipe.
+     */
+    public boolean isPipe() {
+        return (this.mode & S_IFMT) == C_ISFIFO;
+    }
+
+    /**
+     * Check if this entry represents a regular file.
+     *
+     * @return TRUE if this entry is a regular file.
+     */
+    public boolean isRegularFile() {
+        return (this.mode & S_IFMT) == C_ISREG;
+    }
+
+    /**
+     * Check if this entry represents a socket.
+     *
+     * @return TRUE if this entry is a socket.
+     */
+    public boolean isSocket() {
+        return (this.mode & S_IFMT) == C_ISSOCK;
+    }
+
+    /**
+     * Check if this entry represents a symbolic link.
+     *
+     * @return TRUE if this entry is a symbolic link.
+     */
+    public boolean isSymbolicLink() {
+        return (this.mode & S_IFMT) == C_ISLNK;
+    }
+
+    /**
+     * Set the checksum. The checksum is calculated by adding all bytes of a
+     * file to transfer (crc += buf[pos] & 0xFF).
+     *
+     * @param chksum The checksum to set.
+     */
+    public void setChksum(final long chksum) {
+        checkNewFormat();
+        this.chksum = chksum;
+    }
+
+    /**
+     * Set the device id.
+     *
+     * @param device The device id to set.
+     * @throws UnsupportedOperationException if this method is called for a CpioArchiveEntry with a new
+     *                                       format.
+     */
+    public void setDevice(final long device) {
+        checkOldFormat();
+        this.min = device;
+    }
+
+    /**
+     * Set the major device id.
+     *
+     * @param maj The major device id to set.
+     */
+    public void setDeviceMaj(final long maj) {
+        checkNewFormat();
+        this.maj = maj;
+    }
+
+    /**
+     * Set the minor device id.
+     *
+     * @param min The minor device id to set.
+     */
+    public void setDeviceMin(final long min) {
+        checkNewFormat();
+        this.min = min;
+    }
+
+    /**
+     * Set the filesize.
+     *
+     * @param size The filesize to set.
+     */
+    public CpioArchiveEntry setEntrySize(final long size) {
+        if (size < 0 || size > 0xFFFFFFFFL) {
+            throw new IllegalArgumentException("invalid entry size <" + size
+                    + ">");
+        }
+        this.filesize = size;
+        return this;
+    }
+
+    /**
+     * Set the group id.
+     *
+     * @param gid The group id to set.
+     */
+    public void setGID(final long gid) {
+        this.gid = gid;
+    }
+
+    /**
+     * Set the inode.
+     *
+     * @param inode The inode to set.
+     */
+    public void setInode(final long inode) {
+        this.inode = inode;
+    }
+
+    /**
+     * Set the mode of this entry (e.g. directory, regular file).
+     *
+     * @param mode The mode to set.
+     */
+    public void setMode(final long mode) {
+        final long maskedMode = mode & S_IFMT;
+        switch ((int) maskedMode) {
+            case C_ISDIR:
+            case C_ISLNK:
+            case C_ISREG:
+            case C_ISFIFO:
+            case C_ISCHR:
+            case C_ISBLK:
+            case C_ISSOCK:
+            case C_ISNWK:
+                break;
+            default:
+                throw new IllegalArgumentException(
+                        "Unknown mode. "
+                                + "Full: " + Long.toHexString(mode)
+                                + " Masked: " + Long.toHexString(maskedMode));
+        }
+
+        this.mode = mode;
+    }
+
+    /**
+     * Set the name.
+     *
+     * @param name The name to set.
+     */
+    public CpioArchiveEntry setName(final String name) {
+        this.name = name;
+        return this;
+    }
+
+    /**
+     * Set the number of links.
+     *
+     * @param nlink The number of links to set.
+     */
+    public void setNumberOfLinks(final long nlink) {
+        this.nlink = nlink;
+    }
+
+    /**
+     * Set the remote device id.
+     *
+     * @param device The remote device id to set.
+     * @throws UnsupportedOperationException if this method is called for a CpioArchiveEntry with a new
+     *                                       format.
+     */
+    public void setRemoteDevice(final long device) {
+        checkOldFormat();
+        this.rmin = device;
+    }
+
+    /**
+     * Set the remote major device id.
+     *
+     * @param rmaj The remote major device id to set.
+     * @throws UnsupportedOperationException if this method is called for a CpioArchiveEntry with an old
+     *                                       format.
+     */
+    public void setRemoteDeviceMaj(final long rmaj) {
+        checkNewFormat();
+        this.rmaj = rmaj;
+    }
+
+    /**
+     * Set the remote minor device id.
+     *
+     * @param rmin The remote minor device id to set.
+     * @throws UnsupportedOperationException if this method is called for a CpioArchiveEntry with an old
+     *                                       format.
+     */
+    public void setRemoteDeviceMin(final long rmin) {
+        checkNewFormat();
+        this.rmin = rmin;
+    }
+
+    /**
+     * Set the time in seconds.
+     *
+     * @param time The time to set.
+     */
+    public void setTime(final long time) {
+        this.mtime = time;
+    }
+
+    /**
+     * Set the user id.
+     *
+     * @param uid The user id to set.
+     */
+    public void setUID(final long uid) {
+        this.uid = uid;
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((name == null) ? 0 : name.hashCode());
+        return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        if (obj == null || getClass() != obj.getClass()) {
+            return false;
+        }
+        CpioArchiveEntry other = (CpioArchiveEntry) obj;
+        if (name == null) {
+            if (other.name != null) {
+                return false;
+            }
+        } else if (!name.equals(other.name)) {
+            return false;
+        }
+        return true;
+    }
+}
diff --git a/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioArchiveInputStream.java b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioArchiveInputStream.java
new file mode 100644
index 0000000..86e4468
--- /dev/null
+++ b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioArchiveInputStream.java
@@ -0,0 +1,387 @@
+package org.xbib.io.archive.cpio;
+
+import org.xbib.io.archive.entry.ArchiveEntry;
+import org.xbib.io.archive.stream.ArchiveInputStream;
+import org.xbib.io.archive.util.ArchiveUtils;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * CpioArchiveInputStream is a stream for reading cpio streams. All formats of
+ * cpio are supported (old ASCII, old binary, new portable format and the new
+ * portable format with CRC).
+ * The stream can be read by extracting a cpio entry (containing all
+ * information about an entry) and afterwards reading from the stream the file
+ * specified by the entry.
+ *

+ * CpioArchiveInputStream cpioIn = new CpioArchiveInputStream(
+ *         new FileInputStream(new File("test.cpio")));
+ * CpioArchiveEntry cpioEntry;
+ * while ((cpioEntry = cpioIn.getNextCPIOEntry()) != null) {
+ *     System.out.println(cpioEntry.getName());
+ *     int tmp;
+ *     StringBuilder buf = new StringBuilder();
+ *     while ((tmp = cpioIn.read()) != -1) {
+ *         buf.append((char) tmp);
+ *     }
+ *     System.out.println(buf.toString());
+ * }
+ * cpioIn.close();
+ * 
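+ * A sketch of a variant that copies each regular file to disk instead of
+ * printing it. The target directory "out" and the 4096-byte buffer are
+ * arbitrary illustrative choices, not part of this API, and parent
+ * directories are assumed to exist already:
+ *
+ * byte[] buffer = new byte[4096];
+ * CpioArchiveEntry e;
+ * while ((e = cpioIn.getNextCPIOEntry()) != null) {
+ *     if (!e.isRegularFile()) {
+ *         continue; // directories, devices etc. carry no file data to copy
+ *     }
+ *     try (OutputStream os = new FileOutputStream(new File("out", e.getName()))) {
+ *         int n;
+ *         while ((n = cpioIn.read(buffer, 0, buffer.length)) != -1) {
+ *             os.write(buffer, 0, n); // read() returns -1 at the end of each entry
+ *         }
+ *     }
+ * }
+ * cpioIn.close();
+ *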
+ * Note: This implementation should be compatible with cpio 2.5.
+ */
+
+public class CpioArchiveInputStream extends ArchiveInputStream implements CpioConstants {
+
+    private boolean closed = false;
+
+    private CpioArchiveEntry entry;
+
+    private long entryBytesRead = 0;
+
+    private boolean entryEOF = false;
+
+    private final byte[] tmpbuf = new byte[4096];
+
+    private long crc = 0;
+
+    private final InputStream in;
+
+    /**
+     * Construct the cpio input stream.
+     *
+     * @param in The cpio stream
+     */
+    public CpioArchiveInputStream(final InputStream in) {
+        this.in = in;
+    }
+
+    /**
+     * Returns 0 after EOF has been reached for the current entry data,
+     * otherwise always returns 1.
+     * Programs should not count on this method to return the actual number of
+     * bytes that could be read without blocking.
+     *
+     * @return 1 before EOF and 0 after EOF has been reached for the current entry.
+     * @throws java.io.IOException if an I/O error has occurred or if a CPIO file error has
+     *                             occurred
+     */
+    @Override
+    public int available() throws IOException {
+        ensureOpen();
+        if (this.entryEOF) {
+            return 0;
+        }
+        return 1;
+    }
+
+    /**
+     * Closes the CPIO input stream.
+     *
+     * @throws java.io.IOException if an I/O error has occurred
+     */
+    @Override
+    public void close() throws IOException {
+        if (!this.closed) {
+            in.close();
+            this.closed = true;
+        }
+    }
+
+    /**
+     * Closes the current CPIO entry and positions the stream for reading the
+     * next entry.
+     *
+     * @throws java.io.IOException if an I/O error has occurred or if a CPIO file error has
+     *                             occurred
+     */
+    private void closeEntry() throws IOException {
+        ensureOpen();
+        while (read(this.tmpbuf, 0, this.tmpbuf.length) != -1) {
+            // do nothing
+        }
+        this.entryEOF = true;
+    }
+
+    /**
+     * Check to make sure that this stream has not been closed.
+     *
+     * @throws java.io.IOException if the stream is already closed
+     */
+    private void ensureOpen() throws IOException {
+        if (this.closed) {
+            throw new IOException("stream closed");
+        }
+    }
+
+    /**
+     * Reads the next CPIO file entry and positions the stream at the beginning of
+     * the entry data.
+ * + * @return the CPIOArchiveEntry just read + * @throws java.io.IOException if an I/O error has occurred or if a CPIO file error has + * occurred + */ + public CpioArchiveEntry getNextCPIOEntry() throws IOException { + ensureOpen(); + if (this.entry != null) { + closeEntry(); + } + byte magic[] = new byte[2]; + readFully(magic, 0, magic.length); + if (CpioUtil.byteArray2long(magic, false) == MAGIC_OLD_BINARY) { + this.entry = readOldBinaryEntry(false); + } else if (CpioUtil.byteArray2long(magic, true) == MAGIC_OLD_BINARY) { + this.entry = readOldBinaryEntry(true); + } else { + byte more_magic[] = new byte[4]; + readFully(more_magic, 0, more_magic.length); + byte tmp[] = new byte[6]; + System.arraycopy(magic, 0, tmp, 0, magic.length); + System.arraycopy(more_magic, 0, tmp, magic.length, + more_magic.length); + String magicString = ArchiveUtils.toAsciiString(tmp); + if (magicString.equals(MAGIC_NEW)) { + this.entry = readNewEntry(false); + } else if (magicString.equals(MAGIC_NEW_CRC)) { + this.entry = readNewEntry(true); + } else if (magicString.equals(MAGIC_OLD_ASCII)) { + this.entry = readOldAsciiEntry(); + } else { + throw new IOException("Unknown magic [" + magicString + "]"); + } + } + + this.entryBytesRead = 0; + this.entryEOF = false; + this.crc = 0; + + if (this.entry.getName().equals(CPIO_TRAILER)) { + this.entryEOF = true; + return null; + } + return this.entry; + } + + private void skip(int bytes) throws IOException { + final byte[] buff = new byte[4]; // Cannot be more than 3 bytes + if (bytes > 0) { + readFully(buff, 0, bytes); + } + } + + /** + * Reads from the current CPIO entry into an array of bytes. Blocks until + * some input is available. + * + * @param b the buffer into which the data is read + * @param off the start offset of the data + * @param len the maximum number of bytes read + * @return the actual number of bytes read, or -1 if the end of the entry is + * reached + * @throws java.io.IOException if an I/O error has occurred or if a CPIO file error has + * occurred + */ + @Override + public int read(final byte[] b, final int off, final int len) + throws IOException { + ensureOpen(); + if (off < 0 || len < 0 || off > b.length - len) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return 0; + } + + if (this.entry == null || this.entryEOF) { + return -1; + } + if (this.entryBytesRead == this.entry.getEntrySize()) { + skip(entry.getDataPadCount()); + this.entryEOF = true; + if (this.entry.getFormat() == FORMAT_NEW_CRC + && this.crc != this.entry.getChksum()) { + throw new IOException("CRC Error"); + } + return -1; // EOF for this entry + } + int tmplength = (int) Math.min(len, this.entry.getEntrySize() + - this.entryBytesRead); + if (tmplength < 0) { + return -1; + } + + int tmpread = readFully(b, off, tmplength); + if (this.entry.getFormat() == FORMAT_NEW_CRC) { + for (int pos = 0; pos < tmpread; pos++) { + this.crc += b[pos] & 0xFF; + } + } + this.entryBytesRead += tmpread; + + return tmpread; + } + + private int readFully(final byte[] b, final int off, final int len) + throws IOException { + if (len < 0) { + throw new IndexOutOfBoundsException(); + } + int n = 0; + while (n < len) { + int count = this.in.read(b, off + n, len - n); + if (count < 0) { + throw new EOFException(); + } + n += count; + } + return n; + } + + private long readBinaryLong(final int length, final boolean swapHalfWord) + throws IOException { + byte tmp[] = new byte[length]; + readFully(tmp, 0, tmp.length); + return CpioUtil.byteArray2long(tmp, swapHalfWord); + } + + 
private long readAsciiLong(final int length, final int radix) + throws IOException { + byte tmpBuffer[] = new byte[length]; + readFully(tmpBuffer, 0, tmpBuffer.length); + return Long.parseLong(ArchiveUtils.toAsciiString(tmpBuffer), radix); + } + + private CpioArchiveEntry readNewEntry(final boolean hasCrc) + throws IOException { + CpioArchiveEntry ret; + if (hasCrc) { + ret = new CpioArchiveEntry(FORMAT_NEW_CRC); + } else { + ret = new CpioArchiveEntry(FORMAT_NEW); + } + + ret.setInode(readAsciiLong(8, 16)); + long mode = readAsciiLong(8, 16); + if (mode != 0) { // mode is initialised to 0 + ret.setMode(mode); + } + ret.setUID(readAsciiLong(8, 16)); + ret.setGID(readAsciiLong(8, 16)); + ret.setNumberOfLinks(readAsciiLong(8, 16)); + ret.setTime(readAsciiLong(8, 16)); + ret.setEntrySize(readAsciiLong(8, 16)); + ret.setDeviceMaj(readAsciiLong(8, 16)); + ret.setDeviceMin(readAsciiLong(8, 16)); + ret.setRemoteDeviceMaj(readAsciiLong(8, 16)); + ret.setRemoteDeviceMin(readAsciiLong(8, 16)); + long namesize = readAsciiLong(8, 16); + ret.setChksum(readAsciiLong(8, 16)); + String name = readCString((int) namesize); + ret.setName(name); + if (mode == 0 && !name.equals(CPIO_TRAILER)) { + throw new IOException("Mode 0 only allowed in the trailer. Found entry name: " + name); + } + skip(ret.getHeaderPadCount()); + + return ret; + } + + private CpioArchiveEntry readOldAsciiEntry() throws IOException { + CpioArchiveEntry ret = new CpioArchiveEntry(FORMAT_OLD_ASCII); + + ret.setDevice(readAsciiLong(6, 8)); + ret.setInode(readAsciiLong(6, 8)); + final long mode = readAsciiLong(6, 8); + if (mode != 0) { + ret.setMode(mode); + } + ret.setUID(readAsciiLong(6, 8)); + ret.setGID(readAsciiLong(6, 8)); + ret.setNumberOfLinks(readAsciiLong(6, 8)); + ret.setRemoteDevice(readAsciiLong(6, 8)); + ret.setTime(readAsciiLong(11, 8)); + long namesize = readAsciiLong(6, 8); + ret.setEntrySize(readAsciiLong(11, 8)); + final String name = readCString((int) namesize); + ret.setName(name); + if (mode == 0 && !name.equals(CPIO_TRAILER)) { + throw new IOException("Mode 0 only allowed in the trailer. Found entry: " + name); + } + + return ret; + } + + private CpioArchiveEntry readOldBinaryEntry(final boolean swapHalfWord) + throws IOException { + CpioArchiveEntry ret = new CpioArchiveEntry(FORMAT_OLD_BINARY); + + ret.setDevice(readBinaryLong(2, swapHalfWord)); + ret.setInode(readBinaryLong(2, swapHalfWord)); + final long mode = readBinaryLong(2, swapHalfWord); + if (mode != 0) { + ret.setMode(mode); + } + ret.setUID(readBinaryLong(2, swapHalfWord)); + ret.setGID(readBinaryLong(2, swapHalfWord)); + ret.setNumberOfLinks(readBinaryLong(2, swapHalfWord)); + ret.setRemoteDevice(readBinaryLong(2, swapHalfWord)); + ret.setTime(readBinaryLong(4, swapHalfWord)); + long namesize = readBinaryLong(2, swapHalfWord); + ret.setEntrySize(readBinaryLong(4, swapHalfWord)); + final String name = readCString((int) namesize); + ret.setName(name); + if (mode == 0 && !name.equals(CPIO_TRAILER)) { + throw new IOException("Mode 0 only allowed in the trailer. Found entry: " + name); + } + skip(ret.getHeaderPadCount()); + + return ret; + } + + private String readCString(final int length) throws IOException { + byte[] tmpBuffer = new byte[length]; + readFully(tmpBuffer, 0, tmpBuffer.length); + return new String(tmpBuffer, 0, tmpBuffer.length - 1); + } + + /** + * Skips specified number of bytes in the current CPIO entry. 
+     *
+     * @param n the number of bytes to skip
+     * @return the actual number of bytes skipped
+     * @throws java.io.IOException      if an I/O error has occurred
+     * @throws IllegalArgumentException if n < 0
+     */
+    @Override
+    public long skip(final long n) throws IOException {
+        if (n < 0) {
+            throw new IllegalArgumentException("negative skip length");
+        }
+        ensureOpen();
+        int max = (int) Math.min(n, Integer.MAX_VALUE);
+        int total = 0;
+
+        while (total < max) {
+            int len = max - total;
+            if (len > this.tmpbuf.length) {
+                len = this.tmpbuf.length;
+            }
+            len = read(this.tmpbuf, 0, len);
+            if (len == -1) {
+                this.entryEOF = true;
+                break;
+            }
+            total += len;
+        }
+        return total;
+    }
+
+    @Override
+    public ArchiveEntry getNextEntry() throws IOException {
+        return getNextCPIOEntry();
+    }
+
+}
diff --git a/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioArchiveOutputStream.java b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioArchiveOutputStream.java
new file mode 100644
index 0000000..99e00f2
--- /dev/null
+++ b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioArchiveOutputStream.java
@@ -0,0 +1,430 @@
+package org.xbib.io.archive.cpio;
+
+import org.xbib.io.archive.stream.ArchiveOutputStream;
+import org.xbib.io.archive.util.ArchiveUtils;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.HashMap;
+
+/**
+ * CpioArchiveOutputStream is a stream for writing CPIO streams. All formats of
+ * CPIO are supported (old ASCII, old binary, new portable format and the new
+ * portable format with CRC).
+ * An entry can be written by creating an instance of CpioArchiveEntry,
+ * filling it with the necessary values, and putting it into the CPIO stream.
+ * Afterwards, write the contents of the file into the CPIO stream. Either
+ * close the stream by calling finish() or put the next entry into the cpio
+ * stream.
+ *

+ * CpioArchiveOutputStream out = new CpioArchiveOutputStream(
+ *         new FileOutputStream(new File("test.cpio")));
+ * CpioArchiveEntry entry = new CpioArchiveEntry();
+ * entry.setName("testfile");
+ * String contents = "12345";
+ * entry.setEntrySize(contents.length());
+ * entry.setMode(CpioConstants.C_ISREG); // regular file
+ * // ... set other attributes, e.g. time, number of links
+ * out.putArchiveEntry(entry);
+ * out.write(contents.getBytes());
+ * out.closeArchiveEntry();
+ * out.close();
+ * 
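+ * The stream format (and the format of every entry put into it) can also be
+ * chosen explicitly. A sketch for the old ASCII format, reusing the names
+ * from the example above; the output file name is arbitrary:
+ *
+ * CpioArchiveOutputStream odcOut = new CpioArchiveOutputStream(
+ *         new FileOutputStream(new File("test-odc.cpio")), CpioConstants.FORMAT_OLD_ASCII);
+ * CpioArchiveEntry odcEntry = new CpioArchiveEntry(CpioConstants.FORMAT_OLD_ASCII);
+ * odcEntry.setName("testfile");
+ * odcEntry.setEntrySize(contents.length());
+ * odcEntry.setMode(CpioConstants.C_ISREG);
+ * odcOut.putArchiveEntry(odcEntry);
+ * odcOut.write(contents.getBytes());
+ * odcOut.closeArchiveEntry();
+ * odcOut.close(); // implies finish(): writes the TRAILER!!! entry and pads to the block size
+ *
+ * Note that the entry format must match the stream format; putArchiveEntry
+ * throws an IOException otherwise.
+ *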
+ * Note: This implementation should be compatible with cpio 2.5.
+ */
+public class CpioArchiveOutputStream extends ArchiveOutputStream implements CpioConstants {
+
+    private CpioArchiveEntry entry;
+
+    private boolean closed = false;
+
+    private boolean finished;
+
+    private final short entryFormat;
+
+    private final HashMap<String, CpioArchiveEntry> names = new HashMap<>();
+
+    private long crc = 0;
+
+    private long written;
+
+    private final CountingOutputStream out;
+
+    private final int blockSize;
+
+    private long nextArtificalDeviceAndInode = 1;
+
+    /**
+     * Construct the cpio output stream with a specified format and a
+     * blocksize of {@link CpioConstants#BLOCK_SIZE BLOCK_SIZE}.
+     *
+     * @param out    The cpio stream
+     * @param format The format of the stream
+     */
+    public CpioArchiveOutputStream(OutputStream out, final short format) {
+        this(out, format, BLOCK_SIZE);
+    }
+
+    /**
+     * Construct the cpio output stream with a specified format.
+     *
+     * @param out       The cpio stream
+     * @param format    The format of the stream
+     * @param blockSize The block size of the archive.
+     */
+    public CpioArchiveOutputStream(final OutputStream out, final short format,
+                                   final int blockSize) {
+        this.out = new CountingOutputStream(out);
+        switch (format) {
+            case FORMAT_NEW:
+            case FORMAT_NEW_CRC:
+            case FORMAT_OLD_ASCII:
+            case FORMAT_OLD_BINARY:
+                break;
+            default:
+                throw new IllegalArgumentException("Unknown format: " + format);
+        }
+        this.entryFormat = format;
+        this.blockSize = blockSize;
+    }
+
+    /**
+     * Construct the cpio output stream. The format for this CPIO stream is the
+     * "new" format.
+     *
+     * @param out The cpio stream
+     */
+    public CpioArchiveOutputStream(final OutputStream out) {
+        this(out, FORMAT_NEW);
+    }
+
+    /**
+     * Check to make sure that this stream has not been closed.
+     *
+     * @throws java.io.IOException if the stream is already closed
+     */
+    private void ensureOpen() throws IOException {
+        if (this.closed) {
+            throw new IOException("Stream closed");
+        }
+    }
+
+    @Override
+    public CpioArchiveEntry newArchiveEntry() {
+        return new CpioArchiveEntry();
+    }
+
+    /**
+     * Begins writing a new CPIO file entry and positions the stream to the
+     * start of the entry data. Closes the current entry if still active. The
+     * current time will be used if the entry has no set modification time and
+     * the default header format will be used if no other format is specified in
+     * the entry.
+ * + * @param entry the CPIO cpioEntry to be written + * @throws java.io.IOException if an I/O error has occurred or if a CPIO file error has + * occurred + * @throws ClassCastException if entry is not an instance of CpioArchiveEntry + */ + @Override + public void putArchiveEntry(CpioArchiveEntry entry) throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + ensureOpen(); + if (this.entry != null) { + closeArchiveEntry(); // close previous entry + } + if (entry.getTime() == -1) { + entry.setTime(System.currentTimeMillis() / 1000); + } + + final short format = entry.getFormat(); + if (format != this.entryFormat) { + throw new IOException("Header format: " + format + " does not match existing format: " + this.entryFormat); + } + + if (this.names.put(entry.getName(), entry) != null) { + throw new IOException("duplicate entry: " + entry.getName()); + } + + writeHeader(entry); + this.entry = entry; + this.written = 0; + } + + private void writeHeader(final CpioArchiveEntry e) throws IOException { + switch (e.getFormat()) { + case FORMAT_NEW: + out.write(ArchiveUtils.toAsciiBytes(MAGIC_NEW)); + writeNewEntry(e); + break; + case FORMAT_NEW_CRC: + out.write(ArchiveUtils.toAsciiBytes(MAGIC_NEW_CRC)); + writeNewEntry(e); + break; + case FORMAT_OLD_ASCII: + out.write(ArchiveUtils.toAsciiBytes(MAGIC_OLD_ASCII)); + writeOldAsciiEntry(e); + break; + case FORMAT_OLD_BINARY: + boolean swapHalfWord = true; + writeBinaryLong(MAGIC_OLD_BINARY, 2, swapHalfWord); + writeOldBinaryEntry(e, swapHalfWord); + break; + } + } + + private void writeNewEntry(final CpioArchiveEntry entry) throws IOException { + long inode = entry.getInode(); + long devMin = entry.getDeviceMin(); + if (CPIO_TRAILER.equals(entry.getName())) { + inode = devMin = 0; + } else { + if (inode == 0 && devMin == 0) { + inode = nextArtificalDeviceAndInode & 0xFFFFFFFF; + devMin = (nextArtificalDeviceAndInode++ >> 32) & 0xFFFFFFFF; + } else { + nextArtificalDeviceAndInode = + Math.max(nextArtificalDeviceAndInode, + inode + 0x100000000L * devMin) + 1; + } + } + + writeAsciiLong(inode, 8, 16); + writeAsciiLong(entry.getMode(), 8, 16); + writeAsciiLong(entry.getUID(), 8, 16); + writeAsciiLong(entry.getGID(), 8, 16); + writeAsciiLong(entry.getNumberOfLinks(), 8, 16); + writeAsciiLong(entry.getTime(), 8, 16); + writeAsciiLong(entry.getEntrySize(), 8, 16); + writeAsciiLong(entry.getDeviceMaj(), 8, 16); + writeAsciiLong(devMin, 8, 16); + writeAsciiLong(entry.getRemoteDeviceMaj(), 8, 16); + writeAsciiLong(entry.getRemoteDeviceMin(), 8, 16); + writeAsciiLong(entry.getName().length() + 1, 8, 16); + writeAsciiLong(entry.getChksum(), 8, 16); + writeCString(entry.getName()); + pad(entry.getHeaderPadCount()); + } + + private void writeOldAsciiEntry(final CpioArchiveEntry entry) + throws IOException { + long inode = entry.getInode(); + long device = entry.getDevice(); + if (CPIO_TRAILER.equals(entry.getName())) { + inode = device = 0; + } else { + if (inode == 0 && device == 0) { + inode = nextArtificalDeviceAndInode & 0777777; + device = (nextArtificalDeviceAndInode++ >> 18) & 0777777; + } else { + nextArtificalDeviceAndInode = + Math.max(nextArtificalDeviceAndInode, + inode + 01000000 * device) + 1; + } + } + + writeAsciiLong(device, 6, 8); + writeAsciiLong(inode, 6, 8); + writeAsciiLong(entry.getMode(), 6, 8); + writeAsciiLong(entry.getUID(), 6, 8); + writeAsciiLong(entry.getGID(), 6, 8); + writeAsciiLong(entry.getNumberOfLinks(), 6, 8); + writeAsciiLong(entry.getRemoteDevice(), 6, 8); + 
writeAsciiLong(entry.getTime(), 11, 8); + writeAsciiLong(entry.getName().length() + 1, 6, 8); + writeAsciiLong(entry.getEntrySize(), 11, 8); + writeCString(entry.getName()); + } + + private void writeOldBinaryEntry(final CpioArchiveEntry entry, + final boolean swapHalfWord) throws IOException { + long inode = entry.getInode(); + long device = entry.getDevice(); + if (CPIO_TRAILER.equals(entry.getName())) { + inode = device = 0; + } else { + if (inode == 0 && device == 0) { + inode = nextArtificalDeviceAndInode & 0xFFFF; + device = (nextArtificalDeviceAndInode++ >> 16) & 0xFFFF; + } else { + nextArtificalDeviceAndInode = + Math.max(nextArtificalDeviceAndInode, + inode + 0x10000 * device) + 1; + } + } + + writeBinaryLong(device, 2, swapHalfWord); + writeBinaryLong(inode, 2, swapHalfWord); + writeBinaryLong(entry.getMode(), 2, swapHalfWord); + writeBinaryLong(entry.getUID(), 2, swapHalfWord); + writeBinaryLong(entry.getGID(), 2, swapHalfWord); + writeBinaryLong(entry.getNumberOfLinks(), 2, swapHalfWord); + writeBinaryLong(entry.getRemoteDevice(), 2, swapHalfWord); + writeBinaryLong(entry.getTime(), 4, swapHalfWord); + writeBinaryLong(entry.getName().length() + 1, 2, swapHalfWord); + writeBinaryLong(entry.getEntrySize(), 4, swapHalfWord); + writeCString(entry.getName()); + pad(entry.getHeaderPadCount()); + } + + @Override + public void closeArchiveEntry() throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + + ensureOpen(); + + if (entry == null) { + throw new IOException("Trying to close non-existent entry"); + } + + if (this.entry.getEntrySize() != this.written) { + throw new IOException("invalid entry size (expected " + + this.entry.getEntrySize() + " but got " + this.written + + " bytes)"); + } + pad(this.entry.getDataPadCount()); + if (this.entry.getFormat() == FORMAT_NEW_CRC + && this.crc != this.entry.getChksum()) { + throw new IOException("CRC Error"); + } + this.entry = null; + this.crc = 0; + this.written = 0; + } + + /** + * Writes an array of bytes to the current CPIO entry data. This method will + * block until all the bytes are written. + * + * @param b the data to be written + * @param off the start offset in the data + * @param len the number of bytes that are written + * @throws java.io.IOException if an I/O error has occurred or if a CPIO file error has + * occurred + */ + @Override + public void write(final byte[] b, final int off, final int len) + throws IOException { + ensureOpen(); + if (off < 0 || len < 0 || off > b.length - len) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return; + } + + if (this.entry == null) { + throw new IOException("no current CPIO entry"); + } + if (this.written + len > this.entry.getEntrySize()) { + throw new IOException("attempt to write past end of STORED entry"); + } + out.write(b, off, len); + this.written += len; + if (this.entry.getFormat() == FORMAT_NEW_CRC) { + for (int pos = 0; pos < len; pos++) { + this.crc += b[pos] & 0xFF; + } + } + } + + /** + * Finishes writing the contents of the CPIO output stream without closing + * the underlying stream. Use this method when applying multiple filters in + * succession to the same output stream. 
+ * + * @throws IOException if an I/O exception has occurred or if a CPIO file error has occurred + */ + @Override + public void finish() throws IOException { + ensureOpen(); + if (finished) { + throw new IOException("This archive has already been finished"); + } + + if (this.entry != null) { + throw new IOException("This archive contains unclosed entries."); + } + this.entry = new CpioArchiveEntry(this.entryFormat); + this.entry.setName(CPIO_TRAILER); + this.entry.setNumberOfLinks(1); + writeHeader(this.entry); + closeArchiveEntry(); + int lengthOfLastBlock = (int) (out.getBytesWritten() % blockSize); + if (lengthOfLastBlock != 0) { + pad(blockSize - lengthOfLastBlock); + } + finished = true; + } + + /** + * Closes the CPIO output stream as well as the stream being filtered. + * + * @throws java.io.IOException if an I/O error has occurred or if a CPIO file error has + * occurred + */ + @Override + public void close() throws IOException { + if (!finished) { + finish(); + } + + if (!this.closed) { + out.close(); + this.closed = true; + } + } + + private void pad(int count) throws IOException { + if (count > 0) { + byte buff[] = new byte[count]; + out.write(buff); + } + } + + private void writeBinaryLong(final long number, final int length, + final boolean swapHalfWord) throws IOException { + byte tmp[] = CpioUtil.long2byteArray(number, length, swapHalfWord); + out.write(tmp); + } + + private void writeAsciiLong(final long number, final int length, + final int radix) throws IOException { + StringBuilder tmp = new StringBuilder(); + String tmpStr; + if (radix == 16) { + tmp.append(Long.toHexString(number)); + } else if (radix == 8) { + tmp.append(Long.toOctalString(number)); + } else { + tmp.append(Long.toString(number)); + } + + if (tmp.length() <= length) { + long insertLength = length - tmp.length(); + for (int pos = 0; pos < insertLength; pos++) { + tmp.insert(0, "0"); + } + tmpStr = tmp.toString(); + } else { + tmpStr = tmp.substring(tmp.length() - length); + } + byte[] b = ArchiveUtils.toAsciiBytes(tmpStr); + out.write(b); + } + + /** + * Writes an ASCII string to the stream followed by \0 + * + * @param str the String to write + * @throws java.io.IOException if the string couldn't be written + */ + private void writeCString(final String str) throws IOException { + byte[] b = ArchiveUtils.toAsciiBytes(str); + out.write(b); + out.write('\0'); + } + +} diff --git a/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioConstants.java b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioConstants.java new file mode 100644 index 0000000..cde5075 --- /dev/null +++ b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioConstants.java @@ -0,0 +1,184 @@ +package org.xbib.io.archive.cpio; + +/** + * All constants needed by CPIO. 
+ */
+public interface CpioConstants {
+    /**
+     * magic number of a cpio entry in the new format
+     */
+    final String MAGIC_NEW = "070701";
+
+    /**
+     * magic number of a cpio entry in the new format with crc
+     */
+    final String MAGIC_NEW_CRC = "070702";
+
+    /**
+     * magic number of a cpio entry in the old ascii format
+     */
+    final String MAGIC_OLD_ASCII = "070707";
+
+    /**
+     * magic number of a cpio entry in the old binary format
+     */
+    final int MAGIC_OLD_BINARY = 070707;
+
+    // These FORMAT_ constants are internal to the code
+
+    /**
+     * write/read a CpioArchiveEntry in the new format
+     */
+    final short FORMAT_NEW = 1;
+
+    /**
+     * write/read a CpioArchiveEntry in the new format with crc
+     */
+    final short FORMAT_NEW_CRC = 2;
+
+    /**
+     * write/read a CpioArchiveEntry in the old ascii format
+     */
+    final short FORMAT_OLD_ASCII = 4;
+
+    /**
+     * write/read a CpioArchiveEntry in the old binary format
+     */
+    final short FORMAT_OLD_BINARY = 8;
+
+    /**
+     * Mask for both new formats
+     */
+    final short FORMAT_NEW_MASK = 3;
+
+    /**
+     * Mask for both old formats
+     */
+    final short FORMAT_OLD_MASK = 12;
+
+    /*
+     * Constants for the MODE bits
+     */
+
+    /**
+     * Mask for all file type bits.
+     */
+    final int S_IFMT = 0170000;
+
+    // http://www.opengroup.org/onlinepubs/9699919799/basedefs/cpio.h.html
+    // has a list of the C_xxx constants
+
+    /**
+     * Defines a socket
+     */
+    final int C_ISSOCK = 0140000;
+
+    /**
+     * Defines a symbolic link
+     */
+    final int C_ISLNK = 0120000;
+
+    /**
+     * HP/UX network special (C_ISCTG)
+     */
+    final int C_ISNWK = 0110000;
+
+    /**
+     * Defines a regular file
+     */
+    final int C_ISREG = 0100000;
+
+    /**
+     * Defines a block device
+     */
+    final int C_ISBLK = 0060000;
+
+    /**
+     * Defines a directory
+     */
+    final int C_ISDIR = 0040000;
+
+    /**
+     * Defines a character device
+     */
+    final int C_ISCHR = 0020000;
+
+    /**
+     * Defines a pipe
+     */
+    final int C_ISFIFO = 0010000;
+
+    /**
+     * Set user ID
+     */
+    final int C_ISUID = 0004000;
+
+    /**
+     * Set group ID
+     */
+    final int C_ISGID = 0002000;
+
+    /**
+     * On directories, restricted deletion flag.
+     */
+    final int C_ISVTX = 0001000;
+
+    /**
+     * Permits the owner of a file to read the file
+     */
+    final int C_IRUSR = 0000400;
+
+    /**
+     * Permits the owner of a file to write to the file
+     */
+    final int C_IWUSR = 0000200;
+
+    /**
+     * Permits the owner of a file to execute the file or to search the directory
+     */
+    final int C_IXUSR = 0000100;
+
+    /**
+     * Permits a file's group to read the file
+     */
+    final int C_IRGRP = 0000040;
+
+    /**
+     * Permits a file's group to write to the file
+     */
+    final int C_IWGRP = 0000020;
+
+    /**
+     * Permits a file's group to execute the file or to search the directory
+     */
+    final int C_IXGRP = 0000010;
+
+    /**
+     * Permits others to read the file
+     */
+    final int C_IROTH = 0000004;
+
+    /**
+     * Permits others to write to the file
+     */
+    final int C_IWOTH = 0000002;
+
+    /**
+     * Permits others to execute the file or to search the directory
+     */
+    final int C_IXOTH = 0000001;
+
+    /**
+     * The special trailer marker
+     */
+    final String CPIO_TRAILER = "TRAILER!!!";
+
+    /**
+     * The default block size.
+     */
+    final int BLOCK_SIZE = 512;
+}
diff --git a/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioUtil.java b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioUtil.java
new file mode 100644
index 0000000..7c041d7
--- /dev/null
+++ b/io-archive-cpio/src/main/java/org/xbib/io/archive/cpio/CpioUtil.java
@@ -0,0 +1,81 @@
+package org.xbib.io.archive.cpio;
+
+/**
+ * Package private utility class for Cpio.
+ */
+class CpioUtil {
+
+    /**
+     * Converts a byte array to a long. Halfwords can be swapped by setting
+     * swapHalfWord=true.
+     *
+     * @param number       An array of bytes containing a number
+     * @param swapHalfWord Swap halfwords ([0][1][2][3]->[1][0][3][2])
+     * @return The long value
+     * @throws UnsupportedOperationException if number length is not a multiple of 2
+     */
+    static long byteArray2long(final byte[] number, final boolean swapHalfWord) {
+        if (number.length % 2 != 0) {
+            throw new UnsupportedOperationException();
+        }
+
+        long ret = 0;
+        int pos = 0;
+        byte[] tmp_number = new byte[number.length];
+        System.arraycopy(number, 0, tmp_number, 0, number.length);
+
+        if (!swapHalfWord) {
+            byte tmp = 0;
+            for (pos = 0; pos < tmp_number.length; pos++) {
+                tmp = tmp_number[pos];
+                tmp_number[pos++] = tmp_number[pos];
+                tmp_number[pos] = tmp;
+            }
+        }
+
+        ret = tmp_number[0] & 0xFF;
+        for (pos = 1; pos < tmp_number.length; pos++) {
+            ret <<= 8;
+            ret |= tmp_number[pos] & 0xFF;
+        }
+        return ret;
+    }
+
+    /**
+     * Converts a long number to a byte array.
+     * Halfwords can be swapped by setting swapHalfWord=true.
+     *
+     * @param number       the input long number to be converted
+     * @param length       The length of the returned array
+     * @param swapHalfWord Swap halfwords ([0][1][2][3]->[1][0][3][2])
+     * @return The byte array
+     * @throws UnsupportedOperationException if the length is not a positive multiple of two
+     */
+    static byte[] long2byteArray(final long number, final int length,
+                                 final boolean swapHalfWord) {
+        byte[] ret = new byte[length];
+        int pos = 0;
+        long tmp_number = 0;
+
+        if (length % 2 != 0 || length < 2) {
+            throw new UnsupportedOperationException();
+        }
+
+        tmp_number = number;
+        for (pos = length - 1; pos >= 0; pos--) {
+            ret[pos] = (byte) (tmp_number & 0xFF);
+            tmp_number >>= 8;
+        }
+
+        if (!swapHalfWord) {
+            byte tmp = 0;
+            for (pos = 0; pos < length; pos++) {
+                tmp = ret[pos];
+                ret[pos++] = ret[pos];
+                ret[pos] = tmp;
+            }
+        }
+
+        return ret;
+    }
+}
diff --git a/io-archive-dump/build.gradle b/io-archive-dump/build.gradle
new file mode 100644
index 0000000..7c4e2c7
--- /dev/null
+++ b/io-archive-dump/build.gradle
@@ -0,0 +1,3 @@
+dependencies {
+    api project(':io-archive')
+}
diff --git a/io-archive-dump/src/main/java/module-info.java b/io-archive-dump/src/main/java/module-info.java
new file mode 100644
index 0000000..8526138
--- /dev/null
+++ b/io-archive-dump/src/main/java/module-info.java
@@ -0,0 +1,4 @@
+module org.xbib.io.archive.dump {
+    exports org.xbib.io.archive.dump;
+    requires org.xbib.io.archive;
+}
diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/Dirent.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/Dirent.java
new file mode 100644
index 0000000..cd503e8
--- /dev/null
+++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/Dirent.java
@@ -0,0 +1,70 @@
+package org.xbib.io.archive.dump;
+
+/**
+ * Directory entry.
+ */
+class Dirent {
+
+    private int ino;
+
+    private int parentIno;
+
+    private int type;
+
+    private String name;
+
+    /**
+     * Constructor.
+     *
+     * @param ino       the i-node
+     * @param parentIno the parent directory's i-node
+     * @param type      the entry type
+     * @param name      the directory name
+     */
+    Dirent(int ino, int parentIno, int type, String name) {
+        this.ino = ino;
+        this.parentIno = parentIno;
+        this.type = type;
+        this.name = name;
+    }
+
+    /**
+     * Get ino.
+     *
+     * @return the i-node
+     */
+    int getIno() {
+        return ino;
+    }
+
+    /**
+     * Get ino of parent directory.
+     *
+     * @return the parent i-node
+     */
+    int getParentIno() {
+        return parentIno;
+    }
+
+    /**
+     * Get entry type.
+     *
+     * @return the entry type
+     */
+    int getType() {
+        return type;
+    }
+
+    /**
+     * Get name of directory entry.
+     *
+     * @return the directory name
+     */
+    String getName() {
+        return name;
+    }
+
+    /**
+     * @see Object#toString()
+     */
+    @Override
+    public String toString() {
+        return String.format("[%d]: %s", ino, name);
+    }
+}
diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveConstants.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveConstants.java
new file mode 100644
index 0000000..107db0b
--- /dev/null
+++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveConstants.java
@@ -0,0 +1,73 @@
+package org.xbib.io.archive.dump;
+
+/**
+ * Various constants associated with dump archives.
+ */
+public final class DumpArchiveConstants {
+
+    public static final int TP_SIZE = 1024;
+    public static final int NTREC = 10;
+    public static final int HIGH_DENSITY_NTREC = 32;
+    public static final int OFS_MAGIC = 60011;
+    public static final int NFS_MAGIC = 60012;
+    public static final int FS_UFS2_MAGIC = 0x19540119;
+    public static final int CHECKSUM = 84446;
+    public static final int LBLSIZE = 16;
+    public static final int NAMELEN = 64;
+
+    /* do not instantiate */
+    private DumpArchiveConstants() {
+    }
+
+    /**
+     * The type of tape segment.
+     */
+    public enum SEGMENT_TYPE {
+        TAPE(1),
+        INODE(2),
+        BITS(3),
+        ADDR(4),
+        END(5),
+        CLRI(6);
+
+        int code;
+
+        SEGMENT_TYPE(int code) {
+            this.code = code;
+        }
+
+        public static SEGMENT_TYPE find(int code) {
+            for (SEGMENT_TYPE t : values()) {
+                if (t.code == code) {
+                    return t;
+                }
+            }
+
+            return null;
+        }
+    }
+
+    /**
+     * The type of compression.
+     */
+    public enum COMPRESSION_TYPE {
+        ZLIB(0),
+        BZLIB(1),
+        LZO(2);
+
+        int code;
+
+        COMPRESSION_TYPE(int code) {
+            this.code = code;
+        }
+
+        public static COMPRESSION_TYPE find(int code) {
+            for (COMPRESSION_TYPE t : values()) {
+                if (t.code == code) {
+                    return t;
+                }
+            }
+
+            return null;
+        }
+    }
+}
diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveEntry.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveEntry.java
new file mode 100644
index 0000000..a315981
--- /dev/null
+++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveEntry.java
@@ -0,0 +1,797 @@
+package org.xbib.io.archive.dump;
+
+import org.xbib.io.archive.entry.ArchiveEntry;
+
+import java.util.Collections;
+import java.util.Date;
+import java.util.EnumSet;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * This class represents an entry in a Dump archive. It consists
+ * of the entry's header, the entry's File and any extended attributes.
+ * DumpEntries that are created from the header bytes read from
+ * an archive are instantiated with the DumpArchiveEntry(byte[])
+ * constructor. These entries will be used when extracting from
+ * or listing the contents of an archive.
+ * These entries have their header filled in using the header bytes. They
+ * also set the File to null, since they reference an archive entry, not
+ * a file.
+ * DumpEntries can also be constructed from nothing but a name.
+ * This allows the programmer to construct the entry by hand, for
+ * instance when only an InputStream is available for writing to
+ * the archive, and the header information is constructed from
+ * other information. In this case the header fields are set to
+ * defaults and the File is set to null.
+ * The C structure for a Dump Entry's header is:
+ * #define TP_BSIZE    1024          // size of each file block
+ * #define NTREC       10            // number of blocks to write at once
+ * #define HIGHDENSITYTREC 32        // number of blocks to write on high-density tapes
+ * #define TP_NINDIR   (TP_BSIZE/2)  // number of indirect inodes in record
+ * #define TP_NINOS    (TP_NINDIR / sizeof (int32_t))
+ * #define LBLSIZE     16
+ * #define NAMELEN     64
+ *
+ * #define OFS_MAGIC     (int)60011  // old format magic value
+ * #define NFS_MAGIC     (int)60012  // new format magic value
+ * #define FS_UFS2_MAGIC (int)0x19540119
+ * #define CHECKSUM      (int)84446  // constant used in checksum algorithm
+ *
+ * struct  s_spcl {
+ *   int32_t c_type;             // record type (see below)
+ *   int32_t c_date;             // date of this dump
+ *   int32_t c_ddate;            // date of previous dump
+ *   int32_t c_volume;           // dump volume number
+ *   u_int32_t c_tapea;          // logical block of this record
+ *   dump_ino_t c_ino;           // number of inode
+ *   int32_t c_magic;            // magic number (see above)
+ *   int32_t c_checksum;         // record checksum
+ * #ifdef  __linux__
+ *   struct  new_bsd_inode c_dinode;
+ * #else
+ * #ifdef sunos
+ *   struct  new_bsd_inode c_dinode;
+ * #else
+ *   struct  dinode  c_dinode;   // ownership and mode of inode
+ * #endif
+ * #endif
+ *   int32_t c_count;            // number of valid c_addr entries
+ *   union u_data c_data;        // see above
+ *   char    c_label[LBLSIZE];   // dump label
+ *   int32_t c_level;            // level of this dump
+ *   char    c_filesys[NAMELEN]; // name of dumped file system
+ *   char    c_dev[NAMELEN];     // name of dumped device
+ *   char    c_host[NAMELEN];    // name of dumped host
+ *   int32_t c_flags;            // additional information (see below)
+ *   int32_t c_firstrec;         // first record on volume
+ *   int32_t c_ntrec;            // blocksize on volume
+ *   int32_t c_extattributes;    // additional inode info (see below)
+ *   int32_t c_spare[30];        // reserved for future uses
+ * } s_spcl;
+ *
+ * //
+ * // flag values
+ * //
+ * #define DR_NEWHEADER     0x0001  // new format tape header
+ * #define DR_NEWINODEFMT   0x0002  // new format inodes on tape
+ * #define DR_COMPRESSED    0x0080  // dump tape is compressed
+ * #define DR_METAONLY      0x0100  // only the metadata of the inode has been dumped
+ * #define DR_INODEINFO     0x0002  // [SIC] TS_END header contains c_inos information
+ * #define DR_EXTATTRIBUTES 0x8000
+ *
+ * //
+ * // extattributes inode info
+ * //
+ * #define EXT_REGULAR         0
+ * #define EXT_MACOSFNDRINFO   1
+ * #define EXT_MACOSRESFORK    2
+ * #define EXT_XATTR           3
+ *
+ * // used for EA on tape
+ * #define EXT2_GOOD_OLD_INODE_SIZE    128
+ * #define EXT2_XATTR_MAGIC        0xEA020000  // block EA
+ * #define EXT2_XATTR_MAGIC2       0xEA020001  // in inode EA
+ * 
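+ * Of these flags, DR_COMPRESSED is the one a reader has to act on: the
+ * DumpArchiveInputStream constructor further below checks the summary record
+ * (which presumably derives its compression flag from this word) and, if set,
+ * switches the tape stream into decompressing mode, roughly:
+ *
+ * raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());
+ *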
+ * The C structure for the inode (file) information is: + *
+ * struct bsdtimeval {           //  **** alpha-*-linux is deviant
+ *   __u32   tv_sec;
+ *   __u32   tv_usec;
+ * };
+ *
+ * #define NDADDR      12
+ * #define NIADDR       3
+ *
+ * //
+ * // This is the new (4.4) BSD inode structure
+ * // copied from the FreeBSD 2.0  ufs/ufs/dinode.h include file
+ * //
+ * struct new_bsd_inode {
+ *   __u16       di_mode;           // file type, standard Unix permissions
+ *   __s16       di_nlink;          // number of hard links to file.
+ *   union {
+ *      __u16       oldids[2];
+ *      __u32       inumber;
+ *   }           di_u;
+ *   u_quad_t    di_size;           // file size
+ *   struct bsdtimeval   di_atime;  // time file was last accessed
+ *   struct bsdtimeval   di_mtime;  // time file was last modified
+ *   struct bsdtimeval   di_ctime;  // time file was created
+ *   __u32       di_db[NDADDR];
+ *   __u32       di_ib[NIADDR];
+ *   __u32       di_flags;          //
+ *   __s32       di_blocks;         // number of disk blocks
+ *   __s32       di_gen;            // generation number
+ *   __u32       di_uid;            // user id (see /etc/passwd)
+ *   __u32       di_gid;            // group id (see /etc/group)
+ *   __s32       di_spare[2];       // unused
+ * };
+ * 
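+ * For illustration, this is how the di_mode word is interpreted when a header
+ * is parsed (mirroring the parse() method below; the local variable names are
+ * illustrative only):
+ *
+ * int m = DumpArchiveUtil.convert16(buffer, 32);  // di_mode word
+ * TYPE type = TYPE.find((m >> 12) & 0x0F);        // upper nibble holds the file type
+ * Set<PERMISSION> perms = PERMISSION.find(m);     // permission bits live in the low 12 bits
+ *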
+ * It is important to note that the header DOES NOT have the name of the + * file. It can't since hard links mean that you may have multiple filenames + * for a single physical file. You must read the contents of the directory + * entries to learn the mapping(s) from filename to inode. + * The C structure that indicates if a specific block is a real block + * that contains data or is a sparse block that is not persisted to the + * disk is: + *
+ * #define TP_BSIZE    1024
+ * #define TP_NINDIR   (TP_BSIZE/2)
+ *
+ * union u_data {
+ *   char    s_addrs[TP_NINDIR]; // 1 => data; 0 => hole in inode
+ *   int32_t s_inos[TP_NINOS];   // table of first inode on each volume
+ * } u_data;
+ * 
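+ * Each byte of s_addrs answers "does a physical data block follow for this
+ * record?". DumpArchiveEntry exposes that test as isSparseRecord(int), which
+ * boils down to:
+ *
+ * boolean hole = (header.getCdata(idx) & 0x01) == 0; // 0 => hole, 1 => data
+ *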
+ */
+public class DumpArchiveEntry implements ArchiveEntry {
+
+    private String name;
+
+    private TYPE type = TYPE.UNKNOWN;
+
+    private int mode;
+
+    private Set<PERMISSION> permissions = Collections.emptySet();
+
+    private long size;
+
+    private long atime;
+
+    private long mtime;
+
+    private int uid;
+
+    private int gid;
+
+    /**
+     * Currently unused.
+     */
+    private DumpArchiveSummary summary = null;
+
+    // this information is available from standard index.
+    private TapeSegmentHeader header = new TapeSegmentHeader();
+
+    private String simpleName;
+
+    private String originalName;
+
+    // this information is available from QFA index
+    private int volume;
+
+    private long offset;
+
+    private int ino;
+
+    private int nlink;
+
+    private long ctime;
+
+    private int generation;
+
+    private boolean isDeleted;
+
+    /**
+     * Default constructor.
+     */
+    public DumpArchiveEntry() {
+    }
+
+    /**
+     * Constructor taking only filename.
+     *
+     * @param name       pathname
+     * @param simpleName actual filename
+     */
+    public DumpArchiveEntry(String name, String simpleName) {
+        setName(name);
+        this.simpleName = simpleName;
+    }
+
+    /**
+     * Constructor taking name, inode and type.
+     *
+     * @param name       pathname
+     * @param simpleName actual filename
+     * @param ino        the inode
+     * @param type       the entry type
+     */
+    protected DumpArchiveEntry(String name, String simpleName, int ino,
+                               TYPE type) {
+        setType(type);
+        setName(name);
+        this.simpleName = simpleName;
+        this.ino = ino;
+        this.offset = 0;
+    }
+
+    /**
+     * Returns the path of the entry.
+     *
+     * @return the path of the entry.
+     */
+    public String getSimpleName() {
+        return simpleName;
+    }
+
+    /**
+     * Sets the path of the entry.
+     */
+    protected void setSimpleName(String simpleName) {
+        this.simpleName = simpleName;
+    }
+
+    /**
+     * Returns the ino of the entry.
+     */
+    public int getIno() {
+        return header.getIno();
+    }
+
+    /**
+     * Return the number of hard links to the entry.
+     */
+    public int getNlink() {
+        return nlink;
+    }
+
+    /**
+     * Set the number of hard links.
+     */
+    public void setNlink(int nlink) {
+        this.nlink = nlink;
+    }
+
+    /**
+     * Get the file creation time.
+     */
+    public Date getCreationTime() {
+        return new Date(ctime);
+    }
+
+    /**
+     * Set the file creation time.
+     */
+    public void setCreationTime(Date ctime) {
+        this.ctime = ctime.getTime();
+    }
+
+    /**
+     * Return the generation of the file.
+     */
+    public int getGeneration() {
+        return generation;
+    }
+
+    /**
+     * Set the generation of the file.
+     */
+    public void setGeneration(int generation) {
+        this.generation = generation;
+    }
+
+    /**
+     * Has this file been deleted? (Only valid on incremental dumps.)
+     */
+    public boolean isDeleted() {
+        return isDeleted;
+    }
+
+    /**
+     * Set whether this file has been deleted.
+     */
+    public void setDeleted(boolean isDeleted) {
+        this.isDeleted = isDeleted;
+    }
+
+    /**
+     * Return the offset within the archive.
+     */
+    public long getOffset() {
+        return offset;
+    }
+
+    /**
+     * Set the offset within the archive.
+     */
+    public void setOffset(long offset) {
+        this.offset = offset;
+    }
+
+    /**
+     * Return the tape volume where this file is located.
+     */
+    public int getVolume() {
+        return volume;
+    }
+
+    /**
+     * Set the tape volume.
+     */
+    public void setVolume(int volume) {
+        this.volume = volume;
+    }
+
+    /**
+     * Return the type of the tape segment header.
+     */
+    public DumpArchiveConstants.SEGMENT_TYPE getHeaderType() {
+        return header.getType();
+    }
+
+    /**
+     * Return the number of records in this segment.
+     */
+    public int getHeaderCount() {
+        return header.getCount();
+    }
+
+    /**
+     * Return the number of sparse records in this segment.
+     */
+    public int getHeaderHoles() {
+        return header.getHoles();
+    }
+
+    /**
+     * Is this a sparse record?
+     */
+    public boolean isSparseRecord(int idx) {
+        return (header.getCdata(idx) & 0x01) == 0;
+    }
+
+    /**
+     * @see Object#hashCode()
+     */
+    @Override
+    public int hashCode() {
+        return ino;
+    }
+
+    /**
+     * @see Object#equals(Object o)
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        } else if (o == null || !o.getClass().equals(getClass())) {
+            return false;
+        }
+
+        DumpArchiveEntry rhs = (DumpArchiveEntry) o;
+
+        if ((header == null) || (rhs.header == null)) {
+            return false;
+        }
+
+        if (ino != rhs.ino) {
+            return false;
+        }
+
+        if ((summary == null && rhs.summary != null)
+                || (summary != null && !summary.equals(rhs.summary))) {
+            return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * @see Object#toString()
+     */
+    @Override
+    public String toString() {
+        return getName();
+    }
+
+    /**
+     * Populate the dump archive entry and tape segment header with
+     * the contents of the buffer.
+     *
+     * @param buffer the tape segment header buffer
+     * @return the parsed entry
+     */
+    static DumpArchiveEntry parse(byte[] buffer) {
+        DumpArchiveEntry entry = new DumpArchiveEntry();
+        TapeSegmentHeader header = entry.header;
+
+        header.type = DumpArchiveConstants.SEGMENT_TYPE.find(DumpArchiveUtil.convert32(
+                buffer, 0));
+
+        //header.dumpDate = new Date(1000L * DumpArchiveUtil.convert32(buffer, 4));
+        //header.previousDumpDate = new Date(1000L * DumpArchiveUtil.convert32(
+        //    buffer, 8));
+        header.volume = DumpArchiveUtil.convert32(buffer, 12);
+        //header.tapea = DumpArchiveUtil.convert32(buffer, 16);
+        entry.ino = header.ino = DumpArchiveUtil.convert32(buffer, 20);
+
+        //header.magic = DumpArchiveUtil.convert32(buffer, 24);
+        //header.checksum = DumpArchiveUtil.convert32(buffer, 28);
+        int m = DumpArchiveUtil.convert16(buffer, 32);
+
+        // determine the type of the file.
+        entry.setType(TYPE.find((m >> 12) & 0x0F));
+
+        // determine the standard permissions
+        entry.setMode(m);
+
+        entry.nlink = DumpArchiveUtil.convert16(buffer, 34);
+        // inumber, oldids?
+        entry.setEntrySize(DumpArchiveUtil.convert64(buffer, 40));
+
+        long t = (1000L * DumpArchiveUtil.convert32(buffer, 48))
+                + (DumpArchiveUtil.convert32(buffer, 52) / 1000);
+        entry.setAccessTime(new Date(t));
+        t = (1000L * DumpArchiveUtil.convert32(buffer, 56))
+                + (DumpArchiveUtil.convert32(buffer, 60) / 1000);
+        entry.setLastModified(new Date(t));
+        t = (1000L * DumpArchiveUtil.convert32(buffer, 64))
+                + (DumpArchiveUtil.convert32(buffer, 68) / 1000);
+        entry.ctime = t;
+
+        // db: 72-119 - direct blocks
+        // id: 120-131 - indirect blocks
+        //entry.flags = DumpArchiveUtil.convert32(buffer, 132);
+        //entry.blocks = DumpArchiveUtil.convert32(buffer, 136);
+        entry.generation = DumpArchiveUtil.convert32(buffer, 140);
+        entry.setUserId(DumpArchiveUtil.convert32(buffer, 144));
+        entry.setGroupId(DumpArchiveUtil.convert32(buffer, 148));
+        // two 32-bit spare values.
+        header.count = DumpArchiveUtil.convert32(buffer, 160);
+
+        header.holes = 0;
+
+        for (int i = 0; (i < 512) && (i < header.count); i++) {
+            if (buffer[164 + i] == 0) {
+                header.holes++;
+            }
+        }
+
+        System.arraycopy(buffer, 164, header.cdata, 0, 512);
+
+        entry.volume = header.getVolume();
+
+        //entry.isSummaryOnly = false;
+        return entry;
+    }
+
+    /**
+     * Update entry with information from the next tape segment header.
+     */
+    void update(byte[] buffer) {
+        header.volume = DumpArchiveUtil.convert32(buffer, 16);
+        header.count = DumpArchiveUtil.convert32(buffer, 160);
+
+        header.holes = 0;
+
+        for (int i = 0; (i < 512) && (i < header.count); i++) {
+            if (buffer[164 + i] == 0) {
+                header.holes++;
+            }
+        }
+
+        System.arraycopy(buffer, 164, header.cdata, 0, 512);
+    }
+
+    /**
+     * Archive entry as stored on tape. There is one TSH for (at most)
+     * every 512k in the file.
+     */
+    static class TapeSegmentHeader {
+        private DumpArchiveConstants.SEGMENT_TYPE type;
+        private int volume;
+        private int ino;
+        private int count;
+        private int holes;
+        private byte[] cdata = new byte[512]; // map of any 'holes'
+
+        public DumpArchiveConstants.SEGMENT_TYPE getType() {
+            return type;
+        }
+
+        public int getVolume() {
+            return volume;
+        }
+
+        public int getIno() {
+            return ino;
+        }
+
+        void setIno(int ino) {
+            this.ino = ino;
+        }
+
+        public int getCount() {
+            return count;
+        }
+
+        public int getHoles() {
+            return holes;
+        }
+
+        public int getCdata(int idx) {
+            return cdata[idx];
+        }
+    }
+
+    /**
+     * Returns the name of the entry.
+     *
+     * @return the name of the entry.
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Returns the unmodified name of the entry.
+     *
+     * @return the name of the entry.
+     */
+    String getOriginalName() {
+        return originalName;
+    }
+
+    /**
+     * Sets the name of the entry.
+     */
+    public DumpArchiveEntry setName(String name) {
+        this.originalName = name;
+        if (name != null) {
+            if (isDirectory() && !name.endsWith("/")) {
+                name += "/";
+            }
+            if (name.startsWith("./")) {
+                name = name.substring(2);
+            }
+        }
+        this.name = name;
+        return this;
+    }
+
+    public Date getLastModifiedDate() {
+        return new Date(mtime);
+    }
+
+    /**
+     * Is this a directory?
+     */
+    public boolean isDirectory() {
+        return type == TYPE.DIRECTORY;
+    }
+
+    /**
+     * Is this a regular file?
+     */
+    public boolean isFile() {
+        return type == TYPE.FILE;
+    }
+
+    /**
+     * Is this a socket?
+     */
+    public boolean isSocket() {
+        return type == TYPE.SOCKET;
+    }
+
+    /**
+     * Is this a character device?
+     */
+    public boolean isChrDev() {
+        return type == TYPE.CHRDEV;
+    }
+
+    /**
+     * Is this a block device?
+     */
+    public boolean isBlkDev() {
+        return type == TYPE.BLKDEV;
+    }
+
+    /**
+     * Is this a fifo/pipe?
+     */
+    public boolean isFifo() {
+        return type == TYPE.FIFO;
+    }
+
+    /**
+     * Get the type of the entry.
+     */
+    public TYPE getType() {
+        return type;
+    }
+
+    /**
+     * Set the type of the entry.
+     */
+    public void setType(TYPE type) {
+        this.type = type;
+    }
+
+    /**
+     * Return the access permissions on the entry.
+     */
+    public int getMode() {
+        return mode;
+    }
+
+    /**
+     * Set the access permissions on the entry.
+     */
+    public void setMode(int mode) {
+        this.mode = mode & 07777;
+        this.permissions = PERMISSION.find(mode);
+    }
+
+    /**
+     * Returns the permissions on the entry.
+     */
+    public Set<PERMISSION> getPermissions() {
+        return permissions;
+    }
+
+    /**
+     * Returns the size of the entry as read from the archive.
+     */
+    public long getEntrySize() {
+        return isDirectory() ? ArchiveEntry.SIZE_UNKNOWN : size;
+    }
+
+    /**
+     * Set the size of the entry.
+     */
+    public DumpArchiveEntry setEntrySize(long size) {
+        this.size = size;
+        return this;
+    }
+
+    /**
+     * Set the time the file was last modified.
+     */
+    public DumpArchiveEntry setLastModified(Date mtime) {
+        this.mtime = mtime.getTime();
+        return this;
+    }
+
+    public Date getLastModified() {
+        return new Date(mtime);
+    }
+
+    /**
+     * Returns the time the file was last accessed.
+     */
+    public Date getAccessTime() {
+        return new Date(atime);
+    }
+
+    /**
+     * Set the time the file was last accessed.
+     */
+    public void setAccessTime(Date atime) {
+        this.atime = atime.getTime();
+    }
+
+    /**
+     * Return the user id.
+     */
+    public int getUserId() {
+        return uid;
+    }
+
+    /**
+     * Set the user id.
+     */
+    public void setUserId(int uid) {
+        this.uid = uid;
+    }
+
+    /**
+     * Return the group id.
+     */
+    public int getGroupId() {
+        return gid;
+    }
+
+    /**
+     * Set the group id.
+     */
+    public void setGroupId(int gid) {
+        this.gid = gid;
+    }
+
+    public enum TYPE {
+        WHITEOUT(14),
+        SOCKET(12),
+        LINK(10),
+        FILE(8),
+        BLKDEV(6),
+        DIRECTORY(4),
+        CHRDEV(2),
+        FIFO(1),
+        UNKNOWN(15);
+
+        private int code;
+
+        TYPE(int code) {
+            this.code = code;
+        }
+
+        public static TYPE find(int code) {
+            TYPE type = UNKNOWN;
+
+            for (TYPE t : TYPE.values()) {
+                if (code == t.code) {
+                    type = t;
+                }
+            }
+
+            return type;
+        }
+    }
+
+    public enum PERMISSION {
+        SETUID(04000),
+        SETGUI(02000),
+        STICKY(01000),
+        USER_READ(00400),
+        USER_WRITE(00200),
+        USER_EXEC(00100),
+        GROUP_READ(00040),
+        GROUP_WRITE(00020),
+        GROUP_EXEC(00010),
+        WORLD_READ(00004),
+        WORLD_WRITE(00002),
+        WORLD_EXEC(00001);
+
+        private int code;
+
+        PERMISSION(int code) {
+            this.code = code;
+        }
+
+        public static Set<PERMISSION> find(int code) {
+            Set<PERMISSION> set = new HashSet<>();
+
+            for (PERMISSION p : PERMISSION.values()) {
+                if ((code & p.code) == p.code) {
+                    set.add(p);
+                }
+            }
+
+            if (set.isEmpty()) {
+                return Collections.emptySet();
+            }
+
+            return EnumSet.copyOf(set);
+        }
+    }
+}
diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveException.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveException.java
new file mode 100644
index 0000000..3d3fa68
--- /dev/null
+++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveException.java
@@ -0,0 +1,19 @@
+package org.xbib.io.archive.dump;
+
+import java.io.IOException;
+
+/**
+ * Dump Archive Exception.
+ */
+public class DumpArchiveException extends IOException {
+
+    public DumpArchiveException(String msg) {
+        super(msg);
+    }
+
+    public DumpArchiveException(String msg, Throwable cause) {
+        super(msg);
+        initCause(cause);
+    }
+}
diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveInputStream.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveInputStream.java
new file mode 100644
index 0000000..54d2298
--- /dev/null
+++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveInputStream.java
@@ -0,0 +1,490 @@
+package org.xbib.io.archive.dump;
+
+import org.xbib.io.archive.stream.ArchiveInputStream;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Stack;
+
+/**
+ * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
+ * Methods are provided to position at each successive entry in
+ * the archive, and then read each entry as a normal input stream
+ * using read().
+ */
+public class DumpArchiveInputStream extends ArchiveInputStream {
+
+    private DumpArchiveSummary summary;
+
+    private DumpArchiveEntry active;
+
+    private boolean isClosed;
+
+    private boolean hasHitEOF;
+
+    private long entrySize;
+
+    private long entryOffset;
+
+    private int readIdx;
+
+    private byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
+
+    private byte[] blockBuffer;
+
+    private int recordOffset;
+
+    private long filepos;
+
+    protected TapeInputStream raw;
+
+    // map of ino -> dirent entry. We can use this to reconstruct full paths.
+    private Map<Integer, Dirent> names = new HashMap<>();
+
+    // map of ino -> (directory) entry when we're missing one or more elements in the path.
+    private Map<Integer, DumpArchiveEntry> pending = new HashMap<>();
+
+    // queue of (directory) entries where we now have the full path.
+    private Queue<DumpArchiveEntry> queue;
+
+    /**
+     * Constructor.
+     *
+     * @param is the dump archive stream to read
+     */
+    public DumpArchiveInputStream(InputStream is) throws IOException {
+        this.raw = new TapeInputStream(is);
+        this.hasHitEOF = false;
+
+        // read header, verify it's a dump archive.
+        byte[] headerBytes = raw.readRecord();
+
+        if (!DumpArchiveUtil.verify(headerBytes)) {
+            throw new UnrecognizedFormatException();
+        }
+
+        // get summary information
+        summary = new DumpArchiveSummary(headerBytes);
+
+        // reset buffer with actual block size.
+        raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());
+
+        // allocate our read buffer.
+        blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];
+
+        // skip past CLRI and BITS segments since we don't handle them yet.
+        readCLRI();
+        readBITS();
+
+        // put in a dummy record for the root node.
+        Dirent root = new Dirent(2, 2, 4, ".");
+        names.put(2, root);
+
+        // use priority based on queue to ensure parent directories are
+        // released first.
+        queue = new PriorityQueue<>(10,
+                new Comparator<DumpArchiveEntry>() {
+                    public int compare(DumpArchiveEntry p, DumpArchiveEntry q) {
+                        if ((p.getOriginalName() == null) || (q.getOriginalName() == null)) {
+                            return Integer.MAX_VALUE;
+                        }
+
+                        return p.getOriginalName().compareTo(q.getOriginalName());
+                    }
+                });
+    }
+
+    /**
+     * Return the archive summary information.
+     */
+    public DumpArchiveSummary getSummary() {
+        return summary;
+    }
+
+    /**
+     * Read the CLRI (deleted inode) segment.
+     */
+    private void readCLRI() throws IOException {
+        byte[] readBuf = raw.readRecord();
+
+        if (!DumpArchiveUtil.verify(readBuf)) {
+            throw new InvalidFormatException();
+        }
+
+        active = DumpArchiveEntry.parse(readBuf);
+
+        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
+            throw new InvalidFormatException();
+        }
+
+        // we don't do anything with this yet.
+        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
+                == -1) {
+            throw new EOFException();
+        }
+        readIdx = active.getHeaderCount();
+    }
+
+    /**
+     * Read the BITS segment.
+     */
+    private void readBITS() throws IOException {
+        byte[] readBuf = raw.readRecord();
+
+        if (!DumpArchiveUtil.verify(readBuf)) {
+            throw new InvalidFormatException();
+        }
+
+        active = DumpArchiveEntry.parse(readBuf);
+
+        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
+            throw new InvalidFormatException();
+        }
+
+        // we don't do anything with this yet.
+        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
+                == -1) {
+            throw new EOFException();
+        }
+        readIdx = active.getHeaderCount();
+    }
+
+    /**
+     * Read the next entry.
+     */
+    public DumpArchiveEntry getNextDumpEntry() throws IOException {
+        return getNextEntry();
+    }
+
+    /**
+     * Read the next entry.
+     */
+    @Override
+    public DumpArchiveEntry getNextEntry() throws IOException {
+        DumpArchiveEntry entry = null;
+        String path = null;
+
+        // is there anything in the queue?
+        if (!queue.isEmpty()) {
+            return queue.remove();
+        }
+
+        while (entry == null) {
+            if (hasHitEOF) {
+                return null;
+            }
+
+            // skip any remaining records in this segment for prior file.
+            // we might still have holes... easiest to do it
+            // block by block. We may want to revisit this if
+            // the unnecessary decompression time adds up.
+            while (readIdx < active.getHeaderCount()) {
+                if (!active.isSparseRecord(readIdx++)
+                        && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
+                    throw new EOFException();
+                }
+            }
+
+            readIdx = 0;
+            filepos = raw.getBytesRead();
+
+            byte[] headerBytes = raw.readRecord();
+
+            if (!DumpArchiveUtil.verify(headerBytes)) {
+                throw new InvalidFormatException();
+            }
+
+            active = DumpArchiveEntry.parse(headerBytes);
+
+            // skip any remaining segments for prior file.
+            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
+                if (raw.skip(DumpArchiveConstants.TP_SIZE
+                        * (active.getHeaderCount()
+                        - active.getHeaderHoles())) == -1) {
+                    throw new EOFException();
+                }
+
+                filepos = raw.getBytesRead();
+                headerBytes = raw.readRecord();
+
+                if (!DumpArchiveUtil.verify(headerBytes)) {
+                    throw new InvalidFormatException();
+                }
+
+                active = DumpArchiveEntry.parse(headerBytes);
+            }
+
+            // check if this is an end-of-volume marker.
+            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
+                hasHitEOF = true;
+                isClosed = true;
+                raw.close();
+
+                return null;
+            }
+
+            entry = active;
+
+            if (entry.isDirectory()) {
+                readDirectoryEntry(active);
+
+                // now we create an empty InputStream.
+                entryOffset = 0;
+                entrySize = 0;
+                readIdx = active.getHeaderCount();
+            } else {
+                entryOffset = 0;
+                entrySize = active.getEntrySize();
+                readIdx = 0;
+            }
+
+            recordOffset = readBuf.length;
+
+            path = getPath(entry);
+
+            if (path == null) {
+                entry = null;
+            }
+        }
+
+        entry.setName(path);
+        entry.setSimpleName(names.get(entry.getIno()).getName());
+        entry.setOffset(filepos);
+
+        return entry;
+    }
+
+    /**
+     * Read directory entry.
+     */
+    private void readDirectoryEntry(DumpArchiveEntry entry)
+            throws IOException {
+        long size = entry.getEntrySize();
+        boolean first = true;
+
+        while (first ||
+                (DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType())) {
+            // read the header that we just peeked at.
+            if (!first) {
+                raw.readRecord();
+            }
+
+            if (!names.containsKey(entry.getIno()) &&
+                    (DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType())) {
+                pending.put(entry.getIno(), entry);
+            }
+
+            int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();
+
+            if (blockBuffer.length < datalen) {
+                blockBuffer = new byte[datalen];
+            }
+
+            if (raw.read(blockBuffer, 0, datalen) != datalen) {
+                throw new EOFException();
+            }
+
+            int reclen = 0;
+
+            for (int i = 0; (i < (datalen - 8)) && (i < (size - 8));
+                 i += reclen) {
+                int ino = DumpArchiveUtil.convert32(blockBuffer, i);
+                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);
+
+                byte type = blockBuffer[i + 6];
+
+                String name = new String(blockBuffer, i + 8, blockBuffer[i + 7]); // TODO default charset?
+
+                if (".".equals(name) || "..".equals(name)) {
+                    // do nothing...
+                    continue;
+                }
+
+                Dirent d = new Dirent(ino, entry.getIno(), type, name);
+
+                names.put(Integer.valueOf(ino), d);
+
+                // check whether this allows us to fill anything in the pending list.
+                for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
+                    String path = getPath(e.getValue());
+
+                    if (path != null) {
+                        e.getValue().setName(path);
+                        e.getValue()
+                                .setSimpleName(names.get(e.getKey()).getName());
+                        queue.add(e.getValue());
+                    }
+                }
+
+                // remove anything that we found. (We can't do it earlier
+                // because of concurrent modification exceptions.)
+                for (DumpArchiveEntry e : queue) {
+                    pending.remove(Integer.valueOf(e.getIno()));
+                }
+            }
+
+            byte[] peekBytes = raw.peek();
+
+            if (!DumpArchiveUtil.verify(peekBytes)) {
+                throw new InvalidFormatException();
+            }
+
+            entry = DumpArchiveEntry.parse(peekBytes);
+            first = false;
+            size -= DumpArchiveConstants.TP_SIZE;
+        }
+    }
+
+    /**
+     * Get full path for specified archive entry, or null if there's a gap.
+     *
+     * @param entry the entry to resolve
+     * @return full path for specified archive entry, or null if there's a gap.
+     */
+    private String getPath(DumpArchiveEntry entry) {
+        // build the stack of elements. It's possible that we're
+        // still missing an intermediate value and if so we defer
+        // the work and read the next entry instead.
+        Stack<String> elements = new Stack<>();
+        Dirent dirent = null;
+
+        for (int i = entry.getIno(); ; i = dirent.getParentIno()) {
+            if (!names.containsKey(Integer.valueOf(i))) {
+                elements.clear();
+                break;
+            }
+
+            dirent = names.get(Integer.valueOf(i));
+            elements.push(dirent.getName());
+
+            if (dirent.getIno() == dirent.getParentIno()) {
+                break;
+            }
+        }
+
+        // if an element is missing defer the work and read next entry.
+        if (elements.isEmpty()) {
+            pending.put(Integer.valueOf(entry.getIno()), entry);
+
+            return null;
+        }
+
+        // generate full path from stack of elements.
+        StringBuilder sb = new StringBuilder(elements.pop());
+
+        while (!elements.isEmpty()) {
+            sb.append('/');
+            sb.append(elements.pop());
+        }
+
+        return sb.toString();
+    }
+
+    /**
+     * Reads bytes from the current dump archive entry.
+     * This method is aware of the boundaries of the current
+     * entry in the archive and will deal with them as if they
+     * were this stream's start and EOF.
+     *
+     * @param buf The buffer into which to place bytes read.
+     * @param off The offset at which to place bytes read.
+     * @param len The number of bytes to read.
+     * @return The number of bytes read, or -1 at EOF.
+     * @throws java.io.IOException on error
+     */
+    @Override
+    public int read(byte[] buf, int off, int len) throws IOException {
+        int totalRead = 0;
+
+        if (isClosed || (entryOffset >= entrySize)) {
+            return -1;
+        }
+
+        if ((len + entryOffset) > entrySize) {
+            len = (int) (entrySize - entryOffset);
+        }
+
+        while (len > 0) {
+            int sz = (len > (readBuf.length - recordOffset))
+                    ? (readBuf.length - recordOffset) : len;
+
+            // copy any data we have
+            if ((recordOffset + sz) <= readBuf.length) {
+                System.arraycopy(readBuf, recordOffset, buf, off, sz);
+                totalRead += sz;
+                recordOffset += sz;
+                len -= sz;
+                off += sz;
+            }
+
+            // load next block if necessary.
+            if (len > 0) {
+                if (readIdx >= 512) {
+                    byte[] headerBytes = raw.readRecord();
+
+                    if (!DumpArchiveUtil.verify(headerBytes)) {
+                        throw new InvalidFormatException();
+                    }
+
+                    active = DumpArchiveEntry.parse(headerBytes);
+                    readIdx = 0;
+                }
+
+                if (!active.isSparseRecord(readIdx++)) {
+                    int r = raw.read(readBuf, 0, readBuf.length);
+                    if (r != readBuf.length) {
+                        throw new EOFException();
+                    }
+                } else {
+                    Arrays.fill(readBuf, (byte) 0);
+                }
+
+                recordOffset = 0;
+            }
+        }
+
+        entryOffset += totalRead;
+
+        return totalRead;
+    }
+
+    /**
+     * Closes this stream, including the underlying tape input stream.
+     */
+    @Override
+    public void close() throws IOException {
+        if (!isClosed) {
+            isClosed = true;
+            raw.close();
+        }
+    }
+
+    /**
+     * Look at the first few bytes of the file to decide if it's a dump
+     * archive. With 32 bytes we can look at the magic value, with a full
+     * 1k we can verify the checksum.
+     */
+    public static boolean matches(byte[] buffer, int length) {
+        // do we have enough of the header?
+        if (length < 32) {
+            return false;
+        }
+
+        // this is the best test
+        if (length >= DumpArchiveConstants.TP_SIZE) {
+            return DumpArchiveUtil.verify(buffer);
+        }
+
+        // this will work in a pinch.
+        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, 24);
+    }
+
+}
diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveSummary.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveSummary.java
new file mode 100644
index 0000000..50591a1
--- /dev/null
+++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveSummary.java
@@ -0,0 +1,333 @@
+package org.xbib.io.archive.dump;
+
+import java.util.Date;
+
+/**
+ * This class represents identifying information about a Dump archive volume.
+ * It consists of the archive's dump date, label, hostname, device name and
+ * possibly the last mount point, plus the volume's volume id and first
+ * record number.
+ * For the corresponding C structure see the header of {@link DumpArchiveEntry}.
+ */
+public class DumpArchiveSummary {
+
+    private long dumpDate;
+    private long previousDumpDate;
+    private int volume;
+    private String label;
+    private int level;
+    private String filesys;
+    private String devname;
+    private String hostname;
+    private int flags;
+    private int firstrec;
+    private int ntrec;
+
+    DumpArchiveSummary(byte[] buffer) {
+        dumpDate = 1000L * DumpArchiveUtil.convert32(buffer, 4);
+        previousDumpDate = 1000L * DumpArchiveUtil.convert32(buffer, 8);
+        volume = DumpArchiveUtil.convert32(buffer, 12);
+        label = new String(buffer, 676, DumpArchiveConstants.LBLSIZE).trim(); // TODO default charset?
+        level = DumpArchiveUtil.convert32(buffer, 692);
+        filesys = new String(buffer, 696, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset?
+        devname = new String(buffer, 760, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset?
+        hostname = new String(buffer, 824, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset?
+        flags = DumpArchiveUtil.convert32(buffer, 888);
+        firstrec = DumpArchiveUtil.convert32(buffer, 892);
+        ntrec = DumpArchiveUtil.convert32(buffer, 896);
+
+        //extAttributes = DumpArchiveUtil.convert32(buffer, 900);
+    }
+
+    /**
+     * Get the date of this dump.
+     *
+     * @return the date of this dump.
+     */
+    public Date getDumpDate() {
+        return new Date(dumpDate);
+    }
+
+    /**
+     * Set dump date.
+     */
+    public void setDumpDate(Date dumpDate) {
+        this.dumpDate = dumpDate.getTime();
+    }
+
+    /**
+     * Get the date of the previous dump at a lower level.
+     *
+     * @return the previous dump date
+     */
+    public Date getPreviousDumpDate() {
+        return new Date(previousDumpDate);
+    }
+
+    /**
+     * Set previous dump date.
+     */
+    public void setPreviousDumpDate(Date previousDumpDate) {
+        this.previousDumpDate = previousDumpDate.getTime();
+    }
+
+    /**
+     * Get volume (tape) number.
+     *
+     * @return volume (tape) number.
+     */
+    public int getVolume() {
+        return volume;
+    }
+
+    /**
+     * Set volume (tape) number.
+     */
+    public void setVolume(int volume) {
+        this.volume = volume;
+    }
+
+    /**
+     * Get the level of this dump.
+     * This is a number between 0 and 9, inclusive, and a level 0 dump
+     * is a complete dump of the partition. For any other dump 'n' this
+     * dump contains all files that have changed since the last dump at
+     * this level or lower. This is used to support different levels of
+     * incremental backups.
+     *
+     * @return dump level
+     */
+    public int getLevel() {
+        return level;
+    }
+
+    /**
+     * Set level.
+     */
+    public void setLevel(int level) {
+        this.level = level;
+    }
+
+    /**
+     * Get dump label. This may be autogenerated, or it may be specified
+     * by the user.
+     *
+     * @return dump label
+     */
+    public String getLabel() {
+        return label;
+    }
+
+    /**
+     * Set dump label.
+     *
+     * @param label the label
+     */
+    public void setLabel(String label) {
+        this.label = label;
+    }
+
+    /**
+     * Get the last mountpoint, e.g., /home.
+     *
+     * @return last mountpoint
+     */
+    public String getFilesystem() {
+        return filesys;
+    }
+
+    /**
+     * Set the last mountpoint.
+     */
+    public void setFilesystem(String filesystem) {
+        this.filesys = filesystem;
+    }
+
+    /**
+     * Get the device name, e.g., /dev/sda3 or /dev/mapper/vg0-home.
+     *
+     * @return device name
+     */
+    public String getDevname() {
+        return devname;
+    }
+
+    /**
+     * Set the device name.
+     *
+     * @param devname the device name
+     */
+    public void setDevname(String devname) {
+        this.devname = devname;
+    }
+
+    /**
+     * Get the hostname of the system where the dump was performed.
+     *
+     * @return hostname
+     */
+    public String getHostname() {
+        return hostname;
+    }
+
+    /**
+     * Set the hostname.
+     */
+    public void setHostname(String hostname) {
+        this.hostname = hostname;
+    }
+
+    /**
+     * Get the miscellaneous flags. See the flag query methods below.
+     *
+     * @return flags
+     */
+    public int getFlags() {
+        return flags;
+    }
+
+    /**
+     * Set the miscellaneous flags.
+     *
+     * @param flags the flags
+     */
+    public void setFlags(int flags) {
+        this.flags = flags;
+    }
+
+    /**
+     * Get the inode of the first record on this volume.
+     *
+     * @return inode of the first record on this volume.
+     */
+    public int getFirstRecord() {
+        return firstrec;
+    }
+
+    /**
+     * Set the inode of the first record.
+     *
+     * @param firstrec the inode of the first record
+     */
+    public void setFirstRecord(int firstrec) {
+        this.firstrec = firstrec;
+    }
+
+    /**
+     * Get the number of records per tape block. This is typically
+     * between 10 and 32.
+     *
+     * @return the number of records per tape block
+     */
+    public int getNTRec() {
+        return ntrec;
+    }
+
+    /**
+     * Set the number of records per tape block.
+     */
+    public void setNTRec(int ntrec) {
+        this.ntrec = ntrec;
+    }
+
+    /**
+     * Is this the new header format? (We do not currently support the
+     * old format.)
+     *
+     * @return true if using new header format
+     */
+    public boolean isNewHeader() {
+        return (flags & 0x0001) == 0x0001;
+    }
+
+    /**
+     * Is this the new inode format? (We do not currently support the
+     * old format.)
+     *
+     * @return true if using new inode format
+     */
+    public boolean isNewInode() {
+        return (flags & 0x0002) == 0x0002;
+    }
+
+    /**
+     * Is this volume compressed? N.B., individual blocks may or may not
+     * be compressed. The first block is never compressed.
+     *
+     * @return true if volume is compressed
+     */
+    public boolean isCompressed() {
+        return (flags & 0x0080) == 0x0080;
+    }
+
+    /**
+     * Does this volume only contain metadata?
+     *
+     * @return true if volume only contains metadata
+     */
+    public boolean isMetaDataOnly() {
+        return (flags & 0x0100) == 0x0100;
+    }
+
+    /**
+     * Does this volume contain extended attributes?
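+     * <p>
+     * For example (illustrative values): a flags word of {@code 0x8081} would
+     * report the new header format ({@code 0x0001}), compression
+     * ({@code 0x0080}) and extended attributes ({@code 0x8000}).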
+     *
+     * @return true if volume contains extended attributes
+     */
+    public boolean isExtendedAttributes() {
+        return (flags & 0x8000) == 0x8000;
+    }
+
+    /**
+     * @see Object#hashCode()
+     */
+    @Override
+    public int hashCode() {
+        int hash = 17;
+
+        if (label != null) {
+            hash = 31 * hash + label.hashCode();
+        }
+
+        hash = 31 * hash + (int) (dumpDate ^ (dumpDate >>> 32));
+
+        if (hostname != null) {
+            hash = 31 * hash + hostname.hashCode();
+        }
+
+        if (devname != null) {
+            hash = 31 * hash + devname.hashCode();
+        }
+
+        return hash;
+    }
+
+    /**
+     * @see Object#equals(Object)
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+
+        if (o == null || !o.getClass().equals(getClass())) {
+            return false;
+        }
+
+        DumpArchiveSummary rhs = (DumpArchiveSummary) o;
+
+        if (dumpDate != rhs.dumpDate) {
+            return false;
+        }
+
+        if (getHostname() == null
+                ? rhs.getHostname() != null
+                : !getHostname().equals(rhs.getHostname())) {
+            return false;
+        }
+
+        return getDevname() == null
+                ? rhs.getDevname() == null
+                : getDevname().equals(rhs.getDevname());
+    }
+}
diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveUtil.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveUtil.java
new file mode 100644
index 0000000..681a955
--- /dev/null
+++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/DumpArchiveUtil.java
@@ -0,0 +1,116 @@
+package org.xbib.io.archive.dump;
+
+/**
+ * Various utilities for dump archives.
+ */
+class DumpArchiveUtil {
+
+    /**
+     * Private constructor to prevent instantiation.
+     */
+    private DumpArchiveUtil() {
+    }
+
+    /**
+     * Calculate checksum for buffer.
+     *
+     * @param buffer buffer containing tape segment header
+     * @return checksum
+     */
+    public static int calculateChecksum(byte[] buffer) {
+        int calc = 0;
+
+        for (int i = 0; i < 256; i++) {
+            calc += DumpArchiveUtil.convert32(buffer, 4 * i);
+        }
+
+        return DumpArchiveConstants.CHECKSUM -
+                (calc - DumpArchiveUtil.convert32(buffer, 28));
+    }
+
+    /**
+     * Verify that the buffer contains a tape segment header.
+     *
+     * @param buffer the buffer to check
+     * @return true if the buffer contains a valid tape segment header
+     */
+    public static final boolean verify(byte[] buffer) {
+        // verify magic. for now only accept NFS_MAGIC.
+        int magic = convert32(buffer, 24);
+
+        if (magic != DumpArchiveConstants.NFS_MAGIC) {
+            return false;
+        }
+
+        // verify checksum...
+        int checksum = convert32(buffer, 28);
+
+        return checksum == calculateChecksum(buffer);
+    }
+
+    /**
+     * Get the ino associated with this buffer.
+     *
+     * @param buffer the buffer holding a tape segment header
+     * @return the ino
+     */
+    public static final int getIno(byte[] buffer) {
+        return convert32(buffer, 20);
+    }
+
+    /**
+     * Read 8-byte integer from buffer.
+     *
+     * @param buffer the buffer to read from
+     * @param offset the offset to read at
+     * @return the 8-byte entry as a long
+     */
+    public static final long convert64(byte[] buffer, int offset) {
+        long i = 0;
+        i += (((long) buffer[offset + 7]) << 56);
+        i += (((long) buffer[offset + 6] << 48) & 0x00FF000000000000L);
+        i += (((long) buffer[offset + 5] << 40) & 0x0000FF0000000000L);
+        i += (((long) buffer[offset + 4] << 32) & 0x000000FF00000000L);
+        i += (((long) buffer[offset + 3] << 24) & 0x00000000FF000000L);
+        i += (((long) buffer[offset + 2] << 16) & 0x0000000000FF0000L);
+        i += (((long) buffer[offset + 1] << 8) & 0x000000000000FF00L);
+        i += (buffer[offset] & 0x00000000000000FFL);
+
+        return i;
+    }
+
+    /**
+     * Read 4-byte integer from buffer.
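+     * The value is stored little-endian; for example (illustrative), the
+     * byte sequence {@code 0x01 0x02 0x03 0x04} decodes to {@code 0x04030201}.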
+     *
+     * @param buffer the buffer to read from
+     * @param offset the offset to read at
+     * @return the 4-byte entry as an int
+     */
+    public static final int convert32(byte[] buffer, int offset) {
+        int i = 0;
+        i = buffer[offset + 3] << 24;
+        i += (buffer[offset + 2] << 16) & 0x00FF0000;
+        i += (buffer[offset + 1] << 8) & 0x0000FF00;
+        i += buffer[offset] & 0x000000FF;
+
+        return i;
+    }
+
+    /**
+     * Read 2-byte integer from buffer.
+     *
+     * @param buffer the buffer to read from
+     * @param offset the offset to read at
+     * @return the 2-byte entry as an int
+     */
+    public static final int convert16(byte[] buffer, int offset) {
+        int i = 0;
+        i += (buffer[offset + 1] << 8) & 0x0000FF00;
+        i += buffer[offset] & 0x000000FF;
+
+        return i;
+    }
+}
diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/InvalidFormatException.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/InvalidFormatException.java
new file mode 100644
index 0000000..0f34bda
--- /dev/null
+++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/InvalidFormatException.java
@@ -0,0 +1,20 @@
+package org.xbib.io.archive.dump;
+
+/**
+ * Invalid Format Exception. There was an error decoding a
+ * tape segment header.
+ */
+public class InvalidFormatException extends DumpArchiveException {
+
+    protected long offset;
+
+    public InvalidFormatException() {
+        super("there was an error decoding a tape segment");
+    }
+
+    public long getOffset() {
+        return offset;
+    }
+}
diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/ShortFileException.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/ShortFileException.java
new file mode 100644
index 0000000..2c8e5a5
--- /dev/null
+++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/ShortFileException.java
@@ -0,0 +1,15 @@
+package org.xbib.io.archive.dump;
+
+/**
+ * Short File Exception. There was an unexpected EOF when reading
+ * the input stream.
+ */
+public class ShortFileException extends DumpArchiveException {
+
+    private static final long serialVersionUID = 1L;
+
+    public ShortFileException() {
+        super("unexpected EOF");
+    }
+}
diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/TapeInputStream.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/TapeInputStream.java
new file mode 100644
index 0000000..7e070ad
--- /dev/null
+++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/TapeInputStream.java
@@ -0,0 +1,331 @@
+package org.xbib.io.archive.dump;
+
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.zip.DataFormatException;
+import java.util.zip.Inflater;
+
+/**
+ * Filter stream that mimics a physical tape drive capable of handling
+ * a compressed data stream.
+ */
+class TapeInputStream extends FilterInputStream {
+
+    private byte[] blockBuffer = new byte[DumpArchiveConstants.TP_SIZE];
+    private int currBlkIdx = -1;
+    private int blockSize = DumpArchiveConstants.TP_SIZE;
+    private int recordSize = DumpArchiveConstants.TP_SIZE;
+    private int readOffset = DumpArchiveConstants.TP_SIZE;
+    private boolean isCompressed = false;
+    private long bytesRead = 0;
+
+    /**
+     * Constructor.
+     */
+    public TapeInputStream(InputStream in) {
+        super(in);
+    }
+
+    /**
+     * Set the DumpArchive Buffer's block size. We need to sync the block size with the
+     * dump archive's actual block size since compression is handled at the
+     * block level.
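+     * The new block size is {@code recordSize * recsPerBlock}; for example
+     * (illustrative), with the usual 1024-byte record and a {@code ntrec}
+     * of 10, the stream switches to 10240-byte blocks.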
+     *
+     * @param recsPerBlock records per block
+     * @param isCompressed true if the archive is compressed
+     * @throws java.io.IOException if more than one block has been read, or
+     *                             if there was an error reading additional blocks
+     */
+    public void resetBlockSize(int recsPerBlock, boolean isCompressed)
+            throws IOException {
+        this.isCompressed = isCompressed;
+
+        blockSize = recordSize * recsPerBlock;
+
+        // save first block in case we need it again
+        byte[] oldBuffer = blockBuffer;
+
+        // read rest of new block
+        blockBuffer = new byte[blockSize];
+        System.arraycopy(oldBuffer, 0, blockBuffer, 0, recordSize);
+        readFully(blockBuffer, recordSize, blockSize - recordSize);
+
+        this.currBlkIdx = 0;
+        this.readOffset = recordSize;
+    }
+
+    /**
+     * @see java.io.InputStream#available
+     */
+    @Override
+    public int available() throws IOException {
+        if (readOffset < blockSize) {
+            return blockSize - readOffset;
+        }
+
+        return in.available();
+    }
+
+    /**
+     * @see java.io.InputStream#read()
+     */
+    @Override
+    public int read() throws IOException {
+        throw new IllegalArgumentException(
+                "all reads must be multiple of record size (" + recordSize +
+                        " bytes).");
+    }
+
+    /**
+     * Reads the full given length unless EOF is reached.
+     *
+     * @param len length to read, must be a multiple of the stream's
+     *            record size
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        if ((len % recordSize) != 0) {
+            throw new IllegalArgumentException(
+                    "all reads must be multiple of record size (" + recordSize +
+                            " bytes).");
+        }
+
+        int bytes = 0;
+
+        while (bytes < len) {
+            // we need to read from the underlying stream.
+            // this will reset readOffset value.
+            // return -1 if there's a problem.
+            if ((readOffset == blockSize) && !readBlock(true)) {
+                return -1;
+            }
+
+            int n = 0;
+
+            if ((readOffset + (len - bytes)) <= blockSize) {
+                // we can read entirely from the buffer.
+                n = len - bytes;
+            } else {
+                // copy what we can from the buffer.
+                n = blockSize - readOffset;
+            }
+
+            // copy data, increment counters.
+            System.arraycopy(blockBuffer, readOffset, b, off, n);
+            readOffset += n;
+            bytes += n;
+            off += n;
+        }
+
+        return bytes;
+    }
+
+    /**
+     * Skip bytes. Same as read but without the arraycopy.
+     *
+     * Skips the full given length unless EOF is reached.
+     *
+     * @param len length to skip, must be a multiple of the stream's
+     *            record size
+     * @return the number of bytes skipped, or -1 if the end of the
+     *         stream is reached before anything could be skipped
+     */
+    @Override
+    public long skip(long len) throws IOException {
+        if ((len % recordSize) != 0) {
+            throw new IllegalArgumentException(
+                    "all reads must be multiple of record size (" + recordSize +
+                            " bytes).");
+        }
+
+        long bytes = 0;
+
+        while (bytes < len) {
+            // we need to read from the underlying stream.
+            // this will reset readOffset value. We do not perform
+            // any decompression if we won't eventually read the data.
+            // return -1 if there's a problem.
+            if ((readOffset == blockSize) &&
+                    !readBlock((len - bytes) < blockSize)) {
+                return -1;
+            }
+
+            long n = 0;
+
+            if ((readOffset + (len - bytes)) <= blockSize) {
+                // we can read entirely from the buffer.
+                n = len - bytes;
+            } else {
+                // copy what we can from the buffer.
+                n = blockSize - readOffset;
+            }
+
+            // do not copy data but still increment counters.
+            readOffset += n;
+            bytes += n;
+        }
+
+        return bytes;
+    }
+
+    /**
+     * Close the input stream.
+     *
+     * @throws java.io.IOException on error
+     */
+    @Override
+    public void close() throws IOException {
+        if (in != null && in != System.in) {
+            in.close();
+        }
+    }
+
+    /**
+     * Peek at the next record from the input stream and return the data.
+     *
+     * @return The record data.
+     * @throws java.io.IOException on error
+     */
+    public byte[] peek() throws IOException {
+        // we need to read from the underlying stream. This
+        // isn't a problem since it would be the first step in
+        // any subsequent read() anyway.
+        if ((readOffset == blockSize) && !readBlock(true)) {
+            return null;
+        }
+
+        // copy data, increment counters.
+        byte[] b = new byte[recordSize];
+        System.arraycopy(blockBuffer, readOffset, b, 0, b.length);
+
+        return b;
+    }
+
+    /**
+     * Read a record from the input stream and return the data.
+     *
+     * @return The record data.
+     * @throws java.io.IOException on error
+     */
+    public byte[] readRecord() throws IOException {
+        byte[] result = new byte[recordSize];
+
+        if (-1 == read(result, 0, result.length)) {
+            throw new ShortFileException();
+        }
+
+        return result;
+    }
+
+    /**
+     * Read next block. All decompression is handled here.
+     *
+     * @param decompress if false the buffer will not be decompressed.
+     *                   This is an optimization for longer seeks.
+     * @return false if End-Of-File, else true
+     */
+    private boolean readBlock(boolean decompress) throws IOException {
+        boolean success = true;
+
+        if (in == null) {
+            throw new IOException("input buffer is closed");
+        }
+
+        if (!isCompressed || (currBlkIdx == -1)) {
+            // file is not compressed
+            success = readFully(blockBuffer, 0, blockSize);
+            bytesRead += blockSize;
+        } else {
+            if (!readFully(blockBuffer, 0, 4)) {
+                return false;
+            }
+            bytesRead += 4;
+
+            // the four-byte block header encodes a compressed flag (bit 0),
+            // the compression type (bits 1-3) and the block length (bits 4+).
+            int h = DumpArchiveUtil.convert32(blockBuffer, 0);
+            boolean compressed = (h & 0x01) == 0x01;
+
+            if (!compressed) {
+                // file is compressed but this block is not.
+                success = readFully(blockBuffer, 0, blockSize);
+                bytesRead += blockSize;
+            } else {
+                // this block is compressed.
+                int flags = (h >> 1) & 0x07;
+                int length = (h >> 4) & 0x0FFFFFFF;
+                byte[] compBuffer = new byte[length];
+                success = readFully(compBuffer, 0, length);
+                bytesRead += length;
+
+                if (!decompress) {
+                    // just in case someone reads the data.
+ Arrays.fill(blockBuffer, (byte) 0); + } else { + switch (DumpArchiveConstants.COMPRESSION_TYPE.find(flags & + 0x03)) { + case ZLIB: + + try { + Inflater inflator = new Inflater(); + inflator.setInput(compBuffer, 0, compBuffer.length); + length = inflator.inflate(blockBuffer); + + if (length != blockSize) { + throw new ShortFileException(); + } + + inflator.end(); + } catch (DataFormatException e) { + throw new DumpArchiveException("bad data", e); + } + + break; + + case BZLIB: + throw new UnsupportedCompressionAlgorithmException( + "BZLIB2"); + + case LZO: + throw new UnsupportedCompressionAlgorithmException( + "LZO"); + + default: + throw new UnsupportedCompressionAlgorithmException(); + } + } + } + } + + currBlkIdx++; + readOffset = 0; + + return success; + } + + /** + * Read buffer + */ + private boolean readFully(byte[] b, int off, int len) + throws IOException { + int count = 0; + + while (count < len) { + int n = in.read(b, off + count, len - count); + + if (n == -1) { + throw new ShortFileException(); + } + + count += n; + } + + return true; + } + + /** + * Get number of bytes read. + */ + public long getBytesRead() { + return bytesRead; + } +} diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/UnrecognizedFormatException.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/UnrecognizedFormatException.java new file mode 100644 index 0000000..81f85f9 --- /dev/null +++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/UnrecognizedFormatException.java @@ -0,0 +1,14 @@ + +package org.xbib.io.archive.dump; + +/** + * Unrecognized Format Exception. This is either not a recognized dump archive or there's + * a bad tape segment header. + */ +public class UnrecognizedFormatException extends DumpArchiveException { + private static final long serialVersionUID = 1L; + + public UnrecognizedFormatException() { + super("this is not a recognized format."); + } +} diff --git a/io-archive-dump/src/main/java/org/xbib/io/archive/dump/UnsupportedCompressionAlgorithmException.java b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/UnsupportedCompressionAlgorithmException.java new file mode 100644 index 0000000..ab9a3a5 --- /dev/null +++ b/io-archive-dump/src/main/java/org/xbib/io/archive/dump/UnsupportedCompressionAlgorithmException.java @@ -0,0 +1,20 @@ + +package org.xbib.io.archive.dump; + +/** + * Unsupported compression algorithm. The dump archive uses an unsupported + * compression algorithm (BZLIB2 or LZO). 
+ */ +public class UnsupportedCompressionAlgorithmException + extends DumpArchiveException { + private static final long serialVersionUID = 1L; + + public UnsupportedCompressionAlgorithmException() { + super("this file uses an unsupported compression algorithm."); + } + + public UnsupportedCompressionAlgorithmException(String alg) { + super("this file uses an unsupported compression algorithm: " + alg + + "."); + } +} diff --git a/io-archive-jar/build.gradle b/io-archive-jar/build.gradle new file mode 100644 index 0000000..3dcc83b --- /dev/null +++ b/io-archive-jar/build.gradle @@ -0,0 +1,4 @@ +dependencies { + api project(':io-archive') + api project(':io-archive-zip') +} diff --git a/io-archive-jar/src/main/java/module-info.java b/io-archive-jar/src/main/java/module-info.java new file mode 100644 index 0000000..45c7030 --- /dev/null +++ b/io-archive-jar/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.xbib.io.archive.jar { + exports org.xbib.io.archive.jar; + requires org.xbib.io.archive; + requires org.xbib.io.archive.zip; +} diff --git a/io-archive-jar/src/main/java/org/xbib/io/archive/jar/JarArchiveEntry.java b/io-archive-jar/src/main/java/org/xbib/io/archive/jar/JarArchiveEntry.java new file mode 100644 index 0000000..06d81b9 --- /dev/null +++ b/io-archive-jar/src/main/java/org/xbib/io/archive/jar/JarArchiveEntry.java @@ -0,0 +1,60 @@ +package org.xbib.io.archive.jar; + +import org.xbib.io.archive.zip.ZipArchiveEntry; + +import java.security.cert.Certificate; +import java.util.jar.Attributes; +import java.util.jar.JarEntry; +import java.util.zip.ZipEntry; +import java.util.zip.ZipException; + +public class JarArchiveEntry extends ZipArchiveEntry { + + private Attributes manifestAttributes = null; + + private Certificate[] certificates = null; + + public JarArchiveEntry() { + super(); + } + + public JarArchiveEntry(ZipEntry entry) throws ZipException { + super(entry); + } + + public JarArchiveEntry(String name) { + super(name); + } + + public JarArchiveEntry(ZipArchiveEntry entry) throws ZipException { + super(entry); + } + + public JarArchiveEntry(JarEntry entry) throws ZipException { + super(entry); + + } + + public Attributes getManifestAttributes() { + return manifestAttributes; + } + + public Certificate[] getCertificates() { + if (certificates != null) { + Certificate[] certs = new Certificate[certificates.length]; + System.arraycopy(certificates, 0, certs, 0, certs.length); + return certs; + } + return null; + } + + @Override + public boolean equals(Object o) { + return super.equals(o); + } + + @Override + public int hashCode() { + return super.hashCode(); + } +} diff --git a/io-archive-jar/src/main/java/org/xbib/io/archive/jar/JarArchiveInputStream.java b/io-archive-jar/src/main/java/org/xbib/io/archive/jar/JarArchiveInputStream.java new file mode 100644 index 0000000..c745ad4 --- /dev/null +++ b/io-archive-jar/src/main/java/org/xbib/io/archive/jar/JarArchiveInputStream.java @@ -0,0 +1,30 @@ + +package org.xbib.io.archive.jar; + +import org.xbib.io.archive.entry.ArchiveEntry; +import org.xbib.io.archive.zip.ZipArchiveEntry; +import org.xbib.io.archive.zip.ZipArchiveInputStream; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Implements an input stream that can read entries from jar files. 
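+ * <p>
+ * A minimal usage sketch (illustrative only; {@code "app.jar"} is a
+ * hypothetical file name):
+ * <pre>{@code
+ * try (InputStream in = Files.newInputStream(Paths.get("app.jar"))) {
+ *     JarArchiveInputStream jar = new JarArchiveInputStream(in);
+ *     JarArchiveEntry entry;
+ *     while ((entry = jar.getNextJarEntry()) != null) {
+ *         System.out.println(entry.getName());
+ *     }
+ *     jar.close();
+ * }
+ * }</pre>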
+ */ +public class JarArchiveInputStream extends ZipArchiveInputStream { + + public JarArchiveInputStream(final InputStream inputStream) { + super(inputStream); + } + + public JarArchiveEntry getNextJarEntry() throws IOException { + ZipArchiveEntry entry = getNextZipEntry(); + return entry == null ? null : new JarArchiveEntry(entry); + } + + @Override + public ArchiveEntry getNextEntry() throws IOException { + return getNextJarEntry(); + } + +} diff --git a/io-archive-jar/src/main/java/org/xbib/io/archive/jar/JarArchiveOutputStream.java b/io-archive-jar/src/main/java/org/xbib/io/archive/jar/JarArchiveOutputStream.java new file mode 100644 index 0000000..91973d6 --- /dev/null +++ b/io-archive-jar/src/main/java/org/xbib/io/archive/jar/JarArchiveOutputStream.java @@ -0,0 +1,31 @@ + +package org.xbib.io.archive.jar; + +import org.xbib.io.archive.zip.JarMarker; +import org.xbib.io.archive.zip.ZipArchiveOutputStream; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * Subclass that adds a special extra field to the very first entry + * which allows the created archive to be used as an executable jar on + * Solaris. + */ +public class JarArchiveOutputStream extends ZipArchiveOutputStream { + + private boolean jarMarkerAdded = false; + + public JarArchiveOutputStream(final OutputStream out) { + super(out); + } + + @Override + public void putArchiveEntry(JarArchiveEntry ze) throws IOException { + if (!jarMarkerAdded) { + ze.addAsFirstExtraField(JarMarker.getInstance()); + jarMarkerAdded = true; + } + super.putArchiveEntry(ze); + } +} diff --git a/io-archive-jar/src/test/java/org/xbib/io/archive/jar/JarTest.java b/io-archive-jar/src/test/java/org/xbib/io/archive/jar/JarTest.java new file mode 100644 index 0000000..9956a43 --- /dev/null +++ b/io-archive-jar/src/test/java/org/xbib/io/archive/jar/JarTest.java @@ -0,0 +1,24 @@ +package org.xbib.io.archive.jar; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import org.junit.jupiter.api.Test; +import java.io.InputStream; + +public class JarTest { + + @Test + public void testJar() throws Exception { + InputStream in = getClass().getResourceAsStream("test.jar"); + JarArchiveInputStream jarArchiveInputStream = new JarArchiveInputStream(in); + byte[] buffer = new byte[1024]; + long total = 0L; + while ((jarArchiveInputStream.getNextEntry()) != null) { + int len = 0; + while ((len = jarArchiveInputStream.read(buffer)) > 0) { + total += len; + } + } + assertEquals(1813L, total); + jarArchiveInputStream.close(); + } +} diff --git a/io-archive-jar/src/test/resources/org/xbib/io/archive/jar/test.jar b/io-archive-jar/src/test/resources/org/xbib/io/archive/jar/test.jar new file mode 100644 index 0000000..9a3553e Binary files /dev/null and b/io-archive-jar/src/test/resources/org/xbib/io/archive/jar/test.jar differ diff --git a/io-archive-tar/build.gradle b/io-archive-tar/build.gradle new file mode 100644 index 0000000..7c4e2c7 --- /dev/null +++ b/io-archive-tar/build.gradle @@ -0,0 +1,3 @@ +dependencies { + api project(':io-archive') +} diff --git a/io-archive-tar/src/main/java/module-info.java b/io-archive-tar/src/main/java/module-info.java new file mode 100644 index 0000000..835d958 --- /dev/null +++ b/io-archive-tar/src/main/java/module-info.java @@ -0,0 +1,4 @@ +module org.xbib.io.archive.tar { + exports org.xbib.io.archive.tar; + requires org.xbib.io.archive; +} diff --git a/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveEntry.java b/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveEntry.java 
new file mode 100644
index 0000000..e1ff479
--- /dev/null
+++ b/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveEntry.java
@@ -0,0 +1,842 @@
+package org.xbib.io.archive.tar;
+
+import org.xbib.io.archive.entry.ArchiveEntry;
+import org.xbib.io.archive.util.ArchiveUtils;
+import org.xbib.io.archive.entry.ArchiveEntryEncoding;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.util.Date;
+
+/**
+ * This class represents an entry in a Tar archive.
+ */
+public class TarArchiveEntry implements TarConstants, ArchiveEntry {
+
+    /**
+     * Maximum length of a user's name in the tar file
+     */
+    private static final int MAX_NAMELEN = 31;
+
+    /**
+     * Default permissions bits for directories
+     */
+    private static final int DEFAULT_DIR_MODE = 040755;
+
+    /**
+     * Default permissions bits for files
+     */
+    private static final int DEFAULT_FILE_MODE = 0100644;
+
+    /**
+     * Convert millis to seconds
+     */
+    private static final int MILLIS_PER_SECOND = 1000;
+
+    /**
+     * The entry's name.
+     */
+    private String name;
+
+    /**
+     * The entry's permission mode.
+     */
+    private int mode;
+
+    /**
+     * The entry's user id.
+     */
+    private int userId;
+
+    /**
+     * The entry's group id.
+     */
+    private int groupId;
+
+    /**
+     * The entry's size.
+     */
+    private long size;
+
+    /**
+     * The entry's modification time.
+     */
+    private long modTime;
+
+    /**
+     * The entry's link flag.
+     */
+    private byte linkFlag;
+
+    /**
+     * The entry's link name.
+     */
+    private String linkName;
+
+    /**
+     * The version of the format
+     */
+    private String version;
+
+    /**
+     * The entry's user name.
+     */
+    private String userName;
+
+    /**
+     * The entry's group name.
+     */
+    private String groupName;
+
+    /**
+     * The entry's major device number.
+     */
+    private int devMajor;
+
+    /**
+     * The entry's minor device number.
+     */
+    private int devMinor;
+
+    /**
+     * If an extension sparse header follows.
+     */
+    private boolean isExtended;
+
+    /**
+     * The entry's real size in case of a sparse file.
+     */
+    private long realSize;
+
+    private boolean isDir;
+
+    /**
+     * Construct an empty entry and prepare the header values.
+     */
+    public TarArchiveEntry() {
+        this.version = VERSION_POSIX;
+        this.name = "";
+        this.linkName = "";
+        this.linkFlag = LF_GNUTYPE_LONGNAME;
+        String user = System.getProperty("user.name", "");
+        if (user.length() > MAX_NAMELEN) {
+            user = user.substring(0, MAX_NAMELEN);
+        }
+        this.userName = user;
+        this.groupName = "";
+        this.userId = 0;
+        this.groupId = 0;
+        this.mode = DEFAULT_FILE_MODE;
+    }
+
+    /**
+     * Construct an entry with only a name. This allows the programmer
+     * to construct the entry's header "by hand".
+     *
+     * @param name the entry name
+     */
+    public TarArchiveEntry(String name) {
+        this(name, false);
+    }
+
+    /**
+     * Construct an entry with only a name. This allows the programmer
+     * to construct the entry's header "by hand".
+     *
+     * @param name the entry name
+     * @param preserveLeadingSlashes whether to allow leading slashes
+     *                               in the name.
+     */
+    public TarArchiveEntry(String name, boolean preserveLeadingSlashes) {
+        this();
+        name = ArchiveUtils.normalizeFileName(name, preserveLeadingSlashes);
+        this.name = name;
+        this.isDir = name.endsWith("/");
+        this.mode = isDir ? DEFAULT_DIR_MODE : DEFAULT_FILE_MODE;
+        this.linkFlag = isDir ?
LF_DIR : LF_NORMAL; + this.devMajor = 0; + this.devMinor = 0; + this.userId = 0; + this.groupId = 0; + this.size = 0; + this.modTime = (new Date()).getTime() / MILLIS_PER_SECOND; + this.linkName = ""; + this.userName = ""; + this.groupName = ""; + } + + /** + * Construct an entry with a name and a link flag. + * + * @param name the entry name + * @param linkFlag the entry link flag. + */ + public TarArchiveEntry(String name, byte linkFlag) { + this(name); + this.linkFlag = linkFlag; + if (linkFlag == LF_GNUTYPE_LONGNAME) { + version = VERSION_GNU_SPACE; + } + } + + /** + * Construct an entry from an archive's header bytes. File is set + * to null. + * + * @param headerBuf The header bytes from a tar archive entry. + * @param encoding encoding to use for file names + * @throws IllegalArgumentException if any of the numeric fields have an invalid format + */ + public TarArchiveEntry(byte[] headerBuf, ArchiveEntryEncoding encoding) throws IOException { + this(); + parseTarHeader(headerBuf, encoding); + } + + /** + * Determine if the two entries are equal. Equality is determined + * by the header names being equal. + * + * @param it Entry to be checked for equality. + * @return True if the entries are equal. + */ + public boolean equals(TarArchiveEntry it) { + return getName().equals(it.getName()); + } + + /** + * Determine if the two entries are equal. Equality is determined + * by the header names being equal. + * + * @param it Entry to be checked for equality. + * @return True if the entries are equal. + */ + @Override + public boolean equals(Object it) { + return !(it == null || getClass() != it.getClass()) && equals((TarArchiveEntry) it); + } + + /** + * Hashcodes are based on entry names. + * + * @return the entry hashcode + */ + @Override + public int hashCode() { + return getName().hashCode(); + } + + /** + * Get this entry's name. + * + * @return This entry's name. + */ + public String getName() { + return name; + } + + /** + * Set this entry's name. + * + * @param name This entry's new name. + */ + public TarArchiveEntry setName(String name) { + this.name = ArchiveUtils.normalizeFileName(name, false); + this.isDir = name.endsWith("/"); + this.mode = isDir ? DEFAULT_DIR_MODE : DEFAULT_FILE_MODE; + this.linkFlag = isDir ? LF_DIR : LF_NORMAL; + return this; + } + + /** + * Set this entry's modification time + * + * @param date This entry's new modification time + */ + public TarArchiveEntry setLastModified(Date date) { + modTime = date.getTime() / MILLIS_PER_SECOND; + return this; + } + + public Date getLastModified() { + return new Date(modTime * MILLIS_PER_SECOND); + } + + @Override + public boolean isDirectory() { + return isDir; + } + + /** + * Set this entry's file size. + * + * @param size This entry's new file size. + * @throws IllegalArgumentException if the size is < 0. + */ + public TarArchiveEntry setEntrySize(long size) { + if (size < 0) { + throw new IllegalArgumentException("size is out of range: " + size); + } + this.size = size; + return this; + } + + /** + * Get this entry's file size. + * + * @return This entry's file size. + */ + public long getEntrySize() { + return size; + } + + /** + * Set the mode for this entry + * + * @param mode the mode for this entry + */ + public void setMode(int mode) { + this.mode = mode; + } + + /** + * Get this entry's link name. + * + * @return This entry's link name. + */ + public String getLinkName() { + return linkName; + } + + /** + * Set this entry's link name. + * + * @param link the link name to use. 
+ */ + public void setLinkName(String link) { + this.linkName = link; + } + + /** + * Get this entry's user id. + * + * @return This entry's user id. + */ + public int getUserId() { + return userId; + } + + /** + * Set this entry's user id. + * + * @param userId This entry's new user id. + */ + public void setUserId(int userId) { + this.userId = userId; + } + + /** + * Get this entry's group id. + * + * @return This entry's group id. + */ + public int getGroupId() { + return groupId; + } + + /** + * Set this entry's group id. + * + * @param groupId This entry's new group id. + */ + public void setGroupId(int groupId) { + this.groupId = groupId; + } + + /** + * Get this entry's user name. + * + * @return This entry's user name. + */ + public String getUserName() { + return userName; + } + + /** + * Set this entry's user name. + * + * @param userName This entry's new user name. + */ + public void setUserName(String userName) { + this.userName = userName; + } + + /** + * Get this entry's group name. + * + * @return This entry's group name. + */ + public String getGroupName() { + return groupName; + } + + /** + * Set this entry's group name. + * + * @param groupName This entry's new group name. + */ + public void setGroupName(String groupName) { + this.groupName = groupName; + } + + /** + * Get this entry's mode. + * + * @return This entry's mode. + */ + public int getMode() { + return mode; + } + + + /** + * Get this entry's major device number. + * + * @return This entry's major device number. + */ + public int getDevMajor() { + return devMajor; + } + + /** + * Set this entry's major device number. + * + * @param devNo This entry's major device number. + * @throws IllegalArgumentException if the devNo is < 0. + */ + public void setDevMajor(int devNo) { + if (devNo < 0) { + throw new IllegalArgumentException("Major device number is out of " + + "range: " + devNo); + } + this.devMajor = devNo; + } + + /** + * Get this entry's minor device number. + * + * @return This entry's minor device number. + */ + public int getDevMinor() { + return devMinor; + } + + /** + * Set this entry's minor device number. + * + * @param devNo This entry's minor device number. + * @throws IllegalArgumentException if the devNo is < 0. + */ + public void setDevMinor(int devNo) { + if (devNo < 0) { + throw new IllegalArgumentException("Minor device number is out of " + "range: " + devNo); + } + this.devMinor = devNo; + } + + /** + * Indicates in case of a sparse file if an extension sparse header + * follows. + * + * @return true if an extension sparse header follows. + */ + public boolean isExtended() { + return isExtended; + } + + /** + * Get this entry's real file size in case of a sparse file. + * + * @return This entry's real file size. + */ + public long getRealSize() { + return realSize; + } + + /** + * Indicate if this entry is a GNU sparse block + * + * @return true if this is a sparse extension provided by GNU tar + */ + public boolean isGNUSparse() { + return linkFlag == LF_GNUTYPE_SPARSE; + } + + /** + * Indicate if this entry is a GNU long name block + * + * @return true if this is a long name extension provided by GNU tar + */ + public boolean isGNULongNameEntry() { + return linkFlag == LF_GNUTYPE_LONGNAME && GNU_LONGLINK.equals(name); + } + + /** + * Check if this is a Pax header. + * + * @return {@code true} if this is a Pax header. + */ + public boolean isPaxHeader() { + return linkFlag == LF_PAX_EXTENDED_HEADER_LC || linkFlag == LF_PAX_EXTENDED_HEADER_UC; + } + + /** + * Check if this is a Pax header. 
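+     * Unlike {@code isPaxHeader()}, this tests for the <em>global</em>
+     * extended-header flag, whose records apply to all following entries.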
+     *
+     * @return {@code true} if this is a global Pax header.
+     */
+    public boolean isGlobalPaxHeader() {
+        return linkFlag == LF_PAX_GLOBAL_EXTENDED_HEADER;
+    }
+
+    /**
+     * Check if this is a symbolic link entry.
+     */
+    public boolean isSymbolicLink() {
+        return linkFlag == LF_SYMLINK;
+    }
+
+    /**
+     * Check if this is a link entry.
+     */
+    public boolean isLink() {
+        return linkFlag == LF_LINK;
+    }
+
+    /**
+     * Check if this is a character device entry.
+     */
+    public boolean isCharacterDevice() {
+        return linkFlag == LF_CHR;
+    }
+
+    /**
+     * Check if this is a block device entry.
+     */
+    public boolean isBlockDevice() {
+        return linkFlag == LF_BLK;
+    }
+
+    /**
+     * Check if this is a FIFO (pipe) entry.
+     */
+    public boolean isFIFO() {
+        return linkFlag == LF_FIFO;
+    }
+
+    /**
+     * Parse an entry's header information from a header buffer.
+     *
+     * @param header   The tar entry header buffer to get information from.
+     * @param encoding encoding to use for file names
+     * @throws IllegalArgumentException if any of the numeric fields
+     *                                  have an invalid format
+     */
+    public void parseTarHeader(byte[] header, ArchiveEntryEncoding encoding)
+            throws IOException {
+        parseTarHeader(header, encoding, false);
+    }
+
+    private void parseTarHeader(byte[] header, ArchiveEntryEncoding encoding, final boolean oldStyle)
+            throws IOException {
+        int offset = 0;
+        int type = evaluateType(header);
+        name = parseFileName(header);
+        offset += NAMELEN;
+        mode = (int) parseOctalOrBinary(header, offset, MODELEN);
+        offset += MODELEN;
+        userId = (int) parseOctalOrBinary(header, offset, UIDLEN);
+        offset += UIDLEN;
+        groupId = (int) parseOctalOrBinary(header, offset, GIDLEN);
+        offset += GIDLEN;
+        if (type == GNU_FORMAT) {
+            size = getSize(header, offset, SIZELEN);
+        } else {
+            size = parseOctalOrBinary(header, offset, SIZELEN);
+        }
+        offset += SIZELEN;
+        modTime = parseOctalOrBinary(header, offset, MODTIMELEN);
+        offset += MODTIMELEN;
+        offset += CHKSUMLEN;
+        linkFlag = header[offset++];
+        linkName = oldStyle ? parseName(header, offset, NAMELEN) : parseName(header, offset, NAMELEN, encoding);
+        offset += NAMELEN;
+        switch (type) {
+            case UNIX_FORMAT: {
+                offset += ATIMELEN_GNU;
+                offset += CTIMELEN_GNU;
+                offset += OFFSETLEN_GNU;
+                offset += LONGNAMESLEN_GNU;
+                offset += PAD2LEN_GNU;
+                offset += SPARSELEN_GNU;
+                isExtended = parseBoolean(header, offset);
+                offset += ISEXTENDEDLEN_GNU;
+                realSize = parseOctal(header, offset, REALSIZELEN_GNU);
+                offset += REALSIZELEN_GNU;
+                break;
+            }
+            case POSIX_FORMAT: {
+                parseName(header, offset, MAGICLEN); // magic
+                offset += MAGICLEN;
+                version = parseName(header, offset, VERSIONLEN);
+                offset += VERSIONLEN;
+                userName = oldStyle ? parseName(header, offset, UNAMELEN) : parseName(header, offset, UNAMELEN, encoding);
+                offset += UNAMELEN;
+                groupName = oldStyle ? parseName(header, offset, GNAMELEN) : parseName(header, offset, GNAMELEN, encoding);
+                offset += GNAMELEN;
+                devMajor = (int) parseOctalOrBinary(header, offset, DEVLEN);
+                offset += DEVLEN;
+                devMinor = (int) parseOctalOrBinary(header, offset, DEVLEN);
+                offset += DEVLEN;
+            }
+        }
+    }
+
+    /**
+     * Evaluate an entry's header format from a header buffer.
+     *
+     * @param header The tar entry header buffer to evaluate the format for.
+     * @return format type
+     */
+    private int evaluateType(byte[] header) {
+        if (ArchiveUtils.matchAsciiBuffer(MAGIC_UNIX, header, MAGIC_OFFSET, MAGICLEN)) {
+            return UNIX_FORMAT;
+        }
+        if (ArchiveUtils.matchAsciiBuffer(MAGIC_POSIX, header, MAGIC_OFFSET, MAGICLEN)) {
+            return POSIX_FORMAT;
+        }
+        if (ArchiveUtils.matchAsciiBuffer(MAGIC_GNU, header, MAGIC_OFFSET, MAGICLEN)) {
+            return GNU_FORMAT;
+        }
+        return 0;
+    }
+
+    /**
+     * Parse an octal string from a buffer.
+     *
+     * <p>Leading spaces are ignored.
+     * The buffer must contain a trailing space or NUL,
+     * and may contain an additional trailing space or NUL.</p>
+     *
+     * <p>The input buffer is allowed to contain all NULs,
+     * in which case the method returns 0L
+     * (this allows for missing fields).</p>
+     *
+     * <p>To work around some tar implementations that insert a
+     * leading NUL, this method returns 0 if it detects a leading NUL.</p>
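+     *
+     * <p>For example (illustrative): the eight field bytes
+     * {@code '0' '0' '0' '0' '6' '4' '4' ' '} parse to {@code 420},
+     * i.e. {@code 0644} octal.</p>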

+ * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse - must be at least 2 bytes. + * @return The long value of the octal string. + * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected. + */ + private long parseOctal(final byte[] buffer, final int offset, final int length) { + long result = 0; + int end = offset + length; + int start = offset; + if (length < 2) { + throw new IllegalArgumentException("Length " + length + " must be at least 2"); + } + if (buffer[start] == 0) { + return 0L; + } + while (start < end) { + if (buffer[start] == ' ') { + start++; + } else { + break; + } + } + byte trailer; + trailer = buffer[end - 1]; + if (trailer == 0 || trailer == ' ') { + end--; + } else { + throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, end - 1, trailer)); + } + trailer = buffer[end - 1]; + if (trailer == 0 || trailer == ' ') { + end--; + } + for (; start < end; start++) { + final byte currentByte = buffer[start]; + if (currentByte < '0' || currentByte > '7') { + throw new IllegalArgumentException( + exceptionMessage(buffer, offset, length, start, currentByte)); + } + result = (result << 3) + (currentByte - '0'); // convert from ASCII + } + + return result; + } + + /** + * Compute the value contained in a byte buffer. If the most + * significant bit of the first byte in the buffer is set, this + * bit is ignored and the rest of the buffer is interpreted as a + * binary number. Otherwise, the buffer is interpreted as an + * octal number as per the parseOctal function above. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse. + * @return The long value of the octal or binary string. + * @throws IllegalArgumentException if the trailing space/NUL is + * missing or an invalid byte is detected in an octal number, or + * if a binary number would exceed the size of a signed long + * 64-bit integer. + */ + private long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) { + if ((buffer[offset] & 0x80) == 0) { + return parseOctal(buffer, offset, length); + } + final boolean negative = buffer[offset] == (byte) 0xff; + if (length < 9) { + return parseBinaryLong(buffer, offset, length, negative); + } + return parseBinaryBigInteger(buffer, offset, length, negative); + } + + private long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) { + if (length >= 9) { + throw new IllegalArgumentException("At offset " + offset + ", " + + length + " byte binary number" + + " exceeds maximum signed long" + + " value"); + } + long val = 0; + for (int i = 1; i < length; i++) { + val = (val << 8) + (buffer[offset + i] & 0xff); + } + if (negative) { + // 2's complement + val--; + val ^= ((long) Math.pow(2, (length - 1) * 8) - 1); + } + return negative ? 
-val : val; + } + + private long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) { + byte[] remainder = new byte[length - 1]; + System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); + BigInteger val = new BigInteger(remainder); + if (negative) { + // 2's complement + val = val.add(BigInteger.valueOf(-1)).not(); + } + if (val.bitLength() > 63) { + throw new IllegalArgumentException("At offset " + offset + ", " + + length + " byte binary number" + + " exceeds maximum signed long" + + " value"); + } + return negative ? -val.longValue() : val.longValue(); + } + + /** + * Parse a boolean byte from a buffer. + * Leading spaces and NUL are ignored. + * The buffer may contain trailing spaces or NULs. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @return The boolean value of the bytes. + * @throws IllegalArgumentException if an invalid byte is detected. + */ + private boolean parseBoolean(final byte[] buffer, final int offset) { + return buffer[offset] == 1; + } + + private String exceptionMessage(byte[] buffer, final int offset, final int length, int current, final byte currentByte) { + String string = new String(buffer, offset, length); // TODO default charset? + string = string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed + return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length; + } + + /** + * Parse an entry name from a buffer. + * Parsing stops when a NUL is found + * or the buffer length is reached. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse. + * @return The entry name. + */ + private String parseName(byte[] buffer, final int offset, final int length) { + try { + return parseName(buffer, offset, length, ArchiveUtils.DEFAULT_ENCODING); + } catch (IOException ex) { + try { + return parseName(buffer, offset, length, ArchiveUtils.FALLBACK_ENCODING); + } catch (IOException ex2) { + // impossible + throw new RuntimeException(ex2); + } + } + } + + /** + * Parse an entry name from a buffer. + * Parsing stops when a NUL is found + * or the buffer length is reached. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse. + * @param encoding name of the encoding to use for file names + * @return The entry name. 
+ */ + private String parseName(byte[] buffer, final int offset, final int length, final ArchiveEntryEncoding encoding) throws IOException { + int len = length; + for (; len > 0; len--) { + if (buffer[offset + len - 1] != 0) { + break; + } + } + if (len > 0) { + byte[] b = new byte[len]; + System.arraycopy(buffer, offset, b, 0, len); + return encoding.decode(b); + } + return ""; + } + + private long getSize(byte[] header, int offset, int length) { + long test = parseOctal(header, offset, length); + if (test <= 0 && header[offset] == (byte) 128) { + byte[] last = new byte[length]; + System.arraycopy(header, offset, last, 0, length); + last[0] = (byte) 0; + long rSize = new BigInteger(last).longValue(); + last = null; + return rSize; + } + return test; + } + + private String parseFileName(byte[] header) { + StringBuilder result = new StringBuilder(256); + // If header[345] is not equal to zero, then it is the "prefix" + // that 'ustar' defines. It must be prepended to the "normal" + // name field. We are responsible for the separating '/'. + if (header[345] != 0) { + for (int i = 345; i < 500 && header[i] != 0; ++i) { + result.append((char) header[i]); + } + result.append("/"); + } + for (int i = 0; i < 100 && header[i] != 0; ++i) { + result.append((char) header[i]); + } + return result.toString(); + } +} + diff --git a/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveInputStream.java b/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveInputStream.java new file mode 100644 index 0000000..eb32d6e --- /dev/null +++ b/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveInputStream.java @@ -0,0 +1,434 @@ +package org.xbib.io.archive.tar; + +import org.xbib.io.archive.stream.ArchiveInputStream; +import org.xbib.io.archive.entry.ArchiveEntryEncoding; +import org.xbib.io.archive.entry.ArchiveEntryEncodingHelper; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +public class TarArchiveInputStream extends ArchiveInputStream implements TarConstants { + + private final ArchiveEntryEncoding encoding; + + private final InputStream inStream; + + private final int blockSize; + + private final int recordSize; + + private final int recsPerBlock; + + private final byte[] blockBuffer; + + private byte[] readBuf; + + private boolean hasHitEOF; + + private long entrySize; + + private long entryOffset; + + private TarArchiveEntry entry; + + private int currRecIdx; + + /** + * Constructor for TarInputStream. + * + * @param is the input stream to use + */ + public TarArchiveInputStream(InputStream is) { + this.encoding = ArchiveEntryEncodingHelper.getEncoding(null); + this.readBuf = null; + this.hasHitEOF = false; + this.inStream = is; + this.blockSize = DEFAULT_BLOCK_SIZE; + this.recordSize = DEFAULT_RECORD_SIZE; + this.recsPerBlock = this.blockSize / this.recordSize; + this.blockBuffer = new byte[this.blockSize]; + this.currRecIdx = this.recsPerBlock; + } + + /** + * Closes this stream + * + * @throws IOException on error + */ + @Override + public void close() throws IOException { + if (inStream != null) { + if (inStream != System.in) { + inStream.close(); + } + } + } + + /** + * Get the record size + * + * @return the record size. 
+     */
+    public int getRecordSize() {
+        return recordSize;
+    }
+
+    /**
+     * Get the available data that can be read from the current
+     * entry in the archive. This does not indicate how much data
+     * is left in the entire archive, only in the current entry.
+     * This value is determined from the entry's size header field
+     * and the amount of data already read from the current entry.
+     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
+     * bytes are left in the current entry in the archive.
+     *
+     * @return The number of available bytes for the current entry.
+     * @throws IOException on error
+     */
+    @Override
+    public int available() throws IOException {
+        if (entrySize - entryOffset > Integer.MAX_VALUE) {
+            return Integer.MAX_VALUE;
+        }
+        return (int) (entrySize - entryOffset);
+    }
+
+    /**
+     * Skip bytes in the input buffer. This skips bytes in the
+     * current entry's data, not the entire archive, and will
+     * stop at the end of the current entry's data if the number
+     * to skip extends beyond that point.
+     *
+     * @param numToSkip The number of bytes to skip.
+     * @return the number actually skipped
+     * @throws IOException on error
+     */
+    @Override
+    public long skip(long numToSkip) throws IOException {
+        // REVIEW
+        // This is horribly inefficient, but it ensures that we
+        // properly skip over bytes
+        //
+        byte[] skipBuf = new byte[1024];
+        long skip = numToSkip;
+        while (skip > 0) {
+            int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
+            int numRead = read(skipBuf, 0, realSkip);
+            if (numRead == -1) {
+                break;
+            }
+            skip -= numRead;
+        }
+        return (numToSkip - skip);
+    }
+
+    /**
+     * Since we do not support marking just yet, we do nothing.
+     */
+    @Override
+    public void reset() {
+    }
+
+    /**
+     * Get the next entry in this tar archive. This will skip
+     * over any remaining data in the current entry, if there
+     * is one, position the input stream at the header of the
+     * next entry, read that header, instantiate a new TarEntry
+     * from the header bytes, and return that entry.
+     * If there are no more entries in the archive, null will
+     * be returned to indicate that the end of the archive has
+     * been reached.
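+     * <p>
+     * A typical read loop might look like this (illustrative sketch;
+     * creation of the underlying input stream is assumed):
+     * <pre>
+     * TarArchiveInputStream in = new TarArchiveInputStream(inputStream);
+     * TarArchiveEntry entry;
+     * while ((entry = in.getNextTarEntry()) != null) {
+     *     // consume entry data via read(...) before fetching the next entry
+     * }
+     * in.close();
+     * </pre>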
+     * @return The next TarEntry in the archive, or null.
+     * @throws IOException on error
+     */
+    public synchronized TarArchiveEntry getNextTarEntry() throws IOException {
+        if (hasHitEOF) {
+            return null;
+        }
+        if (entry != null) {
+            long numToSkip = entrySize - entryOffset;
+            while (numToSkip > 0) {
+                long skipped = skip(numToSkip);
+                if (skipped <= 0) {
+                    throw new RuntimeException("failed to skip current tar entry");
+                }
+                numToSkip -= skipped;
+            }
+            readBuf = null;
+        }
+        byte[] headerBuf = getRecord();
+        if (hasHitEOF) {
+            entry = null;
+            return null;
+        }
+        try {
+            this.entry = new TarArchiveEntry(headerBuf, encoding);
+            this.entryOffset = 0;
+            this.entrySize = this.entry.getEntrySize();
+        } catch (IllegalArgumentException e) {
+            throw new IOException("error detected parsing the header", e);
+        }
+        if (entry.isGNULongNameEntry()) {
+            StringBuilder longName = new StringBuilder();
+            byte[] buf = new byte[SMALL_BUFFER_SIZE];
+            int length;
+            while ((length = read(buf)) >= 0) {
+                longName.append(new String(buf, 0, length));
+            }
+            getNextEntry();
+            if (entry == null) {
+                return null;
+            }
+            if (longName.length() > 0 && longName.charAt(longName.length() - 1) == 0) {
+                longName.deleteCharAt(longName.length() - 1);
+            }
+            entry.setName(longName.toString());
+        }
+        if (entry.isPaxHeader()) {
+            paxHeaders();
+        }
+        return entry;
+    }
+
+    /**
+     * Get the next record in this tar archive. This will skip
+     * over any remaining data in the current entry, if there
+     * is one, and place the input stream at the header of the
+     * next entry.
+     * If there are no more entries in the archive, null will
+     * be returned to indicate that the end of the archive has
+     * been reached.
+     *
+     * @return The next header in the archive, or null.
+     * @throws IOException on error
+     */
+    private byte[] getRecord() throws IOException {
+        if (hasHitEOF) {
+            return null;
+        }
+        byte[] headerBuf = readRecord();
+        if (headerBuf == null) {
+            hasHitEOF = true;
+        } else if (isEOFRecord(headerBuf)) {
+            hasHitEOF = true;
+        }
+        return hasHitEOF ? null : headerBuf;
+    }
+
+    /**
+     * Read a record from the input stream and return the data.
+     *
+     * @return The record data.
+     * @throws IOException on error
+     */
+    private byte[] readRecord() throws IOException {
+        if (currRecIdx >= recsPerBlock && !readBlock()) {
+            return null;
+        }
+        byte[] result = new byte[recordSize];
+        System.arraycopy(blockBuffer, (currRecIdx * recordSize), result, 0, recordSize);
+        currRecIdx++;
+        return result;
+    }
+
+    private boolean readBlock() throws IOException {
+        currRecIdx = 0;
+        int offset = 0;
+        int bytesNeeded = blockSize;
+        while (bytesNeeded > 0) {
+            long numBytes = inStream.read(blockBuffer, offset, bytesNeeded);
+            if (numBytes == -1) {
+                if (offset == 0) {
+                    return false;
+                }
+                Arrays.fill(blockBuffer, offset, offset + bytesNeeded, (byte) 0);
+                break;
+            }
+            offset += numBytes;
+            bytesNeeded -= numBytes;
+        }
+        return true;
+    }
+
+    /**
+     * Determine if an archive record indicates End of Archive. End of
+     * archive is indicated by a record that consists entirely of null bytes.
+     *
+     * @param record The record data to check.
+     * @return true if the record data is an End of Archive
+     */
+    private boolean isEOFRecord(byte[] record) {
+        for (int i = 0, sz = getRecordSize(); i < sz; ++i) {
+            if (record[i] != 0) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private void paxHeaders() throws IOException {
+        Map<String, String> headers = parsePaxHeaders(this);
+        getNextEntry(); // Get the actual file entry
+        applyPaxHeadersToCurrentEntry(headers);
+    }
+
+    private Map<String, String> parsePaxHeaders(InputStream i) throws IOException {
+        Map<String, String> headers = new HashMap<>();
+        // Format is "length keyword=value\n";
+        while (true) { // get length
+            int ch;
+            int len = 0;
+            int read = 0;
+            while ((ch = i.read()) != -1) {
+                read++;
+                if (ch == ' ') { // End of length string
+                    // Get keyword
+                    ByteArrayOutputStream coll = new ByteArrayOutputStream();
+                    while ((ch = i.read()) != -1) {
+                        read++;
+                        if (ch == '=') { // end of keyword
+                            String keyword = coll.toString("UTF-8");
+                            // Get rest of entry
+                            byte[] rest = new byte[len - read];
+                            int got = i.read(rest);
+                            if (got != len - read) {
+                                throw new IOException("Failed to read "
+                                        + "PAX header. Expected "
+                                        + (len - read)
+                                        + " bytes, read "
+                                        + got);
+                            }
+                            // Drop trailing NL
+                            String value = new String(rest, 0,
+                                    len - read - 1, Charset.forName("UTF-8"));
+                            headers.put(keyword, value);
+                            break;
+                        }
+                        coll.write((byte) ch);
+                    }
+                    break; // Processed single header
+                }
+                len *= 10;
+                len += ch - '0';
+            }
+            if (ch == -1) { // EOF
+                break;
+            }
+        }
+        return headers;
+    }
+
+    private void applyPaxHeadersToCurrentEntry(Map<String, String> headers) {
+        /*
+         * The following headers are defined for Pax.
+         * atime, ctime, charset: cannot use these without changing TarArchiveEntry fields
+         * mtime
+         * comment
+         * gid, gname
+         * linkpath
+         * size
+         * uid, uname
+         * SCHILY.devminor, SCHILY.devmajor
+         */
+        for (Entry<String, String> ent : headers.entrySet()) {
+            String key = ent.getKey();
+            String val = ent.getValue();
+            if ("path".equals(key)) {
+                entry.setName(val);
+            } else if ("linkpath".equals(key)) {
+                entry.setLinkName(val);
+            } else if ("gid".equals(key)) {
+                entry.setGroupId(Integer.parseInt(val));
+            } else if ("gname".equals(key)) {
+                entry.setGroupName(val);
+            } else if ("uid".equals(key)) {
+                entry.setUserId(Integer.parseInt(val));
+            } else if ("uname".equals(key)) {
+                entry.setUserName(val);
+            } else if ("size".equals(key)) {
+                entry.setEntrySize(Long.parseLong(val));
+            } else if ("mtime".equals(key)) {
+                long mtime = (long) (Double.parseDouble(val) * 1000);
+                entry.setLastModified(new Date(mtime));
+            } else if ("SCHILY.devminor".equals(key)) {
+                entry.setDevMinor(Integer.parseInt(val));
+            } else if ("SCHILY.devmajor".equals(key)) {
+                entry.setDevMajor(Integer.parseInt(val));
+            }
+        }
+    }
+
+    @Override
+    public TarArchiveEntry getNextEntry() throws IOException {
+        return getNextTarEntry();
+    }
+
+    /**
+     * Reads bytes from the current tar archive entry.
+     * This method is aware of the boundaries of the current
+     * entry in the archive and will deal with them as if they
+     * were this stream's start and EOF.
+     *
+     * @param buf The buffer into which to place bytes read.
+     * @param offset The offset at which to place bytes read.
+     * @param numToRead The number of bytes to read.
+ * @return The number of bytes read, or -1 at EOF + * @throws IOException on error + */ + @Override + public int read(byte[] buf, int offset, int numToRead) throws IOException { + int totalRead = 0; + if (entryOffset >= entrySize) { + return -1; + } + if ((numToRead + entryOffset) > entrySize) { + numToRead = (int) (entrySize - entryOffset); + } + if (readBuf != null) { + int sz = (numToRead > readBuf.length) ? readBuf.length : numToRead; + System.arraycopy(readBuf, 0, buf, offset, sz); + if (sz >= readBuf.length) { + readBuf = null; + } else { + int newLen = readBuf.length - sz; + byte[] newBuf = new byte[newLen]; + System.arraycopy(readBuf, sz, newBuf, 0, newLen); + readBuf = newBuf; + } + totalRead += sz; + numToRead -= sz; + offset += sz; + } + while (numToRead > 0) { + byte[] rec = readRecord(); + if (rec == null) { + throw new IOException("unexpected EOF with " + numToRead + " bytes unread"); + } + int sz = numToRead; + int recLen = rec.length; + if (recLen > sz) { + System.arraycopy(rec, 0, buf, offset, sz); + readBuf = new byte[recLen - sz]; + System.arraycopy(rec, sz, readBuf, 0, recLen - sz); + } else { + sz = recLen; + System.arraycopy(rec, 0, buf, offset, recLen); + } + totalRead += sz; + numToRead -= sz; + offset += sz; + } + entryOffset += totalRead; + return totalRead; + } + +} diff --git a/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveOutputEntry.java b/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveOutputEntry.java new file mode 100644 index 0000000..6e61642 --- /dev/null +++ b/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveOutputEntry.java @@ -0,0 +1,859 @@ +package org.xbib.io.archive.tar; + +import org.xbib.io.archive.entry.ArchiveEntry; +import org.xbib.io.archive.entry.ArchiveEntryEncoding; +import org.xbib.io.archive.util.ArchiveUtils; + +import java.io.File; +import java.io.IOException; +import java.math.BigInteger; +import java.util.Date; + +/** + * This class represents an entry in a Tar archive for output + */ +public class TarArchiveOutputEntry implements TarConstants, ArchiveEntry { + + private static final int BYTE_MASK = 255; + + /** + * Maximum length of a user's name in the tar file + */ + public static final int MAX_NAMELEN = 31; + + /** + * Default permissions bits for directories + */ + public static final int DEFAULT_DIR_MODE = 040755; + + /** + * Default permissions bits for files + */ + public static final int DEFAULT_FILE_MODE = 0100644; + + /** + * Convert millis to seconds + */ + public static final int MILLIS_PER_SECOND = 1000; + + /** + * The entry's name. + */ + private String name; + + /** + * The entry's permission mode. + */ + private int mode; + + /** + * The entry's user id. + */ + private int userId; + + /** + * The entry's group id. + */ + private int groupId; + + /** + * The entry's size. + */ + private long size; + + /** + * The entry's modification time. + */ + private long modTime; + + /** + * The entry's link flag. + */ + private byte linkFlag; + + /** + * The entry's link name. + */ + private String linkName; + + /** + * The entry's magic tag. + */ + private String magic; + /** + * The version of the format + */ + private String version; + + /** + * The entry's user name. + */ + private String userName; + + /** + * The entry's group name. + */ + private String groupName; + + /** + * The entry's major device number. + */ + private int devMajor; + + /** + * The entry's minor device number. + */ + private int devMinor; + + /** + * If an extension sparse header follows. 
+ */ + private boolean isExtended; + + /** + * The entry's real size in case of a sparse file. + */ + private long realSize; + + /** + * The entry's file reference + */ + private File file; + + /** + * Construct an empty entry and prepares the header values. + */ + public TarArchiveOutputEntry() { + this.magic = MAGIC_POSIX; + this.version = VERSION_POSIX; + this.name = ""; + this.linkName = ""; + String user = System.getProperty("user.name", ""); + if (user.length() > MAX_NAMELEN) { + user = user.substring(0, MAX_NAMELEN); + } + this.userId = 0; + this.groupId = 0; + this.userName = user; + this.groupName = ""; + this.file = null; + this.mode = DEFAULT_FILE_MODE; + } + + /** + * Construct an entry with only a name. This allows the programmer + * to construct the entry's header "by hand". File is set to null. + * + * @param name the entry name + */ + public TarArchiveOutputEntry(String name) { + this(name, false); + } + + /** + * Construct an entry with only a name. This allows the programmer + * to construct the entry's header "by hand". File is set to null. + * + * @param name the entry name + * @param preserveLeadingSlashes whether to allow leading slashes + * in the name. + */ + public TarArchiveOutputEntry(String name, boolean preserveLeadingSlashes) { + this(); + name = ArchiveUtils.normalizeFileName(name, preserveLeadingSlashes); + this.name = name; + boolean isDir = name.endsWith("/"); + this.mode = isDir ? DEFAULT_DIR_MODE : DEFAULT_FILE_MODE; + this.linkFlag = isDir ? LF_DIR : LF_NORMAL; + this.devMajor = 0; + this.devMinor = 0; + this.userId = 0; + this.groupId = 0; + this.size = 0; + this.modTime = (new Date()).getTime() / MILLIS_PER_SECOND; + this.linkName = ""; + this.userName = ""; + this.groupName = ""; + } + + /** + * Construct an entry with a name and a link flag. + * + * @param name the entry name + * @param linkFlag the entry link flag. + */ + public TarArchiveOutputEntry(String name, byte linkFlag) { + this(name); + this.linkFlag = linkFlag; + if (linkFlag == LF_GNUTYPE_LONGNAME) { + magic = MAGIC_GNU; + version = VERSION_GNU_SPACE; + } + } + + /** + * Construct an entry for a file. File is set to file, and the + * header is constructed from information from the file. + * The name is set from the normalized file path. + * + * @param file The file that the entry represents. + */ + public TarArchiveOutputEntry(File file) { + this(file, ArchiveUtils.normalizeFileName(file.getPath(), false)); + } + + /** + * Construct an entry for a file. File is set to file, and the + * header is constructed from information from the file. + * + * @param file The file that the entry represents. + * @param fileName the name to be used for the entry. + */ + public TarArchiveOutputEntry(File file, String fileName) { + this(); + this.file = file; + this.linkName = ""; + if (file.isDirectory()) { + this.mode = DEFAULT_DIR_MODE; + this.linkFlag = LF_DIR; + + int nameLength = fileName.length(); + if (nameLength == 0 || fileName.charAt(nameLength - 1) != '/') { + this.name = fileName + "/"; + } else { + this.name = fileName; + } + this.size = 0; + } else { + this.mode = DEFAULT_FILE_MODE; + this.linkFlag = LF_NORMAL; + this.size = file.length(); + this.name = fileName; + } + this.modTime = file.lastModified() / MILLIS_PER_SECOND; + this.devMajor = 0; + this.devMinor = 0; + } + + /** + * Determine if the two entries are equal. Equality is determined + * by the header names being equal. + * + * @param it Entry to be checked for equality. + * @return True if the entries are equal. 
+ */ + public boolean equals(TarArchiveOutputEntry it) { + return getName().equals(it.getName()); + } + + /** + * Determine if the two entries are equal. Equality is determined + * by the header names being equal. + * + * @param it Entry to be checked for equality. + * @return True if the entries are equal. + */ + @Override + public boolean equals(Object it) { + return !(it == null || getClass() != it.getClass()) && equals((TarArchiveOutputEntry) it); + } + + /** + * Hashcodes are based on entry names. + * + * @return the entry hashcode + */ + @Override + public int hashCode() { + return getName().hashCode(); + } + + /** + * Determine if the given entry is a descendant of this entry. + * Descendancy is determined by the name of the descendant + * starting with this entry's name. + * + * @param desc Entry to be checked as a descendent of this. + * @return True if entry is a descendant of this. + */ + public boolean isDescendent(TarArchiveOutputEntry desc) { + return desc.getName().startsWith(getName()); + } + + /** + * Get this entry's name. + * + * @return This entry's name. + */ + public String getName() { + return name; + } + + /** + * Set this entry's name. + * + * @param name This entry's new name. + */ + public TarArchiveOutputEntry setName(String name) { + this.name = ArchiveUtils.normalizeFileName(name, false); + boolean isDir = name.endsWith("/"); + this.mode = isDir ? DEFAULT_DIR_MODE : DEFAULT_FILE_MODE; + this.linkFlag = isDir ? LF_DIR : LF_NORMAL; + return this; + } + + /** + * Set the mode for this entry + * + * @param mode the mode for this entry + */ + public void setMode(int mode) { + this.mode = mode; + } + + /** + * Get this entry's link name. + * + * @return This entry's link name. + */ + public String getLinkName() { + return linkName; + } + + /** + * Set this entry's link name. + * + * @param link the link name to use. + */ + public void setLinkName(String link) { + this.linkName = link; + } + + /** + * Get this entry's user id. + * + * @return This entry's user id. + */ + public int getUserId() { + return userId; + } + + /** + * Set this entry's user id. + * + * @param userId This entry's new user id. + */ + public void setUserId(int userId) { + this.userId = userId; + } + + /** + * Get this entry's group id. + * + * @return This entry's group id. + */ + public int getGroupId() { + return groupId; + } + + /** + * Set this entry's group id. + * + * @param groupId This entry's new group id. + */ + public void setGroupId(int groupId) { + this.groupId = groupId; + } + + /** + * Get this entry's user name. + * + * @return This entry's user name. + */ + public String getUserName() { + return userName; + } + + /** + * Set this entry's user name. + * + * @param userName This entry's new user name. + */ + public void setUserName(String userName) { + this.userName = userName; + } + + /** + * Get this entry's group name. + * + * @return This entry's group name. + */ + public String getGroupName() { + return groupName; + } + + /** + * Set this entry's group name. + * + * @param groupName This entry's new group name. + */ + public void setGroupName(String groupName) { + this.groupName = groupName; + } + + /** + * Convenience method to set this entry's group and user ids. + * + * @param userId This entry's new user id. + * @param groupId This entry's new group id. + */ + public void setIds(int userId, int groupId) { + setUserId(userId); + setGroupId(groupId); + } + + /** + * Convenience method to set this entry's group and user names. 
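+     * <p>
+     * For example (values are illustrative):
+     * <pre>
+     * entry.setIds(1000, 1000);
+     * entry.setNames("joerg", "users");
+     * </pre>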
+     *
+     * @param userName This entry's new user name.
+     * @param groupName This entry's new group name.
+     */
+    public void setNames(String userName, String groupName) {
+        setUserName(userName);
+        setGroupName(groupName);
+    }
+
+    /**
+     * Set this entry's modification time. The parameter passed
+     * to this method is in "Java time".
+     *
+     * @param date This entry's new modification time.
+     */
+    public TarArchiveOutputEntry setLastModified(Date date) {
+        modTime = date.getTime() / MILLIS_PER_SECOND;
+        return this;
+    }
+
+    public Date getLastModified() {
+        return new Date(modTime * MILLIS_PER_SECOND);
+    }
+
+    /**
+     * Get this entry's file.
+     *
+     * @return This entry's file.
+     */
+    public File getFile() {
+        return file;
+    }
+
+    /**
+     * Get this entry's mode.
+     *
+     * @return This entry's mode.
+     */
+    public int getMode() {
+        return mode;
+    }
+
+    /**
+     * Get this entry's file size.
+     *
+     * @return This entry's file size.
+     */
+    public long getEntrySize() {
+        return size;
+    }
+
+    /**
+     * Set this entry's file size.
+     *
+     * @param size This entry's new file size.
+     * @throws IllegalArgumentException if the size is &lt; 0.
+     */
+    public TarArchiveOutputEntry setEntrySize(long size) {
+        if (size < 0) {
+            throw new IllegalArgumentException("Size is out of range: " + size);
+        }
+        this.size = size;
+        return this;
+    }
+
+    /**
+     * Get this entry's major device number.
+     *
+     * @return This entry's major device number.
+     */
+    public int getDevMajor() {
+        return devMajor;
+    }
+
+    /**
+     * Set this entry's major device number.
+     *
+     * @param devNo This entry's major device number.
+     * @throws IllegalArgumentException if the devNo is &lt; 0.
+     */
+    public void setDevMajor(int devNo) {
+        if (devNo < 0) {
+            throw new IllegalArgumentException("Major device number is out of range: " + devNo);
+        }
+        this.devMajor = devNo;
+    }
+
+    /**
+     * Get this entry's minor device number.
+     *
+     * @return This entry's minor device number.
+     */
+    public int getDevMinor() {
+        return devMinor;
+    }
+
+    /**
+     * Set this entry's minor device number.
+     *
+     * @param devNo This entry's minor device number.
+     * @throws IllegalArgumentException if the devNo is &lt; 0.
+     */
+    public void setDevMinor(int devNo) {
+        if (devNo < 0) {
+            throw new IllegalArgumentException("Minor device number is out of range: " + devNo);
+        }
+        this.devMinor = devNo;
+    }
+
+    /**
+     * Indicates, in case of a sparse file, whether an extension sparse header
+     * follows.
+     *
+     * @return true if an extension sparse header follows.
+     */
+    public boolean isExtended() {
+        return isExtended;
+    }
+
+    /**
+     * Get this entry's real file size in case of a sparse file.
+     *
+     * @return This entry's real file size.
+     */
+    public long getRealSize() {
+        return realSize;
+    }
+
+    /**
+     * Indicate if this entry is a GNU sparse block
+     *
+     * @return true if this is a sparse extension provided by GNU tar
+     */
+    public boolean isGNUSparse() {
+        return linkFlag == LF_GNUTYPE_SPARSE;
+    }
+
+    /**
+     * Indicate if this entry is a GNU long name block
+     *
+     * @return true if this is a long name extension provided by GNU tar
+     */
+    public boolean isGNULongNameEntry() {
+        return linkFlag == LF_GNUTYPE_LONGNAME
+                && name.equals(GNU_LONGLINK);
+    }
+
+    /**
+     * Check if this is a Pax header.
+     *
+     * @return {@code true} if this is a Pax header.
+     */
+    public boolean isPaxHeader() {
+        return linkFlag == LF_PAX_EXTENDED_HEADER_LC
+                || linkFlag == LF_PAX_EXTENDED_HEADER_UC;
+    }
+
+    /**
+     * Check if this is a Pax global extended header.
+     *
+     * @return {@code true} if this is a Pax global extended header.
+ */ + public boolean isGlobalPaxHeader() { + return linkFlag == LF_PAX_GLOBAL_EXTENDED_HEADER; + } + + /** + * Return whether or not this entry represents a directory. + * + * @return True if this entry is a directory. + */ + public boolean isDirectory() { + if (file != null) { + return file.isDirectory(); + } + return linkFlag == LF_DIR || getName().endsWith("/"); + } + + /** + * Check if this is a "normal file" + */ + public boolean isFile() { + if (file != null) { + return file.isFile(); + } + return linkFlag == LF_OLDNORM || linkFlag == LF_NORMAL || !getName().endsWith("/"); + } + + /** + * Check if this is a symbolic link entry. + */ + public boolean isSymbolicLink() { + return linkFlag == LF_SYMLINK; + } + + /** + * Check if this is a link entry. + */ + public boolean isLink() { + return linkFlag == LF_LINK; + } + + /** + * Check if this is a character device entry. + */ + public boolean isCharacterDevice() { + return linkFlag == LF_CHR; + } + + /** + * Check if this is a block device entry. + */ + public boolean isBlockDevice() { + return linkFlag == LF_BLK; + } + + /** + * Check if this is a FIFO (pipe) entry. + */ + public boolean isFIFO() { + return linkFlag == LF_FIFO; + } + + /** + * If this entry represents a file, and the file is a directory, return + * an array of TarEntries for this entry's children. + * + * @return An array of TarEntry's for this entry's children. + */ + public TarArchiveOutputEntry[] getDirectoryEntries() { + if (file == null || !file.isDirectory()) { + return new TarArchiveOutputEntry[0]; + } + String[] list = file.list(); + TarArchiveOutputEntry[] result = new TarArchiveOutputEntry[list.length]; + for (int i = 0; i < list.length; ++i) { + result[i] = new TarArchiveOutputEntry(new File(file, list[i])); + } + return result; + } + + /** + * Write an entry's header information to a header buffer. + * + * @param outbuf The tar entry header buffer to fill in. + * @param encoding encoding to use when writing the file name. 
+ * @param starMode whether to use the star/GNU tar/BSD tar + * extension for numeric fields if their value doesn't fit in the + * maximum size of standard tar archives + */ + public void writeEntryHeader(byte[] outbuf, ArchiveEntryEncoding encoding, boolean starMode) throws IOException { + int offset = 0; + offset = ArchiveUtils.formatNameBytes(name, outbuf, offset, NAMELEN, encoding); + offset = writeEntryHeaderField(mode, outbuf, offset, MODELEN, starMode); + offset = writeEntryHeaderField(userId, outbuf, offset, UIDLEN, starMode); + offset = writeEntryHeaderField(groupId, outbuf, offset, GIDLEN, starMode); + offset = writeEntryHeaderField(size, outbuf, offset, SIZELEN, starMode); + offset = writeEntryHeaderField(modTime, outbuf, offset, MODTIMELEN, starMode); + int csOffset = offset; + for (int c = 0; c < CHKSUMLEN; ++c) { + outbuf[offset++] = (byte) ' '; + } + outbuf[offset++] = linkFlag; + offset = ArchiveUtils.formatNameBytes(linkName, outbuf, offset, NAMELEN, encoding); + offset = ArchiveUtils.formatNameBytes(magic, outbuf, offset, MAGICLEN); + offset = ArchiveUtils.formatNameBytes(version, outbuf, offset, VERSIONLEN); + offset = ArchiveUtils.formatNameBytes(userName, outbuf, offset, UNAMELEN, encoding); + offset = ArchiveUtils.formatNameBytes(groupName, outbuf, offset, GNAMELEN, encoding); + offset = writeEntryHeaderField(devMajor, outbuf, offset, DEVLEN, starMode); + offset = writeEntryHeaderField(devMinor, outbuf, offset, DEVLEN, starMode); + while (offset < outbuf.length) { + outbuf[offset++] = 0; + } + long chk = computeCheckSum(outbuf); + formatCheckSumOctalBytes(chk, outbuf, csOffset, CHKSUMLEN); + } + + private int writeEntryHeaderField(long value, byte[] outbuf, int offset, int length, boolean starMode) { + if (!starMode && (value < 0 + || value >= (1l << (3 * (length - 1))))) { + // value doesn't fit into field when written as octal + // number, will be written to PAX header or causes an + // error + return formatLongOctalBytes(0, outbuf, offset, length); + } + return formatLongOctalOrBinaryBytes(value, outbuf, offset, length); + } + + /** + * Fill buffer with unsigned octal number, padded with leading zeroes. + * + * @param value number to convert to octal - treated as unsigned + * @param buffer destination buffer + * @param offset starting offset in buffer + * @param length length of buffer to fill + * @throws IllegalArgumentException if the value will not fit in the buffer + */ + private void formatUnsignedOctalString(final long value, byte[] buffer, final int offset, final int length) { + int remaining = length; + remaining--; + if (value == 0) { + buffer[offset + remaining--] = (byte) '0'; + } else { + long val = value; + for (; remaining >= 0 && val != 0; --remaining) { + buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); + val = val >>> 3; + } + if (val != 0) { + throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length); + } + } + + for (; remaining >= 0; --remaining) { // leading zeros + buffer[offset + remaining] = (byte) '0'; + } + } + + /** + * Write an octal long integer into a buffer. + *
+     * <p>
+     * Uses {@link #formatUnsignedOctalString} to format
+     * the value as an octal string with leading zeros.
+     * The converted number is followed by a space.
+     *
+     * @param value The value to write as octal
+     * @param buf The destination buffer.
+     * @param offset The starting offset into the buffer.
+     * @param length The length of the buffer
+     * @return The updated offset
+     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
+     */
+    private int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) {
+        int idx = length - 1; // For space
+        formatUnsignedOctalString(value, buf, offset, idx);
+        buf[offset + idx] = (byte) ' '; // Trailing space
+        return offset + length;
+    }
+
+    /**
+     * Write a long integer into a buffer as an octal string if this
+     * will fit, or as a binary number otherwise.
+     *
+     * <p>
+     * Uses {@link #formatUnsignedOctalString} to format
+     * the value as an octal string with leading zeros.
+     * The converted number is followed by a space.
+     *
+     * @param value The value to write into the buffer.
+     * @param buf The destination buffer.
+     * @param offset The starting offset into the buffer.
+     * @param length The length of the buffer.
+     * @return The updated offset.
+     * @throws IllegalArgumentException if the value (and trailer)
+     * will not fit in the buffer.
+     */
+    private int formatLongOctalOrBinaryBytes(final long value, byte[] buf, final int offset, final int length) {
+        // Check whether we are dealing with UID/GID or SIZE field
+        final long maxAsOctalChar = length == UIDLEN ? MAXID : MAXSIZE;
+        final boolean negative = value < 0;
+        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
+            return formatLongOctalBytes(value, buf, offset, length);
+        }
+        if (length < 9) {
+            formatLongBinary(value, buf, offset, length, negative);
+        } else {
+            formatBigIntegerBinary(value, buf, offset, length, negative);
+        }
+        buf[offset] = (byte) (negative ? 0xff : 0x80);
+        return offset + length;
+    }
+
+    private void formatLongBinary(final long value, byte[] buf, final int offset, final int length, final boolean negative) {
+        final int bits = (length - 1) * 8;
+        final long max = 1L << bits;
+        long val = Math.abs(value);
+        if (val >= max) {
+            throw new IllegalArgumentException("Value " + value
+                    + " is too large for " + length + " byte field.");
+        }
+        if (negative) {
+            val ^= max - 1;
+            val |= 0xffL << bits;
+            val++;
+        }
+        for (int i = offset + length - 1; i >= offset; i--) {
+            buf[i] = (byte) val;
+            val >>= 8;
+        }
+    }
+
+    private void formatBigIntegerBinary(final long value, byte[] buf,
+                                        final int offset,
+                                        final int length,
+                                        final boolean negative) {
+        BigInteger val = BigInteger.valueOf(value);
+        final byte[] b = val.toByteArray();
+        final int len = b.length;
+        final int off = offset + length - len;
+        System.arraycopy(b, 0, buf, off, len);
+        final byte fill = (byte) (negative ? 0xff : 0);
+        for (int i = offset + 1; i < off; i++) {
+            buf[i] = fill;
+        }
+    }
+
+    /**
+     * Writes an octal value into a buffer.
+     *
+     * <p>
+     * Uses {@link #formatUnsignedOctalString} to format
+     * the value as an octal string with leading zeros.
+     * The converted number is followed by NUL and then space.
+     *
+     * @param value The value to convert
+     * @param buf The destination buffer
+     * @param offset The starting offset into the buffer.
+     * @param length The size of the buffer.
+     * @return The updated value of offset, i.e. offset+length
+     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
+     */
+    private int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) {
+        int idx = length - 2;
+        formatUnsignedOctalString(value, buf, offset, idx);
+        buf[offset + idx++] = 0;
+        buf[offset + idx] = (byte) ' ';
+        return offset + length;
+    }
+
+    /**
+     * Compute the checksum of a tar entry header.
+     *
+     * @param buf The tar entry's header buffer.
+     * @return The computed checksum.
+     */
+    private long computeCheckSum(final byte[] buf) {
+        long sum = 0;
+        for (byte aBuf : buf) {
+            sum += BYTE_MASK & aBuf;
+        }
+        return sum;
+    }
+
+}
+
diff --git a/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveOutputStream.java b/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveOutputStream.java
new file mode 100644
index 0000000..ce528fa
--- /dev/null
+++ b/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarArchiveOutputStream.java
@@ -0,0 +1,573 @@
+package org.xbib.io.archive.tar;
+
+import org.xbib.io.archive.stream.ArchiveOutputStream;
+import org.xbib.io.archive.entry.ArchiveEntryEncoding;
+import org.xbib.io.archive.entry.ArchiveEntryEncodingHelper;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.StringWriter;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * The TarArchiveOutputStream writes a UNIX tar archive as an output stream
+ */
+public class TarArchiveOutputStream extends ArchiveOutputStream implements TarConstants {
+
+    private static final ArchiveEntryEncoding ASCII = ArchiveEntryEncodingHelper.getEncoding("ASCII");
+
+    private final ArchiveEntryEncoding encoding;
+
+    private final OutputStream outStream;
+
+    private final int blockSize;
+
+    private final int recordSize;
+
+    private final int recsPerBlock;
+
+    private final byte[] blockBuffer;
+
+    private final byte[] recordBuf;
+
+    private final byte[] assemBuf;
+
+    private int currRecIdx;
+
+    private long currSize;
+
+    private String currName;
+
+    private long currBytes;
+
+    private int assemLen;
+
+    private int longFileMode = LONGFILE_GNU;
+
+    private int bigNumberMode = BIGNUMBER_ERROR;
+
+    private boolean closed = false;
+
+    /**
+     * Indicates if putArchiveEntry has been called without closeArchiveEntry
+     */
+    private boolean haveUnclosedEntry = false;
+
+    /**
+     * Indicates if this archive is finished
+     */
+    private boolean finished = false;
+
+    private boolean addPaxHeadersForNonAsciiNames = false;
+
+    /**
+     * Constructor for TarArchiveOutputStream.
+     *
+     * @param os the output stream to use
+     */
+    public TarArchiveOutputStream(OutputStream os) {
+        this(os, DEFAULT_BLOCK_SIZE, DEFAULT_RECORD_SIZE);
+    }
+
+    /**
+     * Constructor for TarArchiveOutputStream.
+     *
+     * @param os the output stream to use
+     * @param encoding name of the encoding to use for file names
+     */
+    public TarArchiveOutputStream(OutputStream os, String encoding) {
+        this(os, DEFAULT_BLOCK_SIZE, DEFAULT_RECORD_SIZE, encoding);
+    }
+
+    /**
+     * Constructor for TarArchiveOutputStream.
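+     * <p>
+     * For example, to buffer output in blocks of twenty 512-byte records
+     * (the underlying stream is illustrative):
+     * <pre>
+     * TarArchiveOutputStream out = new TarArchiveOutputStream(fileOut, 20 * 512);
+     * </pre>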
+     *
+     * @param os the output stream to use
+     * @param blockSize the block size to use
+     */
+    public TarArchiveOutputStream(OutputStream os, int blockSize) {
+        this(os, blockSize, DEFAULT_RECORD_SIZE);
+    }
+
+    /**
+     * Constructor for TarArchiveOutputStream.
+     *
+     * @param os the output stream to use
+     * @param blockSize the block size to use
+     * @param encoding name of the encoding to use for file names
+     */
+    public TarArchiveOutputStream(OutputStream os, int blockSize, String encoding) {
+        this(os, blockSize, DEFAULT_RECORD_SIZE, encoding);
+    }
+
+    /**
+     * Constructor for TarArchiveOutputStream.
+     *
+     * @param os the output stream to use
+     * @param blockSize the block size to use
+     * @param recordSize the record size to use
+     */
+    public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) {
+        this(os, blockSize, recordSize, null);
+    }
+
+    /**
+     * Constructor for TarArchiveOutputStream.
+     *
+     * @param os the output stream to use
+     * @param blockSize the block size to use
+     * @param recordSize the record size to use
+     * @param encoding name of the encoding to use for file names
+     */
+    public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize, String encoding) {
+        this.encoding = ArchiveEntryEncodingHelper.getEncoding(encoding);
+        this.assemLen = 0;
+        this.assemBuf = new byte[recordSize];
+        this.recordBuf = new byte[recordSize];
+        this.outStream = os;
+        this.blockSize = blockSize;
+        this.recordSize = recordSize;
+        this.recsPerBlock = (this.blockSize / this.recordSize);
+        this.blockBuffer = new byte[this.blockSize];
+        this.currRecIdx = 0;
+    }
+
+    /**
+     * Set the long file mode.
+     * This can be LONGFILE_ERROR(0), LONGFILE_TRUNCATE(1) or LONGFILE_GNU(2).
+     * This specifies the treatment of long file names (names >= NAMELEN).
+     * Default is LONGFILE_GNU.
+     *
+     * @param longFileMode the mode to use
+     */
+    public void setLongFileMode(int longFileMode) {
+        this.longFileMode = longFileMode;
+    }
+
+    /**
+     * Set the big number mode.
+     * This can be BIGNUMBER_ERROR(0), BIGNUMBER_POSIX(1) or BIGNUMBER_STAR(2).
+     * This specifies the treatment of big files (sizes > MAXSIZE) and other
+     * numeric values too big to fit into a traditional tar header.
+     * Default is BIGNUMBER_ERROR.
+     *
+     * @param bigNumberMode the mode to use
+     */
+    public void setBigNumberMode(int bigNumberMode) {
+        this.bigNumberMode = bigNumberMode;
+    }
+
+    /**
+     * Whether to add a PAX extension header for non-ASCII file names.
+     */
+    public void setAddPaxHeadersForNonAsciiNames(boolean b) {
+        addPaxHeadersForNonAsciiNames = b;
+    }
+
+    /**
+     * Ends the TAR archive without closing the underlying OutputStream.
+     * An archive consists of a series of file entries terminated by an
+     * end-of-archive entry, which consists of two 512-byte blocks of zero bytes.
+     * POSIX.1 requires two EOF records, as do some other implementations.
+     *
+     * @throws IOException on error
+     */
+    @Override
+    public void finish() throws IOException {
+        if (finished) {
+            throw new IOException("This archive has already been finished");
+        }
+        if (haveUnclosedEntry) {
+            throw new IOException("This archive contains unclosed entries.");
+        }
+        writeEOFRecord();
+        writeEOFRecord();
+        flushBlock();
+        finished = true;
+    }
+
+    /**
+     * Closes the underlying OutputStream.
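+     * <p>
+     * A minimal end-to-end sketch (stream, entry name and payload are
+     * made up):
+     * <pre>
+     * TarArchiveOutputStream out = new TarArchiveOutputStream(fileOut);
+     * TarArchiveOutputEntry e = new TarArchiveOutputEntry("file.txt");
+     * byte[] payload = "hello".getBytes();
+     * e.setEntrySize(payload.length);
+     * out.putArchiveEntry(e);
+     * out.write(payload);
+     * out.closeArchiveEntry();
+     * out.close(); // implies finish(): writes the two EOF records
+     * </pre>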
+     *
+     * @throws IOException on error
+     */
+    @Override
+    public void close() throws IOException {
+        if (!finished) {
+            finish();
+        }
+        if (!closed) {
+            if (outStream != null) {
+                flushBlock();
+                if (outStream != System.out && outStream != System.err) {
+                    outStream.close();
+                }
+            }
+            closed = true;
+        }
+    }
+
+    private void flushBlock() throws IOException {
+        if (outStream == null) {
+            throw new IOException("writing to an input buffer");
+        }
+        if (currRecIdx > 0) {
+            writeBlock();
+        }
+    }
+
+    private void writeBlock() throws IOException {
+        if (outStream == null) {
+            throw new IOException("writing to an input buffer");
+        }
+        outStream.write(blockBuffer, 0, blockSize);
+        outStream.flush();
+        currRecIdx = 0;
+        Arrays.fill(blockBuffer, (byte) 0);
+    }
+
+    /**
+     * Get the record size being used by this stream.
+     *
+     * @return The record size.
+     */
+    public int getRecordSize() {
+        return recordSize;
+    }
+
+    @Override
+    public TarArchiveOutputEntry newArchiveEntry() {
+        return new TarArchiveOutputEntry();
+    }
+
+    /**
+     * Put an entry on the output stream. This writes the entry's
+     * header record and positions the output stream for writing
+     * the contents of the entry. Once this method is called, the
+     * stream is ready for calls to write() to write the entry's
+     * contents. Once the contents are written, closeArchiveEntry()
+     * MUST be called to ensure that all buffered data
+     * is completely written to the output stream.
+     *
+     * @param archiveEntry The TarEntry to be written to the archive.
+     * @throws IOException on error
+     */
+    @Override
+    public void putArchiveEntry(TarArchiveOutputEntry archiveEntry) throws IOException {
+        if (finished) {
+            throw new IOException("Stream has already been finished");
+        }
+        Map<String, String> paxHeaders = new HashMap<>();
+        final String entryName = archiveEntry.getName();
+        final byte[] nameBytes = encoding.encode(entryName).array();
+        boolean paxHeaderContainsPath = false;
+        if (nameBytes.length >= NAMELEN) {
+            if (longFileMode == LONGFILE_POSIX) {
+                paxHeaders.put("path", entryName);
+                paxHeaderContainsPath = true;
+            } else if (longFileMode == LONGFILE_GNU) {
+                // create a TarEntry for the LongLink, the contents
+                // of which are the entry's name
+                TarArchiveOutputEntry longLinkEntry = new TarArchiveOutputEntry(GNU_LONGLINK, LF_GNUTYPE_LONGNAME);
+                longLinkEntry.setEntrySize(nameBytes.length + 1); // +1 for NUL
+                putArchiveEntry(longLinkEntry);
+                write(nameBytes);
+                write(0); // NUL terminator
+                closeArchiveEntry();
+            } else if (longFileMode != LONGFILE_TRUNCATE) {
+                throw new RuntimeException("file name '" + entryName
+                        + "' is too long ( > "
+                        + NAMELEN + " bytes)");
+            }
+        }
+        if (bigNumberMode == BIGNUMBER_POSIX) {
+            addPaxHeadersForBigNumbers(paxHeaders, archiveEntry);
+        } else if (bigNumberMode != BIGNUMBER_STAR) {
+            failForBigNumbers(archiveEntry);
+        }
+        if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath
+                && !ASCII.canEncode(entryName)) {
+            paxHeaders.put("path", entryName);
+        }
+        if (addPaxHeadersForNonAsciiNames
+                && (archiveEntry.isLink() || archiveEntry.isSymbolicLink())
+                && !ASCII.canEncode(archiveEntry.getLinkName())) {
+            paxHeaders.put("linkpath", archiveEntry.getLinkName());
+        }
+        if (paxHeaders.size() > 0) {
+            writePaxHeaders(entryName, paxHeaders);
+        }
+        archiveEntry.writeEntryHeader(recordBuf, encoding, bigNumberMode == BIGNUMBER_STAR);
+        writeRecord(recordBuf);
+        currBytes = 0;
+        if (archiveEntry.isDirectory()) {
+            currSize = 0;
+        } else {
+            currSize = archiveEntry.getEntrySize();
+        }
+        currName = entryName;
+        haveUnclosedEntry = true;
+    }
+
+    /**
+     * Close an entry. This method MUST be called for all file
+     * entries that contain data. The reason is that we must
+     * buffer data written to the stream in order to satisfy
+     * the buffer's record based writes. Thus, there may be
+     * data fragments still being assembled that must be written
+     * to the output stream before this entry is closed and the
+     * next entry written.
+     *
+     * @throws IOException on error
+     */
+    @Override
+    public void closeArchiveEntry() throws IOException {
+        if (finished) {
+            throw new IOException("stream has already been finished");
+        }
+        if (!haveUnclosedEntry) {
+            throw new IOException("no current entry to close");
+        }
+        if (assemLen > 0) {
+            for (int i = assemLen; i < assemBuf.length; ++i) {
+                assemBuf[i] = 0;
+            }
+            writeRecord(assemBuf);
+            currBytes += assemLen;
+            assemLen = 0;
+        }
+        if (currBytes < currSize) {
+            throw new IOException("entry '" + currName + "' closed at '"
+                    + currBytes
+                    + "' before the '" + currSize
+                    + "' bytes specified in the header were written");
+        }
+        haveUnclosedEntry = false;
+    }
+
+    /**
+     * Writes bytes to the current tar archive entry. This method
+     * is aware of the current entry and will throw an exception if
+     * you attempt to write bytes past the length specified for the
+     * current entry. The method is also (painfully) aware of the
+     * record buffering required by this stream, and manages buffers
+     * that are not a multiple of the record size in length, including
+     * assembling records from small buffers.
+     *
+     * @param wBuf The buffer to write to the archive.
+     * @param wOffset The offset in the buffer from which to get bytes.
+     * @param numToWrite The number of bytes to write.
+     * @throws IOException on error
+     */
+    @Override
+    public void write(byte[] wBuf, int wOffset, int numToWrite) throws IOException {
+        if ((currBytes + numToWrite) > currSize) {
+            throw new IOException("request to write '" + numToWrite
+                    + "' bytes exceeds size in header of '"
+                    + currSize + "' bytes for entry '"
+                    + currName + "'");
+        }
+
+        //
+        // We have to deal with assembly!!!
+        // The programmer can be writing little 32 byte chunks for all
+        // we know, and we must assemble complete records for writing.
+        // REVIEW Maybe this should be in TarBuffer? Could that help to
+        // eliminate some of the buffer copying.
+        //
+        if (assemLen > 0) {
+            if ((assemLen + numToWrite) >= recordBuf.length) {
+                int aLen = recordBuf.length - assemLen;
+                System.arraycopy(assemBuf, 0, recordBuf, 0,
+                        assemLen);
+                System.arraycopy(wBuf, wOffset, recordBuf,
+                        assemLen, aLen);
+                writeRecord(recordBuf);
+                currBytes += recordBuf.length;
+                wOffset += aLen;
+                numToWrite -= aLen;
+                assemLen = 0;
+            } else {
+                System.arraycopy(wBuf, wOffset, assemBuf, assemLen,
+                        numToWrite);
+                wOffset += numToWrite;
+                assemLen += numToWrite;
+                numToWrite = 0;
+            }
+        }
+
+        //
+        // When we get here we have EITHER:
+        // o An empty "assemble" buffer.
+        // o No bytes to write (numToWrite == 0)
+        //
+        while (numToWrite > 0) {
+            if (numToWrite < recordBuf.length) {
+                System.arraycopy(wBuf, wOffset, assemBuf, assemLen,
+                        numToWrite);
+                assemLen += numToWrite;
+                break;
+            }
+            writeRecord(wBuf, wOffset);
+            int num = recordBuf.length;
+            currBytes += num;
+            numToWrite -= num;
+            wOffset += num;
+        }
+    }
+
+    /**
+     * Writes a PAX extended header with the given map as contents.
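+     * Each header is written as a "length keyword=value\n" record, where
+     * length counts every byte of the record including the length digits,
+     * the blank, the equals sign and the trailing newline. An illustrative
+     * record (the path is made up):
+     * <pre>
+     * 18 path=some/file
+     * </pre>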
+ */ + void writePaxHeaders(String entryName, Map headers) throws IOException { + String name = "./PaxHeaders.X/" + stripTo7Bits(entryName); + if (name.length() >= NAMELEN) { + name = name.substring(0, NAMELEN - 1); + } + TarArchiveOutputEntry pex = new TarArchiveOutputEntry(name, LF_PAX_EXTENDED_HEADER_LC); + StringWriter w = new StringWriter(); + for (Map.Entry h : headers.entrySet()) { + String key = h.getKey(); + String value = h.getValue(); + int len = key.length() + value.length() + + 3 /* blank, equals and newline */ + + 2 /* guess 9 < actual length < 100 */; + String line = len + " " + key + "=" + value + "\n"; + int actualLength = line.getBytes(Charset.forName("UTF-8")).length; + while (len != actualLength) { + // Adjust for cases where length < 10 or > 100 + // or where UTF-8 encoding isn't a single octet + // per character. + // Must be in loop as size may go from 99 to 100 in + // first pass so we'd need a second. + len = actualLength; + line = len + " " + key + "=" + value + "\n"; + actualLength = line.getBytes(Charset.forName("UTF-8")).length; + } + w.write(line); + } + byte[] data = w.toString().getBytes(Charset.forName("UTF-8")); + pex.setEntrySize(data.length); + putArchiveEntry(pex); + write(data); + closeArchiveEntry(); + } + + private String stripTo7Bits(String name) { + final int length = name.length(); + StringBuilder result = new StringBuilder(length); + for (int i = 0; i < length; i++) { + char stripped = (char) (name.charAt(i) & 0x7F); + if (stripped != 0) { // would be read as Trailing null + result.append(stripped); + } + } + return result.toString(); + } + + /** + * Write an EOF (end of archive) record to the tar archive. + * An EOF record consists of a record of all zeros. + */ + private void writeEOFRecord() throws IOException { + for (int i = 0; i < recordBuf.length; ++i) { + recordBuf[i] = 0; + } + + writeRecord(recordBuf); + } + + @Override + public void flush() throws IOException { + outStream.flush(); + } + + private void addPaxHeadersForBigNumbers(Map paxHeaders, + TarArchiveOutputEntry entry) { + addPaxHeaderForBigNumber(paxHeaders, "size", entry.getEntrySize(), MAXSIZE); + addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getGroupId(), MAXID); + addPaxHeaderForBigNumber(paxHeaders, "mtime", entry.getLastModified().getTime() / 1000, MAXSIZE); + addPaxHeaderForBigNumber(paxHeaders, "uid", entry.getUserId(), MAXID); + // star extensions by J\u00f6rg Schilling + addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devmajor", entry.getDevMajor(), MAXID); + addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devminor", entry.getDevMinor(), MAXID); + // there is no PAX header for file mode + failForBigNumber("mode", entry.getMode(), MAXID); + } + + private void addPaxHeaderForBigNumber(Map paxHeaders, + String header, long value, + long maxValue) { + if (value < 0 || value > maxValue) { + paxHeaders.put(header, String.valueOf(value)); + } + } + + private void failForBigNumbers(TarArchiveOutputEntry entry) { + failForBigNumber("entry size", entry.getEntrySize(), MAXSIZE); + failForBigNumber("group id", entry.getGroupId(), MAXID); + failForBigNumber("last modification time", entry.getLastModified().getTime() / 1000, MAXSIZE); + failForBigNumber("user id", entry.getUserId(), MAXID); + failForBigNumber("mode", entry.getMode(), MAXID); + failForBigNumber("major device number", entry.getDevMajor(), MAXID); + failForBigNumber("minor device number", entry.getDevMinor(), MAXID); + } + + private void failForBigNumber(String field, long value, long maxValue) { + if (value < 0 || value 
> maxValue) { + throw new RuntimeException(field + " '" + value + + "' is too big ( > " + + maxValue + " )"); + } + } + + private void writeRecord(byte[] record) throws IOException { + if (outStream == null) { + throw new IOException("Output buffer is closed"); + } + if (record.length != recordSize) { + throw new IOException("record to write has length '" + + record.length + + "' which is not the record size of '" + + recordSize + "'"); + } + if (currRecIdx >= recsPerBlock) { + writeBlock(); + } + System.arraycopy(record, 0, blockBuffer, (currRecIdx * recordSize), recordSize); + currRecIdx++; + } + + private void writeRecord(byte[] buf, int offset) throws IOException { + if (outStream == null) { + throw new IOException("Output buffer is closed"); + } + if ((offset + recordSize) > buf.length) { + throw new IOException("record has length '" + buf.length + + "' with offset '" + offset + + "' which is less than the record size of '" + + recordSize + "'"); + } + if (currRecIdx >= recsPerBlock) { + writeBlock(); + } + System.arraycopy(buf, offset, blockBuffer, (currRecIdx * recordSize), recordSize); + currRecIdx++; + } +} diff --git a/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarConstants.java b/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarConstants.java new file mode 100644 index 0000000..19e9704 --- /dev/null +++ b/io-archive-tar/src/main/java/org/xbib/io/archive/tar/TarConstants.java @@ -0,0 +1,299 @@ +package org.xbib.io.archive.tar; + +/** + * This interface contains all the definitions used in the package. + * For tar formats (FORMAT_OLDGNU, FORMAT_POSIX, etc.) see GNU tar + * tar.h type enum archive_format + */ +public interface TarConstants { + + int GNU_FORMAT = 0; + + int USTAR_FORMAT = 1; + + int UNIX_FORMAT = 2; + + /** + * Pure Posix format. + */ + int POSIX_FORMAT = 3; + + /** + * The length of the name field in a header buffer. + */ + int NAMELEN = 100; + + /** + * The length of the mode field in a header buffer. + */ + int MODELEN = 8; + + /** + * The length of the user id field in a header buffer. + */ + int UIDLEN = 8; + + /** + * The length of the group id field in a header buffer. + */ + int GIDLEN = 8; + + /** + * The maximum value of gid/uid in a tar archive which can + * be expressed in octal char notation (that's 7 sevens, octal). + */ + long MAXID = 07777777L; + + /** + * The length of the checksum field in a header buffer. + */ + int CHKSUMLEN = 8; + + /** + * The length of the size field in a header buffer. + * Includes the trailing space or NUL. + */ + int SIZELEN = 12; + + /** + * The maximum size of a file in a tar archive + * which can be expressed in octal char notation (that's 11 sevens, octal). + */ + long MAXSIZE = 077777777777L; + + /** + * Offset of start of magic field within header record + */ + int MAGIC_OFFSET = 257; + /** + * The length of the magic field in a header buffer. + */ + int MAGICLEN = 6; + + /** + * Offset of start of magic field within header record + */ + int VERSION_OFFSET = 263; + /** + * Previously this was regarded as part of "magic" field, but it is separate. + */ + int VERSIONLEN = 2; + + /** + * The length of the modification time field in a header buffer. + */ + int MODTIMELEN = 12; + + /** + * The length of the user name field in a header buffer. + */ + int UNAMELEN = 32; + + /** + * The length of the group name field in a header buffer. + */ + int GNAMELEN = 32; + + /** + * The length of each of the device fields (major and minor) in a header buffer. 
+ */ + int DEVLEN = 8; + + /** + * Length of the prefix field. + */ + int PREFIXLEN = 155; + + /** + * The length of the access time field in an old GNU header buffer. + */ + int ATIMELEN_GNU = 12; + + /** + * The length of the created time field in an old GNU header buffer. + */ + int CTIMELEN_GNU = 12; + + /** + * The length of the multivolume start offset field in an old GNU header buffer. + */ + int OFFSETLEN_GNU = 12; + + /** + * The length of the long names field in an old GNU header buffer. + */ + int LONGNAMESLEN_GNU = 4; + + /** + * The length of the padding field in an old GNU header buffer. + */ + int PAD2LEN_GNU = 1; + + /** + * The sum of the length of all sparse headers in an old GNU header buffer. + */ + int SPARSELEN_GNU = 96; + + /** + * The length of the is extension field in an old GNU header buffer. + */ + int ISEXTENDEDLEN_GNU = 1; + + /** + * The length of the real size field in an old GNU header buffer. + */ + int REALSIZELEN_GNU = 12; + + /** + * LF_ constants represent the "link flag" of an entry, or more commonly, + * the "entry type". This is the "old way" of indicating a normal file. + */ + byte LF_OLDNORM = 0; + + /** + * Normal file type. + */ + byte LF_NORMAL = (byte) '0'; + + /** + * Link file type. + */ + byte LF_LINK = (byte) '1'; + + /** + * Symbolic link file type. + */ + byte LF_SYMLINK = (byte) '2'; + + /** + * Character device file type. + */ + byte LF_CHR = (byte) '3'; + + /** + * Block device file type. + */ + byte LF_BLK = (byte) '4'; + + /** + * Directory file type. + */ + byte LF_DIR = (byte) '5'; + + /** + * FIFO (pipe) file type. + */ + byte LF_FIFO = (byte) '6'; + + /** + * Contiguous file type. + */ + byte LF_CONTIG = (byte) '7'; + + /** + * Identifies the *next* file on the tape as having a long name. + */ + byte LF_GNUTYPE_LONGNAME = (byte) 'L'; + + /** + * Sparse file type. + */ + byte LF_GNUTYPE_SPARSE = (byte) 'S'; + + // See "http://www.opengroup.org/onlinepubs/009695399/utilities/pax.html#tag_04_100_13_02" + + /** + * Identifies the entry as a Pax extended header. + */ + byte LF_PAX_EXTENDED_HEADER_LC = (byte) 'x'; + + /** + * Identifies the entry as a Pax extended header (SunOS tar -E). + */ + byte LF_PAX_EXTENDED_HEADER_UC = (byte) 'X'; + + /** + * Identifies the entry as a Pax global extended header. + */ + byte LF_PAX_GLOBAL_EXTENDED_HEADER = (byte) 'g'; + + String MAGIC_UNIX = "\0\0\0\0\0"; + + /** + * The magic tag representing a POSIX tar archive. + */ + String MAGIC_POSIX = "ustar\0"; + String VERSION_POSIX = "00"; + + /** + * The magic tag representing a GNU tar archive. + */ + String MAGIC_GNU = "ustar "; + // Appear to be two possible GNU versions + String VERSION_GNU_SPACE = " \0"; + String VERSION_GNU_ZERO = "0\0"; + + /** + * The magic tag representing an Ant tar archive. + */ + String MAGIC_ANT = "ustar\0"; + + /** + * The "version" representing an Ant tar archive. + */ + // Does not appear to have a version, however Ant does write 8 bytes, + // so assume the version is 2 nulls + String VERSION_ANT = "\0\0"; + + /** + * The name of the GNU tar entry which contains a long name. + */ + String GNU_LONGLINK = "././@LongLink"; + + /** + * Fail if a long file name is required in the archive. + */ + int LONGFILE_ERROR = 0; + + /** + * Long paths will be truncated in the archive. + */ + int LONGFILE_TRUNCATE = 1; + + /** + * GNU tar extensions are used to store long file names in the archive. + */ + int LONGFILE_GNU = 2; + + /** + * POSIX/PAX extensions are used to store long file names in the archive. 
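* <p>
+     * On the output side this mode could be selected via, e.g.
+     * (illustrative):
+     * <pre>
+     * tarOut.setLongFileMode(LONGFILE_POSIX);
+     * </pre>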
+ */ + int LONGFILE_POSIX = 3; + + /** + * Fail if a big number (e.g. size > 8GiB) is required in the archive. + */ + int BIGNUMBER_ERROR = 0; + + /** + * star/GNU tar/BSD tar extensions are used to store big number in the archive. + */ + int BIGNUMBER_STAR = 1; + + /** + * POSIX/PAX extensions are used to store big numbers in the archive. + */ + int BIGNUMBER_POSIX = 2; + + /** + * Default record size + */ + int DEFAULT_RECORD_SIZE = 512; + + /** + * Default block size + */ + int DEFAULT_BLOCK_SIZE = DEFAULT_RECORD_SIZE * 20; + + int SMALL_BUFFER_SIZE = 256; +} diff --git a/io-archive-tar/src/test/java/org/xbib/io/archive/tar/TarTest.java b/io-archive-tar/src/test/java/org/xbib/io/archive/tar/TarTest.java new file mode 100644 index 0000000..9a82b56 --- /dev/null +++ b/io-archive-tar/src/test/java/org/xbib/io/archive/tar/TarTest.java @@ -0,0 +1,26 @@ +package org.xbib.io.archive.tar; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import java.io.IOException; +import java.io.InputStream; + +import org.junit.jupiter.api.Test; + +public class TarTest { + + @Test + public void testTar() throws IOException { + InputStream in = getClass().getResourceAsStream("test.tar"); + TarArchiveInputStream tarArchiveInputStream = new TarArchiveInputStream(in); + byte[] buffer = new byte[1024]; + long total = 0L; + while (tarArchiveInputStream.getNextEntry() != null) { + int len = 0; + while ((len = tarArchiveInputStream.read(buffer)) > 0) { + total += len; + } + } + assertEquals(1889L, total); + tarArchiveInputStream.close(); + } +} diff --git a/io-archive-zip/build.gradle b/io-archive-zip/build.gradle new file mode 100644 index 0000000..7c4e2c7 --- /dev/null +++ b/io-archive-zip/build.gradle @@ -0,0 +1,3 @@ +dependencies { + api project(':io-archive') +} diff --git a/io-archive-zip/src/main/java/module-info.java b/io-archive-zip/src/main/java/module-info.java new file mode 100644 index 0000000..97dce68 --- /dev/null +++ b/io-archive-zip/src/main/java/module-info.java @@ -0,0 +1,4 @@ +module org.xbib.io.archive.zip { + exports org.xbib.io.archive.zip; + requires org.xbib.io.archive; +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/AbstractUnicodeExtraField.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/AbstractUnicodeExtraField.java new file mode 100644 index 0000000..dc08dbe --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/AbstractUnicodeExtraField.java @@ -0,0 +1,165 @@ + +package org.xbib.io.archive.zip; + +import java.nio.charset.Charset; +import java.util.zip.CRC32; +import java.util.zip.ZipException; + +/** + * A common base class for Unicode extra information extra fields. + */ +public abstract class AbstractUnicodeExtraField implements ZipExtraField { + + private long nameCRC32; + + private byte[] unicodeName; + + private byte[] data; + + protected AbstractUnicodeExtraField() { + } + + /** + * Assemble as unicode extension from the name/comment and + * encoding of the orginal zip entry. + * + * @param text The file name or comment. + * @param bytes The encoded of the filename or comment in the zip + * file. + * @param off The offset of the encoded filename or comment in + * bytes. + * @param len The length of the encoded filename or commentin + * bytes. 
+     */
+    protected AbstractUnicodeExtraField(String text, byte[] bytes, int off, int len) {
+        CRC32 crc32 = new CRC32();
+        crc32.update(bytes, off, len);
+        nameCRC32 = crc32.getValue();
+
+        unicodeName = text.getBytes(Charset.forName("UTF-8"));
+    }
+
+    /**
+     * Assemble as unicode extension from the name/comment and
+     * encoding of the original zip entry.
+     *
+     * @param text The file name or comment.
+     * @param bytes The encoded form of the filename or comment in the zip
+     * file.
+     */
+    protected AbstractUnicodeExtraField(String text, byte[] bytes) {
+        this(text, bytes, 0, bytes.length);
+    }
+
+    private void assembleData() {
+        if (unicodeName == null) {
+            return;
+        }
+
+        data = new byte[5 + unicodeName.length];
+        // version 1
+        data[0] = 0x01;
+        System.arraycopy(ZipLong.getBytes(nameCRC32), 0, data, 1, 4);
+        System.arraycopy(unicodeName, 0, data, 5, unicodeName.length);
+    }
+
+    /**
+     * @return The CRC32 checksum of the filename or comment as
+     * encoded in the central directory of the zip file.
+     */
+    public long getNameCRC32() {
+        return nameCRC32;
+    }
+
+    /**
+     * @param nameCRC32 The CRC32 checksum of the filename as encoded
+     * in the central directory of the zip file to set.
+     */
+    public void setNameCRC32(long nameCRC32) {
+        this.nameCRC32 = nameCRC32;
+        data = null;
+    }
+
+    /**
+     * @return The utf-8 encoded name.
+     */
+    public byte[] getUnicodeName() {
+        byte[] b = null;
+        if (unicodeName != null) {
+            b = new byte[unicodeName.length];
+            System.arraycopy(unicodeName, 0, b, 0, b.length);
+        }
+        return b;
+    }
+
+    /**
+     * @param unicodeName The utf-8 encoded name to set.
+     */
+    public void setUnicodeName(byte[] unicodeName) {
+        if (unicodeName != null) {
+            this.unicodeName = new byte[unicodeName.length];
+            System.arraycopy(unicodeName, 0, this.unicodeName, 0,
+                    unicodeName.length);
+        } else {
+            this.unicodeName = null;
+        }
+        data = null;
+    }
+
+    public byte[] getCentralDirectoryData() {
+        if (data == null) {
+            this.assembleData();
+        }
+        byte[] b = null;
+        if (data != null) {
+            b = new byte[data.length];
+            System.arraycopy(data, 0, b, 0, b.length);
+        }
+        return b;
+    }
+
+    public ZipShort getCentralDirectoryLength() {
+        if (data == null) {
+            assembleData();
+        }
+        return new ZipShort(data.length);
+    }
+
+    public byte[] getLocalFileDataData() {
+        return getCentralDirectoryData();
+    }
+
+    public ZipShort getLocalFileDataLength() {
+        return getCentralDirectoryLength();
+    }
+
+    public void parseFromLocalFileData(byte[] buffer, int offset, int length)
+            throws ZipException {
+
+        if (length < 5) {
+            throw new ZipException("UniCode path extra data must have at least 5 bytes.");
+        }
+
+        int version = buffer[offset];
+
+        if (version != 0x01) {
+            throw new ZipException("Unsupported version [" + version
+                    + "] for UniCode path extra data.");
+        }
+
+        nameCRC32 = ZipLong.getValue(buffer, offset + 1);
+        unicodeName = new byte[length - 5];
+        System.arraycopy(buffer, offset + 5, unicodeName, 0, length - 5);
+        data = null;
+    }
+
+    /**
+     * Doesn't do anything special since this class always uses the
+     * same data in central directory and local file data.
+ */ + public void parseFromCentralDirectoryData(byte[] buffer, int offset, + int length) + throws ZipException { + parseFromLocalFileData(buffer, offset, length); + } +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/AsiExtraField.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/AsiExtraField.java new file mode 100644 index 0000000..7e2332f --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/AsiExtraField.java @@ -0,0 +1,318 @@ +package org.xbib.io.archive.zip; + +import java.util.zip.CRC32; +import java.util.zip.ZipException; + +/** + * Adds Unix file permission and UID/GID fields as well as symbolic + * link handling. + * This class uses the ASi extra field in the format: + *
+ * <pre>
+ *         Value         Size            Description
+ *         -----         ----            -----------
+ * (Unix3) 0x756e        Short           tag for this extra block type
+ *         TSize         Short           total data size for this block
+ *         CRC           Long            CRC-32 of the remaining data
+ *         Mode          Short           file permissions
+ *         SizDev        Long            symlink'd size OR major/minor dev num
+ *         UID           Short           user ID
+ *         GID           Short           group ID
+ *         (var.)        variable        symbolic link filename
+ * </pre>
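+ * <p>Worked example for the layout above: a symbolic link whose target
+ * name is six bytes long serializes to 4 (CRC) + 2 (Mode) + 4 (SizDev)
+ * + 2 (UID) + 2 (GID) + 6 = 20 bytes of extra field data, which is the
+ * sum computed by getLocalFileDataLength().</p>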
+ * taken from appnote.iz (Info-ZIP note, 981119) found at + * ftp://ftp.uu.net/pub/archiving/zip/doc/ + * Short is two bytes and Long is four bytes in big endian byte and + * word order, device numbers are currently not supported. + * Since the documentation this class is based upon doesn't mention + * the character encoding of the file name at all, it is assumed that + * it uses the current platform's default encoding. + */ +public class AsiExtraField implements ZipExtraField, UnixStat, Cloneable { + + private static final ZipShort HEADER_ID = new ZipShort(0x756E); + private static final int WORD = 4; + /** + * Standard Unix stat(2) file mode. + */ + private int mode = 0; + /** + * User ID. + */ + private int uid = 0; + /** + * Group ID. + */ + private int gid = 0; + /** + * File this entry points to, if it is a symbolic link. + *
+ * empty string - if entry is not a symbolic link.
+ */ + private String link = ""; + /** + * Is this an entry for a directory? + */ + private boolean dirFlag = false; + + /** + * Instance used to calculate checksums. + */ + private CRC32 crc = new CRC32(); + + /** + * Constructor for AsiExtraField. + */ + public AsiExtraField() { + } + + /** + * The Header-ID. + * + * @return the value for the header id for this extrafield + */ + public ZipShort getHeaderId() { + return HEADER_ID; + } + + /** + * Length of the extra field in the local file data - without + * Header-ID or length specifier. + * + * @return a ZipShort for the length of the data of this extra field + */ + public ZipShort getLocalFileDataLength() { + return new ZipShort(WORD // CRC + + 2 // Mode + + WORD // SizDev + + 2 // UID + + 2 // GID + + getLinkedFile().getBytes().length); + // Uses default charset - see class Javadoc + } + + /** + * Delegate to local file data. + * + * @return the centralDirectory length + */ + public ZipShort getCentralDirectoryLength() { + return getLocalFileDataLength(); + } + + /** + * The actual data to put into local file data - without Header-ID + * or length specifier. + * + * @return get the data + */ + public byte[] getLocalFileDataData() { + // CRC will be added later + byte[] data = new byte[getLocalFileDataLength().getValue() - WORD]; + System.arraycopy(ZipShort.getBytes(getMode()), 0, data, 0, 2); + + byte[] linkArray = getLinkedFile().getBytes(); // Uses default charset - see class Javadoc + // CheckStyle:MagicNumber OFF + System.arraycopy(ZipLong.getBytes(linkArray.length), + 0, data, 2, WORD); + + System.arraycopy(ZipShort.getBytes(getUserId()), + 0, data, 6, 2); + System.arraycopy(ZipShort.getBytes(getGroupId()), + 0, data, 8, 2); + + System.arraycopy(linkArray, 0, data, 10, linkArray.length); + + crc.reset(); + crc.update(data); + long checksum = crc.getValue(); + + byte[] result = new byte[data.length + WORD]; + System.arraycopy(ZipLong.getBytes(checksum), 0, result, 0, WORD); + System.arraycopy(data, 0, result, WORD, data.length); + return result; + } + + /** + * Delegate to local file data. + * + * @return the local file data + */ + public byte[] getCentralDirectoryData() { + return getLocalFileDataData(); + } + + /** + * Set the user id. + * + * @param uid the user id + */ + public void setUserId(int uid) { + this.uid = uid; + } + + /** + * Get the user id. + * + * @return the user id + */ + public int getUserId() { + return uid; + } + + /** + * Set the group id. + * + * @param gid the group id + */ + public void setGroupId(int gid) { + this.gid = gid; + } + + /** + * Get the group id. + * + * @return the group id + */ + public int getGroupId() { + return gid; + } + + /** + * Indicate that this entry is a symbolic link to the given filename. + * + * @param name Name of the file this entry links to, empty String + * if it is not a symbolic link. + */ + public void setLinkedFile(String name) { + link = name; + mode = getMode(mode); + } + + /** + * Name of linked file + * + * @return name of the file this entry links to if it is a + * symbolic link, the empty string otherwise. + */ + public String getLinkedFile() { + return link; + } + + /** + * Is this entry a symbolic link? + * + * @return true if this is a symbolic link + */ + public boolean isLink() { + return getLinkedFile().length() != 0; + } + + /** + * File mode of this file. + * + * @param mode the file mode + */ + public void setMode(int mode) { + this.mode = getMode(mode); + } + + /** + * File mode of this file. 
+     *
+     * @return the file mode
+     */
+    public int getMode() {
+        return mode;
+    }
+
+    /**
+     * Indicate whether this entry is a directory.
+     *
+     * @param dirFlag if true, this entry is a directory
+     */
+    public void setDirectory(boolean dirFlag) {
+        this.dirFlag = dirFlag;
+        mode = getMode(mode);
+    }
+
+    /**
+     * Is this entry a directory?
+     *
+     * @return true if this entry is a directory
+     */
+    public boolean isDirectory() {
+        return dirFlag && !isLink();
+    }
+
+    /**
+     * Populate data from this array as if it was in local file data.
+     *
+     * @param data an array of bytes
+     * @param offset the start offset
+     * @param length the number of bytes in the array from offset
+     * @throws java.util.zip.ZipException on error
+     */
+    public void parseFromLocalFileData(byte[] data, int offset, int length)
+            throws ZipException {
+
+        long givenChecksum = ZipLong.getValue(data, offset);
+        byte[] tmp = new byte[length - WORD];
+        System.arraycopy(data, offset + WORD, tmp, 0, length - WORD);
+        crc.reset();
+        crc.update(tmp);
+        long realChecksum = crc.getValue();
+        if (givenChecksum != realChecksum) {
+            throw new ZipException("bad CRC checksum "
+                    + Long.toHexString(givenChecksum)
+                    + " instead of "
+                    + Long.toHexString(realChecksum));
+        }
+
+        int newMode = ZipShort.getValue(tmp, 0);
+        byte[] linkArray = new byte[(int) ZipLong.getValue(tmp, 2)];
+        uid = ZipShort.getValue(tmp, 6);
+        gid = ZipShort.getValue(tmp, 8);
+
+        if (linkArray.length == 0) {
+            link = "";
+        } else {
+            System.arraycopy(tmp, 10, linkArray, 0, linkArray.length);
+            link = new String(linkArray); // Uses default charset - see class Javadoc
+        }
+        setDirectory((newMode & DIR_FLAG) != 0);
+        setMode(newMode);
+    }
+
+    /**
+     * Doesn't do anything special since this class always uses the
+     * same data in central directory and local file data.
+     */
+    public void parseFromCentralDirectoryData(byte[] buffer, int offset,
+                                              int length)
+            throws ZipException {
+        parseFromLocalFileData(buffer, offset, length);
+    }
+
+    /**
+     * Get the file mode for given permissions with the correct file type.
+     *
+     * @param mode the mode
+     * @return the type with the mode
+     */
+    protected int getMode(int mode) {
+        int type = FILE_FLAG;
+        if (isLink()) {
+            type = LINK_FLAG;
+        } else if (isDirectory()) {
+            type = DIR_FLAG;
+        }
+        return type | (mode & PERM_MASK);
+    }
+
+    @Override
+    public Object clone() throws CloneNotSupportedException {
+        AsiExtraField cloned = (AsiExtraField) super.clone();
+        cloned.crc = new CRC32();
+        return cloned;
+    }
+}
diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ExtraFieldUtils.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ExtraFieldUtils.java
new file mode 100644
index 0000000..8036f3a
--- /dev/null
+++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ExtraFieldUtils.java
@@ -0,0 +1,290 @@
+
+package org.xbib.io.archive.zip;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.zip.ZipException;
+
+/**
+ * ZipExtraField related methods
+ */
+public class ExtraFieldUtils {
+
+    private static final int WORD = 4;
+
+    /**
+     * Static registry of known extra fields.
+     */
+    private static final Map<ZipShort, Class<?>> implementations;
+
+    static {
+        implementations = new HashMap<ZipShort, Class<?>>();
+        register(AsiExtraField.class);
+        register(JarMarker.class);
+        register(UnicodePathExtraField.class);
+        register(UnicodeCommentExtraField.class);
+        register(Zip64ExtendedInformationExtraField.class);
+    }
+
+    /**
+     * Register a ZipExtraField implementation.
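+     * <p>For example, the static initializer above wires up the built-in
+     * fields via {@code register(AsiExtraField.class)}; custom
+     * ZipExtraField implementations can be registered the same way.</p>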
+     * The given class must have a no-arg constructor and implement
+     * the {@link ZipExtraField ZipExtraField interface}.
+     *
+     * @param c the class to register
+     */
+    public static void register(Class<?> c) {
+        try {
+            ZipExtraField ze = (ZipExtraField) c.newInstance();
+            implementations.put(ze.getHeaderId(), c);
+        } catch (ClassCastException cc) {
+            throw new RuntimeException(c + " doesn't implement ZipExtraField");
+        } catch (InstantiationException ie) {
+            throw new RuntimeException(c + " is not a concrete class");
+        } catch (IllegalAccessException ie) {
+            throw new RuntimeException(c + "'s no-arg constructor is not public");
+        }
+    }
+
+    /**
+     * Create an instance of the appropriate ExtraField, falls back to
+     * {@link UnrecognizedExtraField UnrecognizedExtraField}.
+     *
+     * @param headerId the header identifier
+     * @return an instance of the appropriate ExtraField
+     * @throws InstantiationException if unable to instantiate the class
+     * @throws IllegalAccessException if not allowed to instantiate the class
+     */
+    public static ZipExtraField createExtraField(ZipShort headerId)
+            throws InstantiationException, IllegalAccessException {
+        Class<?> c = implementations.get(headerId);
+        if (c != null) {
+            return (ZipExtraField) c.newInstance();
+        }
+        UnrecognizedExtraField u = new UnrecognizedExtraField();
+        u.setHeaderId(headerId);
+        return u;
+    }
+
+    /**
+     * Split the array into ExtraFields and populate them with the
+     * given data as local file data, throwing an exception if the
+     * data cannot be parsed.
+     *
+     * @param data an array of bytes as it appears in local file data
+     * @return an array of ExtraFields
+     * @throws java.util.zip.ZipException on error
+     */
+    public static ZipExtraField[] parse(byte[] data) throws ZipException {
+        return parse(data, true, UnparseableExtraField.THROW);
+    }
+
+    /**
+     * Split the array into ExtraFields and populate them with the
+     * given data, throwing an exception if the data cannot be parsed.
+     *
+     * @param data an array of bytes
+     * @param local whether data originates from the local file data
+     * or the central directory
+     * @return an array of ExtraFields
+     * @throws java.util.zip.ZipException on error
+     */
+    public static ZipExtraField[] parse(byte[] data, boolean local)
+            throws ZipException {
+        return parse(data, local, UnparseableExtraField.THROW);
+    }
+
+    /**
+     * Split the array into ExtraFields and populate them with the
+     * given data.
+     *
+     * @param data an array of bytes
+     * @param local whether data originates from the local file data
+     * or the central directory
+     * @param onUnparseableData what to do if the extra field data
+     * cannot be parsed.
+     * @return an array of ExtraFields
+     * @throws java.util.zip.ZipException on error
+     */
+    public static ZipExtraField[] parse(byte[] data, boolean local,
+                                        UnparseableExtraField onUnparseableData)
+            throws ZipException {
+        List<ZipExtraField> v = new ArrayList<ZipExtraField>();
+        int start = 0;
+        LOOP:
+        while (start <= data.length - WORD) {
+            ZipShort headerId = new ZipShort(data, start);
+            int length = (new ZipShort(data, start + 2)).getValue();
+            if (start + WORD + length > data.length) {
+                switch (onUnparseableData.getKey()) {
+                    case UnparseableExtraField.THROW_KEY:
+                        throw new ZipException("bad extra field starting at "
Block length of " + + length + " bytes exceeds remaining" + + " data of " + + (data.length - start - WORD) + + " bytes."); + case UnparseableExtraField.READ_KEY: + UnparseableExtraFieldData field = + new UnparseableExtraFieldData(); + if (local) { + field.parseFromLocalFileData(data, start, + data.length - start); + } else { + field.parseFromCentralDirectoryData(data, start, + data.length - start); + } + v.add(field); + //$FALL-THROUGH$ + case UnparseableExtraField.SKIP_KEY: + // since we cannot parse the data we must assume + // the extra field consumes the whole rest of the + // available data + break LOOP; + default: + throw new ZipException("unknown UnparseableExtraField key: " + + onUnparseableData.getKey()); + } + } + try { + ZipExtraField ze = createExtraField(headerId); + if (local) { + ze.parseFromLocalFileData(data, start + WORD, length); + } else { + ze.parseFromCentralDirectoryData(data, start + WORD, + length); + } + v.add(ze); + } catch (InstantiationException | IllegalAccessException ie) { + throw new ZipException(ie.getMessage()); + } + start += (length + WORD); + } + + ZipExtraField[] result = new ZipExtraField[v.size()]; + return v.toArray(result); + } + + /** + * Merges the local file data fields of the given ZipExtraFields. + * + * @param data an array of ExtraFiles + * @return an array of bytes + */ + public static byte[] mergeLocalFileDataData(ZipExtraField[] data) { + final boolean lastIsUnparseableHolder = data.length > 0 + && data[data.length - 1] instanceof UnparseableExtraFieldData; + int regularExtraFieldCount = + lastIsUnparseableHolder ? data.length - 1 : data.length; + + int sum = WORD * regularExtraFieldCount; + for (ZipExtraField element : data) { + sum += element.getLocalFileDataLength().getValue(); + } + + byte[] result = new byte[sum]; + int start = 0; + for (int i = 0; i < regularExtraFieldCount; i++) { + System.arraycopy(data[i].getHeaderId().getBytes(), + 0, result, start, 2); + System.arraycopy(data[i].getLocalFileDataLength().getBytes(), + 0, result, start + 2, 2); + byte[] local = data[i].getLocalFileDataData(); + System.arraycopy(local, 0, result, start + WORD, local.length); + start += (local.length + WORD); + } + if (lastIsUnparseableHolder) { + byte[] local = data[data.length - 1].getLocalFileDataData(); + System.arraycopy(local, 0, result, start, local.length); + } + return result; + } + + /** + * Merges the central directory fields of the given ZipExtraFields. + * + * @param data an array of ExtraFields + * @return an array of bytes + */ + public static byte[] mergeCentralDirectoryData(ZipExtraField[] data) { + final boolean lastIsUnparseableHolder = data.length > 0 + && data[data.length - 1] instanceof UnparseableExtraFieldData; + int regularExtraFieldCount = + lastIsUnparseableHolder ? 
data.length - 1 : data.length; + + int sum = WORD * regularExtraFieldCount; + for (ZipExtraField element : data) { + sum += element.getCentralDirectoryLength().getValue(); + } + byte[] result = new byte[sum]; + int start = 0; + for (int i = 0; i < regularExtraFieldCount; i++) { + System.arraycopy(data[i].getHeaderId().getBytes(), + 0, result, start, 2); + System.arraycopy(data[i].getCentralDirectoryLength().getBytes(), + 0, result, start + 2, 2); + byte[] local = data[i].getCentralDirectoryData(); + System.arraycopy(local, 0, result, start + WORD, local.length); + start += (local.length + WORD); + } + if (lastIsUnparseableHolder) { + byte[] local = data[data.length - 1].getCentralDirectoryData(); + System.arraycopy(local, 0, result, start, local.length); + } + return result; + } + + /** + * "enum" for the possible actions to take if the extra field + * cannot be parsed. + */ + public static final class UnparseableExtraField { + /** + * Key for "throw an exception" action. + */ + public static final int THROW_KEY = 0; + /** + * Key for "skip" action. + */ + public static final int SKIP_KEY = 1; + /** + * Key for "read" action. + */ + public static final int READ_KEY = 2; + + /** + * Throw an exception if field cannot be parsed. + */ + public static final UnparseableExtraField THROW + = new UnparseableExtraField(THROW_KEY); + + /** + * Skip the extra field entirely and don't make its data + * available - effectively removing the extra field data. + */ + public static final UnparseableExtraField SKIP + = new UnparseableExtraField(SKIP_KEY); + + /** + * Read the extra field data into an instance of {@link + * UnparseableExtraFieldData UnparseableExtraFieldData}. + */ + public static final UnparseableExtraField READ + = new UnparseableExtraField(READ_KEY); + + private final int key; + + private UnparseableExtraField(int k) { + key = k; + } + + /** + * Key of the action to take. + */ + public int getKey() { + return key; + } + } +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/GeneralPurposeBit.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/GeneralPurposeBit.java new file mode 100644 index 0000000..86c5054 --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/GeneralPurposeBit.java @@ -0,0 +1,152 @@ +package org.xbib.io.archive.zip; + +/** + * Parser/encoder for the "general purpose bit" field in ZIP's local + * file and central directory headers. + */ +public final class GeneralPurposeBit { + /** + * Indicates that the file is encrypted. + */ + private static final int ENCRYPTION_FLAG = 1; + + /** + * Indicates that a data descriptor stored after the file contents + * will hold CRC and size information. + */ + private static final int DATA_DESCRIPTOR_FLAG = 1 << 3; + + /** + * Indicates strong encryption. + */ + private static final int STRONG_ENCRYPTION_FLAG = 1 << 6; + + /** + * Indicates that filenames are written in utf-8. + */ + protected static final int UFT8_NAMES_FLAG = 1 << 11; + + private boolean languageEncodingFlag = false; + + private boolean dataDescriptorFlag = false; + + private boolean encryptionFlag = false; + + private boolean strongEncryptionFlag = false; + + public GeneralPurposeBit() { + } + + /** + * whether the current entry uses UTF8 for file name and comment. + */ + public boolean usesUTF8ForNames() { + return languageEncodingFlag; + } + + /** + * whether the current entry will use UTF8 for file name and comment. 
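+     * <p>This drives bit 11 (0x0800) of the flag word: for instance, a
+     * raw flag value of 0x0800 passed to {@link #parse} yields an
+     * instance whose usesUTF8ForNames() returns true.</p>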
+ */ + public void useUTF8ForNames(boolean b) { + languageEncodingFlag = b; + } + + /** + * whether the current entry uses the data descriptor to store CRC + * and size information + */ + public boolean usesDataDescriptor() { + return dataDescriptorFlag; + } + + /** + * whether the current entry will use the data descriptor to store + * CRC and size information + */ + public void useDataDescriptor(boolean b) { + dataDescriptorFlag = b; + } + + /** + * whether the current entry is encrypted + */ + public boolean usesEncryption() { + return encryptionFlag; + } + + /** + * whether the current entry will be encrypted + */ + public void useEncryption(boolean b) { + encryptionFlag = b; + } + + /** + * whether the current entry is encrypted using strong encryption + */ + public boolean usesStrongEncryption() { + return encryptionFlag && strongEncryptionFlag; + } + + /** + * whether the current entry will be encrypted using strong encryption + */ + public void useStrongEncryption(boolean b) { + strongEncryptionFlag = b; + if (b) { + useEncryption(true); + } + } + + /** + * Encodes the set bits in a form suitable for ZIP archives. + */ + public byte[] encode() { + return + ZipShort.getBytes((dataDescriptorFlag ? DATA_DESCRIPTOR_FLAG : 0) + | + (languageEncodingFlag ? UFT8_NAMES_FLAG : 0) + | + (encryptionFlag ? ENCRYPTION_FLAG : 0) + | + (strongEncryptionFlag ? STRONG_ENCRYPTION_FLAG : 0) + ); + } + + /** + * Parses the supported flags from the given archive data. + * + * @param data local file header or a central directory entry. + * @param offset offset at which the general purpose bit starts + */ + public static GeneralPurposeBit parse(final byte[] data, final int offset) { + final int generalPurposeFlag = ZipShort.getValue(data, offset); + GeneralPurposeBit b = new GeneralPurposeBit(); + b.useDataDescriptor((generalPurposeFlag & DATA_DESCRIPTOR_FLAG) != 0); + b.useUTF8ForNames((generalPurposeFlag & UFT8_NAMES_FLAG) != 0); + b.useStrongEncryption((generalPurposeFlag & STRONG_ENCRYPTION_FLAG) + != 0); + b.useEncryption((generalPurposeFlag & ENCRYPTION_FLAG) != 0); + return b; + } + + @Override + public int hashCode() { + return 3 * (7 * (13 * (17 * (encryptionFlag ? 1 : 0) + + (strongEncryptionFlag ? 1 : 0)) + + (languageEncodingFlag ? 1 : 0)) + + (dataDescriptorFlag ? 1 : 0)); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof GeneralPurposeBit)) { + return false; + } + GeneralPurposeBit g = (GeneralPurposeBit) o; + return g.encryptionFlag == encryptionFlag + && g.strongEncryptionFlag == strongEncryptionFlag + && g.languageEncodingFlag == languageEncodingFlag + && g.dataDescriptorFlag == dataDescriptorFlag; + } +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/JarMarker.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/JarMarker.java new file mode 100644 index 0000000..7ad7eef --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/JarMarker.java @@ -0,0 +1,102 @@ + +package org.xbib.io.archive.zip; + +import java.util.zip.ZipException; + +/** + * If this extra field is added as the very first extra field of the + * archive, Solaris will consider it an executable jar file. 
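+ * <p>Usage sketch with this package's ZipArchiveEntry:
+ * {@code entry.addAsFirstExtraField(JarMarker.getInstance());}</p>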
+ */
+public final class JarMarker implements ZipExtraField {
+
+    private static final ZipShort ID = new ZipShort(0xCAFE);
+    private static final ZipShort NULL = new ZipShort(0);
+    private static final byte[] NO_BYTES = new byte[0];
+    private static final JarMarker DEFAULT = new JarMarker();
+
+    public JarMarker() {
+    }
+
+    /**
+     * Since JarMarker is stateless we can always use the same instance.
+     *
+     * @return the DEFAULT JarMarker instance
+     */
+    public static JarMarker getInstance() {
+        return DEFAULT;
+    }
+
+    /**
+     * The Header-ID.
+     *
+     * @return the header id
+     */
+    public ZipShort getHeaderId() {
+        return ID;
+    }
+
+    /**
+     * Length of the extra field in the local file data - without
+     * Header-ID or length specifier.
+     *
+     * @return 0
+     */
+    public ZipShort getLocalFileDataLength() {
+        return NULL;
+    }
+
+    /**
+     * Length of the extra field in the central directory - without
+     * Header-ID or length specifier.
+     *
+     * @return 0
+     */
+    public ZipShort getCentralDirectoryLength() {
+        return NULL;
+    }
+
+    /**
+     * The actual data to put into local file data - without Header-ID
+     * or length specifier.
+     *
+     * @return the data
+     */
+    public byte[] getLocalFileDataData() {
+        return NO_BYTES;
+    }
+
+    /**
+     * The actual data to put into central directory - without Header-ID
+     * or length specifier.
+     *
+     * @return the data
+     */
+    public byte[] getCentralDirectoryData() {
+        return NO_BYTES;
+    }
+
+    /**
+     * Populate data from this array as if it was in local file data.
+     *
+     * @param data an array of bytes
+     * @param offset the start offset
+     * @param length the number of bytes in the array from offset
+     * @throws java.util.zip.ZipException on error
+     */
+    public void parseFromLocalFileData(byte[] data, int offset, int length)
+            throws ZipException {
+        if (length != 0) {
+            throw new ZipException("JarMarker doesn't expect any data");
+        }
+    }
+
+    /**
+     * Doesn't do anything special since this class always uses the
+     * same data in central directory and local file data.
+     */
+    public void parseFromCentralDirectoryData(byte[] buffer, int offset,
+                                              int length)
+            throws ZipException {
+        parseFromLocalFileData(buffer, offset, length);
+    }
+}
diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnicodeCommentExtraField.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnicodeCommentExtraField.java
new file mode 100644
index 0000000..b0491c5
--- /dev/null
+++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnicodeCommentExtraField.java
@@ -0,0 +1,42 @@
+package org.xbib.io.archive.zip;
+
+/**
+ * Info-ZIP Unicode Comment Extra Field (0x6375):
+ * Stores the UTF-8 version of the file comment as stored in the
+ * central directory header.
+ * <pre>
+ *         Value         Size        Description
+ *         -----         ----        -----------
+ *  (UCom) 0x6375        Short       tag for this extra block type ("uc")
+ *         TSize         Short       total data size for this block
+ *         Version       1 byte      version of this extra field, currently 1
+ *         ComCRC32      4 bytes     Comment Field CRC32 Checksum
+ *         UnicodeCom    Variable    UTF-8 version of the entry comment
+ * </pre>
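+ * <p>Sketch (the Cp437 charset and the local variables are examples,
+ * not API): record the UTF-8 comment next to the bytes actually stored:</p>
+ * <pre>
+ * byte[] stored = comment.getBytes(java.nio.charset.Charset.forName("Cp437"));
+ * entry.addExtraField(new UnicodeCommentExtraField(comment, stored, 0, stored.length));
+ * </pre>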
+ */
+public class UnicodeCommentExtraField extends AbstractUnicodeExtraField {
+
+    public static final ZipShort UCOM_ID = new ZipShort(0x6375);
+
+    public UnicodeCommentExtraField() {
+    }
+
+    /**
+     * Assemble as unicode comment extension from the comment given as
+     * text as well as the encoded bytes actually written to the archive.
+     *
+     * @param text The file comment
+     * @param bytes the bytes actually written to the archive
+     * @param off The offset of the encoded comment in bytes.
+     * @param len The length of the encoded comment in bytes.
+     */
+    public UnicodeCommentExtraField(String text, byte[] bytes, int off, int len) {
+        super(text, bytes, off, len);
+    }
+
+    public ZipShort getHeaderId() {
+        return UCOM_ID;
+    }
+
+}
diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnicodePathExtraField.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnicodePathExtraField.java
new file mode 100644
index 0000000..918273b
--- /dev/null
+++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnicodePathExtraField.java
@@ -0,0 +1,52 @@
+package org.xbib.io.archive.zip;
+
+/**
+ * Info-ZIP Unicode Path Extra Field (0x7075):
+ * Stores the UTF-8 version of the file name field as stored in the
+ * local header and central directory header.
+ * <pre>
+ *         Value         Size        Description
+ *         -----         ----        -----------
+ * (UPath) 0x7075        Short       tag for this extra block type ("up")
+ *         TSize         Short       total data size for this block
+ *         Version       1 byte      version of this extra field, currently 1
+ *         NameCRC32     4 bytes     File Name Field CRC32 Checksum
+ *         UnicodeName   Variable    UTF-8 version of the entry File Name
+ * </pre>
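+ * <p>Sketch, mirroring the comment field above (Cp437 again just an
+ * example): record the UTF-8 name for an entry written with a legacy
+ * charset:</p>
+ * <pre>
+ * byte[] stored = name.getBytes(java.nio.charset.Charset.forName("Cp437"));
+ * entry.addExtraField(new UnicodePathExtraField(name, stored));
+ * </pre>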
+ */
+public class UnicodePathExtraField extends AbstractUnicodeExtraField {
+
+    public static final ZipShort UPATH_ID = new ZipShort(0x7075);
+
+    public UnicodePathExtraField() {
+    }
+
+    /**
+     * Assemble as unicode path extension from the name given as
+     * text as well as the encoded bytes actually written to the archive.
+     *
+     * @param text The file name
+     * @param bytes the bytes actually written to the archive
+     * @param off The offset of the encoded filename in bytes.
+     * @param len The length of the encoded filename in bytes.
+     */
+    public UnicodePathExtraField(String text, byte[] bytes, int off, int len) {
+        super(text, bytes, off, len);
+    }
+
+    /**
+     * Assemble as unicode path extension from the name given as
+     * text as well as the encoded bytes actually written to the archive.
+     *
+     * @param name The file name
+     * @param bytes the bytes actually written to the archive
+     */
+    public UnicodePathExtraField(String name, byte[] bytes) {
+        super(name, bytes);
+    }
+
+    public ZipShort getHeaderId() {
+        return UPATH_ID;
+    }
+}
diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnixStat.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnixStat.java
new file mode 100644
index 0000000..ce240b2
--- /dev/null
+++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnixStat.java
@@ -0,0 +1,25 @@
+
+package org.xbib.io.archive.zip;
+
+/**
+ * Constants from stat.h on Unix systems.
+ */
+public interface UnixStat {
+    /**
+     * Bits used for permissions (and sticky bit)
+     */
+    int PERM_MASK = 07777;
+    /**
+     * Indicates symbolic links.
+     */
+    int LINK_FLAG = 0120000;
+    /**
+     * Indicates plain files.
+     */
+    int FILE_FLAG = 0100000;
+    /**
+     * Indicates directories.
+     */
+    int DIR_FLAG = 040000;
+
+}
diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnparseableExtraFieldData.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnparseableExtraFieldData.java
new file mode 100644
index 0000000..2c8f72e
--- /dev/null
+++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnparseableExtraFieldData.java
@@ -0,0 +1,91 @@
+package org.xbib.io.archive.zip;
+
+/**
+ * Wrapper for extra field data that doesn't conform to the recommended format of header-tag + size + data.
+ * The header-id is artificial (and not listed as a known ID in APPNOTE.TXT).
+ * Since it isn't used anywhere except to satisfy the
+ * ZipExtraField contract it shouldn't matter anyway.
+ */
+public final class UnparseableExtraFieldData implements ZipExtraField {
+    private static final ZipShort HEADER_ID = new ZipShort(0xACC1);
+
+    private byte[] localFileData;
+    private byte[] centralDirectoryData;
+
+    /**
+     * The Header-ID.
+     *
+     * @return a completely arbitrary value that should be ignored.
+     */
+    public ZipShort getHeaderId() {
+        return HEADER_ID;
+    }
+
+    /**
+     * Length of the complete extra field in the local file data.
+     *
+     * @return The LocalFileDataLength value
+     */
+    public ZipShort getLocalFileDataLength() {
+        return new ZipShort(localFileData == null ? 0 : localFileData.length);
+    }
+
+    /**
+     * Length of the complete extra field in the central directory.
+     *
+     * @return The CentralDirectoryLength value
+     */
+    public ZipShort getCentralDirectoryLength() {
+        return centralDirectoryData == null
+                ? getLocalFileDataLength()
+                : new ZipShort(centralDirectoryData.length);
+    }
+
+    /**
+     * The actual data to put into local file data.
+ * + * @return The LocalFileDataData value + */ + public byte[] getLocalFileDataData() { + return ZipUtil.copy(localFileData); + } + + /** + * The actual data to put into central directory. + * + * @return The CentralDirectoryData value + */ + public byte[] getCentralDirectoryData() { + return centralDirectoryData == null + ? getLocalFileDataData() : ZipUtil.copy(centralDirectoryData); + } + + /** + * Populate data from this array as if it was in local file data. + * + * @param buffer the buffer to read data from + * @param offset offset into buffer to read data + * @param length the length of data + */ + public void parseFromLocalFileData(byte[] buffer, int offset, int length) { + localFileData = new byte[length]; + System.arraycopy(buffer, offset, localFileData, 0, length); + } + + /** + * Populate data from this array as if it was in central directory data. + * + * @param buffer the buffer to read data from + * @param offset offset into buffer to read data + * @param length the length of data + */ + public void parseFromCentralDirectoryData(byte[] buffer, int offset, + int length) { + centralDirectoryData = new byte[length]; + System.arraycopy(buffer, offset, centralDirectoryData, 0, length); + if (localFileData == null) { + parseFromLocalFileData(buffer, offset, length); + } + } +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnrecognizedExtraField.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnrecognizedExtraField.java new file mode 100644 index 0000000..4090b7d --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnrecognizedExtraField.java @@ -0,0 +1,135 @@ +package org.xbib.io.archive.zip; + +/** + * Simple placeholder for all those extra fields we don't want to deal + * with. Assumes local file data and central directory entries are + * identical - unless told the opposite. + */ +public class UnrecognizedExtraField implements ZipExtraField { + + /** + * The Header-ID. + */ + private ZipShort headerId; + + /** + * Set the header id. + * + * @param headerId the header id to use + */ + public void setHeaderId(ZipShort headerId) { + this.headerId = headerId; + } + + /** + * Get the header id. + * + * @return the header id + */ + public ZipShort getHeaderId() { + return headerId; + } + + /** + * Extra field data in local file data - without + * Header-ID or length specifier. + */ + private byte[] localData; + + /** + * Set the extra field data in the local file data - + * without Header-ID or length specifier. + * + * @param data the field data to use + */ + public void setLocalFileDataData(byte[] data) { + localData = ZipUtil.copy(data); + } + + /** + * Get the length of the local data. + * + * @return the length of the local data + */ + public ZipShort getLocalFileDataLength() { + return new ZipShort(localData.length); + } + + /** + * Get the local data. + * + * @return the local data + */ + public byte[] getLocalFileDataData() { + return ZipUtil.copy(localData); + } + + /** + * Extra field data in central directory - without + * Header-ID or length specifier. + */ + private byte[] centralData; + + /** + * Set the extra field data in central directory. + * + * @param data the data to use + */ + public void setCentralDirectoryData(byte[] data) { + centralData = ZipUtil.copy(data); + } + + /** + * Get the central data length. + * If there is no central data, get the local file data length. 
+ * + * @return the central data length + */ + public ZipShort getCentralDirectoryLength() { + if (centralData != null) { + return new ZipShort(centralData.length); + } + return getLocalFileDataLength(); + } + + /** + * Get the central data. + * + * @return the central data if present, else return the local file data + */ + public byte[] getCentralDirectoryData() { + if (centralData != null) { + return ZipUtil.copy(centralData); + } + return getLocalFileDataData(); + } + + /** + * @param data the array of bytes. + * @param offset the source location in the data array. + * @param length the number of bytes to use in the data array. + * @see ZipExtraField#parseFromLocalFileData(byte[], int, int) + */ + public void parseFromLocalFileData(byte[] data, int offset, int length) { + byte[] tmp = new byte[length]; + System.arraycopy(data, offset, tmp, 0, length); + setLocalFileDataData(tmp); + } + + /** + * @param data the array of bytes. + * @param offset the source location in the data array. + * @param length the number of bytes to use in the data array. + * @see ZipExtraField#parseFromCentralDirectoryData(byte[], int, int) + */ + public void parseFromCentralDirectoryData(byte[] data, int offset, + int length) { + byte[] tmp = new byte[length]; + System.arraycopy(data, offset, tmp, 0, length); + setCentralDirectoryData(tmp); + if (localData == null) { + setLocalFileDataData(tmp); + } + } + +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnsupportedZipFeatureException.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnsupportedZipFeatureException.java new file mode 100644 index 0000000..e3d1fe8 --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/UnsupportedZipFeatureException.java @@ -0,0 +1,72 @@ + +package org.xbib.io.archive.zip; + +import java.util.zip.ZipException; + +/** + * Exception thrown when attempting to read or write data for a zip + * entry that uses ZIP features not supported by this library. + */ +public class UnsupportedZipFeatureException extends ZipException { + + private final Feature reason; + + private final ZipArchiveEntry entry; + + /** + * Creates an exception. + * + * @param reason the feature that is not supported + * @param entry the entry using the feature + */ + public UnsupportedZipFeatureException(Feature reason, + ZipArchiveEntry entry) { + super("unsupported feature " + reason + " used in entry " + + entry.getName()); + this.reason = reason; + this.entry = entry; + } + + /** + * The unsupported feature that has been used. + */ + public Feature getFeature() { + return reason; + } + + /** + * The entry using the unsupported feature. + */ + public ZipArchiveEntry getEntry() { + return entry; + } + + /** + * ZIP Features that may or may not be supported. + */ + public static class Feature { + /** + * The entry is encrypted. + */ + public static final Feature ENCRYPTION = new Feature("encryption"); + /** + * The entry used an unsupported compression method. + */ + public static final Feature METHOD = new Feature("compression method"); + /** + * The entry uses a data descriptor. 
+         */
+        public static final Feature DATA_DESCRIPTOR = new Feature("data descriptor");
+
+        private final String name;
+
+        private Feature(String name) {
+            this.name = name;
+        }
+
+        @Override
+        public String toString() {
+            return name;
+        }
+    }
+}
\ No newline at end of file
diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/Zip64ExtendedInformationExtraField.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/Zip64ExtendedInformationExtraField.java
new file mode 100644
index 0000000..a015910
--- /dev/null
+++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/Zip64ExtendedInformationExtraField.java
@@ -0,0 +1,315 @@
+package org.xbib.io.archive.zip;
+
+import java.util.zip.ZipException;
+
+import static org.xbib.io.archive.zip.ZipConstants.DWORD;
+import static org.xbib.io.archive.zip.ZipConstants.WORD;
+
+/**
+ * Holds size and other extended information for entries that use Zip64
+ * features.
+ * From PKWARE's APPNOTE.TXT
+ * <pre>
+ * Zip64 Extended Information Extra Field (0x0001):
+ *
+ *          The following is the layout of the zip64 extended
+ *          information "extra" block. If one of the size or
+ *          offset fields in the Local or Central directory
+ *          record is too small to hold the required data,
+ *          a Zip64 extended information record is created.
+ *          The order of the fields in the zip64 extended
+ *          information record is fixed, but the fields will
+ *          only appear if the corresponding Local or Central
+ *          directory record field is set to 0xFFFF or 0xFFFFFFFF.
+ *
+ *          Note: all fields stored in Intel low-byte/high-byte order.
+ *
+ *          Value      Size       Description
+ *          -----      ----       -----------
+ *  (ZIP64) 0x0001     2 bytes    Tag for this "extra" block type
+ *          Size       2 bytes    Size of this "extra" block
+ *          Original
+ *          Size       8 bytes    Original uncompressed file size
+ *          Compressed
+ *          Size       8 bytes    Size of compressed data
+ *          Relative Header
+ *          Offset     8 bytes    Offset of local header record
+ *          Disk Start
+ *          Number     4 bytes    Number of the disk on which
+ *                                this file starts
+ *
+ *          This entry in the Local header must include BOTH original
+ *          and compressed file size fields. If encrypting the
+ *          central directory and bit 13 of the general purpose bit
+ *          flag is set indicating masking, the value stored in the
+ *          Local Header for the original file size will be zero.
+ * </pre>
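+ * <p>For example, a 5 GiB entry stores 0xFFFFFFFF in the classic 4-byte
+ * size fields and the true value 5368709120 in the 8-byte Original Size
+ * slot of this record.</p>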
+ * Currently this code doesn't support encrypting the
+ * central directory so the note about masking doesn't apply.
+ * The implementation relies on data being read from the local file
+ * header and assumes that both size values are always present.
+ */
+public class Zip64ExtendedInformationExtraField implements ZipExtraField {
+
+    static final ZipShort HEADER_ID = new ZipShort(0x0001);
+
+    private static final String LFH_MUST_HAVE_BOTH_SIZES_MSG =
+            "Zip64 extended information must contain"
+                    + " both size values in the local file header.";
+
+    private ZipEightByteInteger size, compressedSize, relativeHeaderOffset;
+    private ZipLong diskStart;
+
+    /**
+     * Stored in {@link #parseFromCentralDirectoryData
+     * parseFromCentralDirectoryData} so it can be reused when ZipFile
+     * calls {@link #reparseCentralDirectoryData
+     * reparseCentralDirectoryData}.
+     */
+    private byte[] rawCentralDirectoryData;
+
+    public Zip64ExtendedInformationExtraField() {
+    }
+
+    /**
+     * Creates an extra field based on the original and compressed size.
+     *
+     * @param size the entry's original size
+     * @param compressedSize the entry's compressed size
+     * @throws IllegalArgumentException if size or compressedSize is null
+     */
+    public Zip64ExtendedInformationExtraField(ZipEightByteInteger size,
+                                              ZipEightByteInteger compressedSize) {
+        this(size, compressedSize, null, null);
+    }
+
+    /**
+     * Creates an extra field based on all four possible values.
+     *
+     * @param size the entry's original size
+     * @param compressedSize the entry's compressed size
+     * @param relativeHeaderOffset the offset of the entry's local header record
+     * @param diskStart the number of the disk the entry starts on
+     * @throws IllegalArgumentException if size or compressedSize is null
+     */
+    public Zip64ExtendedInformationExtraField(ZipEightByteInteger size,
+                                              ZipEightByteInteger compressedSize,
+                                              ZipEightByteInteger relativeHeaderOffset,
+                                              ZipLong diskStart) {
+        this.size = size;
+        this.compressedSize = compressedSize;
+        this.relativeHeaderOffset = relativeHeaderOffset;
+        this.diskStart = diskStart;
+    }
+
+    public ZipShort getHeaderId() {
+        return HEADER_ID;
+    }
+
+    public ZipShort getLocalFileDataLength() {
+        return new ZipShort(size != null ? 2 * DWORD : 0);
+    }
+
+    public ZipShort getCentralDirectoryLength() {
+        return new ZipShort((size != null ? DWORD : 0)
+                + (compressedSize != null ? DWORD : 0)
+                + (relativeHeaderOffset != null ? DWORD : 0)
+                + (diskStart != null ?
WORD : 0)); + } + + public byte[] getLocalFileDataData() { + if (size != null || compressedSize != null) { + if (size == null || compressedSize == null) { + throw new IllegalArgumentException(LFH_MUST_HAVE_BOTH_SIZES_MSG); + } + byte[] data = new byte[2 * DWORD]; + addSizes(data); + return data; + } + return new byte[0]; + } + + public byte[] getCentralDirectoryData() { + byte[] data = new byte[getCentralDirectoryLength().getValue()]; + int off = addSizes(data); + if (relativeHeaderOffset != null) { + System.arraycopy(relativeHeaderOffset.getBytes(), 0, data, off, DWORD); + off += DWORD; + } + if (diskStart != null) { + System.arraycopy(diskStart.getBytes(), 0, data, off, WORD); + off += WORD; + } + return data; + } + + public void parseFromLocalFileData(byte[] buffer, int offset, int length) + throws ZipException { + if (length == 0) { + // no local file data at all, may happen if an archive + // only holds a ZIP64 extended information extra field + // inside the central directory but not inside the local + // file header + return; + } + if (length < 2 * DWORD) { + throw new ZipException(LFH_MUST_HAVE_BOTH_SIZES_MSG); + } + size = new ZipEightByteInteger(buffer, offset); + offset += DWORD; + compressedSize = new ZipEightByteInteger(buffer, offset); + offset += DWORD; + int remaining = length - 2 * DWORD; + if (remaining >= DWORD) { + relativeHeaderOffset = new ZipEightByteInteger(buffer, offset); + offset += DWORD; + remaining -= DWORD; + } + if (remaining >= WORD) { + diskStart = new ZipLong(buffer, offset); + offset += WORD; + remaining -= WORD; + } + } + + public void parseFromCentralDirectoryData(byte[] buffer, int offset, + int length) + throws ZipException { + // store for processing in reparseCentralDirectoryData + rawCentralDirectoryData = new byte[length]; + System.arraycopy(buffer, offset, rawCentralDirectoryData, 0, length); + + // if there is no size information in here, we are screwed and + // can only hope things will get resolved by LFH data later + // But there are some cases that can be detected + // * all data is there + // * length == 24 -> both sizes and offset + // * length % 8 == 4 -> at least we can identify the diskStart field + if (length >= 3 * DWORD + WORD) { + parseFromLocalFileData(buffer, offset, length); + } else if (length == 3 * DWORD) { + size = new ZipEightByteInteger(buffer, offset); + offset += DWORD; + compressedSize = new ZipEightByteInteger(buffer, offset); + offset += DWORD; + relativeHeaderOffset = new ZipEightByteInteger(buffer, offset); + } else if (length % DWORD == WORD) { + diskStart = new ZipLong(buffer, offset + length - WORD); + } + } + + /** + * Parses the raw bytes read from the central directory extra + * field with knowledge which fields are expected to be there. + * All four fields inside the zip64 extended information extra + * field are optional and only present if their corresponding + * entry inside the central directory contains the correct magic + * value. + */ + public void reparseCentralDirectoryData(boolean hasUncompressedSize, + boolean hasCompressedSize, + boolean hasRelativeHeaderOffset, + boolean hasDiskStart) + throws ZipException { + if (rawCentralDirectoryData != null) { + int expectedLength = (hasUncompressedSize ? DWORD : 0) + + (hasCompressedSize ? DWORD : 0) + + (hasRelativeHeaderOffset ? DWORD : 0) + + (hasDiskStart ? 
WORD : 0);
+            if (rawCentralDirectoryData.length != expectedLength) {
+                throw new ZipException("central directory zip64 extended"
+                        + " information extra field's length"
+                        + " doesn't match central directory"
+                        + " data. Expected length "
+                        + expectedLength + " but is "
+                        + rawCentralDirectoryData.length);
+            }
+            int offset = 0;
+            if (hasUncompressedSize) {
+                size = new ZipEightByteInteger(rawCentralDirectoryData, offset);
+                offset += DWORD;
+            }
+            if (hasCompressedSize) {
+                compressedSize = new ZipEightByteInteger(rawCentralDirectoryData,
+                        offset);
+                offset += DWORD;
+            }
+            if (hasRelativeHeaderOffset) {
+                relativeHeaderOffset =
+                        new ZipEightByteInteger(rawCentralDirectoryData, offset);
+                offset += DWORD;
+            }
+            if (hasDiskStart) {
+                diskStart = new ZipLong(rawCentralDirectoryData, offset);
+                offset += WORD;
+            }
+        }
+    }
+
+    /**
+     * The uncompressed size stored in this extra field.
+     */
+    public ZipEightByteInteger getSize() {
+        return size;
+    }
+
+    /**
+     * The uncompressed size stored in this extra field.
+     */
+    public void setSize(ZipEightByteInteger size) {
+        this.size = size;
+    }
+
+    /**
+     * The compressed size stored in this extra field.
+     */
+    public ZipEightByteInteger getCompressedSize() {
+        return compressedSize;
+    }
+
+    /**
+     * The compressed size stored in this extra field.
+     */
+    public void setCompressedSize(ZipEightByteInteger compressedSize) {
+        this.compressedSize = compressedSize;
+    }
+
+    /**
+     * The relative header offset stored in this extra field.
+     */
+    public ZipEightByteInteger getRelativeHeaderOffset() {
+        return relativeHeaderOffset;
+    }
+
+    /**
+     * The relative header offset stored in this extra field.
+     */
+    public void setRelativeHeaderOffset(ZipEightByteInteger rho) {
+        relativeHeaderOffset = rho;
+    }
+
+    /**
+     * The disk start number stored in this extra field.
+     */
+    public ZipLong getDiskStartNumber() {
+        return diskStart;
+    }
+
+    /**
+     * The disk start number stored in this extra field.
+     */
+    public void setDiskStartNumber(ZipLong ds) {
+        diskStart = ds;
+    }
+
+    private int addSizes(byte[] data) {
+        int off = 0;
+        if (size != null) {
+            System.arraycopy(size.getBytes(), 0, data, 0, DWORD);
+            off += DWORD;
+        }
+        if (compressedSize != null) {
+            System.arraycopy(compressedSize.getBytes(), 0, data, off, DWORD);
+            off += DWORD;
+        }
+        return off;
+    }
+}
diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/Zip64Mode.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/Zip64Mode.java
new file mode 100644
index 0000000..3537f36
--- /dev/null
+++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/Zip64Mode.java
@@ -0,0 +1,26 @@
+package org.xbib.io.archive.zip;
+
+/**
+ * The different modes {@link ZipArchiveOutputStream} can operate in.
+ *
+ * @see ZipArchiveOutputStream#setUseZip64
+ */
+public enum Zip64Mode {
+    /**
+     * Use Zip64 extensions for all entries, even if it is clear it is
+     * not required.
+     */
+    Always,
+    /**
+     * Don't use Zip64 extensions for any entries.
+     * This will cause a {@link Zip64RequiredException} to be
+     * thrown if {@link ZipArchiveOutputStream} detects it needs Zip64
+     * support.
+     */
+    Never,
+    /**
+     * Use Zip64 extensions for all entries where they are required,
+     * don't use them for entries that clearly don't require them.
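+     * <p>Typical use, via the method referenced above (zipOut standing
+     * for a ZipArchiveOutputStream):
+     * {@code zipOut.setUseZip64(Zip64Mode.AsNeeded);} Zip64 records are
+     * then written only when an entry or the archive itself crosses the
+     * 4 GByte or 65535-entry limits (see Zip64RequiredException).</p>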
+ */ + AsNeeded +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/Zip64RequiredException.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/Zip64RequiredException.java new file mode 100644 index 0000000..be1c05b --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/Zip64RequiredException.java @@ -0,0 +1,29 @@ + +package org.xbib.io.archive.zip; + +import java.util.zip.ZipException; + +/** + * Exception thrown when attempting to write data that requires Zip64 + * support to an archive and {@link ZipArchiveOutputStream#setUseZip64 + * UseZip64} has been set to {@link Zip64Mode#Never Never}. + */ +public class Zip64RequiredException extends ZipException { + + /** + * Helper to format "entry too big" messages. + */ + static String getEntryTooBigMessage(ZipArchiveEntry ze) { + return ze.getName() + "'s size exceeds the limit of 4GByte."; + } + + static final String ARCHIVE_TOO_BIG_MESSAGE = + "archive's size exceeds the limit of 4GByte."; + + static final String TOO_MANY_ENTRIES_MESSAGE = + "archive contains more than 65535 entries."; + + public Zip64RequiredException(String reason) { + super(reason); + } +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipArchiveEntry.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipArchiveEntry.java new file mode 100644 index 0000000..1f46a4a --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipArchiveEntry.java @@ -0,0 +1,673 @@ +package org.xbib.io.archive.zip; + +import org.xbib.io.archive.entry.ArchiveEntry; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.zip.ZipException; + +/** + * Extension that adds better handling of extra fields and provides + * access to the internal and external file attributes. + * The extra data is expected to follow the recommendation of + * APPNOTE.txt + *
+ * <ul>
+ * <li>the extra byte array consists of a sequence of extra fields</li>
+ * <li>each extra field starts with a two byte header id followed by
+ * a two byte sequence holding the length of the remainder of
+ * data.</li>
+ * </ul>
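+ * <p>For example, the four bytes 0x55 0x54 0x05 0x00 announce an extra
+ * field with header id 0x5455 followed by five bytes of field data (ids
+ * and lengths are little-endian two byte values).</p>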
+ * Any extra data that cannot be parsed by the rules above will be + * consumed as "unparseable" extra data and treated differently by the + * methods of this class. + */ +public class ZipArchiveEntry extends java.util.zip.ZipEntry implements ArchiveEntry { + + public static final int PLATFORM_UNIX = 3; + + public static final int PLATFORM_FAT = 0; + + private static final int SHORT_MASK = 0xFFFF; + + private static final int SHORT_SHIFT = 16; + + /** + * The {@link java.util.zip.ZipEntry} base class only supports + * the compression methods STORED and DEFLATED. We override the + * field so that any compression methods can be used. + * The default value -1 means that the method has not been specified. + * COMPRESS-93 + */ + private int method = -1; + + /** + * The {@link java.util.zip.ZipEntry#setSize} method in the base + * class throws an IllegalArgumentException if the size is bigger + * than 2GB for Java versions < 7. Need to keep our own size + * information for Zip64 support. + */ + private long size = ArchiveEntry.SIZE_UNKNOWN; + + private int internalAttributes = 0; + + private int platform = PLATFORM_FAT; + + private long externalAttributes = 0; + + private LinkedHashMap extraFields = null; + + private UnparseableExtraFieldData unparseableExtra = null; + + private String name = null; + + private byte[] rawName = null; + + private GeneralPurposeBit gpb = new GeneralPurposeBit(); + + public ZipArchiveEntry() { + this(""); + } + + /** + * Creates a new zip entry with the specified name. + * Assumes the entry represents a directory if and only if the + * name ends with a forward slash "/". + * + * @param name the name of the entry + */ + public ZipArchiveEntry(String name) { + super(name); + setName(name); + } + + /** + * Creates a new zip entry with fields taken from the specified zip entry. + * Assumes the entry represents a directory if and only if the + * name ends with a forward slash "/". + * + * @param entry the entry to get fields from + * @throws java.util.zip.ZipException on error + */ + public ZipArchiveEntry(java.util.zip.ZipEntry entry) throws ZipException { + super(entry); + setName(entry.getName()); + byte[] extra = entry.getExtra(); + if (extra != null) { + setExtraFields(ExtraFieldUtils.parse(extra, true, + ExtraFieldUtils + .UnparseableExtraField.READ)); + } else { + // initializes extra data to an empty byte array + setExtra(); + } + setMethod(entry.getMethod()); + setEntrySize(entry.getSize()); + } + + /** + * Creates a new zip entry with fields taken from the specified zip entry. + * Assumes the entry represents a directory if and only if the + * name ends with a forward slash "/". + * + * @param entry the entry to get fields from + * @throws java.util.zip.ZipException on error + */ + public ZipArchiveEntry(ZipArchiveEntry entry) throws ZipException { + this((java.util.zip.ZipEntry) entry); + setInternalAttributes(entry.getInternalAttributes()); + setExternalAttributes(entry.getExternalAttributes()); + setExtraFields(entry.getExtraFields(true)); + } + + /** + * Creates a new zip entry taking some information from the given + * file and using the provided name. + * The name will be adjusted to end with a forward slash "/" if + * the file is a directory. If the file is not a directory a + * potential trailing forward slash will be stripped from the + * entry name. + */ + public ZipArchiveEntry(File inputFile, String entryName) { + this(inputFile.isDirectory() && !entryName.endsWith("/") ? 
+ entryName + "/" : entryName); + if (inputFile.isFile()) { + setSize(inputFile.length()); + } + setTime(inputFile.lastModified()); + // TODO are there any other fields we can set here? + } + + /** + * Overwrite clone. + * + * @return a cloned copy of this ZipArchiveEntry + */ + @Override + public Object clone() { + ZipArchiveEntry e = (ZipArchiveEntry) super.clone(); + e.setInternalAttributes(getInternalAttributes()); + e.setExternalAttributes(getExternalAttributes()); + e.setExtraFields(getExtraFields(true)); + return e; + } + + /** + * Returns the compression method of this entry, or -1 if the + * compression method has not been specified. + * + * @return compression method + */ + @Override + public int getMethod() { + return method; + } + + /** + * Sets the compression method of this entry. + * + * @param method compression method + */ + @Override + public void setMethod(int method) { + if (method < 0) { + throw new IllegalArgumentException( + "ZIP compression method can not be negative: " + method); + } + this.method = method; + } + + /** + * Retrieves the internal file attributes. + * + * @return the internal file attributes + */ + public int getInternalAttributes() { + return internalAttributes; + } + + /** + * Sets the internal file attributes. + * + * @param value an int value + */ + public void setInternalAttributes(int value) { + internalAttributes = value; + } + + /** + * Retrieves the external file attributes. + * + * @return the external file attributes + */ + public long getExternalAttributes() { + return externalAttributes; + } + + /** + * Sets the external file attributes. + * + * @param value an long value + */ + public void setExternalAttributes(long value) { + externalAttributes = value; + } + + /** + * Sets Unix permissions in a way that is understood by Info-Zip's + * unzip command. + * + * @param mode an int value + */ + public void setUnixMode(int mode) { + // CheckStyle:MagicNumberCheck OFF - no point + setExternalAttributes((mode << SHORT_SHIFT) + // MS-DOS read-only attribute + | ((mode & 0200) == 0 ? 1 : 0) + // MS-DOS directory flag + | (isDirectory() ? 0x10 : 0)); + // CheckStyle:MagicNumberCheck ON + platform = PLATFORM_UNIX; + } + + /** + * Unix permission. + * + * @return the unix permissions + */ + public int getUnixMode() { + return platform != PLATFORM_UNIX ? 0 : + (int) ((getExternalAttributes() >> SHORT_SHIFT) & SHORT_MASK); + } + + /** + * Platform specification to put into the "version made + * by" part of the central file header. + * + * @return PLATFORM_FAT unless {@link #setUnixMode setUnixMode} + * has been called, in which case PLATORM_UNIX will be returned. + */ + public int getPlatform() { + return platform; + } + + /** + * Set the platform (UNIX or FAT). + * + * @param platform an int value - 0 is FAT, 3 is UNIX + */ + protected void setPlatform(int platform) { + this.platform = platform; + } + + /** + * Replaces all currently attached extra fields with the new array. + * + * @param fields an array of extra fields + */ + public void setExtraFields(ZipExtraField[] fields) { + extraFields = new LinkedHashMap(); + for (ZipExtraField field : fields) { + if (field instanceof UnparseableExtraFieldData) { + unparseableExtra = (UnparseableExtraFieldData) field; + } else { + extraFields.put(field.getHeaderId(), field); + } + } + setExtra(); + } + + /** + * Retrieves all extra fields that have been parsed successfully. 
+ * + * @return an array of the extra fields + */ + public ZipExtraField[] getExtraFields() { + return getExtraFields(false); + } + + /** + * Retrieves extra fields. + * + * @param includeUnparseable whether to also return unparseable + * extra fields as {@link UnparseableExtraFieldData} if such data + * exists. + * @return an array of the extra fields + */ + public ZipExtraField[] getExtraFields(boolean includeUnparseable) { + if (extraFields == null) { + return !includeUnparseable || unparseableExtra == null + ? new ZipExtraField[0] + : new ZipExtraField[]{unparseableExtra}; + } + List result = + new ArrayList(extraFields.values()); + if (includeUnparseable && unparseableExtra != null) { + result.add(unparseableExtra); + } + return result.toArray(new ZipExtraField[0]); + } + + /** + * Adds an extra field - replacing an already present extra field + * of the same type. + * If no extra field of the same type exists, the field will be + * added as last field. + * + * @param ze an extra field + */ + public void addExtraField(ZipExtraField ze) { + if (ze instanceof UnparseableExtraFieldData) { + unparseableExtra = (UnparseableExtraFieldData) ze; + } else { + if (extraFields == null) { + extraFields = new LinkedHashMap(); + } + extraFields.put(ze.getHeaderId(), ze); + } + setExtra(); + } + + /** + * Adds an extra field - replacing an already present extra field + * of the same type. + * The new extra field will be the first one. + * + * @param ze an extra field + */ + public void addAsFirstExtraField(ZipExtraField ze) { + if (ze instanceof UnparseableExtraFieldData) { + unparseableExtra = (UnparseableExtraFieldData) ze; + } else { + LinkedHashMap copy = extraFields; + extraFields = new LinkedHashMap(); + extraFields.put(ze.getHeaderId(), ze); + if (copy != null) { + copy.remove(ze.getHeaderId()); + extraFields.putAll(copy); + } + } + setExtra(); + } + + /** + * Remove an extra field. + * + * @param type the type of extra field to remove + */ + public void removeExtraField(ZipShort type) { + if (extraFields == null) { + throw new java.util.NoSuchElementException(); + } + if (extraFields.remove(type) == null) { + throw new java.util.NoSuchElementException(); + } + setExtra(); + } + + /** + * Removes unparseable extra field data. + */ + public void removeUnparseableExtraFieldData() { + if (unparseableExtra == null) { + throw new java.util.NoSuchElementException(); + } + unparseableExtra = null; + setExtra(); + } + + /** + * Looks up an extra field by its header id. + * + * @return null if no such field exists. + */ + public ZipExtraField getExtraField(ZipShort type) { + if (extraFields != null) { + return extraFields.get(type); + } + return null; + } + + /** + * Looks up extra field data that couldn't be parsed correctly. + * + * @return null if no such field exists. + */ + public UnparseableExtraFieldData getUnparseableExtraFieldData() { + return unparseableExtra; + } + + /** + * Parses the given bytes as extra field data and consumes any + * unparseable data as an {@link UnparseableExtraFieldData} + * instance. 
+ * + * @param extra an array of bytes to be parsed into extra fields + * @throws RuntimeException if the bytes cannot be parsed + * @throws RuntimeException on error + */ + @Override + public void setExtra(byte[] extra) throws RuntimeException { + try { + ZipExtraField[] local = + ExtraFieldUtils.parse(extra, true, + ExtraFieldUtils.UnparseableExtraField.READ); + mergeExtraFields(local, true); + } catch (ZipException e) { + throw new RuntimeException("Error parsing extra fields for entry: " + + getName() + " - " + e.getMessage(), e); + } + } + + /** + * Unfortunately {@link java.util.zip.ZipOutputStream + * java.util.zip.ZipOutputStream} seems to access the extra data + * directly, so overriding getExtra doesn't help - we need to + * modify super's data directly. + */ + protected void setExtra() { + super.setExtra(ExtraFieldUtils.mergeLocalFileDataData(getExtraFields(true))); + } + + /** + * Sets the central directory part of extra fields. + */ + public void setCentralDirectoryExtra(byte[] b) { + try { + ZipExtraField[] central = + ExtraFieldUtils.parse(b, false, + ExtraFieldUtils.UnparseableExtraField.READ); + mergeExtraFields(central, false); + } catch (ZipException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + /** + * Retrieves the extra data for the local file data. + * + * @return the extra data for local file + */ + public byte[] getLocalFileDataExtra() { + byte[] extra = getExtra(); + return extra != null ? extra : new byte[0]; + } + + /** + * Retrieves the extra data for the central directory. + * + * @return the central directory extra data + */ + public byte[] getCentralDirectoryExtra() { + return ExtraFieldUtils.mergeCentralDirectoryData(getExtraFields(true)); + } + + /** + * Get the name of the entry. + * + * @return the entry name + */ + @Override + public String getName() { + return name == null ? super.getName() : name; + } + + /** + * Is this entry a directory? + * + * @return true if the entry is a directory + */ + @Override + public boolean isDirectory() { + return getName().endsWith("/"); + } + + /** + * Set the name of the entry. + * + * @param name the name to use + */ + public ZipArchiveEntry setName(String name) { + if (name != null && getPlatform() == PLATFORM_FAT + && name.indexOf("/") == -1) { + name = name.replace('\\', '/'); + } + this.name = name; + return this; + } + + /** + * Gets the uncompressed size of the entry data. + * + * @return the entry size + */ + @Override + public long getEntrySize() { + return size; + } + + /** + * Sets the uncompressed size of the entry data. + * + * @param size the uncompressed size in bytes + * @throws IllegalArgumentException if the specified size is less + * than 0 + */ + @Override + public ZipArchiveEntry setEntrySize(long size) { + if (size < 0) { + throw new IllegalArgumentException("invalid entry size"); + } + this.size = size; + return this; + } + + /** + * Sets the name using the raw bytes and the string created from + * it by guessing or using the configured encoding. + * + * @param name the name to use created from the raw bytes using + * the guessed or configured encoding + * @param rawName the bytes originally read as name from the + * archive + */ + protected void setName(String name, byte[] rawName) { + setName(name); + this.rawName = rawName; + } + + /** + * Returns the raw bytes that made up the name before it has been + * converted using the configured or guessed encoding. + * This method will return null if this instance has not been + * read from an archive. 
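+ * The returned array is a defensive copy; callers may modify it
+ * without affecting this entry.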
+ */ + public byte[] getRawName() { + if (rawName != null) { + byte[] b = new byte[rawName.length]; + System.arraycopy(rawName, 0, b, 0, rawName.length); + return b; + } + return null; + } + + /** + * Get the hashCode of the entry. + * This uses the name as the hashcode. + * + * @return a hashcode. + */ + @Override + public int hashCode() { + // this method has severe consequences on performance. We cannot rely + // on the super.hashCode() method since super.getName() always return + // the empty string in the current implemention (there's no setter) + // so it is basically draining the performance of a hashmap lookup + return getName().hashCode(); + } + + /** + * The "general purpose bit" field. + */ + public GeneralPurposeBit getGeneralPurposeBit() { + return gpb; + } + + /** + * The "general purpose bit" field. + */ + public void setGeneralPurposeBit(GeneralPurposeBit b) { + gpb = b; + } + + /** + * If there are no extra fields, use the given fields as new extra + * data - otherwise merge the fields assuming the existing fields + * and the new fields stem from different locations inside the + * archive. + * + * @param f the extra fields to merge + * @param local whether the new fields originate from local data + */ + private void mergeExtraFields(ZipExtraField[] f, boolean local) + throws ZipException { + if (extraFields == null) { + setExtraFields(f); + } else { + for (ZipExtraField element : f) { + ZipExtraField existing; + if (element instanceof UnparseableExtraFieldData) { + existing = unparseableExtra; + } else { + existing = getExtraField(element.getHeaderId()); + } + if (existing == null) { + addExtraField(element); + } else { + if (local) { + byte[] b = element.getLocalFileDataData(); + existing.parseFromLocalFileData(b, 0, b.length); + } else { + byte[] b = element.getCentralDirectoryData(); + existing.parseFromCentralDirectoryData(b, 0, b.length); + } + } + } + setExtra(); + } + } + + public ZipArchiveEntry setLastModified(Date date) { + setTime(date.getTime()); + return this; + } + + public Date getLastModified() { + return new Date(getTime()); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + ZipArchiveEntry other = (ZipArchiveEntry) obj; + String myName = getName(); + String otherName = other.getName(); + if (myName == null) { + if (otherName != null) { + return false; + } + } else if (!myName.equals(otherName)) { + return false; + } + String myComment = getComment(); + String otherComment = other.getComment(); + if (myComment == null) { + if (otherComment != null) { + return false; + } + } else if (!myComment.equals(otherComment)) { + return false; + } + return getTime() == other.getTime() + && getInternalAttributes() == other.getInternalAttributes() + && getPlatform() == other.getPlatform() + && getExternalAttributes() == other.getExternalAttributes() + && getMethod() == other.getMethod() + && getEntrySize() == other.getEntrySize() + && getCrc() == other.getCrc() + && getCompressedSize() == other.getCompressedSize() + && Arrays.equals(getCentralDirectoryExtra(), + other.getCentralDirectoryExtra()) + && Arrays.equals(getLocalFileDataExtra(), + other.getLocalFileDataExtra()) + && gpb.equals(other.gpb); + } +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipArchiveInputStream.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipArchiveInputStream.java new file mode 100644 index 0000000..648b1d9 --- /dev/null +++ 
b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipArchiveInputStream.java @@ -0,0 +1,748 @@ +package org.xbib.io.archive.zip; + +import org.xbib.io.archive.entry.ArchiveEntry; +import org.xbib.io.archive.stream.ArchiveInputStream; +import org.xbib.io.archive.entry.ArchiveEntryEncoding; +import org.xbib.io.archive.entry.ArchiveEntryEncodingHelper; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; +import java.util.zip.CRC32; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; +import java.util.zip.ZipEntry; +import java.util.zip.ZipException; + +import static org.xbib.io.archive.zip.ZipConstants.DWORD; +import static org.xbib.io.archive.zip.ZipConstants.SHORT; +import static org.xbib.io.archive.zip.ZipConstants.WORD; +import static org.xbib.io.archive.zip.ZipConstants.ZIP64_MAGIC; + +/** + * Implements an input stream that can read Zip archives. + * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the + * DEFLATE algorithm is used, as the size information is not available + * from the header. + * The {@link ZipFile} class is preferred when reading from files. + * This code transparently supports Zip64 + * extensions and thus individual entries and archives larger than 4 + * GB or with more than 65536 entries. + * + * @see ZipFile + */ +public class ZipArchiveInputStream extends ArchiveInputStream { + + /** + * The zip encoding to use for filenames and the file comment. + */ + private final ArchiveEntryEncoding archiveEntryEncoding; + + /** + * Whether to look for and use Unicode extra fields. + */ + private final boolean useUnicodeExtraFields; + + /** + * Wrapped stream, will always be a PushbackInputStream. + */ + private final InputStream in; + + /** + * Inflater used for all deflated entries. + */ + private final Inflater inf = new Inflater(true); + + /** + * Calculates checkusms for all entries. + */ + private final CRC32 crc = new CRC32(); + + /** + * Buffer used to read from the wrapped stream. + */ + private final Buffer buf = new Buffer(); + /** + * The entry that is currently being read. + */ + private CurrentEntry current = null; + /** + * Whether the stream has been closed. + */ + private boolean closed = false; + /** + * Whether the stream has reached the central directory - and thus + * found all entries. + */ + private boolean hitCentralDirectory = false; + /** + * When reading a stored entry that uses the data descriptor this + * stream has to read the full entry and caches it. This is the + * cache. + */ + private ByteArrayInputStream lastStoredEntry = null; + + /** + * Whether the stream will try to read STORED entries that use a + * data descriptor. 
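+ * A sketch of how this could be enabled through the four-argument
+ * constructor defined below (the stream and encoding values are
+ * illustrative): new ZipArchiveInputStream(in, "UTF-8", true, true)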
+ */ + private boolean allowStoredEntriesWithDataDescriptor = false; + + private static final int LFH_LEN = 30; + /* + local file header signature 4 bytes (0x04034b50) + version needed to extract 2 bytes + general purpose bit flag 2 bytes + compression method 2 bytes + last mod file time 2 bytes + last mod file date 2 bytes + crc-32 4 bytes + compressed size 4 bytes + uncompressed size 4 bytes + file name length 2 bytes + extra field length 2 bytes + */ + + private static final long TWO_EXP_32 = ZIP64_MAGIC + 1; + + public ZipArchiveInputStream(InputStream inputStream) { + this(inputStream, ArchiveEntryEncodingHelper.UTF8, true); + } + + /** + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param useUnicodeExtraFields whether to use InfoZIP Unicode + * Extra Fields (if present) to set the file names. + */ + public ZipArchiveInputStream(InputStream inputStream, + String encoding, + boolean useUnicodeExtraFields) { + this(inputStream, encoding, useUnicodeExtraFields, false); + } + + /** + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param useUnicodeExtraFields whether to use InfoZIP Unicode + * Extra Fields (if present) to set the file names. + * @param allowStoredEntriesWithDataDescriptor whether the stream + * will try to read STORED entries that use a data descriptor + */ + public ZipArchiveInputStream(InputStream inputStream, + String encoding, + boolean useUnicodeExtraFields, + boolean allowStoredEntriesWithDataDescriptor) { + archiveEntryEncoding = ArchiveEntryEncodingHelper.getEncoding(encoding); + this.useUnicodeExtraFields = useUnicodeExtraFields; + in = new PushbackInputStream(inputStream, buf.buf.length); + this.allowStoredEntriesWithDataDescriptor = + allowStoredEntriesWithDataDescriptor; + } + + public org.xbib.io.archive.zip.ZipArchiveEntry getNextZipEntry() throws IOException { + if (closed || hitCentralDirectory) { + return null; + } + if (current != null) { + closeEntry(); + } + byte[] lfh = new byte[LFH_LEN]; + try { + readFully(lfh); + } catch (EOFException e) { + return null; + } + ZipLong sig = new ZipLong(lfh); + if (sig.equals(ZipLong.CFH_SIG)) { + hitCentralDirectory = true; + return null; + } + if (!sig.equals(ZipLong.LFH_SIG)) { + return null; + } + + int off = WORD; + current = new CurrentEntry(); + + int versionMadeBy = ZipShort.getValue(lfh, off); + off += SHORT; + current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) + & ZipFile.NIBLET_MASK); + + final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfh, off); + final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); + final ArchiveEntryEncoding entryEncoding = + hasUTF8Flag ? 
ArchiveEntryEncodingHelper.UTF8_ENCODING : archiveEntryEncoding; + current.hasDataDescriptor = gpFlag.usesDataDescriptor(); + current.entry.setGeneralPurposeBit(gpFlag); + + off += SHORT; + + current.entry.setMethod(ZipShort.getValue(lfh, off)); + off += SHORT; + + long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off)); + current.entry.setTime(time); + off += WORD; + + ZipLong size = null, cSize = null; + if (!current.hasDataDescriptor) { + current.entry.setCrc(ZipLong.getValue(lfh, off)); + off += WORD; + + cSize = new ZipLong(lfh, off); + off += WORD; + + size = new ZipLong(lfh, off); + off += WORD; + } else { + off += 3 * WORD; + } + + int fileNameLen = ZipShort.getValue(lfh, off); + + off += SHORT; + + int extraLen = ZipShort.getValue(lfh, off); + off += SHORT; + + byte[] fileName = new byte[fileNameLen]; + readFully(fileName); + current.entry.setName(entryEncoding.decode(fileName), fileName); + + byte[] extraData = new byte[extraLen]; + readFully(extraData); + current.entry.setExtra(extraData); + + if (!hasUTF8Flag && useUnicodeExtraFields) { + ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, + null); + } + + processZip64Extra(size, cSize); + return current.entry; + } + + /** + * Records whether a Zip64 extra is present and sets the size + * information from it if sizes are 0xFFFFFFFF and the entry + * doesn't use a data descriptor. + */ + private void processZip64Extra(ZipLong size, ZipLong cSize) { + Zip64ExtendedInformationExtraField z64 = + (Zip64ExtendedInformationExtraField) + current.entry.getExtraField(Zip64ExtendedInformationExtraField + .HEADER_ID); + current.usesZip64 = z64 != null; + if (!current.hasDataDescriptor) { + if (current.usesZip64 && (cSize.equals(ZipLong.ZIP64_MAGIC) + || size.equals(ZipLong.ZIP64_MAGIC)) + ) { + current.entry.setCompressedSize(z64.getCompressedSize() // z64 cannot be null here + .getLongValue()); + current.entry.setSize(z64.getSize().getLongValue()); + } else { + current.entry.setCompressedSize(cSize.getValue()); + current.entry.setSize(size.getValue()); + } + } + } + + @Override + public ArchiveEntry getNextEntry() throws IOException { + return getNextZipEntry(); + } + + @Override + public int read(byte[] buffer, int start, int length) throws IOException { + if (closed) { + throw new IOException("The stream is closed"); + } + if (inf.finished() || current == null) { + return -1; + } + + // avoid int overflow, check null buffer + if (start <= buffer.length && length >= 0 && start >= 0 + && buffer.length - start >= length) { + ZipUtil.checkRequestedFeatures(current.entry); + if (!supportsDataDescriptorFor(current.entry)) { + throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException + .Feature + .DATA_DESCRIPTOR, + current.entry); + } + + if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { + return readStored(buffer, start, length); + } + return readDeflated(buffer, start, length); + } + throw new ArrayIndexOutOfBoundsException(); + } + + /** + * Implementation of read for STORED entries. 
+ */ + private int readStored(byte[] buffer, int start, int length) + throws IOException { + + if (current.hasDataDescriptor) { + if (lastStoredEntry == null) { + readStoredEntry(); + } + return lastStoredEntry.read(buffer, start, length); + } + + long csize = current.entry.getSize(); + if (current.bytesRead >= csize) { + return -1; + } + + if (buf.offsetInBuffer >= buf.lengthOfLastRead) { + buf.offsetInBuffer = 0; + if ((buf.lengthOfLastRead = in.read(buf.buf)) == -1) { + return -1; + } + current.bytesReadFromStream += buf.lengthOfLastRead; + } + + int toRead = length > buf.lengthOfLastRead + ? buf.lengthOfLastRead - buf.offsetInBuffer + : length; + if ((csize - current.bytesRead) < toRead) { + // if it is smaller than toRead then it fits into an int + toRead = (int) (csize - current.bytesRead); + } + System.arraycopy(buf.buf, buf.offsetInBuffer, buffer, start, toRead); + buf.offsetInBuffer += toRead; + current.bytesRead += toRead; + crc.update(buffer, start, toRead); + return toRead; + } + + /** + * Implementation of read for DEFLATED entries. + */ + private int readDeflated(byte[] buffer, int start, int length) + throws IOException { + if (inf.needsInput()) { + fill(); + if (buf.lengthOfLastRead > 0) { + current.bytesReadFromStream += buf.lengthOfLastRead; + } + } + int read = 0; + try { + read = inf.inflate(buffer, start, length); + } catch (DataFormatException e) { + throw new ZipException(e.getMessage()); + } + if (read == 0) { + if (inf.finished()) { + return -1; + } else if (buf.lengthOfLastRead == -1) { + throw new IOException("Truncated ZIP file"); + } + } + crc.update(buffer, start, read); + return read; + } + + @Override + public void close() throws IOException { + if (!closed) { + closed = true; + in.close(); + inf.end(); + } + } + + /** + * Skips over and discards value bytes of data from this input + * stream. + * This implementation may end up skipping over some smaller + * number of bytes, possibly 0, if and only if it reaches the end + * of the underlying stream. + * The actual number of bytes skipped is returned. + * + * @param value the number of bytes to be skipped. + * @return the actual number of bytes skipped. + * @throws java.io.IOException - if an I/O error occurs. + * @throws IllegalArgumentException - if value is negative. + */ + @Override + public long skip(long value) throws IOException { + if (value >= 0) { + long skipped = 0; + byte[] b = new byte[1024]; + while (skipped < value) { + long rem = value - skipped; + int x = read(b, 0, (int) (b.length > rem ? rem : b.length)); + if (x == -1) { + return skipped; + } + skipped += x; + } + return skipped; + } + throw new IllegalArgumentException(); + } + + /** + * Closes the current ZIP archive entry and positions the underlying + * stream to the beginning of the next entry. All per-entry variables + * and data structures are cleared. + * If the compressed size of this entry is included in the entry header, + * then any outstanding bytes are simply skipped from the underlying + * stream without uncompressing them. This allows an entry to be safely + * closed even if the compression method is unsupported. + * In case we don't know the compressed size of this entry or have + * already buffered too much data from the underlying stream to support + * uncompression, then the uncompression process is completed and the + * end position of the stream is adjusted based on the result of that + * process. 
+ * + * @throws java.io.IOException if an error occurs + */ + private void closeEntry() throws IOException { + if (closed) { + throw new IOException("The stream is closed"); + } + if (current == null) { + return; + } + + // Ensure all entry bytes are read + if (current.bytesReadFromStream <= current.entry.getCompressedSize() + && !current.hasDataDescriptor) { + drainCurrentEntryData(); + } else { + skip(Long.MAX_VALUE); + + long inB = + current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED + ? getBytesInflated() : current.bytesRead; + + // this is at most a single read() operation and can't + // exceed the range of int + int diff = (int) (current.bytesReadFromStream - inB); + + // Pushback any required bytes + if (diff > 0) { + pushback(buf.buf, buf.lengthOfLastRead - diff, diff); + } + } + + if (lastStoredEntry == null && current.hasDataDescriptor) { + readDataDescriptor(); + } + + inf.reset(); + buf.reset(); + crc.reset(); + current = null; + lastStoredEntry = null; + } + + /** + * Read all data of the current entry from the underlying stream + * that hasn't been read, yet. + */ + private void drainCurrentEntryData() throws IOException { + long remaining = current.entry.getCompressedSize() + - current.bytesReadFromStream; + while (remaining > 0) { + long n = in.read(buf.buf, 0, (int) Math.min(buf.buf.length, + remaining)); + if (n < 0) { + throw new EOFException( + "Truncated ZIP entry: " + current.entry.getName()); + } else { + remaining -= n; + } + } + } + + /** + * Get the number of bytes Inflater has actually processed. + * for Java < Java7 the getBytes* methods in + * Inflater/Deflater seem to return unsigned ints rather than + * longs that start over with 0 at 2^32. + * The stream knows how many bytes it has read, but not how + * many the Inflater actually consumed - it should be between the + * total number of bytes read for the entry and the total number + * minus the last read operation. Here we just try to make the + * value close enough to the bytes we've read by assuming the + * number of bytes consumed must be smaller than (or equal to) the + * number of bytes read but not smaller by more than 2^32. + */ + private long getBytesInflated() { + long inB = inf.getBytesRead(); + if (current.bytesReadFromStream >= TWO_EXP_32) { + while (inB + TWO_EXP_32 <= current.bytesReadFromStream) { + inB += TWO_EXP_32; + } + } + return inB; + } + + private void fill() throws IOException { + if (closed) { + throw new IOException("The stream is closed"); + } + if ((buf.lengthOfLastRead = in.read(buf.buf)) > 0) { + inf.setInput(buf.buf, 0, buf.lengthOfLastRead); + } + } + + private void readFully(byte[] b) throws IOException { + int count = 0, x = 0; + while (count != b.length) { + count += x = in.read(b, count, b.length - count); + if (x == -1) { + throw new EOFException(); + } + } + } + + private void readDataDescriptor() throws IOException { + byte[] b = new byte[WORD]; + readFully(b); + ZipLong val = new ZipLong(b); + if (ZipLong.DD_SIG.equals(val)) { + // data descriptor with signature, skip sig + readFully(b); + val = new ZipLong(b); + } + current.entry.setCrc(val.getValue()); + + // if there is a ZIP64 extra field, sizes are eight bytes + // each, otherwise four bytes each. 
Unfortunately some + // implementations - namely Java7 - use eight bytes without + // using a ZIP64 extra field - + // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588 + + // just read 16 bytes and check whether bytes nine to twelve + // look like one of the signatures of what could follow a data + // descriptor (ignoring archive decryption headers for now). + // If so, push back eight bytes and assume sizes are four + // bytes, otherwise sizes are eight bytes each. + b = new byte[2 * DWORD]; + readFully(b); + ZipLong potentialSig = new ZipLong(b, DWORD); + if (potentialSig.equals(ZipLong.CFH_SIG) + || potentialSig.equals(ZipLong.LFH_SIG)) { + pushback(b, DWORD, DWORD); + current.entry.setCompressedSize(ZipLong.getValue(b)); + current.entry.setSize(ZipLong.getValue(b, WORD)); + } else { + current.entry + .setCompressedSize(ZipEightByteInteger.getLongValue(b)); + current.entry.setSize(ZipEightByteInteger.getLongValue(b, DWORD)); + } + } + + /** + * Whether this entry requires a data descriptor this library can work with. + * + * @return true if allowStoredEntriesWithDataDescriptor is true, + * the entry doesn't require any data descriptor or the method is + * DEFLATED. + */ + private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) { + return allowStoredEntriesWithDataDescriptor || + !entry.getGeneralPurposeBit().usesDataDescriptor() + || entry.getMethod() == ZipEntry.DEFLATED; + } + + /** + * Caches a stored entry that uses the data descriptor. + *
+ * - Reads a stored entry until the signature of a local file
+ *   header, central directory header or data descriptor has been
+ *   found.
+ * - Stores all entry data in lastStoredEntry.
+ * - Rewinds the stream to position at the data descriptor.
+ * - Reads the data descriptor.
+ *
+ * After calling this method the entry should know its size, + * the entry's data is cached and the stream is positioned at the + * next local file or central directory header. + */ + private void readStoredEntry() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int off = 0; + boolean done = false; + + // length of DD without signature + int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD; + + while (!done) { + int r = in.read(buf.buf, off, + ZipArchiveOutputStream.BUFFER_SIZE - off); + if (r <= 0) { + // read the whole archive without ever finding a + // central directory + throw new IOException("Truncated ZIP file"); + } + if (r + off < 4) { + // buf is too small to check for a signature, loop + off += r; + continue; + } + + done = bufferContainsSignature(bos, off, r, ddLen); + if (!done) { + off = cacheBytesRead(bos, off, r, ddLen); + } + } + + byte[] b = bos.toByteArray(); + lastStoredEntry = new ByteArrayInputStream(b); + } + + private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); + private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); + private static final byte[] DD = ZipLong.DD_SIG.getBytes(); + + /** + * Checks whether the current buffer contains the signature of a + * "data descriptor", "local file header" or + * "central directory entry". + * If it contains such a signature, reads the data descriptor + * and positions the stream right after the data descriptor. + */ + private boolean bufferContainsSignature(ByteArrayOutputStream bos, + int offset, int lastRead, + int expectedDDLen) throws IOException { + boolean done = false; + int readTooMuch = 0; + for (int i = 0; !done && i < lastRead - 4; i++) { + if (buf.buf[i] == LFH[0] && buf.buf[i + 1] == LFH[1]) { + if ((buf.buf[i + 2] == LFH[2] && buf.buf[i + 3] == LFH[3]) + || (buf.buf[i + 2] == CFH[2] && buf.buf[i + 3] == CFH[3])) { + // found a LFH or CFH: + readTooMuch = offset + lastRead - i - expectedDDLen; + done = true; + } else if (buf.buf[i + 2] == DD[2] && buf.buf[i + 3] == DD[3]) { + // found DD: + readTooMuch = offset + lastRead - i; + done = true; + } + if (done) { + // * push back bytes read in excess as well as the data + // descriptor + // * copy the remaining bytes to cache + // * read data descriptor + pushback(buf.buf, offset + lastRead - readTooMuch, + readTooMuch); + bos.write(buf.buf, 0, i); + readDataDescriptor(); + } + } + } + return done; + } + + /** + * If the last read bytes could hold a data descriptor and an + * incomplete signature then save the last bytes to the front of + * the buffer and cache everything in front of the potential data + * descriptor into the given ByteArrayOutputStream. + * Data descriptor plus incomplete signature (3 bytes in the + * worst case) can be 20 bytes max. + */ + private int cacheBytesRead(ByteArrayOutputStream bos, int offset, + int lastRead, int expectedDDLen) { + final int cacheable = offset + lastRead - expectedDDLen - 3; + if (cacheable > 0) { + bos.write(buf.buf, 0, cacheable); + System.arraycopy(buf.buf, cacheable, buf.buf, 0, + expectedDDLen + 3); + offset = expectedDDLen + 3; + } else { + offset += lastRead; + } + return offset; + } + + private void pushback(byte[] buf, int offset, int length) + throws IOException { + ((PushbackInputStream) in).unread(buf, offset, length); + pushedBackBytes(length); + } + + /** + * Decrements the counter of already read bytes. + * + * @param pushedBack the number of bytes pushed back.
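+ * The default implementation is an empty hook; subclasses that track
+ * the position within the stream can override it to adjust their
+ * counter.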
+ */ + protected void pushedBackBytes(long pushedBack) { + } + + /** + * Structure collecting information for the entry that is + * currently being read. + */ + private static final class CurrentEntry { + /** + * Current ZIP entry. + */ + private final ZipArchiveEntry entry = new ZipArchiveEntry(); + /** + * Does the entry use a data descriptor? + */ + private boolean hasDataDescriptor; + /** + * Does the entry have a ZIP64 extended information extra field. + */ + private boolean usesZip64; + /** + * Number of bytes of entry content read by the client if the + * entry is STORED. + */ + private long bytesRead; + /** + * Number of bytes of entry content read so from the stream. + * This may be more than the actual entry's length as some + * stuff gets buffered up and needs to be pushed back when the + * end of the entry has been reached. + */ + private long bytesReadFromStream; + } + + /** + * Contains a temporary buffer used to read from the wrapped + * stream together with some information needed for internal + * housekeeping. + */ + private static final class Buffer { + /** + * Buffer used as temporary buffer when reading from the stream. + */ + private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE]; + /** + * {@link #buf buf} may contain data the client hasnt read, yet, + * this is the first byte that hasn't been read so far. + */ + private int offsetInBuffer = 0; + /** + * Number of bytes read from the wrapped stream into {@link #buf + * buf} with the last read operation. + */ + private int lengthOfLastRead = 0; + + /** + * Reset internal housekeeping. + */ + private void reset() { + offsetInBuffer = lengthOfLastRead = 0; + } + } +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipArchiveOutputStream.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipArchiveOutputStream.java new file mode 100644 index 0000000..1dbfa13 --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipArchiveOutputStream.java @@ -0,0 +1,1418 @@ +package org.xbib.io.archive.zip; + +import org.xbib.io.archive.stream.ArchiveOutputStream; +import org.xbib.io.archive.entry.ArchiveEntryEncoding; +import org.xbib.io.archive.entry.ArchiveEntryEncodingHelper; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.zip.CRC32; +import java.util.zip.Deflater; +import java.util.zip.ZipException; + +import static org.xbib.io.archive.zip.ZipConstants.DATA_DESCRIPTOR_MIN_VERSION; +import static org.xbib.io.archive.zip.ZipConstants.DWORD; +import static org.xbib.io.archive.zip.ZipConstants.INITIAL_VERSION; +import static org.xbib.io.archive.zip.ZipConstants.SHORT; +import static org.xbib.io.archive.zip.ZipConstants.WORD; +import static org.xbib.io.archive.zip.ZipConstants.ZIP64_MAGIC; +import static org.xbib.io.archive.zip.ZipConstants.ZIP64_MAGIC_SHORT; +import static org.xbib.io.archive.zip.ZipConstants.ZIP64_MIN_VERSION; + +/** + * Reimplementation of {@link java.util.zip.ZipOutputStream + * java.util.zip.ZipOutputStream} that does handle the extended + * functionality of this package, especially internal/external file + * attributes and extra fields with different layouts for local file + * data and central directory entries. 
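+ * A minimal usage sketch (file name, entry name and content are
+ * illustrative, error handling is omitted):
+ * ZipArchiveOutputStream zos = new ZipArchiveOutputStream(new File("archive.zip"));
+ * ZipArchiveEntry entry = new ZipArchiveEntry("hello.txt");
+ * zos.putArchiveEntry(entry);
+ * zos.write(content, 0, content.length);
+ * zos.closeArchiveEntry();
+ * zos.finish();
+ * zos.close();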
+ * This class will try to use {@link java.io.RandomAccessFile + * RandomAccessFile} when you know that the output is going to go to a + * file. + * If RandomAccessFile cannot be used, this implementation will use + * a Data Descriptor to store size and CRC information for {@link + * #DEFLATED DEFLATED} entries, this means, you don't need to + * calculate them yourself. Unfortunately this is not possible for + * the {@link #STORED STORED} method, here setting the CRC and + * uncompressed size information is required before {@link + * #putArchiveEntry(ZipArchiveEntry)} can be called. + * It transparently supports Zip64 + * extensions and thus individual entries and archives larger than 4 + * GB or with more than 65536 entries in most cases but explicit + * control is provided via {@link #setUseZip64}. If the stream can not + * user RandomAccessFile and you try to write a ZipArchiveEntry of + * unknown size then Zip64 extensions will be disabled by default. + */ +public class ZipArchiveOutputStream extends ArchiveOutputStream { + + static final int BUFFER_SIZE = 512; + + /** + * indicates if this archive is finished. protected for use in Jar implementation + */ + protected boolean finished = false; + + /* + * Apparently Deflater.setInput gets slowed down a lot on Sun JVMs + * when it gets handed a really big buffer. See + * https://issues.apache.org/bugzilla/show_bug.cgi?id=45396 + * + * Using a buffer size of 8 kB proved to be a good compromise + */ + private static final int DEFLATER_BLOCK_SIZE = 8192; + + /** + * Compression method for deflated entries. + */ + public static final int DEFLATED = java.util.zip.ZipEntry.DEFLATED; + + /** + * Default compression level for deflated entries. + */ + public static final int DEFAULT_COMPRESSION = Deflater.DEFAULT_COMPRESSION; + + /** + * Compression method for stored entries. + */ + public static final int STORED = java.util.zip.ZipEntry.STORED; + + /** + * local file header signature + */ + public static final byte[] LFH_SIG = ZipLong.LFH_SIG.getBytes(); + /** + * data descriptor signature + */ + public static final byte[] DD_SIG = ZipLong.DD_SIG.getBytes(); + /** + * central file header signature + */ + public static final byte[] CFH_SIG = ZipLong.CFH_SIG.getBytes(); + /** + * end of central dir signature + */ + public static final byte[] EOCD_SIG = ZipLong.getBytes(0X06054B50L); + /** + * ZIP64 end of central dir signature + */ + public static final byte[] ZIP64_EOCD_SIG = ZipLong.getBytes(0X06064B50L); + /** + * ZIP64 end of central dir locator signature + */ + public static final byte[] ZIP64_EOCD_LOC_SIG = ZipLong.getBytes(0X07064B50L); + + /** + * default encoding for file names and comment. + */ + static final String DEFAULT_ENCODING = ArchiveEntryEncodingHelper.UTF8; + + /** + * Current entry. + */ + private CurrentEntry entry; + + /** + * The file comment. + */ + private String comment = ""; + + /** + * Compression level for next entry. + */ + private int level = DEFAULT_COMPRESSION; + + /** + * Has the compression level changed when compared to the last + * entry? + */ + private boolean hasCompressionLevelChanged = false; + + /** + * Default compression method for next entry. + */ + private int method = java.util.zip.ZipEntry.DEFLATED; + + /** + * List of ZipArchiveEntries written so far. + */ + private final List entries = + new LinkedList(); + + /** + * CRC instance to avoid parsing DEFLATED data twice. + */ + private final CRC32 crc = new CRC32(); + + /** + * Count the bytes written to out. 
+ */ + private long written = 0; + + /** + * Start of central directory. + */ + private long cdOffset = 0; + + /** + * Length of central directory. + */ + private long cdLength = 0; + + /** + * Helper, a 0 as ZipShort. + */ + private static final byte[] ZERO = {0, 0}; + + /** + * Helper, a 0 as ZipLong. + */ + private static final byte[] LZERO = {0, 0, 0, 0}; + + /** + * Holds the offsets of the LFH starts for each entry. + */ + private final Map offsets = + new HashMap(); + + /** + * The encoding to use for filenames and the file comment. + *

+ *

+ * For a list of possible values see
+ * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html.
+ * Defaults to UTF-8.
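+ * For example, "Cp437" (an illustrative value; any canonical charset
+ * name supported by the JVM should work) selects the code page
+ * traditionally used by PKZIP for DOS.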

+ */ + private String encoding = DEFAULT_ENCODING; + + /** + * The zip encoding to use for filenames and the file comment. + *

+ * This field is of internal use and will be set in {@link + * #setEncoding(String)}. + */ + private ArchiveEntryEncoding archiveEntryEncoding = + ArchiveEntryEncodingHelper.getEncoding(DEFAULT_ENCODING); + + /** + * This Deflater object is used for output. + */ + protected final Deflater def = new Deflater(level, true); + + /** + * This buffer servers as a Deflater. + */ + private final byte[] buf = new byte[BUFFER_SIZE]; + + /** + * Optional random access output. + */ + private final RandomAccessFile raf; + + private final OutputStream out; + + /** + * whether to use the general purpose bit flag when writing UTF-8 + * filenames or not. + */ + private boolean useUTF8Flag = true; + + /** + * Whether to encode non-encodable file names as UTF-8. + */ + private boolean fallbackToUTF8 = false; + + /** + * whether to create UnicodePathExtraField-s for each entry. + */ + private UnicodeExtraFieldPolicy createUnicodeExtraFields = UnicodeExtraFieldPolicy.NEVER; + + /** + * Whether anything inside this archive has used a ZIP64 feature. + */ + private boolean hasUsedZip64 = false; + + private Zip64Mode zip64Mode = Zip64Mode.AsNeeded; + + /** + * Creates a new ZIP OutputStream filtering the underlying stream. + * + * @param out the outputstream to zip + */ + public ZipArchiveOutputStream(OutputStream out) { + this.out = out; + this.raf = null; + } + + /** + * Creates a new ZIP OutputStream writing to a File. Will use + * random access if possible. + * + * @param file the file to zip to + * @throws java.io.IOException on error + */ + public ZipArchiveOutputStream(File file) throws IOException { + OutputStream o = null; + RandomAccessFile _raf = null; + try { + _raf = new RandomAccessFile(file, "rw"); + _raf.setLength(0); + } catch (IOException e) { + if (_raf != null) { + try { + _raf.close(); + } catch (IOException inner) { // NOPMD + // ignore + } + _raf = null; + } + o = new FileOutputStream(file); + } + out = o; + raf = _raf; + } + + /** + * This method indicates whether this archive is writing to a + * seekable stream (i.e., to a random access file). + * For seekable streams, you don't need to calculate the CRC or + * uncompressed size for {@link #STORED} entries before + * invoking {@link #putArchiveEntry(ZipArchiveEntry)}. + * + * @return true if seekable + */ + public boolean isSeekable() { + return raf != null; + } + + /** + * The encoding to use for filenames and the file comment. + * For a list of possible values see + * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html + * Defaults to UTF-8. + * + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + */ + public void setEncoding(final String encoding) { + this.encoding = encoding; + this.archiveEntryEncoding = ArchiveEntryEncodingHelper.getEncoding(encoding); + if (useUTF8Flag && !ArchiveEntryEncodingHelper.isUTF8(encoding)) { + useUTF8Flag = false; + } + } + + /** + * The encoding to use for filenames and the file comment. + * + * @return null if using the platform's default character encoding. + */ + public String getEncoding() { + return encoding; + } + + /** + * Whether to set the language encoding flag if the file name + * encoding is UTF-8. Defaults to true. + */ + public void setUseLanguageEncodingFlag(boolean b) { + useUTF8Flag = b && ArchiveEntryEncodingHelper.isUTF8(encoding); + } + + /** + * Whether to create Unicode Extra Fields. Defaults to NEVER. 
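+ * For example (illustrative), calling this method with
+ * UnicodeExtraFieldPolicy.ALWAYS attaches a Unicode path extra field
+ * to every entry, not only to entries whose name cannot be encoded
+ * with the configured encoding.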
+ */ + public void setCreateUnicodeExtraFields(UnicodeExtraFieldPolicy b) { + createUnicodeExtraFields = b; + } + + /** + * Whether to fall back to UTF and the language encoding flag if + * the file name cannot be encoded using the specified encoding. + * Defaults to false. + */ + public void setFallbackToUTF8(boolean b) { + fallbackToUTF8 = b; + } + + /** + * Whether Zip64 extensions will be used. + * When setting the mode to {@link Zip64Mode#Never Never}, + * {@link #putArchiveEntry}, {@link #closeArchiveEntry}, {@link + * #finish} or {@link #close} may throw a {@link + * Zip64RequiredException} if the entry's size or the total size + * of the archive exceeds 4GB or there are more than 65536 entries + * inside the archive. Any archive created in this mode will be + * readable by implementations that don't support Zip64. + * When setting the mode to {@link Zip64Mode#Always Always}, + * Zip64 extensions will be used for all entries. Any archive + * created in this mode may be unreadable by implementations that + * don't support Zip64 even if all its contents would be. + * When setting the mode to {@link Zip64Mode#AsNeeded + * AsNeeded}, Zip64 extensions will transparently be used for + * those entries that require them. This mode can only be used if + * the uncompressed size of the {@link ZipArchiveEntry} is known + * when calling {@link #putArchiveEntry} or the archive is written + * to a seekable output (i.e. you have used the {@link + * #ZipArchiveOutputStream(java.io.File) File-arg constructor}) - + * this mode is not valid when the output stream is not seekable + * and the uncompressed size is unknown when {@link + * #putArchiveEntry} is called. + * If no entry inside the resulting archive requires Zip64 + * extensions then {@link Zip64Mode#Never Never} will create the + * smallest archive. {@link Zip64Mode#AsNeeded AsNeeded} will + * create a slightly bigger archive if the uncompressed size of + * any entry has initially been unknown and create an archive + * identical to {@link Zip64Mode#Never Never} otherwise. {@link + * Zip64Mode#Always Always} will create an archive that is at + * least 24 bytes per entry bigger than the one {@link + * Zip64Mode#Never Never} would create. + * Defaults to {@link Zip64Mode#AsNeeded AsNeeded} unless + * {@link #putArchiveEntry} is called with an entry of unknown + * size and data is written to a non-seekable stream - in this + * case the default is {@link Zip64Mode#Never Never}. + */ + public void setUseZip64(Zip64Mode mode) { + zip64Mode = mode; + } + + /** + * @throws Zip64RequiredException if the archive's size exceeds 4 + * GByte or there are more than 65535 entries inside the archive + * and {@link #setUseZip64} is {@link Zip64Mode#Never}. + */ + @Override + public void finish() throws IOException { + if (finished) { + throw new IOException("This archive has already been finished"); + } + + if (entry != null) { + throw new IOException("This archives contains unclosed entries."); + } + + cdOffset = written; + for (ZipArchiveEntry ze : entries) { + writeCentralFileHeader(ze); + } + cdLength = written - cdOffset; + writeZip64CentralDirectory(); + writeCentralDirectoryEnd(); + offsets.clear(); + entries.clear(); + def.end(); + finished = true; + } + + /** + * Writes all necessary data for this entry. + * + * @throws java.io.IOException on error + * @throws Zip64RequiredException if the entry's uncompressed or + * compressed size exceeds 4 GByte and {@link #setUseZip64} + * is {@link Zip64Mode#Never}. 
+ */ + @Override + public void closeArchiveEntry() throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + + if (entry == null) { + throw new IOException("No current entry to close"); + } + + if (!entry.hasWritten) { + write(new byte[0], 0, 0); + } + + flushDeflater(); + + final Zip64Mode effectiveMode = getEffectiveZip64Mode(entry.entry); + long bytesWritten = written - entry.dataStart; + long realCrc = crc.getValue(); + crc.reset(); + + final boolean actuallyNeedsZip64 = + handleSizesAndCrc(bytesWritten, realCrc, effectiveMode); + + if (raf != null) { + rewriteSizesAndCrc(actuallyNeedsZip64); + } + + writeDataDescriptor(entry.entry); + entry = null; + } + + /** + * Ensures all bytes sent to the deflater are written to the stream. + */ + private void flushDeflater() throws IOException { + if (entry.entry.getMethod() == DEFLATED) { + def.finish(); + while (!def.finished()) { + deflate(); + } + } + } + + /** + * Ensures the current entry's size and CRC information is set to + * the values just written, verifies it isn't too big in the + * Zip64Mode.Never case and returns whether the entry would + * require a Zip64 extra field. + */ + private boolean handleSizesAndCrc(long bytesWritten, long crc, + Zip64Mode effectiveMode) + throws ZipException { + if (entry.entry.getMethod() == DEFLATED) { + /* It turns out def.getBytesRead() returns wrong values if + * the size exceeds 4 GB on Java < Java7 + entry.entry.setSize(def.getBytesRead()); + */ + entry.entry.setSize(entry.bytesRead); + entry.entry.setCompressedSize(bytesWritten); + entry.entry.setCrc(crc); + + def.reset(); + } else if (raf == null) { + if (entry.entry.getCrc() != crc) { + throw new ZipException("bad CRC checksum for entry " + + entry.entry.getName() + ": " + + Long.toHexString(entry.entry.getCrc()) + + " instead of " + + Long.toHexString(crc)); + } + + if (entry.entry.getSize() != bytesWritten) { + throw new ZipException("bad size for entry " + + entry.entry.getName() + ": " + + entry.entry.getSize() + + " instead of " + + bytesWritten); + } + } else { /* method is STORED and we used RandomAccessFile */ + entry.entry.setSize(bytesWritten); + entry.entry.setCompressedSize(bytesWritten); + entry.entry.setCrc(crc); + } + + final boolean actuallyNeedsZip64 = effectiveMode == Zip64Mode.Always + || entry.entry.getSize() >= ZIP64_MAGIC + || entry.entry.getCompressedSize() >= ZIP64_MAGIC; + if (actuallyNeedsZip64 && effectiveMode == Zip64Mode.Never) { + throw new Zip64RequiredException(Zip64RequiredException + .getEntryTooBigMessage(entry.entry)); + } + return actuallyNeedsZip64; + } + + /** + * When using random access output, write the local file header + * and potentiall the ZIP64 extra containing the correct CRC and + * compressed/uncompressed sizes. 
+ */ + private void rewriteSizesAndCrc(boolean actuallyNeedsZip64) + throws IOException { + long save = raf.getFilePointer(); + + raf.seek(entry.localDataStart); + writeOut(ZipLong.getBytes(entry.entry.getCrc())); + if (!hasZip64Extra(entry.entry) || !actuallyNeedsZip64) { + writeOut(ZipLong.getBytes(entry.entry.getCompressedSize())); + writeOut(ZipLong.getBytes(entry.entry.getSize())); + } else { + writeOut(ZipLong.ZIP64_MAGIC.getBytes()); + writeOut(ZipLong.ZIP64_MAGIC.getBytes()); + } + + if (hasZip64Extra(entry.entry)) { + // seek to ZIP64 extra, skip header and size information + raf.seek(entry.localDataStart + 3 * WORD + 2 * SHORT + + getName(entry.entry).limit() + 2 * SHORT); + // inside the ZIP64 extra uncompressed size comes + // first, unlike the LFH, CD or data descriptor + writeOut(ZipEightByteInteger.getBytes(entry.entry.getSize())); + writeOut(ZipEightByteInteger.getBytes(entry.entry.getCompressedSize())); + + if (!actuallyNeedsZip64) { + // do some cleanup: + // * rewrite version needed to extract + raf.seek(entry.localDataStart - 5 * SHORT); + writeOut(ZipShort.getBytes(INITIAL_VERSION)); + + // * remove ZIP64 extra so it doesn't get written + // to the central directory + entry.entry.removeExtraField(Zip64ExtendedInformationExtraField + .HEADER_ID); + entry.entry.setExtra(); + + // * reset hasUsedZip64 if it has been set because + // of this entry + if (entry.causedUseOfZip64) { + hasUsedZip64 = false; + } + } + } + raf.seek(save); + } + + @Override + public E newArchiveEntry() { + return (E)new ZipArchiveEntry(); + } + + /** + * {@inheritDoc} + * + * @throws ClassCastException if entry is not an instance of ZipArchiveEntry + * @throws Zip64RequiredException if the entry's uncompressed or + * compressed size is known to exceed 4 GByte and {@link #setUseZip64} + * is {@link Zip64Mode#Never}. + */ + @Override + public void putArchiveEntry(E archiveEntry) throws IOException { + if (finished) { + throw new IOException("Stream has already been finished"); + } + + if (entry != null) { + closeArchiveEntry(); + } + + entry = new CurrentEntry((ZipArchiveEntry) archiveEntry); + entries.add(entry.entry); + + setDefaults(entry.entry); + + final Zip64Mode effectiveMode = getEffectiveZip64Mode(entry.entry); + validateSizeInformation(effectiveMode); + + if (shouldAddZip64Extra(entry.entry, effectiveMode)) { + + Zip64ExtendedInformationExtraField z64 = getZip64Extra(entry.entry); + + // just a placeholder, real data will be in data + // descriptor or inserted later via RandomAccessFile + ZipEightByteInteger size = ZipEightByteInteger.ZERO; + if (entry.entry.getMethod() == STORED + && entry.entry.getSize() != ZipArchiveEntry.SIZE_UNKNOWN) { + // actually, we already know the sizes + size = new ZipEightByteInteger(entry.entry.getSize()); + } + z64.setSize(size); + z64.setCompressedSize(size); + entry.entry.setExtra(); + } + + if (entry.entry.getMethod() == DEFLATED && hasCompressionLevelChanged) { + def.setLevel(level); + hasCompressionLevelChanged = false; + } + writeLocalFileHeader(entry.entry); + } + + /** + * Provides default values for compression method and last + * modification time. 
+ */ + private void setDefaults(ZipArchiveEntry entry) { + if (entry.getMethod() == -1) { // not specified + entry.setMethod(method); + } + + if (entry.getTime() == -1) { // not specified + entry.setTime(System.currentTimeMillis()); + } + } + + /** + * Throws an exception if the size is unknown for a stored entry + * that is written to a non-seekable output or the entry is too + * big to be written without Zip64 extra but the mode has been set + * to Never. + */ + private void validateSizeInformation(Zip64Mode effectiveMode) + throws ZipException { + // Size/CRC not required if RandomAccessFile is used + if (entry.entry.getMethod() == STORED && raf == null) { + if (entry.entry.getSize() == ZipArchiveEntry.SIZE_UNKNOWN) { + throw new ZipException("uncompressed size is required for" + + " STORED method when not writing to a" + + " file"); + } + if (entry.entry.getCrc() == -1) { + throw new ZipException("crc checksum is required for STORED" + + " method when not writing to a file"); + } + entry.entry.setCompressedSize(entry.entry.getSize()); + } + + if ((entry.entry.getSize() >= ZIP64_MAGIC + || entry.entry.getCompressedSize() >= ZIP64_MAGIC) + && effectiveMode == Zip64Mode.Never) { + throw new Zip64RequiredException(Zip64RequiredException + .getEntryTooBigMessage(entry.entry)); + } + } + + /** + * Whether to add a Zip64 extended information extra field to the + * local file header. + *
+ * Returns true if
+ * - the mode is Always,
+ * - or we already know it is going to be needed,
+ * - or the size is unknown and we can ensure it won't hurt other
+ *   implementations if we add it (i.e. we can erase its usage).
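+ * For example (an illustrative consequence of these rules): an entry
+ * of unknown size written to a seekable file in AsNeeded mode gets
+ * the extra field, while the same entry written to a non-seekable
+ * stream does not.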
+ */ + private boolean shouldAddZip64Extra(ZipArchiveEntry entry, Zip64Mode mode) { + return mode == Zip64Mode.Always + || entry.getSize() >= ZIP64_MAGIC + || entry.getCompressedSize() >= ZIP64_MAGIC + || (entry.getSize() == ZipArchiveEntry.SIZE_UNKNOWN + && raf != null && mode != Zip64Mode.Never); + } + + /** + * Set the file comment. + * + * @param comment the comment + */ + public void setComment(String comment) { + this.comment = comment; + } + + /** + * Sets the compression level for subsequent entries. + * Default is Deflater.DEFAULT_COMPRESSION. + * + * @param level the compression level. + * @throws IllegalArgumentException if an invalid compression + * level is specified. + */ + public void setLevel(int level) { + if (level < Deflater.DEFAULT_COMPRESSION + || level > Deflater.BEST_COMPRESSION) { + throw new IllegalArgumentException("Invalid compression level: " + + level); + } + hasCompressionLevelChanged = (this.level != level); + this.level = level; + } + + /** + * Sets the default compression method for subsequent entries. Default is DEFLATED. + * + * @param method an int from java.util.zip.ZipEntry + */ + public void setMethod(int method) { + this.method = method; + } + + /** + * Writes bytes to ZIP entry. + * + * @param b the byte array to write + * @param offset the start position to write from + * @param length the number of bytes to write + * @throws java.io.IOException on error + */ + @Override + public void write(byte[] b, int offset, int length) throws IOException { + ZipUtil.checkRequestedFeatures(entry.entry); + entry.hasWritten = true; + if (entry.entry.getMethod() == DEFLATED) { + writeDeflated(b, offset, length); + } else { + writeOut(b, offset, length); + written += length; + } + crc.update(b, offset, length); + } + + /** + * write implementation for DEFLATED entries. + */ + private void writeDeflated(byte[] b, int offset, int length) + throws IOException { + if (length > 0 && !def.finished()) { + entry.bytesRead += length; + if (length <= DEFLATER_BLOCK_SIZE) { + def.setInput(b, offset, length); + deflateUntilInputIsNeeded(); + } else { + final int fullblocks = length / DEFLATER_BLOCK_SIZE; + for (int i = 0; i < fullblocks; i++) { + def.setInput(b, offset + i * DEFLATER_BLOCK_SIZE, + DEFLATER_BLOCK_SIZE); + deflateUntilInputIsNeeded(); + } + final int done = fullblocks * DEFLATER_BLOCK_SIZE; + if (done < length) { + def.setInput(b, offset + done, length - done); + deflateUntilInputIsNeeded(); + } + } + } + } + + /** + * Closes this output stream and releases any system resources + * associated with the stream. + * + * @throws java.io.IOException if an I/O error occurs. + * @throws Zip64RequiredException if the archive's size exceeds 4 + * GByte or there are more than 65535 entries inside the archive + * and {@link #setUseZip64} is {@link Zip64Mode#Never}. + */ + @Override + public void close() throws IOException { + if (!finished) { + finish(); + } + destroy(); + } + + /** + * Flushes this output stream and forces any buffered output bytes + * to be written out to the stream. + * + * @throws java.io.IOException if an I/O error occurs. + */ + @Override + public void flush() throws IOException { + if (out != null) { + out.flush(); + } + } + + /** + * Writes next block of compressed data to the output stream. 
+ * + * @throws java.io.IOException on error + */ + protected final void deflate() throws IOException { + int len = def.deflate(buf, 0, buf.length); + if (len > 0) { + writeOut(buf, 0, len); + written += len; + } + } + + /** + * Writes the local file header entry + * + * @param ze the entry to write + * @throws java.io.IOException on error + */ + protected void writeLocalFileHeader(ZipArchiveEntry ze) throws IOException { + + boolean encodable = archiveEntryEncoding.canEncode(ze.getName()); + ByteBuffer name = getName(ze); + + if (createUnicodeExtraFields != UnicodeExtraFieldPolicy.NEVER) { + addUnicodeExtraFields(ze, encodable, name); + } + + offsets.put(ze, Long.valueOf(written)); + + writeOut(LFH_SIG); + written += WORD; + + //store method in local variable to prevent multiple method calls + final int zipMethod = ze.getMethod(); + + writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod, + !encodable + && fallbackToUTF8, + hasZip64Extra(ze)); + written += WORD; + + // compression method + writeOut(ZipShort.getBytes(zipMethod)); + written += SHORT; + + // last mod. time and date + writeOut(ZipUtil.toDosTime(ze.getTime())); + written += WORD; + + // CRC + // compressed length + // uncompressed length + entry.localDataStart = written; + if (zipMethod == DEFLATED || raf != null) { + writeOut(LZERO); + if (hasZip64Extra(entry.entry)) { + // point to ZIP64 extended information extra field for + // sizes, may get rewritten once sizes are known if + // stream is seekable + writeOut(ZipLong.ZIP64_MAGIC.getBytes()); + writeOut(ZipLong.ZIP64_MAGIC.getBytes()); + } else { + writeOut(LZERO); + writeOut(LZERO); + } + } else { + writeOut(ZipLong.getBytes(ze.getCrc())); + byte[] size = ZipLong.ZIP64_MAGIC.getBytes(); + if (!hasZip64Extra(ze)) { + size = ZipLong.getBytes(ze.getSize()); + } + writeOut(size); + writeOut(size); + } + // CheckStyle:MagicNumber OFF + written += 12; + // CheckStyle:MagicNumber ON + + // file name length + writeOut(ZipShort.getBytes(name.limit())); + written += SHORT; + + // extra field length + byte[] extra = ze.getLocalFileDataExtra(); + writeOut(ZipShort.getBytes(extra.length)); + written += SHORT; + + // file name + writeOut(name.array(), name.arrayOffset(), name.limit()); + written += name.limit(); + + // extra field + writeOut(extra); + written += extra.length; + + entry.dataStart = written; + } + + /** + * Adds UnicodeExtra fields for name and file comment if mode is + * ALWAYS or the data cannot be encoded using the configured + * encoding. + */ + private void addUnicodeExtraFields(ZipArchiveEntry ze, boolean encodable, + ByteBuffer name) + throws IOException { + if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS + || !encodable) { + ze.addExtraField(new UnicodePathExtraField(ze.getName(), + name.array(), + name.arrayOffset(), + name.limit())); + } + + String comm = ze.getComment(); + if (comm != null && !"".equals(comm)) { + + boolean commentEncodable = archiveEntryEncoding.canEncode(comm); + + if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS + || !commentEncodable) { + ByteBuffer commentB = getEntryEncoding(ze).encode(comm); + ze.addExtraField(new UnicodeCommentExtraField(comm, + commentB.array(), + commentB.arrayOffset(), + commentB.limit()) + ); + } + } + } + + /** + * Writes the data descriptor entry. 
+ * + * @param ze the entry to write + * @throws java.io.IOException on error + */ + protected void writeDataDescriptor(ZipArchiveEntry ze) throws IOException { + if (ze.getMethod() != DEFLATED || raf != null) { + return; + } + writeOut(DD_SIG); + writeOut(ZipLong.getBytes(ze.getCrc())); + int sizeFieldSize = WORD; + if (!hasZip64Extra(ze)) { + writeOut(ZipLong.getBytes(ze.getCompressedSize())); + writeOut(ZipLong.getBytes(ze.getSize())); + } else { + sizeFieldSize = DWORD; + writeOut(ZipEightByteInteger.getBytes(ze.getCompressedSize())); + writeOut(ZipEightByteInteger.getBytes(ze.getSize())); + } + written += 2 * WORD + 2 * sizeFieldSize; + } + + /** + * Writes the central file header entry. + * + * @param ze the entry to write + * @throws java.io.IOException on error + * @throws Zip64RequiredException if the archive's size exceeds 4 + * GByte and {@link #setUseZip64} is {@link + * Zip64Mode#Never}. + */ + protected void writeCentralFileHeader(ZipArchiveEntry ze) throws IOException { + writeOut(CFH_SIG); + written += WORD; + + final long lfhOffset = offsets.get(ze).longValue(); + final boolean needsZip64Extra = hasZip64Extra(ze) + || ze.getCompressedSize() >= ZIP64_MAGIC + || ze.getSize() >= ZIP64_MAGIC + || lfhOffset >= ZIP64_MAGIC; + + if (needsZip64Extra && zip64Mode == Zip64Mode.Never) { + // must be the offset that is too big, otherwise an + // exception would have been thrown in putArchiveEntry or + // closeArchiveEntry + throw new Zip64RequiredException(Zip64RequiredException + .ARCHIVE_TOO_BIG_MESSAGE); + } + + handleZip64Extra(ze, lfhOffset, needsZip64Extra); + + // version made by + // CheckStyle:MagicNumber OFF + writeOut(ZipShort.getBytes((ze.getPlatform() << 8) | + (!hasUsedZip64 ? DATA_DESCRIPTOR_MIN_VERSION + : ZIP64_MIN_VERSION))); + written += SHORT; + + final int zipMethod = ze.getMethod(); + final boolean encodable = archiveEntryEncoding.canEncode(ze.getName()); + writeVersionNeededToExtractAndGeneralPurposeBits(zipMethod, + !encodable + && fallbackToUTF8, + needsZip64Extra); + written += WORD; + + // compression method + writeOut(ZipShort.getBytes(zipMethod)); + written += SHORT; + + // last mod.
time and date + writeOut(ZipUtil.toDosTime(ze.getTime())); + written += WORD; + + // CRC + // compressed length + // uncompressed length + writeOut(ZipLong.getBytes(ze.getCrc())); + if (ze.getCompressedSize() >= ZIP64_MAGIC + || ze.getSize() >= ZIP64_MAGIC) { + writeOut(ZipLong.ZIP64_MAGIC.getBytes()); + writeOut(ZipLong.ZIP64_MAGIC.getBytes()); + } else { + writeOut(ZipLong.getBytes(ze.getCompressedSize())); + writeOut(ZipLong.getBytes(ze.getSize())); + } + // CheckStyle:MagicNumber OFF + written += 12; + // CheckStyle:MagicNumber ON + + ByteBuffer name = getName(ze); + + writeOut(ZipShort.getBytes(name.limit())); + written += SHORT; + + // extra field length + byte[] extra = ze.getCentralDirectoryExtra(); + writeOut(ZipShort.getBytes(extra.length)); + written += SHORT; + + // file comment length + String comm = ze.getComment(); + if (comm == null) { + comm = ""; + } + + ByteBuffer commentB = getEntryEncoding(ze).encode(comm); + + writeOut(ZipShort.getBytes(commentB.limit())); + written += SHORT; + + // disk number start + writeOut(ZERO); + written += SHORT; + + // internal file attributes + writeOut(ZipShort.getBytes(ze.getInternalAttributes())); + written += SHORT; + + // external file attributes + writeOut(ZipLong.getBytes(ze.getExternalAttributes())); + written += WORD; + + // relative offset of LFH + writeOut(ZipLong.getBytes(Math.min(lfhOffset, ZIP64_MAGIC))); + written += WORD; + + // file name + writeOut(name.array(), name.arrayOffset(), name.limit()); + written += name.limit(); + + // extra field + writeOut(extra); + written += extra.length; + + // file comment + writeOut(commentB.array(), commentB.arrayOffset(), commentB.limit()); + written += commentB.limit(); + } + + /** + * If the entry needs Zip64 extra information inside the central + * directory then configure its data. + */ + private void handleZip64Extra(ZipArchiveEntry ze, long lfhOffset, + boolean needsZip64Extra) { + if (needsZip64Extra) { + Zip64ExtendedInformationExtraField z64 = getZip64Extra(ze); + if (ze.getCompressedSize() >= ZIP64_MAGIC + || ze.getSize() >= ZIP64_MAGIC) { + z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); + z64.setSize(new ZipEightByteInteger(ze.getSize())); + } else { + // reset value that may have been set for LFH + z64.setCompressedSize(null); + z64.setSize(null); + } + if (lfhOffset >= ZIP64_MAGIC) { + z64.setRelativeHeaderOffset(new ZipEightByteInteger(lfhOffset)); + } + ze.setExtra(); + } + } + + /** + * Writes the "End of central dir record". + * + * @throws java.io.IOException on error + * @throws Zip64RequiredException if the archive's size exceeds 4 + * GByte or there are more than 65535 entries inside the archive + * and {@link #setUseZip64} is {@link Zip64Mode#Never}.
+ */ + protected void writeCentralDirectoryEnd() throws IOException { + writeOut(EOCD_SIG); + + // disk numbers + writeOut(ZERO); + writeOut(ZERO); + + // number of entries + int numberOfEntries = entries.size(); + if (numberOfEntries > ZIP64_MAGIC_SHORT + && zip64Mode == Zip64Mode.Never) { + throw new Zip64RequiredException(Zip64RequiredException + .TOO_MANY_ENTRIES_MESSAGE); + } + if (cdOffset > ZIP64_MAGIC && zip64Mode == Zip64Mode.Never) { + throw new Zip64RequiredException(Zip64RequiredException + .ARCHIVE_TOO_BIG_MESSAGE); + } + + byte[] num = ZipShort.getBytes(Math.min(numberOfEntries, + ZIP64_MAGIC_SHORT)); + writeOut(num); + writeOut(num); + + // length and location of CD + writeOut(ZipLong.getBytes(Math.min(cdLength, ZIP64_MAGIC))); + writeOut(ZipLong.getBytes(Math.min(cdOffset, ZIP64_MAGIC))); + + // ZIP file comment + ByteBuffer data = this.archiveEntryEncoding.encode(comment); + writeOut(ZipShort.getBytes(data.limit())); + writeOut(data.array(), data.arrayOffset(), data.limit()); + } + + private static final byte[] ONE = ZipLong.getBytes(1L); + + /** + * Writes the "ZIP64 End of central dir record" and + * "ZIP64 End of central dir locator". + * + * @throws java.io.IOException on error + */ + protected void writeZip64CentralDirectory() throws IOException { + if (zip64Mode == Zip64Mode.Never) { + return; + } + + if (!hasUsedZip64 + && (cdOffset >= ZIP64_MAGIC || cdLength >= ZIP64_MAGIC + || entries.size() >= ZIP64_MAGIC_SHORT)) { + // actually "will use" + hasUsedZip64 = true; + } + + if (!hasUsedZip64) { + return; + } + + long offset = written; + + writeOut(ZIP64_EOCD_SIG); + // size, we don't have any variable length as we don't support + // the extensible data sector, yet + writeOut(ZipEightByteInteger + .getBytes(SHORT /* version made by */ + + SHORT /* version needed to extract */ + + WORD /* disk number */ + + WORD /* disk with central directory */ + + DWORD /* number of entries in CD on this disk */ + + DWORD /* total number of entries */ + + DWORD /* size of CD */ + + DWORD /* offset of CD */ + )); + + // version made by and version needed to extract + writeOut(ZipShort.getBytes(ZIP64_MIN_VERSION)); + writeOut(ZipShort.getBytes(ZIP64_MIN_VERSION)); + + // disk numbers - four bytes this time + writeOut(LZERO); + writeOut(LZERO); + + // number of entries + byte[] num = ZipEightByteInteger.getBytes(entries.size()); + writeOut(num); + writeOut(num); + + // length and location of CD + writeOut(ZipEightByteInteger.getBytes(cdLength)); + writeOut(ZipEightByteInteger.getBytes(cdOffset)); + + // no "zip64 extensible data sector" for now + + // and now the "ZIP64 end of central directory locator" + writeOut(ZIP64_EOCD_LOC_SIG); + + // disk number holding the ZIP64 EOCD record + writeOut(LZERO); + // relative offset of ZIP64 EOCD record + writeOut(ZipEightByteInteger.getBytes(offset)); + // total number of disks + writeOut(ONE); + } + + /** + * Write bytes to output or random access file. + * + * @param data the byte array to write + * @throws java.io.IOException on error + */ + protected final void writeOut(byte[] data) throws IOException { + writeOut(data, 0, data.length); + } + + /** + * Write bytes to output or random access file. 
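The Math.min clamping in writeCentralDirectoryEnd above is easiest to see with concrete numbers; the values below are invented for illustration:

    int numberOfEntries = 70_000;                      // more than 0xFFFF entries
    int eocdCount = Math.min(numberOfEntries, 0xFFFF); // EOCD field stores 65535
    long cdOffset = 5_000_000_000L;                    // central directory starts past 4 GiB
    long eocdOffset = Math.min(cdOffset, 0xFFFFFFFFL); // EOCD field stores the ZIP64 magic
    // A reader that finds 0xFFFF or 0xFFFFFFFF in these fields knows to consult
    // the "ZIP64 end of central directory record" written by the method above.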
+ * + * @param data the byte array to write + * @param offset the start position to write from + * @param length the number of bytes to write + * @throws java.io.IOException on error + */ + protected final void writeOut(byte[] data, int offset, int length) + throws IOException { + if (raf != null) { + raf.write(data, offset, length); + } else { + out.write(data, offset, length); + } + } + + private void deflateUntilInputIsNeeded() throws IOException { + while (!def.needsInput()) { + deflate(); + } + } + + private void writeVersionNeededToExtractAndGeneralPurposeBits(final int + zipMethod, + final boolean + utfFallback, + final boolean + zip64) + throws IOException { + + // CheckStyle:MagicNumber OFF + int versionNeededToExtract = INITIAL_VERSION; + GeneralPurposeBit b = new GeneralPurposeBit(); + b.useUTF8ForNames(useUTF8Flag || utfFallback); + if (zipMethod == DEFLATED && raf == null) { + // requires version 2 as we are going to store length info + // in the data descriptor + versionNeededToExtract = DATA_DESCRIPTOR_MIN_VERSION; + b.useDataDescriptor(true); + } + if (zip64) { + versionNeededToExtract = ZIP64_MIN_VERSION; + } + // CheckStyle:MagicNumber ON + + // version needed to extract + writeOut(ZipShort.getBytes(versionNeededToExtract)); + // general purpose bit flag + writeOut(b.encode()); + } + + /** + * Get the existing ZIP64 extended information extra field or + * create a new one and add it to the entry. + */ + private Zip64ExtendedInformationExtraField + getZip64Extra(ZipArchiveEntry ze) { + if (entry != null) { + entry.causedUseOfZip64 = !hasUsedZip64; + } + hasUsedZip64 = true; + Zip64ExtendedInformationExtraField z64 = + (Zip64ExtendedInformationExtraField) + ze.getExtraField(Zip64ExtendedInformationExtraField + .HEADER_ID); + if (z64 == null) { + z64 = new Zip64ExtendedInformationExtraField(); + } + + // even if the field is there already, make sure it is the first one + ze.addAsFirstExtraField(z64); + + return z64; + } + + /** + * Is there a ZIP64 extended information extra field for the + * entry? + */ + private boolean hasZip64Extra(ZipArchiveEntry ze) { + return ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID) != null; + } + + /** + * If the mode is AsNeeded and the entry is a compressed entry of + * unknown size that gets written to a non-seekable stream then + * change the default to Never. + */ + private Zip64Mode getEffectiveZip64Mode(ZipArchiveEntry ze) { + if (zip64Mode != Zip64Mode.AsNeeded + || raf != null + || ze.getMethod() != DEFLATED + || ze.getSize() != ZipArchiveEntry.SIZE_UNKNOWN) { + return zip64Mode; + } + return Zip64Mode.Never; + } + + private ArchiveEntryEncoding getEntryEncoding(ZipArchiveEntry ze) { + boolean encodable = archiveEntryEncoding.canEncode(ze.getName()); + return !encodable && fallbackToUTF8 + ? ArchiveEntryEncodingHelper.UTF8_ENCODING : archiveEntryEncoding; + } + + private ByteBuffer getName(ZipArchiveEntry ze) throws IOException { + return getEntryEncoding(ze).encode(ze.getName()); + } + + /** + * Closes the underlying stream/file without finishing the + * archive, the result will likely be a corrupt archive. + *
This method only exists to support tests that generate + * corrupt archives so they can clean up any temporary files.
+ */ + void destroy() throws IOException { + if (raf != null) { + raf.close(); + } + if (out != null) { + out.close(); + } + } + + /** + * enum that represents the possible policies for creating Unicode + * extra fields. + */ + public static final class UnicodeExtraFieldPolicy { + /** + * Always create Unicode extra fields. + */ + public static final UnicodeExtraFieldPolicy ALWAYS = new UnicodeExtraFieldPolicy("always"); + /** + * Never create Unicode extra fields. + */ + public static final UnicodeExtraFieldPolicy NEVER = new UnicodeExtraFieldPolicy("never"); + /** + * Create Unicode extra fields for filenames that cannot be + * encoded using the specified encoding. + */ + public static final UnicodeExtraFieldPolicy NOT_ENCODEABLE = + new UnicodeExtraFieldPolicy("not encodeable"); + + private final String name; + + private UnicodeExtraFieldPolicy(String n) { + name = n; + } + + @Override + public String toString() { + return name; + } + } + + /** + * Structure collecting information for the entry that is + * currently being written. + */ + private static final class CurrentEntry { + private CurrentEntry(ZipArchiveEntry entry) { + this.entry = entry; + } + + /** + * Current ZIP entry. + */ + private final ZipArchiveEntry entry; + /** + * Offset for CRC entry in the local file header data for the + * current entry starts here. + */ + private long localDataStart = 0; + /** + * Data for local header data + */ + private long dataStart = 0; + /** + * Number of bytes read for the current entry (can't rely on + * Deflater#getBytesRead) when using DEFLATED. + */ + private long bytesRead = 0; + /** + * Whether current entry was the first one using ZIP64 features. + */ + private boolean causedUseOfZip64 = false; + /** + * Whether write() has been called at all. + *
In order to create a valid archive {@link + * #closeArchiveEntry closeArchiveEntry} will write an empty + * array to get the CRC right if nothing has been written to + * the stream at all.
+ */ + private boolean hasWritten; + } + +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipConstants.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipConstants.java new file mode 100644 index 0000000..ba5d9f1 --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipConstants.java @@ -0,0 +1,56 @@ + +package org.xbib.io.archive.zip; + +/** + * Various constants used throughout the package. + */ +interface ZipConstants { + + /** + * Masks last eight bits + */ + int BYTE_MASK = 0xFF; + + /** + * length of a ZipShort in bytes + */ + int SHORT = 2; + + /** + * length of a ZipLong in bytes + */ + int WORD = 4; + + /** + * length of a ZipEightByteInteger in bytes + */ + int DWORD = 8; + + /** + * Initial ZIP specification version + */ + int INITIAL_VERSION = 10; + + /** + * ZIP specification version that introduced data descriptor method + */ + int DATA_DESCRIPTOR_MIN_VERSION = 20; + + /** + * ZIP specification version that introduced ZIP64 + */ + int ZIP64_MIN_VERSION = 45; + + /** + * Value stored in two-byte size and similar fields if ZIP64 + * extensions are used. + */ + int ZIP64_MAGIC_SHORT = 0xFFFF; + + /** + * Value stored in four-byte size and similar fields if ZIP64 + * extensions are used. + */ + long ZIP64_MAGIC = 0xFFFFFFFFL; + +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipEightByteInteger.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipEightByteInteger.java new file mode 100644 index 0000000..89ce25f --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipEightByteInteger.java @@ -0,0 +1,226 @@ + +package org.xbib.io.archive.zip; + +import java.math.BigInteger; + +import static org.xbib.io.archive.zip.ZipConstants.BYTE_MASK; + +/** + * Utility class that represents an eight byte integer with conversion + * rules for the big endian byte order of ZIP files. + */ +public final class ZipEightByteInteger { + + private static final int BYTE_1 = 1; + private static final int BYTE_1_MASK = 0xFF00; + private static final int BYTE_1_SHIFT = 8; + + private static final int BYTE_2 = 2; + private static final int BYTE_2_MASK = 0xFF0000; + private static final int BYTE_2_SHIFT = 16; + + private static final int BYTE_3 = 3; + private static final long BYTE_3_MASK = 0xFF000000L; + private static final int BYTE_3_SHIFT = 24; + + private static final int BYTE_4 = 4; + private static final long BYTE_4_MASK = 0xFF00000000L; + private static final int BYTE_4_SHIFT = 32; + + private static final int BYTE_5 = 5; + private static final long BYTE_5_MASK = 0xFF0000000000L; + private static final int BYTE_5_SHIFT = 40; + + private static final int BYTE_6 = 6; + private static final long BYTE_6_MASK = 0xFF000000000000L; + private static final int BYTE_6_SHIFT = 48; + + private static final int BYTE_7 = 7; + private static final long BYTE_7_MASK = 0x7F00000000000000L; + private static final int BYTE_7_SHIFT = 56; + + private static final int LEFTMOST_BIT_SHIFT = 63; + private static final byte LEFTMOST_BIT = (byte) 0x80; + + private final BigInteger value; + + public static final ZipEightByteInteger ZERO = new ZipEightByteInteger(0); + + /** + * Create instance from a number. + * + * @param value the long to store as a ZipEightByteInteger + */ + public ZipEightByteInteger(long value) { + this(BigInteger.valueOf(value)); + } + + /** + * Create instance from a number. 
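As a reader-side sketch of how the two ZIP64 sentinel values defined in ZipConstants are meant to be used (the local variable is invented for the example; the constants are accessible here because the interface is package-private):

    long uncompressedSizeField = 0xFFFFFFFFL; // four-byte size field read from a header
    if (uncompressedSizeField == ZipConstants.ZIP64_MAGIC) {
        // the real size does not fit into four bytes and must be taken from
        // the ZIP64 extended information extra field instead
    }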
+ * + * @param value the BigInteger to store as a ZipEightByteInteger + */ + public ZipEightByteInteger(BigInteger value) { + this.value = value; + } + + /** + * Create instance from bytes. + * + * @param bytes the bytes to store as a ZipEightByteInteger + */ + public ZipEightByteInteger(byte[] bytes) { + this(bytes, 0); + } + + /** + * Create instance from the eight bytes starting at offset. + * + * @param bytes the bytes to store as a ZipEightByteInteger + * @param offset the offset to start + */ + public ZipEightByteInteger(byte[] bytes, int offset) { + value = ZipEightByteInteger.getValue(bytes, offset); + } + + /** + * Get value as eight bytes in big endian byte order. + * + * @return value as eight bytes in big endian order + */ + public byte[] getBytes() { + return ZipEightByteInteger.getBytes(value); + } + + /** + * Get value as Java long. + * + * @return value as a long + */ + public long getLongValue() { + return value.longValue(); + } + + /** + * Get value as Java BigInteger. + * + * @return value as a BigInteger + */ + public BigInteger getValue() { + return value; + } + + /** + * Get value as eight bytes in big endian byte order. + * + * @param value the value to convert + * @return value as eight bytes in big endian byte order + */ + public static byte[] getBytes(long value) { + return getBytes(BigInteger.valueOf(value)); + } + + /** + * Get value as eight bytes in big endian byte order. + * + * @param value the value to convert + * @return value as eight bytes in big endian byte order + */ + public static byte[] getBytes(BigInteger value) { + byte[] result = new byte[8]; + long val = value.longValue(); + result[0] = (byte) ((val & BYTE_MASK)); + result[BYTE_1] = (byte) ((val & BYTE_1_MASK) >> BYTE_1_SHIFT); + result[BYTE_2] = (byte) ((val & BYTE_2_MASK) >> BYTE_2_SHIFT); + result[BYTE_3] = (byte) ((val & BYTE_3_MASK) >> BYTE_3_SHIFT); + result[BYTE_4] = (byte) ((val & BYTE_4_MASK) >> BYTE_4_SHIFT); + result[BYTE_5] = (byte) ((val & BYTE_5_MASK) >> BYTE_5_SHIFT); + result[BYTE_6] = (byte) ((val & BYTE_6_MASK) >> BYTE_6_SHIFT); + result[BYTE_7] = (byte) ((val & BYTE_7_MASK) >> BYTE_7_SHIFT); + if (value.testBit(LEFTMOST_BIT_SHIFT)) { + result[BYTE_7] |= LEFTMOST_BIT; + } + return result; + } + + /** + * Helper method to get the value as a Java long from eight bytes + * starting at given array offset + * + * @param bytes the array of bytes + * @param offset the offset to start + * @return the corresponding Java long value + */ + public static long getLongValue(byte[] bytes, int offset) { + return getValue(bytes, offset).longValue(); + } + + /** + * Helper method to get the value as a Java BigInteger from eight + * bytes starting at given array offset + * + * @param bytes the array of bytes + * @param offset the offset to start + * @return the corresponding Java BigInteger value + */ + public static BigInteger getValue(byte[] bytes, int offset) { + long value = ((long) bytes[offset + BYTE_7] << BYTE_7_SHIFT) & BYTE_7_MASK; + value += ((long) bytes[offset + BYTE_6] << BYTE_6_SHIFT) & BYTE_6_MASK; + value += ((long) bytes[offset + BYTE_5] << BYTE_5_SHIFT) & BYTE_5_MASK; + value += ((long) bytes[offset + BYTE_4] << BYTE_4_SHIFT) & BYTE_4_MASK; + value += ((long) bytes[offset + BYTE_3] << BYTE_3_SHIFT) & BYTE_3_MASK; + value += ((long) bytes[offset + BYTE_2] << BYTE_2_SHIFT) & BYTE_2_MASK; + value += ((long) bytes[offset + BYTE_1] << BYTE_1_SHIFT) & BYTE_1_MASK; + value += ((long) bytes[offset] & BYTE_MASK); + BigInteger val = BigInteger.valueOf(value); + return (bytes[offset + BYTE_7] &
LEFTMOST_BIT) == LEFTMOST_BIT + ? val.setBit(LEFTMOST_BIT_SHIFT) : val; + } + + /** + * Helper method to get the value as a Java long from an eight-byte array + * + * @param bytes the array of bytes + * @return the corresponding Java long value + */ + public static long getLongValue(byte[] bytes) { + return getLongValue(bytes, 0); + } + + /** + * Helper method to get the value as a Java long from an eight-byte array + * + * @param bytes the array of bytes + * @return the corresponding Java BigInteger value + */ + public static BigInteger getValue(byte[] bytes) { + return getValue(bytes, 0); + } + + /** + * Override to make two instances with same value equal. + * + * @param o an object to compare + * @return true if the objects are equal + */ + @Override + public boolean equals(Object o) { + return !(o == null || !(o instanceof ZipEightByteInteger)) + && value.equals(((ZipEightByteInteger) o).getValue()); + } + + /** + * Override to make two instances with same value equal. + * + * @return the hashCode of the value stored in the ZipEightByteInteger + */ + @Override + public int hashCode() { + return value.hashCode(); + } + + @Override + public String toString() { + return "ZipEightByteInteger value: " + value; + } +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipExtraField.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipExtraField.java new file mode 100644 index 0000000..87e29b5 --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipExtraField.java @@ -0,0 +1,73 @@ +package org.xbib.io.archive.zip; + +import java.util.zip.ZipException; + +/** + * General format of extra field data. + * Extra fields usually appear twice per file, once in the local file data and + * once in the central directory. Usually they are the same, but they don't have + * to be. {@link java.util.zip.ZipOutputStream java.util.zip.ZipOutputStream} + * will only use the local file data in both places. + */ +public interface ZipExtraField { + /** + * The Header-ID. + * + * @return The HeaderId value + */ + ZipShort getHeaderId(); + + /** + * Length of the extra field in the local file data - without Header-ID or + * length specifier. + * + * @return The LocalFileDataLength value + */ + ZipShort getLocalFileDataLength(); + + /** + * Length of the extra field in the central directory - without Header-ID or + * length specifier. + * + * @return The CentralDirectoryLength value + */ + ZipShort getCentralDirectoryLength(); + + /** + * The actual data to put into local file data - without Header-ID or length + * specifier. + * + * @return The LocalFileDataData value + */ + byte[] getLocalFileDataData(); + + /** + * The actual data to put into central directory - without Header-ID or + * length specifier. + * + * @return The CentralDirectoryData value + */ + byte[] getCentralDirectoryData(); + + /** + * Populate data from this array as if it was in local file data. + * + * @param buffer the buffer to read data from + * @param offset offset into buffer to read data + * @param length the length of data + * @throws java.util.zip.ZipException on error + */ + void parseFromLocalFileData(byte[] buffer, int offset, int length) + throws ZipException; + + /** + * Populate data from this array as if it was in central directory data. 
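A minimal sketch of what an implementation of this interface can look like; the header ID 0x9999 is an invented placeholder, not a registered extra-field ID:

    final class EmptyExtraField implements ZipExtraField {
        public ZipShort getHeaderId() { return new ZipShort(0x9999); } // placeholder ID
        public ZipShort getLocalFileDataLength() { return new ZipShort(0); }
        public ZipShort getCentralDirectoryLength() { return new ZipShort(0); }
        public byte[] getLocalFileDataData() { return new byte[0]; }
        public byte[] getCentralDirectoryData() { return new byte[0]; }
        public void parseFromLocalFileData(byte[] buffer, int offset, int length) { }
        public void parseFromCentralDirectoryData(byte[] buffer, int offset, int length) { }
    }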
+ * + * @param buffer the buffer to read data from + * @param offset offset into buffer to read data + * @param length the length of data + * @throws java.util.zip.ZipException on error + */ + void parseFromCentralDirectoryData(byte[] buffer, int offset, int length) + throws ZipException; +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipFile.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipFile.java new file mode 100644 index 0000000..33161d5 --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipFile.java @@ -0,0 +1,941 @@ +package org.xbib.io.archive.zip; + +import org.xbib.io.archive.entry.ArchiveEntryEncoding; +import org.xbib.io.archive.entry.ArchiveEntryEncodingHelper; + +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.zip.Inflater; +import java.util.zip.InflaterInputStream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipException; + +import static org.xbib.io.archive.zip.ZipConstants.DWORD; +import static org.xbib.io.archive.zip.ZipConstants.SHORT; +import static org.xbib.io.archive.zip.ZipConstants.WORD; +import static org.xbib.io.archive.zip.ZipConstants.ZIP64_MAGIC; +import static org.xbib.io.archive.zip.ZipConstants.ZIP64_MAGIC_SHORT; + +/** + * Replacement for java.util.ZipFile. + * This class adds support for file name encodings other than UTF-8 + * (which is required to work on ZIP files created by native zip tools) + * and is able to skip a preamble like the one found in + * self-extracting archives. Furthermore it returns instances of + * org.apache.commons.compress.archivers.zip.ZipArchiveEntry + * instead of java.util.zip.ZipEntry. + * It doesn't extend java.util.zip.ZipFile as it would + * have to reimplement all methods anyway. Like + * java.util.ZipFile, it uses RandomAccessFile under the + * covers and supports compressed and uncompressed entries. This code + * also transparently supports Zip64 + * extensions and thus individual entries and archives larger than 4 + * GB or with more than 65536 entries. + * The method signatures mimic the ones of + * java.util.zip.ZipFile, with a couple of exceptions: + *
+ * <ul>
+ * <li>There is no getName method.</li>
+ * <li>entries has been renamed to getEntries.</li>
+ * <li>getEntries and getEntry return + * org.apache.commons.compress.archivers.zip.ZipArchiveEntry + * instances.</li>
+ * <li>close is allowed to throw IOException.</li>
+ * </ul>
+ */ +public class ZipFile { + private static final int HASH_SIZE = 509; + static final int NIBLET_MASK = 0x0f; + static final int BYTE_SHIFT = 8; + private static final int POS_0 = 0; + private static final int POS_1 = 1; + private static final int POS_2 = 2; + private static final int POS_3 = 3; + + /** + * Maps ZipArchiveEntrys to two longs, recording the offsets of + * the local file headers and the start of entry data. + */ + private final Map entries = + new LinkedHashMap(HASH_SIZE); + + /** + * Maps String to ZipArchiveEntrys, name -> actual entry. + */ + private final Map nameMap = + new HashMap(HASH_SIZE); + + private static final class OffsetEntry { + private long headerOffset = -1; + private long dataOffset = -1; + } + + /** + * The encoding to use for filenames and the file comment. + *
For a list of possible values see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html. + * Defaults to UTF-8.
+ */ + private final String encoding; + + /** + * The zip encoding to use for filenames and the file comment. + */ + private final ArchiveEntryEncoding archiveEntryEncoding; + + /** + * File name of actual source. + */ + private final String archiveName; + + /** + * The actual data source. + */ + private final RandomAccessFile archive; + + /** + * Whether to look for and use Unicode extra fields. + */ + private final boolean useUnicodeExtraFields; + + /** + * Whether the file is closed. + */ + private boolean closed; + + /** + * Opens the given file for reading, assuming "UTF8" for file names. + * + * @param f the archive. + * @throws java.io.IOException if an error occurs while reading the file. + */ + public ZipFile(File f) throws IOException { + this(f, ArchiveEntryEncodingHelper.UTF8); + } + + /** + * Opens the given file for reading, assuming "UTF8". + * + * @param name name of the archive. + * @throws java.io.IOException if an error occurs while reading the file. + */ + public ZipFile(String name) throws IOException { + this(new File(name), ArchiveEntryEncodingHelper.UTF8); + } + + /** + * Opens the given file for reading, assuming the specified + * encoding for file names, scanning unicode extra fields. + * + * @param name name of the archive. + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @throws java.io.IOException if an error occurs while reading the file. + */ + public ZipFile(String name, String encoding) throws IOException { + this(new File(name), encoding, true); + } + + /** + * Opens the given file for reading, assuming the specified + * encoding for file names and scanning for unicode extra fields. + * + * @param f the archive. + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @throws java.io.IOException if an error occurs while reading the file. + */ + public ZipFile(File f, String encoding) throws IOException { + this(f, encoding, true); + } + + /** + * Opens the given file for reading, assuming the specified + * encoding for file names. + * + * @param f the archive. + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param useUnicodeExtraFields whether to use InfoZIP Unicode + * Extra Fields (if present) to set the file names. + * @throws java.io.IOException if an error occurs while reading the file. + */ + public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) + throws IOException { + this.archiveName = f.getAbsolutePath(); + this.encoding = encoding; + this.archiveEntryEncoding = ArchiveEntryEncodingHelper.getEncoding(encoding); + this.useUnicodeExtraFields = useUnicodeExtraFields; + archive = new RandomAccessFile(f, "r"); + boolean success = false; + try { + Map entriesWithoutUTF8Flag = populateFromCentralDirectory(); + resolveLocalFileHeaderData(entriesWithoutUTF8Flag); + success = true; + } finally { + if (!success) { + try { + closed = true; + archive.close(); + } catch (IOException e2) { + // swallow, throw the original exception instead + } + } + } + } + + /** + * The encoding to use for filenames and the file comment. + * + * @return null if using the platform's default character encoding. + */ + public String getEncoding() { + return encoding; + } + + /** + * Closes the archive. + * + * @throws java.io.IOException if an error occurs closing the archive. 
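A usage sketch for the constructors above; the file name and the Cp437 encoding are illustrative, and closeQuietly is the helper defined further down in this class:

    ZipFile zf = new ZipFile(new File("archive.zip"), "Cp437", true);
    try {
        // ... work with the archive ...
    } finally {
        ZipFile.closeQuietly(zf);
    }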
+ */ + public void close() throws IOException { + // this flag is only written here and read in finalize() which + // can never be run in parallel. + // no synchronization needed. + closed = true; + + archive.close(); + } + + /** + * Close a zipfile quietly; throws no I/O fault, does nothing + * on a null parameter + * + * @param zipfile file to close, can be null + */ + public static void closeQuietly(ZipFile zipfile) { + if (zipfile != null) { + try { + zipfile.close(); + } catch (IOException e) { // NOPMD + //ignore, that's why the method is called "quietly" + } + } + } + + /** + * Returns all entries. + * Entries will be returned in the same order they appear + * within the archive's central directory. + * + * @return all entries as {@link ZipArchiveEntry} instances + */ + public Enumeration getEntries() { + return Collections.enumeration(entries.keySet()); + } + + /** + * Returns all entries in physical order. + * Entries will be returned in the same order their contents + * appear within the archive. + * + * @return all entries as {@link ZipArchiveEntry} instances + */ + public Enumeration getEntriesInPhysicalOrder() { + ZipArchiveEntry[] allEntries = + entries.keySet().toArray(new ZipArchiveEntry[0]); + Arrays.sort(allEntries, OFFSET_COMPARATOR); + return Collections.enumeration(Arrays.asList(allEntries)); + } + + /** + * Returns a named entry - or {@code null} if no entry by + * that name exists. + * + * @param name name of the entry. + * @return the ZipArchiveEntry corresponding to the given name - or + * {@code null} if not present. + */ + public ZipArchiveEntry getEntry(String name) { + return nameMap.get(name); + } + + /** + * Whether this class is able to read the given entry. + * May return false if it is set up to use encryption or a + * compression method that hasn't been implemented yet. + */ + public boolean canReadEntryData(ZipArchiveEntry ze) { + return ZipUtil.canHandleEntryData(ze); + } + + /** + * Returns an InputStream for reading the contents of the given entry. + * + * @param ze the entry to get the stream for. + * @return a stream to read the entry from. + * @throws java.io.IOException if unable to create an input stream from the zipentry + * @throws java.util.zip.ZipException if the zipentry uses an unsupported feature + */ + public InputStream getInputStream(ZipArchiveEntry ze) + throws IOException, ZipException { + OffsetEntry offsetEntry = entries.get(ze); + if (offsetEntry == null) { + return null; + } + ZipUtil.checkRequestedFeatures(ze); + long start = offsetEntry.dataOffset; + BoundedInputStream bis = + new BoundedInputStream(start, ze.getCompressedSize()); + switch (ze.getMethod()) { + case ZipEntry.STORED: + return bis; + case ZipEntry.DEFLATED: + bis.addDummy(); + final Inflater inflater = new Inflater(true); + return new InflaterInputStream(bis, inflater) { + @Override + public void close() throws IOException { + super.close(); + inflater.end(); + } + }; + default: + throw new ZipException("Found unsupported compression method " + + ze.getMethod()); + } + } + + /** + * Ensures that the close method of this zipfile is called when + * there are no more references to it. + * + * @see #close() + */ + @Override + protected void finalize() throws Throwable { + try { + if (!closed) { + close(); + } + } finally { + super.finalize(); + } + } + + /** + * Length of a "central directory" entry structure without file + * name, extra fields or comment.
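Putting getEntries, canReadEntryData and getInputStream together, reading every supported entry might look like the following sketch (zf is an open ZipFile; the raw Enumeration matches the pre-generics style of this class):

    Enumeration en = zf.getEntries();
    while (en.hasMoreElements()) {
        ZipArchiveEntry e = (ZipArchiveEntry) en.nextElement();
        if (zf.canReadEntryData(e)) {
            InputStream in = zf.getInputStream(e);
            try {
                // ... consume the entry's data ...
            } finally {
                in.close();
            }
        }
    }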
+ */ + private static final int CFH_LEN = + /* version made by */ SHORT + /* version needed to extract */ + SHORT + /* general purpose bit flag */ + SHORT + /* compression method */ + SHORT + /* last mod file time */ + SHORT + /* last mod file date */ + SHORT + /* crc-32 */ + WORD + /* compressed size */ + WORD + /* uncompressed size */ + WORD + /* filename length */ + SHORT + /* extra field length */ + SHORT + /* file comment length */ + SHORT + /* disk number start */ + SHORT + /* internal file attributes */ + SHORT + /* external file attributes */ + WORD + /* relative offset of local header */ + WORD; + + private static final long CFH_SIG = + ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); + + /** + * Reads the central directory of the given archive and populates + * the internal tables with ZipArchiveEntry instances. + * The ZipArchiveEntrys will know all data that can be obtained from + * the central directory alone, but not the data that requires the + * local file header or additional data to be read. + * + * @return a map of zipentries that didn't have the language + * encoding flag set when read. + */ + private Map populateFromCentralDirectory() + throws IOException { + HashMap noUTF8Flag = + new HashMap(); + + positionAtCentralDirectory(); + + byte[] signatureBytes = new byte[WORD]; + archive.readFully(signatureBytes); + long sig = ZipLong.getValue(signatureBytes); + + if (sig != CFH_SIG && startsWithLocalFileHeader()) { + throw new IOException("central directory is empty, can't expand" + + " corrupt archive."); + } + + while (sig == CFH_SIG) { + readCentralDirectoryEntry(noUTF8Flag); + archive.readFully(signatureBytes); + sig = ZipLong.getValue(signatureBytes); + } + return noUTF8Flag; + } + + /** + * Reads an individual entry of the central directory, creates a + * ZipArchiveEntry from it and adds it to the global maps. + * + * @param noUTF8Flag map used to collect entries that don't have + * their UTF-8 flag set and whose name will be set by data read + * from the local file header later. The current entry may be + * added to this map. + */ + private void + readCentralDirectoryEntry(Map noUTF8Flag) + throws IOException { + byte[] cfh = new byte[CFH_LEN]; + + archive.readFully(cfh); + int off = 0; + ZipArchiveEntry ze = new ZipArchiveEntry(); + + int versionMadeBy = ZipShort.getValue(cfh, off); + off += SHORT; + ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); + + off += SHORT; // skip version info + + final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfh, off); + final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); + final ArchiveEntryEncoding entryEncoding = + hasUTF8Flag ?
ArchiveEntryEncodingHelper.UTF8_ENCODING : archiveEntryEncoding; + ze.setGeneralPurposeBit(gpFlag); + + off += SHORT; + + ze.setMethod(ZipShort.getValue(cfh, off)); + off += SHORT; + + long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off)); + ze.setTime(time); + off += WORD; + + ze.setCrc(ZipLong.getValue(cfh, off)); + off += WORD; + + ze.setCompressedSize(ZipLong.getValue(cfh, off)); + off += WORD; + + ze.setSize(ZipLong.getValue(cfh, off)); + off += WORD; + + int fileNameLen = ZipShort.getValue(cfh, off); + off += SHORT; + + int extraLen = ZipShort.getValue(cfh, off); + off += SHORT; + + int commentLen = ZipShort.getValue(cfh, off); + off += SHORT; + + int diskStart = ZipShort.getValue(cfh, off); + off += SHORT; + + ze.setInternalAttributes(ZipShort.getValue(cfh, off)); + off += SHORT; + + ze.setExternalAttributes(ZipLong.getValue(cfh, off)); + off += WORD; + + byte[] fileName = new byte[fileNameLen]; + archive.readFully(fileName); + ze.setName(entryEncoding.decode(fileName), fileName); + + // LFH offset, + OffsetEntry offset = new OffsetEntry(); + offset.headerOffset = ZipLong.getValue(cfh, off); + // data offset will be filled later + entries.put(ze, offset); + + nameMap.put(ze.getName(), ze); + + byte[] cdExtraData = new byte[extraLen]; + archive.readFully(cdExtraData); + ze.setCentralDirectoryExtra(cdExtraData); + + setSizesAndOffsetFromZip64Extra(ze, offset, diskStart); + + byte[] comment = new byte[commentLen]; + archive.readFully(comment); + ze.setComment(entryEncoding.decode(comment)); + + if (!hasUTF8Flag && useUnicodeExtraFields) { + noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); + } + } + + /** + * If the entry holds a Zip64 extended information extra field, + * read sizes from there if the entry's sizes are set to + * 0xFFFFFFFF, do the same for the offset of the local file + * header. + *
Ensures the Zip64 extra either knows both compressed and + * uncompressed size or neither of them, as the internal logic in + * ExtraFieldUtils forces the field to create local header data + * even if they are never used - and here a field with only one + * size would be invalid.
+ */ + private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze, + OffsetEntry offset, + int diskStart) + throws IOException { + Zip64ExtendedInformationExtraField z64 = + (Zip64ExtendedInformationExtraField) + ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); + if (z64 != null) { + boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; + boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; + boolean hasRelativeHeaderOffset = + offset.headerOffset == ZIP64_MAGIC; + z64.reparseCentralDirectoryData(hasUncompressedSize, + hasCompressedSize, + hasRelativeHeaderOffset, + diskStart == ZIP64_MAGIC_SHORT); + + if (hasUncompressedSize) { + ze.setSize(z64.getSize().getLongValue()); + } else if (hasCompressedSize) { + z64.setSize(new ZipEightByteInteger(ze.getSize())); + } + + if (hasCompressedSize) { + ze.setCompressedSize(z64.getCompressedSize().getLongValue()); + } else if (hasUncompressedSize) { + z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); + } + + if (hasRelativeHeaderOffset) { + offset.headerOffset = + z64.getRelativeHeaderOffset().getLongValue(); + } + } + } + + /** + * Length of the "End of central directory record" - which is + * supposed to be the last structure of the archive - without file + * comment. + */ + private static final int MIN_EOCD_SIZE = + /* end of central dir signature */ WORD + /* number of this disk */ + SHORT + /* number of the disk with the */ + /* start of the central directory */ + SHORT + /* total number of entries in */ + /* the central dir on this disk */ + SHORT + /* total number of entries in */ + /* the central dir */ + SHORT + /* size of the central directory */ + WORD + /* offset of start of central */ + /* directory with respect to */ + /* the starting disk number */ + WORD + /* zipfile comment length */ + SHORT; + + /** + * Maximum length of the "End of central directory record" with a + * file comment. + */ + private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE + /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; + + /** + * Offset of the field that holds the location of the first + * central directory entry inside the "End of central directory + * record" relative to the start of the "End of central directory + * record". + */ + private static final int CFD_LOCATOR_OFFSET = + /* end of central dir signature */ WORD + /* number of this disk */ + SHORT + /* number of the disk with the */ + /* start of the central directory */ + SHORT + /* total number of entries in */ + /* the central dir on this disk */ + SHORT + /* total number of entries in */ + /* the central dir */ + SHORT + /* size of the central directory */ + WORD; + + /** + * Length of the "Zip64 end of central directory locator" - which + * should be right in front of the "end of central directory + * record" if one is present at all. + */ + private static final int ZIP64_EOCDL_LENGTH = + /* zip64 end of central dir locator sig */ WORD + /* number of the disk with the start */ + /* start of the zip64 end of */ + /* central directory */ + WORD + /* relative offset of the zip64 */ + /* end of central directory record */ + DWORD + /* total number of disks */ + WORD; + + /** + * Offset of the field that holds the location of the "Zip64 end + * of central directory record" inside the "Zip64 end of central + * directory locator" relative to the start of the "Zip64 end of + * central directory locator". 
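Plugging the ZipConstants values (statically imported above) into these sums gives the numbers that bound the end-of-central-directory search:

    int minEocdSize = WORD + SHORT + SHORT + SHORT + SHORT + WORD + WORD + SHORT;
    // == 22, the fixed part of the EOCD record
    int maxEocdSize = minEocdSize + 0xFFFF;
    // == 65557 with a maximal archive comment: the farthest from the end of the
    // file that the signature search ever has to look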
+ */ + private static final int ZIP64_EOCDL_LOCATOR_OFFSET = + /* zip64 end of central dir locator sig */ WORD + /* number of the disk with the start */ + /* start of the zip64 end of */ + /* central directory */ + WORD; + + /** + * Offset of the field that holds the location of the first + * central directory entry inside the "Zip64 end of central + * directory record" relative to the start of the "Zip64 end of + * central directory record". + */ + private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = + /* zip64 end of central dir */ + /* signature */ WORD + /* size of zip64 end of central */ + /* directory record */ + DWORD + /* version made by */ + SHORT + /* version needed to extract */ + SHORT + /* number of this disk */ + WORD + /* number of the disk with the */ + /* start of the central directory */ + WORD + /* total number of entries in the */ + /* central directory on this disk */ + DWORD + /* total number of entries in the */ + /* central directory */ + DWORD + /* size of the central directory */ + DWORD; + + /** + * Searches for either the "Zip64 end of central directory + * locator" or the "End of central dir record", parses + * it and positions the stream at the first central directory + * record. + */ + private void positionAtCentralDirectory() + throws IOException { + boolean found = tryToLocateSignature(MIN_EOCD_SIZE + ZIP64_EOCDL_LENGTH, + MAX_EOCD_SIZE + ZIP64_EOCDL_LENGTH, + ZipArchiveOutputStream + .ZIP64_EOCD_LOC_SIG); + if (!found) { + // not a ZIP64 archive + positionAtCentralDirectory32(); + } else { + positionAtCentralDirectory64(); + } + } + + /** + * Parses the "Zip64 end of central directory locator", + * finds the "Zip64 end of central directory record" using the + * parsed information, parses that and positions the stream at the + * first central directory record. + */ + private void positionAtCentralDirectory64() + throws IOException { + skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET); + byte[] zip64EocdOffset = new byte[DWORD]; + archive.readFully(zip64EocdOffset); + archive.seek(ZipEightByteInteger.getLongValue(zip64EocdOffset)); + byte[] sig = new byte[WORD]; + archive.readFully(sig); + if (sig[POS_0] != ZipArchiveOutputStream.ZIP64_EOCD_SIG[POS_0] + || sig[POS_1] != ZipArchiveOutputStream.ZIP64_EOCD_SIG[POS_1] + || sig[POS_2] != ZipArchiveOutputStream.ZIP64_EOCD_SIG[POS_2] + || sig[POS_3] != ZipArchiveOutputStream.ZIP64_EOCD_SIG[POS_3] + ) { + throw new ZipException("archive's ZIP64 end of central " + + "directory locator is corrupt."); + } + skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET + - WORD /* signature has already been read */); + byte[] cfdOffset = new byte[DWORD]; + archive.readFully(cfdOffset); + archive.seek(ZipEightByteInteger.getLongValue(cfdOffset)); + } + + /** + * Searches for the "End of central dir record", parses + * it and positions the stream at the first central directory + * record. + */ + private void positionAtCentralDirectory32() + throws IOException { + boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, + ZipArchiveOutputStream.EOCD_SIG); + if (!found) { + throw new ZipException("archive is not a ZIP archive"); + } + skipBytes(CFD_LOCATOR_OFFSET); + byte[] cfdOffset = new byte[WORD]; + archive.readFully(cfdOffset); + archive.seek(ZipLong.getValue(cfdOffset)); + } + + /** + * Searches the archive backwards from minDistance to maxDistance + * for the given signature, positions the RandomAccessFile right + * at the signature if it has been found.
+ */ + private boolean tryToLocateSignature(long minDistanceFromEnd, + long maxDistanceFromEnd, + byte[] sig) throws IOException { + boolean found = false; + long off = archive.length() - minDistanceFromEnd; + final long stopSearching = + Math.max(0L, archive.length() - maxDistanceFromEnd); + if (off >= 0) { + for (; off >= stopSearching; off--) { + archive.seek(off); + int curr = archive.read(); + if (curr == -1) { + break; + } + if (curr == sig[POS_0]) { + curr = archive.read(); + if (curr == sig[POS_1]) { + curr = archive.read(); + if (curr == sig[POS_2]) { + curr = archive.read(); + if (curr == sig[POS_3]) { + found = true; + break; + } + } + } + } + } + } + if (found) { + archive.seek(off); + } + return found; + } + + /** + * Skips the given number of bytes or throws an EOFException if + * skipping failed. + */ + private void skipBytes(final int count) throws IOException { + int totalSkipped = 0; + while (totalSkipped < count) { + int skippedNow = archive.skipBytes(count - totalSkipped); + if (skippedNow <= 0) { + throw new EOFException(); + } + totalSkipped += skippedNow; + } + } + + /** + * Number of bytes in local file header up to the "length of + * filename" entry. + */ + private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = + /* local file header signature */ WORD + /* version needed to extract */ + SHORT + /* general purpose bit flag */ + SHORT + /* compression method */ + SHORT + /* last mod file time */ + SHORT + /* last mod file date */ + SHORT + /* crc-32 */ + WORD + /* compressed size */ + WORD + /* uncompressed size */ + WORD; + + /** + * Walks through all recorded entries and adds the data available + * from the local file header. + *
Also records the offsets for the data to read from the + * entries.
+ */ + private void resolveLocalFileHeaderData(Map + entriesWithoutUTF8Flag) + throws IOException { + // changing the name of a ZipArchiveEntry is going to change + // the hashcode - see COMPRESS-164 + // Map needs to be reconstructed in order to keep central + // directory order + Map origMap = + new LinkedHashMap(entries); + entries.clear(); + for (Map.Entry ent : origMap.entrySet()) { + ZipArchiveEntry ze = ent.getKey(); + OffsetEntry offsetEntry = ent.getValue(); + long offset = offsetEntry.headerOffset; + archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); + byte[] b = new byte[SHORT]; + archive.readFully(b); + int fileNameLen = ZipShort.getValue(b); + archive.readFully(b); + int extraFieldLen = ZipShort.getValue(b); + int lenToSkip = fileNameLen; + while (lenToSkip > 0) { + int skipped = archive.skipBytes(lenToSkip); + if (skipped <= 0) { + throw new IOException("failed to skip file name in" + + " local file header"); + } + lenToSkip -= skipped; + } + byte[] localExtraData = new byte[extraFieldLen]; + archive.readFully(localExtraData); + ze.setExtra(localExtraData); + offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH + + SHORT + SHORT + fileNameLen + extraFieldLen; + + if (entriesWithoutUTF8Flag.containsKey(ze)) { + String orig = ze.getName(); + NameAndComment nc = entriesWithoutUTF8Flag.get(ze); + ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, + nc.comment); + if (!orig.equals(ze.getName())) { + nameMap.remove(orig); + nameMap.put(ze.getName(), ze); + } + } + entries.put(ze, offsetEntry); + } + } + + /** + * Checks whether the archive starts with a LFH. If it doesn't, + * it may be an empty archive. + */ + private boolean startsWithLocalFileHeader() throws IOException { + archive.seek(0); + final byte[] start = new byte[WORD]; + archive.readFully(start); + for (int i = 0; i < start.length; i++) { + if (start[i] != ZipArchiveOutputStream.LFH_SIG[i]) { + return false; + } + } + return true; + } + + /** + * InputStream that delegates requests to the underlying + * RandomAccessFile, making sure that only bytes from a certain + * range can be read. + */ + private class BoundedInputStream extends InputStream { + private long remaining; + private long loc; + private boolean addDummyByte = false; + + BoundedInputStream(long start, long remaining) { + this.remaining = remaining; + loc = start; + } + + @Override + public int read() throws IOException { + if (remaining-- <= 0) { + if (addDummyByte) { + addDummyByte = false; + return 0; + } + return -1; + } + synchronized (archive) { + archive.seek(loc++); + return archive.read(); + } + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if (remaining <= 0) { + if (addDummyByte) { + addDummyByte = false; + b[off] = 0; + return 1; + } + return -1; + } + + if (len <= 0) { + return 0; + } + + if (len > remaining) { + len = (int) remaining; + } + int ret = -1; + synchronized (archive) { + archive.seek(loc); + ret = archive.read(b, off, len); + } + if (ret > 0) { + loc += ret; + remaining -= ret; + } + return ret; + } + + /** + * Inflater needs an extra dummy byte for nowrap - see + * Inflater's javadocs. + */ + void addDummy() { + addDummyByte = true; + } + } + + private static final class NameAndComment { + private final byte[] name; + private final byte[] comment; + + private NameAndComment(byte[] name, byte[] comment) { + this.name = name; + this.comment = comment; + } + } + + /** + * Compares two ZipArchiveEntries based on their offset within the archive. + *
Won't return any meaningful results if one of the entries + * isn't part of the archive at all.
+ */ + private final Comparator OFFSET_COMPARATOR = + new Comparator() { + public int compare(ZipArchiveEntry e1, ZipArchiveEntry e2) { + if (e1 == e2) { + return 0; + } + + OffsetEntry off1 = entries.get(e1); + OffsetEntry off2 = entries.get(e2); + if (off1 == null) { + return 1; + } + if (off2 == null) { + return -1; + } + long val = (off1.headerOffset - off2.headerOffset); + return val == 0 ? 0 : val < 0 ? -1 : +1; + } + }; +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipLong.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipLong.java new file mode 100644 index 0000000..cc8971c --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipLong.java @@ -0,0 +1,172 @@ + +package org.xbib.io.archive.zip; + +import static org.xbib.io.archive.zip.ZipConstants.BYTE_MASK; +import static org.xbib.io.archive.zip.ZipConstants.WORD; + +/** + * Utility class that represents a four byte integer with conversion + * rules for the big endian byte order of ZIP files. + */ +public final class ZipLong implements Cloneable { + + private static final int BYTE_1 = 1; + private static final int BYTE_1_MASK = 0xFF00; + private static final int BYTE_1_SHIFT = 8; + + private static final int BYTE_2 = 2; + private static final int BYTE_2_MASK = 0xFF0000; + private static final int BYTE_2_SHIFT = 16; + + private static final int BYTE_3 = 3; + private static final long BYTE_3_MASK = 0xFF000000L; + private static final int BYTE_3_SHIFT = 24; + + private final long value; + + /** + * Central File Header Signature + */ + public static final ZipLong CFH_SIG = new ZipLong(0X02014B50L); + + /** + * Local File Header Signature + */ + public static final ZipLong LFH_SIG = new ZipLong(0X04034B50L); + + /** + * Data Descriptor signature + */ + public static final ZipLong DD_SIG = new ZipLong(0X08074B50L); + + /** + * Value stored in size and similar fields if ZIP64 extensions are + * used. + */ + static final ZipLong ZIP64_MAGIC = new ZipLong(ZipConstants.ZIP64_MAGIC); + + /** + * Create instance from a number. + * + * @param value the long to store as a ZipLong + */ + public ZipLong(long value) { + this.value = value; + } + + /** + * Create instance from bytes. + * + * @param bytes the bytes to store as a ZipLong + */ + public ZipLong(byte[] bytes) { + this(bytes, 0); + } + + /** + * Create instance from the four bytes starting at offset. + * + * @param bytes the bytes to store as a ZipLong + * @param offset the offset to start + */ + public ZipLong(byte[] bytes, int offset) { + value = ZipLong.getValue(bytes, offset); + } + + /** + * Get value as four bytes in big endian byte order. + * + * @return value as four bytes in big endian order + */ + public byte[] getBytes() { + return ZipLong.getBytes(value); + } + + /** + * Get value as Java long. + * + * @return value as a long + */ + public long getValue() { + return value; + } + + /** + * Get value as four bytes in big endian byte order. 
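A round-trip sketch for ZipLong, using the local file header signature as the value; the least significant byte is stored first, which is why every plain ZIP file starts with the familiar "PK" bytes:

    byte[] b = ZipLong.getBytes(0x04034B50L); // LFH signature
    // b[0] == 0x50 ('P'), b[1] == 0x4B ('K'), b[2] == 0x03, b[3] == 0x04
    long v = ZipLong.getValue(b);             // 0x04034B50L again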
+ * + * @param value the value to convert + * @return value as four bytes in big endian byte order + */ + public static byte[] getBytes(long value) { + byte[] result = new byte[WORD]; + result[0] = (byte) ((value & BYTE_MASK)); + result[BYTE_1] = (byte) ((value & BYTE_1_MASK) >> BYTE_1_SHIFT); + result[BYTE_2] = (byte) ((value & BYTE_2_MASK) >> BYTE_2_SHIFT); + result[BYTE_3] = (byte) ((value & BYTE_3_MASK) >> BYTE_3_SHIFT); + return result; + } + + /** + * Helper method to get the value as a Java long from four bytes starting at given array offset + * + * @param bytes the array of bytes + * @param offset the offset to start + * @return the corresponding Java long value + */ + public static long getValue(byte[] bytes, int offset) { + long value = (bytes[offset + BYTE_3] << BYTE_3_SHIFT) & BYTE_3_MASK; + value += (bytes[offset + BYTE_2] << BYTE_2_SHIFT) & BYTE_2_MASK; + value += (bytes[offset + BYTE_1] << BYTE_1_SHIFT) & BYTE_1_MASK; + value += (bytes[offset] & BYTE_MASK); + return value; + } + + /** + * Helper method to get the value as a Java long from a four-byte array + * + * @param bytes the array of bytes + * @return the corresponding Java long value + */ + public static long getValue(byte[] bytes) { + return getValue(bytes, 0); + } + + /** + * Override to make two instances with same value equal. + * + * @param o an object to compare + * @return true if the objects are equal + */ + @Override + public boolean equals(Object o) { + if (o == null || !(o instanceof ZipLong)) { + return false; + } + return value == ((ZipLong) o).getValue(); + } + + /** + * Override to make two instances with same value equal. + * + * @return the value stored in the ZipLong + */ + @Override + public int hashCode() { + return (int) value; + } + + @Override + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException cnfe) { + // impossible + throw new RuntimeException(cnfe); + } + } + + @Override + public String toString() { + return "ZipLong value: " + value; + } +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipShort.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipShort.java new file mode 100644 index 0000000..4b42271 --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipShort.java @@ -0,0 +1,139 @@ + +package org.xbib.io.archive.zip; + +import static org.xbib.io.archive.zip.ZipConstants.BYTE_MASK; + +/** + * Utility class that represents a two byte integer with conversion + * rules for the big endian byte order of ZIP files. + */ +public final class ZipShort implements Cloneable { + private static final int BYTE_1_MASK = 0xFF00; + private static final int BYTE_1_SHIFT = 8; + + private final int value; + + /** + * Create instance from a number. + * + * @param value the int to store as a ZipShort + */ + public ZipShort(int value) { + this.value = value; + } + + /** + * Create instance from bytes. + * + * @param bytes the bytes to store as a ZipShort + */ + public ZipShort(byte[] bytes) { + this(bytes, 0); + } + + /** + * Create instance from the two bytes starting at offset. + * + * @param bytes the bytes to store as a ZipShort + * @param offset the offset to start + */ + public ZipShort(byte[] bytes, int offset) { + value = ZipShort.getValue(bytes, offset); + } + + /** + * Get value as two bytes in big endian byte order. 
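A minimal round trip through the static helpers (ZipShortDemo is a hypothetical illustration):

    import org.xbib.io.archive.zip.ZipShort;

    class ZipShortDemo {
        public static void main(String[] args) {
            byte[] b = ZipShort.getBytes(0x1234);
            // the low byte is stored first: b[0] == 0x34, b[1] == 0x12
            System.out.println(ZipShort.getValue(b) == 0x1234); // prints true
        }
    }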
+ * + * @return the value as a a two byte array in big endian byte order + */ + public byte[] getBytes() { + byte[] result = new byte[2]; + result[0] = (byte) (value & BYTE_MASK); + result[1] = (byte) ((value & BYTE_1_MASK) >> BYTE_1_SHIFT); + return result; + } + + /** + * Get value as Java int. + * + * @return value as a Java int + */ + public int getValue() { + return value; + } + + /** + * Get value as two bytes in big endian byte order. + * + * @param value the Java int to convert to bytes + * @return the converted int as a byte array in big endian byte order + */ + public static byte[] getBytes(int value) { + byte[] result = new byte[2]; + result[0] = (byte) (value & BYTE_MASK); + result[1] = (byte) ((value & BYTE_1_MASK) >> BYTE_1_SHIFT); + return result; + } + + /** + * Helper method to get the value as a java int from two bytes starting at given array offset + * + * @param bytes the array of bytes + * @param offset the offset to start + * @return the corresponding java int value + */ + public static int getValue(byte[] bytes, int offset) { + int value = (bytes[offset + 1] << BYTE_1_SHIFT) & BYTE_1_MASK; + value += (bytes[offset] & BYTE_MASK); + return value; + } + + /** + * Helper method to get the value as a java int from a two-byte array + * + * @param bytes the array of bytes + * @return the corresponding java int value + */ + public static int getValue(byte[] bytes) { + return getValue(bytes, 0); + } + + /** + * Override to make two instances with same value equal. + * + * @param o an object to compare + * @return true if the objects are equal + */ + @Override + public boolean equals(Object o) { + if (o == null || !(o instanceof ZipShort)) { + return false; + } + return value == ((ZipShort) o).getValue(); + } + + /** + * Override to make two instances with same value equal. + * + * @return the value stored in the ZipShort + */ + @Override + public int hashCode() { + return value; + } + + @Override + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException cnfe) { + // impossible + throw new RuntimeException(cnfe); + } + } + + @Override + public String toString() { + return "ZipShort value: " + value; + } +} diff --git a/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipUtil.java b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipUtil.java new file mode 100644 index 0000000..8b7cd37 --- /dev/null +++ b/io-archive-zip/src/main/java/org/xbib/io/archive/zip/ZipUtil.java @@ -0,0 +1,172 @@ +package org.xbib.io.archive.zip; + +import org.xbib.io.archive.entry.ArchiveEntryEncodingHelper; + +import java.io.IOException; +import java.util.Calendar; +import java.util.zip.CRC32; +import java.util.zip.ZipEntry; + +/** + * Utility class for handling DOS and Java time conversions. + */ +public abstract class ZipUtil { + /** + * Smallest date/time ZIP can handle. + */ + private static final byte[] DOS_TIME_MIN = ZipLong.getBytes(0x00002100L); + + /** + * Convert a Date object to a DOS date/time field. 
+ * Stolen from InfoZip's fileio.c + * + * @param t number of milliseconds since the epoch + * @return the date as a byte array + */ + public static byte[] toDosTime(long t) { + Calendar c = Calendar.getInstance(); + c.setTimeInMillis(t); + + int year = c.get(Calendar.YEAR); + if (year < 1980) { + return copy(DOS_TIME_MIN); // stop callers from changing the array + } + int month = c.get(Calendar.MONTH) + 1; + long value = ((year - 1980) << 25) + | (month << 21) + | (c.get(Calendar.DAY_OF_MONTH) << 16) + | (c.get(Calendar.HOUR_OF_DAY) << 11) + | (c.get(Calendar.MINUTE) << 5) + | (c.get(Calendar.SECOND) >> 1); + return ZipLong.getBytes(value); + } + + + /** + * Converts DOS time to Java time (number of milliseconds since + * epoch). + */ + public static long dosToJavaTime(long dosTime) { + Calendar cal = Calendar.getInstance(); + // CheckStyle:MagicNumberCheck OFF - no point + cal.set(Calendar.YEAR, (int) ((dosTime >> 25) & 0x7f) + 1980); + cal.set(Calendar.MONTH, (int) ((dosTime >> 21) & 0x0f) - 1); + cal.set(Calendar.DATE, (int) (dosTime >> 16) & 0x1f); + cal.set(Calendar.HOUR_OF_DAY, (int) (dosTime >> 11) & 0x1f); + cal.set(Calendar.MINUTE, (int) (dosTime >> 5) & 0x3f); + cal.set(Calendar.SECOND, (int) (dosTime << 1) & 0x3e); + // CheckStyle:MagicNumberCheck ON + return cal.getTime().getTime(); + } + + /** + * If the entry has Unicode*ExtraFields and the CRCs of the + * names/comments match those of the extra fields, transfer the + * known Unicode values from the extra field. + */ + static void setNameAndCommentFromExtraFields(ZipArchiveEntry ze, + byte[] originalNameBytes, + byte[] commentBytes) { + UnicodePathExtraField name = (UnicodePathExtraField) + ze.getExtraField(UnicodePathExtraField.UPATH_ID); + String originalName = ze.getName(); + String newName = getUnicodeStringIfOriginalMatches(name, + originalNameBytes); + if (newName != null && !originalName.equals(newName)) { + ze.setName(newName); + } + + if (commentBytes != null && commentBytes.length > 0) { + UnicodeCommentExtraField cmt = (UnicodeCommentExtraField) + ze.getExtraField(UnicodeCommentExtraField.UCOM_ID); + String newComment = + getUnicodeStringIfOriginalMatches(cmt, commentBytes); + if (newComment != null) { + ze.setComment(newComment); + } + } + } + + /** + * If the stored CRC matches the one of the given name, return the + * Unicode name of the given field. + * If the field is null or the CRCs don't match, return null + * instead. + */ + private static String getUnicodeStringIfOriginalMatches(AbstractUnicodeExtraField f, + byte[] orig) { + if (f != null) { + CRC32 crc32 = new CRC32(); + crc32.update(orig); + long origCRC32 = crc32.getValue(); + + if (origCRC32 == f.getNameCRC32()) { + try { + return ArchiveEntryEncodingHelper.UTF8_ENCODING.decode(f.getUnicodeName()); + } catch (IOException ex) { + return null; + } + } + } + return null; + } + + /** + * Create a copy of the given array - or return null if the + * argument is null. + */ + static byte[] copy(byte[] from) { + if (from != null) { + byte[] to = new byte[from.length]; + System.arraycopy(from, 0, to, 0, to.length); + return to; + } + return null; + } + + /** + * Whether this library is able to read or write the given entry. + */ + static boolean canHandleEntryData(ZipArchiveEntry entry) { + return supportsEncryptionOf(entry) && supportsMethodOf(entry); + } + + /** + * Whether this library supports the encryption used by the given + * entry. 
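A hedged sketch of the two-second resolution of DOS timestamps (DosTimeDemo is hypothetical):

    import org.xbib.io.archive.zip.ZipLong;
    import org.xbib.io.archive.zip.ZipUtil;

    class DosTimeDemo {
        public static void main(String[] args) {
            long now = System.currentTimeMillis();
            // toDosTime yields the four-byte field; ZipLong recovers the
            // packed numeric form that dosToJavaTime expects
            long dos = ZipLong.getValue(ZipUtil.toDosTime(now));
            long back = ZipUtil.dosToJavaTime(dos);
            // seconds are stored halved, so up to ~2s may be lost
            System.out.println(Math.abs(now - back) < 2000); // prints true
        }
    }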
+ * + * @return true if the entry isn't encrypted at all + */ + private static boolean supportsEncryptionOf(ZipArchiveEntry entry) { + return !entry.getGeneralPurposeBit().usesEncryption(); + } + + /** + * Whether this library supports the compression method used by + * the given entry. + * + * @return true if the compression method is STORED or DEFLATED + */ + private static boolean supportsMethodOf(ZipArchiveEntry entry) { + return entry.getMethod() == ZipEntry.STORED + || entry.getMethod() == ZipEntry.DEFLATED; + } + + /** + * Checks whether the entry requires features not (yet) supported + * by the library and throws an exception if it does. + */ + static void checkRequestedFeatures(ZipArchiveEntry ze) + throws UnsupportedZipFeatureException { + if (!supportsEncryptionOf(ze)) { + throw + new UnsupportedZipFeatureException(UnsupportedZipFeatureException + .Feature.ENCRYPTION, ze); + } + if (!supportsMethodOf(ze)) { + throw + new UnsupportedZipFeatureException(UnsupportedZipFeatureException + .Feature.METHOD, ze); + } + } +} \ No newline at end of file diff --git a/io-archive-zip/src/test/java/org/xbib/io/archive/zip/ZipTest.java b/io-archive-zip/src/test/java/org/xbib/io/archive/zip/ZipTest.java new file mode 100644 index 0000000..ec6d436 --- /dev/null +++ b/io-archive-zip/src/test/java/org/xbib/io/archive/zip/ZipTest.java @@ -0,0 +1,24 @@ +package org.xbib.io.archive.zip; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import org.junit.jupiter.api.Test; +import java.io.InputStream; + +public class ZipTest { + + @Test + public void testZip() throws Exception { + InputStream in = getClass().getResourceAsStream("test.zip"); + ZipArchiveInputStream z = new ZipArchiveInputStream(in); + byte[] buffer = new byte[1024]; + long total = 0L; + while ((z.getNextEntry()) != null) { + int len = 0; + while ((len = z.read(buffer)) > 0) { + total += len; + } + } + assertEquals(1813L, total); + z.close(); + } +} diff --git a/io-archive/src/main/java/module-info.java b/io-archive/src/main/java/module-info.java new file mode 100644 index 0000000..0f78bae --- /dev/null +++ b/io-archive/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.xbib.io.archive { + exports org.xbib.io.archive.entry; + exports org.xbib.io.archive.stream; + exports org.xbib.io.archive.util; +} diff --git a/io-archive/src/main/java/org/xbib/io/archive/entry/ArchiveEntry.java b/io-archive/src/main/java/org/xbib/io/archive/entry/ArchiveEntry.java new file mode 100644 index 0000000..d62dd71 --- /dev/null +++ b/io-archive/src/main/java/org/xbib/io/archive/entry/ArchiveEntry.java @@ -0,0 +1,44 @@ +package org.xbib.io.archive.entry; + +import java.util.Date; + +/** + * Represents an entry of an archive. + */ +public interface ArchiveEntry { + + /** + * Special value indicating that the size is unknown + */ + long SIZE_UNKNOWN = -1; + + ArchiveEntry setName(String name); + + /** + * The name of the entry in the archive. May refer to a file or directory or other item + */ + String getName(); + + /** + * Set the entry size in bytes + */ + ArchiveEntry setEntrySize(long size); + + /** + * The size of the entry. May be -1 (SIZE_UNKNOWN) if the size is unknown + */ + long getEntrySize(); + + ArchiveEntry setLastModified(Date date); + + /** + * The last modified date of the entry. 
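For illustration, a minimal implementation sketch of this interface with its fluent setters (SimpleEntry is hypothetical):

    import org.xbib.io.archive.entry.ArchiveEntry;
    import java.util.Date;

    class SimpleEntry implements ArchiveEntry {
        private String name;
        private long size = SIZE_UNKNOWN;
        private Date lastModified;

        public ArchiveEntry setName(String name) { this.name = name; return this; }
        public String getName() { return name; }
        public ArchiveEntry setEntrySize(long size) { this.size = size; return this; }
        public long getEntrySize() { return size; }
        public ArchiveEntry setLastModified(Date date) { this.lastModified = date; return this; }
        public Date getLastModified() { return lastModified; }
        // directories conventionally carry a trailing slash in archives
        public boolean isDirectory() { return name != null && name.endsWith("/"); }
    }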
+ */ + Date getLastModified(); + + /** + * True if the entry refers to a directory + */ + boolean isDirectory(); + +} diff --git a/io-archive/src/main/java/org/xbib/io/archive/entry/ArchiveEntryEncoding.java b/io-archive/src/main/java/org/xbib/io/archive/entry/ArchiveEntryEncoding.java new file mode 100644 index 0000000..f1d3f93 --- /dev/null +++ b/io-archive/src/main/java/org/xbib/io/archive/entry/ArchiveEntryEncoding.java @@ -0,0 +1,61 @@ +package org.xbib.io.archive.entry; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * An interface for encoders that do a pretty encoding of archive + * filenames. + * There are mostly two implementations, one that uses java.nio + * {@link java.nio.charset.Charset Charset} and one implementation, + * which copes with simple 8 bit charsets, because java-1.4 did not + * support Cp437 in java.nio. + * The main reason for defining an own encoding layer comes from + * the problems with {@link String#getBytes(String) + * String.getBytes}, which encodes unknown characters as ASCII + * quotation marks ('?'). Quotation marks are per definition an + * invalid filename on some operating systems like Windows, which + * leads to ignored ZIP entries. All implementations should + * implement this interface in a + * reentrant way. + */ +public interface ArchiveEntryEncoding { + /** + * Check, whether the given string may be losslessly encoded using this + * encoding. + * + * @param name A filename or ZIP comment. + * @return Whether the given name may be encoded with out any losses. + */ + boolean canEncode(String name); + + /** + * Encode a filename or a comment to a byte array suitable for + * storing it to a zip entry. + * Examples for CP 437 (in pseudo-notation, right hand side is + * C-style notation): + *
+     *  encode("\u20AC_for_Dollar.txt") = "%U20AC_for_Dollar.txt"
+     *  encode("\u00D6lf\u00E4sser.txt") = "\231lf\204sser.txt"
+     * 
+ * + * @param name A filename or ZIP comment. + * @return A byte buffer with a backing array containing the + * encoded name. Unmappable characters or malformed + * character sequences are mapped to a sequence of utf-16 + * words encoded in the format %Uxxxx. It is + * assumed, that the byte buffer is positioned at the + * beginning of the encoded result, the byte buffer has a + * backing array and the limit of the byte buffer points + * to the end of the encoded result. + * @throws java.io.IOException + */ + ByteBuffer encode(String name) throws IOException; + + /** + * @param data The byte values to decode. + * @return The decoded string. + * @throws java.io.IOException + */ + String decode(byte[] data) throws IOException; +} diff --git a/io-archive/src/main/java/org/xbib/io/archive/entry/ArchiveEntryEncodingHelper.java b/io-archive/src/main/java/org/xbib/io/archive/entry/ArchiveEntryEncodingHelper.java new file mode 100644 index 0000000..eb8b8ba --- /dev/null +++ b/io-archive/src/main/java/org/xbib/io/archive/entry/ArchiveEntryEncodingHelper.java @@ -0,0 +1,222 @@ +package org.xbib.io.archive.entry; + +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.UnsupportedCharsetException; +import java.util.HashMap; +import java.util.Map; + +/** + * Static helper functions for encoding filenames in archives. + */ +public abstract class ArchiveEntryEncodingHelper { + + /** + * A class, which holds the high characters of a simple encoding + * and lazily instantiates a {@link Simple8BitArchiveEntryEncoding} instance in a + * thread-safe manner. + */ + private static class SimpleEncodingHolder { + + private final char[] highChars; + + private Simple8BitArchiveEntryEncoding encoding; + + /** + * Instantiate a simple encoding holder. + * + * @param highChars The characters for byte codes 128 to 255. + * @see Simple8BitArchiveEntryEncoding#Simple8BitArchiveEntryEncoding(char[]) + */ + SimpleEncodingHolder(char[] highChars) { + this.highChars = highChars; + } + + /** + * @return The associated {@link Simple8BitArchiveEntryEncoding}, which + * is instantiated if not done so far. 
+ */ + public synchronized Simple8BitArchiveEntryEncoding getEncoding() { + if (this.encoding == null) { + this.encoding = new Simple8BitArchiveEntryEncoding(this.highChars); + } + return this.encoding; + } + } + + private static final Map simpleEncodings; + + static { + simpleEncodings = new HashMap(); + + char[] cp437_high_chars = + new char[]{0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, + 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, + 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6, + 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, + 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, + 0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa, + 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, + 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, + 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, + 0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534, + 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, + 0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559, + 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, + 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, + 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, + 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, + 0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1, + 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, + 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, + 0x25a0, 0x00a0}; + + SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars); + + simpleEncodings.put("CP437", cp437); + simpleEncodings.put("Cp437", cp437); + simpleEncodings.put("cp437", cp437); + simpleEncodings.put("IBM437", cp437); + simpleEncodings.put("ibm437", cp437); + + char[] cp850_high_chars = + new char[]{0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, + 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, + 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6, + 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, + 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, + 0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa, + 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae, + 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, + 0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557, + 0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534, + 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, + 0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb, + 0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518, + 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580, + 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, + 0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9, + 0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1, + 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8, + 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, + 0x25a0, 0x00a0}; + + SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars); + + simpleEncodings.put("CP850", cp850); + simpleEncodings.put("Cp850", cp850); + simpleEncodings.put("cp850", cp850); + simpleEncodings.put("IBM850", cp850); + simpleEncodings.put("ibm850", cp850); + } + + /** + * Grow a byte buffer, so it has a minimal capacity or at least + * the double capacity of the original buffer + * + * @param b The original buffer. + * @param newCapacity The minimal requested new capacity. + * @return A byte buffer r with + * r.capacity() = max(b.capacity()*2,newCapacity) and + * all the data contained in b copied to the beginning + * of r. 
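A sketch of the growBuffer contract just described; since the helper is package-private, GrowBufferDemo is assumed (hypothetically) to live in the same org.xbib.io.archive.entry package:

    import java.nio.ByteBuffer;

    class GrowBufferDemo {
        static void demo() {
            ByteBuffer b = ByteBuffer.allocate(8);
            b.put(new byte[]{1, 2, 3, 4, 5, 6, 7, 8}); // buffer now full
            // requested capacity 14 < doubled capacity 16, so doubling wins
            ByteBuffer bigger = ArchiveEntryEncodingHelper.growBuffer(b, 14);
            System.out.println(bigger.capacity()); // 16
            System.out.println(bigger.position()); // 8 - old content was copied
        }
    }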
+ */ + static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) { + b.limit(b.position()); + b.rewind(); + + int c2 = b.capacity() * 2; + ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2); + + on.put(b); + return on; + } + + + /** + * The hexadecimal digits 0,...,9,A,...,F encoded as + * ASCII bytes. + */ + private static final byte[] HEX_DIGITS = + new byte[]{ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41, + 0x42, 0x43, 0x44, 0x45, 0x46 + }; + + /** + * Append %Uxxxx to the given byte buffer. + * The caller must assure, that bb.remaining()>=6. + * + * @param bb The byte buffer to write to. + * @param c The character to write. + */ + public static void appendSurrogate(ByteBuffer bb, char c) { + + bb.put((byte) '%'); + bb.put((byte) 'U'); + + bb.put(HEX_DIGITS[(c >> 12) & 0x0f]); + bb.put(HEX_DIGITS[(c >> 8) & 0x0f]); + bb.put(HEX_DIGITS[(c >> 4) & 0x0f]); + bb.put(HEX_DIGITS[c & 0x0f]); + } + + /** + * name of the encoding UTF-8 + */ + public static final String UTF8 = "UTF8"; + + /** + * variant name of the encoding UTF-8 used for comparisions. + */ + private static final String UTF_DASH_8 = "UTF_8"; + + /** + * name of the encoding UTF-8 + */ + public static final ArchiveEntryEncoding UTF8_ENCODING = new FallbackArchiveEntryEncoding(UTF8); + + /** + * Instantiates an encoding. + * + * @param name The name of the encoding. Specify {@code null} for + * the platform's default encoding. + * @return An encoding for the given encoding name. + */ + public static ArchiveEntryEncoding getEncoding(String name) { + // fallback encoding is good enough for utf-8. + if (isUTF8(name)) { + return UTF8_ENCODING; + } + if (name == null) { + return new FallbackArchiveEntryEncoding(); + } + SimpleEncodingHolder h = simpleEncodings.get(name); + if (h != null) { + return h.getEncoding(); + } + try { + Charset cs = Charset.forName(name); + return new NioArchiveEntryEncoding(cs); + } catch (UnsupportedCharsetException e) { + return new FallbackArchiveEntryEncoding(name); + } + } + + /** + * Whether a given encoding - or the platform's default encoding + * if the parameter is null - is UTF-8. + */ + public static boolean isUTF8(String encoding) { + if (encoding == null) { + // check platform's default encoding + encoding = System.getProperty("file.encoding"); + } + return UTF8.equalsIgnoreCase(encoding) + || UTF_DASH_8.equalsIgnoreCase(encoding); + } +} diff --git a/io-archive/src/main/java/org/xbib/io/archive/entry/FallbackArchiveEntryEncoding.java b/io-archive/src/main/java/org/xbib/io/archive/entry/FallbackArchiveEntryEncoding.java new file mode 100644 index 0000000..68ed291 --- /dev/null +++ b/io-archive/src/main/java/org/xbib/io/archive/entry/FallbackArchiveEntryEncoding.java @@ -0,0 +1,62 @@ + +package org.xbib.io.archive.entry; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * A fallback ZipEncoding, which uses a java.io means to encode names. + *

This implementation is not suitable for encodings other than + * utf-8, because java.io encodes unmappable characters as question + * marks, leading to unreadable ZIP entries on some operating + * systems.
+ * + * Furthermore, this implementation is unable to tell whether a + * given name can be safely encoded or not.
+ * + * This implementation acts as a last resort implementation, when + * neither {@link Simple8BitArchiveEntryEncoding} nor {@link NioArchiveEntryEncoding} is + * available.
+ */ +class FallbackArchiveEntryEncoding implements ArchiveEntryEncoding { + private final String charset; + + /** + * Construct a fallback zip encoding, which uses the platform's + * default charset. + */ + public FallbackArchiveEntryEncoding() { + this.charset = null; + } + + /** + * Construct a fallback zip encoding, which uses the given charset. + * + * @param charset The name of the charset or {@code null} for + * the platform's default character set. + */ + public FallbackArchiveEntryEncoding(String charset) { + this.charset = charset; + } + + public boolean canEncode(String name) { + return true; + } + + public ByteBuffer encode(String name) throws IOException { + if (this.charset == null) { // i.e. use default charset, see no-args constructor + return ByteBuffer.wrap(name.getBytes()); + } else { + return ByteBuffer.wrap(name.getBytes(this.charset)); + } + } + + public String decode(byte[] data) throws IOException { + if (this.charset == null) { // i.e. use default charset, see no-args constructor + return new String(data); + } else { + return new String(data, this.charset); + } + } +} diff --git a/io-archive/src/main/java/org/xbib/io/archive/entry/NioArchiveEntryEncoding.java b/io-archive/src/main/java/org/xbib/io/archive/entry/NioArchiveEntryEncoding.java new file mode 100644 index 0000000..85a0ba4 --- /dev/null +++ b/io-archive/src/main/java/org/xbib/io/archive/entry/NioArchiveEntryEncoding.java @@ -0,0 +1,92 @@ + +package org.xbib.io.archive.entry; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; + +/** + * A ZipEncoding, which uses a java.nio {@link + * java.nio.charset.Charset Charset} to encode names. + *

This implementation works for all cases under java-1.5 or + * later. However, in java-1.4, some charsets don't have a java.nio + * implementation, most notably the default ZIP encoding Cp437.
+ * + * The methods of this class are reentrant.
+ */ +class NioArchiveEntryEncoding implements ArchiveEntryEncoding { + private final Charset charset; + + /** + * Construct an NIO based zip encoding, which wraps the given + * charset. + * + * @param charset The NIO charset to wrap. + */ + public NioArchiveEntryEncoding(Charset charset) { + this.charset = charset; + } + + public boolean canEncode(String name) { + CharsetEncoder enc = this.charset.newEncoder(); + enc.onMalformedInput(CodingErrorAction.REPORT); + enc.onUnmappableCharacter(CodingErrorAction.REPORT); + + return enc.canEncode(name); + } + + public ByteBuffer encode(String name) { + CharsetEncoder enc = this.charset.newEncoder(); + + enc.onMalformedInput(CodingErrorAction.REPORT); + enc.onUnmappableCharacter(CodingErrorAction.REPORT); + + CharBuffer cb = CharBuffer.wrap(name); + ByteBuffer out = ByteBuffer.allocate(name.length() + + (name.length() + 1) / 2); + + while (cb.remaining() > 0) { + CoderResult res = enc.encode(cb, out, true); + + if (res.isUnmappable() || res.isMalformed()) { + + // write the unmappable characters in utf-16 + // pseudo-URL encoding style to ByteBuffer. + if (res.length() * 6 > out.remaining()) { + out = ArchiveEntryEncodingHelper.growBuffer(out, out.position() + + res.length() * 6); + } + + for (int i = 0; i < res.length(); ++i) { + ArchiveEntryEncodingHelper.appendSurrogate(out, cb.get()); + } + + } else if (res.isOverflow()) { + + out = ArchiveEntryEncodingHelper.growBuffer(out, 0); + + } else if (res.isUnderflow()) { + + enc.flush(out); + break; + + } + } + + out.limit(out.position()); + out.rewind(); + return out; + } + + public String decode(byte[] data) throws IOException { + return this.charset.newDecoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT) + .decode(ByteBuffer.wrap(data)).toString(); + } +} diff --git a/io-archive/src/main/java/org/xbib/io/archive/entry/Simple8BitArchiveEntryEncoding.java b/io-archive/src/main/java/org/xbib/io/archive/entry/Simple8BitArchiveEntryEncoding.java new file mode 100644 index 0000000..ca2dbac --- /dev/null +++ b/io-archive/src/main/java/org/xbib/io/archive/entry/Simple8BitArchiveEntryEncoding.java @@ -0,0 +1,212 @@ +package org.xbib.io.archive.entry; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * This ZipEncoding implementation implements a simple 8bit character + * set, which mets the following restrictions: + *

- Characters 0x0000 to 0x007f are encoded as the corresponding + * byte values 0x00 to 0x7f.
+ * - All byte codes from 0x80 to 0xff are mapped to a unique unicode + * character in the range 0x0080 to 0x7fff. (No support for + * UTF-16 surrogates.)
+ * + * These restrictions most notably apply to the most prominent + * omissions of java-1.4's {@link java.nio.charset.Charset Charset} + * implementation, Cp437 and Cp850.
+ * + * The methods of this class are reentrant.
+ */ +class Simple8BitArchiveEntryEncoding implements ArchiveEntryEncoding { + + /** + * A character entity, which is put to the reverse mapping table + * of a simple encoding. + */ + private static final class Simple8BitChar implements Comparable { + public final char unicode; + public final byte code; + + Simple8BitChar(byte code, char unicode) { + this.code = code; + this.unicode = unicode; + } + + public int compareTo(Simple8BitChar a) { + return this.unicode - a.unicode; + } + + @Override + public String toString() { + return "0x" + Integer.toHexString(0xffff & unicode) + + "->0x" + Integer.toHexString(0xff & code); + } + + @Override + public boolean equals(Object o) { + if (o instanceof Simple8BitChar) { + Simple8BitChar other = (Simple8BitChar) o; + return unicode == other.unicode && code == other.code; + } + return false; + } + + @Override + public int hashCode() { + return unicode; + } + } + + /** + * The characters for byte values of 128 to 255 stored as an array of + * 128 chars. + */ + private final char[] highChars; + + /** + * A list of {@link Simple8BitArchiveEntryEncoding.Simple8BitChar} objects sorted by the unicode + * field. This list is used to binary search reverse mapping of + * unicode characters with a character code greater than 127. + */ + private final List reverseMapping; + + /** + * @param highChars The characters for byte values of 128 to 255 + * stored as an array of 128 chars. + */ + public Simple8BitArchiveEntryEncoding(char[] highChars) { + this.highChars = highChars.clone(); + List temp = new ArrayList(this.highChars.length); + byte code = 127; + for (char highChar : this.highChars) { + temp.add(new Simple8BitChar(++code, highChar)); + } + Collections.sort(temp); + this.reverseMapping = Collections.unmodifiableList(temp); + } + + /** + * Return the character code for a given encoded byte. + * + * @param b The byte to decode. + * @return The associated character value. + */ + public char decodeByte(byte b) { + // code 0-127 + if (b >= 0) { + return (char) b; + } + // byte is signed, so 128 == -128 and 255 == -1 + return this.highChars[128 + b]; + } + + /** + * @param c The character to encode. + * @return Whether the given unicode character is covered by this encoding. + */ + public boolean canEncodeChar(char c) { + if (c >= 0 && c < 128) { + return true; + } + Simple8BitChar r = this.encodeHighChar(c); + return r != null; + } + + /** + * Pushes the encoded form of the given character to the given byte buffer. + * + * @param bb The byte buffer to write to. + * @param c The character to encode. + * @return Whether the given unicode character is covered by this encoding. + * If {@code false} is returned, nothing is pushed to the + * byte buffer. + */ + public boolean pushEncodedChar(ByteBuffer bb, char c) { + if (c >= 0 && c < 128) { + bb.put((byte) c); + return true; + } + Simple8BitChar r = this.encodeHighChar(c); + if (r == null) { + return false; + } + bb.put(r.code); + return true; + } + + /** + * @param c A unicode character in the range from 0x0080 to 0x7f00 + * @return A Simple8BitChar, if this character is covered by this encoding. + * A {@code null} value is returned, if this character is not + * covered by this encoding. + */ + private Simple8BitChar encodeHighChar(char c) { + // for performance an simplicity, yet another reincarnation of + // binary search... 
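+            // Invariant: if c has a mapping at all, it lies at an index in
+            // [i0, i1). Each pass either returns an exact hit or halves that
+            // window; when the loop exits (i0 == i1), i0 is the smallest index
+            // whose unicode value is >= c, which is verified below.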
+ int i0 = 0; + int i1 = this.reverseMapping.size(); + while (i1 > i0) { + int i = i0 + (i1 - i0) / 2; + Simple8BitChar m = this.reverseMapping.get(i); + if (m.unicode == c) { + return m; + } + if (m.unicode < c) { + i0 = i + 1; + } else { + i1 = i; + } + } + if (i0 >= this.reverseMapping.size()) { + return null; + } + Simple8BitChar r = this.reverseMapping.get(i0); + if (r.unicode != c) { + return null; + } + return r; + } + + public boolean canEncode(String name) { + for (int i = 0; i < name.length(); ++i) { + char c = name.charAt(i); + if (!this.canEncodeChar(c)) { + return false; + } + } + return true; + } + + public ByteBuffer encode(String name) { + ByteBuffer out = ByteBuffer.allocate(name.length() + 6 + (name.length() + 1) / 2); + for (int i = 0; i < name.length(); ++i) { + char c = name.charAt(i); + if (out.remaining() < 6) { + out = ArchiveEntryEncodingHelper.growBuffer(out, out.position() + 6); + } + if (!this.pushEncodedChar(out, c)) { + ArchiveEntryEncodingHelper.appendSurrogate(out, c); + } + } + out.limit(out.position()); + out.rewind(); + return out; + } + + public String decode(byte[] data) throws IOException { + char[] ret = new char[data.length]; + for (int i = 0; i < data.length; ++i) { + ret[i] = this.decodeByte(data[i]); + } + return new String(ret); + } + + +} diff --git a/io-archive/src/main/java/org/xbib/io/archive/stream/ArchiveInputStream.java b/io-archive/src/main/java/org/xbib/io/archive/stream/ArchiveInputStream.java new file mode 100644 index 0000000..e5fdcd4 --- /dev/null +++ b/io-archive/src/main/java/org/xbib/io/archive/stream/ArchiveInputStream.java @@ -0,0 +1,43 @@ +package org.xbib.io.archive.stream; + +import org.xbib.io.archive.entry.ArchiveEntry; +import java.io.IOException; +import java.io.InputStream; + +/** + * Archive input streams must override the + * {@link #read(byte[], int, int)} - or {@link #read()} - + * method so that reading from the stream generates EOF for the end of + * data in each entry as well as at the end of the file proper. + * The {@link #getNextEntry()} method is used to reset the input stream + * ready for reading the data from the next entry. + */ +public abstract class ArchiveInputStream extends InputStream { + + /** + * Returns the next archive entry in this stream. + * + * @return the next entry, + * or {@code null} if there are no more entries + * @throws java.io.IOException if the next entry could not be read + */ + public abstract E getNextEntry() throws IOException; + + /** + * Reads a byte of data. This method will block until enough input is + * available. + * Simply calls the {@link #read(byte[], int, int)} method. + * MUST be overridden if the {@link #read(byte[], int, int)} method + * is not overridden; may be overridden otherwise. + * + * @return the byte read, or -1 if end of input is reached + * @throws IOException if an I/O error has occurred + */ + @Override + public int read() throws IOException { + byte[] b = new byte[1]; + int num = read(b, 0, 1); + return num == -1 ? 
-1 : b[0] & 0xFF; + } + +} diff --git a/io-archive/src/main/java/org/xbib/io/archive/stream/ArchiveOutputStream.java b/io-archive/src/main/java/org/xbib/io/archive/stream/ArchiveOutputStream.java new file mode 100644 index 0000000..077d8d1 --- /dev/null +++ b/io-archive/src/main/java/org/xbib/io/archive/stream/ArchiveOutputStream.java @@ -0,0 +1,78 @@ +package org.xbib.io.archive.stream; + +import org.xbib.io.archive.entry.ArchiveEntry; +import java.io.IOException; +import java.io.OutputStream; + +/** + * Archive output stream implementations are expected to override the + * {@link #write(byte[], int, int)} method to improve performance. + * They should also override {@link #close()} to ensure that any necessary + * trailers are added. + * The normal sequence of calls for working with ArchiveOutputStreams is: + * + create ArchiveOutputStream object + * + write SFX header (optional, Zip only) + * + repeat as needed: + * - putArchiveEntry() (writes entry header) + * - write() (writes entry data) + * - closeArchiveEntry() (closes entry) + * + finish() (ends the addition of entries) + * + write additional data if format supports it (optional) + * + close() + */ +public abstract class ArchiveOutputStream extends OutputStream { + + /** + * Temporary buffer used for the {@link #write(int)} method + */ + private final byte[] oneByte = new byte[1]; + + static final int BYTE_MASK = 0xFF; + + public abstract E newArchiveEntry() throws IOException; + + /** + * Writes the headers for an archive entry to the output stream. + * The caller must then write the content to the stream and call + * {@link #closeArchiveEntry()} to complete the process. + * + * @param entry describes the entry + * @throws java.io.IOException + */ + public abstract void putArchiveEntry(E entry) throws IOException; + + /** + * Closes the archive entry, writing any trailer information that may + * be required. + * + * @throws java.io.IOException + */ + public abstract void closeArchiveEntry() throws IOException; + + /** + * Finishes the addition of entries to this stream, without closing it. + * Additional data can be written, if the format supports it. + * + * The finish() method throws an Exception if the user forgets to close the entry + * . + * + * @throws IOException + */ + public abstract void finish() throws IOException; + + /** + * Writes a byte to the current archive entry. + * This method simply calls write( byte[], 0, 1 ). + * MUST be overridden if the {@link #write(byte[], int, int)} method + * is not overridden; may be overridden otherwise. + * + * @param b The byte to be written. 
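The call sequence documented above, sketched as a hypothetical helper (WriteOneEntry), assuming the stream is parameterized by its entry type:

    import org.xbib.io.archive.entry.ArchiveEntry;
    import org.xbib.io.archive.stream.ArchiveOutputStream;
    import java.io.IOException;
    import java.util.Date;

    class WriteOneEntry {
        static <E extends ArchiveEntry> void write(ArchiveOutputStream<E> out,
                String name, byte[] data) throws IOException {
            E entry = out.newArchiveEntry(); // format-specific entry instance
            entry.setName(name);
            entry.setLastModified(new Date());
            entry.setEntrySize(data.length);
            out.putArchiveEntry(entry);      // writes the entry header
            out.write(data);                 // writes the entry data
            out.closeArchiveEntry();         // writes any entry trailer
            out.finish();                    // ends the addition of entries
            out.close();
        }
    }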
+ * @throws java.io.IOException on error + */ + @Override + public void write(int b) throws IOException { + oneByte[0] = (byte) (b & BYTE_MASK); + write(oneByte, 0, 1); + } + +} diff --git a/io-archive/src/main/java/org/xbib/io/archive/util/ArchiveUtils.java b/io-archive/src/main/java/org/xbib/io/archive/util/ArchiveUtils.java new file mode 100644 index 0000000..f79ab2c --- /dev/null +++ b/io-archive/src/main/java/org/xbib/io/archive/util/ArchiveUtils.java @@ -0,0 +1,277 @@ +package org.xbib.io.archive.util; + +import org.xbib.io.archive.entry.ArchiveEntry; +import org.xbib.io.archive.entry.ArchiveEntryEncoding; +import org.xbib.io.archive.entry.ArchiveEntryEncodingHelper; + +import java.io.File; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.util.Locale; + +/** + * Generic Archive utilities + */ +public class ArchiveUtils { + + /** + * Private constructor to prevent instantiation of this utility class. + */ + private ArchiveUtils() { + } + + /** + * Strips Windows' drive letter as well as any leading slashes, + * turns path separators into forward slahes. + */ + public static String normalizeFileName(String fileName, boolean preserveLeadingSlashes) { + String osname = System.getProperty("os.name").toLowerCase(Locale.ENGLISH); + if (osname.startsWith("windows")) { + if (fileName.length() > 2) { + char ch1 = fileName.charAt(0); + char ch2 = fileName.charAt(1); + if (ch2 == ':' && ((ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z'))) { + fileName = fileName.substring(2); + } + } + } else if (osname.contains("netware")) { + int colon = fileName.indexOf(':'); + if (colon != -1) { + fileName = fileName.substring(colon + 1); + } + } + fileName = fileName.replace(File.separatorChar, '/'); + // No absolute pathnames. Windows paths can start with "\\NetworkDrive\", so we loop on starting /'s. + while (!preserveLeadingSlashes && fileName.startsWith("/")) { + fileName = fileName.substring(1); + } + return fileName; + } + + public static final ArchiveEntryEncoding DEFAULT_ENCODING = ArchiveEntryEncodingHelper.getEncoding(null); + + public static final ArchiveEntryEncoding FALLBACK_ENCODING = new ArchiveEntryEncoding() { + public boolean canEncode(String name) { + return true; + } + + public ByteBuffer encode(String name) { + final int length = name.length(); + byte[] buf = new byte[length]; + for (int i = 0; i < length; ++i) { + buf[i] = (byte) name.charAt(i); + } + return ByteBuffer.wrap(buf); + } + + public String decode(byte[] buffer) { + final int length = buffer.length; + StringBuilder result = new StringBuilder(length); + for (byte b : buffer) { + if (b == 0) { + break; + } + result.append((char) (b & 0xFF)); + } + return result.toString(); + } + }; + + /** + * Copy a name into a buffer. + * Copies characters from the name into the buffer + * starting at the specified offset. + * If the buffer is longer than the name, the buffer + * is filled with trailing NULs. + * If the name is longer than the buffer, + * the output is truncated. + * + * @param name The header name from which to copy the characters. + * @param buf The buffer where the name is to be stored. + * @param offset The starting offset into the buffer + * @param length The maximum number of header bytes to copy. + * @return The updated offset, i.e. 
offset + length + */ + public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) { + try { + return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); + } catch (IOException ex) { + try { + return formatNameBytes(name, buf, offset, length, ArchiveUtils.FALLBACK_ENCODING); + } catch (IOException ex2) { + // impossible + throw new RuntimeException(ex2); + } + } + } + + /** + * Copy a name into a buffer. + * Copies characters from the name into the buffer + * starting at the specified offset. + * If the buffer is longer than the name, the buffer + * is filled with trailing NULs. + * If the name is longer than the buffer, + * the output is truncated. + * + * @param name The header name from which to copy the characters. + * @param buf The buffer where the name is to be stored. + * @param offset The starting offset into the buffer + * @param length The maximum number of header bytes to copy. + * @param encoding name of the encoding to use for file names + * @return The updated offset, i.e. offset + length + */ + public static int formatNameBytes(String name, byte[] buf, final int offset, + final int length, + final ArchiveEntryEncoding encoding) + throws IOException { + int len = name.length(); + ByteBuffer b = encoding.encode(name); + while (b.limit() > length && len > 0) { + b = encoding.encode(name.substring(0, --len)); + } + final int limit = b.limit(); + System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); + + // Pad any remaining output bytes with NUL + for (int i = limit; i < length; ++i) { + buf[offset + i] = 0; + } + + return offset + length; + } + + + /** + * Generates a string containing the name, isDirectory setting and size of an entry. + * For example: + * 2000 main.c + * 100 testfiles + * + * @return the representation of the entry + */ + public static String toString(ArchiveEntry entry) { + StringBuilder sb = new StringBuilder(); + sb.append(entry.isDirectory() ? 'd' : '-');// c.f. "ls -l" output + String size = Long.toString((entry.getEntrySize())); + sb.append(' '); + // Pad output to 7 places, leading spaces + for (int i = 7; i > size.length(); i--) { + sb.append(' '); + } + sb.append(size); + sb.append(' ').append(entry.getName()); + return sb.toString(); + } + + /** + * Check if buffer contents matches Ascii String. + * + * @param expected + * @param buffer + * @param offset + * @param length + * @return {@code true} if buffer is the same as the expected string + */ + public static boolean matchAsciiBuffer( + String expected, byte[] buffer, int offset, int length) { + byte[] buffer1; + try { + buffer1 = expected.getBytes("ASCII"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // Should not happen + } + return isEqual(buffer1, 0, buffer1.length, buffer, offset, length, false); + } + + /** + * Convert a string to Ascii bytes. + * Used for comparing "magic" strings which need to be independent of the default Locale. + * + * @param inputString + * @return the bytes + */ + public static byte[] toAsciiBytes(String inputString) { + try { + return inputString.getBytes("ASCII"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // Should never happen + } + } + + /** + * Convert an input byte array to a String using the ASCII character set. 
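For instance, a format's magic string can be checked locale-independently (MagicDemo is hypothetical):

    import org.xbib.io.archive.util.ArchiveUtils;

    class MagicDemo {
        public static void main(String[] args) {
            byte[] header = ArchiveUtils.toAsciiBytes("ustar payload...");
            // compares the first five bytes against the expected magic
            System.out.println(ArchiveUtils.matchAsciiBuffer("ustar", header, 0, 5)); // true
        }
    }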
+ * + * @param inputBytes + * @return the bytes, interpreted as an Ascii string + */ + public static String toAsciiString(final byte[] inputBytes) { + try { + return new String(inputBytes, "ASCII"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // Should never happen + } + } + + /** + * Convert an input byte array to a String using the ASCII character set. + * + * @param inputBytes input byte array + * @param offset offset within array + * @param length length of array + * @return the bytes, interpreted as an Ascii string + */ + public static String toAsciiString(final byte[] inputBytes, int offset, int length) { + try { + return new String(inputBytes, offset, length, "ASCII"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // Should never happen + } + } + + /** + * Compare byte buffers, optionally ignoring trailing nulls + * + * @param buffer1 + * @param offset1 + * @param length1 + * @param buffer2 + * @param offset2 + * @param length2 + * @param ignoreTrailingNulls + * @return {@code true} if buffer1 and buffer2 have same contents, having regard to trailing nulls + */ + public static boolean isEqual( + final byte[] buffer1, final int offset1, final int length1, + final byte[] buffer2, final int offset2, final int length2, + boolean ignoreTrailingNulls) { + int minLen = length1 < length2 ? length1 : length2; + for (int i = 0; i < minLen; i++) { + if (buffer1[offset1 + i] != buffer2[offset2 + i]) { + return false; + } + } + if (length1 == length2) { + return true; + } + if (ignoreTrailingNulls) { + if (length1 > length2) { + for (int i = length2; i < length1; i++) { + if (buffer1[offset1 + i] != 0) { + return false; + } + } + } else { + for (int i = length1; i < length2; i++) { + if (buffer2[offset2 + i] != 0) { + return false; + } + } + } + return true; + } + return false; + } + +} diff --git a/io-codec/build.gradle b/io-codec/build.gradle new file mode 100644 index 0000000..bdf594f --- /dev/null +++ b/io-codec/build.gradle @@ -0,0 +1,13 @@ +dependencies { + api project(':io-archive') + implementation project(':io-compress-bzip2') + implementation project(':io-compress-lzf') + implementation project(':io-compress-xz') + implementation project(':io-compress-zlib') + implementation project(':io-archive-ar') + implementation project(':io-archive-cpio') + implementation project(':io-archive-dump') + implementation project(':io-archive-jar') + implementation project(':io-archive-tar') + implementation project(':io-archive-zip') +} diff --git a/io-codec/src/main/java/module-info.java b/io-codec/src/main/java/module-info.java new file mode 100644 index 0000000..0623f1e --- /dev/null +++ b/io-codec/src/main/java/module-info.java @@ -0,0 +1,20 @@ +module org.xbib.io.codec { + exports org.xbib.io.codec; + exports org.xbib.io.codec.ar; + exports org.xbib.io.codec.cpio; + exports org.xbib.io.codec.file; + exports org.xbib.io.codec.jar; + exports org.xbib.io.codec.tar; + exports org.xbib.io.codec.zip; + requires org.xbib.io.compress.bzip; + requires org.xbib.io.compress.lzf; + requires org.xbib.io.compress.xz; + requires org.xbib.io.compress.zlib; + requires org.xbib.io.archive; + requires org.xbib.io.archive.ar; + requires org.xbib.io.archive.cpio; + requires org.xbib.io.archive.dump; + requires org.xbib.io.archive.jar; + requires org.xbib.io.archive.tar; + requires org.xbib.io.archive.zip; +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/ArchiveCodec.java b/io-codec/src/main/java/org/xbib/io/codec/ArchiveCodec.java new file mode 
100644 index 0000000..745b9fb --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/ArchiveCodec.java @@ -0,0 +1,52 @@ +package org.xbib.io.codec; + +import org.xbib.io.archive.stream.ArchiveInputStream; +import org.xbib.io.archive.stream.ArchiveOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * An archive codec defines the session and the input or output stream that are + * used for reading or writing to an archive. + * + * @param the archive session type + * @param the archive input stream type + * @param the archive output type + */ +public interface ArchiveCodec { + + /** + * Returns the name of this archive codec ("cpio", "tar", "zip") + * + * @return the name + */ + String getName(); + + /** + * Creates a new archive session with a progress watcher. + * + * @param watcher the progress watcher + * @return the new archive session + */ + S newSession(BytesProgressWatcher watcher); + + /** + * Creates a new archive input stream + * + * @param in the input stream for the archive input stream + * @return the archive input stream + * @throws IOException if archive input stream can not be created + */ + I createArchiveInputStream(InputStream in) throws IOException; + + /** + * Creates a new archive output stream + * + * @param out the output stream for the archive output stream + * @return the archive output stream + * @throws IOException if archive output stream can not be created + */ + O createArchiveOutputStream(OutputStream out) throws IOException; + +} \ No newline at end of file diff --git a/io-codec/src/main/java/org/xbib/io/codec/ArchiveSession.java b/io-codec/src/main/java/org/xbib/io/codec/ArchiveSession.java new file mode 100644 index 0000000..842524f --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/ArchiveSession.java @@ -0,0 +1,250 @@ +/* + * Licensed to Jörg Prante and xbib under one or more contributor + * license agreements. See the NOTICE.txt file distributed with this work + * for additional information regarding copyright ownership. + * + * Copyright (C) 2012 Jörg Prante and xbib + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, see http://www.gnu.org/licenses + * or write to the Free Software Foundation, Inc., 51 Franklin Street, + * Fifth Floor, Boston, MA 02110-1301 USA. + * + * The interactive user interfaces in modified source and object code + * versions of this program must display Appropriate Legal Notices, + * as required under Section 5 of the GNU Affero General Public License. + * + * In accordance with Section 7(b) of the GNU Affero General Public + * License, these Appropriate Legal Notices must retain the display of the + * "Powered by xbib" logo. If the display of the logo is not reasonably + * feasible for technical reasons, the Appropriate Legal Notices must display + * the words "Powered by xbib". 
+ */ +package org.xbib.io.codec; + +import org.xbib.io.archive.entry.ArchiveEntry; +import org.xbib.io.archive.stream.ArchiveInputStream; +import org.xbib.io.archive.stream.ArchiveOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.util.Date; +import java.util.Set; + +/** + * Archive session + */ +public abstract class ArchiveSession + implements Session { + + private final static StreamCodecService codecFactory = StreamCodecService.getInstance(); + + private final static int DEFAULT_INPUT_BUFSIZE = 65536; + + protected int bufferSize = DEFAULT_INPUT_BUFSIZE; + + private boolean isOpen; + + private Path path; + + private OpenOption option; + + protected ArchiveSession() { + } + + public ArchiveSession setPath(Path path, OpenOption option) { + this.path = path; + this.option = option; + return this; + } + + public ArchiveSession setBufferSize(int bufferSize) { + this.bufferSize = bufferSize; + return this; + } + + @Override + public synchronized void open(Session.Mode mode) throws IOException { + if (isOpen) { + return; + } + switch (mode) { + case READ: { + InputStream in = newInputStream(path, option); + open(in); + this.isOpen = getInputStream() != null; + break; + } + case WRITE: { + OutputStream out = newOutputStream(path, option); + open(out); + this.isOpen = getOutputStream() != null; + break; + } + } + } + + @Override + public StringPacket newPacket() { + return new StringPacket(); + } + + @Override + public synchronized StringPacket read() throws IOException { + if (!isOpen()) { + throw new IOException("not open"); + } + if (getInputStream() == null) { + throw new IOException("no input stream found"); + } + ArchiveEntry entry = getInputStream().getNextEntry(); + if (entry == null) { + return null; + } + StringPacket packet = newPacket(); + String name = entry.getName(); + packet.name(name); + int size = (int)entry.getEntrySize(); + byte[] b = new byte[size]; + getInputStream().read(b, 0, size); + packet.packet(new String(b)); + return packet; + } + + @Override + public synchronized void write(StringPacket packet) throws IOException { + if (!isOpen()) { + throw new IOException("not open"); + } + if (getOutputStream() == null) { + throw new IOException("no output stream found"); + } + if (packet == null || packet.toString() == null) { + throw new IOException("no packet to write"); + } + byte[] buf = packet.toString().getBytes(); + if (buf.length > 0) { + String name = packet.name(); + ArchiveEntry entry = getOutputStream().newArchiveEntry(); + entry.setName(name); + entry.setLastModified(new Date()); + entry.setEntrySize(buf.length); + getOutputStream().putArchiveEntry(entry); + getOutputStream().write(buf); + getOutputStream().closeArchiveEntry(); + } + } + + @Override + public synchronized void close() throws IOException { + if (!isOpen) { + return; + } + if (getOutputStream() != null) { + getOutputStream().close(); + } + if (getInputStream() != null) { + getInputStream().close(); + } + this.isOpen = false; + } + + @Override + public boolean isOpen() { + return isOpen; + } + + public boolean canOpen(URI uri) { + return canOpen(uri, getSuffix(), true); + } + + public static boolean canOpen(URI uri, String suffix, boolean withCodecs) { + final String scheme = uri.getScheme(); + final String part = uri.getSchemeSpecificPart(); + if (scheme.equals(suffix) || + (scheme.equals("file") && part.endsWith("." 
+ suffix.toLowerCase())) || + (scheme.equals("file") && part.endsWith("." + suffix.toUpperCase()))) { + return true; + } + if (withCodecs) { + Set codecs = StreamCodecService.getCodecs(); + for (String codec : codecs) { + String s = "." + suffix + "." + codec; + if (part.endsWith(s) || part.endsWith(s.toLowerCase()) || part.endsWith(s.toUpperCase())) { + return true; + } + } + } + return false; + } + + protected abstract String getSuffix(); + + protected abstract void open(InputStream in) throws IOException; + + protected abstract void open(OutputStream in) throws IOException; + + protected abstract I getInputStream(); + + protected abstract O getOutputStream(); + + /** + * Helper method for creating the FileInputStream + * + * @param path the path + * @return an InputStream + * @throws java.io.IOException if existence or access rights do not suffice + */ + public static InputStream newInputStream(Path path, OpenOption option) throws IOException { + if (path == null) { + throw new IOException("no path given"); + } + String part = path.toUri().getSchemeSpecificPart(); + if (Files.isReadable(path) && Files.isRegularFile(path)) { + InputStream in = Files.newInputStream(path, option); + Set codecs = StreamCodecService.getCodecs(); + for (String codec : codecs) { + String s = "." + codec; + if (part.endsWith(s.toLowerCase()) || part.endsWith(s.toUpperCase())) { + in = StreamCodecService.getInstance().getCodec(codec).decode(in); + } + } + return in; + } else { + throw new IOException("can't open for input, check existence or access rights: " + path); + } + } + + /** + * Helper method for creating the FileOutputStream. Creates the directory if + * it does not exist. + * + * @throws java.io.IOException if existence or access rights do not suffice + */ + public static OutputStream newOutputStream(Path path, OpenOption option) throws IOException { + String part = path.toUri().getSchemeSpecificPart(); + OutputStream out = Files.newOutputStream(path, option); + Set codecs = StreamCodecService.getCodecs(); + for (String codec : codecs) { + String s = "." + codec; + if (part.endsWith(s.toLowerCase()) || part.endsWith(s.toUpperCase())) { + out = StreamCodecService.getInstance().getCodec(codec).encode(out); + } + } + return out; + } + +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/BytesProgressWatcher.java b/io-codec/src/main/java/org/xbib/io/codec/BytesProgressWatcher.java new file mode 100644 index 0000000..df54e3d --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/BytesProgressWatcher.java @@ -0,0 +1,315 @@ +package org.xbib.io.codec; + +import java.util.Map; +import java.util.TreeMap; + +/** + * Utility class that tracks the number of bytes transferred from a source, and + * uses this information to calculate transfer rates and estimate end times. The + * watcher stores the number of bytes that will be transferred, the number of + * bytes that have been transferred in the current session and the time this has + * taken, and the number of bytes and time taken overal (eg for transfers that + * have been restarted). + */ +public class BytesProgressWatcher { + + /** + * The number of seconds worth of historical byte transfer information that + * will be stored and used to calculate the recent transfer rate. 
+ */
+public class BytesProgressWatcher {
+
+    /**
+     * The number of seconds worth of historical byte transfer information that
+     * will be stored and used to calculate the recent transfer rate.
+     */
+    private static final int SECONDS_OF_HISTORY = 5;
+
+    private boolean isStarted = false;
+
+    private long bytesToTransfer = 0;
+
+    private long startTimeAllTransfersMS = -1;
+
+    private long totalBytesInAllTransfers = 0;
+
+    private long startTimeCurrentTransferMS = -1;
+
+    private long totalBytesInCurrentTransfer = 0;
+
+    private long endTimeCurrentTransferMS = -1;
+
+    private Map<Long, Long> historyOfBytesBySecond = new TreeMap<>();
+
+    private long earliestHistorySecond = Long.MAX_VALUE;
+
+    /**
+     * Construct a watcher for a transfer that will involve a given number of
+     * bytes.
+     *
+     * @param bytesToTransfer the number of bytes that will be transferred, e.g.
+     * the size of a file being uploaded.
+     */
+    public BytesProgressWatcher(long bytesToTransfer) {
+        this.bytesToTransfer = bytesToTransfer;
+    }
+
+    /**
+     * @return the count of bytes that will be transferred by the object watched
+     * by this class.
+     */
+    public synchronized long getBytesToTransfer() {
+        return bytesToTransfer;
+    }
+
+    /**
+     * Resets the byte count and timer variables for a watcher. This method is
+     * called automatically when a transfer is started (i.e. the first bytes are
+     * registered in the method {@link #updateBytesTransferred(long)}), or when
+     * a transfer is restarted (e.g. due to transmission errors).
+     */
+    public synchronized void resetWatcher() {
+        startTimeCurrentTransferMS = System.currentTimeMillis();
+        if (startTimeAllTransfersMS == -1) {
+            startTimeAllTransfersMS = startTimeCurrentTransferMS;
+        }
+        endTimeCurrentTransferMS = -1;
+        totalBytesInCurrentTransfer = 0;
+        isStarted = true;
+    }
+
+    /**
+     * Notifies this watcher that bytes have been transferred.
+     *
+     * @param byteCount the number of bytes that have been transferred.
+     */
+    public synchronized void updateBytesTransferred(long byteCount) {
+        // Start the monitor when we are notified of the first bytes transferred.
+        if (!isStarted) {
+            resetWatcher();
+        }
+
+        // Store the total byte count for the current transfer, and for all transfers.
+        totalBytesInCurrentTransfer += byteCount;
+        totalBytesInAllTransfers += byteCount;
+
+        // Recognise when all the expected bytes have been transferred and mark the end time.
+        if (totalBytesInCurrentTransfer >= bytesToTransfer) {
+            endTimeCurrentTransferMS = System.currentTimeMillis();
+        }
+
+        // Keep historical records of the byte counts transferred in a given second.
+        Long currentSecond = System.currentTimeMillis() / 1000;
+        Long bytesInSecond = historyOfBytesBySecond.get(currentSecond);
+        if (bytesInSecond != null) {
+            historyOfBytesBySecond.put(currentSecond, byteCount + bytesInSecond);
+        } else {
+            historyOfBytesBySecond.put(currentSecond, byteCount);
+        }
+
+        // Remember the earliest second value for which we have historical info.
+        if (currentSecond < earliestHistorySecond) {
+            earliestHistorySecond = currentSecond;
+        }
+
+        // Remove any history records we are no longer interested in.
+        long removeHistoryBeforeSecond = currentSecond - SECONDS_OF_HISTORY;
+        for (long sec = earliestHistorySecond; sec < removeHistoryBeforeSecond; sec++) {
+            historyOfBytesBySecond.remove(sec);
+        }
+        earliestHistorySecond = removeHistoryBeforeSecond;
+    }
+
+    /**
+     * @return the number of bytes that have so far been transferred in the most
+     * recent transfer session.
+     */
+    public synchronized long getBytesTransferred() {
+        return totalBytesInCurrentTransfer;
+    }
+
+    /**
+     * @return the number of bytes that are remaining to be transferred.
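+     * This is equal to {@code getBytesToTransfer() - getBytesTransferred()}.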
+     */
+    public synchronized long getBytesRemaining() {
+        return bytesToTransfer - totalBytesInCurrentTransfer;
+    }
+
+    /**
+     * @return an estimate of the time (in seconds) it will take for the
+     * transfer to complete, based on the number of bytes remaining to transfer
+     * and the overall bytes/second rate.
+     */
+    public synchronized long getRemainingTime() {
+        BytesProgressWatcher[] progressWatchers = new BytesProgressWatcher[]{this};
+        long bytesRemaining = bytesToTransfer - totalBytesInCurrentTransfer;
+        double bytesPerSecond = calculateOverallBytesPerSecond(progressWatchers);
+        if (Math.abs(bytesPerSecond) < 0.001d) {
+            // No transfer has occurred yet, avoid dividing by zero.
+            return 0;
+        }
+        double remainingSecs = (double) bytesRemaining / bytesPerSecond;
+        return Math.round(remainingSecs);
+    }
+
+    /**
+     * @return the byte rate (per second) based on the historical information
+     * for the last
+     * {@link #SECONDS_OF_HISTORY} seconds before the current time.
+     */
+    public synchronized double getRecentByteRatePerSecond() {
+        if (!isStarted) {
+            return 0;
+        }
+
+        long currentSecond = System.currentTimeMillis() / 1000;
+        long startSecond = 1 + (currentSecond - SECONDS_OF_HISTORY);
+        long endSecond = (endTimeCurrentTransferMS != -1
+                ? endTimeCurrentTransferMS / 1000
+                : currentSecond);
+
+        if (currentSecond - SECONDS_OF_HISTORY > endSecond) {
+            // This item finished too long ago, ignore it now.
+            historyOfBytesBySecond.clear();
+            return 0;
+        }
+
+        // Count the number of bytes transferred from SECONDS_OF_HISTORY ago to the second before now.
+        long sumOfBytes = 0;
+        long numberOfSecondsInHistory = 0;
+        for (long sec = startSecond; sec <= endSecond; sec++) {
+            numberOfSecondsInHistory++;
+            Long bytesInSecond = historyOfBytesBySecond.get(sec);
+            if (bytesInSecond != null) {
+                sumOfBytes += bytesInSecond;
+            }
+        }
+        return (numberOfSecondsInHistory == 0 ? 0 : (double) sumOfBytes / numberOfSecondsInHistory);
+    }
+
+    /**
+     * @return the number of milliseconds time elapsed for a transfer. The value
+     * returned is the time elapsed so far if the transfer is ongoing, the total
+     * time taken for the transfer if it is complete, or 0 if the transfer has
+     * not yet started.
+     */
+    public synchronized long getElapsedTimeMS() {
+        if (!isStarted) {
+            return 0;
+        }
+        if (endTimeCurrentTransferMS != -1) {
+            // Transfer is complete, report the time it took.
+            return endTimeCurrentTransferMS - startTimeCurrentTransferMS;
+        } else {
+            return System.currentTimeMillis() - startTimeCurrentTransferMS;
+        }
+    }
+
+    /**
+     * @return the number of bytes that have been transferred over all sessions,
+     * including any sessions that have been restarted.
+     */
+    public synchronized long getTotalBytesInAllTransfers() {
+        return totalBytesInAllTransfers;
+    }
+
+    protected synchronized boolean isStarted() {
+        return isStarted;
+    }
+
+    /**
+     * @return the time (in milliseconds) when the first bytes were transferred,
+     * regardless of how many times the transfer was reset.
+     */
+    public synchronized long getHistoricStartTimeMS() {
+        return startTimeAllTransfersMS;
+    }
+
+    /**
+     * @param progressWatchers all the watchers involved in the same byte
+     * transfer operation.
+     * @return the total number of bytes to transfer.
+     */
+    public static long sumBytesToTransfer(BytesProgressWatcher[] progressWatchers) {
+        long sumOfBytes = 0;
+        for (BytesProgressWatcher progressWatcher : progressWatchers) {
+            sumOfBytes += progressWatcher.getBytesToTransfer();
+        }
+        return sumOfBytes;
+    }
+
+    /**
+     * @param progressWatchers all the watchers involved in the same byte
+     * transfer operation.
+     * @return the total number of bytes already transferred.
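+     * For example (hypothetical sizes for two parallel parts):
+     * <pre>{@code
+     * BytesProgressWatcher part1 = new BytesProgressWatcher(500L);
+     * BytesProgressWatcher part2 = new BytesProgressWatcher(300L);
+     * part1.updateBytesTransferred(100L);
+     * part2.updateBytesTransferred(50L);
+     * long total = BytesProgressWatcher.sumBytesTransferred(
+     *         new BytesProgressWatcher[]{part1, part2}); // 150
+     * }</pre>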
+     */
+    public static long sumBytesTransferred(BytesProgressWatcher[] progressWatchers) {
+        long sumOfBytes = 0;
+        for (BytesProgressWatcher progressWatcher : progressWatchers) {
+            sumOfBytes += progressWatcher.getBytesTransferred();
+        }
+        return sumOfBytes;
+    }
+
+    /**
+     * @param progressWatchers all the watchers involved in the same byte
+     * transfer operation.
+     * @return an estimate of the time (in seconds) it will take for the
+     * transfer to complete, based on the number of bytes remaining to transfer
+     * and the overall bytes/second rate.
+     */
+    public static long calculateRemainingTime(BytesProgressWatcher[] progressWatchers) {
+        long bytesRemaining = sumBytesToTransfer(progressWatchers)
+                - sumBytesTransferred(progressWatchers);
+        double bytesPerSecond = calculateOverallBytesPerSecond(progressWatchers);
+        if (Math.abs(bytesPerSecond) < 0.001d) {
+            // No transfer has occurred yet.
+            return 0;
+        }
+        double remainingSecs = (double) bytesRemaining / bytesPerSecond;
+        return Math.round(remainingSecs);
+    }
+
+    /**
+     * @param progressWatchers all the watchers involved in the same byte
+     * transfer operation.
+     * @return the overall rate of bytes/second over all transfers for all
+     * watchers.
+     */
+    public static double calculateOverallBytesPerSecond(BytesProgressWatcher[] progressWatchers) {
+        long initialStartTime = Long.MAX_VALUE; // The oldest start time of any monitor.
+
+        long bytesTotal = 0;
+        for (BytesProgressWatcher progressWatcher : progressWatchers) {
+            // Ignore any watchers that have not yet started.
+            if (!progressWatcher.isStarted()) {
+                continue;
+            }
+
+            // Add up all the bytes transferred by all started watchers.
+            bytesTotal += progressWatcher.getTotalBytesInAllTransfers();
+
+            // Find the earliest starting time of any monitor.
+            if (progressWatcher.getHistoricStartTimeMS() < initialStartTime) {
+                initialStartTime = progressWatcher.getHistoricStartTimeMS();
+            }
+        }
+
+        // If no watcher has started yet, there is no meaningful rate.
+        if (initialStartTime == Long.MAX_VALUE) {
+            return 0;
+        }
+
+        // Determine how much time has elapsed since the earliest watcher start time.
+        long elapsedTimeSecs = (System.currentTimeMillis() - initialStartTime) / 1000;
+
+        // Calculate the overall rate of bytes/second over all transfers for all
+        // watchers, treating sub-second transfers as having taken one second.
+        return elapsedTimeSecs == 0 ? bytesTotal : (double) bytesTotal / elapsedTimeSecs;
+    }
+
+    /**
+     * @param progressWatchers all the watchers involved in the same byte
+     * transfer operation.
+     * @return the rate of bytes/second that has been achieved recently (i.e.
+     * within the last
+     * {@link #SECONDS_OF_HISTORY} seconds).
+     */
+    public static long calculateRecentByteRatePerSecond(BytesProgressWatcher[] progressWatchers) {
+        double sumOfRates = 0;
+        for (BytesProgressWatcher progressWatcher : progressWatchers) {
+            if (progressWatcher.isStarted()) {
+                sumOfRates += progressWatcher.getRecentByteRatePerSecond();
+            }
+        }
+        return Math.round(sumOfRates);
+    }
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/Connection.java b/io-codec/src/main/java/org/xbib/io/codec/Connection.java
new file mode 100644
index 0000000..9e3c726
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/Connection.java
@@ -0,0 +1,19 @@
+package org.xbib.io.codec;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/**
+ * A Connection is an access to a resource via a scheme or a protocol.
+ * Each connection can serve multiple sessions in parallel.
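+ *
+ * <p>A typical use, sketched here with the tar implementation from this
+ * module and a hypothetical local path:</p>
+ * <pre>{@code
+ * TarConnection connection = new TarConnection();
+ * connection.setPath(Paths.get("/tmp/data.tar"), StandardOpenOption.READ);
+ * TarSession session = connection.createSession();
+ * session.open(Session.Mode.READ);
+ * StringPacket packet = session.read();
+ * session.close();
+ * connection.close();
+ * }</pre>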
+ */
+public interface Connection<S extends Session<?>> extends Closeable {
+
+    /**
+     * Create a new session on this connection.
+     *
+     * @return the session
+     * @throws java.io.IOException if the session cannot be created
+     */
+    S createSession() throws IOException;
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/CustomURLStreamHandler.java b/io-codec/src/main/java/org/xbib/io/codec/CustomURLStreamHandler.java
new file mode 100644
index 0000000..4e2895f
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/CustomURLStreamHandler.java
@@ -0,0 +1,8 @@
+package org.xbib.io.codec;
+
+import java.net.URLStreamHandler;
+
+public abstract class CustomURLStreamHandler extends URLStreamHandler {
+
+    public abstract String getName();
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/CustomURLStreamHandlerFactory.java b/io-codec/src/main/java/org/xbib/io/codec/CustomURLStreamHandlerFactory.java
new file mode 100644
index 0000000..992c99d
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/CustomURLStreamHandlerFactory.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2012 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.io.codec;
+
+import java.net.URLStreamHandler;
+import java.net.URLStreamHandlerFactory;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.ServiceLoader;
+
+public class CustomURLStreamHandlerFactory implements URLStreamHandlerFactory {
+
+    private final static Map<String, CustomURLStreamHandler> protocolHandlers = new HashMap<>();
+
+    private final static CustomURLStreamHandlerFactory factory = new CustomURLStreamHandlerFactory();
+
+    public CustomURLStreamHandlerFactory() {
+        ServiceLoader<CustomURLStreamHandler> serviceLoader = ServiceLoader.load(CustomURLStreamHandler.class);
+        for (CustomURLStreamHandler handler : serviceLoader) {
+            if (!protocolHandlers.containsKey(handler.getName())) {
+                protocolHandlers.put(handler.getName(), handler);
+            }
+        }
+    }
+
+    public static CustomURLStreamHandlerFactory getFactory() {
+        return factory;
+    }
+
+    public void addHandler(String protocol, CustomURLStreamHandler urlHandler) {
+        protocolHandlers.put(protocol, urlHandler);
+    }
+
+    @Override
+    public URLStreamHandler createURLStreamHandler(String protocol) {
+        return protocolHandlers.get(protocol);
+    }
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/Packet.java b/io-codec/src/main/java/org/xbib/io/codec/Packet.java
new file mode 100644
index 0000000..669854f
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/Packet.java
@@ -0,0 +1,15 @@
+package org.xbib.io.codec;
+
+/**
+ * A packet for transporting data chunks in sessions.
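+ *
+ * <p>For example, the string implementation in this module:</p>
+ * <pre>{@code
+ * StringPacket packet = new StringPacket();
+ * packet.name("entry-1").packet("hello");
+ * String payload = packet.packet(); // "hello"
+ * }</pre>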
+ *
+ * @param <P> the payload type carried by this packet
+ */
+public interface Packet<P> {
+
+    String name();
+
+    Packet<P> name(String name);
+
+    P packet();
+
+    Packet<P> packet(P packet);
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/Session.java b/io-codec/src/main/java/org/xbib/io/codec/Session.java
new file mode 100644
index 0000000..bde8f92
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/Session.java
@@ -0,0 +1,43 @@
+package org.xbib.io.codec;
+
+import java.io.IOException;
+
+/**
+ * The Session interface is used for being opened, receiving
+ * operations, and being closed. Sessions must be opened before the first
+ * operation and closed after the last operation.
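+ *
+ * <p>A sketch of the lifecycle, using the file implementation from this
+ * module and a hypothetical path:</p>
+ * <pre>{@code
+ * FileSession session = new FileSession();
+ * session.setPath(Paths.get("/tmp/data.txt"), StandardOpenOption.READ);
+ * session.open(Session.Mode.READ);
+ * StringPacket packet = session.read();
+ * session.close();
+ * }</pre>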
+ *
+ * @param <P> the packet type handled by this session
+ */
+public interface Session<P extends Packet<?>> {
+
+    enum Mode {
+
+        READ, WRITE, APPEND, CONTROL, DELETE
+    }
+
+    /**
+     * Open the session with a given input/output mode.
+     *
+     * @param mode the mode
+     * @throws java.io.IOException if the session cannot be opened
+     */
+    void open(Mode mode) throws IOException;
+
+    P newPacket();
+
+    P read() throws IOException;
+
+    void write(P packet) throws IOException;
+
+    /**
+     * Close the session.
+     *
+     * @throws java.io.IOException if the session cannot be closed
+     */
+    void close() throws IOException;
+
+    /**
+     * Checks if this session has been successfully opened.
+     *
+     * @return true if the session is open
+     */
+    boolean isOpen();
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/StreamCodec.java b/io-codec/src/main/java/org/xbib/io/codec/StreamCodec.java
new file mode 100644
index 0000000..80c39e7
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/StreamCodec.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2012 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.io.codec;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+public interface StreamCodec<I extends InputStream, O extends OutputStream> {
+
+    String getName();
+
+    I decode(InputStream in) throws IOException;
+
+    I decode(InputStream in, int bufsize) throws IOException;
+
+    O encode(OutputStream out) throws IOException;
+
+    O encode(OutputStream out, int bufsize) throws IOException;
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/StreamCodecService.java b/io-codec/src/main/java/org/xbib/io/codec/StreamCodecService.java
new file mode 100644
index 0000000..f40db9d
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/StreamCodecService.java
@@ -0,0 +1,40 @@
+package org.xbib.io.codec;
+
+import java.util.Map;
+import java.util.ServiceLoader;
+import java.util.Set;
+import java.util.WeakHashMap;
+
+/**
+ * A registry of {@link StreamCodec} implementations, discovered via the
+ * {@link ServiceLoader} mechanism and keyed by file name suffix.
+ */
+public class StreamCodecService {
+
+    private final static Map<String, StreamCodec<?, ?>> codecs = new WeakHashMap<>();
+
+    private final static StreamCodecService instance = new StreamCodecService();
+
+    private StreamCodecService() {
+        ServiceLoader<StreamCodec> loader = ServiceLoader.load(StreamCodec.class);
+        for (StreamCodec<?, ?> codec : loader) {
+            if (!codecs.containsKey(codec.getName())) {
+                codecs.put(codec.getName(), codec);
+            }
+        }
+    }
+
+    public static StreamCodecService getInstance() {
+        return instance;
+    }
+
+    public StreamCodec<?, ?> getCodec(String suffix) {
+        if (codecs.containsKey(suffix)) {
+            return codecs.get(suffix);
+        }
+        throw new IllegalArgumentException("Stream codec for " + suffix + " not found in " + codecs);
+    }
+
+    public static Set<String> getCodecs() {
+        return codecs.keySet();
+    }
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/StringPacket.java b/io-codec/src/main/java/org/xbib/io/codec/StringPacket.java
new file mode 100644
index 0000000..4f3fcf7
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/StringPacket.java
@@ -0,0 +1,36 @@
+package org.xbib.io.codec;
+
+/**
+ * A packet that carries a string payload.
+ */
+public class StringPacket implements Packet<String> {
+
+    private String name;
+
+    private String string;
+
+    public StringPacket() {
+    }
+
+    public StringPacket name(String name) {
+        this.name = name;
+        return this;
+    }
+
+    public String name() {
+        return name;
+    }
+
+    public StringPacket packet(String string) {
+        this.string = string;
+        return this;
+    }
+
+    public String packet() {
+        return string;
+    }
+
+    @Override
+    public String toString() {
+        return string;
+    }
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/ar/ArConnection.java b/io-codec/src/main/java/org/xbib/io/codec/ar/ArConnection.java
new file mode 100644
index 0000000..277aa3f
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/ar/ArConnection.java
@@ -0,0 +1,62 @@
+package org.xbib.io.codec.ar;
+
+import org.xbib.io.codec.Connection;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.nio.file.OpenOption;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+
+/**
+ * Ar connection.
+ */
+public class ArConnection extends URLConnection implements Connection<ArSession> {
+
+    private ArSession session;
+
+    private Path path;
+
+    private OpenOption option;
+
+    /**
+     * Constructs a URL connection to the specified URL. A connection to
+     * the object referenced by the URL is not created.
+     *
+     * @param url the specified URL.
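+     * @throws URISyntaxException if the URL cannot be converted to a URI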
+     */
+    public ArConnection(URL url) throws URISyntaxException {
+        super(url);
+        this.path = Paths.get(url.toURI().getSchemeSpecificPart());
+        this.option = StandardOpenOption.READ;
+    }
+
+    @Override
+    public void connect() throws IOException {
+        this.session = createSession();
+    }
+
+    public void setPath(Path path, OpenOption option) {
+        this.path = path;
+        this.option = option;
+    }
+
+    public Path getPath() {
+        return path;
+    }
+
+    @Override
+    public ArSession createSession() throws IOException {
+        ArSession session = new ArSession();
+        session.setPath(path, option);
+        return session;
+    }
+
+    @Override
+    public void close() throws IOException {
+        session.close();
+    }
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/ar/ArSession.java b/io-codec/src/main/java/org/xbib/io/codec/ar/ArSession.java
new file mode 100644
index 0000000..57def9a
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/ar/ArSession.java
@@ -0,0 +1,43 @@
+package org.xbib.io.codec.ar;
+
+import org.xbib.io.codec.ArchiveSession;
+import org.xbib.io.archive.ar.ArArchiveInputStream;
+import org.xbib.io.archive.ar.ArArchiveOutputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/**
+ * Ar session.
+ */
+public class ArSession extends ArchiveSession<ArArchiveInputStream, ArArchiveOutputStream> {
+
+    private final static String SUFFIX = "ar";
+
+    private ArArchiveInputStream in;
+
+    private ArArchiveOutputStream out;
+
+    protected String getSuffix() {
+        return SUFFIX;
+    }
+
+    protected void open(InputStream in) {
+        this.in = new ArArchiveInputStream(in);
+    }
+
+    protected void open(OutputStream out) {
+        this.out = new ArArchiveOutputStream(out);
+    }
+
+    public ArArchiveInputStream getInputStream() {
+        return in;
+    }
+
+    public ArArchiveOutputStream getOutputStream() {
+        return out;
+    }
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/ar/ArURLStreamHandler.java b/io-codec/src/main/java/org/xbib/io/codec/ar/ArURLStreamHandler.java
new file mode 100644
index 0000000..1f28074
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/ar/ArURLStreamHandler.java
@@ -0,0 +1,27 @@
+package org.xbib.io.codec.ar;
+
+import org.xbib.io.codec.CustomURLStreamHandler;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.net.URLConnection;
+
+/**
+ * URL stream handler for the "ar" scheme.
+ */
+public class ArURLStreamHandler extends CustomURLStreamHandler {
+
+    @Override
+    public String getName() {
+        return "ar";
+    }
+
+    @Override
+    protected URLConnection openConnection(URL u) throws IOException {
+        try {
+            return new ArConnection(u);
+        } catch (URISyntaxException e) {
+            throw new IOException(e);
+        }
+    }
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/bzip2/Bzip2StreamCodec.java b/io-codec/src/main/java/org/xbib/io/codec/bzip2/Bzip2StreamCodec.java
new file mode 100644
index 0000000..ccd046b
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/bzip2/Bzip2StreamCodec.java
@@ -0,0 +1,40 @@
+package org.xbib.io.codec.bzip2;
+
+import org.xbib.io.codec.StreamCodec;
+import org.xbib.io.compress.bzip2.Bzip2InputStream;
+import org.xbib.io.compress.bzip2.Bzip2OutputStream;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/**
+ * A codec for bzip2 compressed streams.
+ */
+public class Bzip2StreamCodec implements StreamCodec<Bzip2InputStream, Bzip2OutputStream> {
+
+    @Override
+    public String getName() {
+        return "bz2";
+    }
+
+    @Override
+    public Bzip2InputStream decode(InputStream in) throws IOException {
+        return new
Bzip2InputStream(in); + } + + @Override + public Bzip2InputStream decode(InputStream in, int bufsize) throws IOException { + return new Bzip2InputStream(in, bufsize); + } + + @Override + public Bzip2OutputStream encode(OutputStream out) throws IOException { + return new Bzip2OutputStream(out); + } + + @Override + public Bzip2OutputStream encode(OutputStream out, int bufsize) throws IOException { + return new Bzip2OutputStream(out, bufsize); + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/cpio/CpioConnection.java b/io-codec/src/main/java/org/xbib/io/codec/cpio/CpioConnection.java new file mode 100644 index 0000000..de941b0 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/cpio/CpioConnection.java @@ -0,0 +1,61 @@ +package org.xbib.io.codec.cpio; + +import org.xbib.io.codec.Connection; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; + +/** + * Cpio connection. + */ +public class CpioConnection extends URLConnection implements Connection { + + private CpioSession session; + + private Path path; + + private OpenOption option; + + /** + * Constructs a URL connection to the specified URL. A connection to + * the object referenced by the URL is not created. + * + * @param url the specified URL. + */ + public CpioConnection(URL url) throws URISyntaxException { + super(url); + this.path = Paths.get(url.toURI().getSchemeSpecificPart()); + this.option = StandardOpenOption.READ; + } + + @Override + public void connect() throws IOException { + this.session = createSession(); + } + + public void setPath(Path path, OpenOption option) { + this.path = path; + this.option = option; + } + + public Path getPath() { + return path; + } + + @Override + public CpioSession createSession() throws IOException { + CpioSession session = new CpioSession(); + session.setPath(path, option); + return session; + } + + @Override + public void close() throws IOException { + session.close(); + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/cpio/CpioSession.java b/io-codec/src/main/java/org/xbib/io/codec/cpio/CpioSession.java new file mode 100644 index 0000000..de1d223 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/cpio/CpioSession.java @@ -0,0 +1,39 @@ +package org.xbib.io.codec.cpio; + +import org.xbib.io.codec.ArchiveSession; +import org.xbib.io.archive.cpio.CpioArchiveInputStream; +import org.xbib.io.archive.cpio.CpioArchiveOutputStream; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Cpio Session + */ +public class CpioSession extends ArchiveSession { + + private final static String SUFFIX = "cpio"; + + private CpioArchiveInputStream in; + + private CpioArchiveOutputStream out; + + protected String getSuffix() { + return SUFFIX; + } + + protected void open(InputStream in) { + this.in = new CpioArchiveInputStream(in); + } + + protected void open(OutputStream out) { + this.out = new CpioArchiveOutputStream(out); + } + + public CpioArchiveInputStream getInputStream() { + return in; + } + + public CpioArchiveOutputStream getOutputStream() { + return out; + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/cpio/CpioURLStreamHandler.java b/io-codec/src/main/java/org/xbib/io/codec/cpio/CpioURLStreamHandler.java new file mode 100644 index 0000000..c28afa9 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/cpio/CpioURLStreamHandler.java @@ 
-0,0 +1,27 @@ +package org.xbib.io.codec.cpio; + +import org.xbib.io.codec.CustomURLStreamHandler; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; + +/** + * + */ +public class CpioURLStreamHandler extends CustomURLStreamHandler { + + @Override + public String getName() { + return "cpio"; + } + + @Override + protected URLConnection openConnection(URL u) throws IOException { + try { + return new CpioConnection(u); + } catch (URISyntaxException e) { + throw new IOException(e); + } + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/file/FileConnection.java b/io-codec/src/main/java/org/xbib/io/codec/file/FileConnection.java new file mode 100644 index 0000000..5c37b3d --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/file/FileConnection.java @@ -0,0 +1,106 @@ +/* + * Licensed to Jörg Prante and xbib under one or more contributor + * license agreements. See the NOTICE.txt file distributed with this work + * for additional information regarding copyright ownership. + * + * Copyright (C) 2012 Jörg Prante and xbib + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, see http://www.gnu.org/licenses + * or write to the Free Software Foundation, Inc., 51 Franklin Street, + * Fifth Floor, Boston, MA 02110-1301 USA. + * + * The interactive user interfaces in modified source and object code + * versions of this program must display Appropriate Legal Notices, + * as required under Section 5 of the GNU Affero General Public License. + * + * In accordance with Section 7(b) of the GNU Affero General Public + * License, these Appropriate Legal Notices must retain the display of the + * "Powered by xbib" logo. If the display of the logo is not reasonably + * feasible for technical reasons, the Appropriate Legal Notices must display + * the words "Powered by xbib". + */ +package org.xbib.io.codec.file; + +import org.xbib.io.codec.Connection; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.file.Files; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; + +/** + * File connection + */ +public class FileConnection extends URLConnection implements Connection { + + private FileSession session; + + private Path path; + + private OpenOption option; + + /** + * Constructs a URL connection to the specified URL. A connection to + * the object referenced by the URL is not created. + * + * @param url the specified URL. 
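+     * @throws URISyntaxException if the URL cannot be converted to a URI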
+ */ + public FileConnection(URL url) throws URISyntaxException { + super(url); + this.path = Paths.get(url.toURI().getSchemeSpecificPart()); + this.option = StandardOpenOption.READ; + } + + @Override + public void connect() throws IOException { + this.session = createSession(); + } + + @Override + public InputStream getInputStream() throws IOException { + return Files.newInputStream(path, StandardOpenOption.READ); + } + + @Override + public OutputStream getOutputStream() throws IOException { + return Files.newOutputStream(path, StandardOpenOption.CREATE); + } + + public void setPath(Path path, OpenOption option) { + this.path = path; + this.option = option; + } + + public Path getPath() { + return path; + } + + @Override + public FileSession createSession() throws IOException { + FileSession session = new FileSession(); + session.setPath(path, option); + return session; + } + + @Override + public void close() throws IOException { + session.close(); + } +} + diff --git a/io-codec/src/main/java/org/xbib/io/codec/file/FileSession.java b/io-codec/src/main/java/org/xbib/io/codec/file/FileSession.java new file mode 100644 index 0000000..ceb8e45 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/file/FileSession.java @@ -0,0 +1,190 @@ +/* + * Licensed to Jörg Prante and xbib under one or more contributor + * license agreements. See the NOTICE.txt file distributed with this work + * for additional information regarding copyright ownership. + * + * Copyright (C) 2012 Jörg Prante and xbib + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, see http://www.gnu.org/licenses + * or write to the Free Software Foundation, Inc., 51 Franklin Street, + * Fifth Floor, Boston, MA 02110-1301 USA. + * + * The interactive user interfaces in modified source and object code + * versions of this program must display Appropriate Legal Notices, + * as required under Section 5 of the GNU Affero General Public License. + * + * In accordance with Section 7(b) of the GNU Affero General Public + * License, these Appropriate Legal Notices must retain the display of the + * "Powered by xbib" logo. If the display of the logo is not reasonably + * feasible for technical reasons, the Appropriate Legal Notices must display + * the words "Powered by xbib". 
+ */
+package org.xbib.io.codec.file;
+
+import org.xbib.io.codec.Session;
+import org.xbib.io.codec.StreamCodecService;
+import org.xbib.io.codec.StringPacket;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.nio.file.OpenOption;
+import java.nio.file.Path;
+
+/**
+ * A file session.
+ */
+public class FileSession implements Session<StringPacket> {
+
+    private final static StreamCodecService factory = StreamCodecService.getInstance();
+
+    private final static String encoding = System.getProperty("file.encoding");
+
+    private Path path;
+
+    private OpenOption option;
+
+    private boolean isOpen;
+
+    private Writer writer;
+
+    private Reader reader;
+
+    public FileSession() {
+    }
+
+    public void setPath(Path path, OpenOption option) {
+        this.path = path;
+        this.option = option;
+    }
+
+    @Override
+    public void open(Mode mode) throws IOException {
+        if (isOpen()) {
+            return;
+        }
+        this.isOpen = false;
+        String filename = path.toString();
+        File file = new File(filename);
+        switch (mode) {
+            case READ: {
+                if (!file.exists()) {
+                    throw new IOException("file does not exist: " + path);
+                }
+                if (filename.endsWith(".gz")) {
+                    FileInputStream in = new FileInputStream(file);
+                    this.reader = new InputStreamReader(factory.getCodec("gz").decode(in), encoding);
+                    this.isOpen = true;
+                } else if (filename.endsWith(".bz2")) {
+                    FileInputStream in = new FileInputStream(file);
+                    this.reader = new InputStreamReader(factory.getCodec("bz2").decode(in), encoding);
+                    this.isOpen = true;
+                } else if (filename.endsWith(".xz")) {
+                    FileInputStream in = new FileInputStream(file);
+                    this.reader = new InputStreamReader(factory.getCodec("xz").decode(in), encoding);
+                    this.isOpen = true;
+                } else {
+                    FileInputStream in = new FileInputStream(file);
+                    this.reader = new InputStreamReader(in, encoding);
+                    this.isOpen = true;
+                }
+                break;
+            }
+            case WRITE: {
+                if (file.exists()) {
+                    throw new IOException("cowardly not overwriting file: " + file.getAbsolutePath());
+                } else {
+                    // create directories if required
+                    file.getParentFile().mkdirs();
+                }
+                if (filename.endsWith(".gz")) {
+                    FileOutputStream out = new FileOutputStream(file);
+                    this.writer = new OutputStreamWriter(factory.getCodec("gz").encode(out), encoding);
+                    this.isOpen = true;
+                } else if (filename.endsWith(".bz2")) {
+                    FileOutputStream out = new FileOutputStream(file);
+                    this.writer = new OutputStreamWriter(factory.getCodec("bz2").encode(out), encoding);
+                    this.isOpen = true;
+                } else if (filename.endsWith(".xz")) {
+                    // write mode must encode to an output stream, not decode from an input stream
+                    FileOutputStream out = new FileOutputStream(file);
+                    this.writer = new OutputStreamWriter(factory.getCodec("xz").encode(out), encoding);
+                    this.isOpen = true;
+                } else {
+                    FileOutputStream out = new FileOutputStream(file);
+                    this.writer = new OutputStreamWriter(out, encoding);
+                    this.isOpen = true;
+                }
+                break;
+            }
+            case APPEND: {
+                if (!file.exists()) {
+                    file.getParentFile().mkdirs();
+                }
+                if (filename.endsWith(".gz")) {
+                    FileOutputStream out = new FileOutputStream(file, true);
+                    this.writer = new OutputStreamWriter(factory.getCodec("gz").encode(out), encoding);
+                    this.isOpen = true;
+                } else if (filename.endsWith(".bz2")) {
+                    FileOutputStream out = new FileOutputStream(file, true);
+                    this.writer = new OutputStreamWriter(factory.getCodec("bz2").encode(out), encoding);
+                    this.isOpen = true;
+                } else if (filename.endsWith(".xz")) {
+                    FileOutputStream out = new
FileOutputStream(file, true);
+                    this.writer = new OutputStreamWriter(factory.getCodec("xz").encode(out), encoding);
+                    this.isOpen = true;
+                } else {
+                    FileOutputStream out = new FileOutputStream(file, true);
+                    this.writer = new OutputStreamWriter(out, encoding);
+                    this.isOpen = true;
+                }
+                break;
+            }
+        }
+    }
+
+    @Override
+    public boolean isOpen() {
+        return isOpen;
+    }
+
+    @Override
+    public void close() throws IOException {
+        if (reader != null) {
+            reader.close();
+            this.isOpen = false;
+        }
+        if (writer != null) {
+            writer.close();
+            this.isOpen = false;
+        }
+    }
+
+    public StringPacket newPacket() {
+        return new StringPacket();
+    }
+
+    public StringPacket read() throws IOException {
+        char[] ch = new char[1024];
+        // use the number of chars actually read, the buffer may not be full
+        int n = reader.read(ch);
+        if (n < 0) {
+            return null;
+        }
+        return new StringPacket().packet(new String(ch, 0, n));
+    }
+
+    public void write(StringPacket packet) throws IOException {
+        writer.write(packet.packet());
+    }
+
+}
diff --git a/io-codec/src/main/java/org/xbib/io/codec/file/FileURLStreamHandler.java b/io-codec/src/main/java/org/xbib/io/codec/file/FileURLStreamHandler.java
new file mode 100644
index 0000000..d8ded93
--- /dev/null
+++ b/io-codec/src/main/java/org/xbib/io/codec/file/FileURLStreamHandler.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2012 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */ +package org.xbib.io.codec.file; + +import org.xbib.io.codec.CustomURLStreamHandler; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; + +public class FileURLStreamHandler extends CustomURLStreamHandler { + + @Override + public String getName() { + return "file"; + } + + @Override + protected URLConnection openConnection(URL u) throws IOException { + try { + return new FileConnection(u); + } catch (URISyntaxException e) { + throw new IOException(e); + } + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/gzip/GzipStreamCodec.java b/io-codec/src/main/java/org/xbib/io/codec/gzip/GzipStreamCodec.java new file mode 100644 index 0000000..c78b7e4 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/gzip/GzipStreamCodec.java @@ -0,0 +1,39 @@ +package org.xbib.io.codec.gzip; + +import org.xbib.io.codec.StreamCodec; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +/** + * + */ +public class GzipStreamCodec implements StreamCodec { + + @Override + public String getName() { + return "gz"; + } + + @Override + public GZIPInputStream decode(InputStream in) throws IOException { + return new GZIPInputStream(in); + } + + @Override + public GZIPInputStream decode(InputStream in, int bufsize) throws IOException { + return new GZIPInputStream(in, bufsize); + } + + @Override + public GZIPOutputStream encode(OutputStream out) throws IOException { + return new GZIPOutputStream(out); + } + + @Override + public GZIPOutputStream encode(OutputStream out, int bufsize) throws IOException { + return new GZIPOutputStream(out, bufsize); + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/jar/JarConnection.java b/io-codec/src/main/java/org/xbib/io/codec/jar/JarConnection.java new file mode 100644 index 0000000..e592940 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/jar/JarConnection.java @@ -0,0 +1,92 @@ +/* + * Licensed to Jörg Prante and xbib under one or more contributor + * license agreements. See the NOTICE.txt file distributed with this work + * for additional information regarding copyright ownership. + * + * Copyright (C) 2012 Jörg Prante and xbib + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, see http://www.gnu.org/licenses + * or write to the Free Software Foundation, Inc., 51 Franklin Street, + * Fifth Floor, Boston, MA 02110-1301 USA. + * + * The interactive user interfaces in modified source and object code + * versions of this program must display Appropriate Legal Notices, + * as required under Section 5 of the GNU Affero General Public License. + * + * In accordance with Section 7(b) of the GNU Affero General Public + * License, these Appropriate Legal Notices must retain the display of the + * "Powered by xbib" logo. 
If the display of the logo is not reasonably + * feasible for technical reasons, the Appropriate Legal Notices must display + * the words "Powered by xbib". + */ +package org.xbib.io.codec.jar; + +import org.xbib.io.codec.Connection; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; + +/** + * Jar connection + */ +public class JarConnection extends URLConnection implements Connection { + + private JarSession session; + + private Path path; + + private OpenOption option; + + /** + * Constructs a URL connection to the specified URL. A connection to + * the object referenced by the URL is not created. + * + * @param url the specified URL. + */ + public JarConnection(URL url) throws URISyntaxException { + super(url); + this.path = Paths.get(url.toURI().getSchemeSpecificPart()); + this.option = StandardOpenOption.READ; + } + + @Override + public void connect() throws IOException { + this.session = createSession(); + } + + public void setPath(Path path, OpenOption option) { + this.path = path; + this.option = option; + } + + public Path getPath() { + return path; + } + + @Override + public JarSession createSession() throws IOException { + JarSession session = new JarSession(); + session.setPath(path, option); + return session; + } + + @Override + public void close() throws IOException { + session.close(); + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/jar/JarSession.java b/io-codec/src/main/java/org/xbib/io/codec/jar/JarSession.java new file mode 100644 index 0000000..f80c232 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/jar/JarSession.java @@ -0,0 +1,37 @@ +package org.xbib.io.codec.jar; + +import org.xbib.io.codec.ArchiveSession; +import org.xbib.io.archive.jar.JarArchiveInputStream; +import org.xbib.io.archive.jar.JarArchiveOutputStream; +import java.io.InputStream; +import java.io.OutputStream; + +public class JarSession extends ArchiveSession { + + private final static String SUFFIX = "jar"; + + private JarArchiveInputStream in; + + private JarArchiveOutputStream out; + + protected String getSuffix() { + return SUFFIX; + } + + protected void open(InputStream in) { + this.in = new JarArchiveInputStream(in); + } + + protected void open(OutputStream out) { + this.out = new JarArchiveOutputStream(out); + } + + public JarArchiveInputStream getInputStream() { + return in; + } + + public JarArchiveOutputStream getOutputStream() { + return out; + } + +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/jar/JarURLStreamHandler.java b/io-codec/src/main/java/org/xbib/io/codec/jar/JarURLStreamHandler.java new file mode 100644 index 0000000..5ecc9c4 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/jar/JarURLStreamHandler.java @@ -0,0 +1,55 @@ +/* + * Licensed to Jörg Prante and xbib under one or more contributor + * license agreements. See the NOTICE.txt file distributed with this work + * for additional information regarding copyright ownership. + * + * Copyright (C) 2012 Jörg Prante and xbib + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. 
+ * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, see http://www.gnu.org/licenses + * or write to the Free Software Foundation, Inc., 51 Franklin Street, + * Fifth Floor, Boston, MA 02110-1301 USA. + * + * The interactive user interfaces in modified source and object code + * versions of this program must display Appropriate Legal Notices, + * as required under Section 5 of the GNU Affero General Public License. + * + * In accordance with Section 7(b) of the GNU Affero General Public + * License, these Appropriate Legal Notices must retain the display of the + * "Powered by xbib" logo. If the display of the logo is not reasonably + * feasible for technical reasons, the Appropriate Legal Notices must display + * the words "Powered by xbib". + */ +package org.xbib.io.codec.jar; + +import org.xbib.io.codec.CustomURLStreamHandler; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; + +public class JarURLStreamHandler extends CustomURLStreamHandler { + + @Override + public String getName() { + return "jar"; + } + + @Override + protected URLConnection openConnection(URL u) throws IOException { + try { + return new JarConnection(u); + } catch (URISyntaxException e) { + throw new IOException(e); + } + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/lzf/LZFStreamCodec.java b/io-codec/src/main/java/org/xbib/io/codec/lzf/LZFStreamCodec.java new file mode 100644 index 0000000..e68cd74 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/lzf/LZFStreamCodec.java @@ -0,0 +1,39 @@ +package org.xbib.io.codec.lzf; + +import org.xbib.io.codec.StreamCodec; +import org.xbib.io.compress.lzf.LZFInputStream; +import org.xbib.io.compress.lzf.LZFOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * + */ +public class LZFStreamCodec implements StreamCodec { + + @Override + public String getName() { + return "lzf"; + } + + @Override + public LZFInputStream decode(InputStream in) throws IOException { + return new LZFInputStream(in); + } + + @Override + public LZFInputStream decode(InputStream in, int bufsize) throws IOException { + return new LZFInputStream(in, true); + } + + @Override + public LZFOutputStream encode(OutputStream out) throws IOException { + return new LZFOutputStream(out); + } + + @Override + public LZFOutputStream encode(OutputStream out, int bufsize) throws IOException { + return new LZFOutputStream(out, bufsize); + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/tar/TarConnection.java b/io-codec/src/main/java/org/xbib/io/codec/tar/TarConnection.java new file mode 100644 index 0000000..0fc5e5b --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/tar/TarConnection.java @@ -0,0 +1,67 @@ +package org.xbib.io.codec.tar; + +import org.xbib.io.codec.Connection; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; + +/** + * Tar connection + */ +public class TarConnection extends URLConnection implements Connection { + + private 
TarSession session; + + private Path path; + + private OpenOption option; + + public TarConnection() { + super(null); + } + + /** + * Constructs a URL connection to the specified URL. A connection to + * the object referenced by the URL is not created. + * + * @param url the specified URL. + */ + protected TarConnection(URL url) throws URISyntaxException { + super(url); + this.path = Paths.get(url.toURI().getSchemeSpecificPart()); + this.option = StandardOpenOption.READ; + } + + @Override + public void connect() throws IOException { + this.session = createSession(); + } + + public void setPath(Path path, OpenOption option) { + this.path = path; + this.option = option; + } + + public Path getPath() { + return path; + } + + @Override + public TarSession createSession() throws IOException { + TarSession session = new TarSession(); + session.setPath(path, option); + return session; + } + + @Override + public void close() throws IOException { + if (session != null) { + session.close(); + } + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/tar/TarSession.java b/io-codec/src/main/java/org/xbib/io/codec/tar/TarSession.java new file mode 100644 index 0000000..c2b9155 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/tar/TarSession.java @@ -0,0 +1,69 @@ +/* + * Licensed to Jörg Prante and xbib under one or more contributor + * license agreements. See the NOTICE.txt file distributed with this work + * for additional information regarding copyright ownership. + * + * Copyright (C) 2012 Jörg Prante and xbib + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, see http://www.gnu.org/licenses + * or write to the Free Software Foundation, Inc., 51 Franklin Street, + * Fifth Floor, Boston, MA 02110-1301 USA. + * + * The interactive user interfaces in modified source and object code + * versions of this program must display Appropriate Legal Notices, + * as required under Section 5 of the GNU Affero General Public License. + * + * In accordance with Section 7(b) of the GNU Affero General Public + * License, these Appropriate Legal Notices must retain the display of the + * "Powered by xbib" logo. If the display of the logo is not reasonably + * feasible for technical reasons, the Appropriate Legal Notices must display + * the words "Powered by xbib". 
+ */ +package org.xbib.io.codec.tar; + +import org.xbib.io.codec.ArchiveSession; +import org.xbib.io.archive.tar.TarArchiveInputStream; +import org.xbib.io.archive.tar.TarArchiveOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +public class TarSession extends ArchiveSession { + + private final static String SUFFIX = "tar"; + + private TarArchiveInputStream in; + + private TarArchiveOutputStream out; + + protected String getSuffix() { + return SUFFIX; + } + + protected void open(InputStream in) throws IOException { + this.in = new TarArchiveInputStream(in); + } + + protected void open(OutputStream out) { + this.out = new TarArchiveOutputStream(out); + } + + public TarArchiveInputStream getInputStream() { + return in; + } + + public TarArchiveOutputStream getOutputStream() { + return out; + } + +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/tar/TarURLStreamHandler.java b/io-codec/src/main/java/org/xbib/io/codec/tar/TarURLStreamHandler.java new file mode 100644 index 0000000..ba8fd97 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/tar/TarURLStreamHandler.java @@ -0,0 +1,55 @@ +/* + * Licensed to Jörg Prante and xbib under one or more contributor + * license agreements. See the NOTICE.txt file distributed with this work + * for additional information regarding copyright ownership. + * + * Copyright (C) 2012 Jörg Prante and xbib + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, see http://www.gnu.org/licenses + * or write to the Free Software Foundation, Inc., 51 Franklin Street, + * Fifth Floor, Boston, MA 02110-1301 USA. + * + * The interactive user interfaces in modified source and object code + * versions of this program must display Appropriate Legal Notices, + * as required under Section 5 of the GNU Affero General Public License. + * + * In accordance with Section 7(b) of the GNU Affero General Public + * License, these Appropriate Legal Notices must retain the display of the + * "Powered by xbib" logo. If the display of the logo is not reasonably + * feasible for technical reasons, the Appropriate Legal Notices must display + * the words "Powered by xbib". 
+ */ +package org.xbib.io.codec.tar; + +import org.xbib.io.codec.CustomURLStreamHandler; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; + +public class TarURLStreamHandler extends CustomURLStreamHandler { + + @Override + public String getName() { + return "tar"; + } + + @Override + protected URLConnection openConnection(URL u) throws IOException { + try { + return new TarConnection(u); + } catch (URISyntaxException e) { + throw new IOException(e); + } + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/xz/XZStreamCodec.java b/io-codec/src/main/java/org/xbib/io/codec/xz/XZStreamCodec.java new file mode 100644 index 0000000..4e5af52 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/xz/XZStreamCodec.java @@ -0,0 +1,40 @@ +package org.xbib.io.codec.xz; + +import org.xbib.io.codec.StreamCodec; +import org.xbib.io.compress.xz.LZMA2Options; +import org.xbib.io.compress.xz.XZInputStream; +import org.xbib.io.compress.xz.XZOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * + */ +public class XZStreamCodec implements StreamCodec { + + @Override + public String getName() { + return "xz"; + } + + @Override + public XZInputStream decode(InputStream in) throws IOException { + return new XZInputStream(in); + } + + @Override + public XZInputStream decode(InputStream in, int bufsize) throws IOException { + return new XZInputStream(in, bufsize / 1024); // KB limit + } + + @Override + public XZOutputStream encode(OutputStream out) throws IOException { + return new XZOutputStream(out, new LZMA2Options()); + } + + @Override + public XZOutputStream encode(OutputStream out, int bufsize) throws IOException { + return new XZOutputStream(out, new LZMA2Options()); // ignore bufsize + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/zip/ZipConnection.java b/io-codec/src/main/java/org/xbib/io/codec/zip/ZipConnection.java new file mode 100644 index 0000000..8ca4f63 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/zip/ZipConnection.java @@ -0,0 +1,58 @@ +package org.xbib.io.codec.zip; + +import org.xbib.io.codec.Connection; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; + +public class ZipConnection extends URLConnection implements Connection { + + private ZipSession session; + + private Path path; + + private OpenOption option; + + /** + * Constructs a URL connection to the specified URL. A connection to + * the object referenced by the URL is not created. + * + * @param url the specified URL. 
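+     * @throws URISyntaxException if the URL cannot be converted to a URI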
+ */ + public ZipConnection(URL url) throws URISyntaxException { + super(url); + this.path = Paths.get(url.toURI().getSchemeSpecificPart()); + this.option = StandardOpenOption.READ; + } + + @Override + public void connect() throws IOException { + this.session = createSession(); + } + + public void setPath(Path path, OpenOption option) { + this.path = path; + this.option = option; + } + + public Path getPath() { + return path; + } + + @Override + public ZipSession createSession() throws IOException { + ZipSession session = new ZipSession(); + session.setPath(path, option); + return session; + } + + @Override + public void close() throws IOException { + session.close(); + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/zip/ZipSession.java b/io-codec/src/main/java/org/xbib/io/codec/zip/ZipSession.java new file mode 100644 index 0000000..56f4a75 --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/zip/ZipSession.java @@ -0,0 +1,37 @@ +package org.xbib.io.codec.zip; + +import org.xbib.io.codec.ArchiveSession; +import org.xbib.io.archive.zip.ZipArchiveInputStream; +import org.xbib.io.archive.zip.ZipArchiveOutputStream; +import java.io.InputStream; +import java.io.OutputStream; + +public class ZipSession extends ArchiveSession { + + private final static String SUFFIX = "zip"; + + private ZipArchiveInputStream in; + + private ZipArchiveOutputStream out; + + protected String getSuffix() { + return SUFFIX; + } + + protected void open(InputStream in) { + this.in = new ZipArchiveInputStream(in); + } + + protected void open(OutputStream out) { + this.out = new ZipArchiveOutputStream(out); + } + + public ZipArchiveInputStream getInputStream() { + return in; + } + + public ZipArchiveOutputStream getOutputStream() { + return out; + } + +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/zip/ZipURLStreamHandler.java b/io-codec/src/main/java/org/xbib/io/codec/zip/ZipURLStreamHandler.java new file mode 100644 index 0000000..8acf5bf --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/zip/ZipURLStreamHandler.java @@ -0,0 +1,55 @@ +/* + * Licensed to Jörg Prante and xbib under one or more contributor + * license agreements. See the NOTICE.txt file distributed with this work + * for additional information regarding copyright ownership. + * + * Copyright (C) 2012 Jörg Prante and xbib + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, see http://www.gnu.org/licenses + * or write to the Free Software Foundation, Inc., 51 Franklin Street, + * Fifth Floor, Boston, MA 02110-1301 USA. + * + * The interactive user interfaces in modified source and object code + * versions of this program must display Appropriate Legal Notices, + * as required under Section 5 of the GNU Affero General Public License. + * + * In accordance with Section 7(b) of the GNU Affero General Public + * License, these Appropriate Legal Notices must retain the display of the + * "Powered by xbib" logo. 
If the display of the logo is not reasonably + * feasible for technical reasons, the Appropriate Legal Notices must display + * the words "Powered by xbib". + */ +package org.xbib.io.codec.zip; + +import org.xbib.io.codec.CustomURLStreamHandler; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; + +public class ZipURLStreamHandler extends CustomURLStreamHandler { + + @Override + public String getName() { + return "zip"; + } + + @Override + protected URLConnection openConnection(URL u) throws IOException { + try { + return new ZipConnection(u); + } catch (URISyntaxException e) { + throw new IOException(e); + } + } +} diff --git a/io-codec/src/main/java/org/xbib/io/codec/zlib/ZStreamCodec.java b/io-codec/src/main/java/org/xbib/io/codec/zlib/ZStreamCodec.java new file mode 100644 index 0000000..28b9dfa --- /dev/null +++ b/io-codec/src/main/java/org/xbib/io/codec/zlib/ZStreamCodec.java @@ -0,0 +1,39 @@ +package org.xbib.io.codec.zlib; + +import org.xbib.io.codec.StreamCodec; +import org.xbib.io.compress.zlib.ZInputStream; +import org.xbib.io.compress.zlib.ZOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Stream codec for the zlib ("Z") compression format. + */ +public class ZStreamCodec implements StreamCodec { + + @Override + public String getName() { + return "Z"; + } + + @Override + public ZInputStream decode(InputStream in) throws IOException { + return new ZInputStream(in); + } + + @Override + public ZInputStream decode(InputStream in, int bufsize) throws IOException { + return new ZInputStream(in, bufsize); + } + + @Override + public ZOutputStream encode(OutputStream out) throws IOException { + return new ZOutputStream(out); + } + + @Override + public ZOutputStream encode(OutputStream out, int bufsize) throws IOException { + return new ZOutputStream(out, bufsize); + } +} diff --git a/io-compress-bzip2/src/main/java/module-info.java b/io-compress-bzip2/src/main/java/module-info.java new file mode 100644 index 0000000..75118c7 --- /dev/null +++ b/io-compress-bzip2/src/main/java/module-info.java @@ -0,0 +1,3 @@ +module org.xbib.io.compress.bzip { + exports org.xbib.io.compress.bzip2; +} diff --git a/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/BZip2Deflate.java b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/BZip2Deflate.java new file mode 100644 index 0000000..3948691 --- /dev/null +++ b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/BZip2Deflate.java @@ -0,0 +1,1782 @@ +package org.xbib.io.compress.bzip2; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +/** + * A class to compress any input into the BZip2 format. + * This class is a rewrite of the CBZip2OutputStream class from the Ant project. + * The goal was to provide a class that is compatible in use with the standard + * java.util.zip.Deflater class. + * Unlike the original class, by default the stream will contain the BZ header + * (0x42, 0x5A) as the first 2 bytes in the stream.
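 + * <p> + * A minimal usage sketch (hypothetical; it mirrors the java.util.zip.Deflater call sequence this class emulates, with {@code data} and {@code out} assumed to exist): + * <pre> + * BZip2Deflate def = new BZip2Deflate(); + * def.setInput(data);        // buffer uncompressed bytes + * def.finish();              // flush the final block + * byte[] buf = new byte[8192]; + * int n; + * while ((n = def.deflate(buf)) > 0) { + *     out.write(buf, 0, n);  // drain compressed output + * } + * </pre>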
 + */ +public class BZip2Deflate implements Bzip2Constants { + + private static final int SETMASK = (1 << 21); + + private static final int CLEARMASK = (~SETMASK); + + private static final int GREATER_ICOST = 15; + + private static final int LESSER_ICOST = 0; + + private static final int SMALL_THRESH = 20; + + private static final int DEPTH_THRESH = 10; + + private long totalIn = 0; + + private long totalOut = 0; + + private boolean wroteHeader = false; + + private int bufferSize; + + /* + If you are ever unlucky/improbable enough + to get a stack overflow whilst sorting, + increase the following constant and try + again. In practice I have never seen the + stack go above 27 elems, so the following + limit seems very generous. + */ + protected static final int QSORT_STACK_SIZE = 1000; + + + /* + index of the last char in the block, so + the block size == last + 1. + */ + private int last; + + /* + index in zptr[] of original string after sorting. + */ + private int origPtr; + + /* + always: in the range 0 .. 9. + The current block size is 100000 * this number. + */ + private int blockSize100k; + + private boolean blockRandomised; + + private int bytesOut; + + private int bsBuff; + + private int bsLive; + + private CRC mCrc = new CRC(); + + private boolean[] inUse = new boolean[256]; + + private int nInUse; + + private char[] unseqToSeq = new char[256]; + + private char[] selector = new char[MAX_SELECTORS]; + + private char[] selectorMtf = new char[MAX_SELECTORS]; + + private char[] block; + + private int[] quadrant; + + private int[] zptr; + + private short[] szptr; + + private int[] ftab; + + private int nMTF; + + private int[] mtfFreq = new int[MAX_ALPHA_SIZE]; + + /* + * Used when sorting. If too many long comparisons + * happen, we stop sorting, randomise the block + * slightly, and try again. + */ + private int workFactor; + private int workDone; + private int workLimit; + private boolean firstAttempt; + private int currentChar = -1; + private int runLength = 0; + + /** + * Default constructor. + * This constructor has the same effect as calling BZip2Deflate(9, 8192, true). + * + * @throws java.io.IOException + */ + public BZip2Deflate() throws IOException { + this(9, 8192, true); + } + + /** + * This constructor has the same effect as calling BZip2Deflate(inBlockSize, 8192, true). + * + * @param inBlockSize input blocksize + * @throws java.io.IOException + */ + public BZip2Deflate(int inBlockSize) throws IOException { + this(inBlockSize, 8192, true); + } + + /** + * + * @param inBlockSize input blocksize + * @param bufferSize size of the internal buffer + * @param writeBZHeader true if you want to automatically add the BZ header. + * @throws java.io.IOException + */ + public BZip2Deflate(int inBlockSize, int bufferSize, boolean writeBZHeader) throws IOException { + this.wroteHeader = !writeBZHeader; + this.bufferSize = bufferSize; + if (inBlockSize > 9) { + inBlockSize = 9; + } + if (inBlockSize < 1) { + inBlockSize = 1; + } + blockSize100k = inBlockSize; + this.reset(); + } + + private void makeMaps() { + int i; + nInUse = 0; + for (i = 0; i < 256; i++) { + if (inUse[i]) { + unseqToSeq[i] = (char) nInUse; + nInUse++; + } + } + } + + protected static void hbMakeCodeLengths(char[] len, int[] freq, + int alphaSize, int maxLen) throws IOException { + /* + Nodes and heap entries run from 1. Entry 0 + for both the heap and nodes is a sentinel.
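 + This is a standard heap-based Huffman construction: the two lightest + nodes are joined repeatedly; if any resulting code exceeds maxLen, the + weights are roughly halved and the tree is rebuilt until all lengths fit.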
+ */ + int nNodes, nHeap, n1, n2, i, j, k; + boolean tooLong; + + int[] heap = new int[MAX_ALPHA_SIZE + 2]; + int[] weight = new int[MAX_ALPHA_SIZE * 2]; + int[] parent = new int[MAX_ALPHA_SIZE * 2]; + + for (i = 0; i < alphaSize; i++) { + weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + } + + while (true) { + nNodes = alphaSize; + nHeap = 0; + + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + { + int zz, tmp; + zz = nHeap; + tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + } + if (!(nHeap < (MAX_ALPHA_SIZE + 2))) { + throw new IOException("panic"); + } + + while (nHeap > 1) { + n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + { + int zz = 0, yy = 0, tmp = 0; + zz = 1; + tmp = heap[zz]; + while (true) { + yy = zz << 1; + if (yy > nHeap) { + break; + } + if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { + yy++; + } + if (weight[tmp] < weight[heap[yy]]) { + break; + } + heap[zz] = heap[yy]; + zz = yy; + } + heap[zz] = tmp; + } + n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + { + int zz = 0, yy = 0, tmp = 0; + zz = 1; + tmp = heap[zz]; + while (true) { + yy = zz << 1; + if (yy > nHeap) { + break; + } + if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { + yy++; + } + if (weight[tmp] < weight[heap[yy]]) { + break; + } + heap[zz] = heap[yy]; + zz = yy; + } + heap[zz] = tmp; + } + nNodes++; + parent[n1] = parent[n2] = nNodes; + + weight[nNodes] = ((weight[n1] & 0xffffff00) + (weight[n2] & 0xffffff00)) | (1 + (((weight[n1] & 0x000000ff) > (weight[n2] & 0x000000ff)) + ? (weight[n1] & 0x000000ff) + : (weight[n2] & 0x000000ff))); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + { + int zz = 0, tmp = 0; + zz = nHeap; + tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + } + if (!(nNodes < (MAX_ALPHA_SIZE * 2))) { + throw new IOException("panic"); + } + + tooLong = false; + for (i = 1; i <= alphaSize; i++) { + j = 0; + k = i; + while (parent[k] >= 0) { + k = parent[k]; + j++; + } + len[i - 1] = (char) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (!tooLong) { + break; + } + + for (i = 1; i < alphaSize; i++) { + j = weight[i] >> 8; + j = 1 + (j / 2); + weight[i] = j << 8; + } + } + } + + /** + * Resets deflater so that a new set of input data can be processed. + * + * @throws java.io.IOException + */ + public void reset() throws IOException { + block = null; + quadrant = null; + zptr = null; + ftab = null; + + bsSetStream(); + + workFactor = 50; + + allocateCompressStructures(); + initialize(); + initBlock(); + } + + /** + * Closes the compressor and discards any unprocessed input. + */ + public void end() { + try { + this.finish(); + } catch (IOException e) { + } + + this.inputBuffer = null; + this.outputBuffer = null; + } + + /** + * Sets input data for compression. 
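 + * Performs the initial run-length encoding (RLE1) stage of bzip2: consecutive identical bytes are counted, and a run is flushed into the block when it ends or reaches 255 bytes; runs of four or more are stored as four literals plus a count byte.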
+ * + * @param bv data to add + * @return the number of bytes in the outbuffer after adding this input + */ + public int setInput(int bv) throws IOException { + int b = (256 + bv) % 256; + if (currentChar != -1) { + if (currentChar == b) { + runLength++; + if (runLength > 254) { + writeRun(); + currentChar = -1; + runLength = 0; + } + } else { + writeRun(); + runLength = 1; + currentChar = b; + } + } else { + currentChar = b; + runLength++; + } + + this.totalIn++; + return this.inputBuffer.size(); + } + + /** + * Sets input data for compression. + * + * @param b byte array to add + * @return the number of bytes in the outbuffer after adding this input + * @throws java.io.IOException + */ + public int setInput(byte[] b) throws IOException { + return this.setInput(b, 0, b.length); + } + + /** + * Sets input data for compression. + * + * @param b byte array to add + * @param offset index in the array to start adding + * @param len number of bytes to add from array + * @return the number of bytes in the outbuffer after adding this input + * @throws java.io.IOException + */ + public int setInput(byte[] b, int offset, int len) throws IOException { + int rc = 0; + + for (int i = offset; i < offset + len; i++) { + setInput(b[i]); + } + + if (this.closed || this.inputBuffer.size() > bufferSize) { + rc = this.inputBuffer.size(); + } + + return rc; + } + + /** + * Fills specified buffer with compressed data. + * + * @param b array to fill + * @return number of bytes added to the array + */ + public int deflate(byte[] b) { + int rc = 0; + + if (!this.wroteHeader) { + this.wroteHeader = true; + + this.outputBuffer = new ByteArrayInputStream(new byte[]{0x42, 0x5A}); + } + + if (this.outputBuffer != null) { + try { + if (this.outputBuffer.available() > 0) { + rc = this.outputBuffer.read(b); + } + } catch (IOException eIO) { + } + } + + if (rc < b.length && this.inputBuffer.size() != 0) { + if (this.inputBuffer.size() < b.length - rc) { + byte[] t = this.inputBuffer.toByteArray(); + this.inputBuffer.reset(); + + System.arraycopy(t, 0, b, rc, t.length); + rc += t.length; + } else { + byte[] t = this.inputBuffer.toByteArray(); + this.inputBuffer.reset(); + + System.arraycopy(t, 0, b, rc, b.length - rc); + this.outputBuffer = new ByteArrayInputStream(t, b.length - rc, t.length - (b.length - rc)); + rc = b.length; + } + } + + this.totalOut += rc; + + return rc; + } + + /** + * Fills specified buffer with compressed data. 
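 + * Delegates to deflate(byte[]) through a temporary buffer of len bytes and copies the result into b starting at off.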
 + * + * @param b array to fill + * @param off index to start filling at + * @param len maximum number of bytes to add + * @return number of bytes added to the array + */ + public int deflate(byte[] b, int off, int len) { + byte[] buff = new byte[len]; // len is the maximum number of bytes to produce; off is only the destination offset + + int rc = this.deflate(buff); + + if (rc > 0) { + System.arraycopy(buff, 0, b, off, rc); + } + + return rc; + } + + private void writeRun() throws IOException { + if (last < allowableBlockSize) { + inUse[currentChar] = true; + for (int i = 0; i < runLength; i++) { + mCrc.updateCRC((char) currentChar); + } + switch (runLength) { + case 1: + last++; + block[last + 1] = (char) currentChar; + break; + case 2: + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + break; + case 3: + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + break; + default: + inUse[runLength - 4] = true; + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) (runLength - 4); + break; + } + } else { + endBlock(); + initBlock(); + writeRun(); + } + } + + private boolean closed = false; + + /** + * When called, indicates that compression should end with the current contents of the input buffer. + * + * @throws IOException if finish fails + */ + public void finish() throws IOException { + if (closed) { + return; + } + + if (runLength > 0) { + writeRun(); + } + currentChar = -1; + endBlock(); + endCompression(); + closed = true; + } + + /** + * Returns true if the end of the compressed data output stream has been reached. + * + * @return true if this compressor is finished. + */ + public boolean finished() { + return this.closed; + } + + /** + * Returns the total number of uncompressed bytes input so far. + * + * @return number of bytes read + */ + public long getBytesRead() { + return this.totalIn; + } + + /** + * Returns the total number of uncompressed bytes input so far. + * + * @return number of bytes read + */ + public int getTotalIn() { + return (int) this.totalIn; + } + + /** + * Returns the total number of compressed bytes output so far. + * + * @return number of compressed bytes + */ + public long getBytesWritten() { + return this.totalOut; + } + + /** + * Returns the total number of compressed bytes output so far. + * + * @return number of compressed bytes + */ + public int getTotalOut() { + return (int) this.totalOut; + } + + /** + * Returns true if the input data buffer is empty and setInput() should be called in order to provide more input. + * + * @return true if setInput() should be called before getting more output + */ + public boolean needsInput() { + return (this.inputBuffer.size() == 0 && !this.closed); + } + + private int blockCRC, combinedCRC; + + private void initialize() throws IOException { + bytesOut = 0; + + /* Write `magic' bytes h indicating file-format == huffmanised, + followed by a digit indicating blockSize100k.
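 + (The leading 'B','Z' pair is written separately by deflate() when the + header is enabled, so a complete stream begins 'B','Z','h',digit.)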
+ */ + bsPutUChar('h'); + bsPutUChar('0' + blockSize100k); + + combinedCRC = 0; + } + + private int allowableBlockSize; + + private void initBlock() { + // blockNo++; + mCrc.initialiseCRC(); + last = -1; + // ch = 0; + + for (int i = 0; i < 256; i++) { + inUse[i] = false; + } + + /* 20 is just a paranoia constant */ + allowableBlockSize = BASE_BLOCK_SIZE * blockSize100k - 20; + } + + private void endBlock() throws IOException { + blockCRC = mCrc.getFinalCRC(); + combinedCRC = (combinedCRC << 1) | (combinedCRC >>> 31); + combinedCRC ^= blockCRC; + + /* sort the block and establish posn of original string */ + doReversibleTransformation(); + + /* + A 6-byte block header, the value chosen arbitrarily + as 0x314159265359 :-). A 32 bit value does not really + give a strong enough guarantee that the value will not + appear by chance in the compressed datastream. Worst-case + probability of this event, for a 900k block, is about + 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. + For a compressed file of size 100Gb -- about 100000 blocks -- + only a 48-bit marker will do. NB: normal compression/ + decompression do *not* rely on these statistical properties. + They are only important when trying to recover blocks from + damaged files. + */ + bsPutUChar(0x31); + bsPutUChar(0x41); + bsPutUChar(0x59); + bsPutUChar(0x26); + bsPutUChar(0x53); + bsPutUChar(0x59); + + /* Now the block's CRC, so it is in a known place. */ + bsPutint(blockCRC); + + /* Now a single bit indicating randomisation. */ + if (blockRandomised) { + bsW(1, 1); + } else { + bsW(1, 0); + } + + /* Finally, block's contents proper. */ + moveToFrontCodeAndSend(); + } + + private void endCompression() throws IOException { + /* + Now another magic 48-bit number, 0x177245385090, to + indicate the end of the last block. (sqrt(pi), if + you want to know. I did want to use e, but it contains + too much repetition -- 27 18 28 18 28 46 -- for me + to feel statistically comfortable. Call me paranoid.) 
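 + The marker is followed by the combined CRC of all blocks, which the + decompressor verifies at end of stream.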
+ */ + bsPutUChar(0x17); + bsPutUChar(0x72); + bsPutUChar(0x45); + bsPutUChar(0x38); + bsPutUChar(0x50); + bsPutUChar(0x90); + + bsPutint(combinedCRC); + + bsFinishedWithStream(); + } + + private void hbAssignCodes(int[] code, char[] length, int minLen, + int maxLen, int alphaSize) { + int n, vec, i; + + vec = 0; + for (n = minLen; n <= maxLen; n++) { + for (i = 0; i < alphaSize; i++) { + if (length[i] == n) { + code[i] = vec; + vec++; + } + } + vec <<= 1; + } + } + + private void bsSetStream() { + inputBuffer = new ByteArrayOutputStream(); + bsLive = 0; + bsBuff = 0; + bytesOut = 0; + } + + private void bsFinishedWithStream() throws IOException { + while (bsLive > 0) { + int ch = (bsBuff >> 24); + + inputBuffer.write(ch); // write 8-bit + + bsBuff <<= 8; + bsLive -= 8; + bytesOut++; + } + } + + private void bsW(int n, int v) throws IOException { + while (bsLive >= 8) { + int ch = (bsBuff >> 24); + + inputBuffer.write(ch); // write 8-bit + + bsBuff <<= 8; + bsLive -= 8; + bytesOut++; + } + bsBuff |= (v << (32 - bsLive - n)); + bsLive += n; + } + + private void bsPutUChar(int c) throws IOException { + bsW(8, c); + } + + private void bsPutint(int u) throws IOException { + bsW(8, (u >> 24) & 0xff); + bsW(8, (u >> 16) & 0xff); + bsW(8, (u >> 8) & 0xff); + bsW(8, u & 0xff); + } + + private void bsPutIntVS(int numBits, int c) throws IOException { + bsW(numBits, c); + } + + private void sendMTFValues() throws IOException { + char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE]; + + int v, t, i, j, gs, ge, totc, bt, bc, iter; + int nSelectors = 0, alphaSize, minLen, maxLen, selCtr; + int nGroups; + + alphaSize = nInUse + 2; + for (t = 0; t < N_GROUPS; t++) { + for (v = 0; v < alphaSize; v++) { + len[t][v] = (char) GREATER_ICOST; + } + } + + /* Decide how many coding tables to use */ + if (nMTF <= 0) { + throw new IOException("panic"); + } + + if (nMTF < 200) { + nGroups = 2; + } else if (nMTF < 600) { + nGroups = 3; + } else if (nMTF < 1200) { + nGroups = 4; + } else if (nMTF < 2400) { + nGroups = 5; + } else { + nGroups = 6; + } + + /* Generate an initial set of coding tables */ + { + int nPart, remF, tFreq, aFreq; + + nPart = nGroups; + remF = nMTF; + gs = 0; + while (nPart > 0) { + tFreq = remF / nPart; + ge = gs - 1; + aFreq = 0; + while (aFreq < tFreq && ge < alphaSize - 1) { + ge++; + aFreq += mtfFreq[ge]; + } + + if (ge > gs && nPart != nGroups && nPart != 1 && ((nGroups - nPart) % 2 == 1)) { + aFreq -= mtfFreq[ge]; + ge--; + } + + for (v = 0; v < alphaSize; v++) { + if (v >= gs && v <= ge) { + len[nPart - 1][v] = (char) LESSER_ICOST; + } else { + len[nPart - 1][v] = (char) GREATER_ICOST; + } + } + + nPart--; + gs = ge + 1; + remF -= aFreq; + } + } + + int[][] rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; + int[] fave = new int[N_GROUPS]; + short[] cost = new short[N_GROUPS]; + /* + Iterate up to N_ITERS times to improve the tables. + */ + for (iter = 0; iter < N_ITERS; iter++) { + for (t = 0; t < nGroups; t++) { + fave[t] = 0; + } + + for (t = 0; t < nGroups; t++) { + for (v = 0; v < alphaSize; v++) { + rfreq[t][v] = 0; + } + } + + nSelectors = 0; + totc = 0; + gs = 0; + while (true) { + + /* Set group start & end marks. */ + if (gs >= nMTF) { + break; + } + ge = gs + G_SIZE - 1; + if (ge >= nMTF) { + ge = nMTF - 1; + } + + /* + Calculate the cost of this group as coded + by each of the coding tables. 
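 + The cost of a table is the total number of bits it would spend on the + group, i.e. the sum of its code lengths over the group's symbols.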
+ */ + for (t = 0; t < nGroups; t++) { + cost[t] = 0; + } + + if (nGroups == 6) { + short cost0, cost1, cost2, cost3, cost4, cost5; + cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; + for (i = gs; i <= ge; i++) { + short icv = szptr[i]; + cost0 += len[0][icv]; + cost1 += len[1][icv]; + cost2 += len[2][icv]; + cost3 += len[3][icv]; + cost4 += len[4][icv]; + cost5 += len[5][icv]; + } + cost[0] = cost0; + cost[1] = cost1; + cost[2] = cost2; + cost[3] = cost3; + cost[4] = cost4; + cost[5] = cost5; + } else { + for (i = gs; i <= ge; i++) { + short icv = szptr[i]; + for (t = 0; t < nGroups; t++) { + cost[t] += len[t][icv]; + } + } + } + + /* + Find the coding table which is best for this group, + and record its identity in the selector table. + */ + bc = 999999999; + bt = -1; + for (t = 0; t < nGroups; t++) { + if (cost[t] < bc) { + bc = cost[t]; + bt = t; + } + } + totc += bc; + fave[bt]++; + selector[nSelectors] = (char) bt; + nSelectors++; + + /* + Increment the symbol frequencies for the selected table. + */ + for (i = gs; i <= ge; i++) { + rfreq[bt][szptr[i]]++; + } + + gs = ge + 1; + } + + /* + Recompute the tables based on the accumulated frequencies. + */ + for (t = 0; t < nGroups; t++) { + hbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20); + } + } + if (!(nGroups < 8)) { + throw new IOException("panic"); + } + if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / G_SIZE)))) { + throw new IOException("panic"); + } + /* Compute MTF values for the selectors. */ + { + char[] pos = new char[N_GROUPS]; + char lli, tmp2, tmp; + for (i = 0; i < nGroups; i++) { + pos[i] = (char) i; + } + for (i = 0; i < nSelectors; i++) { + lli = selector[i]; + j = 0; + tmp = pos[j]; + while (lli != tmp) { + j++; + tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + } + pos[0] = tmp; + selectorMtf[i] = (char) j; + } + } + int[][] code = new int[N_GROUPS][MAX_ALPHA_SIZE]; + /* Assign actual codes for the tables. */ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (len[t][i] > maxLen) { + maxLen = len[t][i]; + } + if (len[t][i] < minLen) { + minLen = len[t][i]; + } + } + if (maxLen > 20) { + throw new IOException("panic"); + } + if (minLen < 1) { + throw new IOException("panic"); + } + hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); + } + + /* Transmit the mapping table. */ + { + boolean[] inUse16 = new boolean[16]; + for (i = 0; i < 16; i++) { + inUse16[i] = false; + for (j = 0; j < 16; j++) { + if (inUse[i * 16 + j]) { + inUse16[i] = true; + } + } + } + + //int nBytes = bytesOut; + for (i = 0; i < 16; i++) { + if (inUse16[i]) { + bsW(1, 1); + } else { + bsW(1, 0); + } + } + + for (i = 0; i < 16; i++) { + if (inUse16[i]) { + for (j = 0; j < 16; j++) { + if (inUse[i * 16 + j]) { + bsW(1, 1); + } else { + bsW(1, 0); + } + } + } + } + + } + + /* Now the selectors. */ + bsW(3, nGroups); + bsW(15, nSelectors); + for (i = 0; i < nSelectors; i++) { + for (j = 0; j < selectorMtf[i]; j++) { + bsW(1, 1); + } + bsW(1, 0); + } + + /* Now the coding tables. 
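 + Each table is sent delta-encoded: a 5-bit initial length, then for each + symbol a run of 2-bit steps (binary 10 = increment, 11 = decrement) + terminated by a single 0 bit.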
*/ + + for (t = 0; t < nGroups; t++) { + int curr = len[t][0]; + bsW(5, curr); + for (i = 0; i < alphaSize; i++) { + while (curr < len[t][i]) { + bsW(2, 2); + curr++; /* 10 */ + } + while (curr > len[t][i]) { + bsW(2, 3); + curr--; /* 11 */ + } + bsW(1, 0); + } + } + + /* And finally, the block data proper */ + selCtr = 0; + gs = 0; + while (true) { + if (gs >= nMTF) { + break; + } + ge = gs + G_SIZE - 1; + if (ge >= nMTF) { + ge = nMTF - 1; + } + for (i = gs; i <= ge; i++) { + bsW(len[selector[selCtr]][szptr[i]], + code[selector[selCtr]][szptr[i]]); + } + + gs = ge + 1; + selCtr++; + } + if (!(selCtr == nSelectors)) { + throw new IOException("panic"); + } + } + + private void moveToFrontCodeAndSend() throws IOException { + bsPutIntVS(24, origPtr); + generateMTFValues(); + sendMTFValues(); + } + + private ByteArrayOutputStream inputBuffer; + private ByteArrayInputStream outputBuffer; + + private void simpleSort(int lo, int hi, int d) { + int i, j, h, bigN, hp; + int v; + + bigN = hi - lo + 1; + if (bigN < 2) { + return; + } + + hp = 0; + while (incs[hp] < bigN) { + hp++; + } + hp--; + + for (; hp >= 0; hp--) { + h = incs[hp]; + + i = lo + h; + while (true) { + /* copy 1 */ + if (i > hi) { + break; + } + v = zptr[i]; + j = i; + while (fullGtU(zptr[j - h] + d, v + d)) { + zptr[j] = zptr[j - h]; + j = j - h; + if (j <= (lo + h - 1)) { + break; + } + } + zptr[j] = v; + i++; + + /* copy 2 */ + if (i > hi) { + break; + } + v = zptr[i]; + j = i; + while (fullGtU(zptr[j - h] + d, v + d)) { + zptr[j] = zptr[j - h]; + j = j - h; + if (j <= (lo + h - 1)) { + break; + } + } + zptr[j] = v; + i++; + + /* copy 3 */ + if (i > hi) { + break; + } + v = zptr[i]; + j = i; + while (fullGtU(zptr[j - h] + d, v + d)) { + zptr[j] = zptr[j - h]; + j = j - h; + if (j <= (lo + h - 1)) { + break; + } + } + zptr[j] = v; + i++; + + if (workDone > workLimit && firstAttempt) { + return; + } + } + } + } + + private void vswap(int p1, int p2, int n) { + int temp = 0; + while (n > 0) { + temp = zptr[p1]; + zptr[p1] = zptr[p2]; + zptr[p2] = temp; + p1++; + p2++; + n--; + } + } + + private char med3(char a, char b, char c) { + char t; + if (a > b) { + t = a; + a = b; + b = t; + } + if (b > c) { + t = b; + b = c; + c = t; + } + if (a > b) { + b = a; + } + return b; + } + + private static class StackElem { + + int ll; + int hh; + int dd; + } + + private void qSort3(int loSt, int hiSt, int dSt) throws IOException { + int unLo, unHi, ltLo, gtHi, med, n, m; + int sp, lo, hi, d; + StackElem[] stack = new StackElem[QSORT_STACK_SIZE]; + for (int count = 0; count < QSORT_STACK_SIZE; count++) { + stack[count] = new StackElem(); + } + + sp = 0; + + stack[sp].ll = loSt; + stack[sp].hh = hiSt; + stack[sp].dd = dSt; + sp++; + + while (sp > 0) { + if (sp >= QSORT_STACK_SIZE) { + throw new IOException("panic"); + } + + sp--; + lo = stack[sp].ll; + hi = stack[sp].hh; + d = stack[sp].dd; + + if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) { + simpleSort(lo, hi, d); + if (workDone > workLimit && firstAttempt) { + return; + } + continue; + } + + med = med3(block[zptr[lo] + d + 1], + block[zptr[hi] + d + 1], + block[zptr[(lo + hi) >> 1] + d + 1]); + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (true) { + while (true) { + if (unLo > unHi) { + break; + } + n = ((int) block[zptr[unLo] + d + 1]) - med; + if (n == 0) { + int temp = 0; + temp = zptr[unLo]; + zptr[unLo] = zptr[ltLo]; + zptr[ltLo] = temp; + ltLo++; + unLo++; + continue; + } + if (n > 0) { + break; + } + unLo++; + } + while (true) { + if (unLo > unHi) { + break; + } + n = ((int) 
block[zptr[unHi] + d + 1]) - med; + if (n == 0) { + int temp = 0; + temp = zptr[unHi]; + zptr[unHi] = zptr[gtHi]; + zptr[gtHi] = temp; + gtHi--; + unHi--; + continue; + } + if (n < 0) { + break; + } + unHi--; + } + if (unLo > unHi) { + break; + } + int temp = 0; + temp = zptr[unLo]; + zptr[unLo] = zptr[unHi]; + zptr[unHi] = temp; + unLo++; + unHi--; + } + + if (gtHi < ltLo) { + stack[sp].ll = lo; + stack[sp].hh = hi; + stack[sp].dd = d + 1; + sp++; + continue; + } + + n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo); + vswap(lo, unLo - n, n); + m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) : (gtHi - unHi); + vswap(unLo, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + stack[sp].ll = lo; + stack[sp].hh = n; + stack[sp].dd = d; + sp++; + + stack[sp].ll = n + 1; + stack[sp].hh = m - 1; + stack[sp].dd = d + 1; + sp++; + + stack[sp].ll = m; + stack[sp].hh = hi; + stack[sp].dd = d; + sp++; + } + } + + private void mainSort() throws IOException { + int i, j, ss, sb; + int[] runningOrder = new int[256]; + int[] copy = new int[256]; + boolean[] bigDone = new boolean[256]; + int c1, c2; + + + /* + In the various block-sized structures, live data runs + from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, + set up the overshoot area for block. + */ + + // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" ); + + // if nothing was written, last is -1, which gives div by zero + if (last >= 0) { + for (i = 0; i < NUM_OVERSHOOT_BYTES; i++) { + block[last + i + 2] = block[(i % (last + 1)) + 1]; + } + } + for (i = 0; i <= last + NUM_OVERSHOOT_BYTES; i++) { + quadrant[i] = 0; + } + + block[0] = (block[last + 1]); + + if (last < 4000) { + /* + Use simpleSort(), since the full sorting mechanism + has quite a large constant overhead. + */ + for (i = 0; i <= last; i++) { + zptr[i] = i; + } + firstAttempt = false; + workDone = workLimit = 0; + simpleSort(0, last, 0); + } else { + for (i = 0; i <= 255; i++) { + bigDone[i] = false; + } + + for (i = 0; i <= 65536; i++) { + ftab[i] = 0; + } + + c1 = block[0]; + for (i = 0; i <= last; i++) { + c2 = block[i + 1]; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } + + for (i = 1; i <= 65536; i++) { + ftab[i] += ftab[i - 1]; + } + + c1 = block[1]; + for (i = 0; i < last; i++) { + c2 = block[i + 2]; + j = (c1 << 8) + c2; + c1 = c2; + ftab[j]--; + zptr[ftab[j]] = i; + } + + j = ((block[last + 1]) << 8) + (block[1]); + ftab[j]--; + zptr[ftab[j]] = last; + + /* + Now ftab contains the first loc of every small bucket. + Calculate the running order, from smallest to largest + big bucket. + */ + + for (i = 0; i <= 255; i++) { + runningOrder[i] = i; + } + + { + int vv; + int h = 1; + do { + h = 3 * h + 1; + } while (h <= 256); + do { + h = h / 3; + for (i = h; i <= 255; i++) { + vv = runningOrder[i]; + j = i; + while ((ftab[((runningOrder[j - h]) + 1) << 8] - ftab[(runningOrder[j - h]) << 8]) > (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])) { + runningOrder[j] = runningOrder[j - h]; + j = j - h; + if (j <= (h - 1)) { + break; + } + } + runningOrder[j] = vv; + } + } while (h != 1); + } + + /* + The main sorting loop. + */ + for (i = 0; i <= 255; i++) { + + /* + Process big buckets, starting with the least full. + */ + ss = runningOrder[i]; + + /* + Complete the big bucket [ss] by quicksorting + any unsorted small buckets [ss, j]. Hopefully + previous pointer-scanning phases have already + completed many of the small buckets [ss, j], so + we don't have to sort them at all. 
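 + This is the two-pass strategy of the original bzip2: a radix sort on + the leading two bytes seeds the buckets, and qSort3() finishes each + bucket on demand.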
+ */ + for (j = 0; j <= 255; j++) { + sb = (ss << 8) + j; + if (!((ftab[sb] & SETMASK) == SETMASK)) { + int lo = ftab[sb] & CLEARMASK; + int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + qSort3(lo, hi, 2); + if (workDone > workLimit && firstAttempt) { + return; + } + } + ftab[sb] |= SETMASK; + } + } + + /* + The ss big bucket is now done. Record this fact, + and update the quadrant descriptors. Remember to + update quadrants in the overshoot area too, if + necessary. The "if (i < 255)" test merely skips + this updating for the last bucket processed, since + updating for the last bucket is pointless. + */ + bigDone[ss] = true; + + if (i < 255) { + int bbStart = ftab[ss << 8] & CLEARMASK; + int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; + int shifts = 0; + + while ((bbSize >> shifts) > 65534) { + shifts++; + } + + for (j = 0; j < bbSize; j++) { + int a2update = zptr[bbStart + j]; + int qVal = (j >> shifts); + quadrant[a2update] = qVal; + if (a2update < NUM_OVERSHOOT_BYTES) { + quadrant[a2update + last + 1] = qVal; + } + } + + if (!(((bbSize - 1) >> shifts) <= 65535)) { + throw new IOException("panic"); + } + } + + /* + Now scan this big bucket so as to synthesise the + sorted order for small buckets [t, ss] for all t != ss. + */ + for (j = 0; j <= 255; j++) { + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + } + + for (j = ftab[ss << 8] & CLEARMASK; + j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) { + c1 = block[zptr[j]]; + if (!bigDone[c1]) { + zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1; + copy[c1]++; + } + } + + for (j = 0; j <= 255; j++) { + ftab[(j << 8) + ss] |= SETMASK; + } + } + } + } + + private void randomiseBlock() { + int i; + int rNToGo = 0; + int rTPos = 0; + for (i = 0; i < 256; i++) { + inUse[i] = false; + } + + for (i = 0; i <= last; i++) { + if (rNToGo == 0) { + rNToGo = (char) NUMS[rTPos]; + rTPos++; + if (rTPos == 512) { + rTPos = 0; + } + } + rNToGo--; + block[i + 1] ^= ((rNToGo == 1) ? 
1 : 0); + // handle 16 bit signed numbers + block[i + 1] &= 0xFF; + + inUse[block[i + 1]] = true; + } + } + + private void doReversibleTransformation() throws IOException { + int i; + + workLimit = workFactor * last; + workDone = 0; + blockRandomised = false; + firstAttempt = true; + + mainSort(); + + if (workDone > workLimit && firstAttempt) { + randomiseBlock(); + workLimit = workDone = 0; + blockRandomised = true; + firstAttempt = false; + mainSort(); + } + + origPtr = -1; + for (i = 0; i <= last; i++) { + if (zptr[i] == 0) { + origPtr = i; + break; + } + } + + if (origPtr == -1) { + throw new IOException("panic"); + } + } + + private boolean fullGtU(int p1, int p2) { + int i1 = p1, i2 = p2; + int k; + char c1, c2; + int s1, s2; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + k = last + 1; + + do { + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + if (i1 > last) { + i1 -= last; + i1--; + } + if (i2 > last) { + i2 -= last; + i2--; + } + + k -= 4; + workDone++; + } while (k >= 0); + + return false; + } + + /* + Knuth's increments seem to work better + than Incerpi-Sedgewick here. Possibly + because the number of elems to sort is + usually small, typically <= 20. 
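 + Each increment is 3*h + 1, Knuth's standard gap sequence for shellsort.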
+ */ + private int[] incs = {1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484}; + + private void allocateCompressStructures() { + int n = BASE_BLOCK_SIZE * blockSize100k; + block = new char[(n + 1 + NUM_OVERSHOOT_BYTES)]; + quadrant = new int[(n + NUM_OVERSHOOT_BYTES)]; + zptr = new int[n]; + ftab = new int[65537]; + szptr = new short[2 * n]; + } + + private void generateMTFValues() { + char[] yy = new char[256]; + int i, j; + char tmp; + char tmp2; + int zPend; + int wr; + int eob; + + makeMaps(); + eob = nInUse + 1; + + for (i = 0; i <= eob; i++) { + mtfFreq[i] = 0; + } + + wr = 0; + zPend = 0; + for (i = 0; i < nInUse; i++) { + yy[i] = (char) i; + } + + + for (i = 0; i <= last; i++) { + char lli; + + lli = unseqToSeq[block[zptr[i]]]; + + j = 0; + tmp = yy[j]; + while (lli != tmp) { + j++; + tmp2 = tmp; + tmp = yy[j]; + yy[j] = tmp2; + } + + yy[0] = tmp; + + if (j == 0) { + zPend++; + } else { + if (zPend > 0) { + zPend--; + while (true) { + switch (zPend % 2) { + case 0: + szptr[wr] = (short) RUNA; + wr++; + mtfFreq[RUNA]++; + break; + case 1: + szptr[wr] = (short) RUNB; + wr++; + mtfFreq[RUNB]++; + break; + } + + if (zPend < 2) { + break; + } + zPend = (zPend - 2) / 2; + } + zPend = 0; + } + szptr[wr] = (short) (j + 1); + wr++; + mtfFreq[j + 1]++; + } + } + + if (zPend > 0) { + zPend--; + while (true) { + switch (zPend % 2) { + case 0: + szptr[wr] = (short) RUNA; + wr++; + mtfFreq[RUNA]++; + break; + case 1: + szptr[wr] = (short) RUNB; + wr++; + mtfFreq[RUNB]++; + break; + } + if (zPend < 2) { + break; + } + zPend = (zPend - 2) / 2; + } + } + + szptr[wr] = (short) eob; + wr++; + mtfFreq[eob]++; + + nMTF = wr; + } +} diff --git a/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/BZip2Inflate.java b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/BZip2Inflate.java new file mode 100644 index 0000000..5cf0c96 --- /dev/null +++ b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/BZip2Inflate.java @@ -0,0 +1,853 @@ +package org.xbib.io.compress.bzip2; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +/** + * An input stream that decompresses from the BZip2 format (without the file + * header chars) to be read as any other stream. + */ +public class BZip2Inflate implements Bzip2Constants { + + private ByteArrayOutputStream inputBuffer; + + private boolean readHeader = false; + + /* + index of the last char in the block, so + the block size == last + 1. + */ + private int last; + + /* + index in zptr[] of original string after sorting. + */ + private int origPtr; + + /* + always: in the range 0 .. 9. + The current block size is 100000 * this number. + */ + private int blockSize100k; + + private boolean blockRandomised; + + private int bsBuff; + + private int bsLive; + + private CRC mCrc = new CRC(); + + private boolean[] inUse = new boolean[256]; + + private int nInUse; + + private char[] seqToUnseq = new char[256]; + + private char[] selector = new char[MAX_SELECTORS]; + + private char[] selectorMtf = new char[MAX_SELECTORS]; + + private int[] tt; + + private char[] ll8; + + /* + freq table collected to save a pass over the data + during decompression. 
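 + unzftab[b] counts the occurrences of byte b in the current block; + setupBlock() turns it into the cumulative table used to invert the + Burrows-Wheeler transform.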
+ */ + private int[] unzftab = new int[256]; + private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; + private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; + private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; + private int[] minLens = new int[N_GROUPS]; + private ByteArrayInputStream bsStream; + private boolean streamEnd = false; + private int currentChar = -1; + private static final int START_BLOCK_STATE = 1; + private static final int RAND_PART_A_STATE = 2; + private static final int RAND_PART_B_STATE = 3; + private static final int RAND_PART_C_STATE = 4; + private static final int NO_RAND_PART_A_STATE = 5; + private static final int NO_RAND_PART_B_STATE = 6; + private static final int NO_RAND_PART_C_STATE = 7; + private int currentState = START_BLOCK_STATE; + private int storedBlockCRC, storedCombinedCRC; + private int computedBlockCRC, computedCombinedCRC; + private int i2, count, chPrev, ch2; + private int tPos; + private int rNToGo = 0; + private int rTPos = 0; + private int j2; + private char z; + + public BZip2Inflate() { + this(8192); + } + + public BZip2Inflate(int bufsize) { + ll8 = null; + tt = null; + this.inputBuffer = new ByteArrayOutputStream(bufsize); + bsSetStream(); + } + + private void makeMaps() { + nInUse = 0; + for (int j = 0; j < 256; j++) { + if (inUse[j]) { + seqToUnseq[nInUse] = (char) j; + nInUse++; + } + } + } + + private void initStream() throws IOException { + initialize(); + initBlock(); + setupBlock(); + } + + private int read() throws IOException { + if (streamEnd) { + return -1; + } else { + int retChar = currentChar; + switch (currentState) { + case START_BLOCK_STATE: + break; + case RAND_PART_A_STATE: + break; + case RAND_PART_B_STATE: + setupRandPartB(); + break; + case RAND_PART_C_STATE: + setupRandPartC(); + break; + case NO_RAND_PART_A_STATE: + break; + case NO_RAND_PART_B_STATE: + setupNoRandPartB(); + break; + case NO_RAND_PART_C_STATE: + setupNoRandPartC(); + break; + default: + break; + } + return retChar; + } + } + + private void initialize() throws IOException { + char magic1, magic2; + char magic3, magic4; + magic1 = bsGetUChar(); + magic2 = bsGetUChar(); + magic3 = bsGetUChar(); + magic4 = bsGetUChar(); + if (magic1 != 'B' || magic2 != 'Z' || magic3 != 'h' || magic4 < '1' || magic4 > '9') { + bsFinishedWithStream(); + streamEnd = true; + return; + } + + setDecompressStructureSizes(magic4 - '0'); + computedCombinedCRC = 0; + } + + private void initBlock() throws IOException { + char magic1, magic2, magic3, magic4; + char magic5, magic6; + magic1 = bsGetUChar(); + magic2 = bsGetUChar(); + magic3 = bsGetUChar(); + magic4 = bsGetUChar(); + magic5 = bsGetUChar(); + magic6 = bsGetUChar(); + if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) { + complete(); + return; + } + + if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) { + throw new IOException("bad block header"); + } + + storedBlockCRC = bsGetInt32(); + + if (bsR(1) == 1) { + blockRandomised = true; + } else { + blockRandomised = false; + } + + // currBlockNo++; + getAndMoveToFrontDecode(); + + mCrc.initialiseCRC(); + currentState = START_BLOCK_STATE; + } + + private void endBlock() throws IOException { + computedBlockCRC = mCrc.getFinalCRC(); + /* A bad CRC is considered a fatal error. 
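 + Every block carries its own 32-bit CRC; a running combined CRC is + additionally verified when the end-of-stream marker is reached.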
*/ + if (storedBlockCRC != computedBlockCRC) { + throw new IOException("CRC error"); + } + + computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >>> 31); + computedCombinedCRC ^= computedBlockCRC; + } + + private void complete() throws IOException { + storedCombinedCRC = bsGetInt32(); + if (storedCombinedCRC != computedCombinedCRC) { + throw new IOException("CRC error"); + } + + bsFinishedWithStream(); + streamEnd = true; + } + + private void bsFinishedWithStream() { + try { + if (this.bsStream != null && this.bsStream != System.in) { + this.bsStream.close(); + this.bsStream = null; + + } + } catch (IOException ioe) { + //ignore + } + } + + private void bsSetStream() { + bsStream = null; + bsLive = 0; + bsBuff = 0; + } + + private int bsR(int n) throws IOException { + int v; + while (bsLive < n) { + int zzi; + char thech = 0; + + thech = (char) bsStream.read(); + + if (thech == -1) { + throw new IOException("unexpected end of file"); + } + zzi = thech; + bsBuff = (bsBuff << 8) | (zzi & 0xff); + bsLive += 8; + } + + v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1); + bsLive -= n; + return v; + } + + private char bsGetUChar() throws IOException { + return (char) bsR(8); + } + + private int bsGetint() throws IOException { + int u = 0; + u = (u << 8) | bsR(8); + u = (u << 8) | bsR(8); + u = (u << 8) | bsR(8); + u = (u << 8) | bsR(8); + return u; + } + + private int bsGetIntVS(int numBits) throws IOException { + return bsR(numBits); + } + + private int bsGetInt32() throws IOException { + return bsGetint(); + } + + private void hbCreateDecodeTables(int[] limit, int[] base, + int[] perm, char[] length, + int minLen, int maxLen, int alphaSize) { + int pp, i, j, vec; + + pp = 0; + for (i = minLen; i <= maxLen; i++) { + for (j = 0; j < alphaSize; j++) { + if (length[j] == i) { + perm[pp] = j; + pp++; + } + } + } + + for (i = 0; i < MAX_CODE_LEN; i++) { + base[i] = 0; + } + for (i = 0; i < alphaSize; i++) { + base[length[i] + 1]++; + } + + for (i = 1; i < MAX_CODE_LEN; i++) { + base[i] += base[i - 1]; + } + + for (i = 0; i < MAX_CODE_LEN; i++) { + limit[i] = 0; + } + vec = 0; + + for (i = minLen; i <= maxLen; i++) { + vec += (base[i + 1] - base[i]); + limit[i] = vec - 1; + vec <<= 1; + } + for (i = minLen + 1; i <= maxLen; i++) { + base[i] = ((limit[i - 1] + 1) << 1) - base[i]; + } + } + + private void recvDecodingTables() throws IOException { + char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE]; + int i, j, t, nGroups, nSelectors, alphaSize; + int minLen, maxLen; + boolean[] inUse16 = new boolean[16]; + + /* Receive the mapping table */ + for (i = 0; i < 16; i++) { + if (bsR(1) == 1) { + inUse16[i] = true; + } else { + inUse16[i] = false; + } + } + + for (i = 0; i < 256; i++) { + inUse[i] = false; + } + + for (i = 0; i < 16; i++) { + if (inUse16[i]) { + for (j = 0; j < 16; j++) { + if (bsR(1) == 1) { + inUse[i * 16 + j] = true; + } + } + } + } + + makeMaps(); + alphaSize = nInUse + 2; + + /* Now the selectors */ + nGroups = bsR(3); + nSelectors = bsR(15); + for (i = 0; i < nSelectors; i++) { + j = 0; + while (bsR(1) == 1) { + j++; + } + selectorMtf[i] = (char) j; + } + + /* Undo the MTF values for the selectors. 
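 + Each selector was transmitted as its index in a move-to-front list, + so replaying the moves recovers the original table numbers.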
*/ + { + char[] pos = new char[N_GROUPS]; + char tmp, v; + for (v = 0; v < nGroups; v++) { + pos[v] = v; + } + + for (i = 0; i < nSelectors; i++) { + v = selectorMtf[i]; + tmp = pos[v]; + while (v > 0) { + pos[v] = pos[v - 1]; + v--; + } + pos[0] = tmp; + selector[i] = tmp; + } + } + + /* Now the coding tables */ + for (t = 0; t < nGroups; t++) { + int curr = bsR(5); + for (i = 0; i < alphaSize; i++) { + while (bsR(1) == 1) { + if (bsR(1) == 0) { + curr++; + } else { + curr--; + } + } + len[t][i] = (char) curr; + } + } + + /* Create the Huffman decoding tables */ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (len[t][i] > maxLen) { + maxLen = len[t][i]; + } + if (len[t][i] < minLen) { + minLen = len[t][i]; + } + } + hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, + maxLen, alphaSize); + minLens[t] = minLen; + } + } + + private void getAndMoveToFrontDecode() throws IOException { + char[] yy = new char[256]; + int i, j, nextSym, limitLast; + int EOB, groupNo, groupPos; + + limitLast = BASE_BLOCK_SIZE * blockSize100k; + origPtr = bsGetIntVS(24); + + recvDecodingTables(); + EOB = nInUse + 1; + groupNo = -1; + groupPos = 0; + + /* + Setting up the unzftab entries here is not strictly + necessary, but it does save having to do it later + in a separate pass, and so saves a block's worth of + cache misses. + */ + for (i = 0; i <= 255; i++) { + unzftab[i] = 0; + } + + for (i = 0; i <= 255; i++) { + yy[i] = (char) i; + } + + last = -1; + + { + int zt, zn, zvec, zj; + if (groupPos == 0) { + groupNo++; + groupPos = G_SIZE; + } + groupPos--; + zt = selector[groupNo]; + zn = minLens[zt]; + zvec = bsR(zn); + while (zvec > limit[zt][zn]) { + zn++; + { + { + while (bsLive < 1) { + int zzi; + char thech = 0; + + thech = (char) bsStream.read(); + + if (thech == -1) { + throw new IOException("unexpected end of file"); + } + zzi = thech; + bsBuff = (bsBuff << 8) | (zzi & 0xff); + bsLive += 8; + } + } + zj = (bsBuff >> (bsLive - 1)) & 1; + bsLive--; + } + zvec = (zvec << 1) | zj; + } + nextSym = perm[zt][zvec - base[zt][zn]]; + } + + while (true) { + + if (nextSym == EOB) { + break; + } + + if (nextSym == RUNA || nextSym == RUNB) { + char ch; + int s = -1; + int N = 1; + do { + if (nextSym == RUNA) { + s = s + (0 + 1) * N; + } else if (nextSym == RUNB) { + s = s + (1 + 1) * N; + } + N = N * 2; + { + int zt, zn, zvec, zj; + if (groupPos == 0) { + groupNo++; + groupPos = G_SIZE; + } + groupPos--; + zt = selector[groupNo]; + zn = minLens[zt]; + zvec = bsR(zn); + while (zvec > limit[zt][zn]) { + zn++; + { + { + while (bsLive < 1) { + int zzi; + char thech = 0; + + thech = (char) bsStream.read(); + + if (thech == -1) { + throw new IOException("unexpected end of file"); + } + zzi = thech; + bsBuff = (bsBuff << 8) | (zzi & 0xff); + bsLive += 8; + } + } + zj = (bsBuff >> (bsLive - 1)) & 1; + bsLive--; + } + zvec = (zvec << 1) | zj; + } + nextSym = perm[zt][zvec - base[zt][zn]]; + } + } while (nextSym == RUNA || nextSym == RUNB); + + s++; + ch = seqToUnseq[yy[0]]; + unzftab[ch] += s; + + while (s > 0) { + last++; + ll8[last] = ch; + s--; + } + + if (last >= limitLast) { + throw new IOException("block overrun"); + } + continue; + } else { + char tmp; + last++; + if (last >= limitLast) { + throw new IOException("block overrun"); + } + + tmp = yy[nextSym - 1]; + unzftab[seqToUnseq[tmp]]++; + ll8[last] = seqToUnseq[tmp]; + + /* + This loop is hammered during decompression, + hence the unrolling. 
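 + The unrolled code below is equivalent to this reference loop: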
+ + for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1]; + */ + + j = nextSym - 1; + for (; j > 3; j -= 4) { + yy[j] = yy[j - 1]; + yy[j - 1] = yy[j - 2]; + yy[j - 2] = yy[j - 3]; + yy[j - 3] = yy[j - 4]; + } + for (; j > 0; j--) { + yy[j] = yy[j - 1]; + } + + yy[0] = tmp; + { + int zt, zn, zvec, zj; + if (groupPos == 0) { + groupNo++; + groupPos = G_SIZE; + } + groupPos--; + zt = selector[groupNo]; + zn = minLens[zt]; + zvec = bsR(zn); + while (zvec > limit[zt][zn]) { + zn++; + { + { + while (bsLive < 1) { + int zzi; + char thech = 0; + + thech = (char) bsStream.read(); + + zzi = thech; + bsBuff = (bsBuff << 8) | (zzi & 0xff); + bsLive += 8; + } + } + zj = (bsBuff >> (bsLive - 1)) & 1; + bsLive--; + } + zvec = (zvec << 1) | zj; + } + nextSym = perm[zt][zvec - base[zt][zn]]; + } + continue; + } + } + } + + private void setupBlock() throws IOException { + int i; + int[] cftab = new int[257]; + char ch; + + cftab[0] = 0; + for (i = 1; i <= 256; i++) { + cftab[i] = unzftab[i - 1]; + } + for (i = 1; i <= 256; i++) { + cftab[i] += cftab[i - 1]; + } + + for (i = 0; i <= last; i++) { + ch = ll8[i]; + tt[cftab[ch]] = i; + cftab[ch]++; + } + cftab = null; + + tPos = tt[origPtr]; + + count = 0; + i2 = 0; + ch2 = 256; /* not a char and not EOF */ + + if (blockRandomised) { + rNToGo = 0; + rTPos = 0; + setupRandPartA(); + } else { + setupNoRandPartA(); + } + } + + private void setupRandPartA() throws IOException { + if (i2 <= last) { + chPrev = ch2; + ch2 = ll8[tPos]; + tPos = tt[tPos]; + if (rNToGo == 0) { + rNToGo = NUMS[rTPos]; + rTPos++; + if (rTPos == 512) { + rTPos = 0; + } + } + rNToGo--; + ch2 ^= ((rNToGo == 1) ? 1 : 0); + i2++; + + currentChar = ch2; + currentState = RAND_PART_B_STATE; + mCrc.updateCRC(ch2); + } else { + endBlock(); + initBlock(); + setupBlock(); + } + } + + private void setupNoRandPartA() throws IOException { + if (i2 <= last) { + chPrev = ch2; + ch2 = ll8[tPos]; + tPos = tt[tPos]; + i2++; + + currentChar = ch2; + currentState = NO_RAND_PART_B_STATE; + mCrc.updateCRC(ch2); + } else { + endBlock(); + initBlock(); + setupBlock(); + } + } + + private void setupRandPartB() throws IOException { + if (ch2 != chPrev) { + currentState = RAND_PART_A_STATE; + count = 1; + setupRandPartA(); + } else { + count++; + if (count >= 4) { + z = ll8[tPos]; + tPos = tt[tPos]; + if (rNToGo == 0) { + rNToGo = NUMS[rTPos]; + rTPos++; + if (rTPos == 512) { + rTPos = 0; + } + } + rNToGo--; + z ^= ((rNToGo == 1) ? 
1 : 0); + j2 = 0; + currentState = RAND_PART_C_STATE; + setupRandPartC(); + } else { + currentState = RAND_PART_A_STATE; + setupRandPartA(); + } + } + } + + private void setupRandPartC() throws IOException { + if (j2 < (int) z) { + currentChar = ch2; + mCrc.updateCRC(ch2); + j2++; + } else { + currentState = RAND_PART_A_STATE; + i2++; + count = 0; + setupRandPartA(); + } + } + + private void setupNoRandPartB() throws IOException { + if (ch2 != chPrev) { + currentState = NO_RAND_PART_A_STATE; + count = 1; + setupNoRandPartA(); + } else { + count++; + if (count >= 4) { + z = ll8[tPos]; + tPos = tt[tPos]; + currentState = NO_RAND_PART_C_STATE; + j2 = 0; + setupNoRandPartC(); + } else { + currentState = NO_RAND_PART_A_STATE; + setupNoRandPartA(); + } + } + } + + private void setupNoRandPartC() throws IOException { + if (j2 < (int) z) { + currentChar = ch2; + mCrc.updateCRC(ch2); + j2++; + } else { + currentState = NO_RAND_PART_A_STATE; + i2++; + count = 0; + setupNoRandPartA(); + } + } + + private void setDecompressStructureSizes(int newSize100k) throws IOException { + if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k && blockSize100k <= 9)) { + throw new IOException("invalid block size"); + } + + blockSize100k = newSize100k; + + if (newSize100k == 0) { + return; + } + + int n = BASE_BLOCK_SIZE * newSize100k; + ll8 = new char[n]; + tt = new int[n]; + } + + public void setInput(byte[] b) throws IOException { + this.setInput(b, 0, b.length); + } + + public void setInput(byte[] b, int off, int len) throws IOException { + this.inputBuffer.write(b, off, len); + + if (!this.readHeader) { + if (this.inputBuffer.size() > 10) { + this.bsStream = new ByteArrayInputStream(this.inputBuffer.toByteArray()); + this.inputBuffer.reset(); + + this.initStream(); + + if (this.streamEnd == true) { + throw new IOException("invalid BZip2 stream"); + } + + this.readHeader = true; + } + } else if (this.inputBuffer.size() > 0 && (this.bsStream == null || !(this.bsStream.available() > 0))) { + this.flush(); + } else if (this.inputBuffer.size() > 8192) { + this.flush(); + } + } + + public boolean needsInput() { + this.flush(); + + return !(this.bsStream.available() > 0); + } + + public int inflate(byte[] b) throws IOException { + int rc = 0; + + while (this.bsStream != null && this.bsStream.available() > 0 && rc < b.length) { + b[rc++] = (byte) this.read(); + } + + this.flush(); + + return rc; + } + + public void flush() { + if (this.inputBuffer.size() > 0) { + if (this.bsStream == null || !(this.bsStream.available() > 0)) { + this.bsStream = new ByteArrayInputStream(this.inputBuffer.toByteArray()); + } else { + byte[] buff = new byte[this.bsStream.available() + this.inputBuffer.size()]; + + int i = 0; + + try { + i = this.bsStream.read(buff); + } catch (IOException e) { + } + + if (i == 0) { + this.bsStream = new ByteArrayInputStream(this.inputBuffer.toByteArray()); + } else { + byte[] o = this.inputBuffer.toByteArray(); + + System.arraycopy(o, 0, buff, i, o.length); + + this.bsStream = new ByteArrayInputStream(buff); + } + } + + this.inputBuffer.reset(); + } + } +} diff --git a/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/Bzip2Constants.java b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/Bzip2Constants.java new file mode 100644 index 0000000..4891005 --- /dev/null +++ b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/Bzip2Constants.java @@ -0,0 +1,73 @@ +package org.xbib.io.compress.bzip2; + +/** + * Base class for both the compress and decompress classes. 
Holds common arrays, + * and static data. + */ +interface Bzip2Constants { + + int BASE_BLOCK_SIZE = 100000; + int MAX_ALPHA_SIZE = 258; + int MAX_CODE_LEN = 23; + int RUNA = 0; + int RUNB = 1; + int N_GROUPS = 6; + int G_SIZE = 50; + int N_ITERS = 4; + int MAX_SELECTORS = (2 + (900000 / G_SIZE)); + int NUM_OVERSHOOT_BYTES = 20; + int[] NUMS = { + 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, + 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, + 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, + 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, + 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, + 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, + 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, + 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, + 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, + 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, + 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, + 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, + 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, + 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, + 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, + 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, + 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, + 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, + 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, + 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, + 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, + 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, + 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, + 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, + 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, + 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, + 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, + 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, + 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, + 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, + 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, + 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, + 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, + 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, + 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, + 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, + 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, + 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, + 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, + 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, + 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, + 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, + 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, + 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, + 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, + 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, + 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, + 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, + 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, + 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, + 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, + 936, 638 + }; +} diff --git a/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/Bzip2InputStream.java b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/Bzip2InputStream.java new file mode 100644 index 0000000..54213ed --- /dev/null +++ b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/Bzip2InputStream.java @@ -0,0 +1,915 @@ +package org.xbib.io.compress.bzip2; + +import java.io.IOException; +import java.io.InputStream; + +/** + * An input stream that decompresses from the BZip2 format (without the file + * header chars) to be read as any other 
stream. + */ +public class Bzip2InputStream extends InputStream implements Bzip2Constants { + + private void makeMaps() { + final boolean[] inUse = this.data.inUse; + final byte[] seqToUnseq = this.data.seqToUnseq; + + int nInUseShadow = 0; + + for (int i = 0; i < 256; i++) { + if (inUse[i]) { + seqToUnseq[nInUseShadow++] = (byte) i; + } + } + + this.nInUse = nInUseShadow; + } + + /** + * Index of the last char in the block, so the block size == last + 1. + */ + private int last; + /** + * Index in zptr[] of original string after sorting. + */ + private int origPtr; + /** + * always: in the range 0 .. 9. + * The current block size is 100000 * this number. + */ + private int blockSize100k; + private boolean blockRandomised; + private int bsBuff; + private int bsLive; + private final CRC crc = new CRC(); + private int nInUse; + private InputStream in; + private int currentChar = -1; + private static final int EOF = 0; + private static final int START_BLOCK_STATE = 1; + private static final int RAND_PART_A_STATE = 2; + private static final int RAND_PART_B_STATE = 3; + private static final int RAND_PART_C_STATE = 4; + private static final int NO_RAND_PART_A_STATE = 5; + private static final int NO_RAND_PART_B_STATE = 6; + private static final int NO_RAND_PART_C_STATE = 7; + private int currentState = START_BLOCK_STATE; + private int storedBlockCRC, storedCombinedCRC; + private int computedBlockCRC, computedCombinedCRC; + + // Variables used by setup* methods exclusively + private int setupcount; + private int setupch2; + private int setupchPrev; + private int setupi2; + private int setupj2; + private int setuprNToGo; + private int setuprTPos; + private int setuptPos; + private char setupz; + /** + * All memory intensive stuff. + * This field is initialized by initBlock(). + */ + private Data data; + + /** + * Constructs a new CBZip2InputStream which decompresses bytes read from + * the specified stream. + * Although BZip2 headers are marked with the magic + * "Bz" this constructor expects the next byte in the + * stream to be the first one after the magic. Thus callers have + * to skip the first two bytes. Otherwise this constructor will + * throw an exception. + * + * @throws java.io.IOException if the stream content is malformed or an I/O error occurs. + * @throws NullPointerException if in == null + */ + public Bzip2InputStream(final InputStream in) throws IOException { + super(); + this.in = in; + init(); + } + + public Bzip2InputStream(final InputStream in, int bufsize) throws IOException { + super(); + this.in = in; + init(); + } + + @Override + public final int read() throws IOException { + if (this.in != null) { + return read0(); + } else { + throw new IOException("stream closed"); + } + } + + @Override + public final int read(final byte[] dest, final int offs, final int len) + throws IOException { + if (offs < 0) { + throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); + } + if (len < 0) { + throw new IndexOutOfBoundsException("len(" + len + ") < 0."); + } + if (offs + len > dest.length) { + throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > dest.length(" + dest.length + ")."); + } + if (this.in == null) { + throw new IOException("stream closed"); + } + + final int hi = offs + len; + int destOffs = offs; + for (int b; (destOffs < hi) && ((b = read0()) >= 0); ) { + dest[destOffs++] = (byte) b; + } + + return (destOffs == offs) ? 
-1 : (destOffs - offs); + } + + private int read0() throws IOException { + final int retChar = this.currentChar; + + switch (this.currentState) { + case EOF: + return -1; + + case START_BLOCK_STATE: + throw new IllegalStateException(); + + case RAND_PART_A_STATE: + throw new IllegalStateException(); + + case RAND_PART_B_STATE: + setupRandPartB(); + break; + + case RAND_PART_C_STATE: + setupRandPartC(); + break; + + case NO_RAND_PART_A_STATE: + throw new IllegalStateException(); + + case NO_RAND_PART_B_STATE: + setupNoRandPartB(); + break; + + case NO_RAND_PART_C_STATE: + setupNoRandPartC(); + break; + + default: + throw new IllegalStateException(); + } + + return retChar; + } + + private void init() throws IOException { + if (null == in) { + throw new IOException("no input stream"); + } + if (in.available() == 0) { + throw new IOException("empty input stream"); + } + // skip "BZ" marker + in.read(); + in.read(); + int magic2 = this.in.read(); + if (magic2 != 'h') { + throw new IOException("stream is not bzip2: expected 'h'" + " as first byte but got '" + (char) magic2 + "'"); + } + + int blockSize = this.in.read(); + if ((blockSize < '1') || (blockSize > '9')) { + throw new IOException("stream is not bzip2: illegal " + "blocksize " + (char) blockSize); + } + + this.blockSize100k = blockSize - '0'; + + initBlock(); + setupBlock(); + } + + private void initBlock() throws IOException { + char magic0 = bsGetUByte(); + char magic1 = bsGetUByte(); + char magic2 = bsGetUByte(); + char magic3 = bsGetUByte(); + char magic4 = bsGetUByte(); + char magic5 = bsGetUByte(); + + if (magic0 == 0x17 && + magic1 == 0x72 && + magic2 == 0x45 && + magic3 == 0x38 && + magic4 == 0x50 && + magic5 == 0x90) { + complete(); // end of file + } else if (magic0 != 0x31 || // '1' + magic1 != 0x41 || // ')' + magic2 != 0x59 || // 'Y' + magic3 != 0x26 || // '&' + magic4 != 0x53 || // 'S' + magic5 != 0x59 // 'Y' + ) { + this.currentState = EOF; + throw new IOException("bad block header"); + } else { + this.storedBlockCRC = bsGetInt(); + this.blockRandomised = bsR(1) == 1; + + /** + * Allocate data here instead in constructor, so we do not + * allocate it if the input file is empty. + */ + if (this.data == null) { + this.data = new Data(this.blockSize100k); + } + + // currBlockNo++; + getAndMoveToFrontDecode(); + + this.crc.initialiseCRC(); + this.currentState = START_BLOCK_STATE; + } + } + + private void endBlock() throws IOException { + this.computedBlockCRC = this.crc.getFinalCRC(); + + // A bad CRC is considered a fatal error. 
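+ // The stream-level CRC folds in each block CRC: the running combined value
+ // is rotated left by one bit and XORed with the block CRC, i.e.
+ // combined = ((combined << 1) | (combined >>> 31)) ^ blockCRC.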
+ if (this.storedBlockCRC != this.computedBlockCRC) { + // make next blocks readable without error + // (repair feature, not yet documented, not tested) + this.computedCombinedCRC = (this.storedCombinedCRC << 1) | (this.storedCombinedCRC >>> 31); + this.computedCombinedCRC ^= this.storedBlockCRC; + + throw new IOException("CRC error"); + } + + this.computedCombinedCRC = (this.computedCombinedCRC << 1) | (this.computedCombinedCRC >>> 31); + this.computedCombinedCRC ^= this.computedBlockCRC; + } + + private void complete() throws IOException { + this.storedCombinedCRC = bsGetInt(); + this.currentState = EOF; + this.data = null; + + if (this.storedCombinedCRC != this.computedCombinedCRC) { + throw new IOException("CRC error"); + } + } + + @Override + public final void close() throws IOException { + InputStream inShadow = this.in; + if (inShadow != null) { + try { + if (inShadow != System.in) { + inShadow.close(); + } + } finally { + this.data = null; + this.in = null; + } + } + } + + private int bsR(final int n) throws IOException { + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + if (bsLiveShadow < n) { + final InputStream inShadow = this.in; + do { + int thech = inShadow.read(); + + if (thech < 0) { + throw new IOException("unexpected end of stream"); + } + + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + } while (bsLiveShadow < n); + + this.bsBuff = bsBuffShadow; + } + + this.bsLive = bsLiveShadow - n; + return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1); + } + + private boolean bsGetBit() throws IOException { + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + if (bsLiveShadow < 1) { + int thech = this.in.read(); + + if (thech < 0) { + throw new IOException("unexpected end of stream"); + } + + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + this.bsBuff = bsBuffShadow; + } + + this.bsLive = bsLiveShadow - 1; + return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0; + } + + private char bsGetUByte() throws IOException { + return (char) bsR(8); + } + + private int bsGetInt() throws IOException { + return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8); + } + + /** + * Called by createHuffmanDecodingTables() exclusively. 
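+ * Builds the canonical Huffman decoding tables from the per-symbol code
+ * lengths: perm lists the symbols in code order, while base and limit let the
+ * decoder accept a code as soon as its value is no larger than limit[codeLength].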
+ */ + private static void hbCreateDecodeTables(final int[] limit, + final int[] base, + final int[] perm, + final char[] length, + final int minLen, + final int maxLen, + final int alphaSize) { + for (int i = minLen, pp = 0; i <= maxLen; i++) { + for (int j = 0; j < alphaSize; j++) { + if (length[j] == i) { + perm[pp++] = j; + } + } + } + + for (int i = MAX_CODE_LEN; --i > 0; ) { + base[i] = 0; + limit[i] = 0; + } + + for (int i = 0; i < alphaSize; i++) { + base[length[i] + 1]++; + } + + for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { + b += base[i]; + base[i] = b; + } + + for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { + final int nb = base[i + 1]; + vec += nb - b; + b = nb; + limit[i] = vec - 1; + vec <<= 1; + } + + for (int i = minLen + 1; i <= maxLen; i++) { + base[i] = ((limit[i - 1] + 1) << 1) - base[i]; + } + } + + private void recvDecodingTables() throws IOException { + final Data dataShadow = this.data; + final boolean[] inUse = dataShadow.inUse; + final byte[] pos = dataShadow.recvDecodingTables_pos; + final byte[] selector = dataShadow.selector; + final byte[] selectorMtf = dataShadow.selectorMtf; + + int inUse16 = 0; + + /* Receive the mapping table */ + for (int i = 0; i < 16; i++) { + if (bsGetBit()) { + inUse16 |= 1 << i; + } + } + + for (int i = 256; --i >= 0; ) { + inUse[i] = false; + } + + for (int i = 0; i < 16; i++) { + if ((inUse16 & (1 << i)) != 0) { + final int i16 = i << 4; + for (int j = 0; j < 16; j++) { + if (bsGetBit()) { + inUse[i16 + j] = true; + } + } + } + } + + makeMaps(); + final int alphaSize = this.nInUse + 2; + + /* Now the selectors */ + final int nGroups = bsR(3); + final int nSelectors = bsR(15); + + for (int i = 0; i < nSelectors; i++) { + int j = 0; + while (bsGetBit()) { + j++; + } + selectorMtf[i] = (byte) j; + } + + /* Undo the MTF values for the selectors. */ + for (int v = nGroups; --v >= 0; ) { + pos[v] = (byte) v; + } + + for (int i = 0; i < nSelectors; i++) { + int v = selectorMtf[i] & 0xff; + final byte tmp = pos[v]; + while (v > 0) { + // nearly all times v is zero, 4 in most other cases + pos[v] = pos[v - 1]; + v--; + } + pos[0] = tmp; + selector[i] = tmp; + } + + final char[][] len = dataShadow.temp_charArray2d; + + /* Now the coding tables */ + for (int t = 0; t < nGroups; t++) { + int curr = bsR(5); + final char[] len_t = len[t]; + for (int i = 0; i < alphaSize; i++) { + while (bsGetBit()) { + curr += bsGetBit() ? -1 : 1; + } + len_t[i] = (char) curr; + } + } + + // finally create the Huffman tables + createHuffmanDecodingTables(alphaSize, nGroups); + } + + /** + * Called by recvDecodingTables() exclusively. 
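+ * Computes the minimum and maximum code length of each of the nGroups coding
+ * tables, records the minimum in minLens, and delegates the actual table
+ * construction to hbCreateDecodeTables().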
+ */ + private void createHuffmanDecodingTables(final int alphaSize, + final int nGroups) { + final Data dataShadow = this.data; + final char[][] len = dataShadow.temp_charArray2d; + final int[] minLens = dataShadow.minLens; + final int[][] limit = dataShadow.limit; + final int[][] base = dataShadow.base; + final int[][] perm = dataShadow.perm; + + for (int t = 0; t < nGroups; t++) { + int minLen = 32; + int maxLen = 0; + final char[] len_t = len[t]; + for (int i = alphaSize; --i >= 0; ) { + final char lent = len_t[i]; + if (lent > maxLen) { + maxLen = lent; + } + if (lent < minLen) { + minLen = lent; + } + } + hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, + maxLen, alphaSize); + minLens[t] = minLen; + } + } + + private void getAndMoveToFrontDecode() throws IOException { + this.origPtr = bsR(24); + recvDecodingTables(); + + final InputStream inShadow = this.in; + final Data dataShadow = this.data; + final byte[] ll8 = dataShadow.ll8; + final int[] unzftab = dataShadow.unzftab; + final byte[] selector = dataShadow.selector; + final byte[] seqToUnseq = dataShadow.seqToUnseq; + final char[] yy = dataShadow.getAndMoveToFrontDecodeyy; + final int[] minLens = dataShadow.minLens; + final int[][] limit = dataShadow.limit; + final int[][] base = dataShadow.base; + final int[][] perm = dataShadow.perm; + final int limitLast = this.blockSize100k * 100000; + + /* + Setting up the unzftab entries here is not strictly + necessary, but it does save having to do it later + in a separate pass, and so saves a block's worth of + cache misses. + */ + for (int i = 256; --i >= 0; ) { + yy[i] = (char) i; + unzftab[i] = 0; + } + + int groupNo = 0; + int groupPos = G_SIZE - 1; + final int eob = this.nInUse + 1; + int nextSym = getAndMoveToFrontDecode0(0); + int bsBuffShadow = this.bsBuff; + int bsLiveShadow = this.bsLive; + int lastShadow = -1; + int zt = selector[groupNo] & 0xff; + int[] basezt = base[zt]; + int[] limitzt = limit[zt]; + int[] permzt = perm[zt]; + int minLenszt = minLens[zt]; + + while (nextSym != eob) { + if ((nextSym == RUNA) || (nextSym == RUNB)) { + int s = -1; + + for (int n = 1; true; n <<= 1) { + if (nextSym == RUNA) { + s += n; + } else if (nextSym == RUNB) { + s += n << 1; + } else { + break; + } + + if (groupPos == 0) { + groupPos = G_SIZE - 1; + zt = selector[++groupNo] & 0xff; + basezt = base[zt]; + limitzt = limit[zt]; + permzt = perm[zt]; + minLenszt = minLens[zt]; + } else { + groupPos--; + } + + int zn = minLenszt; + + // Inlined: + // int zvec = bsR(zn); + while (bsLiveShadow < zn) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) & ((1 << zn) - 1); + bsLiveShadow -= zn; + + while (zvec > limitzt[zn]) { + zn++; + while (bsLiveShadow < 1) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + bsLiveShadow--; + zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); + } + nextSym = permzt[zvec - basezt[zn]]; + } + + final byte ch = seqToUnseq[yy[0]]; + unzftab[ch & 0xff] += s + 1; + + while (s-- >= 0) { + ll8[++lastShadow] = ch; + } + + if (lastShadow >= limitLast) { + throw new IOException("block overrun"); + } + } else { + if (++lastShadow >= limitLast) { + throw new IOException("block overrun"); + 
} + + final char tmp = yy[nextSym - 1]; + unzftab[seqToUnseq[tmp] & 0xff]++; + ll8[lastShadow] = seqToUnseq[tmp]; + + /* + This loop is hammered during decompression, + hence avoid native method call overhead of + System.arraycopy for very small ranges to copy. + */ + if (nextSym <= 16) { + for (int j = nextSym - 1; j > 0; ) { + yy[j] = yy[--j]; + } + } else { + System.arraycopy(yy, 0, yy, 1, nextSym - 1); + } + + yy[0] = tmp; + + if (groupPos == 0) { + groupPos = G_SIZE - 1; + zt = selector[++groupNo] & 0xff; + basezt = base[zt]; + limitzt = limit[zt]; + permzt = perm[zt]; + minLenszt = minLens[zt]; + } else { + groupPos--; + } + + int zn = minLenszt; + + // Inlined: + // int zvec = bsR(zn); + while (bsLiveShadow < zn) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) & ((1 << zn) - 1); + bsLiveShadow -= zn; + + while (zvec > limitzt[zn]) { + zn++; + while (bsLiveShadow < 1) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + bsLiveShadow--; + zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); + } + nextSym = permzt[zvec - basezt[zn]]; + } + } + + this.last = lastShadow; + this.bsLive = bsLiveShadow; + this.bsBuff = bsBuffShadow; + } + + private int getAndMoveToFrontDecode0(final int groupNo) + throws IOException { + final InputStream inShadow = this.in; + final Data dataShadow = this.data; + final int zt = dataShadow.selector[groupNo] & 0xff; + final int[] limitzt = dataShadow.limit[zt]; + int zn = dataShadow.minLens[zt]; + int zvec = bsR(zn); + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + while (zvec > limitzt[zn]) { + zn++; + while (bsLiveShadow < 1) { + final int thech = inShadow.read(); + + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + bsLiveShadow--; + zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); + } + + this.bsLive = bsLiveShadow; + this.bsBuff = bsBuffShadow; + + return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]]; + } + + private void setupBlock() throws IOException { + if (this.data == null) { + return; + } + + final int[] cftab = this.data.cftab; + final int[] tt = this.data.initTT(this.last + 1); + final byte[] ll8 = this.data.ll8; + cftab[0] = 0; + System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); + + for (int i = 1, c = cftab[0]; i <= 256; i++) { + c += cftab[i]; + cftab[i] = c; + } + + for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { + tt[cftab[ll8[i] & 0xff]++] = i; + } + + if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { + throw new IOException("stream corrupted"); + } + + this.setuptPos = tt[this.origPtr]; + this.setupcount = 0; + this.setupi2 = 0; + this.setupch2 = 256; /* not a char and not EOF */ + + if (this.blockRandomised) { + this.setuprNToGo = 0; + this.setuprTPos = 0; + setupRandPartA(); + } else { + setupNoRandPartA(); + } + } + + private void setupRandPartA() throws IOException { + if (this.setupi2 <= this.last) { + this.setupchPrev = this.setupch2; + int setupch2Shadow = this.data.ll8[this.setuptPos] & 0xff; + this.setuptPos = this.data.tt[this.setuptPos]; + if (this.setuprNToGo == 0) { + 
this.setuprNToGo = NUMS[this.setuprTPos] - 1; + if (++this.setuprTPos == 512) { + this.setuprTPos = 0; + } + } else { + this.setuprNToGo--; + } + this.setupch2 = setupch2Shadow ^= (this.setuprNToGo == 1) ? 1 : 0; + this.setupi2++; + this.currentChar = setupch2Shadow; + this.currentState = RAND_PART_B_STATE; + this.crc.updateCRC(setupch2Shadow); + } else { + endBlock(); + initBlock(); + setupBlock(); + } + } + + private void setupNoRandPartA() throws IOException { + if (this.setupi2 <= this.last) { + this.setupchPrev = this.setupch2; + int setupch2Shadow = this.data.ll8[this.setuptPos] & 0xff; + this.setupch2 = setupch2Shadow; + this.setuptPos = this.data.tt[this.setuptPos]; + this.setupi2++; + this.currentChar = setupch2Shadow; + this.currentState = NO_RAND_PART_B_STATE; + this.crc.updateCRC(setupch2Shadow); + } else { + this.currentState = NO_RAND_PART_A_STATE; + endBlock(); + initBlock(); + setupBlock(); + } + } + + private void setupRandPartB() throws IOException { + if (this.setupch2 != this.setupchPrev) { + this.currentState = RAND_PART_A_STATE; + this.setupcount = 1; + setupRandPartA(); + } else if (++this.setupcount >= 4) { + this.setupz = (char) (this.data.ll8[this.setuptPos] & 0xff); + this.setuptPos = this.data.tt[this.setuptPos]; + if (this.setuprNToGo == 0) { + this.setuprNToGo = NUMS[this.setuprTPos] - 1; + if (++this.setuprTPos == 512) { + this.setuprTPos = 0; + } + } else { + this.setuprNToGo--; + } + this.setupj2 = 0; + this.currentState = RAND_PART_C_STATE; + if (this.setuprNToGo == 1) { + this.setupz ^= 1; + } + setupRandPartC(); + } else { + this.currentState = RAND_PART_A_STATE; + setupRandPartA(); + } + } + + private void setupRandPartC() throws IOException { + if (this.setupj2 < this.setupz) { + this.currentChar = this.setupch2; + this.crc.updateCRC(this.setupch2); + this.setupj2++; + } else { + this.currentState = RAND_PART_A_STATE; + this.setupi2++; + this.setupcount = 0; + setupRandPartA(); + } + } + + private void setupNoRandPartB() throws IOException { + if (this.setupch2 != this.setupchPrev) { + this.setupcount = 1; + setupNoRandPartA(); + } else if (++this.setupcount >= 4) { + this.setupz = (char) (this.data.ll8[this.setuptPos] & 0xff); + this.setuptPos = this.data.tt[this.setuptPos]; + this.setupj2 = 0; + setupNoRandPartC(); + } else { + setupNoRandPartA(); + } + } + + private void setupNoRandPartC() throws IOException { + if (this.setupj2 < this.setupz) { + int setupch2Shadow = this.setupch2; + this.currentChar = setupch2Shadow; + this.crc.updateCRC(setupch2Shadow); + this.setupj2++; + this.currentState = NO_RAND_PART_C_STATE; + } else { + this.setupi2++; + this.setupcount = 0; + setupNoRandPartA(); + } + } + + private static final class Data extends Object { + + // (with blockSize 900k) + private final boolean[] inUse = new boolean[256]; // 256 byte + private final byte[] seqToUnseq = new byte[256]; // 256 byte + private final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + private final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + /** + * Freq table collected to save a pass over the data during + * decompression. 
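+ * Entry i holds the number of occurrences of byte value i in the block.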
+ */ + private final int[] unzftab = new int[256]; // 1024 byte + private final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + private final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + private final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + private final int[] minLens = new int[N_GROUPS]; // 24 byte + private final int[] cftab = new int[257]; // 1028 byte + private final char[] getAndMoveToFrontDecodeyy = new char[256]; // 512 byte + private final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 byte + private final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte + // 60798 byte + private int[] tt; // 3600000 byte + private byte[] ll8; // 900000 byte + // 4560782 byte + + Data(int blockSize100k) { + super(); + + this.ll8 = new byte[blockSize100k * BASE_BLOCK_SIZE]; + } + + /** + * Initializes the {@link #tt} array. + * This method is called when the required length of the array + * is known. I don't initialize it at construction time to + * avoid unneccessary memory allocation when compressing small + * files. + */ + int[] initTT(int length) { + int[] ttShadow = this.tt; + + // tt.length should always be >= length, but theoretically + // it can happen, if the compressor mixed small and large + // blocks. Normally only the last block will be smaller + // than others. + if ((ttShadow == null) || (ttShadow.length < length)) { + this.tt = ttShadow = new int[length]; + } + + return ttShadow; + } + } +} diff --git a/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/Bzip2OutputStream.java b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/Bzip2OutputStream.java new file mode 100644 index 0000000..dd00eee --- /dev/null +++ b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/Bzip2OutputStream.java @@ -0,0 +1,121 @@ +package org.xbib.io.compress.bzip2; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * BZip2OutputSream. Encapsulates any OutputStream to write a bzip2-ed stream to it. + */ +public class Bzip2OutputStream extends OutputStream { + + private ByteArrayOutputStream buffer; + private BZip2Deflate deflater; + private OutputStream os; + private int bufferMax; + + /** + * Wraps an OutputStream in a BZip2OutputStream. + * Any bytes written to this stream will be compressed using the bzip2 algorithm on the fly + * + * @param os OutputStream to encapsulate + */ + public Bzip2OutputStream(OutputStream os) throws IOException { + this(os, 8192); + } + + /** + * Wraps an OutputStream in a BZip2OutputStream. 
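+ * The internal buffer is flushed to the underlying stream once it grows
+ * beyond bufferMax bytes.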
+ * Any bytes written to this stream will be compressed using the bzip2 algorithm on the fly + * + * @param os OutputStream to encapsulate + */ + public Bzip2OutputStream(OutputStream os, int bufferMax) throws IOException { + super(); + this.bufferMax = bufferMax; + this.buffer = new ByteArrayOutputStream(bufferMax); + this.deflater = new BZip2Deflate(1, bufferMax, true); + this.os = os; + } + + /** + * Write integer to OutputStream + * + * @param b integer to write to stream + */ + @Override + public void write(int b) throws IOException { + if (buffer.size() > bufferMax) { + flush(); + } + if (deflater.setInput(b) > 0) { + deflate(); + } + } + + /** + * Write byte array OutputStream + * + * @param b byte array to write + */ + @Override + public void write(byte[] b) throws IOException { + write(b, 0, b.length); + } + + /** + * Write part of a byte array OutputStream + * + * @param b byte array to write + * @param off index in byte array to start output at + * @param len number of bytes to write + */ + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (buffer.size() > bufferMax) { + flush(); + } + if (deflater.setInput(b, off, len) > 0) { + deflate(); + } + } + + /** + * Close OutputStream + */ + @Override + public void close() throws IOException { + deflater.finish(); + while (deflate() > 0) { + flush(); + } + os.close(); + } + + /** + * Flush OutputStream + */ + @Override + public void flush() throws IOException { + if (buffer.size() > 0) { + os.write(buffer.toByteArray()); + buffer.reset(); + os.flush(); + } + } + + /** + * reads add empties the BZip2Deflate buffer + * + * @return the number of bytes read into the local output buffer. + */ + private int deflate() { + byte[] b = new byte[bufferMax]; + int c; + c = deflater.deflate(b); + if (c > 0) { + buffer.write(b, 0, c); + } + return c; + } +} diff --git a/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/CRC.java b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/CRC.java new file mode 100644 index 0000000..3555966 --- /dev/null +++ b/io-compress-bzip2/src/main/java/org/xbib/io/compress/bzip2/CRC.java @@ -0,0 +1,117 @@ +package org.xbib.io.compress.bzip2; + +/** + * A simple class the hold and calculate the CRC for sanity checking of the + * data. 
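+ * This is the MSB-first CRC-32 used by bzip2 (polynomial 0x04C11DB7, initial
+ * value 0xFFFFFFFF, final complement), not the reflected CRC-32 variant
+ * implemented by java.util.zip.CRC32.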
+ */ +class CRC { + + static final int CRC32_TABLE[] = { + 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, + 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, + 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, + 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, + 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, + 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, + 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, + 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, + 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, + 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, + 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, + 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, + 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, + 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, + 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, + 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, + 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, + 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, + 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, + 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, + 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, + 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, + 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, + 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, + 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, + 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, + 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, + 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, + 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, + 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, + 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, + 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, + 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, + 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, + 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, + 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, + 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, + 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, + 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, + 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, + 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, + 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, + 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, + 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, + 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, + 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, + 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, + 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, + 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, + 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, + 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, + 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, + 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, + 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, + 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, + 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, + 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, + 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, + 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, + 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, + 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, + 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, + 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, + 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 + }; + + private int globalCrc; + + CRC() { + initialiseCRC(); + } + + final void initialiseCRC() { + this.globalCrc = 0xffffffff; + } + + int getFinalCRC() { + return ~globalCrc; + } + + int getGlobalCRC() { + return globalCrc; + } + + void setGlobalCRC(int newCrc) { + this.globalCrc = newCrc; + 
} + + void updateCRC(int inCh) { + int temp = (globalCrc >> 24) ^ inCh; + if (temp < 0) { + temp = 256 + temp; + } + this.globalCrc = (globalCrc << 8) ^ CRC.CRC32_TABLE[temp]; + } + + void updateCRC(int inCh, int repeat) { + int globalCrcShadow = globalCrc; + int r = repeat; + while (r-- > 0) { + int temp = (globalCrcShadow >> 24) ^ inCh; + globalCrcShadow = (globalCrcShadow << 8) ^ CRC32_TABLE[(temp >= 0) + ? temp + : (temp + 256)]; + } + this.globalCrc = globalCrcShadow; + } +} diff --git a/io-compress-bzip2/src/test/java/org/xbib/io/compress/bzip2/BZip2Test.java b/io-compress-bzip2/src/test/java/org/xbib/io/compress/bzip2/BZip2Test.java new file mode 100644 index 0000000..b906d16 --- /dev/null +++ b/io-compress-bzip2/src/test/java/org/xbib/io/compress/bzip2/BZip2Test.java @@ -0,0 +1,40 @@ +package org.xbib.io.compress.bzip2; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import org.junit.jupiter.api.Test; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; + +public class BZip2Test { + + @Test + public void testBZip2HelloWorld() throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Bzip2OutputStream zOut = new Bzip2OutputStream(out); + ObjectOutputStream objOut = new ObjectOutputStream(zOut); + String helloWorld = "Hello World!"; + objOut.writeObject(helloWorld); + zOut.close(); + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + Bzip2InputStream zIn = new Bzip2InputStream(in); + ObjectInputStream objIn = new ObjectInputStream(zIn); + assertEquals("Hello World!", objIn.readObject()); + } + + @Test + public void readBZip2File() throws IOException { + InputStream inputStream = getClass().getResourceAsStream("test.tar.bz2"); + Bzip2InputStream bzip2InputStream = new Bzip2InputStream(inputStream); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + int i; + byte[] data = new byte[1024]; + while ((i = bzip2InputStream.read(data, 0, data.length)) != -1) { + outputStream.write(data, 0, i); + } + assertEquals(10240, outputStream.toByteArray().length); + } +} diff --git a/io-compress-bzip2/src/test/resources/org/xbib/io/compress/bzip2/test.tar.bz2 b/io-compress-bzip2/src/test/resources/org/xbib/io/compress/bzip2/test.tar.bz2 new file mode 100644 index 0000000..46864f2 Binary files /dev/null and b/io-compress-bzip2/src/test/resources/org/xbib/io/compress/bzip2/test.tar.bz2 differ diff --git a/io-compress-lzf/src/main/java/module-info.java b/io-compress-lzf/src/main/java/module-info.java new file mode 100644 index 0000000..f0baabe --- /dev/null +++ b/io-compress-lzf/src/main/java/module-info.java @@ -0,0 +1,3 @@ +module org.xbib.io.compress.lzf { + exports org.xbib.io.compress.lzf; +} diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/BufferRecycler.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/BufferRecycler.java new file mode 100644 index 0000000..4bf4b62 --- /dev/null +++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/BufferRecycler.java @@ -0,0 +1,127 @@ +package org.xbib.io.compress.lzf; + +import java.lang.ref.SoftReference; + +/** + * Simple helper class to encapsulate details of basic buffer + * recycling scheme, which helps a lot (as per profiling) for + * smaller encoding cases. 
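+ * A typical usage pattern (a sketch only; buffer size and use are up to the
+ * caller) is to borrow a buffer and hand it back when done, so the next call
+ * on the same thread can reuse it:
+ * <pre>
+ * BufferRecycler recycler = BufferRecycler.instance();
+ * byte[] buf = recycler.allocInputBuffer(8192);
+ * try {
+ *     // ... fill and consume buf ...
+ * } finally {
+ *     recycler.releaseInputBuffer(buf);
+ * }
+ * </pre>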
+ */ +public final class BufferRecycler { + + private static final int MIN_ENCODING_BUFFER = 4000; + + private static final int MIN_OUTPUT_BUFFER = 8000; + + /** + * This ThreadLocal contains a {@link java.lang.ref.SoftReference} + * to a {@link BufferRecycler} used to provide a low-cost + * buffer recycling for buffers we need for encoding, decoding. + */ + private final static ThreadLocal> recyclerRef = new ThreadLocal<>(); + + + private byte[] inputBuffer; + + private byte[] outputBuffer; + + private byte[] decodingBuffer; + + private byte[] encodingBuffer; + + private int[] encodingHash; + + /** + * Accessor to get thread-local recycler instance + */ + public static BufferRecycler instance() { + SoftReference ref = recyclerRef.get(); + BufferRecycler br = (ref == null) ? null : ref.get(); + if (br == null) { + br = new BufferRecycler(); + recyclerRef.set(new SoftReference<>(br)); + } + return br; + } + + public byte[] allocEncodingBuffer(int minSize) { + byte[] buf = encodingBuffer; + if (buf == null || buf.length < minSize) { + buf = new byte[Math.max(minSize, MIN_ENCODING_BUFFER)]; + } else { + encodingBuffer = null; + } + return buf; + } + + public void releaseEncodeBuffer(byte[] buffer) { + if (encodingBuffer == null || buffer.length > encodingBuffer.length) { + encodingBuffer = buffer; + } + } + + public byte[] allocOutputBuffer(int minSize) { + byte[] buf = outputBuffer; + if (buf == null || buf.length < minSize) { + buf = new byte[Math.max(minSize, MIN_OUTPUT_BUFFER)]; + } else { + outputBuffer = null; + } + return buf; + } + + public void releaseOutputBuffer(byte[] buffer) { + if (outputBuffer == null || (buffer != null && buffer.length > outputBuffer.length)) { + outputBuffer = buffer; + } + } + + public int[] allocEncodingHash(int suggestedSize) { + int[] buf = encodingHash; + if (buf == null || buf.length < suggestedSize) { + buf = new int[suggestedSize]; + } else { + encodingHash = null; + } + return buf; + } + + public void releaseEncodingHash(int[] buffer) { + if (encodingHash == null || (buffer != null && buffer.length > encodingHash.length)) { + encodingHash = buffer; + } + } + + public byte[] allocInputBuffer(int minSize) { + byte[] buf = inputBuffer; + if (buf == null || buf.length < minSize) { + buf = new byte[Math.max(minSize, MIN_OUTPUT_BUFFER)]; + } else { + inputBuffer = null; + } + return buf; + } + + public void releaseInputBuffer(byte[] buffer) { + if (inputBuffer == null || (buffer != null && buffer.length > inputBuffer.length)) { + inputBuffer = buffer; + } + } + + public byte[] allocDecodeBuffer(int size) { + byte[] buf = decodingBuffer; + if (buf == null || buf.length < size) { + buf = new byte[size]; + } else { + decodingBuffer = null; + } + return buf; + } + + public void releaseDecodeBuffer(byte[] buffer) { + if (decodingBuffer == null || (buffer != null && buffer.length > decodingBuffer.length)) { + decodingBuffer = buffer; + } + } + +} diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/ChunkDecoder.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/ChunkDecoder.java new file mode 100644 index 0000000..dbd8b26 --- /dev/null +++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/ChunkDecoder.java @@ -0,0 +1,220 @@ +package org.xbib.io.compress.lzf; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Decoder that handles decoding of sequence of encoded LZF chunks, combining + * them into a single contiguous result byte array. 
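+ * For illustration, assuming a complete LZF stream held in a byte array
+ * named compressedBytes, decoding can be as simple as:
+ * <pre>
+ * ChunkDecoder decoder = ChunkDecoderFactory.safeInstance();
+ * byte[] uncompressed = decoder.decode(compressedBytes);
+ * </pre>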
+ */ +public abstract class ChunkDecoder { + + private final static byte BYTE_NULL = 0; + + static final int HEADER_BYTES = 5; + + ChunkDecoder() { + } + + /** + * Method for decompressing a block of input data encoded in LZF block + * structure (compatible with lzf command line utility), and can consist of + * any number of blocks. Note that input MUST consists of a sequence of one + * or more complete chunks; partial chunks can not be handled. + */ + public final byte[] decode(final byte[] inputBuffer) throws IOException { + byte[] result = new byte[calculateUncompressedSize(inputBuffer, 0, inputBuffer.length)]; + decode(inputBuffer, 0, inputBuffer.length, result); + return result; + } + + /** + * Method for decompressing a block of input data encoded in LZF block + * structure (compatible with lzf command line utility), and can consist of + * any number of blocks. Note that input MUST consists of a sequence of one + * or more complete chunks; partial chunks can not be handled. + */ + public final byte[] decode(final byte[] inputBuffer, int inputPtr, int inputLen) throws IOException { + byte[] result = new byte[calculateUncompressedSize(inputBuffer, inputPtr, inputLen)]; + decode(inputBuffer, inputPtr, inputLen, result); + return result; + } + + /** + * Method for decompressing a block of input data encoded in LZF block + * structure (compatible with lzf command line utility), and can consist of + * any number of blocks. Note that input MUST consists of a sequence of one + * or more complete chunks; partial chunks can not be handled. + */ + public final int decode(final byte[] inputBuffer, final byte[] targetBuffer) throws IOException { + return decode(inputBuffer, 0, inputBuffer.length, targetBuffer); + } + + /** + * Method for decompressing a block of input data encoded in LZF block + * structure (compatible with lzf command line utility), and can consist of + * any number of blocks. Note that input MUST consists of a sequence of one + * or more complete chunks; partial chunks can not be handled. + */ + public int decode(final byte[] sourceBuffer, int inPtr, int inLength, + final byte[] targetBuffer) throws IOException { + int outPtr = 0; + int blockNr = 0; + + final int end = inPtr + inLength - 1; // -1 to offset possible end marker + + while (inPtr < end) { + // let's do basic sanity checks; no point in skimping with these checks + if (sourceBuffer[inPtr] != LZFChunk.BYTE_Z || sourceBuffer[inPtr + 1] != LZFChunk.BYTE_V) { + throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + inPtr + "): did not start with 'ZV' signature bytes"); + } + inPtr += 2; + int type = sourceBuffer[inPtr++]; + int len = uint16(sourceBuffer, inPtr); + inPtr += 2; + if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed + if ((outPtr + len) > targetBuffer.length) { + _reportArrayOverflow(targetBuffer, outPtr, len); + } + System.arraycopy(sourceBuffer, inPtr, targetBuffer, outPtr, len); + outPtr += len; + } else { // compressed + int uncompLen = uint16(sourceBuffer, inPtr); + if ((outPtr + uncompLen) > targetBuffer.length) { + _reportArrayOverflow(targetBuffer, outPtr, uncompLen); + } + inPtr += 2; + decodeChunk(sourceBuffer, inPtr, targetBuffer, outPtr, outPtr + uncompLen); + outPtr += uncompLen; + } + inPtr += len; + ++blockNr; + } + return outPtr; + } + + /** + * Main decode from a stream. Decompressed bytes are placed in the + * outputBuffer, inputBuffer is a "scratch-area". 
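+ * The scratch area is assumed to be large enough for one complete chunk,
+ * on the order of LZFChunk.MAX_CHUNK_LEN bytes.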
+ * + * @param is An input stream of LZF compressed bytes + * @param inputBuffer A byte array used as a scratch area. + * @param outputBuffer A byte array in which the result is returned + * @return The number of bytes placed in the outputBuffer. + */ + public abstract int decodeChunk(final InputStream is, final byte[] inputBuffer, final byte[] outputBuffer) + throws IOException; + + /** + * Main decode method for individual chunks. + */ + public abstract void decodeChunk(byte[] in, int inPos, byte[] out, int outPos, int outEnd) + throws IOException; + + /** + * Helper method that will calculate total uncompressed size, for sequence + * of one or more LZF blocks stored in given byte array. Will do basic + * sanity checking, so that this method can be called to verify against some + * types of corruption. + */ + public static int calculateUncompressedSize(byte[] data, int ptr, int length) throws IOException { + int uncompressedSize = 0; + int blockNr = 0; + final int end = ptr + length; + + while (ptr < end) { + // can use optional end marker + if (ptr == (data.length + 1) && data[ptr] == BYTE_NULL) { + ++ptr; // so that we'll be at end + break; + } + // simpler to handle bounds checks by catching exception here... + try { + if (data[ptr] != LZFChunk.BYTE_Z || data[ptr + 1] != LZFChunk.BYTE_V) { + throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + ptr + "): did not start with 'ZV' signature bytes"); + } + int type = (int) data[ptr + 2]; + int blockLen = uint16(data, ptr + 3); + if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed + ptr += 5; + uncompressedSize += blockLen; + } else if (type == LZFChunk.BLOCK_TYPE_COMPRESSED) { // compressed + uncompressedSize += uint16(data, ptr + 5); + ptr += 7; + } else { // unknown... CRC-32 would be 2, but that's not implemented by cli tool + throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + ptr + "): unrecognized block type " + (type & 0xFF)); + } + ptr += blockLen; + } catch (ArrayIndexOutOfBoundsException e) { + throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + ptr + "): truncated block header"); + } + ++blockNr; + } + // one more sanity check: + if (ptr != end) { + throw new IOException("Corrupt input data: block #" + blockNr + " extends " + (data.length - ptr) + " beyond end of input"); + } + return uncompressedSize; + } + + protected static int uint16(byte[] data, int ptr) { + return ((data[ptr] & 0xFF) << 8) + (data[ptr + 1] & 0xFF); + } + + /** + * Helper method to forcibly load header bytes that must be read before + * chunk can be handled. 
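+ * Returns the number of header bytes actually read; a result smaller than
+ * HEADER_BYTES means the underlying stream ended first.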
+ */ + protected static int readHeader(final InputStream is, final byte[] inputBuffer) + throws IOException { + // Ok: simple case first, where we just get all data we need + int needed = HEADER_BYTES; + int count = is.read(inputBuffer, 0, needed); + + if (count == needed) { + return count; + } + if (count <= 0) { + return 0; + } + + // if not, a source that trickles data (network etc); must loop + int offset = count; + needed -= count; + + do { + count = is.read(inputBuffer, offset, needed); + if (count <= 0) { + break; + } + offset += count; + needed -= count; + } while (needed > 0); + return offset; + } + + protected static void readFully(InputStream is, boolean compressed, + byte[] outputBuffer, int offset, int len) throws IOException { + int left = len; + while (left > 0) { + int count = is.read(outputBuffer, offset, left); + if (count < 0) { // EOF not allowed here + throw new IOException("EOF in " + len + " byte (" + + (compressed ? "" : "un") + "compressed) block: could only read " + + (len - left) + " bytes"); + } + offset += count; + left -= count; + } + } + + /** + * Helper method called when it is determined that the target buffer can not + * hold all data to copy or uncompress + */ + protected void _reportArrayOverflow(byte[] targetBuffer, int outPtr, int dataLen) + throws IOException { + throw new IOException("Target buffer too small (" + targetBuffer.length + "): can not copy/uncompress " + + dataLen + " bytes to offset " + outPtr); + } +} diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/ChunkDecoderFactory.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/ChunkDecoderFactory.java new file mode 100644 index 0000000..b17a7b0 --- /dev/null +++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/ChunkDecoderFactory.java @@ -0,0 +1,49 @@ +package org.xbib.io.compress.lzf; + +/** + * Simple helper class used for loading {@link ChunkDecoder} implementations, + * based on criteria such as "fastest available".
+ *
Yes, it looks butt-ugly, + * but does the job. Nonetheless, if anyone has lipstick for this pig, let me + * know. + */ +public class ChunkDecoderFactory { + + private static final ChunkDecoderFactory INSTANCE; + + static { + INSTANCE = new ChunkDecoderFactory( VanillaChunkDecoder.class); + } + + private final Class implClass; + + @SuppressWarnings("unchecked") + private ChunkDecoderFactory(Class imp) { + implClass = (Class) imp; + } + + /** + * Method to use for getting decompressor instance that uses the most + * optimal available methods for underlying data access. It should be safe + * to call this method as implementations are dynamically loaded; however, + * on some non-standard platforms it may be necessary to either directly + * load instances, or use {@link #safeInstance()}. + */ + public static ChunkDecoder optimalInstance() { + try { + return INSTANCE.implClass.newInstance(); + } catch (Exception e) { + throw new IllegalStateException("Failed to load a ChunkDecoder instance (" + e.getClass().getName() + "): " + + e.getMessage(), e); + } + } + + /** + * Method that can be used to ensure that a "safe" decompressor instance is + * loaded. Safe here means that it should work on any and all Java + * platforms. + */ + public static ChunkDecoder safeInstance() { + // this will always succeed loading; no need to use dynamic class loading or instantiation + return new VanillaChunkDecoder(); + } +} diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/ChunkEncoder.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/ChunkEncoder.java new file mode 100644 index 0000000..5568eff --- /dev/null +++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/ChunkEncoder.java @@ -0,0 +1,245 @@ + +package org.xbib.io.compress.lzf; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * Class that handles actual encoding of individual chunks. Resulting chunks can + * be compressed or non-compressed; compression is only used if it actually + * reduces chunk size (including overhead of additional header bytes) + */ +public final class ChunkEncoder { + // Beyond certain point we won't be able to compress; let's use 16 bytes as cut-off + + private static final int MIN_BLOCK_TO_COMPRESS = 16; + private static final int MIN_HASH_SIZE = 256; + // Not much point in bigger tables, with 8k window + private static final int MAX_HASH_SIZE = 16384; + private static final int MAX_OFF = 1 << 13; // 8k + private static final int MAX_REF = (1 << 8) + (1 << 3); // 264 + // // Encoding tables etc + private final BufferRecycler bufferRecycler; + /** + * Hash table contains lookup based on 3-byte sequence; key is hash of such + * triplet, value is offset in buffer. + */ + private int[] hashTable; + private final int hashModulo; + /** + * Buffer in which encoded content is stored during processing + */ + private byte[] encodeBuffer; + /** + * Small buffer passed to LZFChunk, needed for writing chunk header + */ + private byte[] headerBuffer; + + /** + * @param totalLength Total encoded length; used for calculating size of + * hash table to use + */ + public ChunkEncoder(int totalLength) { + int largestChunkLen = Math.max(totalLength, LZFChunk.MAX_CHUNK_LEN); + + int suggestedHashLen = calcHashLen(largestChunkLen); + bufferRecycler = BufferRecycler.instance(); + hashTable = bufferRecycler.allocEncodingHash(suggestedHashLen); + hashModulo = hashTable.length - 1; + // Ok, then, what's the worst case output buffer length? 
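+ // Worst case is all literals: the input itself plus one extra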
+ // length indicator for each 32 literals, so: + int bufferLen = largestChunkLen + ((largestChunkLen + 31) >> 5); + encodeBuffer = bufferRecycler.allocEncodingBuffer(bufferLen); + } + + /** + * Alternate constructor used when we want to avoid allocation encoding + * buffer, in cases where caller wants full control over allocations. + */ + private ChunkEncoder(int totalLength, boolean bogus) { + int largestChunkLen = Math.max(totalLength, LZFChunk.MAX_CHUNK_LEN); + int suggestedHashLen = calcHashLen(largestChunkLen); + bufferRecycler = BufferRecycler.instance(); + hashTable = bufferRecycler.allocEncodingHash(suggestedHashLen); + hashModulo = hashTable.length - 1; + encodeBuffer = null; + } + + public static ChunkEncoder nonAllocatingEncoder(int totalLength) { + return new ChunkEncoder(totalLength, true); + } + + /** + * Method to close once encoder is no longer in use. Note: after calling + * this method, further calls to {@link #encodeChunk} will fail + */ + public void close() { + byte[] buf = encodeBuffer; + if (buf != null) { + encodeBuffer = null; + bufferRecycler.releaseEncodeBuffer(buf); + } + int[] ibuf = hashTable; + if (ibuf != null) { + hashTable = null; + bufferRecycler.releaseEncodingHash(ibuf); + } + } + + /** + * Method for compressing (or not) individual chunks + */ + public LZFChunk encodeChunk(byte[] data, int offset, int len) { + if (len >= MIN_BLOCK_TO_COMPRESS) { + /* If we have non-trivial block, and can compress it by at least + * 2 bytes (since header is 2 bytes longer), let's compress: + */ + int compLen = tryCompress(data, offset, offset + len, encodeBuffer, 0); + if (compLen < (len - 2)) { // nah; just return uncompressed + return LZFChunk.createCompressed(len, encodeBuffer, 0, compLen); + } + } + // Otherwise leave uncompressed: + return LZFChunk.createNonCompressed(data, offset, len); + } + + /** + * Method for encoding individual chunk, writing it to given output stream. + */ + public void encodeAndWriteChunk(byte[] data, int offset, int len, OutputStream out) + throws IOException { + byte[] headerBuf = headerBuffer; + if (headerBuf == null) { + headerBuffer = headerBuf = new byte[LZFChunk.MAX_HEADER_LEN]; + } + if (len >= MIN_BLOCK_TO_COMPRESS) { + /* If we have non-trivial block, and can compress it by at least + * 2 bytes (since header is 2 bytes longer), let's compress: + */ + int compLen = tryCompress(data, offset, offset + len, encodeBuffer, 0); + if (compLen < (len - 2)) { // nah; just return uncompressed + LZFChunk.writeCompressedHeader(len, compLen, out, headerBuf); + out.write(encodeBuffer, 0, compLen); + return; + } + } + // Otherwise leave uncompressed: + LZFChunk.writeNonCompressedHeader(len, out, headerBuf); + out.write(data, offset, len); + } + + /** + * Main workhorse method that will try to compress given chunk, and return + * end position (offset to byte after last included byte) + */ + protected int tryCompress(byte[] in, int inPos, int inEnd, byte[] out, int outPos) { + final int[] hashTable = this.hashTable; + ++outPos; + int seen = first(in, inPos); // past 4 bytes we have seen... (last one is LSB) + int literals = 0; + inEnd -= 4; + final int firstPos = inPos; // so that we won't have back references across block boundary + + while (inPos < inEnd) { + byte p2 = in[inPos + 2]; + // next + seen = (seen << 8) + (p2 & 255); + int off = hash(seen); + int ref = hashTable[off]; + hashTable[off] = inPos; + + // First expected common case: no back-ref (for whatever reason) + if (ref >= inPos // can't refer forward (i.e. 
leftovers) + || ref < firstPos // or to previous block + || (off = inPos - ref) > MAX_OFF + || in[ref + 2] != p2 // must match hash + || in[ref + 1] != (byte) (seen >> 8) + || in[ref] != (byte) (seen >> 16)) { + out[outPos++] = in[inPos++]; + literals++; + if (literals == LZFChunk.MAX_LITERAL) { + out[outPos - 33] = (byte) 31; // <= out[outPos - literals - 1] = MAX_LITERAL_MINUS_1; + literals = 0; + outPos++; + } + continue; + } + // match + int maxLen = inEnd - inPos + 2; + if (maxLen > MAX_REF) { + maxLen = MAX_REF; + } + if (literals == 0) { + outPos--; + } else { + out[outPos - literals - 1] = (byte) (literals - 1); + literals = 0; + } + int len = 3; + while (len < maxLen && in[ref + len] == in[inPos + len]) { + len++; + } + len -= 2; + --off; // was off by one earlier + if (len < 7) { + out[outPos++] = (byte) ((off >> 8) + (len << 5)); + } else { + out[outPos++] = (byte) ((off >> 8) + (7 << 5)); + out[outPos++] = (byte) (len - 7); + } + out[outPos++] = (byte) off; + outPos++; + inPos += len; + seen = first(in, inPos); + seen = (seen << 8) + (in[inPos + 2] & 255); + hashTable[hash(seen)] = inPos; + ++inPos; + seen = (seen << 8) + (in[inPos + 2] & 255); // hash = next(hash, in, inPos); + hashTable[hash(seen)] = inPos; + ++inPos; + } + // try offlining the tail + return handleTail(in, inPos, inEnd + 4, out, outPos, literals); + } + + private int handleTail(byte[] in, int inPos, int inEnd, byte[] out, int outPos, + int literals) { + while (inPos < inEnd) { + out[outPos++] = in[inPos++]; + literals++; + if (literals == LZFChunk.MAX_LITERAL) { + out[outPos - literals - 1] = (byte) (literals - 1); + literals = 0; + outPos++; + } + } + out[outPos - literals - 1] = (byte) (literals - 1); + if (literals == 0) { + outPos--; + } + return outPos; + } + + private static int calcHashLen(int chunkSize) { + // in general try get hash table size of 2x input size + chunkSize += chunkSize; + // but no larger than max size: + if (chunkSize >= MAX_HASH_SIZE) { + return MAX_HASH_SIZE; + } + // otherwise just need to round up to nearest 2x + int hashLen = MIN_HASH_SIZE; + while (hashLen < chunkSize) { + hashLen += hashLen; + } + return hashLen; + } + + private int first(byte[] in, int inPos) { + return (in[inPos] << 8) + (in[inPos + 1] & 0xFF); + } + + private int hash(int h) { + return ((h * 57321) >> 9) & hashModulo; + } +} diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFChunk.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFChunk.java new file mode 100644 index 0000000..df4b0a2 --- /dev/null +++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFChunk.java @@ -0,0 +1,128 @@ +package org.xbib.io.compress.lzf; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * Helper class used to store LZF encoded segments (compressed and + * non-compressed) that can be sequenced to produce LZF files/streams. + */ +public class LZFChunk { + + /** + * Maximum length of literal run for LZF encoding. 
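+ * A run of n literals (1 to 32) is emitted as a control byte holding n-1,
+ * followed by the n literal bytes themselves.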
+ */
+    static final int MAX_LITERAL = 1 << 5; // 32
+    // Chunk length is limited by 2-byte length indicator, to 64k
+    static final int MAX_CHUNK_LEN = 0xFFFF;
+    /**
+     * Header can be either 7 bytes (compressed) or 5 bytes (uncompressed) long
+     */
+    static final int MAX_HEADER_LEN = 7;
+    static final byte BYTE_Z = 'Z';
+    static final byte BYTE_V = 'V';
+    static final int BLOCK_TYPE_NON_COMPRESSED = 0;
+    static final int BLOCK_TYPE_COMPRESSED = 1;
+    private final byte[] data;
+    private LZFChunk next;
+
+    private LZFChunk(byte[] data) {
+        this.data = data;
+    }
+
+    /**
+     * Factory method for constructing a compressed chunk
+     */
+    public static LZFChunk createCompressed(int origLen, byte[] encData, int encPtr, int encLen) {
+        byte[] result = new byte[encLen + 7];
+        result[0] = BYTE_Z;
+        result[1] = BYTE_V;
+        result[2] = BLOCK_TYPE_COMPRESSED;
+        result[3] = (byte) (encLen >> 8);
+        result[4] = (byte) encLen;
+        result[5] = (byte) (origLen >> 8);
+        result[6] = (byte) origLen;
+        System.arraycopy(encData, encPtr, result, 7, encLen);
+        return new LZFChunk(result);
+    }
+
+    public static int appendCompressedHeader(int origLen, int encLen, byte[] headerBuffer, int offset)
+            throws IOException {
+        headerBuffer[offset++] = BYTE_Z;
+        headerBuffer[offset++] = BYTE_V;
+        headerBuffer[offset++] = BLOCK_TYPE_COMPRESSED;
+        headerBuffer[offset++] = (byte) (encLen >> 8);
+        headerBuffer[offset++] = (byte) encLen;
+        headerBuffer[offset++] = (byte) (origLen >> 8);
+        headerBuffer[offset++] = (byte) origLen;
+        return offset;
+    }
+
+    public static void writeCompressedHeader(int origLen, int encLen, OutputStream out, byte[] headerBuffer)
+            throws IOException {
+        headerBuffer[0] = BYTE_Z;
+        headerBuffer[1] = BYTE_V;
+        headerBuffer[2] = BLOCK_TYPE_COMPRESSED;
+        headerBuffer[3] = (byte) (encLen >> 8);
+        headerBuffer[4] = (byte) encLen;
+        headerBuffer[5] = (byte) (origLen >> 8);
+        headerBuffer[6] = (byte) origLen;
+        out.write(headerBuffer, 0, 7);
+    }
+
+    /**
+     * Factory method for constructing a non-compressed chunk
+     */
+    public static LZFChunk createNonCompressed(byte[] plainData, int ptr, int len) {
+        byte[] result = new byte[len + 5];
+        result[0] = BYTE_Z;
+        result[1] = BYTE_V;
+        result[2] = BLOCK_TYPE_NON_COMPRESSED;
+        result[3] = (byte) (len >> 8);
+        result[4] = (byte) len;
+        System.arraycopy(plainData, ptr, result, 5, len);
+        return new LZFChunk(result);
+    }
+
+    public static int appendNonCompressedHeader(int len, byte[] headerBuffer, int offset)
+            throws IOException {
+        headerBuffer[offset++] = BYTE_Z;
+        headerBuffer[offset++] = BYTE_V;
+        headerBuffer[offset++] = BLOCK_TYPE_NON_COMPRESSED;
+        headerBuffer[offset++] = (byte) (len >> 8);
+        headerBuffer[offset++] = (byte) len;
+        return offset;
+    }
+
+    public static void writeNonCompressedHeader(int len, OutputStream out, byte[] headerBuffer)
+            throws IOException {
+        headerBuffer[0] = BYTE_Z;
+        headerBuffer[1] = BYTE_V;
+        headerBuffer[2] = BLOCK_TYPE_NON_COMPRESSED;
+        headerBuffer[3] = (byte) (len >> 8);
+        headerBuffer[4] = (byte) len;
+        out.write(headerBuffer, 0, 5);
+    }
+
+    public void setNext(LZFChunk next) {
+        this.next = next;
+    }
+
+    public LZFChunk next() {
+        return next;
+    }
+
+    public int length() {
+        return data.length;
+    }
+
+    public byte[] getData() {
+        return data;
+    }
+
+    public int copyTo(byte[] dst, int ptr) {
+        int len = data.length;
+        System.arraycopy(data, 0, dst, ptr, len);
+        return ptr + len;
+    }
+}
diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFCompressingInputStream.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFCompressingInputStream.java
new file mode 100644
index 0000000..ca69c02
--- /dev/null
+++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFCompressingInputStream.java
@@ -0,0 +1,241 @@
+package org.xbib.io.compress.lzf;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Decorator {@link java.io.InputStream} implementation used for reading
+ * uncompressed data and compressing it on the fly, such that
+ * reads return compressed data. It is the reverse of {@link LZFInputStream}
+ * (which instead uncompresses data).
+ */
+public class LZFCompressingInputStream extends InputStream {
+
+    private final BufferRecycler _recycler;
+    private ChunkEncoder _encoder;
+    /**
+     * Stream of uncompressed input that is to be compressed.
+     */
+    protected final InputStream _inputStream;
+    /**
+     * Flag that indicates if we have already called 'inputStream.close()' (to
+     * avoid calling it multiple times)
+     */
+    protected boolean _inputStreamClosed;
+    /**
+     * Flag that indicates whether we force full reads (reading of as many bytes
+     * as requested), or 'optimal' reads (up to as many as available, but at
+     * least one). Default is false, meaning that 'optimal' read is used.
+     */
+    protected boolean _cfgFullReads = false;
+    /**
+     * Buffer in which uncompressed input is first read, before getting encoded
+     * in {@link #_encodedBytes}.
+     */
+    protected byte[] _inputBuffer;
+    /**
+     * Buffer that contains compressed data that is returned to readers.
+     */
+    protected byte[] _encodedBytes;
+    /**
+     * The current position (next char to output) in the compressed bytes
+     * buffer.
+     */
+    protected int _bufferPosition = 0;
+    /**
+     * Length of the current compressed bytes buffer
+     */
+    protected int _bufferLength = 0;
+    /**
+     * Number of bytes read from the underlying {@link #_inputStream}
+     */
+    protected int _readCount = 0;
+
+    /*
+    ///////////////////////////////////////////////////////////////////////
+    // Construction, configuration
+    ///////////////////////////////////////////////////////////////////////
+     */
+    public LZFCompressingInputStream(InputStream in) {
+        _inputStream = in;
+        _recycler = BufferRecycler.instance();
+        _inputBuffer = _recycler.allocInputBuffer(LZFChunk.MAX_CHUNK_LEN);
+        // let's not yet allocate encoding buffer; don't know optimal size
+    }
+
+    /**
+     * Method that can be used to define whether reads should be "full" or
+     * "optimal": the former means that full compressed blocks are read right
+     * away as needed, the latter that only smaller chunks are read at a time,
+     * more being read as needed.
+     */
+    public void setUseFullReads(boolean b) {
+        _cfgFullReads = b;
+    }
+
+    @Override
+    public int available() {
+        // if closed, return -1
+        if (_inputStreamClosed) {
+            return -1;
+        }
+        int left = (_bufferLength - _bufferPosition);
+        return (left <= 0) ? 0 : left;
+    }
+
+    @Override
+    public int read() throws IOException {
+        if (!readyBuffer()) {
+            return -1;
+        }
+        return _encodedBytes[_bufferPosition++] & 255;
+    }
+
+    @Override
+    public int read(final byte[] buffer) throws IOException {
+        return read(buffer, 0, buffer.length);
+    }
+
+    @Override
+    public int read(final byte[] buffer, int offset, int length) throws IOException {
+        if (length < 1) {
+            return 0;
+        }
+        if (!readyBuffer()) {
+            return -1;
+        }
+        // First let's read however much data we happen to have...
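+        // Illustrative usage sketch (names are examples only, not part of
+        // this class): wrap a plain stream so that reads yield LZF chunks:
+        //   InputStream lzf = new LZFCompressingInputStream(new FileInputStream("raw.bin"));
+        //   int n = lzf.read(buf, 0, buf.length); // buf now holds LZF chunk bytes
+        // The copy below serves both "optimal" reads and forced full reads.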
+ int chunkLength = Math.min(_bufferLength - _bufferPosition, length); + System.arraycopy(_encodedBytes, _bufferPosition, buffer, offset, chunkLength); + _bufferPosition += chunkLength; + + if (chunkLength == length || !_cfgFullReads) { + return chunkLength; + } + // Need more data, then + int totalRead = chunkLength; + do { + offset += chunkLength; + if (!readyBuffer()) { + break; + } + chunkLength = Math.min(_bufferLength - _bufferPosition, (length - totalRead)); + System.arraycopy(_encodedBytes, _bufferPosition, buffer, offset, chunkLength); + _bufferPosition += chunkLength; + totalRead += chunkLength; + } while (totalRead < length); + + return totalRead; + } + + @Override + public void close() throws IOException { + _bufferPosition = _bufferLength = 0; + byte[] buf = _encodedBytes; + if (buf != null) { + _encodedBytes = null; + _recycler.releaseEncodeBuffer(buf); + } + if (_encoder != null) { + _encoder.close(); + } + _closeInput(); + } + + private void _closeInput() throws IOException { + byte[] buf = _inputBuffer; + if (buf != null) { + _inputBuffer = null; + _recycler.releaseInputBuffer(buf); + } + if (!_inputStreamClosed) { + _inputStreamClosed = true; + _inputStream.close(); + } + } + + /** + * Overridden to just skip at most a single chunk at a time + */ + @Override + public long skip(long n) throws IOException { + if (_inputStreamClosed) { + return -1; + } + int left = (_bufferLength - _bufferPosition); + // if none left, must read more: + if (left <= 0) { + // otherwise must read more to skip... + int b = read(); + if (b < 0) { // EOF + return -1; + } + // push it back to get accurate skip count + --_bufferPosition; + left = (_bufferLength - _bufferPosition); + } + // either way, just skip whatever we have decoded + if (left > n) { + left = (int) n; + } + _bufferPosition += left; + return left; + } + + /** + * Fill the uncompressed bytes buffer by reading the underlying inputStream. + * + * @throws java.io.IOException + */ + protected boolean readyBuffer() throws IOException { + if (_bufferPosition < _bufferLength) { + return true; + } + if (_inputStreamClosed) { + return false; + } + // Ok: read as much as we can from input source first + int count = _inputStream.read(_inputBuffer, 0, _inputBuffer.length); + if (count < 0) { // if no input read, it's EOF + _closeInput(); // and we can close input source as well + return false; + } + int chunkLength = count; + int left = _inputBuffer.length - count; + + while ((count = _inputStream.read(_inputBuffer, chunkLength, left)) > 0) { + chunkLength += count; + left -= count; + if (left < 1) { + break; + } + } + + _bufferPosition = 0; + + // Ok: if we don't yet have an encoder (and buffer for it), let's get one + if (_encoder == null) { + // need 7 byte header, plus regular max buffer size: + int bufferLen = chunkLength + ((chunkLength + 31) >> 5) + 7; + _encoder = ChunkEncoder.nonAllocatingEncoder(bufferLen); + _encodedBytes = _recycler.allocEncodingBuffer(bufferLen); + } + // offset of 7 so we can prepend header as necessary + int encodeEnd = _encoder.tryCompress(_inputBuffer, 0, chunkLength, _encodedBytes, 7); + // but did it compress? + if (encodeEnd < (chunkLength + 5)) { // yes! (compared to 5 byte uncomp prefix, data) + // prepend header in situ + LZFChunk.appendCompressedHeader(chunkLength, encodeEnd - 7, _encodedBytes, 0); + _bufferLength = encodeEnd; + } else { // no -- so sad... 
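+            // Non-compressed chunk framing (see LZFChunk): a 5-byte header of
+            // 'Z', 'V', type 0, and a 2-byte big-endian payload length, then
+            // the payload copied verbatim; e.g. a 300-byte incompressible
+            // input becomes 'Z','V',0x00,0x01,0x2C plus the 300 original bytes.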
+            int ptr = LZFChunk.appendNonCompressedHeader(chunkLength, _encodedBytes, 0);
+            // TODO: figure out a way to avoid this copy; need a header
+            System.arraycopy(_inputBuffer, 0, _encodedBytes, ptr, chunkLength);
+            _bufferLength = ptr + chunkLength;
+        }
+        if (count < 0) { // did we get end-of-input?
+            _closeInput();
+        }
+        return true;
+    }
+}
diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFDecoder.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFDecoder.java
new file mode 100644
index 0000000..a73f808
--- /dev/null
+++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFDecoder.java
@@ -0,0 +1,34 @@
+package org.xbib.io.compress.lzf;
+
+import java.io.IOException;
+
+/**
+ * Decoder that handles decoding of a sequence of encoded LZF chunks, combining
+ * them into a single contiguous result byte array. As of version 0.9, this
+ * class has been mostly replaced by {@link ChunkDecoder}, although static
+ * methods are left here and may still be used for convenience. All static
+ * methods use {@link ChunkDecoderFactory#optimalInstance} to find the actual
+ * {@link ChunkDecoder} instance to use.
+ */
+public class LZFDecoder {
+
+    public static byte[] decode(final byte[] inputBuffer) throws IOException {
+        return decode(inputBuffer, 0, inputBuffer.length);
+    }
+
+    public static byte[] decode(final byte[] inputBuffer, int offset, int length) throws IOException {
+        return ChunkDecoderFactory.optimalInstance().decode(inputBuffer, offset, length);
+    }
+
+    public static int decode(final byte[] inputBuffer, final byte[] targetBuffer) throws IOException {
+        return decode(inputBuffer, 0, inputBuffer.length, targetBuffer);
+    }
+
+    public static int decode(final byte[] sourceBuffer, int offset, int length, final byte[] targetBuffer) throws IOException {
+        return ChunkDecoderFactory.optimalInstance().decode(sourceBuffer, offset, length, targetBuffer);
+    }
+
+    public static int calculateUncompressedSize(byte[] data, int offset, int length) throws IOException {
+        return ChunkDecoder.calculateUncompressedSize(data, offset, length);
+    }
+}
diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFEncoder.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFEncoder.java
new file mode 100644
index 0000000..897ad65
--- /dev/null
+++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFEncoder.java
@@ -0,0 +1,78 @@
+package org.xbib.io.compress.lzf;
+
+import java.io.IOException;
+
+/**
+ * Encoder that handles splitting of input into chunks to encode, calls
+ * {@link ChunkEncoder} to compress individual chunks and combines resulting
+ * chunks into a contiguous output byte array.
+ */
+public class LZFEncoder {
+
+    private LZFEncoder() {
+    }
+
+    public static byte[] encode(byte[] data) throws IOException {
+        return encode(data, data.length);
+    }
+
+    /**
+     * Method for compressing given input data using LZF encoding and block
+     * structure (compatible with the lzf command line utility). Result consists
+     * of a sequence of chunks.
+     */
+    public static byte[] encode(byte[] data, int length) throws IOException {
+        return encode(data, 0, length);
+    }
+
+    /**
+     * Method for compressing given input data using LZF encoding and block
+     * structure (compatible with the lzf command line utility). Result consists
+     * of a sequence of chunks.
+ */ + public static byte[] encode(byte[] data, int offset, int length) throws IOException { + ChunkEncoder enc = new ChunkEncoder(length); + byte[] result = encode(enc, data, offset, length); + // important: may be able to reuse buffers + enc.close(); + return result; + } + + public static byte[] encode(ChunkEncoder enc, byte[] data, int length) + throws IOException { + return encode(enc, data, 0, length); + } + + public static byte[] encode(ChunkEncoder enc, byte[] data, int offset, int length) + throws IOException { + int left = length; + int chunkLen = Math.min(LZFChunk.MAX_CHUNK_LEN, left); + LZFChunk first = enc.encodeChunk(data, offset, chunkLen); + left -= chunkLen; + // shortcut: if it all fit in, no need to coalesce: + if (left < 1) { + return first.getData(); + } + // otherwise need to get other chunks: + int resultBytes = first.length(); + offset += chunkLen; + LZFChunk last = first; + + do { + chunkLen = Math.min(left, LZFChunk.MAX_CHUNK_LEN); + LZFChunk chunk = enc.encodeChunk(data, offset, chunkLen); + offset += chunkLen; + left -= chunkLen; + resultBytes += chunk.length(); + last.setNext(chunk); + last = chunk; + } while (left > 0); + // and then coalesce returns into single contiguous byte array + byte[] result = new byte[resultBytes]; + int ptr = 0; + for (; first != null; first = first.next()) { + ptr = first.copyTo(result, ptr); + } + return result; + } +} diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFInputStream.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFInputStream.java new file mode 100644 index 0000000..5a96bce --- /dev/null +++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFInputStream.java @@ -0,0 +1,272 @@ +package org.xbib.io.compress.lzf; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Decorator {@link java.io.InputStream} implementation used for reading compressed data + * and uncompressing it on the fly, such that reads return uncompressed data. + * Its direct counterpart is {@link LZFOutputStream}; but there is also + * {@link LZFCompressingInputStream} which does reverse of this class. + */ +public class LZFInputStream extends InputStream { + + /** + * Underlying decoder in use. + */ + protected final ChunkDecoder _decoder; + /** + * Object that handles details of buffer recycling + */ + protected final BufferRecycler _recycler; + /** + * stream to be decompressed + */ + protected final InputStream _inputStream; + /** + * Flag that indicates if we have already called 'inputStream.close()' (to + * avoid calling it multiple times) + */ + protected boolean _inputStreamClosed; + /** + * Flag that indicates whether we force full reads (reading of as many bytes + * as requested), or 'optimal' reads (up to as many as available, but at + * least one). Default is false, meaning that 'optimal' read is used. + */ + protected boolean _cfgFullReads = false; + /** + * the current buffer of compressed bytes (from which to decode) + */ + protected byte[] _inputBuffer; + /** + * the buffer of uncompressed bytes from which content is read + */ + protected byte[] _decodedBytes; + /** + * The current position (next char to output) in the uncompressed bytes + * buffer. 
+ */
+    protected int _bufferPosition = 0;
+    /**
+     * Length of the current uncompressed bytes buffer
+     */
+    protected int _bufferLength = 0;
+    /**
+     * Number of bytes read from the underlying {@link #_inputStream}
+     */
+    protected int _readCount = 0;
+
+    /*
+     * Construction
+     */
+    public LZFInputStream(final InputStream inputStream) throws IOException {
+        this(inputStream, false);
+    }
+
+    /**
+     * @param in Underlying input stream to use
+     * @param fullReads Whether {@link #read(byte[])} should try to read exactly
+     * as many bytes as requested (true); or just however many happen to be
+     * available (false)
+     */
+    public LZFInputStream(final InputStream in, boolean fullReads) throws IOException {
+        this(in, fullReads, ChunkDecoderFactory.optimalInstance());
+    }
+
+    public LZFInputStream(final InputStream in, boolean fullReads, ChunkDecoder decoder)
+            throws IOException {
+        super();
+        _decoder = decoder;
+        _recycler = BufferRecycler.instance();
+        _inputStream = in;
+        _inputStreamClosed = false;
+        _cfgFullReads = fullReads;
+
+        _inputBuffer = _recycler.allocInputBuffer(LZFChunk.MAX_CHUNK_LEN);
+        _decodedBytes = _recycler.allocDecodeBuffer(LZFChunk.MAX_CHUNK_LEN);
+    }
+
+    /**
+     * Method that can be used to define whether reads should be "full" or
+     * "optimal": the former means that full compressed blocks are read right
+     * away as needed, the latter that only smaller chunks are read at a time,
+     * more being read as needed.
+     */
+    public void setUseFullReads(boolean b) {
+        _cfgFullReads = b;
+    }
+
+    /*
+     * InputStream impl
+     */
+
+    /**
+     * Method is overridden to report number of bytes that can now be read from
+     * the decoded data buffer, without reading bytes from the underlying stream.
+     * Never throws an exception; returns number of bytes available without
+     * further reads from underlying source; -1 if stream has been closed, or 0
+     * if an actual read (and possible blocking) is needed to find out.
+     */
+    @Override
+    public int available() {
+        // if closed, return -1
+        if (_inputStreamClosed) {
+            return -1;
+        }
+        int left = (_bufferLength - _bufferPosition);
+        return (left <= 0) ? 0 : left;
+    }
+
+    @Override
+    public int read() throws IOException {
+        if (!readyBuffer()) {
+            return -1;
+        }
+        return _decodedBytes[_bufferPosition++] & 255;
+    }
+
+    @Override
+    public int read(final byte[] buffer) throws IOException {
+        return read(buffer, 0, buffer.length);
+    }
+
+    @Override
+    public int read(final byte[] buffer, int offset, int length) throws IOException {
+        if (length < 1) {
+            return 0;
+        }
+        if (!readyBuffer()) {
+            return -1;
+        }
+        // First let's read however much data we happen to have...
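+        // Illustrative round trip (hypothetical variable names):
+        //   byte[] lzf = ...; // bytes previously written by LZFOutputStream
+        //   InputStream in = new LZFInputStream(new ByteArrayInputStream(lzf));
+        //   int n = in.read(buf, 0, buf.length);
+        // With fullReads == false a single decoded chunk may satisfy the call;
+        // the loop below only runs when _cfgFullReads is set.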
+ int chunkLength = Math.min(_bufferLength - _bufferPosition, length); + System.arraycopy(_decodedBytes, _bufferPosition, buffer, offset, chunkLength); + _bufferPosition += chunkLength; + + if (chunkLength == length || !_cfgFullReads) { + return chunkLength; + } + // Need more data, then + int totalRead = chunkLength; + do { + offset += chunkLength; + if (!readyBuffer()) { + break; + } + chunkLength = Math.min(_bufferLength - _bufferPosition, (length - totalRead)); + System.arraycopy(_decodedBytes, _bufferPosition, buffer, offset, chunkLength); + _bufferPosition += chunkLength; + totalRead += chunkLength; + } while (totalRead < length); + + return totalRead; + } + + @Override + public void close() throws IOException { + _bufferPosition = _bufferLength = 0; + byte[] buf = _inputBuffer; + if (buf != null) { + _inputBuffer = null; + _recycler.releaseInputBuffer(buf); + } + buf = _decodedBytes; + if (buf != null) { + _decodedBytes = null; + _recycler.releaseDecodeBuffer(buf); + } + if (!_inputStreamClosed) { + _inputStreamClosed = true; + _inputStream.close(); + } + } + + /** + * Overridden to just skip at most a single chunk at a time + */ + @Override + public long skip(long n) throws IOException { + if (_inputStreamClosed) { + return -1; + } + int left = (_bufferLength - _bufferPosition); + // if none left, must read more: + if (left <= 0) { + // otherwise must read more to skip... + int b = read(); + if (b < 0) { // EOF + return -1; + } + // push it back to get accurate skip count + --_bufferPosition; + left = (_bufferLength - _bufferPosition); + } + // either way, just skip whatever we have decoded + if (left > n) { + left = (int) n; + } + _bufferPosition += left; + return left; + } + + /** + * Method that can be used to find underlying {@link java.io.InputStream} that we + * read from to get LZF encoded data to decode. Will never return null; + * although underlying stream may be closed (if this stream has been + * closed). + */ + public InputStream getUnderlyingInputStream() { + return _inputStream; + } + + /** + * Method that can be called to discard any already buffered input, read + * from input source. Specialized method that only makes sense if the + * underlying {@link java.io.InputStream} can be repositioned reliably. + */ + public void discardBuffered() { + _bufferPosition = _bufferLength = 0; + } + + /** + * Convenience method that will read and uncompress all data available, and + * write it using given {@link java.io.OutputStream}. This avoids having to make an + * intermediate copy of uncompressed data which would be needed when doing + * the same manually. + * + * @param out OutputStream to use for writing content + * @return Number of bytes written (uncompressed) + */ + public int readAndWrite(OutputStream out) throws IOException { + int total = 0; + + while (readyBuffer()) { + int avail = _bufferLength - _bufferPosition; + out.write(_decodedBytes, _bufferPosition, avail); + _bufferPosition += avail; // to ensure it looks like we consumed it all + total += avail; + } + return total; + } + + /** + * Fill the uncompressed bytes buffer by reading the underlying inputStream. 
+ * + * @throws java.io.IOException + */ + protected boolean readyBuffer() throws IOException { + if (_bufferPosition < _bufferLength) { + return true; + } + if (_inputStreamClosed) { + return false; + } + _bufferLength = _decoder.decodeChunk(_inputStream, _inputBuffer, _decodedBytes); + if (_bufferLength < 0) { + return false; + } + _bufferPosition = 0; + return (_bufferPosition < _bufferLength); + } +} diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFOutputStream.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFOutputStream.java new file mode 100644 index 0000000..b4c49f3 --- /dev/null +++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/LZFOutputStream.java @@ -0,0 +1,181 @@ +package org.xbib.io.compress.lzf; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * Decorator {@link java.io.OutputStream} implementation that will compress output using + * LZF compression algorithm, given uncompressed input to write. Its counterpart + * is {@link LZFInputStream}; although in some ways + * {@link LZFCompressingInputStream} can be seen as the opposite. + */ +public class LZFOutputStream extends OutputStream { + + private final ChunkEncoder _encoder; + private final BufferRecycler _recycler; + protected final OutputStream _outputStream; + protected byte[] _outputBuffer; + protected int _position = 0; + /** + * Configuration setting that governs whether basic 'flush()' should first + * complete a block or not.
Default value is 'true' + */ + protected boolean _cfgFinishBlockOnFlush = true; + /** + * Flag that indicates if we have already called '_outputStream.close()' (to + * avoid calling it multiple times) + */ + protected boolean _outputStreamClosed; + + /* + // Construction, configuration + */ + public LZFOutputStream(final OutputStream outputStream) { + this(outputStream, LZFChunk.MAX_CHUNK_LEN); + } + + public LZFOutputStream(final OutputStream outputStream, int bufsize) { + _encoder = new ChunkEncoder(bufsize); + _recycler = BufferRecycler.instance(); + _outputStream = outputStream; + _outputBuffer = _recycler.allocOutputBuffer(bufsize); + _outputStreamClosed = false; + } + + /** + * Method for defining whether call to {@link #flush} will also complete + * current block (similar to calling {@link #finishBlock()}) or not. + */ + public LZFOutputStream setFinishBlockOnFlush(boolean b) { + _cfgFinishBlockOnFlush = b; + return this; + } + + /* + // OutputStream impl + */ + @Override + public void write(final int singleByte) throws IOException { + checkNotClosed(); + if (_position >= _outputBuffer.length) { + writeCompressedBlock(); + } + _outputBuffer[_position++] = (byte) singleByte; + } + + @Override + public void write(final byte[] buffer, int offset, int length) throws IOException { + checkNotClosed(); + + final int BUFFER_LEN = _outputBuffer.length; + + // simple case first: buffering only (for trivially short writes) + int free = BUFFER_LEN - _position; + if (free >= length) { + System.arraycopy(buffer, offset, _outputBuffer, _position, length); + _position += length; + return; + } + // otherwise, copy whatever we can, flush + System.arraycopy(buffer, offset, _outputBuffer, _position, free); + offset += free; + length -= free; + _position += free; + writeCompressedBlock(); + + // then write intermediate full block, if any, without copying: + while (length >= BUFFER_LEN) { + _encoder.encodeAndWriteChunk(buffer, offset, BUFFER_LEN, _outputStream); + offset += BUFFER_LEN; + length -= BUFFER_LEN; + } + + // and finally, copy leftovers in buffer, if any + if (length > 0) { + System.arraycopy(buffer, offset, _outputBuffer, 0, length); + } + _position = length; + } + + @Override + public void flush() throws IOException { + checkNotClosed(); + if (_cfgFinishBlockOnFlush && _position > 0) { + writeCompressedBlock(); + } + _outputStream.flush(); + } + + @Override + public void close() throws IOException { + if (!_outputStreamClosed) { + if (_position > 0) { + writeCompressedBlock(); + } + _outputStream.flush(); + _encoder.close(); + byte[] buf = _outputBuffer; + if (buf != null) { + _outputBuffer = null; + _recycler.releaseOutputBuffer(buf); + } + _outputStreamClosed = true; + _outputStream.close(); + } + } + + /** + * Method that can be used to find underlying {@link java.io.OutputStream} that we + * write encoded LZF encoded data into, after compressing it. Will never + * return null; although underlying stream may be closed (if this stream has + * been closed). + */ + public OutputStream getUnderlyingOutputStream() { + return _outputStream; + } + + /** + * Accessor for checking whether call to "flush()" will first finish the + * current block or not + */ + public boolean getFinishBlockOnFlush() { + return _cfgFinishBlockOnFlush; + } + + /** + * Method that can be used to force completion of the current block, which + * means that all buffered data will be compressed into an LZF block. 
This + * typically results in lower compression ratio as larger blocks compress + * better; but may be necessary for network connections to ensure timely + * sending of data. + */ + public LZFOutputStream finishBlock() throws IOException { + checkNotClosed(); + if (_position > 0) { + writeCompressedBlock(); + } + return this; + } + + /** + * Compress and write the current block to the OutputStream + */ + protected void writeCompressedBlock() throws IOException { + int left = _position; + _position = 0; + int offset = 0; + + do { + int chunkLen = Math.min(LZFChunk.MAX_CHUNK_LEN, left); + _encoder.encodeAndWriteChunk(_outputBuffer, offset, chunkLen, _outputStream); + offset += chunkLen; + left -= chunkLen; + } while (left > 0); + } + + protected void checkNotClosed() throws IOException { + if (_outputStreamClosed) { + throw new IOException(getClass().getName() + " already closed"); + } + } +} diff --git a/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/VanillaChunkDecoder.java b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/VanillaChunkDecoder.java new file mode 100644 index 0000000..89d24ce --- /dev/null +++ b/io-compress-lzf/src/main/java/org/xbib/io/compress/lzf/VanillaChunkDecoder.java @@ -0,0 +1,267 @@ +package org.xbib.io.compress.lzf; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Safe {@link ChunkDecoder} implementation that can be used on any platform. + */ +public class VanillaChunkDecoder extends ChunkDecoder { + + public VanillaChunkDecoder() { + } + + @Override + public final int decodeChunk(final InputStream is, final byte[] inputBuffer, final byte[] outputBuffer) + throws IOException { + int bytesInOutput; + /* note: we do NOT read more than 5 bytes because otherwise might need to shuffle bytes + * for output buffer (could perhaps optimize in future?) 
+ */ + int bytesRead = readHeader(is, inputBuffer); + if ((bytesRead < HEADER_BYTES) + || inputBuffer[0] != LZFChunk.BYTE_Z || inputBuffer[1] != LZFChunk.BYTE_V) { + if (bytesRead == 0) { // probably fine, clean EOF + return -1; + } + throw new IOException("Corrupt input data, block did not start with 2 byte signature ('ZV') followed by type byte, 2-byte length)"); + } + int type = inputBuffer[2]; + int compLen = uint16(inputBuffer, 3); + if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed + readFully(is, false, outputBuffer, 0, compLen); + bytesInOutput = compLen; + } else { // compressed + readFully(is, true, inputBuffer, 0, 2 + compLen); // first 2 bytes are uncompressed length + int uncompLen = uint16(inputBuffer, 0); + decodeChunk(inputBuffer, 2, outputBuffer, 0, uncompLen); + bytesInOutput = uncompLen; + } + return bytesInOutput; + } + + @Override + public final void decodeChunk(byte[] in, int inPos, byte[] out, int outPos, int outEnd) + throws IOException { + do { + int ctrl = in[inPos++] & 255; + if (ctrl < LZFChunk.MAX_LITERAL) { // literal run + switch (ctrl) { + case 31: + out[outPos++] = in[inPos++]; + case 30: + out[outPos++] = in[inPos++]; + case 29: + out[outPos++] = in[inPos++]; + case 28: + out[outPos++] = in[inPos++]; + case 27: + out[outPos++] = in[inPos++]; + case 26: + out[outPos++] = in[inPos++]; + case 25: + out[outPos++] = in[inPos++]; + case 24: + out[outPos++] = in[inPos++]; + case 23: + out[outPos++] = in[inPos++]; + case 22: + out[outPos++] = in[inPos++]; + case 21: + out[outPos++] = in[inPos++]; + case 20: + out[outPos++] = in[inPos++]; + case 19: + out[outPos++] = in[inPos++]; + case 18: + out[outPos++] = in[inPos++]; + case 17: + out[outPos++] = in[inPos++]; + case 16: + out[outPos++] = in[inPos++]; + case 15: + out[outPos++] = in[inPos++]; + case 14: + out[outPos++] = in[inPos++]; + case 13: + out[outPos++] = in[inPos++]; + case 12: + out[outPos++] = in[inPos++]; + case 11: + out[outPos++] = in[inPos++]; + case 10: + out[outPos++] = in[inPos++]; + case 9: + out[outPos++] = in[inPos++]; + case 8: + out[outPos++] = in[inPos++]; + case 7: + out[outPos++] = in[inPos++]; + case 6: + out[outPos++] = in[inPos++]; + case 5: + out[outPos++] = in[inPos++]; + case 4: + out[outPos++] = in[inPos++]; + case 3: + out[outPos++] = in[inPos++]; + case 2: + out[outPos++] = in[inPos++]; + case 1: + out[outPos++] = in[inPos++]; + case 0: + out[outPos++] = in[inPos++]; + } + continue; + } + // back reference + int len = ctrl >> 5; + ctrl = -((ctrl & 0x1f) << 8) - 1; + if (len < 7) { // 2 bytes; length of 3 - 8 bytes + ctrl -= in[inPos++] & 255; + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + switch (len) { + case 6: + out[outPos] = out[outPos++ + ctrl]; + case 5: + out[outPos] = out[outPos++ + ctrl]; + case 4: + out[outPos] = out[outPos++ + ctrl]; + case 3: + out[outPos] = out[outPos++ + ctrl]; + case 2: + out[outPos] = out[outPos++ + ctrl]; + case 1: + out[outPos] = out[outPos++ + ctrl]; + } + continue; + } + + // long version (3 bytes, length of up to 264 bytes) + len = in[inPos++] & 255; + ctrl -= in[inPos++] & 255; + + // First: if there is no overlap, can just use arraycopy: + if ((ctrl + len) < -9) { + len += 9; + if (len <= 32) { + copyUpTo32WithSwitch(out, outPos + ctrl, out, outPos, len - 1); + } else { + System.arraycopy(out, outPos + ctrl, out, outPos, len); + } + outPos += len; + continue; + } + + // otherwise manual copy: so first just copy 9 bytes we know are needed + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = 
out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + + // then loop + // Odd: after extensive profiling, looks like magic number + // for unrolling is 4: with 8 performance is worse (even + // bit less than with no unrolling). + len += outPos; + final int end = len - 3; + while (outPos < end) { + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + out[outPos] = out[outPos++ + ctrl]; + } + switch (len - outPos) { + case 3: + out[outPos] = out[outPos++ + ctrl]; + case 2: + out[outPos] = out[outPos++ + ctrl]; + case 1: + out[outPos] = out[outPos++ + ctrl]; + } + } while (outPos < outEnd); + + // sanity check to guard against corrupt data: + if (outPos != outEnd) { + throw new IOException("Corrupt data: overrun in decompress, input offset " + inPos + ", output offset " + outPos); + } + } + + protected static void copyUpTo32WithSwitch(byte[] in, int inPos, byte[] out, int outPos, + int lengthMinusOne) { + switch (lengthMinusOne) { + case 31: + out[outPos++] = in[inPos++]; + case 30: + out[outPos++] = in[inPos++]; + case 29: + out[outPos++] = in[inPos++]; + case 28: + out[outPos++] = in[inPos++]; + case 27: + out[outPos++] = in[inPos++]; + case 26: + out[outPos++] = in[inPos++]; + case 25: + out[outPos++] = in[inPos++]; + case 24: + out[outPos++] = in[inPos++]; + case 23: + out[outPos++] = in[inPos++]; + case 22: + out[outPos++] = in[inPos++]; + case 21: + out[outPos++] = in[inPos++]; + case 20: + out[outPos++] = in[inPos++]; + case 19: + out[outPos++] = in[inPos++]; + case 18: + out[outPos++] = in[inPos++]; + case 17: + out[outPos++] = in[inPos++]; + case 16: + out[outPos++] = in[inPos++]; + case 15: + out[outPos++] = in[inPos++]; + case 14: + out[outPos++] = in[inPos++]; + case 13: + out[outPos++] = in[inPos++]; + case 12: + out[outPos++] = in[inPos++]; + case 11: + out[outPos++] = in[inPos++]; + case 10: + out[outPos++] = in[inPos++]; + case 9: + out[outPos++] = in[inPos++]; + case 8: + out[outPos++] = in[inPos++]; + case 7: + out[outPos++] = in[inPos++]; + case 6: + out[outPos++] = in[inPos++]; + case 5: + out[outPos++] = in[inPos++]; + case 4: + out[outPos++] = in[inPos++]; + case 3: + out[outPos++] = in[inPos++]; + case 2: + out[outPos++] = in[inPos++]; + case 1: + out[outPos++] = in[inPos++]; + case 0: + out[outPos] = in[inPos]; + default: + break; + } + } +} diff --git a/io-compress-lzf/src/test/java/org/xbib/io/compress/lzf/LZFTest.java b/io-compress-lzf/src/test/java/org/xbib/io/compress/lzf/LZFTest.java new file mode 100644 index 0000000..2a6ec49 --- /dev/null +++ b/io-compress-lzf/src/test/java/org/xbib/io/compress/lzf/LZFTest.java @@ -0,0 +1,25 @@ +package org.xbib.io.compress.lzf; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import org.junit.jupiter.api.Test; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; + +public class LZFTest { + + @Test + public void testHelloWorld() throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + LZFOutputStream zOut = new LZFOutputStream(out); + ObjectOutputStream objOut = new ObjectOutputStream(zOut); + String helloWorld = "Hello World!"; + objOut.writeObject(helloWorld); + zOut.close(); + ByteArrayInputStream in = 
new ByteArrayInputStream(out.toByteArray()); + LZFInputStream zIn = new LZFInputStream(in); + ObjectInputStream objIn = new ObjectInputStream(zIn); + assertEquals("Hello World!", objIn.readObject()); + } +} diff --git a/io-compress-xz/src/main/java/module-info.java b/io-compress-xz/src/main/java/module-info.java new file mode 100644 index 0000000..834968b --- /dev/null +++ b/io-compress-xz/src/main/java/module-info.java @@ -0,0 +1,3 @@ +module org.xbib.io.compress.xz { + exports org.xbib.io.compress.xz; +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/ARMOptions.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/ARMOptions.java new file mode 100644 index 0000000..6fd7793 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/ARMOptions.java @@ -0,0 +1,28 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.simple.ARM; + +import java.io.InputStream; + +/** + * BCJ filter for little endian ARM instructions. + */ +public class ARMOptions extends BCJOptions { + private static final int ALIGNMENT = 4; + + public ARMOptions() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new ARM(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new ARM(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.ARM_FILTER_ID); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/ARMThumbOptions.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/ARMThumbOptions.java new file mode 100644 index 0000000..dac0efc --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/ARMThumbOptions.java @@ -0,0 +1,28 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.simple.ARMThumb; + +import java.io.InputStream; + +/** + * BCJ filter for little endian ARM-Thumb instructions. 
+ */ +public class ARMThumbOptions extends BCJOptions { + private static final int ALIGNMENT = 2; + + public ARMThumbOptions() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new ARMThumb(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new ARMThumb(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.ARMTHUMB_FILTER_ID); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJCoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJCoder.java new file mode 100644 index 0000000..ca8852a --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJCoder.java @@ -0,0 +1,26 @@ +package org.xbib.io.compress.xz; + +abstract class BCJCoder implements FilterCoder { + public static final long X86_FILTER_ID = 0x04; + public static final long POWERPC_FILTER_ID = 0x05; + public static final long IA64_FILTER_ID = 0x06; + public static final long ARM_FILTER_ID = 0x07; + public static final long ARMTHUMB_FILTER_ID = 0x08; + public static final long SPARC_FILTER_ID = 0x09; + + public static boolean isBCJFilterID(long filterID) { + return filterID >= 0x04 && filterID <= 0x09; + } + + public boolean changesSize() { + return false; + } + + public boolean nonLastOK() { + return true; + } + + public boolean lastOK() { + return false; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJDecoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJDecoder.java new file mode 100644 index 0000000..fd219f9 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJDecoder.java @@ -0,0 +1,62 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.simple.ARM; +import org.xbib.io.compress.xz.simple.ARMThumb; +import org.xbib.io.compress.xz.simple.IA64; +import org.xbib.io.compress.xz.simple.PowerPC; +import org.xbib.io.compress.xz.simple.SPARC; +import org.xbib.io.compress.xz.simple.SimpleFilter; +import org.xbib.io.compress.xz.simple.X86; + +import java.io.InputStream; + +class BCJDecoder extends BCJCoder implements FilterDecoder { + private final long filterID; + private final int startOffset; + + BCJDecoder(long filterID, byte[] props) + throws UnsupportedOptionsException { + assert isBCJFilterID(filterID); + this.filterID = filterID; + + if (props.length == 0) { + startOffset = 0; + } else if (props.length == 4) { + int n = 0; + for (int i = 0; i < 4; ++i) { + n |= (props[i] & 0xFF) << (i * 8); + } + + startOffset = n; + } else { + throw new UnsupportedOptionsException( + "Unsupported BCJ filter properties"); + } + } + + public int getMemoryUsage() { + return SimpleInputStream.getMemoryUsage(); + } + + public InputStream getInputStream(InputStream in) { + SimpleFilter simpleFilter = null; + + if (filterID == X86_FILTER_ID) { + simpleFilter = new X86(false, startOffset); + } else if (filterID == POWERPC_FILTER_ID) { + simpleFilter = new PowerPC(false, startOffset); + } else if (filterID == IA64_FILTER_ID) { + simpleFilter = new IA64(false, startOffset); + } else if (filterID == ARM_FILTER_ID) { + simpleFilter = new ARM(false, startOffset); + } else if (filterID == ARMTHUMB_FILTER_ID) { + simpleFilter = new ARMThumb(false, startOffset); + } else if (filterID == SPARC_FILTER_ID) { + simpleFilter = new SPARC(false, startOffset); + } else { + assert false; + } + + return new SimpleInputStream(in, 
simpleFilter); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJEncoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJEncoder.java new file mode 100644 index 0000000..bb2ca17 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJEncoder.java @@ -0,0 +1,40 @@ +package org.xbib.io.compress.xz; + +class BCJEncoder extends BCJCoder implements FilterEncoder { + private final BCJOptions options; + private final long filterID; + private final byte[] props; + + BCJEncoder(BCJOptions options, long filterID) { + assert isBCJFilterID(filterID); + int startOffset = options.getStartOffset(); + + if (startOffset == 0) { + props = new byte[0]; + } else { + props = new byte[4]; + for (int i = 0; i < 4; ++i) { + props[i] = (byte) (startOffset >>> (i * 8)); + } + } + + this.filterID = filterID; + this.options = (BCJOptions) options.clone(); + } + + public long getFilterID() { + return filterID; + } + + public byte[] getFilterProps() { + return props; + } + + public boolean supportsFlushing() { + return false; + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return options.getOutputStream(out); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJOptions.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJOptions.java new file mode 100644 index 0000000..f5315cd --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BCJOptions.java @@ -0,0 +1,44 @@ +package org.xbib.io.compress.xz; + +/** + * + */ +abstract class BCJOptions extends FilterOptions { + private final int alignment; + int startOffset = 0; + + BCJOptions(int alignment) { + this.alignment = alignment; + } + + public void setStartOffset(int startOffset) + throws UnsupportedOptionsException { + if ((startOffset & (alignment - 1)) != 0) { + throw new UnsupportedOptionsException( + "Start offset must be a multiple of " + alignment); + } + + this.startOffset = startOffset; + } + + public int getStartOffset() { + return startOffset; + } + + public int getEncoderMemoryUsage() { + return SimpleOutputStream.getMemoryUsage(); + } + + public int getDecoderMemoryUsage() { + return SimpleInputStream.getMemoryUsage(); + } + + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + assert false; + throw new RuntimeException(); + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BlockInputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BlockInputStream.java new file mode 100644 index 0000000..5d52101 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BlockInputStream.java @@ -0,0 +1,293 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.check.Check; +import org.xbib.io.compress.xz.common.DecoderUtil; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +/** + * + */ +class BlockInputStream extends InputStream { + private final InputStream in; + private final DataInputStream inData; + private final CountingInputStream inCounted; + private InputStream filterChain; + private final Check check; + + private long uncompressedSizeInHeader = -1; + private long compressedSizeInHeader = -1; + private long compressedSizeLimit; + private final int headerSize; + private long uncompressedSize = 0; + private boolean endReached = false; + + public BlockInputStream(InputStream in, Check check, int 
memoryLimit,
+                            long unpaddedSizeInIndex,
+                            long uncompressedSizeInIndex)
+            throws IOException, IndexIndicatorException {
+        this.in = in;
+        this.check = check;
+        inData = new DataInputStream(in);
+
+        byte[] buf = new byte[DecoderUtil.BLOCK_HEADER_SIZE_MAX];
+
+        // Block Header Size or Index Indicator
+        inData.readFully(buf, 0, 1);
+
+        // See if this begins the Index field.
+        if (buf[0] == 0x00) {
+            throw new IndexIndicatorException();
+        }
+
+        // Read the rest of the Block Header.
+        headerSize = 4 * ((buf[0] & 0xFF) + 1);
+        inData.readFully(buf, 1, headerSize - 1);
+
+        // Validate the CRC32.
+        if (!DecoderUtil.isCRC32Valid(buf, 0, headerSize - 4, headerSize - 4)) {
+            throw new CorruptedInputException("XZ Block Header is corrupt");
+        }
+
+        // Check for reserved bits in Block Flags.
+        if ((buf[1] & 0x3C) != 0) {
+            throw new UnsupportedOptionsException(
+                    "Unsupported options in XZ Block Header");
+        }
+
+        // Memory for the Filter Flags field
+        int filterCount = (buf[1] & 0x03) + 1;
+        long[] filterIDs = new long[filterCount];
+        byte[][] filterProps = new byte[filterCount][];
+
+        // Use a stream to parse the fields after the Block Flags field.
+        // Exclude the CRC32 field at the end.
+        ByteArrayInputStream bufStream = new ByteArrayInputStream(
+                buf, 2, headerSize - 6);
+
+        try {
+            // Set the maximum valid compressed size. This is overridden
+            // by the value from the Compressed Size field if it is present.
+            compressedSizeLimit = (DecoderUtil.VLI_MAX & ~3)
+                    - headerSize - check.getSize();
+
+            // Decode and validate Compressed Size if the relevant flag
+            // is set in Block Flags.
+            if ((buf[1] & 0x40) != 0x00) {
+                compressedSizeInHeader = DecoderUtil.decodeVLI(bufStream);
+
+                if (compressedSizeInHeader == 0
+                        || compressedSizeInHeader > compressedSizeLimit) {
+                    throw new CorruptedInputException();
+                }
+
+                compressedSizeLimit = compressedSizeInHeader;
+            }
+
+            // Decode Uncompressed Size if the relevant flag is set
+            // in Block Flags.
+            if ((buf[1] & 0x80) != 0x00) {
+                uncompressedSizeInHeader = DecoderUtil.decodeVLI(bufStream);
+            }
+
+            // Decode Filter Flags.
+            for (int i = 0; i < filterCount; ++i) {
+                filterIDs[i] = DecoderUtil.decodeVLI(bufStream);
+
+                long filterPropsSize = DecoderUtil.decodeVLI(bufStream);
+                if (filterPropsSize > bufStream.available()) {
+                    throw new CorruptedInputException();
+                }
+
+                filterProps[i] = new byte[(int) filterPropsSize];
+                bufStream.read(filterProps[i]);
+            }
+
+        } catch (IOException e) {
+            throw new CorruptedInputException("XZ Block Header is corrupt");
+        }
+
+        // Check that the remaining bytes are zero.
+        for (int i = bufStream.available(); i > 0; --i) {
+            if (bufStream.read() != 0x00) {
+                throw new UnsupportedOptionsException(
+                        "Unsupported options in XZ Block Header");
+            }
+        }
+
+        // Validate the Block Header against the Index when doing
+        // random access reading.
+        if (unpaddedSizeInIndex != -1) {
+            // Compressed Data must be at least one byte, so if Block Header
+            // and Check alone take as much or more space than the size
+            // stored in the Index, the file is corrupt.
+            int headerAndCheckSize = headerSize + check.getSize();
+            if (headerAndCheckSize >= unpaddedSizeInIndex) {
+                throw new CorruptedInputException(
+                        "XZ Index does not match a Block Header");
+            }
+
+            // The compressed size calculated from Unpadded Size must
+            // match the value stored in the Compressed Size field in
+            // the Block Header.
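+            // Unpadded Size in the Index covers Block Header + Compressed Data
+            // + Check (the same sum that getUnpaddedSize() below reports), so
+            // subtracting header and check sizes yields the Compressed Data
+            // size; e.g. a 12-byte header with a 4-byte CRC32 and unpadded
+            // size 100 implies 84 bytes of Compressed Data.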
+ long compressedSizeFromIndex + = unpaddedSizeInIndex - headerAndCheckSize; + if (compressedSizeFromIndex > compressedSizeLimit + || (compressedSizeInHeader != -1 + && compressedSizeInHeader != compressedSizeFromIndex)) { + throw new CorruptedInputException( + "XZ Index does not match a Block Header"); + } + + // The uncompressed size stored in the Index must match + // the value stored in the Uncompressed Size field in + // the Block Header. + if (uncompressedSizeInHeader != -1 + && uncompressedSizeInHeader != uncompressedSizeInIndex) { + throw new CorruptedInputException( + "XZ Index does not match a Block Header"); + } + + // For further validation, pretend that the values from the Index + // were stored in the Block Header. + compressedSizeLimit = compressedSizeFromIndex; + compressedSizeInHeader = compressedSizeFromIndex; + uncompressedSizeInHeader = uncompressedSizeInIndex; + } + + // Check if the Filter IDs are supported, decode + // the Filter Properties, and check that they are + // supported by this decoder implementation. + FilterDecoder[] filters = new FilterDecoder[filterIDs.length]; + + for (int i = 0; i < filters.length; ++i) { + if (filterIDs[i] == LZMA2Coder.FILTER_ID) { + filters[i] = new LZMA2Decoder(filterProps[i]); + } else if (filterIDs[i] == DeltaCoder.FILTER_ID) { + filters[i] = new DeltaDecoder(filterProps[i]); + } else if (BCJDecoder.isBCJFilterID(filterIDs[i])) { + filters[i] = new BCJDecoder(filterIDs[i], filterProps[i]); + } else { + throw new UnsupportedOptionsException( + "Unknown Filter ID " + filterIDs[i]); + } + } + + RawCoder.validate(filters); + + // Check the memory usage limit. + if (memoryLimit >= 0) { + int memoryNeeded = 0; + for (int i = 0; i < filters.length; ++i) { + memoryNeeded += filters[i].getMemoryUsage(); + } + + if (memoryNeeded > memoryLimit) { + throw new MemoryLimitException(memoryNeeded, memoryLimit); + } + } + + // Use an input size counter to calculate + // the size of the Compressed Data field. + inCounted = new CountingInputStream(in); + + // Initialize the filter chain. + filterChain = inCounted; + for (int i = filters.length - 1; i >= 0; --i) { + filterChain = filters[i].getInputStream(filterChain); + } + } + + public int read() throws IOException { + byte[] buf = new byte[1]; + return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF); + } + + public int read(byte[] buf, int off, int len) throws IOException { + if (endReached) { + return -1; + } + + int ret = filterChain.read(buf, off, len); + + if (ret > 0) { + check.update(buf, off, ret); + uncompressedSize += ret; + + // Catch invalid values. + long compressedSize = inCounted.getSize(); + if (compressedSize < 0 + || compressedSize > compressedSizeLimit + || uncompressedSize < 0 + || (uncompressedSizeInHeader != -1 + && uncompressedSize > uncompressedSizeInHeader)) { + throw new CorruptedInputException(); + } + + // Check the Block integrity as soon as possible: + // - The filter chain shouldn't return less than requested + // unless it hit the end of the input. + // - If the uncompressed size is known, we know when there + // shouldn't be more data coming. We still need to read + // one byte to let the filter chain catch errors and to + // let it read end of payload marker(s). 
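+                // e.g. with Uncompressed Size 1000 stored in the header: once
+                // the 1000th byte has been produced, one further read() must
+                // return -1 (after the filter chain has consumed its
+                // end-of-payload marker); anything else means corruption.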
+ if (ret < len || uncompressedSize == uncompressedSizeInHeader) { + if (filterChain.read() != -1) { + throw new CorruptedInputException(); + } + + validate(); + endReached = true; + } + } else if (ret == -1) { + validate(); + endReached = true; + } + + return ret; + } + + private void validate() throws IOException { + long compressedSize = inCounted.getSize(); + + // Validate Compressed Size and Uncompressed Size if they were + // present in Block Header. + if ((compressedSizeInHeader != -1 + && compressedSizeInHeader != compressedSize) + || (uncompressedSizeInHeader != -1 + && uncompressedSizeInHeader != uncompressedSize)) { + throw new CorruptedInputException(); + } + + // Block Padding bytes must be zeros. + while ((compressedSize++ & 3) != 0) { + if (inData.readUnsignedByte() != 0x00) { + throw new CorruptedInputException(); + } + } + + // Validate the integrity check. + byte[] storedCheck = new byte[check.getSize()]; + inData.readFully(storedCheck); + if (!Arrays.equals(check.finish(), storedCheck)) { + throw new CorruptedInputException("Integrity check (" + + check.getName() + ") does not match"); + } + } + + public int available() throws IOException { + return filterChain.available(); + } + + public long getUnpaddedSize() { + return headerSize + inCounted.getSize() + check.getSize(); + } + + public long getUncompressedSize() { + return uncompressedSize; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BlockOutputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BlockOutputStream.java new file mode 100644 index 0000000..3119c89 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/BlockOutputStream.java @@ -0,0 +1,131 @@ + +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.check.Check; +import org.xbib.io.compress.xz.common.EncoderUtil; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +class BlockOutputStream extends FinishableOutputStream { + private final OutputStream out; + private final CountingOutputStream outCounted; + private FinishableOutputStream filterChain; + private final Check check; + + private final int headerSize; + private final long compressedSizeLimit; + private long uncompressedSize = 0; + + public BlockOutputStream(OutputStream out, FilterEncoder[] filters, + Check check) throws IOException { + this.out = out; + this.check = check; + + // Initialize the filter chain. + outCounted = new CountingOutputStream(out); + filterChain = outCounted; + for (int i = filters.length - 1; i >= 0; --i) { + filterChain = filters[i].getOutputStream(filterChain); + } + + // Prepare to encode the Block Header field. + ByteArrayOutputStream bufStream = new ByteArrayOutputStream(); + + // Write a dummy Block Header Size field. The real value is written + // once everything else except CRC32 has been written. + bufStream.write(0x00); + + // Write Block Flags. Storing Compressed Size or Uncompressed Size + // isn't supported for now. 
+ bufStream.write(filters.length - 1); + + // List of Filter Flags + for (int i = 0; i < filters.length; ++i) { + EncoderUtil.encodeVLI(bufStream, filters[i].getFilterID()); + byte[] filterProps = filters[i].getFilterProps(); + EncoderUtil.encodeVLI(bufStream, filterProps.length); + bufStream.write(filterProps); + } + + // Header Padding + while ((bufStream.size() & 3) != 0) { + bufStream.write(0x00); + } + + byte[] buf = bufStream.toByteArray(); + + // Total size of the Block Header: Take the size of the CRC32 field + // into account. + headerSize = buf.length + 4; + + // This is just a sanity check. + if (headerSize > EncoderUtil.BLOCK_HEADER_SIZE_MAX) { + throw new UnsupportedOptionsException(); + } + + // Block Header Size + buf[0] = (byte) (buf.length / 4); + + // Write the Block Header field to the output stream. + out.write(buf); + EncoderUtil.writeCRC32(out, buf); + + // Calculate the maximum allowed size of the Compressed Data field. + // It is hard to exceed it so this is mostly to be pedantic. + compressedSizeLimit = (EncoderUtil.VLI_MAX & ~3) + - headerSize - check.getSize(); + } + + public void write(int b) throws IOException { + byte[] buf = new byte[1]; + buf[0] = (byte) b; + write(buf, 0, 1); + } + + public void write(byte[] buf, int off, int len) throws IOException { + filterChain.write(buf, off, len); + check.update(buf, off, len); + uncompressedSize += len; + validate(); + } + + public void flush() throws IOException { + filterChain.flush(); + validate(); + } + + public void finish() throws IOException { + // Finish the Compressed Data field. + filterChain.finish(); + validate(); + + // Block Padding + for (long i = outCounted.getSize(); (i & 3) != 0; ++i) { + out.write(0x00); + } + + // Check + out.write(check.finish()); + } + + private void validate() throws IOException { + long compressedSize = outCounted.getSize(); + + // It is very hard to trigger this exception. + // This is just to be pedantic. + if (compressedSize < 0 || compressedSize > compressedSizeLimit + || uncompressedSize < 0) { + throw new XZIOException("XZ Stream has grown too big"); + } + } + + public long getUnpaddedSize() { + return headerSize + outCounted.getSize() + check.getSize(); + } + + public long getUncompressedSize() { + return uncompressedSize; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/CorruptedInputException.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/CorruptedInputException.java new file mode 100644 index 0000000..64147e3 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/CorruptedInputException.java @@ -0,0 +1,28 @@ +package org.xbib.io.compress.xz; + +/** + * Thrown when the compressed input data is corrupt. + * However, it is possible that some or all of the data + * already read from the input stream was corrupt too. + */ +public class CorruptedInputException extends XZIOException { + private static final long serialVersionUID = 3L; + + /** + * Creates a new CorruptedInputException with + * the default error detail message. + */ + public CorruptedInputException() { + super("Compressed data is corrupt"); + } + + /** + * Creates a new CorruptedInputException with + * the specified error detail message. 
+ * + * @param s error detail message + */ + public CorruptedInputException(String s) { + super(s); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/CountingInputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/CountingInputStream.java new file mode 100644 index 0000000..78a97a7 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/CountingInputStream.java @@ -0,0 +1,38 @@ +package org.xbib.io.compress.xz; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Counts the number of bytes read from an input stream. + */ +class CountingInputStream extends FilterInputStream { + private long size = 0; + + public CountingInputStream(InputStream in) { + super(in); + } + + public int read() throws IOException { + int ret = in.read(); + if (ret != -1 && size >= 0) { + ++size; + } + + return ret; + } + + public int read(byte[] b, int off, int len) throws IOException { + int ret = in.read(b, off, len); + if (ret > 0 && size >= 0) { + size += ret; + } + + return ret; + } + + public long getSize() { + return size; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/CountingOutputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/CountingOutputStream.java new file mode 100644 index 0000000..5c2babc --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/CountingOutputStream.java @@ -0,0 +1,47 @@ +package org.xbib.io.compress.xz; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * Counts the number of bytes written to an output stream. + *
+ * The finish method does nothing. + * This is FinishableOutputStream instead + * of OutputStream solely because it allows + * using this as the output stream for a chain of raw filters. + */ +class CountingOutputStream extends FinishableOutputStream { + private final OutputStream out; + private long size = 0; + + public CountingOutputStream(OutputStream out) { + this.out = out; + } + + public void write(int b) throws IOException { + out.write(b); + if (size >= 0) { + ++size; + } + } + + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + if (size >= 0) { + size += len; + } + } + + public void flush() throws IOException { + out.flush(); + } + + public void close() throws IOException { + out.close(); + } + + public long getSize() { + return size; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaCoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaCoder.java new file mode 100644 index 0000000..f834649 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaCoder.java @@ -0,0 +1,17 @@ +package org.xbib.io.compress.xz; + +abstract class DeltaCoder implements FilterCoder { + public static final long FILTER_ID = 0x03; + + public boolean changesSize() { + return false; + } + + public boolean nonLastOK() { + return true; + } + + public boolean lastOK() { + return false; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaDecoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaDecoder.java new file mode 100644 index 0000000..3f6c458 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaDecoder.java @@ -0,0 +1,24 @@ +package org.xbib.io.compress.xz; + +import java.io.InputStream; + +class DeltaDecoder extends DeltaCoder implements FilterDecoder { + private final int distance; + + DeltaDecoder(byte[] props) throws UnsupportedOptionsException { + if (props.length != 1) { + throw new UnsupportedOptionsException( + "Unsupported Delta filter properties"); + } + + distance = (props[0] & 0xFF) + 1; + } + + public int getMemoryUsage() { + return 1; + } + + public InputStream getInputStream(InputStream in) { + return new DeltaInputStream(in, distance); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaEncoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaEncoder.java new file mode 100644 index 0000000..705fda4 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaEncoder.java @@ -0,0 +1,30 @@ +package org.xbib.io.compress.xz; + +/** + * + */ +class DeltaEncoder extends DeltaCoder implements FilterEncoder { + private final DeltaOptions options; + private final byte[] props = new byte[1]; + + DeltaEncoder(DeltaOptions options) { + props[0] = (byte) (options.getDistance() - 1); + this.options = (DeltaOptions) options.clone(); + } + + public long getFilterID() { + return FILTER_ID; + } + + public byte[] getFilterProps() { + return props; + } + + public boolean supportsFlushing() { + return true; + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return options.getOutputStream(out); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaInputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaInputStream.java new file mode 100644 index 0000000..96b8bda --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaInputStream.java @@ -0,0 +1,138 @@ +package 
org.xbib.io.compress.xz;
+
+import org.xbib.io.compress.xz.delta.DeltaDecoder;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+
+/**
+ * Decodes raw Delta-filtered data (no XZ headers).
+ * The delta filter doesn't change the size of the data and thus it
+ * cannot have an end-of-payload marker. It will simply decode until
+ * its input stream indicates end of input.
+ */
+public class DeltaInputStream extends InputStream {
+    /**
+     * Smallest supported delta calculation distance.
+     */
+    public static final int DISTANCE_MIN = 1;
+
+    /**
+     * Largest supported delta calculation distance.
+     */
+    public static final int DISTANCE_MAX = 256;
+
+    private InputStream in;
+    private final DeltaDecoder delta;
+
+    private IOException exception = null;
+
+    /**
+     * Creates a new Delta decoder with the given delta calculation distance.
+     *
+     * @param in       input stream from which Delta filtered data
+     *                 is read
+     * @param distance delta calculation distance, must be in the
+     *                 range [DISTANCE_MIN,
+     *                 DISTANCE_MAX]
+     */
+    public DeltaInputStream(InputStream in, int distance) {
+        // Check for null because otherwise null isn't detected
+        // in this constructor.
+        if (in == null) {
+            throw new NullPointerException();
+        }
+
+        this.in = in;
+        this.delta = new DeltaDecoder(distance);
+    }
+
+    /**
+     * Decode the next byte from this input stream.
+     *
+     * @return the next decoded byte, or -1 to indicate
+     * the end of input on the input stream in
+     * @throws java.io.IOException may be thrown by in
+     */
+    public int read() throws IOException {
+        byte[] buf = new byte[1];
+        return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF);
+    }
+
+    /**
+     * Decode into an array of bytes.
+     * This calls in.read(buf, off, len) and defilters the
+     * returned data.
+     *
+     * @param buf target buffer for decoded data
+     * @param off start offset in buf
+     * @param len maximum number of bytes to read
+     * @return number of bytes read, or -1 to indicate
+     * the end of the input stream in
+     * @throws XZIOException       if the stream has been closed
+     * @throws java.io.IOException may be thrown by the underlying input
+     *                             stream in
+     */
+    public int read(byte[] buf, int off, int len) throws IOException {
+        if (len == 0) {
+            return 0;
+        }
+
+        if (in == null) {
+            throw new XZIOException("Stream closed");
+        }
+
+        if (exception != null) {
+            throw exception;
+        }
+
+        int size;
+        try {
+            size = in.read(buf, off, len);
+        } catch (IOException e) {
+            exception = e;
+            throw e;
+        }
+
+        if (size == -1) {
+            return -1;
+        }
+
+        delta.decode(buf, off, size);
+        return size;
+    }
+
+    /**
+     * Calls in.available().
+     *
+     * @return the value returned by in.available()
+     */
+    public int available() throws IOException {
+        if (in == null) {
+            throw new XZIOException("Stream closed");
+        }
+
+        if (exception != null) {
+            throw exception;
+        }
+
+        return in.available();
+    }
+
+    /**
+     * Closes the stream and calls in.close().
+     * If the stream was already closed, this does nothing.
+     *
+     * @throws java.io.IOException if thrown by in.close()
+     */
+    public void close() throws IOException {
+        if (in != null) {
+            try {
+                in.close();
+            } finally {
+                in = null;
+            }
+        }
+    }
+}
diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaOptions.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaOptions.java
new file mode 100644
index 0000000..9242092
--- /dev/null
+++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaOptions.java
@@ -0,0 +1,92 @@
+package org.xbib.io.compress.xz;
+
+import java.io.InputStream;
+
+/**
+ * Delta filter options.
The Delta filter can be used only as a non-last + * filter in the chain, for example Delta + LZMA2. + * Currently only simple byte-wise delta is supported. The only option + * is the delta distance, which you should set to match your data. + * It's not possible to provide a generic default value for it. + * For example, with distance = 2 and eight-byte input + * A1 B1 A2 B3 A3 B5 A4 B7, the output will be A1 B1 01 02 01 02 01 02. + * The Delta filter can be good with uncompressed bitmap images. It can + * also help with PCM audio, although special-purpose compressors like + * FLAC will give much smaller result at much better compression speed. + */ +public class DeltaOptions extends FilterOptions { + /** + * Smallest supported delta calculation distance. + */ + public static final int DISTANCE_MIN = 1; + + /** + * Largest supported delta calculation distance. + */ + public static final int DISTANCE_MAX = 256; + + private int distance = DISTANCE_MIN; + + /** + * Creates new Delta options and sets the delta distance to 1 byte. + */ + public DeltaOptions() { + } + + /** + * Creates new Delta options and sets the distance to the given value. + */ + public DeltaOptions(int distance) throws UnsupportedOptionsException { + setDistance(distance); + } + + /** + * Sets the delta distance in bytes. The new distance must be in + * the range [DISTANCE_MIN, DISTANCE_MAX]. + */ + public void setDistance(int distance) throws UnsupportedOptionsException { + if (distance < DISTANCE_MIN || distance > DISTANCE_MAX) { + throw new UnsupportedOptionsException( + "Delta distance must be in the range [" + DISTANCE_MIN + + ", " + DISTANCE_MAX + "]: " + distance); + } + + this.distance = distance; + } + + /** + * Gets the delta distance. + */ + public int getDistance() { + return distance; + } + + public int getEncoderMemoryUsage() { + return DeltaOutputStream.getMemoryUsage(); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new DeltaOutputStream(out, this); + } + + public int getDecoderMemoryUsage() { + return 1; + } + + public InputStream getInputStream(InputStream in) { + return new DeltaInputStream(in, distance); + } + + FilterEncoder getFilterEncoder() { + return new DeltaEncoder(this); + } + + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + assert false; + throw new RuntimeException(); + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaOutputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaOutputStream.java new file mode 100644 index 0000000..894b75c --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/DeltaOutputStream.java @@ -0,0 +1,115 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.delta.DeltaEncoder; + +import java.io.IOException; + +/** + * + */ +class DeltaOutputStream extends FinishableOutputStream { + private static final int TMPBUF_SIZE = 4096; + + private FinishableOutputStream out; + private final DeltaEncoder delta; + private final byte[] tmpbuf = new byte[TMPBUF_SIZE]; + + private boolean finished = false; + private IOException exception = null; + + static int getMemoryUsage() { + return 1 + TMPBUF_SIZE / 1024; + } + + DeltaOutputStream(FinishableOutputStream out, DeltaOptions options) { + this.out = out; + delta = new DeltaEncoder(options.getDistance()); + } + + public void write(int b) throws IOException { + byte[] buf = new byte[1]; + buf[0] = (byte) b; + write(buf, 0, 1); + } + + public void 
write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) { + throw new IndexOutOfBoundsException(); + } + + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished"); + } + + try { + while (len > TMPBUF_SIZE) { + delta.encode(buf, off, TMPBUF_SIZE, tmpbuf); + out.write(tmpbuf); + off += TMPBUF_SIZE; + len -= TMPBUF_SIZE; + } + + delta.encode(buf, off, len, tmpbuf); + out.write(tmpbuf, 0, len); + } catch (IOException e) { + exception = e; + throw e; + } + } + + public void flush() throws IOException { + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished or closed"); + } + + try { + out.flush(); + } catch (IOException e) { + exception = e; + throw e; + } + } + + public void finish() throws IOException { + if (!finished) { + if (exception != null) { + throw exception; + } + + try { + out.finish(); + } catch (IOException e) { + exception = e; + throw e; + } + + finished = true; + } + } + + public void close() throws IOException { + if (out != null) { + try { + out.close(); + } catch (IOException e) { + if (exception == null) { + exception = e; + } + } + + out = null; + } + + if (exception != null) { + throw exception; + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterCoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterCoder.java new file mode 100644 index 0000000..7e2f5c2 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterCoder.java @@ -0,0 +1,11 @@ + +package org.xbib.io.compress.xz; + +interface FilterCoder { + + boolean changesSize(); + + boolean nonLastOK(); + + boolean lastOK(); +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterDecoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterDecoder.java new file mode 100644 index 0000000..6315b21 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterDecoder.java @@ -0,0 +1,13 @@ +package org.xbib.io.compress.xz; + +import java.io.InputStream; + +/** + * + */ +interface FilterDecoder extends FilterCoder { + + int getMemoryUsage(); + + InputStream getInputStream(InputStream in); +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterEncoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterEncoder.java new file mode 100644 index 0000000..4e9407d --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterEncoder.java @@ -0,0 +1,13 @@ + +package org.xbib.io.compress.xz; + +interface FilterEncoder extends FilterCoder { + + long getFilterID(); + + byte[] getFilterProps(); + + boolean supportsFlushing(); + + FinishableOutputStream getOutputStream(FinishableOutputStream out); +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterOptions.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterOptions.java new file mode 100644 index 0000000..700bd43 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FilterOptions.java @@ -0,0 +1,57 @@ +package org.xbib.io.compress.xz; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Base class for filter-specific options classes. 
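+ * For example, a raw LZMA2 encoder stream can be set up like this (a
+ * sketch; rawOut stands for any java.io.OutputStream of your choice):
+ *
+ *   FilterOptions options = new LZMA2Options();
+ *   FinishableOutputStream enc =
+ *           options.getOutputStream(new FinishableWrapperOutputStream(rawOut));
+ *   // write the data to enc, then call enc.finish()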
+ */
+public abstract class FilterOptions implements Cloneable {
+    public static int getEncoderMemoryUsage(FilterOptions[] options) {
+        int m = 0;
+        for (FilterOptions option : options) {
+            m += option.getEncoderMemoryUsage();
+        }
+        return m;
+    }
+
+    public static int getDecoderMemoryUsage(FilterOptions[] options) {
+        int m = 0;
+        for (FilterOptions option : options) {
+            m += option.getDecoderMemoryUsage();
+        }
+        return m;
+    }
+
+    /**
+     * Gets how much memory the encoder will need with these options.
+     */
+    public abstract int getEncoderMemoryUsage();
+
+    /**
+     * Gets a raw (no XZ headers) encoder output stream using these options.
+     * Raw streams are an advanced feature. In most cases you want to store
+     * the compressed data in the .xz container format instead of using
+     * a raw stream. To use this filter in a .xz file, pass this object
+     * to XZOutputStream.
+     */
+    public abstract FinishableOutputStream getOutputStream(
+            FinishableOutputStream out);
+
+    /**
+     * Gets how much memory the decoder will need to decompress the data
+     * that was encoded with these options.
+     */
+    public abstract int getDecoderMemoryUsage();
+
+    /**
+     * Gets a raw (no XZ headers) decoder input stream using these options.
+     */
+    public abstract InputStream getInputStream(InputStream in)
+            throws IOException;
+
+    abstract FilterEncoder getFilterEncoder();
+
+    FilterOptions() {
+    }
+}
diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FinishableOutputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FinishableOutputStream.java
new file mode 100644
index 0000000..0cbe249
--- /dev/null
+++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FinishableOutputStream.java
@@ -0,0 +1,25 @@
+
+package org.xbib.io.compress.xz;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * Output stream that supports finishing without closing
+ * the underlying stream.
+ */
+public abstract class FinishableOutputStream extends OutputStream {
+    /**
+     * Finish the stream without closing the underlying stream.
+     * No more data may be written to the stream after finishing.
+     * The finish method of FinishableOutputStream
+     * does nothing. Subclasses should override it if they need finishing
+     * support, which is the case, for example, with compressors.
+     *
+     * @throws java.io.IOException
+     */
+    public void finish() throws IOException {
+    }
+}
diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FinishableWrapperOutputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FinishableWrapperOutputStream.java
new file mode 100644
index 0000000..129afd4
--- /dev/null
+++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/FinishableWrapperOutputStream.java
@@ -0,0 +1,62 @@
+
+package org.xbib.io.compress.xz;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * Wraps an output stream to a finishable output stream for use with
+ * raw encoders. This is not needed for XZ compression and thus most
+ * people will never need this.
+ */
+public class FinishableWrapperOutputStream extends FinishableOutputStream {
+    /**
+     * The {@link java.io.OutputStream OutputStream} that has been
+     * wrapped into a FinishableWrapperOutputStream.
+     */
+    protected OutputStream out;
+
+    /**
+     * Creates a new output stream which supports finishing.
+     * The finish() method will do nothing.
+     */
+    public FinishableWrapperOutputStream(OutputStream out) {
+        this.out = out;
+    }
+
+    /**
+     * Calls {@link java.io.OutputStream#write(int) out.write(b)}.
+ */ + public void write(int b) throws IOException { + out.write(b); + } + + /** + * Calls {@link java.io.OutputStream#write(byte[]) out.write(buf)}. + */ + public void write(byte[] buf) throws IOException { + out.write(buf); + } + + /** + * Calls {@link java.io.OutputStream#write(byte[], int, int) + * out.write(buf, off, len)}. + */ + public void write(byte[] buf, int off, int len) throws IOException { + out.write(buf, off, len); + } + + /** + * Calls {@link java.io.OutputStream#flush() out.flush()}. + */ + public void flush() throws IOException { + out.flush(); + } + + /** + * Calls {@link java.io.OutputStream#close() out.close()}. + */ + public void close() throws IOException { + out.close(); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/IA64Options.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/IA64Options.java new file mode 100644 index 0000000..3fde7b5 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/IA64Options.java @@ -0,0 +1,29 @@ + +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.simple.IA64; + +import java.io.InputStream; + +/** + * BCJ filter for Itanium (IA-64) instructions. + */ +public class IA64Options extends BCJOptions { + private static final int ALIGNMENT = 16; + + public IA64Options() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new IA64(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new IA64(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.IA64_FILTER_ID); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/IndexIndicatorException.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/IndexIndicatorException.java new file mode 100644 index 0000000..0b24fba --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/IndexIndicatorException.java @@ -0,0 +1,5 @@ + +package org.xbib.io.compress.xz; + +class IndexIndicatorException extends Exception { +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Coder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Coder.java new file mode 100644 index 0000000..f45836d --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Coder.java @@ -0,0 +1,18 @@ + +package org.xbib.io.compress.xz; + +abstract class LZMA2Coder implements FilterCoder { + public static final long FILTER_ID = 0x21; + + public boolean changesSize() { + return true; + } + + public boolean nonLastOK() { + return false; + } + + public boolean lastOK() { + return true; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Decoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Decoder.java new file mode 100644 index 0000000..1562a39 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Decoder.java @@ -0,0 +1,28 @@ + +package org.xbib.io.compress.xz; + +import java.io.InputStream; + +class LZMA2Decoder extends LZMA2Coder implements FilterDecoder { + private int dictSize; + + LZMA2Decoder(byte[] props) throws UnsupportedOptionsException { + // Up to 1.5 GiB dictionary is supported. The bigger ones + // are too big for int. 
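+        // (In other words, a property byte b in [0, 37] decodes below to
+        // (2 | (b & 1)) << (b / 2 + 11) bytes, i.e. a size of the form
+        // 2^n or 2^n + 2^(n-1), from 4 KiB at b = 0 up to 1.5 GiB at b = 37.)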
+ if (props.length != 1 || (props[0] & 0xFF) > 37) { + throw new UnsupportedOptionsException( + "Unsupported LZMA2 properties"); + } + + dictSize = 2 | (props[0] & 1); + dictSize <<= (props[0] >>> 1) + 11; + } + + public int getMemoryUsage() { + return LZMA2InputStream.getMemoryUsage(dictSize); + } + + public InputStream getInputStream(InputStream in) { + return new LZMA2InputStream(in, dictSize); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Encoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Encoder.java new file mode 100644 index 0000000..d3254c5 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Encoder.java @@ -0,0 +1,46 @@ + +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.lzma.LZMAEncoder; + +class LZMA2Encoder extends LZMA2Coder implements FilterEncoder { + private final LZMA2Options options; + private final byte[] props = new byte[1]; + + LZMA2Encoder(LZMA2Options options) { + if (options.getPresetDict() != null) { + throw new IllegalArgumentException( + "XZ doesn't support a preset dictionary for now"); + } + + if (options.getMode() == LZMA2Options.MODE_UNCOMPRESSED) { + props[0] = (byte) 0; + } else { + int d = Math.max(options.getDictSize(), LZMA2Options.DICT_SIZE_MIN); + props[0] = (byte) (LZMAEncoder.getDistSlot(d - 1) - 23); + } + + // Make a private copy so that the caller is free to change its copy. + this.options = (LZMA2Options) options.clone(); + } + + @Override + public long getFilterID() { + return FILTER_ID; + } + + @Override + public byte[] getFilterProps() { + return props; + } + + @Override + public boolean supportsFlushing() { + return true; + } + + @Override + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return options.getOutputStream(out); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2InputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2InputStream.java new file mode 100644 index 0000000..1a5b91e --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2InputStream.java @@ -0,0 +1,334 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.lz.LZDecoder; +import org.xbib.io.compress.xz.lzma.LZMADecoder; +import org.xbib.io.compress.xz.rangecoder.RangeDecoder; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Decompresses a raw LZMA2 stream (no XZ headers). + */ +public class LZMA2InputStream extends InputStream { + /** + * Smallest valid LZMA2 dictionary size. + * Very tiny dictionaries would be a performance problem, so + * the minimum is 4 KiB. + */ + public static final int DICT_SIZE_MIN = 4096; + + /** + * Largest dictionary size supported by this implementation. + * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB. + * This implementation supports only 16 bytes less than 2 GiB for raw + * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This + * limitation is due to Java using signed 32-bit integers for array + * indexing. The limitation shouldn't matter much in practice since so + * huge dictionaries are not normally used. 
+     */
+    public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
+
+    private static final int COMPRESSED_SIZE_MAX = 1 << 16;
+
+    private DataInputStream in;
+
+    private final LZDecoder lz;
+    private final RangeDecoder rc = new RangeDecoder(COMPRESSED_SIZE_MAX);
+    private LZMADecoder lzma;
+
+    private int uncompressedSize = 0;
+    private boolean isLZMAChunk;
+
+    private boolean needDictReset = true;
+    private boolean needProps = true;
+    private boolean endReached = false;
+
+    private IOException exception = null;
+
+    /**
+     * Gets approximate decompressor memory requirements as kibibytes for
+     * the given dictionary size.
+     *
+     * @param dictSize LZMA2 dictionary size as bytes, must be
+     *                 in the range [DICT_SIZE_MIN,
+     *                 DICT_SIZE_MAX]
+     * @return approximate memory requirements as kibibytes (KiB)
+     */
+    public static int getMemoryUsage(int dictSize) {
+        // The base state is around 30-40 KiB (probabilities etc.),
+        // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
+        // and LZ decoder needs a dictionary buffer.
+        return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
+    }
+
+    private static int getDictSize(int dictSize) {
+        if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX) {
+            throw new IllegalArgumentException(
+                    "Unsupported dictionary size " + dictSize);
+        }
+
+        // Round dictionary size upward to a multiple of 16. This way LZMA
+        // can use LZDecoder.getPos() for calculating LZMA's posMask.
+        // Note that this check is needed only for raw LZMA2 streams; it is
+        // redundant with .xz.
+        return (dictSize + 15) & ~15;
+    }
+
+    /**
+     * Creates a new input stream that decompresses raw LZMA2 data
+     * from in.
+     * The caller needs to know the dictionary size used when compressing;
+     * the dictionary size isn't stored as part of a raw LZMA2 stream.
+     * Specifying too small a dictionary size will prevent decompressing
+     * the stream. Specifying too big a dictionary wastes memory, but
+     * decompression will work.
+     * There is no need to specify a dictionary bigger than
+     * the uncompressed size of the data even if a bigger dictionary
+     * was used when compressing. If you know the uncompressed size
+     * of the data, this might allow saving some memory.
+     *
+     * @param in       input stream from which LZMA2-compressed
+     *                 data is read
+     * @param dictSize LZMA2 dictionary size as bytes, must be
+     *                 in the range [DICT_SIZE_MIN,
+     *                 DICT_SIZE_MAX]
+     */
+    public LZMA2InputStream(InputStream in, int dictSize) {
+        this(in, dictSize, null);
+    }
+
+    /**
+     * Creates a new LZMA2 decompressor using a preset dictionary.
+     * This is like LZMA2InputStream(InputStream, int) except
+     * that the dictionary may be initialized using a preset dictionary.
+     * If a preset dictionary was used when compressing the data, the
+     * same preset dictionary must be provided when decompressing.
+     *
+     * @param in         input stream from which LZMA2-compressed
+     *                   data is read
+     * @param dictSize   LZMA2 dictionary size as bytes, must be
+     *                   in the range [DICT_SIZE_MIN,
+     *                   DICT_SIZE_MAX]
+     * @param presetDict preset dictionary or null
+     *                   to use no preset dictionary
+     */
+    public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) {
+        // Check for null because otherwise null isn't detected
+        // in this constructor.
+ if (in == null) { + throw new NullPointerException(); + } + + this.in = new DataInputStream(in); + this.lz = new LZDecoder(getDictSize(dictSize), presetDict); + + if (presetDict != null && presetDict.length > 0) { + needDictReset = false; + } + } + + /** + * Decompresses the next byte from this input stream. + * Reading lots of data with read() from this input stream + * may be inefficient. Wrap it in java.io.BufferedInputStream + * if you need to read lots of data one byte at a time. + * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * @throws CorruptedInputException + * @throws XZIOException if the stream has been closed + * @throws java.io.IOException may be thrown by in + */ + public int read() throws IOException { + byte[] buf = new byte[1]; + return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + * If len is zero, no bytes are read and 0 + * is returned. Otherwise this will block until len + * bytes have been decompressed, the end of LZMA2 stream is reached, + * or an exception is thrown. + * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * @throws CorruptedInputException + * @throws XZIOException if the stream has been closed + * @throws java.io.IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) { + throw new IndexOutOfBoundsException(); + } + + if (len == 0) { + return 0; + } + + if (in == null) { + throw new XZIOException("Stream closed"); + } + + if (exception != null) { + throw exception; + } + + if (endReached) { + return -1; + } + + try { + int size = 0; + + while (len > 0) { + if (uncompressedSize == 0) { + decodeChunkHeader(); + if (endReached) { + return size == 0 ? 
-1 : size;
+                    }
+                }
+
+                int copySizeMax = Math.min(uncompressedSize, len);
+
+                if (!isLZMAChunk) {
+                    lz.copyUncompressed(in, copySizeMax);
+                } else {
+                    lz.setLimit(copySizeMax);
+                    lzma.decode();
+                }
+
+                int copiedSize = lz.flush(buf, off);
+                off += copiedSize;
+                len -= copiedSize;
+                size += copiedSize;
+                uncompressedSize -= copiedSize;
+
+                if (uncompressedSize == 0) {
+                    if (!rc.isFinished() || lz.hasPending()) {
+                        throw new CorruptedInputException();
+                    }
+                }
+            }
+
+            return size;
+
+        } catch (IOException e) {
+            exception = e;
+            throw e;
+        }
+    }
+
+    private void decodeChunkHeader() throws IOException {
+        int control = in.readUnsignedByte();
+
+        if (control == 0x00) {
+            endReached = true;
+            return;
+        }
+
+        if (control >= 0xE0 || control == 0x01) {
+            needProps = true;
+            needDictReset = false;
+            lz.reset();
+        } else if (needDictReset) {
+            throw new CorruptedInputException();
+        }
+
+        if (control >= 0x80) {
+            isLZMAChunk = true;
+
+            uncompressedSize = (control & 0x1F) << 16;
+            uncompressedSize += in.readUnsignedShort() + 1;
+
+            int compressedSize = in.readUnsignedShort() + 1;
+
+            if (control >= 0xC0) {
+                needProps = false;
+                decodeProps();
+
+            } else if (needProps) {
+                throw new CorruptedInputException();
+
+            } else if (control >= 0xA0) {
+                lzma.reset();
+            }
+
+            rc.prepareInputBuffer(in, compressedSize);
+
+        } else if (control > 0x02) {
+            throw new CorruptedInputException();
+
+        } else {
+            isLZMAChunk = false;
+            uncompressedSize = in.readUnsignedShort() + 1;
+        }
+    }
+
+    private void decodeProps() throws IOException {
+        int props = in.readUnsignedByte();
+
+        if (props > (4 * 5 + 4) * 9 + 8) {
+            throw new CorruptedInputException();
+        }
+
+        int pb = props / (9 * 5);
+        props -= pb * 9 * 5;
+        int lp = props / 9;
+        int lc = props - lp * 9;
+
+        if (lc + lp > 4) {
+            throw new CorruptedInputException();
+        }
+
+        lzma = new LZMADecoder(lz, rc, lc, lp, pb);
+    }
+
+    /**
+     * Returns the number of uncompressed bytes that can be read
+     * without blocking. The value is returned with an assumption
+     * that the compressed input data will be valid. If the compressed
+     * data is corrupt, CorruptedInputException may get
+     * thrown before the number of bytes claimed to be available have
+     * been read from this input stream.
+     * In LZMA2InputStream, the return value will be non-zero when the
+     * decompressor is in the middle of an LZMA2 chunk. The return value
+     * will then be the number of uncompressed bytes remaining from that
+     * chunk.
+     *
+     * @return the number of uncompressed bytes that can be read
+     * without blocking
+     */
+    public int available() throws IOException {
+        if (in == null) {
+            throw new XZIOException("Stream closed");
+        }
+
+        if (exception != null) {
+            throw exception;
+        }
+
+        return uncompressedSize;
+    }
+
+    /**
+     * Closes the stream and calls in.close().
+     * If the stream was already closed, this does nothing.
+ * + * @throws java.io.IOException if thrown by in.close() + */ + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Options.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Options.java new file mode 100644 index 0000000..a490770 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2Options.java @@ -0,0 +1,542 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.lz.LZEncoder; +import org.xbib.io.compress.xz.lzma.LZMAEncoder; + +import java.io.IOException; +import java.io.InputStream; + +/** + * LZMA2 compression options. + * While this allows setting the LZMA2 compression options in detail, + * often you only need LZMA2Options() or + * LZMA2Options(int). + */ +public class LZMA2Options extends FilterOptions { + /** + * Minimum valid compression preset level is 0. + */ + public static final int PRESET_MIN = 0; + + /** + * Maximum valid compression preset level is 9. + */ + public static final int PRESET_MAX = 9; + + /** + * Default compression preset level is 6. + */ + public static final int PRESET_DEFAULT = 6; + + /** + * Minimum dictionary size is 4 KiB. + */ + public static final int DICT_SIZE_MIN = 4096; + + /** + * Maximum dictionary size for compression is 768 MiB. + * The decompressor supports bigger dictionaries, up to almost 2 GiB. + * With HC4 the encoder would support dictionaries bigger than 768 MiB. + * The 768 MiB limit comes from the current implementation of BT4 where + * we would otherwise hit the limits of signed ints in array indexing. + * If you really need bigger dictionary for decompression, + * use {@link LZMA2InputStream} directly. + */ + public static final int DICT_SIZE_MAX = 768 << 20; + + /** + * The default dictionary size is 8 MiB. + */ + public static final int DICT_SIZE_DEFAULT = 8 << 20; + + /** + * Maximum value for lc + lp is 4. + */ + public static final int LC_LP_MAX = 4; + + /** + * The default number of literal context bits is 3. + */ + public static final int LC_DEFAULT = 3; + + /** + * The default number of literal position bits is 0. + */ + public static final int LP_DEFAULT = 0; + + /** + * Maximum value for pb is 4. + */ + public static final int PB_MAX = 4; + + /** + * The default number of position bits is 2. + */ + public static final int PB_DEFAULT = 2; + + /** + * Compression mode: uncompressed. + * The data is wrapped into a LZMA2 stream without compression. + */ + public static final int MODE_UNCOMPRESSED = 0; + + /** + * Compression mode: fast. + * This is usually combined with a hash chain match finder. + */ + public static final int MODE_FAST = LZMAEncoder.MODE_FAST; + + /** + * Compression mode: normal. + * This is usually combined with a binary tree match finder. + */ + public static final int MODE_NORMAL = LZMAEncoder.MODE_NORMAL; + + /** + * Minimum value for niceLen is 8. + */ + public static final int NICE_LEN_MIN = 8; + + /** + * Maximum value for niceLen is 273. 
+     */
+    public static final int NICE_LEN_MAX = 273;
+
+    /**
+     * Match finder: Hash Chain 2-3-4
+     */
+    public static final int MF_HC4 = LZEncoder.MF_HC4;
+
+    /**
+     * Match finder: Binary tree 2-3-4
+     */
+    public static final int MF_BT4 = LZEncoder.MF_BT4;
+
+    private static final int[] presetToDictSize = {
+            1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22,
+            1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26};
+
+    private static final int[] presetToDepthLimit = {4, 8, 24, 48};
+
+    private int dictSize;
+    private byte[] presetDict = null;
+    private int lc;
+    private int lp;
+    private int pb;
+    private int mode;
+    private int niceLen;
+    private int mf;
+    private int depthLimit;
+
+    /**
+     * Creates new LZMA2 options and sets them to the default values.
+     * This is equivalent to LZMA2Options(PRESET_DEFAULT).
+     */
+    public LZMA2Options() {
+        try {
+            setPreset(PRESET_DEFAULT);
+        } catch (UnsupportedOptionsException e) {
+            assert false;
+            throw new RuntimeException();
+        }
+    }
+
+    /**
+     * Creates new LZMA2 options and sets them to the given preset.
+     *
+     * @throws UnsupportedOptionsException preset is not supported
+     */
+    public LZMA2Options(int preset) throws UnsupportedOptionsException {
+        setPreset(preset);
+    }
+
+    /**
+     * Creates new LZMA2 options and sets them to the given custom values.
+     *
+     * @throws UnsupportedOptionsException unsupported options were specified
+     */
+    public LZMA2Options(int dictSize, int lc, int lp, int pb, int mode,
+                        int niceLen, int mf, int depthLimit)
+            throws UnsupportedOptionsException {
+        setDictSize(dictSize);
+        setLcLp(lc, lp);
+        setPb(pb);
+        setMode(mode);
+        setNiceLen(niceLen);
+        setMatchFinder(mf);
+        setDepthLimit(depthLimit);
+    }
+
+    /**
+     * Sets the compression options to the given preset.
+     * The presets 0-3 are fast presets with medium compression.
+     * The presets 4-6 are fairly slow presets with high compression.
+     * The default preset (PRESET_DEFAULT) is 6.
+     * The presets 7-9 are like the preset 6 but use bigger dictionaries
+     * and have higher compressor and decompressor memory requirements.
+     * Unless the uncompressed size of the file exceeds 8 MiB,
+     * 16 MiB, or 32 MiB, it is a waste of memory to use the
+     * presets 7, 8, or 9, respectively.
+     *
+     * @throws UnsupportedOptionsException preset is not supported
+     */
+    public void setPreset(int preset) throws UnsupportedOptionsException {
+        if (preset < 0 || preset > 9) {
+            throw new UnsupportedOptionsException("Unsupported preset: " + preset);
+        }
+
+        lc = LC_DEFAULT;
+        lp = LP_DEFAULT;
+        pb = PB_DEFAULT;
+        dictSize = presetToDictSize[preset];
+
+        if (preset <= 3) {
+            mode = MODE_FAST;
+            mf = MF_HC4;
+            niceLen = preset <= 1 ? 128 : NICE_LEN_MAX;
+            depthLimit = presetToDepthLimit[preset];
+        } else {
+            mode = MODE_NORMAL;
+            mf = MF_BT4;
+            niceLen = (preset == 4) ? 16 : (preset == 5) ? 32 : 64;
+            depthLimit = 0;
+        }
+    }
+
+    /**
+     * Sets the dictionary size in bytes.
+     * The dictionary (or history buffer) holds the most recently seen
+     * uncompressed data. Bigger dictionary usually means better compression.
+     * However, using a dictionary bigger than the size of the uncompressed
+     * data is a waste of memory.
+     * Any value in the range [DICT_SIZE_MIN, DICT_SIZE_MAX] is valid,
+     * but sizes of 2^n and 2^n + 2^(n-1) bytes are somewhat
+     * recommended.
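+     * For example, 8 MiB (2^23) and 12 MiB (2^23 + 2^22) are such sizes,
+     * while other values get rounded up to the next such size in the .xz
+     * headers (see getDecoderMemoryUsage()).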
+     *
+     * @throws UnsupportedOptionsException dictSize is not supported
+     */
+    public void setDictSize(int dictSize) throws UnsupportedOptionsException {
+        if (dictSize < DICT_SIZE_MIN) {
+            throw new UnsupportedOptionsException("LZMA2 dictionary size must be at least 4 KiB: "
+                    + dictSize + " B");
+        }
+        if (dictSize > DICT_SIZE_MAX) {
+            throw new UnsupportedOptionsException("LZMA2 dictionary size must not exceed "
+                    + (DICT_SIZE_MAX >> 20) + " MiB: " + dictSize + " B");
+        }
+        this.dictSize = dictSize;
+    }
+
+    /**
+     * Gets the dictionary size in bytes.
+     */
+    public int getDictSize() {
+        return dictSize;
+    }
+
+    /**
+     * Sets a preset dictionary. Use null to disable the use of
+     * a preset dictionary. By default there is no preset dictionary.
+     * The .xz format doesn't support a preset dictionary for now.
+     * Do not set a preset dictionary unless you use raw LZMA2.
+     * Preset dictionary can be useful when compressing many similar,
+     * relatively small chunks of data independently from each other.
+     * A preset dictionary should contain typical strings that occur in
+     * the files being compressed. The most probable strings should be
+     * near the end of the preset dictionary. The preset dictionary used
+     * for compression is also needed for decompression.
+     */
+    public void setPresetDict(byte[] presetDict) {
+        this.presetDict = presetDict;
+    }
+
+    /**
+     * Gets the preset dictionary.
+     */
+    public byte[] getPresetDict() {
+        return presetDict;
+    }
+
+    /**
+     * Sets the number of literal context bits and literal position bits.
+     * The sum of lc and lp is limited to 4.
+     * Trying to exceed it will throw an exception. This function lets
+     * you change both at the same time.
+     *
+     * @throws UnsupportedOptionsException lc and lp
+     *                                     are invalid
+     */
+    public void setLcLp(int lc, int lp) throws UnsupportedOptionsException {
+        if (lc < 0 || lp < 0 || lc > LC_LP_MAX || lp > LC_LP_MAX
+                || lc + lp > LC_LP_MAX) {
+            throw new UnsupportedOptionsException(
+                    "lc + lp must not exceed " + LC_LP_MAX + ": "
+                            + lc + " + " + lp);
+        }
+
+        this.lc = lc;
+        this.lp = lp;
+    }
+
+    /**
+     * Sets the number of literal context bits.
+     * All bytes that cannot be encoded as matches are encoded as literals.
+     * That is, literals are simply 8-bit bytes that are encoded one at
+     * a time.
+     * The literal coding makes an assumption that the highest lc
+     * bits of the previous uncompressed byte correlate with the next byte.
+     * For example, in typical English text, an upper-case letter is often
+     * followed by a lower-case letter, and a lower-case letter is usually
+     * followed by another lower-case letter. In the US-ASCII character set,
+     * the highest three bits are 010 for upper-case letters and 011 for
+     * lower-case letters. When lc is at least 3, the literal
+     * coding can take advantage of this property in the uncompressed data.
+     * The default value (3) is usually good. If you want maximum compression,
+     * try setLc(4). Sometimes it helps a little, and sometimes it
+     * makes compression worse. If it makes it worse, test for example
+     * setLc(2) too.
+     *
+     * @throws UnsupportedOptionsException lc is invalid, or the sum
+     *                                     of lc and lp
+     *                                     exceed LC_LP_MAX
+     */
+    public void setLc(int lc) throws UnsupportedOptionsException {
+        setLcLp(lc, lp);
+    }
+
+    /**
+     * Sets the number of literal position bits.
+     * This affects what kind of alignment in the uncompressed data is
+     * assumed when encoding literals. See {@link #setPb(int) setPb} for
+     * more information about alignment.
+ * + * @throws UnsupportedOptionsException lp is invalid, or the sum + * of lc and lp + * exceed LC_LP_MAX + */ + public void setLp(int lp) throws UnsupportedOptionsException { + setLcLp(lc, lp); + } + + /** + * Gets the number of literal context bits. + */ + public int getLc() { + return lc; + } + + /** + * Gets the number of literal position bits. + */ + public int getLp() { + return lp; + } + + /** + * Sets the number of position bits. + * This affects what kind of alignment in the uncompressed data is + * assumed in general. The default (2) means four-byte alignment + * (2^pb = 2^2 = 4), which is often a good choice when + * there's no better guess. + * When the alignment is known, setting the number of position bits + * accordingly may reduce the file size a little. For example with text + * files having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), using + * setPb(0) can improve compression slightly. For UTF-16 + * text, setPb(1) is a good choice. If the alignment is + * an odd number like 3 bytes, setPb(0) might be the best + * choice. + * Even though the assumed alignment can be adjusted with + * setPb and setLp, LZMA2 still slightly favors + * 16-byte alignment. It might be worth taking into account when designing + * file formats that are likely to be often compressed with LZMA2. + * + * @throws UnsupportedOptionsException pb is invalid + */ + public void setPb(int pb) throws UnsupportedOptionsException { + if (pb < 0 || pb > PB_MAX) { + throw new UnsupportedOptionsException( + "pb must not exceed " + PB_MAX + ": " + pb); + } + + this.pb = pb; + } + + /** + * Gets the number of position bits. + */ + public int getPb() { + return pb; + } + + /** + * Sets the compression mode. + * This specifies the method to analyze the data produced by + * a match finder. The default is MODE_FAST for presets + * 0-3 and MODE_NORMAL for presets 4-9. + * Usually MODE_FAST is used with Hash Chain match finders + * and MODE_NORMAL with Binary Tree match finders. This is + * also what the presets do. + * The special mode MODE_UNCOMPRESSED doesn't try to + * compress the data at all (and doesn't use a match finder) and will + * simply wrap it in uncompressed LZMA2 chunks. + * + * @throws UnsupportedOptionsException mode is not supported + */ + public void setMode(int mode) throws UnsupportedOptionsException { + if (mode < MODE_UNCOMPRESSED || mode > MODE_NORMAL) { + throw new UnsupportedOptionsException( + "Unsupported compression mode: " + mode); + } + + this.mode = mode; + } + + /** + * Gets the compression mode. + */ + public int getMode() { + return mode; + } + + /** + * Sets the nice length of matches. + * Once a match of at least niceLen bytes is found, + * the algorithm stops looking for better matches. Higher values tend + * to give better compression at the expense of speed. The default + * depends on the preset. + * + * @throws UnsupportedOptionsException niceLen is invalid + */ + public void setNiceLen(int niceLen) throws UnsupportedOptionsException { + if (niceLen < NICE_LEN_MIN) { + throw new UnsupportedOptionsException( + "Minimum nice length of matches is " + + NICE_LEN_MIN + " bytes: " + niceLen); + } + + if (niceLen > NICE_LEN_MAX) { + throw new UnsupportedOptionsException( + "Maximum nice length of matches is " + NICE_LEN_MAX + + ": " + niceLen); + } + + this.niceLen = niceLen; + } + + /** + * Gets the nice length of matches. + */ + public int getNiceLen() { + return niceLen; + } + + /** + * Sets the match finder type. 
+     * Match finder has a major effect on compression speed, memory usage,
+     * and compression ratio. Usually Hash Chain match finders are faster
+     * than Binary Tree match finders. The default depends on the preset:
+     * 0-3 use MF_HC4 and 4-9 use MF_BT4.
+     *
+     * @throws UnsupportedOptionsException mf is not supported
+     */
+    public void setMatchFinder(int mf) throws UnsupportedOptionsException {
+        if (mf != MF_HC4 && mf != MF_BT4) {
+            throw new UnsupportedOptionsException(
+                    "Unsupported match finder: " + mf);
+        }
+
+        this.mf = mf;
+    }
+
+    /**
+     * Gets the match finder type.
+     */
+    public int getMatchFinder() {
+        return mf;
+    }
+
+    /**
+     * Sets the match finder search depth limit.
+     * The default is a special value of 0 which indicates that
+     * the depth limit should be automatically calculated by the selected
+     * match finder from the nice length of matches.
+     * Reasonable depth limit for Hash Chain match finders is 4-100 and
+     * 16-1000 for Binary Tree match finders. Using very high values can
+     * make the compressor extremely slow with some files. Avoid settings
+     * higher than 1000 unless you are prepared to interrupt the compression
+     * in case it is taking far too long.
+     *
+     * @throws UnsupportedOptionsException depthLimit is invalid
+     */
+    public void setDepthLimit(int depthLimit)
+            throws UnsupportedOptionsException {
+        if (depthLimit < 0) {
+            throw new UnsupportedOptionsException(
+                    "Depth limit cannot be negative: " + depthLimit);
+        }
+
+        this.depthLimit = depthLimit;
+    }
+
+    /**
+     * Gets the match finder search depth limit.
+     */
+    public int getDepthLimit() {
+        return depthLimit;
+    }
+
+    public int getEncoderMemoryUsage() {
+        return (mode == MODE_UNCOMPRESSED)
+                ? UncompressedLZMA2OutputStream.getMemoryUsage()
+                : LZMA2OutputStream.getMemoryUsage(this);
+    }
+
+    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
+        if (mode == MODE_UNCOMPRESSED) {
+            return new UncompressedLZMA2OutputStream(out);
+        }
+
+        return new LZMA2OutputStream(out, this);
+    }
+
+    /**
+     * Gets how much memory the LZMA2 decoder will need to decompress the data
+     * that was encoded with these options and stored in a .xz file.
+     * The returned value may be bigger than the value returned by a direct
+     * call to {@link LZMA2InputStream#getMemoryUsage(int)} if the dictionary
+     * size is not 2^n or 2^n + 2^(n-1) bytes. This is because the .xz
+     * headers store the dictionary size in such a format and other values
+     * are rounded up to the next such value. Such rounding is harmless except
+     * it might waste some memory if an unusual dictionary size is used.
+     * If you use raw LZMA2 streams and an unusual dictionary size, call
+     * {@link LZMA2InputStream#getMemoryUsage} directly to get raw decoder
+     * memory requirements.
+     */
+    public int getDecoderMemoryUsage() {
+        // Round the dictionary size up to the next 2^n or 2^n + 2^(n-1).
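+        // (The smearing starts at >>> 2 instead of the usual >>> 1 so the
+        // second-highest bit survives; d + 1 then lands on 2^n or
+        // 2^n + 2^(n-1) rather than a plain power of two. For example,
+        // 5 MiB rounds up to 6 MiB.)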
+ int d = dictSize - 1; + d |= d >>> 2; + d |= d >>> 3; + d |= d >>> 4; + d |= d >>> 8; + d |= d >>> 16; + return LZMA2InputStream.getMemoryUsage(d + 1); + } + + public InputStream getInputStream(InputStream in) throws IOException { + return new LZMA2InputStream(in, dictSize); + } + + FilterEncoder getFilterEncoder() { + return new LZMA2Encoder(this); + } + + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + assert false; + throw new RuntimeException(); + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2OutputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2OutputStream.java new file mode 100644 index 0000000..7f2d79d --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/LZMA2OutputStream.java @@ -0,0 +1,266 @@ + +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.lz.LZEncoder; +import org.xbib.io.compress.xz.lzma.LZMAEncoder; +import org.xbib.io.compress.xz.rangecoder.RangeEncoder; + +import java.io.DataOutputStream; +import java.io.IOException; + +class LZMA2OutputStream extends FinishableOutputStream { + static final int COMPRESSED_SIZE_MAX = 64 << 10; + + private FinishableOutputStream out; + private final DataOutputStream outData; + + private final LZEncoder lz; + private final RangeEncoder rc; + private final LZMAEncoder lzma; + + private final int props; // Cannot change props on the fly for now. + private boolean dictResetNeeded = true; + private boolean stateResetNeeded = true; + private boolean propsNeeded = true; + + private int pendingSize = 0; + private boolean finished = false; + private IOException exception = null; + + private static int getExtraSizeBefore(int dictSize) { + return COMPRESSED_SIZE_MAX > dictSize + ? 
COMPRESSED_SIZE_MAX - dictSize : 0; + } + + static int getMemoryUsage(LZMA2Options options) { + // 64 KiB buffer for the range encoder + a little extra + LZMAEncoder + int dictSize = options.getDictSize(); + int extraSizeBefore = getExtraSizeBefore(dictSize); + return 70 + LZMAEncoder.getMemoryUsage(options.getMode(), + dictSize, extraSizeBefore, + options.getMatchFinder()); + } + + LZMA2OutputStream(FinishableOutputStream out, LZMA2Options options) { + if (out == null) { + throw new NullPointerException(); + } + + this.out = out; + outData = new DataOutputStream(out); + rc = new RangeEncoder(COMPRESSED_SIZE_MAX); + + int dictSize = options.getDictSize(); + int extraSizeBefore = getExtraSizeBefore(dictSize); + lzma = LZMAEncoder.getInstance(rc, + options.getLc(), options.getLp(), options.getPb(), + options.getMode(), + dictSize, extraSizeBefore, options.getNiceLen(), + options.getMatchFinder(), options.getDepthLimit()); + + lz = lzma.getLZEncoder(); + + byte[] presetDict = options.getPresetDict(); + if (presetDict != null && presetDict.length > 0) { + lz.setPresetDict(dictSize, presetDict); + dictResetNeeded = false; + } + + props = (options.getPb() * 5 + options.getLp()) * 9 + options.getLc(); + } + + public void write(int b) throws IOException { + byte[] buf = new byte[1]; + buf[0] = (byte) b; + write(buf, 0, 1); + } + + public void write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) { + throw new IndexOutOfBoundsException(); + } + + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished or closed"); + } + + try { + while (len > 0) { + int used = lz.fillWindow(buf, off, len); + off += used; + len -= used; + pendingSize += used; + + if (lzma.encodeForLZMA2()) { + writeChunk(); + } + } + } catch (IOException e) { + exception = e; + throw e; + } + } + + private void writeChunk() throws IOException { + int compressedSize = rc.finish(); + int uncompressedSize = lzma.getUncompressedSize(); + + assert compressedSize > 0 : compressedSize; + assert uncompressedSize > 0 : uncompressedSize; + + // +2 because the header of a compressed chunk is 2 bytes + // bigger than the header of an uncompressed chunk. + if (compressedSize + 2 < uncompressedSize) { + writeLZMA(uncompressedSize, compressedSize); + } else { + lzma.reset(); + uncompressedSize = lzma.getUncompressedSize(); + assert uncompressedSize > 0 : uncompressedSize; + writeUncompressed(uncompressedSize); + } + + pendingSize -= uncompressedSize; + lzma.resetUncompressedSize(); + rc.reset(); + } + + private void writeLZMA(int uncompressedSize, int compressedSize) + throws IOException { + int control; + + if (propsNeeded) { + if (dictResetNeeded) { + control = 0x80 + (3 << 5); + } else { + control = 0x80 + (2 << 5); + } + } else { + if (stateResetNeeded) { + control = 0x80 + (1 << 5); + } else { + control = 0x80; + } + } + + control |= (uncompressedSize - 1) >>> 16; + outData.writeByte(control); + + outData.writeShort(uncompressedSize - 1); + outData.writeShort(compressedSize - 1); + + if (propsNeeded) { + outData.writeByte(props); + } + + rc.write(out); + + propsNeeded = false; + stateResetNeeded = false; + dictResetNeeded = false; + } + + private void writeUncompressed(int uncompressedSize) throws IOException { + while (uncompressedSize > 0) { + int chunkSize = Math.min(uncompressedSize, COMPRESSED_SIZE_MAX); + outData.writeByte(dictResetNeeded ? 
0x01 : 0x02); + outData.writeShort(chunkSize - 1); + lz.copyUncompressed(out, uncompressedSize, chunkSize); + uncompressedSize -= chunkSize; + dictResetNeeded = false; + } + + stateResetNeeded = true; + } + + private void writeEndMarker() throws IOException { + assert !finished; + + if (exception != null) { + throw exception; + } + + lz.setFinishing(); + + try { + while (pendingSize > 0) { + lzma.encodeForLZMA2(); + writeChunk(); + } + + out.write(0x00); + } catch (IOException e) { + exception = e; + throw e; + } + + finished = true; + } + + public void flush() throws IOException { + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished or closed"); + } + + try { + lz.setFlushing(); + + while (pendingSize > 0) { + lzma.encodeForLZMA2(); + writeChunk(); + } + + out.flush(); + } catch (IOException e) { + exception = e; + throw e; + } + } + + public void finish() throws IOException { + if (!finished) { + writeEndMarker(); + + try { + out.finish(); + } catch (IOException e) { + exception = e; + throw e; + } + + finished = true; + } + } + + public void close() throws IOException { + if (out != null) { + if (!finished) { + try { + writeEndMarker(); + } catch (IOException e) { + } + } + + try { + out.close(); + } catch (IOException e) { + if (exception == null) { + exception = e; + } + } + + out = null; + } + + if (exception != null) { + throw exception; + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/MemoryLimitException.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/MemoryLimitException.java new file mode 100644 index 0000000..9ac6aff --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/MemoryLimitException.java @@ -0,0 +1,49 @@ +package org.xbib.io.compress.xz; + +/** + * Thrown when the memory usage limit given to the XZ decompressor + * would be exceeded. + * The amount of memory required and the memory usage limit are + * included in the error detail message in human readable format. + */ +public class MemoryLimitException extends XZIOException { + private static final long serialVersionUID = 3L; + + private final int memoryNeeded; + private final int memoryLimit; + + /** + * Creates a new MemoryLimitException. + * The amount of memory needed and the memory usage limit are + * included in the error detail message. + * + * @param memoryNeeded amount of memory needed as kibibytes (KiB) + * @param memoryLimit specified memory usage limit as kibibytes (KiB) + */ + public MemoryLimitException(int memoryNeeded, int memoryLimit) { + super("" + memoryNeeded + " KiB of memory would be needed; limit was " + + memoryLimit + " KiB"); + + this.memoryNeeded = memoryNeeded; + this.memoryLimit = memoryLimit; + } + + /** + * Gets how much memory is required to decompress the data. + * + * @return amount of memory needed as kibibytes (KiB) + */ + public int getMemoryNeeded() { + return memoryNeeded; + } + + /** + * Gets what the memory usage limit was at the time the exception + * was created. 
+ * + * @return memory usage limit as kibibytes (KiB) + */ + public int getMemoryLimit() { + return memoryLimit; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/PowerPCOptions.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/PowerPCOptions.java new file mode 100644 index 0000000..aee3576 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/PowerPCOptions.java @@ -0,0 +1,29 @@ + +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.simple.PowerPC; + +import java.io.InputStream; + +/** + * BCJ filter for big endian PowerPC instructions. + */ +public class PowerPCOptions extends BCJOptions { + private static final int ALIGNMENT = 4; + + public PowerPCOptions() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new PowerPC(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new PowerPC(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.POWERPC_FILTER_ID); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/RawCoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/RawCoder.java new file mode 100644 index 0000000..b1b1eb5 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/RawCoder.java @@ -0,0 +1,31 @@ + +package org.xbib.io.compress.xz; + +class RawCoder { + static void validate(FilterCoder[] filters) + throws UnsupportedOptionsException { + for (int i = 0; i < filters.length - 1; ++i) { + if (!filters[i].nonLastOK()) { + throw new UnsupportedOptionsException( + "Unsupported XZ filter chain"); + } + } + + if (!filters[filters.length - 1].lastOK()) { + throw new UnsupportedOptionsException( + "Unsupported XZ filter chain"); + } + + int changesSizeCount = 0; + for (int i = 0; i < filters.length; ++i) { + if (filters[i].changesSize()) { + ++changesSizeCount; + } + } + + if (changesSizeCount > 3) { + throw new UnsupportedOptionsException( + "Unsupported XZ filter chain"); + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SPARCOptions.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SPARCOptions.java new file mode 100644 index 0000000..a0b5e8e --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SPARCOptions.java @@ -0,0 +1,29 @@ + +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.simple.SPARC; + +import java.io.InputStream; + +/** + * BCJ filter for SPARC. 
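+ * A minimal sketch of chaining this filter with LZMA2 (the file
+ * name is hypothetical):
+ * <pre>
+ * FilterOptions[] options = { new SPARCOptions(), new LZMA2Options() };
+ * XZOutputStream out = new XZOutputStream(
+ *         new FileOutputStream("image.sparc.xz"), options);
+ * </pre>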
+ */ +public class SPARCOptions extends BCJOptions { + private static final int ALIGNMENT = 4; + + public SPARCOptions() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new SPARC(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new SPARC(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.SPARC_FILTER_ID); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SeekableFileInputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SeekableFileInputStream.java new file mode 100644 index 0000000..1b5243a --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SeekableFileInputStream.java @@ -0,0 +1,94 @@ + +package org.xbib.io.compress.xz; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.RandomAccessFile; + +/** + * Wraps a {@link java.io.RandomAccessFile RandomAccessFile} + * in a SeekableInputStream. + */ +public class SeekableFileInputStream extends SeekableInputStream { + /** + * The RandomAccessFile that has been wrapped + * into a SeekableFileInputStream. + */ + protected RandomAccessFile randomAccessFile; + + /** + * Creates a new seekable input stream that reads from the specified file. + */ + public SeekableFileInputStream(File file) throws FileNotFoundException { + randomAccessFile = new RandomAccessFile(file, "r"); + } + + /** + * Creates a new seekable input stream that reads from a file with + * the specified name. + */ + public SeekableFileInputStream(String name) throws FileNotFoundException { + randomAccessFile = new RandomAccessFile(name, "r"); + } + + /** + * Creates a new seekable input stream from an existing + * RandomAccessFile object. + */ + public SeekableFileInputStream(RandomAccessFile randomAccessFile) { + this.randomAccessFile = randomAccessFile; + } + + /** + * Calls {@link java.io.RandomAccessFile#read() randomAccessFile.read()}. + */ + public int read() throws IOException { + return randomAccessFile.read(); + } + + /** + * Calls {@link java.io.RandomAccessFile#read(byte[]) randomAccessFile.read(buf)}. + */ + public int read(byte[] buf) throws IOException { + return randomAccessFile.read(buf); + } + + /** + * Calls + * {@link java.io.RandomAccessFile#read(byte[], int, int) + * randomAccessFile.read(buf, off, len)}. + */ + public int read(byte[] buf, int off, int len) throws IOException { + return randomAccessFile.read(buf, off, len); + } + + /** + * Calls {@link java.io.RandomAccessFile#close() randomAccessFile.close()}. + */ + public void close() throws IOException { + randomAccessFile.close(); + } + + /** + * Calls {@link java.io.RandomAccessFile#length() randomAccessFile.length()}. + */ + public long length() throws IOException { + return randomAccessFile.length(); + } + + /** + * Calls {@link java.io.RandomAccessFile#getFilePointer() + * randomAccessFile.getFilePointer()}. + */ + public long position() throws IOException { + return randomAccessFile.getFilePointer(); + } + + /** + * Calls {@link java.io.RandomAccessFile#seek(long) randomAccessFile.seek(long)}. 
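+ * A short usage sketch (the file name is hypothetical):
+ * <pre>
+ * SeekableFileInputStream in = new SeekableFileInputStream("data.bin");
+ * in.seek(in.length() - 4);    // jump to the last four bytes
+ * </pre>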
+ */ + public void seek(long pos) throws IOException { + randomAccessFile.seek(pos); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SeekableInputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SeekableInputStream.java new file mode 100644 index 0000000..7898465 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SeekableInputStream.java @@ -0,0 +1,70 @@ +package org.xbib.io.compress.xz; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Input stream with random access support. + */ +public abstract class SeekableInputStream extends InputStream { + /** + * Seeks n bytes forward in this stream. + * This will not seek past the end of the file. If the current position + * is already at or past the end of the file, this doesn't seek at all + * and returns 0. Otherwise, if skipping n bytes + * would cause the position to exceed the stream size, this will do + * equivalent of seek(length()) and the return value will + * be adjusted accordingly. + * If n is negative, the position isn't changed and + * the return value is 0. It doesn't seek backward + * because it would conflict with the specification of + * {@link java.io.InputStream#skip(long) InputStream.skip}. + * + * @return 0 if n is negative, + * less than n if skipping n + * bytes would seek past the end of the file, + * n otherwise + * @throws java.io.IOException might be thrown by {@link #seek(long)} + */ + public long skip(long n) throws IOException { + if (n <= 0) { + return 0; + } + + long size = length(); + long pos = position(); + if (pos >= size) { + return 0; + } + + if (size - pos < n) { + n = size - pos; + } + + seek(pos + n); + return n; + } + + /** + * Gets the size of the stream. + */ + public abstract long length() throws IOException; + + /** + * Gets the current position in the stream. + */ + public abstract long position() throws IOException; + + /** + * Seeks to the specified absolute position in the stream. + * Seeking past the end of the file should be supported by the subclasses + * unless there is a good reason to do otherwise. If one has seeked + * past the end of the stream, read will return + * -1 to indicate end of stream. + * + * @param pos new read position in the stream + * @throws java.io.IOException if pos is negative or if + * a stream-specific I/O error occurs + */ + public abstract void seek(long pos) throws IOException; +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SimpleInputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SimpleInputStream.java new file mode 100644 index 0000000..5eda3c4 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SimpleInputStream.java @@ -0,0 +1,137 @@ + +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.simple.SimpleFilter; + +import java.io.IOException; +import java.io.InputStream; + +class SimpleInputStream extends InputStream { + private static final int TMPBUF_SIZE = 4096; + + private InputStream in; + private final SimpleFilter simpleFilter; + + private final byte[] tmpbuf = new byte[TMPBUF_SIZE]; + private int pos = 0; + private int filtered = 0; + private int unfiltered = 0; + + private boolean endReached = false; + private IOException exception = null; + + static int getMemoryUsage() { + return 1 + TMPBUF_SIZE / 1024; + } + + SimpleInputStream(InputStream in, SimpleFilter simpleFilter) { + // Check for null because otherwise null isn't detect + // in this constructor. 
+        if (in == null) {
+            throw new NullPointerException();
+        }
+
+        // The simpleFilter argument comes from this package
+        // so it is known to be non-null already.
+        assert simpleFilter != null;
+
+        this.in = in;
+        this.simpleFilter = simpleFilter;
+    }
+
+    public int read() throws IOException {
+        byte[] buf = new byte[1];
+        return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF);
+    }
+
+    public int read(byte[] buf, int off, int len) throws IOException {
+        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) {
+            throw new IndexOutOfBoundsException();
+        }
+
+        if (len == 0) {
+            return 0;
+        }
+
+        if (in == null) {
+            throw new XZIOException("Stream closed");
+        }
+
+        if (exception != null) {
+            throw exception;
+        }
+
+        try {
+            int size = 0;
+
+            while (true) {
+                // Copy filtered data into the caller-provided buffer.
+                int copySize = Math.min(filtered, len);
+                System.arraycopy(tmpbuf, pos, buf, off, copySize);
+                pos += copySize;
+                filtered -= copySize;
+                off += copySize;
+                len -= copySize;
+                size += copySize;
+
+                // If end of tmpbuf was reached, move the pending data to
+                // the beginning of the buffer so that more data can be
+                // copied into tmpbuf on the next loop iteration.
+                if (pos + filtered + unfiltered == TMPBUF_SIZE) {
+                    System.arraycopy(tmpbuf, pos, tmpbuf, 0,
+                            filtered + unfiltered);
+                    pos = 0;
+                }
+
+                if (len == 0 || endReached) {
+                    return size > 0 ? size : -1;
+                }
+
+                assert filtered == 0;
+
+                // Get more data into the temporary buffer.
+                int inSize = TMPBUF_SIZE - (pos + filtered + unfiltered);
+                inSize = in.read(tmpbuf, pos + filtered + unfiltered, inSize);
+
+                if (inSize == -1) {
+                    // Mark the remaining unfiltered bytes to be ready
+                    // to be copied out.
+                    endReached = true;
+                    filtered = unfiltered;
+                    unfiltered = 0;
+                } else {
+                    // Filter the data in tmpbuf.
+ unfiltered += inSize; + filtered = simpleFilter.code(tmpbuf, pos, unfiltered); + assert filtered <= unfiltered; + unfiltered -= filtered; + } + } + } catch (IOException e) { + exception = e; + throw e; + } + } + + public int available() throws IOException { + if (in == null) { + throw new XZIOException("Stream closed"); + } + + if (exception != null) { + throw exception; + } + + return filtered; + } + + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SimpleOutputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SimpleOutputStream.java new file mode 100644 index 0000000..6d78014 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SimpleOutputStream.java @@ -0,0 +1,155 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.simple.SimpleFilter; + +import java.io.IOException; + +class SimpleOutputStream extends FinishableOutputStream { + + private static final int TMPBUF_SIZE = 4096; + + private FinishableOutputStream out; + + private final SimpleFilter simpleFilter; + + private final byte[] tmpbuf = new byte[TMPBUF_SIZE]; + + private int pos = 0; + + private int unfiltered = 0; + + private IOException exception = null; + + private boolean finished = false; + + static int getMemoryUsage() { + return 1 + TMPBUF_SIZE / 1024; + } + + SimpleOutputStream(FinishableOutputStream out, + SimpleFilter simpleFilter) { + if (out == null) { + throw new NullPointerException(); + } + + this.out = out; + this.simpleFilter = simpleFilter; + } + + public void write(int b) throws IOException { + byte[] buf = new byte[1]; + buf[0] = (byte) b; + write(buf, 0, 1); + } + + public void write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) { + throw new IndexOutOfBoundsException(); + } + + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished or closed"); + } + + while (len > 0) { + // Copy more unfiltered data into tmpbuf. + int copySize = Math.min(len, TMPBUF_SIZE - (pos + unfiltered)); + System.arraycopy(buf, off, tmpbuf, pos + unfiltered, copySize); + off += copySize; + len -= copySize; + unfiltered += copySize; + + // Filter the data in tmpbuf. + int filtered = simpleFilter.code(tmpbuf, pos, unfiltered); + assert filtered <= unfiltered; + unfiltered -= filtered; + + // Write out the filtered data. + try { + out.write(tmpbuf, pos, filtered); + } catch (IOException e) { + exception = e; + throw e; + } + + pos += filtered; + + // If end of tmpbuf was reached, move the pending unfiltered + // data to the beginning of the buffer so that more data can + // be copied into tmpbuf on the next loop iteration. + if (pos + unfiltered == TMPBUF_SIZE) { + System.arraycopy(tmpbuf, pos, tmpbuf, 0, unfiltered); + pos = 0; + } + } + } + + private void writePending() throws IOException { + assert !finished; + + if (exception != null) { + throw exception; + } + + try { + out.write(tmpbuf, pos, unfiltered); + } catch (IOException e) { + exception = e; + throw e; + } + + finished = true; + } + + public void flush() throws IOException { + throw new UnsupportedOptionsException("Flushing is not supported"); + } + + public void finish() throws IOException { + if (!finished) { + // If it fails, don't call out.finish(). 
+ writePending(); + + try { + out.finish(); + } catch (IOException e) { + exception = e; + throw e; + } + } + } + + public void close() throws IOException { + if (out != null) { + if (!finished) { + // out.close() must be called even if writePending() fails. + // writePending() saves the possible exception so we can + // ignore exceptions here. + try { + writePending(); + } catch (IOException e) { + } + } + + try { + out.close(); + } catch (IOException e) { + // If there is an earlier exception, the exception + // from out.close() is lost. + if (exception == null) { + exception = e; + } + } + + out = null; + } + + if (exception != null) { + throw exception; + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SingleXZInputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SingleXZInputStream.java new file mode 100644 index 0000000..38d411b --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/SingleXZInputStream.java @@ -0,0 +1,273 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.check.Check; +import org.xbib.io.compress.xz.common.DecoderUtil; +import org.xbib.io.compress.xz.common.StreamFlags; +import org.xbib.io.compress.xz.index.IndexHash; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Decompresses exactly one XZ Stream in streamed mode (no seeking). + * The decompression stops after the first XZ Stream has been decompressed, + * and the read position in the input stream is left at the first byte + * after the end of the XZ Stream. This can be useful when XZ data has + * been stored inside some other file format or protocol. + * Unless you know what you are doing, don't use this class to decompress + * standalone .xz files. For that purpose, use XZInputStream. + * When uncompressed size is known beforehand + * If you are decompressing complete XZ streams and your application knows + * exactly how much uncompressed data there should be, it is good to try + * reading one more byte by calling read() and checking + * that it returns -1. This way the decompressor will parse the + * file footers and verify the integrity checks, giving the caller more + * confidence that the uncompressed data is valid. + * + * @see XZInputStream + */ +public class SingleXZInputStream extends InputStream { + private InputStream in; + private int memoryLimit; + private StreamFlags streamHeaderFlags; + private Check check; + private BlockInputStream blockDecoder = null; + private final IndexHash indexHash = new IndexHash(); + private boolean endReached = false; + private IOException exception = null; + + /** + * Creates a new XZ decompressor that decompresses exactly one + * XZ Stream from in without a memory usage limit. + * This constructor reads and parses the XZ Stream Header (12 bytes) + * from in. The header of the first Block is not read + * until read is called. 
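+ * A sketch of reading one embedded Stream from a larger container
+ * (the container stream and its position are hypothetical):
+ * <pre>
+ * InputStream container = new FileInputStream("bundle.bin");
+ * // ... position container at the start of the XZ data ...
+ * SingleXZInputStream xz = new SingleXZInputStream(container);
+ * </pre>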
+ * + * @param in input stream from which XZ-compressed + * data is read + * @throws XZFormatException input is not in the XZ format + * @throws CorruptedInputException XZ header CRC32 doesn't match + * @throws UnsupportedOptionsException XZ header is valid but specifies options + * not supported by this implementation + * @throws java.io.EOFException less than 12 bytes of input was available + * from in + * @throws java.io.IOException may be thrown by in + */ + public SingleXZInputStream(InputStream in) throws IOException { + initialize(in, -1); + } + + /** + * Creates a new XZ decompressor that decompresses exactly one + * XZ Stream from in with an optional memory usage limit. + * This is identical to SingleXZInputStream(InputStream) + * except that this takes also the memoryLimit argument. + * + * @param in input stream from which XZ-compressed + * data is read + * @param memoryLimit memory usage limit in kibibytes (KiB) + * or -1 to impose no + * memory usage limit + * @throws XZFormatException input is not in the XZ format + * @throws CorruptedInputException XZ header CRC32 doesn't match + * @throws UnsupportedOptionsException XZ header is valid but specifies options + * not supported by this implementation + * @throws java.io.EOFException less than 12 bytes of input was available + * from in + * @throws java.io.IOException may be thrown by in + */ + public SingleXZInputStream(InputStream in, int memoryLimit) + throws IOException { + initialize(in, memoryLimit); + } + + SingleXZInputStream(InputStream in, int memoryLimit, + byte[] streamHeader) throws IOException { + initialize(in, memoryLimit, streamHeader); + } + + private void initialize(InputStream in, int memoryLimit) + throws IOException { + byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE]; + new DataInputStream(in).readFully(streamHeader); + initialize(in, memoryLimit, streamHeader); + } + + private void initialize(InputStream in, int memoryLimit, + byte[] streamHeader) throws IOException { + this.in = in; + this.memoryLimit = memoryLimit; + streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader); + check = Check.getInstance(streamHeaderFlags.checkType); + } + + /** + * Gets the ID of the integrity check used in this XZ Stream. + * + * @return the Check ID specified in the XZ Stream Header + */ + public int getCheckType() { + return streamHeaderFlags.checkType; + } + + /** + * Gets the name of the integrity check used in this XZ Stream. + * + * @return the name of the check specified in the XZ Stream Header + */ + public String getCheckName() { + return check.getName(); + } + + /** + * Decompresses the next byte from this input stream. + * Reading lots of data with read() from this input stream + * may be inefficient. Wrap it in {@link java.io.BufferedInputStream} + * if you need to read lots of data one byte at a time. + * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * @throws XZIOException if the stream has been closed + * @throws java.io.EOFException compressed input is truncated or corrupt + * @throws java.io.IOException may be thrown by in + */ + public int read() throws IOException { + byte[] buf = new byte[1]; + return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + * If len is zero, no bytes are read and 0 + * is returned. 
Otherwise this will try to decompress len
+ * bytes of uncompressed data. Less than len bytes may
+ * be read only in the following situations:
+ * <ul>
+ *   <li>The end of the compressed data was reached successfully.</li>
+ *   <li>An error is detected after at least one but less than len
+ *       bytes have already been successfully decompressed.
+ *       The next call with non-zero len will immediately
+ *       throw the pending exception.</li>
+ *   <li>An exception is thrown.</li>
+ * </ul>
+ * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * @throws XZIOException if the stream has been closed + * @throws java.io.EOFException compressed input is truncated or corrupt + * @throws java.io.IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) { + throw new IndexOutOfBoundsException(); + } + if (len == 0) { + return 0; + } + if (in == null) { + throw new XZIOException("Stream closed"); + } + if (exception != null) { + throw exception; + } + if (endReached) { + return -1; + } + int size = 0; + try { + while (len > 0) { + if (blockDecoder == null) { + try { + blockDecoder = new BlockInputStream( + in, check, memoryLimit, -1, -1); + } catch (IndexIndicatorException e) { + indexHash.validate(in); + validateStreamFooter(); + endReached = true; + return size > 0 ? size : -1; + } + } + + int ret = blockDecoder.read(buf, off, len); + + if (ret > 0) { + size += ret; + off += ret; + len -= ret; + } else if (ret == -1) { + indexHash.add(blockDecoder.getUnpaddedSize(), + blockDecoder.getUncompressedSize()); + blockDecoder = null; + } + } + } catch (IOException e) { + exception = e; + if (size == 0) { + throw e; + } + } + + return size; + } + + private void validateStreamFooter() throws IOException { + byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; + new DataInputStream(in).readFully(buf); + StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf); + + if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags, + streamFooterFlags) + || indexHash.getIndexSize() != streamFooterFlags.backwardSize) { + throw new CorruptedInputException("XZ Stream Footer does not match Stream Header"); + } + } + + /** + * Returns the number of uncompressed bytes that can be read + * without blocking. The value is returned with an assumption + * that the compressed input data will be valid. If the compressed + * data is corrupt, CorruptedInputException may get + * thrown before the number of bytes claimed to be available have + * been read from this input stream. + * + * @return the number of uncompressed bytes that can be read + * without blocking + */ + public int available() throws IOException { + if (in == null) { + throw new XZIOException("Stream closed"); + } + + if (exception != null) { + throw exception; + } + + return blockDecoder == null ? 0 : blockDecoder.available(); + } + + /** + * Closes the stream and calls in.close(). + * If the stream was already closed, this does nothing. 
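+ * If the exact amount of uncompressed data is known beforehand, the
+ * verification suggested in the class description can be done right
+ * before closing (a sketch; xz is a hypothetical SingleXZInputStream):
+ * <pre>
+ * if (xz.read() != -1) {
+ *     throw new CorruptedInputException("More data than expected");
+ * }
+ * xz.close();
+ * </pre>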
+ * + * @throws java.io.IOException if thrown by in.close() + */ + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/UncompressedLZMA2OutputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/UncompressedLZMA2OutputStream.java new file mode 100644 index 0000000..01e4bb9 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/UncompressedLZMA2OutputStream.java @@ -0,0 +1,158 @@ + +package org.xbib.io.compress.xz; + +import java.io.DataOutputStream; +import java.io.IOException; + +class UncompressedLZMA2OutputStream extends FinishableOutputStream { + private FinishableOutputStream out; + private final DataOutputStream outData; + + private final byte[] uncompBuf + = new byte[LZMA2OutputStream.COMPRESSED_SIZE_MAX]; + private int uncompPos = 0; + private boolean dictResetNeeded = true; + + private boolean finished = false; + private IOException exception = null; + + static int getMemoryUsage() { + // uncompBuf + a little extra + return 70; + } + + UncompressedLZMA2OutputStream(FinishableOutputStream out) { + if (out == null) { + throw new NullPointerException(); + } + + this.out = out; + outData = new DataOutputStream(out); + } + + public void write(int b) throws IOException { + byte[] buf = new byte[1]; + buf[0] = (byte) b; + write(buf, 0, 1); + } + + public void write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) { + throw new IndexOutOfBoundsException(); + } + + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished or closed"); + } + + try { + while (len > 0) { + int copySize = Math.min(uncompBuf.length - uncompPos, len); + System.arraycopy(buf, off, uncompBuf, uncompPos, copySize); + len -= copySize; + uncompPos += copySize; + + if (uncompPos == uncompBuf.length) { + writeChunk(); + } + } + } catch (IOException e) { + exception = e; + throw e; + } + } + + private void writeChunk() throws IOException { + outData.writeByte(dictResetNeeded ? 
0x01 : 0x02); + outData.writeShort(uncompPos - 1); + outData.write(uncompBuf, 0, uncompPos); + uncompPos = 0; + dictResetNeeded = false; + } + + private void writeEndMarker() throws IOException { + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished or closed"); + } + + try { + if (uncompPos > 0) { + writeChunk(); + } + + out.write(0x00); + } catch (IOException e) { + exception = e; + throw e; + } + } + + public void flush() throws IOException { + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished or closed"); + } + + try { + if (uncompPos > 0) { + writeChunk(); + } + + out.flush(); + } catch (IOException e) { + exception = e; + throw e; + } + } + + public void finish() throws IOException { + if (!finished) { + writeEndMarker(); + + try { + out.finish(); + } catch (IOException e) { + exception = e; + throw e; + } + + finished = true; + } + } + + public void close() throws IOException { + if (out != null) { + if (!finished) { + try { + writeEndMarker(); + } catch (IOException e) { + } + } + + try { + out.close(); + } catch (IOException e) { + if (exception == null) { + exception = e; + } + } + + out = null; + } + + if (exception != null) { + throw exception; + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/UnsupportedOptionsException.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/UnsupportedOptionsException.java new file mode 100644 index 0000000..fdfea8d --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/UnsupportedOptionsException.java @@ -0,0 +1,27 @@ + +package org.xbib.io.compress.xz; + +/** + * Thrown when compression options not supported by this implementation + * are detected. Some other implementation might support those options. + */ +public class UnsupportedOptionsException extends XZIOException { + private static final long serialVersionUID = 3L; + + /** + * Creates a new UnsupportedOptionsException with null + * as its error detail message. + */ + public UnsupportedOptionsException() { + } + + /** + * Creates a new UnsupportedOptionsException with the given + * error detail message. + * + * @param s error detail message + */ + public UnsupportedOptionsException(String s) { + super(s); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/X86Options.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/X86Options.java new file mode 100644 index 0000000..5721b52 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/X86Options.java @@ -0,0 +1,29 @@ + +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.simple.X86; + +import java.io.InputStream; + +/** + * BCJ filter for x86 (32-bit and 64-bit) instructions. 
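+ * A minimal sketch for compressing an x86 executable (the file name
+ * is hypothetical; the XZOutputStream class documentation shows a
+ * fuller example):
+ * <pre>
+ * FilterOptions[] options = { new X86Options(), new LZMA2Options() };
+ * XZOutputStream out = new XZOutputStream(
+ *         new FileOutputStream("app.exe.xz"), options);
+ * </pre>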
+ */ +public class X86Options extends BCJOptions { + private static final int ALIGNMENT = 1; + + public X86Options() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new X86(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new X86(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.X86_FILTER_ID); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZ.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZ.java new file mode 100644 index 0000000..57592b0 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZ.java @@ -0,0 +1,44 @@ +package org.xbib.io.compress.xz; + +/** + * XZ constants. + */ +public class XZ { + /** + * XZ Header Magic Bytes begin a XZ file. + * This can be useful to detect XZ compressed data. + */ + public static final byte[] HEADER_MAGIC = { + (byte) 0xFD, '7', 'z', 'X', 'Z', '\0'}; + + /** + * XZ Footer Magic Bytes are the last bytes of a XZ Stream. + */ + public static final byte[] FOOTER_MAGIC = {'Y', 'Z'}; + + /** + * Integrity check ID indicating that no integrity check is calculated. + * Omitting the integrity check is strongly discouraged except when + * the integrity of the data will be verified by other means anyway, + * and calculating the check twice would be useless. + */ + public static final int CHECK_NONE = 0; + + /** + * Integrity check ID for CRC32. + */ + public static final int CHECK_CRC32 = 1; + + /** + * Integrity check ID for CRC64. + */ + public static final int CHECK_CRC64 = 4; + + /** + * Integrity check ID for SHA-256. + */ + public static final int CHECK_SHA256 = 10; + + private XZ() { + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZFormatException.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZFormatException.java new file mode 100644 index 0000000..0fd5fb5 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZFormatException.java @@ -0,0 +1,16 @@ + +package org.xbib.io.compress.xz; + +/** + * Thrown when the input data is not in the XZ format. + */ +public class XZFormatException extends XZIOException { + private static final long serialVersionUID = 3L; + + /** + * Creates a new exception with the default error detail message. + */ + public XZFormatException() { + super("Input is not in the XZ format"); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZIOException.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZIOException.java new file mode 100644 index 0000000..be13360 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZIOException.java @@ -0,0 +1,19 @@ + +package org.xbib.io.compress.xz; + +/** + * Generic {@link java.io.IOException IOException} specific to this package. + * The other IOExceptions in this package extend + * from XZIOException. 
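+ * This allows catching all XZ-specific error conditions with a single
+ * handler, as in this sketch (xzOut and data are hypothetical):
+ * <pre>
+ * try {
+ *     xzOut.write(data);
+ *     xzOut.finish();
+ * } catch (XZIOException e) {
+ *     // corrupt input, unsupported options, memory limit exceeded, ...
+ * }
+ * </pre>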
+ */ +public class XZIOException extends java.io.IOException { + private static final long serialVersionUID = 3L; + + public XZIOException() { + super(); + } + + public XZIOException(String s) { + super(s); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZInputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZInputStream.java new file mode 100644 index 0000000..7e2e6d6 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZInputStream.java @@ -0,0 +1,270 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.common.DecoderUtil; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Decompresses a .xz file in streamed mode (no seeking). + * Use this to decompress regular standalone .xz files. This reads from + * its input stream until the end of the input or until an error occurs. + * This supports decompressing concatenated .xz files. + * Typical use cases + * Getting an input stream to decompress a .xz file: + *
+ * <pre>
+ * InputStream infile = new FileInputStream("foo.xz");
+ * XZInputStream inxz = new XZInputStream(infile);
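+ * // a usage sketch: copy everything to a hypothetical OutputStream dest;
+ * // read() returns -1 only after all concatenated Streams have ended
+ * int b;
+ * while ((b = inxz.read()) != -1) {
+ *     dest.write(b);
+ * }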
+ * </pre>
+ * It's important to keep in mind that decompressor memory usage depends
+ * on the settings used to compress the file. The worst-case memory usage
+ * of XZInputStream is currently 1.5 GiB. Still, very few files will
+ * require more than about 65 MiB because that's how much decompressing
+ * a file created with the highest preset level will need, and only a few
+ * people use settings other than the predefined presets.
+ * It is possible to specify a memory usage limit for
+ * XZInputStream. If decompression requires more memory than
+ * the specified limit, MemoryLimitException will be thrown when reading
+ * from the stream. For example, the following sets the memory usage limit
+ * to 100 MiB:
+ * <pre>
+ * InputStream infile = new FileInputStream("foo.xz");
+ * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
+ * </pre>
+ * When uncompressed size is known beforehand + * If you are decompressing complete files and your application knows + * exactly how much uncompressed data there should be, it is good to try + * reading one more byte by calling read() and checking + * that it returns -1. This way the decompressor will parse the + * file footers and verify the integrity checks, giving the caller more + * confidence that the uncompressed data is valid. (This advice seems to + * apply to java.util.zip.GZIPInputStream too.) + * + * @see SingleXZInputStream + */ +public class XZInputStream extends InputStream { + private final int memoryLimit; + private InputStream in; + private SingleXZInputStream xzIn; + private boolean endReached = false; + private IOException exception = null; + + /** + * Creates a new XZ decompressor without a memory usage limit. + * This constructor reads and parses the XZ Stream Header (12 bytes) + * from in. The header of the first Block is not read + * until read is called. + * + * @param in input stream from which XZ-compressed + * data is read + * @throws XZFormatException input is not in the XZ format + * @throws CorruptedInputException XZ header CRC32 doesn't match + * @throws UnsupportedOptionsException XZ header is valid but specifies options + * not supported by this implementation + * @throws java.io.EOFException less than 12 bytes of input was available + * from in + * @throws java.io.IOException may be thrown by in + */ + public XZInputStream(InputStream in) throws IOException { + this(in, -1); + } + + /** + * Creates a new XZ decompressor with an optional memory usage limit. + * This is identical to XZInputStream(InputStream) except + * that this takes also the memoryLimit argument. + * + * @param in input stream from which XZ-compressed + * data is read + * @param memoryLimit memory usage limit in kibibytes (KiB) + * or -1 to impose no + * memory usage limit + * @throws XZFormatException input is not in the XZ format + * @throws CorruptedInputException XZ header CRC32 doesn't match + * @throws UnsupportedOptionsException XZ header is valid but specifies options + * not supported by this implementation + * @throws java.io.EOFException less than 12 bytes of input was available + * from in + * @throws java.io.IOException may be thrown by in + */ + public XZInputStream(InputStream in, int memoryLimit) throws IOException { + this.in = in; + this.memoryLimit = memoryLimit; + this.xzIn = new SingleXZInputStream(in, memoryLimit); + } + + /** + * Decompresses the next byte from this input stream. + * Reading lots of data with read() from this input stream + * may be inefficient. Wrap it in {@link java.io.BufferedInputStream} + * if you need to read lots of data one byte at a time. + * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * @throws XZIOException if the stream has been closed + * @throws java.io.EOFException compressed input is truncated or corrupt + * @throws java.io.IOException may be thrown by in + */ + public int read() throws IOException { + byte[] buf = new byte[1]; + return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + * If len is zero, no bytes are read and 0 + * is returned. Otherwise this will try to decompress len + * bytes of uncompressed data. Less than len bytes may + * be read only in the following situations: + *
+ * <ul>
+ *   <li>The end of the compressed data was reached successfully.</li>
+ *   <li>An error is detected after at least one but less than len
+ *       bytes have already been successfully decompressed.
+ *       The next call with non-zero len will immediately
+ *       throw the pending exception.</li>
+ *   <li>An exception is thrown.</li>
+ * </ul>
+ * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * @throws XZIOException if the stream has been closed + * @throws java.io.EOFException compressed input is truncated or corrupt + * @throws java.io.IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) { + throw new IndexOutOfBoundsException(); + } + + if (len == 0) { + return 0; + } + + if (in == null) { + throw new XZIOException("Stream closed"); + } + + if (exception != null) { + throw exception; + } + + if (endReached) { + return -1; + } + + int size = 0; + + try { + while (len > 0) { + if (xzIn == null) { + prepareNextStream(); + if (endReached) { + return size == 0 ? -1 : size; + } + } + + int ret = xzIn.read(buf, off, len); + + if (ret > 0) { + size += ret; + off += ret; + len -= ret; + } else if (ret == -1) { + xzIn = null; + } + } + } catch (IOException e) { + exception = e; + if (size == 0) { + throw e; + } + } + + return size; + } + + private void prepareNextStream() throws IOException { + DataInputStream inData = new DataInputStream(in); + byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; + + // The size of Stream Padding must be a multiple of four bytes, + // all bytes zero. + do { + // First try to read one byte to see if we have reached the end + // of the file. + int ret = inData.read(buf, 0, 1); + if (ret == -1) { + endReached = true; + return; + } + + // Since we got one byte of input, there must be at least + // three more available in a valid file. + inData.readFully(buf, 1, 3); + + } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0); + + // Not all bytes are zero. In a valid Stream it indicates the + // beginning of the next Stream. Read the rest of the Stream Header + // and initialize the XZ decoder. + inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4); + + try { + xzIn = new SingleXZInputStream(in, memoryLimit, buf); + } catch (XZFormatException e) { + // Since this isn't the first .xz Stream, it is more + // logical to tell that the data is corrupt. + throw new CorruptedInputException( + "Garbage after a valid XZ Stream"); + } + } + + /** + * Returns the number of uncompressed bytes that can be read + * without blocking. The value is returned with an assumption + * that the compressed input data will be valid. If the compressed + * data is corrupt, CorruptedInputException may get + * thrown before the number of bytes claimed to be available have + * been read from this input stream. + * + * @return the number of uncompressed bytes that can be read + * without blocking + */ + public int available() throws IOException { + if (in == null) { + throw new XZIOException("Stream closed"); + } + + if (exception != null) { + throw exception; + } + + return xzIn == null ? 0 : xzIn.available(); + } + + /** + * Closes the stream and calls in.close(). + * If the stream was already closed, this does nothing. 
+ * + * @throws java.io.IOException if thrown by in.close() + */ + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZOutputStream.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZOutputStream.java new file mode 100644 index 0000000..7fe2ed5 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/XZOutputStream.java @@ -0,0 +1,439 @@ +package org.xbib.io.compress.xz; + +import org.xbib.io.compress.xz.check.Check; +import org.xbib.io.compress.xz.common.EncoderUtil; +import org.xbib.io.compress.xz.common.StreamFlags; +import org.xbib.io.compress.xz.index.IndexEncoder; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * Compresses into the .xz file format. + * Examples + * Getting an output stream to compress with LZMA2 using the default + * settings and the default integrity check type (CRC64): + *
+ * FileOutputStream outfile = new FileOutputStream("foo.xz");
+ * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options());
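+ * // sketch continuation: write a hypothetical payload and finish the
+ * // container so the XZ Index and Stream Footer get written
+ * outxz.write(payload);
+ * outxz.finish();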
+ * </pre>
+ * Using the preset level 8 for LZMA2 (the default
+ * is 6) and SHA-256 instead of CRC64 for integrity checking:
+ * <pre>
+ * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options(8),
+ *                                           XZ.CHECK_SHA256);
+ * </pre>
+ * Using the x86 BCJ filter together with LZMA2 to compress x86 executables
+ * and printing the memory usage information before creating the
+ * XZOutputStream:
+ * <pre>
+ * X86Options x86 = new X86Options();
+ * LZMA2Options lzma2 = new LZMA2Options();
+ * FilterOptions[] options = { x86, lzma2 };
+ * String msg = "Encoder memory usage: "
+ *                    + FilterOptions.getEncoderMemoryUsage(options)
+ *                    + " KiB";
+ * String msg2 = "Decoder memory usage: "
+ *                    + FilterOptions.getDecoderMemoryUsage(options)
+ *                    + " KiB";
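+ * // print the usage figures computed above
+ * System.out.println(msg);
+ * System.out.println(msg2);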
+ * XZOutputStream outxz = new XZOutputStream(outfile, options);
+ * </pre>
+ */ +public class XZOutputStream extends FinishableOutputStream { + + private OutputStream out; + + private final StreamFlags streamFlags = new StreamFlags(); + + private final Check check; + + private final IndexEncoder index = new IndexEncoder(); + + private BlockOutputStream blockEncoder = null; + + private FilterEncoder[] filters; + + /** + * True if the current filter chain supports flushing. + * If it doesn't support flushing, flush() + * will use endBlock() as a fallback. + */ + private boolean filtersSupportFlushing; + + private IOException exception = null; + + private boolean finished = false; + + public XZOutputStream(OutputStream out) throws IOException { + this(out, new LZMA2Options()); + } + + /** + * Creates a new XZ compressor using one filter and CRC64 as + * the integrity check. This constructor is equivalent to passing + * a single-member FilterOptions array to + * XZOutputStream(OutputStream, FilterOptions[]). + * + * @param out output stream to which the compressed data + * will be written + * @param filterOptions filter options to use + * @throws UnsupportedOptionsException invalid filter chain + * @throws java.io.IOException may be thrown from out + */ + public XZOutputStream(OutputStream out, FilterOptions filterOptions) + throws IOException { + this(out, filterOptions, XZ.CHECK_CRC64); + } + + /** + * Creates a new XZ compressor using one filter and the specified + * integrity check type. This constructor is equivalent to + * passing a single-member FilterOptions array to + * XZOutputStream(OutputStream, FilterOptions[], int). + * + * @param out output stream to which the compressed data + * will be written + * @param filterOptions filter options to use + * @param checkType type of the integrity check, + * for example XZ.CHECK_CRC32 + * @throws UnsupportedOptionsException invalid filter chain + * @throws java.io.IOException may be thrown from out + */ + public XZOutputStream(OutputStream out, FilterOptions filterOptions, + int checkType) throws IOException { + this(out, new FilterOptions[] { filterOptions }, checkType); + } + + /** + * Creates a new XZ compressor using 1-4 filters and CRC64 as + * the integrity check. This constructor is equivalent + * XZOutputStream(out, filterOptions, XZ.CHECK_CRC64). + * + * @param out output stream to which the compressed data + * will be written + * @param filterOptions array of filter options to use + * @throws UnsupportedOptionsException invalid filter chain + * @throws java.io.IOException may be thrown from out + */ + public XZOutputStream(OutputStream out, FilterOptions[] filterOptions) + throws IOException { + this(out, filterOptions, XZ.CHECK_CRC64); + } + + /** + * Creates a new XZ compressor using 1-4 filters and the specified + * integrity check type. + * + * @param out output stream to which the compressed data + * will be written + * @param filterOptions array of filter options to use + * @param checkType type of the integrity check, + * for example XZ.CHECK_CRC32 + * @throws UnsupportedOptionsException invalid filter chain + * @throws java.io.IOException may be thrown from out + */ + public XZOutputStream(OutputStream out, FilterOptions[] filterOptions, + int checkType) throws IOException { + this.out = out; + updateFilters(filterOptions); + streamFlags.checkType = checkType; + check = Check.getInstance(checkType); + encodeStreamHeader(); + } + + /** + * Updates the filter chain with a single filter. + * This is equivalent to passing a single-member FilterOptions array + * to updateFilters(FilterOptions[]). 
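+ * A sketch of switching presets between Blocks (out and the byte
+ * arrays are hypothetical):
+ * <pre>
+ * out.write(first);
+ * out.endBlock();
+ * out.updateFilters(new LZMA2Options(9));
+ * out.write(second);
+ * </pre>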
+ * + * @param filterOptions new filter to use + * @throws UnsupportedOptionsException unsupported filter chain, or trying to change + * the filter chain in the middle of a Block + */ + public void updateFilters(FilterOptions filterOptions) + throws XZIOException { + FilterOptions[] opts = new FilterOptions[1]; + opts[0] = filterOptions; + updateFilters(opts); + } + + /** + * Updates the filter chain with 1-4 filters. + * Currently this cannot be used to update e.g. LZMA2 options in the + * middle of a XZ Block. Use endBlock() to finish the + * current XZ Block before calling this function. The new filter chain + * will then be used for the next XZ Block. + * + * @param filterOptions new filter chain to use + * @throws UnsupportedOptionsException unsupported filter chain, or trying to change + * the filter chain in the middle of a Block + */ + public void updateFilters(FilterOptions[] filterOptions) + throws XZIOException { + if (blockEncoder != null) { + throw new UnsupportedOptionsException("Changing filter options " + + "in the middle of a XZ Block not implemented"); + } + + if (filterOptions.length < 1 || filterOptions.length > 4) { + throw new UnsupportedOptionsException( + "XZ filter chain must be 1-4 filters"); + } + + filtersSupportFlushing = true; + FilterEncoder[] newFilters = new FilterEncoder[filterOptions.length]; + for (int i = 0; i < filterOptions.length; ++i) { + newFilters[i] = filterOptions[i].getFilterEncoder(); + filtersSupportFlushing &= newFilters[i].supportsFlushing(); + } + + RawCoder.validate(newFilters); + filters = newFilters; + } + + /** + * Writes one byte to be compressed. + * + * @throws XZIOException XZ Stream has grown too big + * @throws XZIOException finish() or close() + * was already called + * @throws java.io.IOException may be thrown by the underlying output stream + */ + public void write(int b) throws IOException { + byte[] buf = new byte[]{(byte) b}; + write(buf, 0, 1); + } + + /** + * Writes an array of bytes to be compressed. + * The compressors tend to do internal buffering and thus the written + * data won't be readable from the compressed output immediately. + * Use flush() to force everything written so far to + * be written to the underlaying output stream, but be aware that + * flushing reduces compression ratio. + * + * @param buf buffer of bytes to be written + * @param off start offset in buf + * @param len number of bytes to write + * @throws XZIOException XZ Stream has grown too big: total file size + * about 8 EiB or the Index field exceeds + * 16 GiB; you shouldn't reach these sizes + * in practice + * @throws XZIOException finish() or close() + * was already called and len > 0 + * @throws java.io.IOException may be thrown by the underlying output stream + */ + public void write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) { + throw new IndexOutOfBoundsException(); + } + + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished or closed"); + } + + try { + if (blockEncoder == null) { + blockEncoder = new BlockOutputStream(out, filters, check); + } + + blockEncoder.write(buf, off, len); + } catch (IOException e) { + exception = e; + throw e; + } + } + + /** + * Finishes the current XZ Block (but not the whole XZ Stream). + * This doesn't flush the stream so it's possible that not all data will + * be decompressible from the output stream when this function returns. 
+ * Call also flush() if flushing is wanted in addition to + * finishing the current XZ Block. + * If there is no unfinished Block open, this function will do nothing. + * (No empty XZ Block will be created.) + * This function can be useful, for example, to create + * random-accessible .xz files. + * Starting a new XZ Block means that the encoder state is reset. + * Doing this very often will increase the size of the compressed + * file a lot (more than plain flush() would do). + * + * @throws XZIOException XZ Stream has grown too big + * @throws XZIOException stream finished or closed + * @throws java.io.IOException may be thrown by the underlying output stream + */ + public void endBlock() throws IOException { + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished or closed"); + } + + // NOTE: Once there is threading with multiple Blocks, it's possible + // that this function will be more like a barrier that returns + // before the last Block has been finished. + if (blockEncoder != null) { + try { + blockEncoder.finish(); + index.add(blockEncoder.getUnpaddedSize(), + blockEncoder.getUncompressedSize()); + blockEncoder = null; + } catch (IOException e) { + exception = e; + throw e; + } + } + } + + /** + * Flushes the encoder and calls out.flush(). + * All buffered pending data will then be decompressible from + * the output stream. + * Calling this function very often may increase the compressed + * file size a lot. The filter chain options may affect the size + * increase too. For example, with LZMA2 the HC4 match finder has + * smaller penalty with flushing than BT4. + * Some filters don't support flushing. If the filter chain has + * such a filter, flush() will call endBlock() + * before flushing. + * + * @throws XZIOException XZ Stream has grown too big + * @throws XZIOException stream finished or closed + * @throws java.io.IOException may be thrown by the underlying output stream + */ + public void flush() throws IOException { + if (exception != null) { + throw exception; + } + + if (finished) { + throw new XZIOException("Stream finished or closed"); + } + + try { + if (blockEncoder != null) { + if (filtersSupportFlushing) { + // This will eventually call out.flush() so + // no need to do it here again. + blockEncoder.flush(); + } else { + endBlock(); + out.flush(); + } + } else { + out.flush(); + } + } catch (IOException e) { + exception = e; + throw e; + } + } + + /** + * Finishes compression without closing the underlying stream. + * No more data can be written to this stream after finishing + * (calling write with an empty buffer is OK). + * Repeated calls to finish() do nothing unless + * an exception was thrown by this stream earlier. In that case + * the same exception is thrown again. + * After finishing, the stream may be closed normally with + * close(). If the stream will be closed anyway, there + * usually is no need to call finish() separately. + * + * @throws XZIOException XZ Stream has grown too big + * @throws java.io.IOException may be thrown by the underlying output stream + */ + public void finish() throws IOException { + if (!finished) { + // This checks for pending exceptions so we don't need to + // worry about it here. + endBlock(); + + try { + index.encode(out); + encodeStreamFooter(); + } catch (IOException e) { + exception = e; + throw e; + } + + // Set it to true only if everything goes fine. 
Setting it earlier + // would cause repeated calls to finish() do nothing instead of + // throwing an exception to indicate an earlier error. + finished = true; + } + } + + /** + * Finishes compression and closes the underlying stream. + * The underlying stream out is closed even if finishing + * fails. If both finishing and closing fail, the exception thrown + * by finish() is thrown and the exception from the failed + * out.close() is lost. + * + * @throws XZIOException XZ Stream has grown too big + * @throws java.io.IOException may be thrown by the underlying output stream + */ + public void close() throws IOException { + if (out != null) { + // If finish() throws an exception, it stores the exception to + // the variable "exception". So we can ignore the possible + // exception here. + try { + finish(); + } catch (IOException e) { + } + + try { + out.close(); + } catch (IOException e) { + // Remember the exception but only if there is no previous + // pending exception. + if (exception == null) { + exception = e; + } + } + + out = null; + } + + if (exception != null) { + throw exception; + } + } + + private void encodeStreamFlags(byte[] buf, int off) { + buf[off] = 0x00; + buf[off + 1] = (byte) streamFlags.checkType; + } + + private void encodeStreamHeader() throws IOException { + out.write(XZ.HEADER_MAGIC); + + byte[] buf = new byte[2]; + encodeStreamFlags(buf, 0); + out.write(buf); + + EncoderUtil.writeCRC32(out, buf); + } + + private void encodeStreamFooter() throws IOException { + byte[] buf = new byte[6]; + long backwardSize = index.getIndexSize() / 4 - 1; + for (int i = 0; i < 4; ++i) { + buf[i] = (byte) (backwardSize >>> (i * 8)); + } + + encodeStreamFlags(buf, 4); + + EncoderUtil.writeCRC32(out, buf); + out.write(buf); + out.write(XZ.FOOTER_MAGIC); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/CRC32.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/CRC32.java new file mode 100644 index 0000000..ae2f11d --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/CRC32.java @@ -0,0 +1,25 @@ +package org.xbib.io.compress.xz.check; + +public class CRC32 extends Check { + + private final java.util.zip.CRC32 state = new java.util.zip.CRC32(); + + public CRC32() { + size = 4; + name = "CRC32"; + } + + public void update(byte[] buf, int off, int len) { + state.update(buf, off, len); + } + + public byte[] finish() { + long value = state.getValue(); + byte[] buf = new byte[]{(byte) (value), + (byte) (value >>> 8), + (byte) (value >>> 16), + (byte) (value >>> 24)}; + state.reset(); + return buf; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/CRC64.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/CRC64.java new file mode 100644 index 0000000..84c72e5 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/CRC64.java @@ -0,0 +1,50 @@ +package org.xbib.io.compress.xz.check; + +/** + * + */ +public class CRC64 extends Check { + + private static final long poly = 0xC96C5795D7870F42L; + + private static final long crcTable[] = new long[256]; + + private long crc = -1; + + static { + for (int b = 0; b < crcTable.length; ++b) { + long r = b; + for (int i = 0; i < 8; ++i) { + if ((r & 1) == 1) { + r = (r >>> 1) ^ poly; + } else { + r >>>= 1; + } + } + + crcTable[b] = r; + } + } + + public CRC64() { + size = 8; + name = "CRC64"; + } + + public void update(byte[] buf, int off, int len) { + int end = off + len; + while (off < end) { + crc = crcTable[(buf[off++] ^ 
(int) crc) & 0xFF] ^ (crc >>> 8); + } + } + + public byte[] finish() { + long value = ~crc; + crc = -1; + byte[] buf = new byte[8]; + for (int i = 0; i < buf.length; ++i) { + buf[i] = (byte) (value >> (i * 8)); + } + return buf; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/Check.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/Check.java new file mode 100644 index 0000000..a652727 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/Check.java @@ -0,0 +1,52 @@ +package org.xbib.io.compress.xz.check; + +import org.xbib.io.compress.xz.UnsupportedOptionsException; +import org.xbib.io.compress.xz.XZ; + +import java.security.NoSuchAlgorithmException; + +public abstract class Check { + + int size; + + String name; + + public abstract void update(byte[] buf, int off, int len); + + public abstract byte[] finish(); + + public void update(byte[] buf) { + update(buf, 0, buf.length); + } + + public int getSize() { + return size; + } + + public String getName() { + return name; + } + + public static Check getInstance(int checkType) + throws UnsupportedOptionsException { + switch (checkType) { + case XZ.CHECK_NONE: + return new None(); + + case XZ.CHECK_CRC32: + return new CRC32(); + + case XZ.CHECK_CRC64: + return new CRC64(); + + case XZ.CHECK_SHA256: + try { + return new SHA256(); + } catch (NoSuchAlgorithmException e) { + } + break; + } + throw new UnsupportedOptionsException( + "Unsupported Check ID " + checkType); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/None.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/None.java new file mode 100644 index 0000000..654ed6b --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/None.java @@ -0,0 +1,17 @@ +package org.xbib.io.compress.xz.check; + +public class None extends Check { + + public None() { + size = 0; + name = "None"; + } + + public void update(byte[] buf, int off, int len) { + } + + public byte[] finish() { + byte[] empty = new byte[0]; + return empty; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/SHA256.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/SHA256.java new file mode 100644 index 0000000..f92fb23 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/check/SHA256.java @@ -0,0 +1,25 @@ +package org.xbib.io.compress.xz.check; + +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +public class SHA256 extends Check { + + private final MessageDigest sha256; + + public SHA256() throws NoSuchAlgorithmException { + size = 32; + name = "SHA-256"; + sha256 = MessageDigest.getInstance("SHA-256"); + } + + public void update(byte[] buf, int off, int len) { + sha256.update(buf, off, len); + } + + public byte[] finish() { + byte[] buf = sha256.digest(); + sha256.reset(); + return buf; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/DecoderUtil.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/DecoderUtil.java new file mode 100644 index 0000000..d2106ec --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/DecoderUtil.java @@ -0,0 +1,128 @@ +package org.xbib.io.compress.xz.common; + +import org.xbib.io.compress.xz.CorruptedInputException; +import org.xbib.io.compress.xz.UnsupportedOptionsException; +import org.xbib.io.compress.xz.XZ; +import org.xbib.io.compress.xz.XZFormatException; + +import java.io.EOFException; +import 
java.io.IOException; +import java.io.InputStream; +import java.util.zip.CRC32; + +/** + * + */ +public class DecoderUtil extends Util { + public static boolean isCRC32Valid(byte[] buf, int off, int len, + int ref_off) { + CRC32 crc32 = new CRC32(); + crc32.update(buf, off, len); + long value = crc32.getValue(); + + for (int i = 0; i < 4; ++i) { + if ((byte) (value >>> (i * 8)) != buf[ref_off + i]) { + return false; + } + } + + return true; + } + + public static StreamFlags decodeStreamHeader(byte[] buf) + throws IOException { + for (int i = 0; i < XZ.HEADER_MAGIC.length; ++i) { + if (buf[i] != XZ.HEADER_MAGIC[i]) { + throw new XZFormatException(); + } + } + + if (!isCRC32Valid(buf, XZ.HEADER_MAGIC.length, 2, + XZ.HEADER_MAGIC.length + 2)) { + throw new CorruptedInputException("XZ Stream Header is corrupt"); + } + + try { + return decodeStreamFlags(buf, XZ.HEADER_MAGIC.length); + } catch (UnsupportedOptionsException e) { + throw new UnsupportedOptionsException( + "Unsupported options in XZ Stream Header"); + } + } + + public static StreamFlags decodeStreamFooter(byte[] buf) + throws IOException { + if (buf[10] != XZ.FOOTER_MAGIC[0] || buf[11] != XZ.FOOTER_MAGIC[1]) { + // NOTE: The exception could be XZFormatException too. + // It depends on the situation which one is better. + throw new CorruptedInputException("XZ Stream Footer is corrupt"); + } + + if (!isCRC32Valid(buf, 4, 6, 0)) { + throw new CorruptedInputException("XZ Stream Footer is corrupt"); + } + + StreamFlags streamFlags; + try { + streamFlags = decodeStreamFlags(buf, 8); + } catch (UnsupportedOptionsException e) { + throw new UnsupportedOptionsException( + "Unsupported options in XZ Stream Footer"); + } + + streamFlags.backwardSize = 0; + for (int i = 0; i < 4; ++i) { + streamFlags.backwardSize |= (buf[i + 4] & 0xFF) << (i * 8); + } + + streamFlags.backwardSize = (streamFlags.backwardSize + 1) * 4; + + return streamFlags; + } + + private static StreamFlags decodeStreamFlags(byte[] buf, int off) + throws UnsupportedOptionsException { + if (buf[off] != 0x00 || (buf[off + 1] & 0xFF) >= 0x10) { + throw new UnsupportedOptionsException(); + } + + StreamFlags streamFlags = new StreamFlags(); + streamFlags.checkType = buf[off + 1]; + + return streamFlags; + } + + public static boolean areStreamFlagsEqual(StreamFlags a, StreamFlags b) { + // backwardSize is intentionally not compared. 
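+ // The Stream Header carries no Backward Size field, so the header + // and footer can only be cross-checked on the check type.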
+ return a.checkType == b.checkType; + } + + public static long decodeVLI(InputStream in) throws IOException { + int b = in.read(); + if (b == -1) { + throw new EOFException(); + } + + long num = b & 0x7F; + int i = 0; + + while ((b & 0x80) != 0x00) { + if (++i >= VLI_SIZE_MAX) { + throw new CorruptedInputException(); + } + + b = in.read(); + if (b == -1) { + throw new EOFException(); + } + + if (b == 0x00) { + throw new CorruptedInputException(); + } + + num |= (long) (b & 0x7F) << (i * 7); + } + + return num; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/EncoderUtil.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/EncoderUtil.java new file mode 100644 index 0000000..bb7c5d8 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/EncoderUtil.java @@ -0,0 +1,31 @@ +package org.xbib.io.compress.xz.common; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.CRC32; + +/** + * + */ +public class EncoderUtil extends Util { + public static void writeCRC32(OutputStream out, byte[] buf) + throws IOException { + CRC32 crc32 = new CRC32(); + crc32.update(buf); + long value = crc32.getValue(); + + for (int i = 0; i < 4; ++i) { + out.write((byte) (value >>> (i * 8))); + } + } + + public static void encodeVLI(OutputStream out, long num) + throws IOException { + while (num >= 0x80) { + out.write((byte) (num | 0x80)); + num >>>= 7; + } + + out.write((byte) num); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/StreamFlags.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/StreamFlags.java new file mode 100644 index 0000000..18dc81c --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/StreamFlags.java @@ -0,0 +1,9 @@ +package org.xbib.io.compress.xz.common; + +/** + * + */ +public class StreamFlags { + public int checkType = -1; + public long backwardSize = -1; +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/Util.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/Util.java new file mode 100644 index 0000000..83973b9 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/common/Util.java @@ -0,0 +1,22 @@ +package org.xbib.io.compress.xz.common; + +/** + * + */ +public class Util { + public static final int STREAM_HEADER_SIZE = 12; + public static final long BACKWARD_SIZE_MAX = 1L << 34; + public static final int BLOCK_HEADER_SIZE_MAX = 1024; + public static final long VLI_MAX = Long.MAX_VALUE; + public static final int VLI_SIZE_MAX = 9; + + public static int getVLISize(long num) { + int size = 0; + do { + ++size; + num >>= 7; + } while (num != 0); + + return size; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/delta/DeltaCoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/delta/DeltaCoder.java new file mode 100644 index 0000000..13d361d --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/delta/DeltaCoder.java @@ -0,0 +1,21 @@ +package org.xbib.io.compress.xz.delta; + +/** + * + */ +abstract class DeltaCoder { + static final int DISTANCE_MIN = 1; + static final int DISTANCE_MAX = 256; + static final int DISTANCE_MASK = DISTANCE_MAX - 1; + + final int distance; + final byte[] history = new byte[DISTANCE_MAX]; + int pos = 0; + + DeltaCoder(int distance) { + if (distance < DISTANCE_MIN || distance > DISTANCE_MAX) { + throw new IllegalArgumentException(); + } + this.distance = distance; + } +} diff --git 
a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/delta/DeltaDecoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/delta/DeltaDecoder.java new file mode 100644 index 0000000..bf3b2f6 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/delta/DeltaDecoder.java @@ -0,0 +1,16 @@ + +package org.xbib.io.compress.xz.delta; + +public class DeltaDecoder extends DeltaCoder { + public DeltaDecoder(int distance) { + super(distance); + } + + public void decode(byte[] buf, int off, int len) { + int end = off + len; + for (int i = off; i < end; ++i) { + buf[i] += history[(distance + pos) & DISTANCE_MASK]; + history[pos-- & DISTANCE_MASK] = buf[i]; + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/delta/DeltaEncoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/delta/DeltaEncoder.java new file mode 100644 index 0000000..f3683f2 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/delta/DeltaEncoder.java @@ -0,0 +1,18 @@ +package org.xbib.io.compress.xz.delta; + +/** + * + */ +public class DeltaEncoder extends DeltaCoder { + public DeltaEncoder(int distance) { + super(distance); + } + + public void encode(byte[] in, int in_off, int len, byte[] out) { + for (int i = 0; i < len; ++i) { + byte tmp = history[(distance + pos) & DISTANCE_MASK]; + history[pos-- & DISTANCE_MASK] = in[in_off + i]; + out[i] = (byte) (in[in_off + i] - tmp); + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/BlockInfo.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/BlockInfo.java new file mode 100644 index 0000000..b0dc1be --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/BlockInfo.java @@ -0,0 +1,12 @@ + +package org.xbib.io.compress.xz.index; + +import org.xbib.io.compress.xz.common.StreamFlags; + +public class BlockInfo { + public StreamFlags streamFlags; + public long compressedOffset; + public long uncompressedOffset; + public long unpaddedSize; + public long uncompressedSize; +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexBase.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexBase.java new file mode 100644 index 0000000..abf5827 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexBase.java @@ -0,0 +1,49 @@ + +package org.xbib.io.compress.xz.index; + +import org.xbib.io.compress.xz.XZIOException; +import org.xbib.io.compress.xz.common.Util; + +abstract class IndexBase { + private final XZIOException invalidIndexException; + long blocksSum = 0; + long uncompressedSum = 0; + long indexListSize = 0; + long recordCount = 0; + + IndexBase(XZIOException invalidIndexException) { + this.invalidIndexException = invalidIndexException; + } + + private long getUnpaddedIndexSize() { + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + Util.getVLISize(recordCount) + indexListSize + 4; + } + + public long getIndexSize() { + return (getUnpaddedIndexSize() + 3) & ~3; + } + + public long getStreamSize() { + return Util.STREAM_HEADER_SIZE + blocksSum + getIndexSize() + + Util.STREAM_HEADER_SIZE; + } + + int getIndexPaddingSize() { + return (int) ((4 - getUnpaddedIndexSize()) & 3); + } + + void add(long unpaddedSize, long uncompressedSize) throws XZIOException { + blocksSum += (unpaddedSize + 3) & ~3; + uncompressedSum += uncompressedSize; + indexListSize += Util.getVLISize(unpaddedSize) + + Util.getVLISize(uncompressedSize); + ++recordCount; + + if (blocksSum < 
0 || uncompressedSum < 0 + || getIndexSize() > Util.BACKWARD_SIZE_MAX + || getStreamSize() < 0) { + throw invalidIndexException; + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexDecoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexDecoder.java new file mode 100644 index 0000000..20f48a3 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexDecoder.java @@ -0,0 +1,195 @@ + +package org.xbib.io.compress.xz.index; + +import org.xbib.io.compress.xz.CorruptedInputException; +import org.xbib.io.compress.xz.MemoryLimitException; +import org.xbib.io.compress.xz.SeekableInputStream; +import org.xbib.io.compress.xz.UnsupportedOptionsException; +import org.xbib.io.compress.xz.common.DecoderUtil; +import org.xbib.io.compress.xz.common.StreamFlags; + +import java.io.EOFException; +import java.io.IOException; +import java.util.zip.CheckedInputStream; + +public class IndexDecoder extends IndexBase { + private final BlockInfo info = new BlockInfo(); + private final long streamPadding; + private final int memoryUsage; + private final long[] unpadded; + private final long[] uncompressed; + private long largestBlockSize = 0; + + /** + * Current position in the arrays. This is initialized to -1 + * because then it is possible to use hasNext() and + * getNext() to get BlockInfo of the first Block. + */ + private int pos = -1; + + public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags, + long streamPadding, int memoryLimit) + throws IOException { + super(new CorruptedInputException("XZ Index is corrupt")); + info.streamFlags = streamFooterFlags; + this.streamPadding = streamPadding; + + // If endPos is exceeded before the CRC32 field has been decoded, + // the Index is corrupt. + long endPos = in.position() + streamFooterFlags.backwardSize - 4; + + java.util.zip.CRC32 crc32 = new java.util.zip.CRC32(); + CheckedInputStream inChecked = new CheckedInputStream(in, crc32); + + // Index Indicator + if (inChecked.read() != 0x00) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + + try { + // Number of Records + long count = DecoderUtil.decodeVLI(inChecked); + + // Catch Record counts that are obviously too high to be valid. + // This test isn't exact because it ignores Index Indicator, + // Number of Records, and CRC32 fields, but this is good enough + // to catch the most obvious problems. + if (count >= streamFooterFlags.backwardSize / 2) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + + // If the Record count doesn't fit into an int, we cannot + // allocate the arrays to hold the Records. + if (count > Integer.MAX_VALUE) { + throw new UnsupportedOptionsException("XZ Index has over " + + Integer.MAX_VALUE + " Records"); + } + + // Calculate approximate memory requirements and check the + // memory usage limit. + memoryUsage = 1 + (int) ((16L * count + 1023) / 1024); + if (memoryLimit >= 0 && memoryUsage > memoryLimit) { + throw new MemoryLimitException(memoryUsage, memoryLimit); + } + + // Allocate the arrays for the Records. + unpadded = new long[(int) count]; + uncompressed = new long[(int) count]; + int record = 0; + + // Decode the Records. + for (int i = (int) count; i > 0; --i) { + // Get the next Record. + long unpaddedSize = DecoderUtil.decodeVLI(inChecked); + long uncompressedSize = DecoderUtil.decodeVLI(inChecked); + + // Check that the input position stays sane.
Since this is + // checked only once per loop iteration instead of for + // every input byte read, it's still possible that + // EOFException gets thrown with corrupt input. + if (in.position() > endPos) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + + // Add the new Record. + unpadded[record] = blocksSum + unpaddedSize; + uncompressed[record] = uncompressedSum + uncompressedSize; + ++record; + super.add(unpaddedSize, uncompressedSize); + assert record == recordCount; + + // Remember the uncompressed size of the largest Block. + if (largestBlockSize < uncompressedSize) { + largestBlockSize = uncompressedSize; + } + } + } catch (EOFException e) { + // EOFException is caught just in case a corrupt input causes + // DecoderUtil.decodeVLI to read too much at once. + throw new CorruptedInputException("XZ Index is corrupt"); + } + + // Validate that the size of the Index field matches + // Backward Size. + int indexPaddingSize = getIndexPaddingSize(); + if (in.position() + indexPaddingSize != endPos) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + + // Index Padding + while (indexPaddingSize-- > 0) { + if (inChecked.read() != 0x00) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + } + + // CRC32 + long value = crc32.getValue(); + for (int i = 0; i < 4; ++i) { + if (((value >>> (i * 8)) & 0xFF) != in.read()) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + } + } + + public BlockInfo locate(long target) { + assert target < uncompressedSum; + + int left = 0; + int right = unpadded.length - 1; + + while (left < right) { + int i = left + (right - left) / 2; + + if (uncompressed[i] <= target) { + left = i + 1; + } else { + right = i; + } + } + + pos = left; + return getInfo(); + } + + public int getMemoryUsage() { + return memoryUsage; + } + + public long getStreamAndPaddingSize() { + return getStreamSize() + streamPadding; + } + + public long getUncompressedSize() { + return uncompressedSum; + } + + public long getLargestBlockSize() { + return largestBlockSize; + } + + public boolean hasNext() { + return pos + 1 < recordCount; + } + + public BlockInfo getNext() { + ++pos; + return getInfo(); + } + + private BlockInfo getInfo() { + if (pos == 0) { + info.compressedOffset = 0; + info.uncompressedOffset = 0; + } else { + info.compressedOffset = (unpadded[pos - 1] + 3) & ~3; + info.uncompressedOffset = uncompressed[pos - 1]; + } + + info.unpaddedSize = unpadded[pos] - info.compressedOffset; + info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset; + + info.compressedOffset += DecoderUtil.STREAM_HEADER_SIZE; + return info; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexEncoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexEncoder.java new file mode 100644 index 0000000..8862ecd --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexEncoder.java @@ -0,0 +1,54 @@ + +package org.xbib.io.compress.xz.index; + +import org.xbib.io.compress.xz.XZIOException; +import org.xbib.io.compress.xz.common.EncoderUtil; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.zip.CheckedOutputStream; + +public class IndexEncoder extends IndexBase { + private final ArrayList records = new ArrayList(); + + public IndexEncoder() { + super(new XZIOException("XZ Stream or its Index has grown too big")); + } + + public void add(long unpaddedSize, long uncompressedSize) + 
throws XZIOException { + super.add(unpaddedSize, uncompressedSize); + records.add(new IndexRecord(unpaddedSize, uncompressedSize)); + } + + public void encode(OutputStream out) throws IOException { + java.util.zip.CRC32 crc32 = new java.util.zip.CRC32(); + CheckedOutputStream outChecked = new CheckedOutputStream(out, crc32); + + // Index Indicator + outChecked.write(0x00); + + // Number of Records + EncoderUtil.encodeVLI(outChecked, recordCount); + + // List of Records + for (Iterator i = records.iterator(); i.hasNext(); ) { + IndexRecord record = (IndexRecord) i.next(); + EncoderUtil.encodeVLI(outChecked, record.unpadded); + EncoderUtil.encodeVLI(outChecked, record.uncompressed); + } + + // Index Padding + for (int i = getIndexPaddingSize(); i > 0; --i) { + outChecked.write(0x00); + } + + // CRC32 + long value = crc32.getValue(); + for (int i = 0; i < 4; ++i) { + out.write((byte) (value >>> (i * 8))); + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexHash.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexHash.java new file mode 100644 index 0000000..07c61cd --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexHash.java @@ -0,0 +1,94 @@ + +package org.xbib.io.compress.xz.index; + +import org.xbib.io.compress.xz.CorruptedInputException; +import org.xbib.io.compress.xz.XZIOException; +import org.xbib.io.compress.xz.common.DecoderUtil; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.zip.CheckedInputStream; + +public class IndexHash extends IndexBase { + private org.xbib.io.compress.xz.check.Check hash; + + public IndexHash() { + super(new CorruptedInputException()); + + try { + hash = new org.xbib.io.compress.xz.check.SHA256(); + } catch (java.security.NoSuchAlgorithmException e) { + hash = new org.xbib.io.compress.xz.check.CRC32(); + } + } + + public void add(long unpaddedSize, long uncompressedSize) + throws XZIOException { + super.add(unpaddedSize, uncompressedSize); + + ByteBuffer buf = ByteBuffer.allocate(2 * 8); + buf.putLong(unpaddedSize); + buf.putLong(uncompressedSize); + hash.update(buf.array()); + } + + public void validate(InputStream in) throws IOException { + // Index Indicator (0x00) has already been read by BlockInputStream + // so add 0x00 to the CRC32 here. + java.util.zip.CRC32 crc32 = new java.util.zip.CRC32(); + crc32.update('\0'); + CheckedInputStream inChecked = new CheckedInputStream(in, crc32); + + // Get and validate the Number of Records field. + long storedRecordCount = DecoderUtil.decodeVLI(inChecked); + if (storedRecordCount != recordCount) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + + // Decode and hash the Index field and compare it to + // the hash value calculated from the decoded Blocks. 
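+ // A second IndexHash accumulates the stored Records through the + // same add() logic as the decoded Blocks used, so the comparisons + // below cover the size sums, the encoded list size, and a hash of + // every Record field.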
+ IndexHash stored = new IndexHash(); + for (long i = 0; i < recordCount; ++i) { + long unpaddedSize = DecoderUtil.decodeVLI(inChecked); + long uncompressedSize = DecoderUtil.decodeVLI(inChecked); + + try { + stored.add(unpaddedSize, uncompressedSize); + } catch (XZIOException e) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + + if (stored.blocksSum > blocksSum + || stored.uncompressedSum > uncompressedSum + || stored.indexListSize > indexListSize) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + } + + if (stored.blocksSum != blocksSum + || stored.uncompressedSum != uncompressedSum + || stored.indexListSize != indexListSize + || !Arrays.equals(stored.hash.finish(), hash.finish())) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + + // Index Padding + DataInputStream inData = new DataInputStream(inChecked); + for (int i = getIndexPaddingSize(); i > 0; --i) { + if (inData.readUnsignedByte() != 0x00) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + } + + // CRC32 + long value = crc32.getValue(); + for (int i = 0; i < 4; ++i) { + if (((value >>> (i * 8)) & 0xFF) != inData.readUnsignedByte()) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexRecord.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexRecord.java new file mode 100644 index 0000000..87a7e5b --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/index/IndexRecord.java @@ -0,0 +1,12 @@ + +package org.xbib.io.compress.xz.index; + +class IndexRecord { + final long unpadded; + final long uncompressed; + + IndexRecord(long unpadded, long uncompressed) { + this.unpadded = unpadded; + this.uncompressed = uncompressed; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/BT4.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/BT4.java new file mode 100644 index 0000000..f6b78e0 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/BT4.java @@ -0,0 +1,254 @@ + +package org.xbib.io.compress.xz.lz; + +final class BT4 extends LZEncoder { + private final Hash234 hash; + private final int[] tree; + private final Matches matches; + private final int depthLimit; + + private final int cyclicSize; + private int cyclicPos = -1; + private int lzPos; + + static int getMemoryUsage(int dictSize) { + return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10; + } + + BT4(int dictSize, int beforeSizeMin, int readAheadMax, + int niceLen, int matchLenMax, int depthLimit) { + super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax); + + cyclicSize = dictSize + 1; + lzPos = cyclicSize; + + hash = new Hash234(dictSize); + tree = new int[cyclicSize * 2]; + + // Subtracting 1 because the shortest match that this match + // finder can find is 2 bytes, so there's no need to reserve + // space for one-byte matches. + matches = new Matches(niceLen - 1); + + this.depthLimit = depthLimit > 0 ?
depthLimit : 16 + niceLen / 2; + } + + private int movePos() { + int avail = movePos(niceLen, 4); + + if (avail != 0) { + if (++lzPos == Integer.MAX_VALUE) { + int normalizationOffset = Integer.MAX_VALUE - cyclicSize; + hash.normalize(normalizationOffset); + normalize(tree, normalizationOffset); + lzPos -= normalizationOffset; + } + + if (++cyclicPos == cyclicSize) { + cyclicPos = 0; + } + } + + return avail; + } + + public Matches getMatches() { + matches.count = 0; + + int matchLenLimit = matchLenMax; + int niceLenLimit = niceLen; + int avail = movePos(); + + if (avail < matchLenLimit) { + if (avail == 0) { + return matches; + } + + matchLenLimit = avail; + if (niceLenLimit > avail) { + niceLenLimit = avail; + } + } + + hash.calcHashes(buf, readPos); + int delta2 = lzPos - hash.getHash2Pos(); + int delta3 = lzPos - hash.getHash3Pos(); + int currentMatch = hash.getHash4Pos(); + hash.updateTables(lzPos); + + int lenBest = 0; + + // See if the hash from the first two bytes found a match. + // The hashing algorithm guarantees that if the first byte + // matches, also the second byte does, so there's no need to + // test the second byte. + if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) { + lenBest = 2; + matches.len[0] = 2; + matches.dist[0] = delta2 - 1; + matches.count = 1; + } + + // See if the hash from the first three bytes found a match that + // is different from the match possibly found by the two-byte hash. + // Also here the hashing algorithm guarantees that if the first byte + // matches, also the next two bytes do. + if (delta2 != delta3 && delta3 < cyclicSize + && buf[readPos - delta3] == buf[readPos]) { + lenBest = 3; + matches.dist[matches.count++] = delta3 - 1; + delta2 = delta3; + } + + // If a match was found, see how long it is. + if (matches.count > 0) { + while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2] + == buf[readPos + lenBest]) { + ++lenBest; + } + + matches.len[matches.count - 1] = lenBest; + + // Return if it is long enough (niceLen or reached the end of + // the dictionary). + if (lenBest >= niceLenLimit) { + skip(niceLenLimit, currentMatch); + return matches; + } + } + + // Long enough match wasn't found so easily. Look for better matches + // from the binary tree. + if (lenBest < 3) { + lenBest = 3; + } + + int depth = depthLimit; + + int ptr0 = (cyclicPos << 1) + 1; + int ptr1 = cyclicPos << 1; + int len0 = 0; + int len1 = 0; + + while (true) { + int delta = lzPos - currentMatch; + + // Return if the search depth limit has been reached or + // if the distance of the potential match exceeds the + // dictionary size. + if (depth-- == 0 || delta >= cyclicSize) { + tree[ptr0] = 0; + tree[ptr1] = 0; + return matches; + } + + int pair = (cyclicPos - delta + + (delta > cyclicPos ? 
cyclicSize : 0)) << 1; + int len = Math.min(len0, len1); + + if (buf[readPos + len - delta] == buf[readPos + len]) { + while (++len < matchLenLimit) { + if (buf[readPos + len - delta] != buf[readPos + len]) { + break; + } + } + + if (len > lenBest) { + lenBest = len; + matches.len[matches.count] = len; + matches.dist[matches.count] = delta - 1; + ++matches.count; + + if (len >= niceLenLimit) { + tree[ptr1] = tree[pair]; + tree[ptr0] = tree[pair + 1]; + return matches; + } + } + } + + if ((buf[readPos + len - delta] & 0xFF) + < (buf[readPos + len] & 0xFF)) { + tree[ptr1] = currentMatch; + ptr1 = pair + 1; + currentMatch = tree[ptr1]; + len1 = len; + } else { + tree[ptr0] = currentMatch; + ptr0 = pair; + currentMatch = tree[ptr0]; + len0 = len; + } + } + } + + private void skip(int niceLenLimit, int currentMatch) { + int depth = depthLimit; + + int ptr0 = (cyclicPos << 1) + 1; + int ptr1 = cyclicPos << 1; + int len0 = 0; + int len1 = 0; + + while (true) { + int delta = lzPos - currentMatch; + + if (depth-- == 0 || delta >= cyclicSize) { + tree[ptr0] = 0; + tree[ptr1] = 0; + return; + } + + int pair = (cyclicPos - delta + + (delta > cyclicPos ? cyclicSize : 0)) << 1; + int len = Math.min(len0, len1); + + if (buf[readPos + len - delta] == buf[readPos + len]) { + // No need to look for longer matches than niceLenLimit + // because we only are updating the tree, not returning + // matches found to the caller. + do { + if (++len == niceLenLimit) { + tree[ptr1] = tree[pair]; + tree[ptr0] = tree[pair + 1]; + return; + } + } while (buf[readPos + len - delta] == buf[readPos + len]); + } + + if ((buf[readPos + len - delta] & 0xFF) + < (buf[readPos + len] & 0xFF)) { + tree[ptr1] = currentMatch; + ptr1 = pair + 1; + currentMatch = tree[ptr1]; + len1 = len; + } else { + tree[ptr0] = currentMatch; + ptr0 = pair; + currentMatch = tree[ptr0]; + len0 = len; + } + } + } + + public void skip(int len) { + while (len-- > 0) { + int niceLenLimit = niceLen; + int avail = movePos(); + + if (avail < niceLenLimit) { + if (avail == 0) { + continue; + } + + niceLenLimit = avail; + } + + hash.calcHashes(buf, readPos); + int currentMatch = hash.getHash4Pos(); + hash.updateTables(lzPos); + + skip(niceLenLimit, currentMatch); + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/CRC32Hash.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/CRC32Hash.java new file mode 100644 index 0000000..bfda4ef --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/CRC32Hash.java @@ -0,0 +1,27 @@ + +package org.xbib.io.compress.xz.lz; + +/** + * Provides a CRC32 table using the polynomial from IEEE 802.3. 
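+ * <p> + * The table is consumed by Hash234; purely as an illustration, a + * plain CRC32 of a {@code byte[] data} could be derived from it like + * this: + * <pre>{@code + * int crc = ~0; + * for (byte b : data) + *     crc = crcTable[(crc ^ b) & 0xFF] ^ (crc >>> 8); + * crc = ~crc; + * }</pre>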
+ */ +class CRC32Hash { + private static final int CRC32_POLY = 0xEDB88320; + + static final int[] crcTable = new int[256]; + + static { + for (int i = 0; i < 256; ++i) { + int r = i; + + for (int j = 0; j < 8; ++j) { + if ((r & 1) != 0) { + r = (r >>> 1) ^ CRC32_POLY; + } else { + r >>>= 1; + } + } + + crcTable[i] = r; + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/HC4.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/HC4.java new file mode 100644 index 0000000..57eb746 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/HC4.java @@ -0,0 +1,201 @@ + +package org.xbib.io.compress.xz.lz; + +final class HC4 extends LZEncoder { + private final Hash234 hash; + private final int[] chain; + private final Matches matches; + private final int depthLimit; + + private final int cyclicSize; + private int cyclicPos = -1; + private int lzPos; + + /** + * Gets approximate memory usage of the match finder as kibibytes. + */ + static int getMemoryUsage(int dictSize) { + return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10; + } + + /** + * Creates a new LZEncoder with the HC4 match finder. + * See LZEncoder.getInstance for parameter descriptions. + */ + HC4(int dictSize, int beforeSizeMin, int readAheadMax, + int niceLen, int matchLenMax, int depthLimit) { + super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax); + + hash = new Hash234(dictSize); + + // +1 because we need dictSize bytes of history + the current byte. + cyclicSize = dictSize + 1; + chain = new int[cyclicSize]; + lzPos = cyclicSize; + + // Subtracting 1 because the shortest match that this match + // finder can find is 2 bytes, so there's no need to reserve + // space for one-byte matches. + matches = new Matches(niceLen - 1); + + // Use a default depth limit if no other value was specified. + // The default is just something based on experimentation; + // it's nothing magic. + this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4; + } + + /** + * Moves to the next byte, checks that there is enough available space, + * and possibly normalizes the hash tables and the hash chain. + * + * @return number of bytes available, including the current byte + */ + private int movePos() { + int avail = movePos(4, 4); + + if (avail != 0) { + if (++lzPos == Integer.MAX_VALUE) { + int normalizationOffset = Integer.MAX_VALUE - cyclicSize; + hash.normalize(normalizationOffset); + normalize(chain, normalizationOffset); + lzPos -= normalizationOffset; + } + + if (++cyclicPos == cyclicSize) { + cyclicPos = 0; + } + } + + return avail; + } + + public Matches getMatches() { + matches.count = 0; + int matchLenLimit = matchLenMax; + int niceLenLimit = niceLen; + int avail = movePos(); + + if (avail < matchLenLimit) { + if (avail == 0) { + return matches; + } + + matchLenLimit = avail; + if (niceLenLimit > avail) { + niceLenLimit = avail; + } + } + + hash.calcHashes(buf, readPos); + int delta2 = lzPos - hash.getHash2Pos(); + int delta3 = lzPos - hash.getHash3Pos(); + int currentMatch = hash.getHash4Pos(); + hash.updateTables(lzPos); + + chain[cyclicPos] = currentMatch; + + int lenBest = 0; + + // See if the hash from the first two bytes found a match. + // The hashing algorithm guarantees that if the first byte + // matches, also the second byte does, so there's no need to + // test the second byte.
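+ // For example, if the window ends with "abab" and readPos is at the + // second "ab", the two-byte hash points delta2 = 2 positions back, + // and a match of length 2 with distance delta2 - 1 is recorded below.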
+ if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) { + lenBest = 2; + matches.len[0] = 2; + matches.dist[0] = delta2 - 1; + matches.count = 1; + } + + // See if the hash from the first three bytes found a match that + // is different from the match possibly found by the two-byte hash. + // Also here the hashing algorithm guarantees that if the first byte + // matches, also the next two bytes do. + if (delta2 != delta3 && delta3 < cyclicSize + && buf[readPos - delta3] == buf[readPos]) { + lenBest = 3; + matches.dist[matches.count++] = delta3 - 1; + delta2 = delta3; + } + + // If a match was found, see how long it is. + if (matches.count > 0) { + while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2] + == buf[readPos + lenBest]) { + ++lenBest; + } + + matches.len[matches.count - 1] = lenBest; + + // Return if it is long enough (niceLen or reached the end of + // the dictionary). + if (lenBest >= niceLenLimit) { + return matches; + } + } + + // Long enough match wasn't found so easily. Look for better matches + // from the hash chain. + if (lenBest < 3) { + lenBest = 3; + } + + int depth = depthLimit; + + while (true) { + int delta = lzPos - currentMatch; + + // Return if the search depth limit has been reached or + // if the distance of the potential match exceeds the + // dictionary size. + if (depth-- == 0 || delta >= cyclicSize) { + return matches; + } + + currentMatch = chain[cyclicPos - delta + + (delta > cyclicPos ? cyclicSize : 0)]; + + // Test the first byte and the first new byte that would give us + // a match that is at least one byte longer than lenBest. This + // way, too-short matches get skipped quickly. + if (buf[readPos + lenBest - delta] == buf[readPos + lenBest] + && buf[readPos - delta] == buf[readPos]) { + // Calculate the length of the match. + int len = 0; + while (++len < matchLenLimit) { + if (buf[readPos + len - delta] != buf[readPos + len]) { + break; + } + } + + // Use the match if and only if it is better than the longest + // match found so far. + if (len > lenBest) { + lenBest = len; + matches.len[matches.count] = len; + matches.dist[matches.count] = delta - 1; + ++matches.count; + + // Return if it is long enough (niceLen or reached the + // end of the dictionary). + if (len >= niceLenLimit) { + return matches; + } + } + } + } + } + + public void skip(int len) { + assert len >= 0; + + while (len-- > 0) { + if (movePos() != 0) { + // Update the hash chain and hash tables.
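+ // Even though skipped bytes are never searched for matches, they + // must still be inserted into the hash chain; otherwise later + // getMatches() calls could miss matches starting in the skipped + // region.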
+ hash.calcHashes(buf, readPos); + chain[cyclicPos] = hash.getHash4Pos(); + hash.updateTables(lzPos); + } + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/Hash234.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/Hash234.java new file mode 100644 index 0000000..0aae5da --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/Hash234.java @@ -0,0 +1,81 @@ + +package org.xbib.io.compress.xz.lz; + +final class Hash234 extends CRC32Hash { + private static final int HASH_2_SIZE = 1 << 10; + private static final int HASH_2_MASK = HASH_2_SIZE - 1; + + private static final int HASH_3_SIZE = 1 << 16; + private static final int HASH_3_MASK = HASH_3_SIZE - 1; + + private final int hash4Mask; + + private final int[] hash2Table = new int[HASH_2_SIZE]; + private final int[] hash3Table = new int[HASH_3_SIZE]; + private final int[] hash4Table; + + private int hash2Value = 0; + private int hash3Value = 0; + private int hash4Value = 0; + + static int getHash4Size(int dictSize) { + int h = dictSize - 1; + h |= h >>> 1; + h |= h >>> 2; + h |= h >>> 4; + h |= h >>> 8; + h >>>= 1; + h |= 0xFFFF; + if (h > (1 << 24)) { + h >>>= 1; + } + + return h + 1; + } + + static int getMemoryUsage(int dictSize) { + // Sizes of the hash arrays + a little extra + return (HASH_2_SIZE + HASH_3_SIZE + getHash4Size(dictSize)) + / (1024 / 4) + 4; + } + + Hash234(int dictSize) { + hash4Table = new int[getHash4Size(dictSize)]; + hash4Mask = hash4Table.length - 1; + } + + void calcHashes(byte[] buf, int off) { + int temp = crcTable[buf[off] & 0xFF] ^ (buf[off + 1] & 0xFF); + hash2Value = temp & HASH_2_MASK; + + temp ^= (buf[off + 2] & 0xFF) << 8; + hash3Value = temp & HASH_3_MASK; + + temp ^= crcTable[buf[off + 3] & 0xFF] << 5; + hash4Value = temp & hash4Mask; + } + + int getHash2Pos() { + return hash2Table[hash2Value]; + } + + int getHash3Pos() { + return hash3Table[hash3Value]; + } + + int getHash4Pos() { + return hash4Table[hash4Value]; + } + + void updateTables(int pos) { + hash2Table[hash2Value] = pos; + hash3Table[hash3Value] = pos; + hash4Table[hash4Value] = pos; + } + + void normalize(int normalizeOffset) { + LZEncoder.normalize(hash2Table, normalizeOffset); + LZEncoder.normalize(hash3Table, normalizeOffset); + LZEncoder.normalize(hash4Table, normalizeOffset); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/LZDecoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/LZDecoder.java new file mode 100644 index 0000000..891a996 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/LZDecoder.java @@ -0,0 +1,128 @@ + +package org.xbib.io.compress.xz.lz; + +import org.xbib.io.compress.xz.CorruptedInputException; + +import java.io.DataInputStream; +import java.io.IOException; + +public final class LZDecoder { + private final byte[] buf; + private int start = 0; + private int pos = 0; + private int full = 0; + private int limit = 0; + private int pendingLen = 0; + private int pendingDist = 0; + + public LZDecoder(int dictSize, byte[] presetDict) { + buf = new byte[dictSize]; + + if (presetDict != null) { + pos = Math.min(presetDict.length, dictSize); + full = pos; + start = pos; + System.arraycopy(presetDict, presetDict.length - pos, buf, 0, pos); + } + } + + public void reset() { + start = 0; + pos = 0; + full = 0; + limit = 0; + buf[buf.length - 1] = 0x00; + } + + public void setLimit(int outMax) { + if (buf.length - pos <= outMax) { + limit = buf.length; + } else { + limit = pos + outMax; + } + } + + public 
boolean hasSpace() { + return pos < limit; + } + + public boolean hasPending() { + return pendingLen > 0; + } + + public int getPos() { + return pos; + } + + public int getByte(int dist) { + int offset = pos - dist - 1; + if (dist >= pos) { + offset += buf.length; + } + + return buf[offset] & 0xFF; + } + + public void putByte(byte b) { + buf[pos++] = b; + + if (full < pos) { + full = pos; + } + } + + public void repeat(int dist, int len) throws IOException { + if (dist < 0 || dist >= full) { + throw new CorruptedInputException(); + } + + int left = Math.min(limit - pos, len); + pendingLen = len - left; + pendingDist = dist; + + int back = pos - dist - 1; + if (dist >= pos) { + back += buf.length; + } + + do { + buf[pos++] = buf[back++]; + if (back == buf.length) { + back = 0; + } + } while (--left > 0); + + if (full < pos) { + full = pos; + } + } + + public void repeatPending() throws IOException { + if (pendingLen > 0) { + repeat(pendingDist, pendingLen); + } + } + + public void copyUncompressed(DataInputStream inData, int len) + throws IOException { + int copySize = Math.min(buf.length - pos, len); + inData.readFully(buf, pos, copySize); + pos += copySize; + + if (full < pos) { + full = pos; + } + } + + public int flush(byte[] out, int outOff) { + int copySize = pos - start; + if (pos == buf.length) { + pos = 0; + } + + System.arraycopy(buf, start, out, outOff, copySize); + start = pos; + + return copySize; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/LZEncoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/LZEncoder.java new file mode 100644 index 0000000..0198326 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/LZEncoder.java @@ -0,0 +1,391 @@ +package org.xbib.io.compress.xz.lz; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * + */ +public abstract class LZEncoder { + public static final int MF_HC4 = 0x04; + public static final int MF_BT4 = 0x14; + + /** + * Number of bytes to keep available before the current byte + * when moving the LZ window. + */ + private final int keepSizeBefore; + + /** + * Number of bytes that must be available, the current byte included, + * to make hasEnoughInput return true. Flushing and finishing are + * naturally exceptions to this since there cannot be any data after + * the end of the uncompressed input. + */ + private final int keepSizeAfter; + + final int matchLenMax; + final int niceLen; + + final byte[] buf; + + int readPos = -1; + private int readLimit = -1; + private boolean finishing = false; + private int writePos = 0; + private int pendingSize = 0; + + static void normalize(int[] positions, int normalizationOffset) { + for (int i = 0; i < positions.length; ++i) { + if (positions[i] <= normalizationOffset) { + positions[i] = 0; + } else { + positions[i] -= normalizationOffset; + } + } + } + + /** + * Gets the size of the LZ window buffer that needs to be allocated. + */ + private static int getBufSize( + int dictSize, int extraSizeBefore, int extraSizeAfter, + int matchLenMax) { + int keepSizeBefore = extraSizeBefore + dictSize; + int keepSizeAfter = extraSizeAfter + matchLenMax; + int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20); + return keepSizeBefore + keepSizeAfter + reserveSize; + } + + /** + * Gets approximate memory usage of the LZEncoder base structure and + * the match finder as kibibytes. 
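+ * <p> + * For example, a rough estimate for an 8 MiB dictionary with the BT4 + * match finder (the argument values are only illustrative): + * <pre>{@code + * int kib = LZEncoder.getMemoryUsage(8 << 20, 0, 0, 273, LZEncoder.MF_BT4); + * }</pre>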
+ */ + public static int getMemoryUsage( + int dictSize, int extraSizeBefore, int extraSizeAfter, + int matchLenMax, int mf) { + // Buffer size + a little extra + int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter, + matchLenMax) / 1024 + 10; + + switch (mf) { + case MF_HC4: + m += HC4.getMemoryUsage(dictSize); + break; + + case MF_BT4: + m += BT4.getMemoryUsage(dictSize); + break; + + default: + throw new IllegalArgumentException(); + } + + return m; + } + + /** + * Creates a new LZEncoder. + * + * @param dictSize dictionary size + * @param extraSizeBefore number of bytes to keep available in the + * history in addition to dictSize + * @param extraSizeAfter number of bytes that must be available + * after current position + matchLenMax + * @param niceLen if a match of at least niceLen + * bytes is found, be happy with it and don't + * stop looking for longer matches + * @param matchLenMax don't test for matches longer than + * matchLenMax bytes + * @param mf match finder ID + * @param depthLimit match finder search depth limit + */ + public static LZEncoder getInstance( + int dictSize, int extraSizeBefore, int extraSizeAfter, + int niceLen, int matchLenMax, int mf, int depthLimit) { + switch (mf) { + case MF_HC4: + return new HC4(dictSize, extraSizeBefore, extraSizeAfter, + niceLen, matchLenMax, depthLimit); + + case MF_BT4: + return new BT4(dictSize, extraSizeBefore, extraSizeAfter, + niceLen, matchLenMax, depthLimit); + } + + throw new IllegalArgumentException(); + } + + /** + * Creates a new LZEncoder. See getInstance. + */ + LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter, + int niceLen, int matchLenMax) { + buf = new byte[getBufSize(dictSize, extraSizeBefore, extraSizeAfter, + matchLenMax)]; + + keepSizeBefore = extraSizeBefore + dictSize; + keepSizeAfter = extraSizeAfter + matchLenMax; + + this.matchLenMax = matchLenMax; + this.niceLen = niceLen; + } + + /** + * Sets a preset dictionary. If a preset dictionary is wanted, this + * function must be called immediately after creating the LZEncoder + * before any data has been encoded. + */ + public void setPresetDict(int dictSize, byte[] presetDict) { + assert !isStarted(); + assert writePos == 0; + + if (presetDict != null) { + // If the preset dictionary buffer is bigger than the dictionary + // size, copy only the tail of the preset dictionary. + int copySize = Math.min(presetDict.length, dictSize); + int offset = presetDict.length - copySize; + System.arraycopy(presetDict, offset, buf, 0, copySize); + writePos += copySize; + skip(copySize); + } + } + + /** + * Moves data from the end of the buffer to the beginning, discarding + * old data and making space for new input. + */ + private void moveWindow() { + // Align the move to a multiple of 16 bytes. LZMA2 needs this + // because it uses the lowest bits from readPos to get the + // alignment of the uncompressed data. + int moveOffset = (readPos + 1 - keepSizeBefore) & ~15; + int moveSize = writePos - moveOffset; + System.arraycopy(buf, moveOffset, buf, 0, moveSize); + + readPos -= moveOffset; + readLimit -= moveOffset; + writePos -= moveOffset; + } + + /** + * Copies new data into the LZEncoder's buffer. + */ + public int fillWindow(byte[] in, int off, int len) { + assert !finishing; + + // Move the sliding window if needed. + if (readPos >= buf.length - keepSizeAfter) { + moveWindow(); + } + + // Try to fill the dictionary buffer. If it becomes full, + // some of the input bytes may be left unused. 
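+ // The return value tells how much was actually consumed, so a caller + // typically loops over fillWindow, roughly (illustrative sketch only): + //     while (len > 0) { int used = fillWindow(in, off, len); + //         off += used; len -= used; /* encode pending data */ }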
+ if (len > buf.length - writePos) { + len = buf.length - writePos; + } + + System.arraycopy(in, off, buf, writePos, len); + writePos += len; + + // Set the new readLimit but only if there's enough data to allow + // encoding of at least one more byte. + if (writePos >= keepSizeAfter) { + readLimit = writePos - keepSizeAfter; + } + + // After flushing or setting a preset dictionary there may be pending + // data that hasn't been run through the match finder yet. + // + // NOTE: The test for readLimit is only to avoid wasting time + // if we get very little new input and thus readLimit wasn't + // increased above. + if (pendingSize > 0 && readPos < readLimit) { + readPos -= pendingSize; + int oldPendingSize = pendingSize; + pendingSize = 0; + skip(oldPendingSize); + assert pendingSize < oldPendingSize; + } + + // Tell the caller how much input we actually copied into + // the dictionary. + return len; + } + + /** + * Returns true if at least one byte has already been run through + * the match finder. + */ + public boolean isStarted() { + return readPos != -1; + } + + /** + * Marks that all the input needs to be made available in + * the encoded output. + */ + public void setFlushing() { + readLimit = writePos - 1; + } + + /** + * Marks that there is no more input remaining. The read position + * can be advanced until the end of the data. + */ + public void setFinishing() { + readLimit = writePos - 1; + finishing = true; + } + + /** + * Tests if there is enough input available to let the caller encode + * at least one more byte. + */ + public boolean hasEnoughData(int alreadyReadLen) { + return readPos - alreadyReadLen < readLimit; + } + + public void copyUncompressed(OutputStream out, int backward, int len) + throws IOException { + out.write(buf, readPos + 1 - backward, len); + } + + /** + * Gets the number of bytes available, including the current byte. + * Note that the result is undefined if getMatches or + * skip hasn't been called yet and no preset dictionary + * is being used. + */ + public int getAvail() { + assert isStarted(); + return writePos - readPos; + } + + /** + * Gets the lowest four bits of the absolute offset of the current byte. + * Bits other than the lowest four are undefined. + */ + public int getPos() { + return readPos; + } + + /** + * Gets the byte from the given backward offset. + * The current byte is at 0, the previous byte + * at 1 etc. To get a byte at zero-based distance, + * use getByte(dist + 1). + * This function is equivalent to getByte(0, backward). + */ + public int getByte(int backward) { + return buf[readPos - backward] & 0xFF; + } + + /** + * Gets the byte from the given forward minus backward offset. + * The forward offset is added to the current position. This lets + * one read bytes ahead of the current byte. + */ + public int getByte(int forward, int backward) { + return buf[readPos + forward - backward] & 0xFF; + } + + /** + * Gets the length of a match at the given distance. + * + * @param dist zero-based distance of the match to test + * @param lenLimit don't test for a match longer than this + * @return length of the match; it is in the range [0, lenLimit] + */ + public int getMatchLen(int dist, int lenLimit) { + int backPos = readPos - dist - 1; + int len = 0; + + while (len < lenLimit && buf[readPos + len] == buf[backPos + len]) { + ++len; + } + + return len; + } + + /** + * Gets the length of a match at the given distance and forward offset.
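+ * <p> + * For example, {@code getMatchLen(1, 0, lenLimit)} compares the run + * starting one byte ahead of the current byte with the run starting + * at the current byte itself (distance 0 means one byte back).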
+ * + * @param forward forward offset + * @param dist zero-based distance of the match to test + * @param lenLimit don't test for a match longer than this + * @return length of the match; it is in the range [0, lenLimit] + */ + public int getMatchLen(int forward, int dist, int lenLimit) { + int curPos = readPos + forward; + int backPos = curPos - dist - 1; + int len = 0; + + while (len < lenLimit && buf[curPos + len] == buf[backPos + len]) { + ++len; + } + + return len; + } + + /** + * Verifies that the matches returned by the match finder are valid. + * This is meant to be used in an assert statement. This is totally + * useless for actual encoding since match finder's results should + * naturally always be valid if it isn't broken. + * + * @param matches return value from getMatches + * @return true if matches are valid, false if match finder is broken + */ + public boolean verifyMatches(Matches matches) { + int lenLimit = Math.min(getAvail(), matchLenMax); + + for (int i = 0; i < matches.count; ++i) { + if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i]) { + return false; + } + } + + return true; + } + + /** + * Moves to the next byte, checks if there is enough input available, + * and returns the amount of input available. + * + * @param requiredForFlushing minimum number of available bytes when + * flushing; encoding may be continued with + * new input after flushing + * @param requiredForFinishing minimum number of available bytes when + * finishing; encoding must not be continued + * after finishing or the match finder state + * may be corrupt + * @return the number of bytes available or zero if there + * is not enough input available + */ + int movePos(int requiredForFlushing, int requiredForFinishing) { + assert requiredForFlushing >= requiredForFinishing; + + ++readPos; + int avail = writePos - readPos; + + if (avail < requiredForFlushing) { + if (avail < requiredForFinishing || !finishing) { + ++pendingSize; + avail = 0; + } + } + + return avail; + } + + /** + * Runs match finder for the next byte and returns the matches found. + */ + public abstract Matches getMatches(); + + /** + * Skips the given number of bytes in the match finder. 
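+ * For instance, once a match of length {@code len} has been chosen, + * an encoder typically calls {@code skip(len - 1)}: getMatches() has + * already advanced past the first byte, and the remaining bytes only + * need to be indexed, not searched.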
+ */ + public abstract void skip(int len); +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/Matches.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/Matches.java new file mode 100644 index 0000000..49dae6e --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lz/Matches.java @@ -0,0 +1,13 @@ + +package org.xbib.io.compress.xz.lz; + +public final class Matches { + public final int[] len; + public final int[] dist; + public int count = 0; + + Matches(int countMax) { + len = new int[countMax]; + dist = new int[countMax]; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMACoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMACoder.java new file mode 100644 index 0000000..d0fc0f8 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMACoder.java @@ -0,0 +1,137 @@ + +package org.xbib.io.compress.xz.lzma; + +import org.xbib.io.compress.xz.rangecoder.RangeCoder; + +abstract class LZMACoder { + static final int POS_STATES_MAX = 1 << 4; + + static final int MATCH_LEN_MIN = 2; + static final int MATCH_LEN_MAX = MATCH_LEN_MIN + LengthCoder.LOW_SYMBOLS + + LengthCoder.MID_SYMBOLS + + LengthCoder.HIGH_SYMBOLS - 1; + + static final int DIST_STATES = 4; + static final int DIST_SLOTS = 1 << 6; + static final int DIST_MODEL_START = 4; + static final int DIST_MODEL_END = 14; + static final int FULL_DISTANCES = 1 << (DIST_MODEL_END / 2); + + static final int ALIGN_BITS = 4; + static final int ALIGN_SIZE = 1 << ALIGN_BITS; + static final int ALIGN_MASK = ALIGN_SIZE - 1; + + static final int REPS = 4; + + final int posMask; + + final int[] reps = new int[REPS]; + final State state = new State(); + + final short[][] isMatch = new short[State.STATES][POS_STATES_MAX]; + final short[] isRep = new short[State.STATES]; + final short[] isRep0 = new short[State.STATES]; + final short[] isRep1 = new short[State.STATES]; + final short[] isRep2 = new short[State.STATES]; + final short[][] isRep0Long = new short[State.STATES][POS_STATES_MAX]; + final short[][] distSlots = new short[DIST_STATES][DIST_SLOTS]; + final short[][] distSpecial = {new short[2], new short[2], + new short[4], new short[4], + new short[8], new short[8], + new short[16], new short[16], + new short[32], new short[32]}; + final short[] distAlign = new short[ALIGN_SIZE]; + + static final int getDistState(int len) { + return len < DIST_STATES + MATCH_LEN_MIN + ? 
len - MATCH_LEN_MIN + : DIST_STATES - 1; + } + + LZMACoder(int pb) { + posMask = (1 << pb) - 1; + } + + void reset() { + reps[0] = 0; + reps[1] = 0; + reps[2] = 0; + reps[3] = 0; + state.reset(); + + for (int i = 0; i < isMatch.length; ++i) { + RangeCoder.initProbs(isMatch[i]); + } + + RangeCoder.initProbs(isRep); + RangeCoder.initProbs(isRep0); + RangeCoder.initProbs(isRep1); + RangeCoder.initProbs(isRep2); + + for (int i = 0; i < isRep0Long.length; ++i) { + RangeCoder.initProbs(isRep0Long[i]); + } + + for (int i = 0; i < distSlots.length; ++i) { + RangeCoder.initProbs(distSlots[i]); + } + + for (int i = 0; i < distSpecial.length; ++i) { + RangeCoder.initProbs(distSpecial[i]); + } + + RangeCoder.initProbs(distAlign); + } + + + abstract class LiteralCoder { + private final int lc; + private final int literalPosMask; + + LiteralCoder(int lc, int lp) { + this.lc = lc; + this.literalPosMask = (1 << lp) - 1; + } + + final int getSubcoderIndex(int prevByte, int pos) { + int low = prevByte >> (8 - lc); + int high = (pos & literalPosMask) << lc; + return low + high; + } + + + abstract class LiteralSubcoder { + final short[] probs = new short[0x300]; + + void reset() { + RangeCoder.initProbs(probs); + } + } + } + + + abstract class LengthCoder { + static final int LOW_SYMBOLS = 1 << 3; + static final int MID_SYMBOLS = 1 << 3; + static final int HIGH_SYMBOLS = 1 << 8; + + final short[] choice = new short[2]; + final short[][] low = new short[POS_STATES_MAX][LOW_SYMBOLS]; + final short[][] mid = new short[POS_STATES_MAX][MID_SYMBOLS]; + final short[] high = new short[HIGH_SYMBOLS]; + + void reset() { + RangeCoder.initProbs(choice); + + for (int i = 0; i < low.length; ++i) { + RangeCoder.initProbs(low[i]); + } + + for (int i = 0; i < low.length; ++i) { + RangeCoder.initProbs(mid[i]); + } + + RangeCoder.initProbs(high); + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMADecoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMADecoder.java new file mode 100644 index 0000000..e135d83 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMADecoder.java @@ -0,0 +1,186 @@ + +package org.xbib.io.compress.xz.lzma; + +import org.xbib.io.compress.xz.CorruptedInputException; +import org.xbib.io.compress.xz.lz.LZDecoder; +import org.xbib.io.compress.xz.rangecoder.RangeDecoder; + +import java.io.IOException; + +public final class LZMADecoder extends LZMACoder { + private final LZDecoder lz; + private final RangeDecoder rc; + private final LiteralDecoder literalDecoder; + private final LengthDecoder matchLenDecoder = new LengthDecoder(); + private final LengthDecoder repLenDecoder = new LengthDecoder(); + + public LZMADecoder(LZDecoder lz, RangeDecoder rc, int lc, int lp, int pb) { + super(pb); + this.lz = lz; + this.rc = rc; + this.literalDecoder = new LiteralDecoder(lc, lp); + reset(); + } + + public void reset() { + super.reset(); + literalDecoder.reset(); + matchLenDecoder.reset(); + repLenDecoder.reset(); + } + + public void decode() throws IOException { + lz.repeatPending(); + + while (lz.hasSpace()) { + int posState = lz.getPos() & posMask; + + if (rc.decodeBit(isMatch[state.get()], posState) == 0) { + literalDecoder.decode(); + } else { + int len = rc.decodeBit(isRep, state.get()) == 0 + ? 
decodeMatch(posState) + : decodeRepMatch(posState); + lz.repeat(reps[0], len); + } + } + + rc.normalize(); + + if (!rc.isInBufferOK()) { + throw new CorruptedInputException(); + } + } + + private int decodeMatch(int posState) throws IOException { + state.updateMatch(); + + reps[3] = reps[2]; + reps[2] = reps[1]; + reps[1] = reps[0]; + + int len = matchLenDecoder.decode(posState); + int distSlot = rc.decodeBitTree(distSlots[getDistState(len)]); + + if (distSlot < DIST_MODEL_START) { + reps[0] = distSlot; + } else { + int limit = (distSlot >> 1) - 1; + reps[0] = (2 | (distSlot & 1)) << limit; + + if (distSlot < DIST_MODEL_END) { + reps[0] |= rc.decodeReverseBitTree( + distSpecial[distSlot - DIST_MODEL_START]); + } else { + reps[0] |= rc.decodeDirectBits(limit - ALIGN_BITS) + << ALIGN_BITS; + reps[0] |= rc.decodeReverseBitTree(distAlign); + } + } + + return len; + } + + private int decodeRepMatch(int posState) throws IOException { + if (rc.decodeBit(isRep0, state.get()) == 0) { + if (rc.decodeBit(isRep0Long[state.get()], posState) == 0) { + state.updateShortRep(); + return 1; + } + } else { + int tmp; + + if (rc.decodeBit(isRep1, state.get()) == 0) { + tmp = reps[1]; + } else { + if (rc.decodeBit(isRep2, state.get()) == 0) { + tmp = reps[2]; + } else { + tmp = reps[3]; + reps[3] = reps[2]; + } + + reps[2] = reps[1]; + } + + reps[1] = reps[0]; + reps[0] = tmp; + } + + state.updateLongRep(); + + return repLenDecoder.decode(posState); + } + + + private class LiteralDecoder extends LiteralCoder { + LiteralSubdecoder[] subdecoders; + + LiteralDecoder(int lc, int lp) { + super(lc, lp); + + subdecoders = new LiteralSubdecoder[1 << (lc + lp)]; + for (int i = 0; i < subdecoders.length; ++i) { + subdecoders[i] = new LiteralSubdecoder(); + } + } + + void reset() { + for (int i = 0; i < subdecoders.length; ++i) { + subdecoders[i].reset(); + } + } + + void decode() throws IOException { + int i = getSubcoderIndex(lz.getByte(0), lz.getPos()); + subdecoders[i].decode(); + } + + + private class LiteralSubdecoder extends LiteralSubcoder { + void decode() throws IOException { + int symbol = 1; + + if (state.isLiteral()) { + do { + symbol = (symbol << 1) | rc.decodeBit(probs, symbol); + } while (symbol < 0x100); + + } else { + int matchByte = lz.getByte(reps[0]); + int offset = 0x100; + int matchBit; + int bit; + + do { + matchByte <<= 1; + matchBit = matchByte & offset; + bit = rc.decodeBit(probs, offset + matchBit + symbol); + symbol = (symbol << 1) | bit; + offset &= (0 - bit) ^ ~matchBit; + } while (symbol < 0x100); + } + + lz.putByte((byte) symbol); + state.updateLiteral(); + } + } + } + + + private class LengthDecoder extends LengthCoder { + int decode(int posState) throws IOException { + if (rc.decodeBit(choice, 0) == 0) { + return rc.decodeBitTree(low[posState]) + MATCH_LEN_MIN; + } + + if (rc.decodeBit(choice, 1) == 0) { + return rc.decodeBitTree(mid[posState]) + + MATCH_LEN_MIN + LOW_SYMBOLS; + } + + return rc.decodeBitTree(high) + + MATCH_LEN_MIN + LOW_SYMBOLS + MID_SYMBOLS; + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMAEncoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMAEncoder.java new file mode 100644 index 0000000..9885871 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMAEncoder.java @@ -0,0 +1,723 @@ + +package org.xbib.io.compress.xz.lzma; + +import org.xbib.io.compress.xz.lz.LZEncoder; +import org.xbib.io.compress.xz.lz.Matches; +import org.xbib.io.compress.xz.rangecoder.RangeEncoder; + +public 
abstract class LZMAEncoder extends LZMACoder { + public static final int MODE_FAST = 1; + public static final int MODE_NORMAL = 2; + + /** + * LZMA2 chunk is considered full when its uncompressed size exceeds + * LZMA2_UNCOMPRESSED_LIMIT. + *
+ * A compressed LZMA2 chunk can hold 2 MiB of uncompressed data. + * A single LZMA symbol may indicate up to MATCH_LEN_MAX bytes + * of data, so the LZMA2 chunk is considered full when there is + * less space than MATCH_LEN_MAX bytes. + */ + private static final int LZMA2_UNCOMPRESSED_LIMIT + = (2 << 20) - MATCH_LEN_MAX; + + /** + * LZMA2 chunk is considered full when its compressed size exceeds + * LZMA2_COMPRESSED_LIMIT. + *
+ * The maximum compressed size of a LZMA2 chunk is 64 KiB. + * A single LZMA symbol might use 20 bytes of space even though + * it usually takes just one byte or so. Two more bytes are needed + * for LZMA2 uncompressed chunks (see LZMA2OutputStream.writeChunk). + * Leave a little safety margin and use 26 bytes. + */ + private static final int LZMA2_COMPRESSED_LIMIT = (64 << 10) - 26; + + private static final int DIST_PRICE_UPDATE_INTERVAL = FULL_DISTANCES; + private static final int ALIGN_PRICE_UPDATE_INTERVAL = ALIGN_SIZE; + + private final RangeEncoder rc; + final LZEncoder lz; + final LiteralEncoder literalEncoder; + final LengthEncoder matchLenEncoder; + final LengthEncoder repLenEncoder; + final int niceLen; + + private int distPriceCount = 0; + private int alignPriceCount = 0; + + private final int distSlotPricesSize; + private final int[][] distSlotPrices; + private final int[][] fullDistPrices + = new int[DIST_STATES][FULL_DISTANCES]; + private final int[] alignPrices = new int[ALIGN_SIZE]; + + int back = 0; + int readAhead = -1; + private int uncompressedSize = 0; + + public static int getMemoryUsage(int mode, int dictSize, + int extraSizeBefore, int mf) { + int m = 80; + + switch (mode) { + case MODE_FAST: + m += LZMAEncoderFast.getMemoryUsage( + dictSize, extraSizeBefore, mf); + break; + + case MODE_NORMAL: + m += LZMAEncoderNormal.getMemoryUsage( + dictSize, extraSizeBefore, mf); + break; + + default: + throw new IllegalArgumentException(); + } + + return m; + } + + public static LZMAEncoder getInstance( + RangeEncoder rc, int lc, int lp, int pb, int mode, + int dictSize, int extraSizeBefore, + int niceLen, int mf, int depthLimit) { + switch (mode) { + case MODE_FAST: + return new LZMAEncoderFast(rc, lc, lp, pb, + dictSize, extraSizeBefore, + niceLen, mf, depthLimit); + + case MODE_NORMAL: + return new LZMAEncoderNormal(rc, lc, lp, pb, + dictSize, extraSizeBefore, + niceLen, mf, depthLimit); + } + + throw new IllegalArgumentException(); + } + + /** + * Gets an integer [0, 63] matching the highest two bits of an integer. + * This is like bit scan reverse (BSR) on x86 except that this also + * cares about the second highest bit. + */ + public static int getDistSlot(int dist) { + if (dist <= DIST_MODEL_START) { + return dist; + } + + int n = dist; + int i = 31; + + if ((n & 0xFFFF0000) == 0) { + n <<= 16; + i = 15; + } + + if ((n & 0xFF000000) == 0) { + n <<= 8; + i -= 8; + } + + if ((n & 0xF0000000) == 0) { + n <<= 4; + i -= 4; + } + + if ((n & 0xC0000000) == 0) { + n <<= 2; + i -= 2; + } + + if ((n & 0x80000000) == 0) { + --i; + } + + return (i << 1) + ((dist >>> (i - 1)) & 1); + } + + /** + * Gets the next LZMA symbol. + *
+ * There are three types of symbols: literal (a single byte), + * repeated match, and normal match. The symbol is indicated + * by the return value and by the variable back. + *
+ * Literal: back == -1 and return value is 1. + * The literal itself needs to be read from lz separately. + *
+ * Repeated match: back is in the range [0, 3] and + * the return value is the length of the repeated match. + *
+ * Normal match: back - REPS (back - 4) + * is the distance of the match and the return value is the length + * of the match. + */ + abstract int getNextSymbol(); + + LZMAEncoder(RangeEncoder rc, LZEncoder lz, + int lc, int lp, int pb, int dictSize, int niceLen) { + super(pb); + this.rc = rc; + this.lz = lz; + this.niceLen = niceLen; + + literalEncoder = new LiteralEncoder(lc, lp); + matchLenEncoder = new LengthEncoder(pb, niceLen); + repLenEncoder = new LengthEncoder(pb, niceLen); + + distSlotPricesSize = getDistSlot(dictSize - 1) + 1; + distSlotPrices = new int[DIST_STATES][distSlotPricesSize]; + + reset(); + } + + public LZEncoder getLZEncoder() { + return lz; + } + + public void reset() { + super.reset(); + literalEncoder.reset(); + matchLenEncoder.reset(); + repLenEncoder.reset(); + distPriceCount = 0; + alignPriceCount = 0; + + uncompressedSize += readAhead + 1; + readAhead = -1; + } + + public int getUncompressedSize() { + return uncompressedSize; + } + + public void resetUncompressedSize() { + uncompressedSize = 0; + } + + /** + * Compresses for LZMA2. + * + * @return true if the LZMA2 chunk became full, false otherwise + */ + public boolean encodeForLZMA2() { + if (!lz.isStarted() && !encodeInit()) { + return false; + } + + while (uncompressedSize <= LZMA2_UNCOMPRESSED_LIMIT + && rc.getPendingSize() <= LZMA2_COMPRESSED_LIMIT) { + if (!encodeSymbol()) { + return false; + } + } + + return true; + } + + private boolean encodeInit() { + assert readAhead == -1; + if (!lz.hasEnoughData(0)) { + return false; + } + + // The first symbol must be a literal unless using + // a preset dictionary. This code isn't run if using + // a preset dictionary. + skip(1); + rc.encodeBit(isMatch[state.get()], 0, 0); + literalEncoder.encodeInit(); + + --readAhead; + assert readAhead == -1; + + ++uncompressedSize; + assert uncompressedSize == 1; + + return true; + } + + private boolean encodeSymbol() { + if (!lz.hasEnoughData(readAhead + 1)) { + return false; + } + + int len = getNextSymbol(); + + assert readAhead >= 0; + int posState = (lz.getPos() - readAhead) & posMask; + + if (back == -1) { + // Literal i.e. eight-bit byte + assert len == 1; + rc.encodeBit(isMatch[state.get()], posState, 0); + literalEncoder.encode(); + } else { + // Some type of match + rc.encodeBit(isMatch[state.get()], posState, 1); + if (back < REPS) { + // Repeated match i.e. the same distance + // has been used earlier. 
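+                // For example, if reps is {8, 260, 4, 1} and back is 1, the
+                // match reuses distance 260: only the rep index and the length
+                // are coded, which is much cheaper than coding a full distance.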
+ assert lz.getMatchLen(-readAhead, reps[back], len) == len; + rc.encodeBit(isRep, state.get(), 1); + encodeRepMatch(back, len, posState); + } else { + // Normal match + assert lz.getMatchLen(-readAhead, back - REPS, len) == len; + rc.encodeBit(isRep, state.get(), 0); + encodeMatch(back - REPS, len, posState); + } + } + + readAhead -= len; + uncompressedSize += len; + + return true; + } + + private void encodeMatch(int dist, int len, int posState) { + state.updateMatch(); + matchLenEncoder.encode(len, posState); + + int distSlot = getDistSlot(dist); + rc.encodeBitTree(distSlots[getDistState(len)], distSlot); + + if (distSlot >= DIST_MODEL_START) { + int footerBits = (distSlot >>> 1) - 1; + int base = (2 | (distSlot & 1)) << footerBits; + int distReduced = dist - base; + + if (distSlot < DIST_MODEL_END) { + rc.encodeReverseBitTree( + distSpecial[distSlot - DIST_MODEL_START], + distReduced); + } else { + rc.encodeDirectBits(distReduced >>> ALIGN_BITS, + footerBits - ALIGN_BITS); + rc.encodeReverseBitTree(distAlign, distReduced & ALIGN_MASK); + --alignPriceCount; + } + } + + reps[3] = reps[2]; + reps[2] = reps[1]; + reps[1] = reps[0]; + reps[0] = dist; + + --distPriceCount; + } + + private void encodeRepMatch(int rep, int len, int posState) { + if (rep == 0) { + rc.encodeBit(isRep0, state.get(), 0); + rc.encodeBit(isRep0Long[state.get()], posState, len == 1 ? 0 : 1); + } else { + int dist = reps[rep]; + rc.encodeBit(isRep0, state.get(), 1); + + if (rep == 1) { + rc.encodeBit(isRep1, state.get(), 0); + } else { + rc.encodeBit(isRep1, state.get(), 1); + rc.encodeBit(isRep2, state.get(), rep - 2); + + if (rep == 3) { + reps[3] = reps[2]; + } + + reps[2] = reps[1]; + } + + reps[1] = reps[0]; + reps[0] = dist; + } + + if (len == 1) { + state.updateShortRep(); + } else { + repLenEncoder.encode(len, posState); + state.updateLongRep(); + } + } + + Matches getMatches() { + ++readAhead; + Matches matches = lz.getMatches(); + assert lz.verifyMatches(matches); + return matches; + } + + void skip(int len) { + readAhead += len; + lz.skip(len); + } + + int getAnyMatchPrice(State state, int posState) { + return RangeEncoder.getBitPrice(isMatch[state.get()][posState], 1); + } + + int getNormalMatchPrice(int anyMatchPrice, State state) { + return anyMatchPrice + + RangeEncoder.getBitPrice(isRep[state.get()], 0); + } + + int getAnyRepPrice(int anyMatchPrice, State state) { + return anyMatchPrice + + RangeEncoder.getBitPrice(isRep[state.get()], 1); + } + + int getShortRepPrice(int anyRepPrice, State state, int posState) { + return anyRepPrice + + RangeEncoder.getBitPrice(isRep0[state.get()], 0) + + RangeEncoder.getBitPrice(isRep0Long[state.get()][posState], + 0); + } + + int getLongRepPrice(int anyRepPrice, int rep, State state, int posState) { + int price = anyRepPrice; + + if (rep == 0) { + price += RangeEncoder.getBitPrice(isRep0[state.get()], 0) + + RangeEncoder.getBitPrice( + isRep0Long[state.get()][posState], 1); + } else { + price += RangeEncoder.getBitPrice(isRep0[state.get()], 1); + + if (rep == 1) { + price += RangeEncoder.getBitPrice(isRep1[state.get()], 0); + } else { + price += RangeEncoder.getBitPrice(isRep1[state.get()], 1) + + RangeEncoder.getBitPrice(isRep2[state.get()], + rep - 2); + } + } + + return price; + } + + int getLongRepAndLenPrice(int rep, int len, State state, int posState) { + int anyMatchPrice = getAnyMatchPrice(state, posState); + int anyRepPrice = getAnyRepPrice(anyMatchPrice, state); + int longRepPrice = getLongRepPrice(anyRepPrice, rep, state, posState); + return longRepPrice + 
repLenEncoder.getPrice(len, posState); + } + + int getMatchAndLenPrice(int normalMatchPrice, + int dist, int len, int posState) { + int price = normalMatchPrice + + matchLenEncoder.getPrice(len, posState); + int distState = getDistState(len); + + if (dist < FULL_DISTANCES) { + price += fullDistPrices[distState][dist]; + } else { + // Note that distSlotPrices includes also + // the price of direct bits. + int distSlot = getDistSlot(dist); + price += distSlotPrices[distState][distSlot] + + alignPrices[dist & ALIGN_MASK]; + } + + return price; + } + + private void updateDistPrices() { + distPriceCount = DIST_PRICE_UPDATE_INTERVAL; + + for (int distState = 0; distState < DIST_STATES; ++distState) { + for (int distSlot = 0; distSlot < distSlotPricesSize; ++distSlot) { + distSlotPrices[distState][distSlot] + = RangeEncoder.getBitTreePrice( + distSlots[distState], distSlot); + } + + for (int distSlot = DIST_MODEL_END; distSlot < distSlotPricesSize; + ++distSlot) { + int count = (distSlot >>> 1) - 1 - ALIGN_BITS; + distSlotPrices[distState][distSlot] + += RangeEncoder.getDirectBitsPrice(count); + } + + for (int dist = 0; dist < DIST_MODEL_START; ++dist) { + fullDistPrices[distState][dist] + = distSlotPrices[distState][dist]; + } + } + + int dist = DIST_MODEL_START; + for (int distSlot = DIST_MODEL_START; distSlot < DIST_MODEL_END; + ++distSlot) { + int footerBits = (distSlot >>> 1) - 1; + int base = (2 | (distSlot & 1)) << footerBits; + + int limit = distSpecial[distSlot - DIST_MODEL_START].length; + for (int i = 0; i < limit; ++i) { + int distReduced = dist - base; + int price = RangeEncoder.getReverseBitTreePrice( + distSpecial[distSlot - DIST_MODEL_START], + distReduced); + + for (int distState = 0; distState < DIST_STATES; ++distState) { + fullDistPrices[distState][dist] + = distSlotPrices[distState][distSlot] + price; + } + + ++dist; + } + } + + assert dist == FULL_DISTANCES; + } + + private void updateAlignPrices() { + alignPriceCount = ALIGN_PRICE_UPDATE_INTERVAL; + + for (int i = 0; i < ALIGN_SIZE; ++i) { + alignPrices[i] = RangeEncoder.getReverseBitTreePrice(distAlign, + i); + } + } + + /** + * Updates the lookup tables used for calculating match distance + * and length prices. The updating is skipped for performance reasons + * if the tables haven't changed much since the previous update. + */ + void updatePrices() { + if (distPriceCount <= 0) { + updateDistPrices(); + } + + if (alignPriceCount <= 0) { + updateAlignPrices(); + } + + matchLenEncoder.updatePrices(); + repLenEncoder.updatePrices(); + } + + + class LiteralEncoder extends LiteralCoder { + LiteralSubencoder[] subencoders; + + LiteralEncoder(int lc, int lp) { + super(lc, lp); + + subencoders = new LiteralSubencoder[1 << (lc + lp)]; + for (int i = 0; i < subencoders.length; ++i) { + subencoders[i] = new LiteralSubencoder(); + } + } + + void reset() { + for (int i = 0; i < subencoders.length; ++i) { + subencoders[i].reset(); + } + } + + void encodeInit() { + // When encoding the first byte of the stream, there is + // no previous byte in the dictionary so the encode function + // wouldn't work. 
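+            // With no previous byte available to select a subcoder, the
+            // first subencoder is used directly for this initial literal.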
+ assert readAhead >= 0; + subencoders[0].encode(); + } + + void encode() { + assert readAhead >= 0; + int i = getSubcoderIndex(lz.getByte(1 + readAhead), + lz.getPos() - readAhead); + subencoders[i].encode(); + } + + int getPrice(int curByte, int matchByte, + int prevByte, int pos, State state) { + int price = RangeEncoder.getBitPrice( + isMatch[state.get()][pos & posMask], 0); + + int i = getSubcoderIndex(prevByte, pos); + price += state.isLiteral() + ? subencoders[i].getNormalPrice(curByte) + : subencoders[i].getMatchedPrice(curByte, matchByte); + + return price; + } + + private class LiteralSubencoder extends LiteralSubcoder { + void encode() { + int symbol = lz.getByte(readAhead) | 0x100; + + if (state.isLiteral()) { + int subencoderIndex; + int bit; + + do { + subencoderIndex = symbol >>> 8; + bit = (symbol >>> 7) & 1; + rc.encodeBit(probs, subencoderIndex, bit); + symbol <<= 1; + } while (symbol < 0x10000); + + } else { + int matchByte = lz.getByte(reps[0] + 1 + readAhead); + int offset = 0x100; + int subencoderIndex; + int matchBit; + int bit; + + do { + matchByte <<= 1; + matchBit = matchByte & offset; + subencoderIndex = offset + matchBit + (symbol >>> 8); + bit = (symbol >>> 7) & 1; + rc.encodeBit(probs, subencoderIndex, bit); + symbol <<= 1; + offset &= ~(matchByte ^ symbol); + } while (symbol < 0x10000); + } + + state.updateLiteral(); + } + + int getNormalPrice(int symbol) { + int price = 0; + int subencoderIndex; + int bit; + + symbol |= 0x100; + + do { + subencoderIndex = symbol >>> 8; + bit = (symbol >>> 7) & 1; + price += RangeEncoder.getBitPrice(probs[subencoderIndex], + bit); + symbol <<= 1; + } while (symbol < (0x100 << 8)); + + return price; + } + + int getMatchedPrice(int symbol, int matchByte) { + int price = 0; + int offset = 0x100; + int subencoderIndex; + int matchBit; + int bit; + + symbol |= 0x100; + + do { + matchByte <<= 1; + matchBit = matchByte & offset; + subencoderIndex = offset + matchBit + (symbol >>> 8); + bit = (symbol >>> 7) & 1; + price += RangeEncoder.getBitPrice(probs[subencoderIndex], + bit); + symbol <<= 1; + offset &= ~(matchByte ^ symbol); + } while (symbol < (0x100 << 8)); + + return price; + } + } + } + + + class LengthEncoder extends LengthCoder { + /** + * The prices are updated after at least + * PRICE_UPDATE_INTERVAL many lengths + * have been encoded with the same posState. + */ + private static final int PRICE_UPDATE_INTERVAL = 32; // FIXME? + + private final int[] counters; + private final int[][] prices; + + LengthEncoder(int pb, int niceLen) { + int posStates = 1 << pb; + counters = new int[posStates]; + + // Always allocate at least LOW_SYMBOLS + MID_SYMBOLS because + // it makes updatePrices slightly simpler. The prices aren't + // usually needed anyway if niceLen < 18. + int lenSymbols = Math.max(niceLen - MATCH_LEN_MIN + 1, + LOW_SYMBOLS + MID_SYMBOLS); + prices = new int[posStates][lenSymbols]; + } + + void reset() { + super.reset(); + + // Reset counters to zero to force price update before + // the prices are needed. 
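+            // A zero counter makes the next updatePrices() call rebuild the
+            // price table for that posState before any price is read from it.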
+ for (int i = 0; i < counters.length; ++i) { + counters[i] = 0; + } + } + + void encode(int len, int posState) { + len -= MATCH_LEN_MIN; + + if (len < LOW_SYMBOLS) { + rc.encodeBit(choice, 0, 0); + rc.encodeBitTree(low[posState], len); + } else { + rc.encodeBit(choice, 0, 1); + len -= LOW_SYMBOLS; + + if (len < MID_SYMBOLS) { + rc.encodeBit(choice, 1, 0); + rc.encodeBitTree(mid[posState], len); + } else { + rc.encodeBit(choice, 1, 1); + rc.encodeBitTree(high, len - MID_SYMBOLS); + } + } + + --counters[posState]; + } + + int getPrice(int len, int posState) { + return prices[posState][len - MATCH_LEN_MIN]; + } + + void updatePrices() { + for (int posState = 0; posState < counters.length; ++posState) { + if (counters[posState] <= 0) { + counters[posState] = PRICE_UPDATE_INTERVAL; + updatePrices(posState); + } + } + } + + private void updatePrices(int posState) { + int choice0Price = RangeEncoder.getBitPrice(choice[0], 0); + + int i = 0; + for (; i < LOW_SYMBOLS; ++i) { + prices[posState][i] = choice0Price + + RangeEncoder.getBitTreePrice(low[posState], i); + } + + choice0Price = RangeEncoder.getBitPrice(choice[0], 1); + int choice1Price = RangeEncoder.getBitPrice(choice[1], 0); + + for (; i < LOW_SYMBOLS + MID_SYMBOLS; ++i) { + prices[posState][i] = choice0Price + choice1Price + + RangeEncoder.getBitTreePrice(mid[posState], + i - LOW_SYMBOLS); + } + + choice1Price = RangeEncoder.getBitPrice(choice[1], 1); + + for (; i < prices[posState].length; ++i) { + prices[posState][i] = choice0Price + choice1Price + + RangeEncoder.getBitTreePrice(high, i - LOW_SYMBOLS + - MID_SYMBOLS); + } + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMAEncoderFast.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMAEncoderFast.java new file mode 100644 index 0000000..292e243 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMAEncoderFast.java @@ -0,0 +1,151 @@ + +package org.xbib.io.compress.xz.lzma; + +import org.xbib.io.compress.xz.lz.LZEncoder; +import org.xbib.io.compress.xz.lz.Matches; +import org.xbib.io.compress.xz.rangecoder.RangeEncoder; + +final class LZMAEncoderFast extends LZMAEncoder { + private static int EXTRA_SIZE_BEFORE = 1; + private static int EXTRA_SIZE_AFTER = MATCH_LEN_MAX - 1; + + private Matches matches = null; + + static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) { + return LZEncoder.getMemoryUsage( + dictSize, Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE), + EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf); + } + + LZMAEncoderFast(RangeEncoder rc, int lc, int lp, int pb, + int dictSize, int extraSizeBefore, + int niceLen, int mf, int depthLimit) { + super(rc, LZEncoder.getInstance(dictSize, + Math.max(extraSizeBefore, + EXTRA_SIZE_BEFORE), + EXTRA_SIZE_AFTER, + niceLen, MATCH_LEN_MAX, + mf, depthLimit), + lc, lp, pb, dictSize, niceLen); + } + + private boolean changePair(int smallDist, int bigDist) { + return smallDist < (bigDist >>> 7); + } + + int getNextSymbol() { + // Get the matches for the next byte unless readAhead indicates + // that we already got the new matches during the previous call + // to this function. + if (readAhead == -1) { + matches = getMatches(); + } + + back = -1; + + // Get the number of bytes available in the dictionary, but + // not more than the maximum match length. If there aren't + // enough bytes remaining to encode a match at all, return + // immediately to encode this byte as a literal. 
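+        // MATCH_LEN_MIN is 2, so with fewer than two bytes left in the
+        // dictionary only a literal can be produced.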
+ int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX); + if (avail < MATCH_LEN_MIN) { + return 1; + } + + // Look for a match from the previous four match distances. + int bestRepLen = 0; + int bestRepIndex = 0; + for (int rep = 0; rep < REPS; ++rep) { + int len = lz.getMatchLen(reps[rep], avail); + if (len < MATCH_LEN_MIN) { + continue; + } + + // If it is long enough, return it. + if (len >= niceLen) { + back = rep; + skip(len - 1); + return len; + } + + // Remember the index and length of the best repeated match. + if (len > bestRepLen) { + bestRepIndex = rep; + bestRepLen = len; + } + } + + int mainLen = 0; + int mainDist = 0; + + if (matches.count > 0) { + mainLen = matches.len[matches.count - 1]; + mainDist = matches.dist[matches.count - 1]; + + if (mainLen >= niceLen) { + back = mainDist + REPS; + skip(mainLen - 1); + return mainLen; + } + + while (matches.count > 1 + && mainLen == matches.len[matches.count - 2] + 1) { + if (!changePair(matches.dist[matches.count - 2], mainDist)) { + break; + } + + --matches.count; + mainLen = matches.len[matches.count - 1]; + mainDist = matches.dist[matches.count - 1]; + } + + if (mainLen == MATCH_LEN_MIN && mainDist >= 0x80) { + mainLen = 1; + } + } + + if (bestRepLen >= MATCH_LEN_MIN) { + if (bestRepLen + 1 >= mainLen + || (bestRepLen + 2 >= mainLen && mainDist >= (1 << 9)) + || (bestRepLen + 3 >= mainLen && mainDist >= (1 << 15))) { + back = bestRepIndex; + skip(bestRepLen - 1); + return bestRepLen; + } + } + + if (mainLen < MATCH_LEN_MIN || avail <= MATCH_LEN_MIN) { + return 1; + } + + // Get the next match. Test if it is better than the current match. + // If so, encode the current byte as a literal. + matches = getMatches(); + + if (matches.count > 0) { + int newLen = matches.len[matches.count - 1]; + int newDist = matches.dist[matches.count - 1]; + + if ((newLen >= mainLen && newDist < mainDist) + || (newLen == mainLen + 1 + && !changePair(mainDist, newDist)) + || newLen > mainLen + 1 + || (newLen + 1 >= mainLen + && mainLen >= MATCH_LEN_MIN + 1 + && changePair(newDist, mainDist))) { + return 1; + } + } + + int limit = Math.max(mainLen - 1, MATCH_LEN_MIN); + for (int rep = 0; rep < REPS; ++rep) { + if (lz.getMatchLen(reps[rep], limit) == limit) { + return 1; + } + } + + back = mainDist + REPS; + skip(mainLen - 2); + return mainLen; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMAEncoderNormal.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMAEncoderNormal.java new file mode 100644 index 0000000..1d4e12b --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/LZMAEncoderNormal.java @@ -0,0 +1,590 @@ + +package org.xbib.io.compress.xz.lzma; + +import org.xbib.io.compress.xz.lz.LZEncoder; +import org.xbib.io.compress.xz.lz.Matches; +import org.xbib.io.compress.xz.rangecoder.RangeEncoder; + +final class LZMAEncoderNormal extends LZMAEncoder { + private static final int OPTS = 4096; + + private static int EXTRA_SIZE_BEFORE = OPTS; + private static int EXTRA_SIZE_AFTER = OPTS; + + private final Optimum[] opts = new Optimum[OPTS]; + private int optCur = 0; + private int optEnd = 0; + + private Matches matches; + + static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) { + return LZEncoder.getMemoryUsage(dictSize, + Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE), + EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf) + + OPTS * 64 / 1024; + } + + LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, + int dictSize, int extraSizeBefore, + int niceLen, int mf, int depthLimit) { 
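+        // EXTRA_SIZE_BEFORE and EXTRA_SIZE_AFTER are both OPTS bytes so
+        // that optimum parsing can look up to OPTS bytes ahead without
+        // running out of buffered data.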
+ super(rc, LZEncoder.getInstance(dictSize, + Math.max(extraSizeBefore, + EXTRA_SIZE_BEFORE), + EXTRA_SIZE_AFTER, + niceLen, MATCH_LEN_MAX, + mf, depthLimit), + lc, lp, pb, dictSize, niceLen); + + for (int i = 0; i < OPTS; ++i) { + opts[i] = new Optimum(); + } + } + + public void reset() { + optCur = 0; + optEnd = 0; + super.reset(); + } + + /** + * Converts the opts array from backward indexes to forward indexes. + * Then it will be simple to get the next symbol from the array + * in later calls to getNextSymbol(). + */ + private int convertOpts() { + optEnd = optCur; + + int optPrev = opts[optCur].optPrev; + + do { + Optimum opt = opts[optCur]; + + if (opt.prev1IsLiteral) { + opts[optPrev].optPrev = optCur; + opts[optPrev].backPrev = -1; + optCur = optPrev--; + + if (opt.hasPrev2) { + opts[optPrev].optPrev = optPrev + 1; + opts[optPrev].backPrev = opt.backPrev2; + optCur = optPrev; + optPrev = opt.optPrev2; + } + } + + int temp = opts[optPrev].optPrev; + opts[optPrev].optPrev = optCur; + optCur = optPrev; + optPrev = temp; + } while (optCur > 0); + + optCur = opts[0].optPrev; + back = opts[optCur].backPrev; + return optCur; + } + + int getNextSymbol() { + // If there are pending symbols from an earlier call to this + // function, return those symbols first. + if (optCur < optEnd) { + int len = opts[optCur].optPrev - optCur; + optCur = opts[optCur].optPrev; + back = opts[optCur].backPrev; + return len; + } + + assert optCur == optEnd; + optCur = 0; + optEnd = 0; + back = -1; + + if (readAhead == -1) { + matches = getMatches(); + } + + // Get the number of bytes available in the dictionary, but + // not more than the maximum match length. If there aren't + // enough bytes remaining to encode a match at all, return + // immediately to encode this byte as a literal. + int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX); + if (avail < MATCH_LEN_MIN) { + return 1; + } + + // Get the lengths of repeated matches. + int[] repLens = new int[REPS]; + int repBest = 0; + for (int rep = 0; rep < REPS; ++rep) { + repLens[rep] = lz.getMatchLen(reps[rep], avail); + + if (repLens[rep] < MATCH_LEN_MIN) { + repLens[rep] = 0; + continue; + } + + if (repLens[rep] > repLens[repBest]) { + repBest = rep; + } + } + + // Return if the best repeated match is at least niceLen bytes long. + if (repLens[repBest] >= niceLen) { + back = repBest; + skip(repLens[repBest] - 1); + return repLens[repBest]; + } + + // Initialize mainLen and mainDist to the longest match found + // by the match finder. + int mainLen = 0; + int mainDist = 0; + if (matches.count > 0) { + mainLen = matches.len[matches.count - 1]; + mainDist = matches.dist[matches.count - 1]; + + // Return if it is at least niceLen bytes long. + if (mainLen >= niceLen) { + back = mainDist + REPS; + skip(mainLen - 1); + return mainLen; + } + } + + int curByte = lz.getByte(0); + int matchByte = lz.getByte(reps[0] + 1); + + // If the match finder found no matches and this byte cannot be + // encoded as a repeated match (short or long), we must return + // to have the byte encoded as a literal. + if (mainLen < MATCH_LEN_MIN && curByte != matchByte + && repLens[repBest] < MATCH_LEN_MIN) { + return 1; + } + + + int pos = lz.getPos(); + int posState = pos & posMask; + + // Calculate the price of encoding the current byte as a literal. 
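+        // opts[1] holds the cheapest known way to advance one byte from
+        // the current position; the short-rep check below may replace it.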
+ { + int prevByte = lz.getByte(1); + int literalPrice = literalEncoder.getPrice(curByte, matchByte, + prevByte, pos, state); + opts[1].set1(literalPrice, 0, -1); + } + + int anyMatchPrice = getAnyMatchPrice(state, posState); + int anyRepPrice = getAnyRepPrice(anyMatchPrice, state); + + // If it is possible to encode this byte as a short rep, see if + // it is cheaper than encoding it as a literal. + if (matchByte == curByte) { + int shortRepPrice = getShortRepPrice(anyRepPrice, + state, posState); + if (shortRepPrice < opts[1].price) { + opts[1].set1(shortRepPrice, 0, 0); + } + } + + // Return if there is neither a normal match nor a long repeated match. + // Use a short match instead of a literal if it is possible and cheaper. + optEnd = Math.max(mainLen, repLens[repBest]); + if (optEnd < MATCH_LEN_MIN) { + assert optEnd == 0 : optEnd; + back = opts[1].backPrev; + return 1; + } + + + // Update the lookup tables for distances and lengths before using + // those price calculation functions. (The price functions above + // don't need these tables.) + updatePrices(); + + // Initialize the state and reps of this position in opts[]. + // updateOptStateAndReps() will need these to get the new + // state and reps for the next byte. + opts[0].state.set(state); + System.arraycopy(reps, 0, opts[0].reps, 0, REPS); + + // Initialize the prices for later opts that will be used below. + for (int i = optEnd; i >= MATCH_LEN_MIN; --i) { + opts[i].reset(); + } + + // Calculate the prices of repeated matches of all lengths. + for (int rep = 0; rep < REPS; ++rep) { + int repLen = repLens[rep]; + if (repLen < MATCH_LEN_MIN) { + continue; + } + + int longRepPrice = getLongRepPrice(anyRepPrice, rep, + state, posState); + do { + int price = longRepPrice + repLenEncoder.getPrice(repLen, + posState); + if (price < opts[repLen].price) { + opts[repLen].set1(price, 0, rep); + } + } while (--repLen >= MATCH_LEN_MIN); + } + + // Calculate the prices of normal matches that are longer than rep0. + { + int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN); + if (len <= mainLen) { + int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, + state); + + // Set i to the index of the shortest match that is + // at least len bytes long. + int i = 0; + while (len > matches.len[i]) { + ++i; + } + + while (true) { + int dist = matches.dist[i]; + int price = getMatchAndLenPrice(normalMatchPrice, + dist, len, posState); + if (price < opts[len].price) { + opts[len].set1(price, 0, dist + REPS); + } + + if (len == matches.len[i]) { + if (++i == matches.count) { + break; + } + } + + ++len; + } + } + } + + + avail = Math.min(lz.getAvail(), OPTS - 1); + + // Get matches for later bytes and optimize the use of LZMA symbols + // by calculating the prices and picking the cheapest symbol + // combinations. + while (++optCur < optEnd) { + matches = getMatches(); + if (matches.count > 0 + && matches.len[matches.count - 1] >= niceLen) { + break; + } + + --avail; + ++pos; + posState = pos & posMask; + + updateOptStateAndReps(); + anyMatchPrice = opts[optCur].price + + getAnyMatchPrice(opts[optCur].state, posState); + anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state); + + calc1BytePrices(pos, posState, avail, anyRepPrice); + + if (avail >= MATCH_LEN_MIN) { + int startLen = calcLongRepPrices(pos, posState, + avail, anyRepPrice); + if (matches.count > 0) { + calcNormalMatchPrices(pos, posState, avail, + anyMatchPrice, startLen); + } + } + } + + return convertOpts(); + } + + /** + * Updates the state and reps for the current byte in the opts array. 
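+     * The previous entry may stand for one, two, or three symbols
+     * (see Optimum.set1, set2, and set3), so the corresponding state
+     * transitions are replayed here in the same order.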
+ */ + private void updateOptStateAndReps() { + int optPrev = opts[optCur].optPrev; + assert optPrev < optCur; + + if (opts[optCur].prev1IsLiteral) { + --optPrev; + + if (opts[optCur].hasPrev2) { + opts[optCur].state.set(opts[opts[optCur].optPrev2].state); + if (opts[optCur].backPrev2 < REPS) { + opts[optCur].state.updateLongRep(); + } else { + opts[optCur].state.updateMatch(); + } + } else { + opts[optCur].state.set(opts[optPrev].state); + } + + opts[optCur].state.updateLiteral(); + } else { + opts[optCur].state.set(opts[optPrev].state); + } + + if (optPrev == optCur - 1) { + // Must be either a short rep or a literal. + assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1; + + if (opts[optCur].backPrev == 0) { + opts[optCur].state.updateShortRep(); + } else { + opts[optCur].state.updateLiteral(); + } + + System.arraycopy(opts[optPrev].reps, 0, + opts[optCur].reps, 0, REPS); + } else { + int back; + if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) { + optPrev = opts[optCur].optPrev2; + back = opts[optCur].backPrev2; + opts[optCur].state.updateLongRep(); + } else { + back = opts[optCur].backPrev; + if (back < REPS) { + opts[optCur].state.updateLongRep(); + } else { + opts[optCur].state.updateMatch(); + } + } + + if (back < REPS) { + opts[optCur].reps[0] = opts[optPrev].reps[back]; + + int rep; + for (rep = 1; rep <= back; ++rep) { + opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1]; + } + + for (; rep < REPS; ++rep) { + opts[optCur].reps[rep] = opts[optPrev].reps[rep]; + } + } else { + opts[optCur].reps[0] = back - REPS; + System.arraycopy(opts[optPrev].reps, 0, + opts[optCur].reps, 1, REPS - 1); + } + } + } + + /** + * Calculates prices of a literal, a short rep, and literal + rep0. + */ + private void calc1BytePrices(int pos, int posState, + int avail, int anyRepPrice) { + // This will be set to true if using a literal or a short rep. + boolean nextIsByte = false; + + int curByte = lz.getByte(0); + int matchByte = lz.getByte(opts[optCur].reps[0] + 1); + + // Try a literal. + int literalPrice = opts[optCur].price + + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1), + pos, opts[optCur].state); + if (literalPrice < opts[optCur + 1].price) { + opts[optCur + 1].set1(literalPrice, optCur, -1); + nextIsByte = true; + } + + // Try a short rep. + if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur + || opts[optCur + 1].backPrev != 0)) { + int shortRepPrice = getShortRepPrice(anyRepPrice, + opts[optCur].state, + posState); + if (shortRepPrice <= opts[optCur + 1].price) { + opts[optCur + 1].set1(shortRepPrice, optCur, 0); + nextIsByte = true; + } + } + + // If neither a literal nor a short rep was the cheapest choice, + // try literal + long rep0. + if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) { + int lenLimit = Math.min(niceLen, avail - 1); + int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit); + + if (len >= MATCH_LEN_MIN) { + State nextState = new State(opts[optCur].state); + nextState.updateLiteral(); + int nextPosState = (pos + 1) & posMask; + int price = literalPrice + + getLongRepAndLenPrice(0, len, + nextState, nextPosState); + + int i = optCur + 1 + len; + while (optEnd < i) { + opts[++optEnd].reset(); + } + + if (price < opts[i].price) { + opts[i].set2(price, optCur, 0); + } + } + } + } + + /** + * Calculates prices of long rep and long rep + literal + rep0. 
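+     * Returns the length from which normal match pricing should start:
+     * one past the rep0 match length when a rep0 match is found,
+     * otherwise MATCH_LEN_MIN.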
+ */ + private int calcLongRepPrices(int pos, int posState, + int avail, int anyRepPrice) { + int startLen = MATCH_LEN_MIN; + int lenLimit = Math.min(avail, niceLen); + + for (int rep = 0; rep < REPS; ++rep) { + int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit); + if (len < MATCH_LEN_MIN) { + continue; + } + + while (optEnd < optCur + len) { + opts[++optEnd].reset(); + } + + int longRepPrice = getLongRepPrice(anyRepPrice, rep, + opts[optCur].state, posState); + + for (int i = len; i >= MATCH_LEN_MIN; --i) { + int price = longRepPrice + + repLenEncoder.getPrice(i, posState); + if (price < opts[optCur + i].price) { + opts[optCur + i].set1(price, optCur, rep); + } + } + + if (rep == 0) { + startLen = len + 1; + } + + int len2Limit = Math.min(niceLen, avail - len - 1); + int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep], + len2Limit); + + if (len2 >= MATCH_LEN_MIN) { + // Rep + int price = longRepPrice + + repLenEncoder.getPrice(len, posState); + State nextState = new State(opts[optCur].state); + nextState.updateLongRep(); + + // Literal + int curByte = lz.getByte(len, 0); + int matchByte = lz.getByte(0); // lz.getByte(len, len) + int prevByte = lz.getByte(len, 1); + price += literalEncoder.getPrice(curByte, matchByte, prevByte, + pos + len, nextState); + nextState.updateLiteral(); + + // Rep0 + int nextPosState = (pos + len + 1) & posMask; + price += getLongRepAndLenPrice(0, len2, + nextState, nextPosState); + + int i = optCur + len + 1 + len2; + while (optEnd < i) { + opts[++optEnd].reset(); + } + + if (price < opts[i].price) { + opts[i].set3(price, optCur, rep, len, 0); + } + } + } + + return startLen; + } + + /** + * Calculates prices of a normal match and normal match + literal + rep0. + */ + private void calcNormalMatchPrices(int pos, int posState, int avail, + int anyMatchPrice, int startLen) { + // If the longest match is so long that it would not fit into + // the opts array, shorten the matches. + if (matches.len[matches.count - 1] > avail) { + matches.count = 0; + while (matches.len[matches.count] < avail) { + ++matches.count; + } + + matches.len[matches.count++] = avail; + } + + if (matches.len[matches.count - 1] < startLen) { + return; + } + + while (optEnd < optCur + matches.len[matches.count - 1]) { + opts[++optEnd].reset(); + } + + int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, + opts[optCur].state); + + int match = 0; + while (startLen > matches.len[match]) { + ++match; + } + + for (int len = startLen; ; ++len) { + int dist = matches.dist[match]; + + // Calculate the price of a match of len bytes from the nearest + // possible distance. + int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice, + dist, len, posState); + if (matchAndLenPrice < opts[optCur + len].price) { + opts[optCur + len].set1(matchAndLenPrice, + optCur, dist + REPS); + } + + if (len != matches.len[match]) { + continue; + } + + // Try match + literal + rep0. First get the length of the rep0. 
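+            // Data often continues to match at the same distance after one
+            // mismatching byte, so pricing the whole match + literal + rep0
+            // sequence can beat pricing each symbol on its own.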
+ int len2Limit = Math.min(niceLen, avail - len - 1); + int len2 = lz.getMatchLen(len + 1, dist, len2Limit); + + if (len2 >= MATCH_LEN_MIN) { + State nextState = new State(opts[optCur].state); + nextState.updateMatch(); + + // Literal + int curByte = lz.getByte(len, 0); + int matchByte = lz.getByte(0); // lz.getByte(len, len) + int prevByte = lz.getByte(len, 1); + int price = matchAndLenPrice + + literalEncoder.getPrice(curByte, matchByte, + prevByte, pos + len, + nextState); + nextState.updateLiteral(); + + // Rep0 + int nextPosState = (pos + len + 1) & posMask; + price += getLongRepAndLenPrice(0, len2, + nextState, nextPosState); + + int i = optCur + len + 1 + len2; + while (optEnd < i) { + opts[++optEnd].reset(); + } + + if (price < opts[i].price) { + opts[i].set3(price, optCur, dist + REPS, len, 0); + } + } + + if (++match == matches.count) { + break; + } + } + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/Optimum.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/Optimum.java new file mode 100644 index 0000000..54ba038 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/Optimum.java @@ -0,0 +1,64 @@ + +package org.xbib.io.compress.xz.lzma; + +final class Optimum { + private static final int INFINITY_PRICE = 1 << 30; + + final State state = new State(); + final int[] reps = new int[LZMACoder.REPS]; + + /** + * Cumulative price of arriving to this byte. + */ + int price; + + int optPrev; + int backPrev; + boolean prev1IsLiteral; + + boolean hasPrev2; + int optPrev2; + int backPrev2; + + /** + * Resets the price. + */ + void reset() { + price = INFINITY_PRICE; + } + + /** + * Sets to indicate one LZMA symbol (literal, rep, or match). + */ + void set1(int newPrice, int optCur, int back) { + price = newPrice; + optPrev = optCur; + backPrev = back; + prev1IsLiteral = false; + } + + /** + * Sets to indicate two LZMA symbols of which the first one is a literal. + */ + void set2(int newPrice, int optCur, int back) { + price = newPrice; + optPrev = optCur + 1; + backPrev = back; + prev1IsLiteral = true; + hasPrev2 = false; + } + + /** + * Sets to indicate three LZMA symbols of which the second one + * is a literal. 
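+     * As used by the encoder, the sequence is a match or rep, then
+     * one literal, then a rep0 match that ends at this position.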
+ */ + void set3(int newPrice, int optCur, int back2, int len2, int back) { + price = newPrice; + optPrev = optCur + len2 + 1; + backPrev = back; + prev1IsLiteral = true; + hasPrev2 = true; + optPrev2 = optCur; + backPrev2 = back2; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/State.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/State.java new file mode 100644 index 0000000..d87530f --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/lzma/State.java @@ -0,0 +1,68 @@ + +package org.xbib.io.compress.xz.lzma; + +final class State { + static final int STATES = 12; + + private static final int LIT_STATES = 7; + + private static final int LIT_LIT = 0; + private static final int MATCH_LIT_LIT = 1; + private static final int REP_LIT_LIT = 2; + private static final int SHORTREP_LIT_LIT = 3; + private static final int MATCH_LIT = 4; + private static final int REP_LIT = 5; + private static final int SHORTREP_LIT = 6; + private static final int LIT_MATCH = 7; + private static final int LIT_LONGREP = 8; + private static final int LIT_SHORTREP = 9; + private static final int NONLIT_MATCH = 10; + private static final int NONLIT_REP = 11; + + private int state; + + State() { + } + + State(State other) { + state = other.state; + } + + void reset() { + state = LIT_LIT; + } + + int get() { + return state; + } + + void set(State other) { + state = other.state; + } + + void updateLiteral() { + if (state <= SHORTREP_LIT_LIT) { + state = LIT_LIT; + } else if (state <= LIT_SHORTREP) { + state -= 3; + } else { + state -= 6; + } + } + + void updateMatch() { + state = state < LIT_STATES ? LIT_MATCH : NONLIT_MATCH; + } + + void updateLongRep() { + state = state < LIT_STATES ? LIT_LONGREP : NONLIT_REP; + } + + void updateShortRep() { + state = state < LIT_STATES ? 
LIT_SHORTREP : NONLIT_REP; + } + + boolean isLiteral() { + return state < LIT_STATES; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/rangecoder/RangeCoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/rangecoder/RangeCoder.java new file mode 100644 index 0000000..4b74e27 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/rangecoder/RangeCoder.java @@ -0,0 +1,17 @@ + +package org.xbib.io.compress.xz.rangecoder; + +import java.util.Arrays; + +public abstract class RangeCoder { + static final int SHIFT_BITS = 8; + static final int TOP_MASK = 0xFF000000; + static final int BIT_MODEL_TOTAL_BITS = 11; + static final int BIT_MODEL_TOTAL = 1 << BIT_MODEL_TOTAL_BITS; + static final short PROB_INIT = (short) (BIT_MODEL_TOTAL / 2); + static final int MOVE_BITS = 5; + + public static final void initProbs(short[] probs) { + Arrays.fill(probs, PROB_INIT); + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/rangecoder/RangeDecoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/rangecoder/RangeDecoder.java new file mode 100644 index 0000000..fb82f10 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/rangecoder/RangeDecoder.java @@ -0,0 +1,123 @@ + +package org.xbib.io.compress.xz.rangecoder; + +import org.xbib.io.compress.xz.CorruptedInputException; + +import java.io.DataInputStream; +import java.io.IOException; + +public final class RangeDecoder extends RangeCoder { + private static final int INIT_SIZE = 5; + + private final byte[] buf; + private int pos = 0; + private int end = 0; + + private int range = 0; + private int code = 0; + + public RangeDecoder(int inputSizeMax) { + buf = new byte[inputSizeMax - INIT_SIZE]; + } + + public void prepareInputBuffer(DataInputStream in, int len) + throws IOException { + if (len < INIT_SIZE) { + throw new CorruptedInputException(); + } + + if (in.readUnsignedByte() != 0x00) { + throw new CorruptedInputException(); + } + + code = in.readInt(); + range = 0xFFFFFFFF; + + pos = 0; + end = len - INIT_SIZE; + in.readFully(buf, 0, end); + } + + public boolean isInBufferOK() { + return pos <= end; + } + + public boolean isFinished() { + return pos == end && code == 0; + } + + public void normalize() throws IOException { + if ((range & TOP_MASK) == 0) { + try { + // If the input is corrupt, this might throw + // ArrayIndexOutOfBoundsException. + code = (code << SHIFT_BITS) | (buf[pos++] & 0xFF); + range <<= SHIFT_BITS; + } catch (ArrayIndexOutOfBoundsException e) { + throw new CorruptedInputException(); + } + } + } + + public int decodeBit(short[] probs, int index) throws IOException { + normalize(); + + int prob = probs[index]; + int bound = (range >>> BIT_MODEL_TOTAL_BITS) * prob; + int bit; + + // Compare code and bound as if they were unsigned 32-bit integers. 
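+        // Flipping the sign bit with XOR turns the unsigned comparison
+        // into a signed one: for example, 0xFFFFFFFF < 0x00000001 is false
+        // as unsigned values, and after the flip the signed comparison
+        // 0x7FFFFFFF < 0x80000001 is false as well.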
+ if ((code ^ 0x80000000) < (bound ^ 0x80000000)) { + range = bound; + probs[index] = (short) ( + prob + ((BIT_MODEL_TOTAL - prob) >>> MOVE_BITS)); + bit = 0; + } else { + range -= bound; + code -= bound; + probs[index] = (short) (prob - (prob >>> MOVE_BITS)); + bit = 1; + } + + return bit; + } + + public int decodeBitTree(short[] probs) throws IOException { + int symbol = 1; + + do { + symbol = (symbol << 1) | decodeBit(probs, symbol); + } while (symbol < probs.length); + + return symbol - probs.length; + } + + public int decodeReverseBitTree(short[] probs) throws IOException { + int symbol = 1; + int i = 0; + int result = 0; + + do { + int bit = decodeBit(probs, symbol); + symbol = (symbol << 1) | bit; + result |= bit << i++; + } while (symbol < probs.length); + + return result; + } + + public int decodeDirectBits(int count) throws IOException { + int result = 0; + + do { + normalize(); + + range >>>= 1; + int t = (code - range) >>> 31; + code -= range & (t - 1); + result = (result << 1) | (1 - t); + } while (--count != 0); + + return result; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/rangecoder/RangeEncoder.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/rangecoder/RangeEncoder.java new file mode 100644 index 0000000..aac3280 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/rangecoder/RangeEncoder.java @@ -0,0 +1,196 @@ + +package org.xbib.io.compress.xz.rangecoder; + +import java.io.IOException; +import java.io.OutputStream; + +public final class RangeEncoder extends RangeCoder { + private static final int MOVE_REDUCING_BITS = 4; + private static final int BIT_PRICE_SHIFT_BITS = 4; + + private static final int[] prices + = new int[BIT_MODEL_TOTAL >>> MOVE_REDUCING_BITS]; + + private long low; + private int range; + + // NOTE: int is OK for LZMA2 because a compressed chunk + // is not more than 64 KiB, but with LZMA1 there is no chunking + // so in theory cacheSize can grow very big. To be very safe, + // use long instead of int if you adapt this code for LZMA1. + private int cacheSize; + private byte cache; + + private final byte[] buf; + private int bufPos; + + static { + for (int i = (1 << MOVE_REDUCING_BITS) / 2; i < BIT_MODEL_TOTAL; + i += (1 << MOVE_REDUCING_BITS)) { + int w = i; + int bitCount = 0; + + for (int j = 0; j < BIT_PRICE_SHIFT_BITS; ++j) { + w *= w; + bitCount <<= 1; + + while ((w & 0xFFFF0000) != 0) { + w >>>= 1; + ++bitCount; + } + } + + prices[i >> MOVE_REDUCING_BITS] + = (BIT_MODEL_TOTAL_BITS << BIT_PRICE_SHIFT_BITS) + - 15 - bitCount; + } + } + + public RangeEncoder(int bufSize) { + buf = new byte[bufSize]; + reset(); + } + + public void reset() { + low = 0; + range = 0xFFFFFFFF; + cache = 0x00; + cacheSize = 1; + bufPos = 0; + } + + public int getPendingSize() { + return bufPos + cacheSize + 5 - 1; + } + + public int finish() { + for (int i = 0; i < 5; ++i) { + shiftLow(); + } + + return bufPos; + } + + public void write(OutputStream out) throws IOException { + out.write(buf, 0, bufPos); + } + + private void shiftLow() { + int lowHi = (int) (low >>> 32); + + if (lowHi != 0 || low < 0xFF000000L) { + int temp = cache; + + do { + buf[bufPos++] = (byte) (temp + lowHi); + temp = 0xFF; + } while (--cacheSize != 0); + + cache = (byte) (low >>> 24); + } + + ++cacheSize; + low = (low & 0x00FFFFFF) << 8; + } + + public void encodeBit(short[] probs, int index, int bit) { + int prob = probs[index]; + int bound = (range >>> BIT_MODEL_TOTAL_BITS) * prob; + + // NOTE: Any non-zero value for bit is taken as 1. 
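+        // The probability adapts after every bit: it moves 1/32 of the
+        // remaining distance (MOVE_BITS is 5) toward the value just coded,
+        // so frequently seen bit values become cheaper to encode.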
+ if (bit == 0) { + range = bound; + probs[index] = (short) ( + prob + ((BIT_MODEL_TOTAL - prob) >>> MOVE_BITS)); + } else { + low += bound & 0xFFFFFFFFL; + range -= bound; + probs[index] = (short) (prob - (prob >>> MOVE_BITS)); + } + + if ((range & TOP_MASK) == 0) { + range <<= SHIFT_BITS; + shiftLow(); + } + } + + public static int getBitPrice(int prob, int bit) { + // NOTE: Unlike in encodeBit(), here bit must be 0 or 1. + assert bit == 0 || bit == 1; + return prices[(prob ^ ((-bit) & (BIT_MODEL_TOTAL - 1))) + >>> MOVE_REDUCING_BITS]; + } + + public void encodeBitTree(short[] probs, int symbol) { + int index = 1; + int mask = probs.length; + + do { + mask >>>= 1; + int bit = symbol & mask; + encodeBit(probs, index, bit); + + index <<= 1; + if (bit != 0) { + index |= 1; + } + + } while (mask != 1); + } + + public static int getBitTreePrice(short[] probs, int symbol) { + int price = 0; + symbol |= probs.length; + + do { + int bit = symbol & 1; + symbol >>>= 1; + price += getBitPrice(probs[symbol], bit); + } while (symbol != 1); + + return price; + } + + public void encodeReverseBitTree(short[] probs, int symbol) { + int index = 1; + symbol |= probs.length; + + do { + int bit = symbol & 1; + symbol >>>= 1; + encodeBit(probs, index, bit); + index = (index << 1) | bit; + } while (symbol != 1); + } + + public static int getReverseBitTreePrice(short[] probs, int symbol) { + int price = 0; + int index = 1; + symbol |= probs.length; + + do { + int bit = symbol & 1; + symbol >>>= 1; + price += getBitPrice(probs[index], bit); + index = (index << 1) | bit; + } while (symbol != 1); + + return price; + } + + public void encodeDirectBits(int value, int count) { + do { + range >>>= 1; + low += range & (0 - ((value >>> --count) & 1)); + + if ((range & TOP_MASK) == 0) { + range <<= SHIFT_BITS; + shiftLow(); + } + } while (count != 0); + } + + public static int getDirectBitsPrice(int count) { + return count << BIT_PRICE_SHIFT_BITS; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/ARM.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/ARM.java new file mode 100644 index 0000000..2cc9361 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/ARM.java @@ -0,0 +1,42 @@ + +package org.xbib.io.compress.xz.simple; + +public final class ARM implements SimpleFilter { + private final boolean isEncoder; + private int pos; + + public ARM(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos + 8; + } + + public int code(byte[] buf, int off, int len) { + int end = off + len - 4; + int i; + + for (i = off; i <= end; i += 4) { + if ((buf[i + 3] & 0xFF) == 0xEB) { + int src = ((buf[i + 2] & 0xFF) << 16) + | ((buf[i + 1] & 0xFF) << 8) + | (buf[i] & 0xFF); + src <<= 2; + + int dest; + if (isEncoder) { + dest = src + (pos + i - off); + } else { + dest = src - (pos + i - off); + } + + dest >>>= 2; + buf[i + 2] = (byte) (dest >>> 16); + buf[i + 1] = (byte) (dest >>> 8); + buf[i] = (byte) dest; + } + } + + i -= off; + pos += i; + return i; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/ARMThumb.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/ARMThumb.java new file mode 100644 index 0000000..49cbe92 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/ARMThumb.java @@ -0,0 +1,45 @@ + +package org.xbib.io.compress.xz.simple; + +public final class ARMThumb implements SimpleFilter { + private final boolean isEncoder; + private int pos; + + public 
ARMThumb(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos + 4; + } + + public int code(byte[] buf, int off, int len) { + int end = off + len - 4; + int i; + + for (i = off; i <= end; i += 2) { + if ((buf[i + 1] & 0xF8) == 0xF0 && (buf[i + 3] & 0xF8) == 0xF8) { + int src = ((buf[i + 1] & 0x07) << 19) + | ((buf[i] & 0xFF) << 11) + | ((buf[i + 3] & 0x07) << 8) + | (buf[i + 2] & 0xFF); + src <<= 1; + + int dest; + if (isEncoder) { + dest = src + (pos + i - off); + } else { + dest = src - (pos + i - off); + } + + dest >>>= 1; + buf[i + 1] = (byte) (0xF0 | ((dest >>> 19) & 0x07)); + buf[i] = (byte) (dest >>> 11); + buf[i + 3] = (byte) (0xF8 | ((dest >>> 8) & 0x07)); + buf[i + 2] = (byte) dest; + i += 2; + } + } + + i -= off; + pos += i; + return i; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/IA64.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/IA64.java new file mode 100644 index 0000000..92f365b --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/IA64.java @@ -0,0 +1,77 @@ + +package org.xbib.io.compress.xz.simple; + +public final class IA64 implements SimpleFilter { + private static final int[] BRANCH_TABLE = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0}; + + private final boolean isEncoder; + private int pos; + + public IA64(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos; + } + + public int code(byte[] buf, int off, int len) { + int end = off + len - 16; + int i; + + for (i = off; i <= end; i += 16) { + int instrTemplate = buf[i] & 0x1F; + int mask = BRANCH_TABLE[instrTemplate]; + + for (int slot = 0, bitPos = 5; slot < 3; ++slot, bitPos += 41) { + if (((mask >>> slot) & 1) == 0) { + continue; + } + + int bytePos = bitPos >>> 3; + int bitRes = bitPos & 7; + + long instr = 0; + for (int j = 0; j < 6; ++j) { + instr |= (buf[i + bytePos + j] & 0xFFL) << (8 * j); + } + + long instrNorm = instr >>> bitRes; + + if (((instrNorm >>> 37) & 0x0F) != 0x05 + || ((instrNorm >>> 9) & 0x07) != 0x00) { + continue; + } + + int src = (int) ((instrNorm >>> 13) & 0x0FFFFF); + src |= ((int) (instrNorm >>> 36) & 1) << 20; + src <<= 4; + + int dest; + if (isEncoder) { + dest = src + (pos + i - off); + } else { + dest = src - (pos + i - off); + } + + dest >>>= 4; + + instrNorm &= ~(0x8FFFFFL << 13); + instrNorm |= (dest & 0x0FFFFFL) << 13; + instrNorm |= (dest & 0x100000L) << (36 - 20); + + instr &= (1 << bitRes) - 1; + instr |= instrNorm << bitRes; + + for (int j = 0; j < 6; ++j) { + buf[i + bytePos + j] = (byte) (instr >>> (8 * j)); + } + } + } + + i -= off; + pos += i; + return i; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/PowerPC.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/PowerPC.java new file mode 100644 index 0000000..d6cfbcd --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/PowerPC.java @@ -0,0 +1,42 @@ + +package org.xbib.io.compress.xz.simple; + +public final class PowerPC implements SimpleFilter { + private final boolean isEncoder; + private int pos; + + public PowerPC(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos; + } + + public int code(byte[] buf, int off, int len) { + int end = off + len - 4; + int i; + + for (i = off; i <= end; i += 4) { + if ((buf[i] & 0xFC) == 0x48 && (buf[i + 3] & 0x03) == 0x01) { + int src = ((buf[i] & 0x03) << 24) + | ((buf[i + 1] & 0xFF) << 16) + 
| ((buf[i + 2] & 0xFF) << 8) + | (buf[i + 3] & 0xFC); + + int dest; + if (isEncoder) { + dest = src + (pos + i - off); + } else { + dest = src - (pos + i - off); + } + + buf[i] = (byte) (0x48 | ((dest >>> 24) & 0x03)); + buf[i + 1] = (byte) (dest >>> 16); + buf[i + 2] = (byte) (dest >>> 8); + buf[i + 3] = (byte) ((buf[i + 3] & 0x03) | dest); + } + } + + i -= off; + pos += i; + return i; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/SPARC.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/SPARC.java new file mode 100644 index 0000000..18a8a3f --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/SPARC.java @@ -0,0 +1,48 @@ + +package org.xbib.io.compress.xz.simple; + +public final class SPARC implements SimpleFilter { + private final boolean isEncoder; + private int pos; + + public SPARC(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos; + } + + public int code(byte[] buf, int off, int len) { + int end = off + len - 4; + int i; + + for (i = off; i <= end; i += 4) { + if ((buf[i] == 0x40 && (buf[i + 1] & 0xC0) == 0x00) + || (buf[i] == 0x7F && (buf[i + 1] & 0xC0) == 0xC0)) { + int src = ((buf[i] & 0xFF) << 24) + | ((buf[i + 1] & 0xFF) << 16) + | ((buf[i + 2] & 0xFF) << 8) + | (buf[i + 3] & 0xFF); + src <<= 2; + + int dest; + if (isEncoder) { + dest = src + (pos + i - off); + } else { + dest = src - (pos + i - off); + } + + dest >>>= 2; + dest = (((0 - ((dest >>> 22) & 1)) << 22) & 0x3FFFFFFF) + | (dest & 0x3FFFFF) | 0x40000000; + + buf[i] = (byte) (dest >>> 24); + buf[i + 1] = (byte) (dest >>> 16); + buf[i + 2] = (byte) (dest >>> 8); + buf[i + 3] = (byte) dest; + } + } + + i -= off; + pos += i; + return i; + } +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/SimpleFilter.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/SimpleFilter.java new file mode 100644 index 0000000..0a30e01 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/SimpleFilter.java @@ -0,0 +1,6 @@ + +package org.xbib.io.compress.xz.simple; + +public interface SimpleFilter { + int code(byte[] buf, int off, int len); +} diff --git a/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/X86.java b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/X86.java new file mode 100644 index 0000000..5060b32 --- /dev/null +++ b/io-compress-xz/src/main/java/org/xbib/io/compress/xz/simple/X86.java @@ -0,0 +1,93 @@ + +package org.xbib.io.compress.xz.simple; + +public final class X86 implements SimpleFilter { + private static final boolean[] MASK_TO_ALLOWED_STATUS + = {true, true, true, false, true, false, false, false}; + + private static final int[] MASK_TO_BIT_NUMBER = {0, 1, 2, 2, 3, 3, 3, 3}; + + private final boolean isEncoder; + private int pos; + private int prevMask = 0; + + private static boolean test86MSByte(byte b) { + int i = b & 0xFF; + return i == 0x00 || i == 0xFF; + } + + public X86(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos + 5; + } + + public int code(byte[] buf, int off, int len) { + int prevPos = off - 1; + int end = off + len - 5; + int i; + + for (i = off; i <= end; ++i) { + if ((buf[i] & 0xFE) != 0xE8) { + continue; + } + + prevPos = i - prevPos; + if ((prevPos & ~3) != 0) { // (unsigned)prevPos > 3 + prevMask = 0; + } else { + prevMask = (prevMask << (prevPos - 1)) & 7; + if (prevMask != 0) { + if (!MASK_TO_ALLOWED_STATUS[prevMask] || test86MSByte( + buf[i + 4 - 
MASK_TO_BIT_NUMBER[prevMask]])) {
+                        prevPos = i;
+                        prevMask = (prevMask << 1) | 1;
+                        continue;
+                    }
+                }
+            }
+
+            prevPos = i;
+
+            if (test86MSByte(buf[i + 4])) {
+                int src = (buf[i + 1] & 0xFF)
+                        | ((buf[i + 2] & 0xFF) << 8)
+                        | ((buf[i + 3] & 0xFF) << 16)
+                        | ((buf[i + 4] & 0xFF) << 24);
+                int dest;
+                while (true) {
+                    if (isEncoder) {
+                        dest = src + (pos + i - off);
+                    } else {
+                        dest = src - (pos + i - off);
+                    }
+
+                    if (prevMask == 0) {
+                        break;
+                    }
+
+                    int index = MASK_TO_BIT_NUMBER[prevMask] * 8;
+                    if (!test86MSByte((byte) (dest >>> (24 - index)))) {
+                        break;
+                    }
+
+                    src = dest ^ ((1 << (32 - index)) - 1);
+                }
+
+                buf[i + 1] = (byte) dest;
+                buf[i + 2] = (byte) (dest >>> 8);
+                buf[i + 3] = (byte) (dest >>> 16);
+                buf[i + 4] = (byte) (~(((dest >>> 24) & 1) - 1));
+                i += 4;
+            } else {
+                prevMask = (prevMask << 1) | 1;
+            }
+        }
+
+        prevPos = i - prevPos;
+        prevMask = ((prevPos & ~3) != 0) ? 0 : prevMask << (prevPos - 1);
+
+        i -= off;
+        pos += i;
+        return i;
+    }
+}
diff --git a/io-compress-xz/src/test/java/org/xbib/io/compress/xz/XZTest.java b/io-compress-xz/src/test/java/org/xbib/io/compress/xz/XZTest.java
new file mode 100644
index 0000000..bfbce48
--- /dev/null
+++ b/io-compress-xz/src/test/java/org/xbib/io/compress/xz/XZTest.java
@@ -0,0 +1,40 @@
+package org.xbib.io.compress.xz;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import org.junit.jupiter.api.Test;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.nio.charset.StandardCharsets;
+
+public class XZTest {
+
+    @Test
+    public void testHelloWorld() throws Exception {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        XZOutputStream zOut = new XZOutputStream(out);
+        ObjectOutputStream objOut = new ObjectOutputStream(zOut);
+        String helloWorld = "Hello World!";
+        objOut.writeObject(helloWorld);
+        // closing the object stream flushes its internal buffer and finishes the XZ stream
+        objOut.close();
+        ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+        XZInputStream zIn = new XZInputStream(in);
+        ObjectInputStream objIn = new ObjectInputStream(zIn);
+        assertEquals("Hello World!", objIn.readObject());
+    }
+
+    @Test
+    public void readXZFile() throws IOException {
+        InputStream inputStream = getClass().getResourceAsStream("test.xz");
+        XZInputStream xzInputStream = new XZInputStream(inputStream);
+        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+        int i;
+        byte[] data = new byte[1024];
+        while ((i = xzInputStream.read(data, 0, data.length)) != -1) {
+            outputStream.write(data, 0, i);
+        }
+        assertEquals("Hello world\n", new String(outputStream.toByteArray(), StandardCharsets.UTF_8));
+    }
+}
diff --git a/io-compress-xz/src/test/resources/org/xbib/io/compress/xz/test.xz b/io-compress-xz/src/test/resources/org/xbib/io/compress/xz/test.xz
new file mode 100644
index 0000000..bd4062a
Binary files /dev/null and b/io-compress-xz/src/test/resources/org/xbib/io/compress/xz/test.xz differ
diff --git a/io-compress-zlib/src/main/java/module-info.java b/io-compress-zlib/src/main/java/module-info.java
new file mode 100644
index 0000000..5968f38
--- /dev/null
+++ b/io-compress-zlib/src/main/java/module-info.java
@@ -0,0 +1,3 @@
+module org.xbib.io.compress.zlib {
+    exports org.xbib.io.compress.zlib;
+}
diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/Adler32.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/Adler32.java
new file mode 100644
index 0000000..cdd2009
--- /dev/null
+++ 
b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/Adler32.java @@ -0,0 +1,72 @@ + +package org.xbib.io.compress.zlib; + +public class Adler32 { + + // largest prime smaller than 65536 + private static final int BASE = 65521; + // NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 + private static final int NMAX = 5552; + + private Adler32() { + } + + public static long adler32(long adler, byte[] buf, int ind, int len) { + if (buf == null) { + return 1L; + } + int index = ind; + long s1 = adler & 0xffff; + long s2 = (adler >> 16) & 0xffff; + int k; + + while (len > 0) { + k = len < NMAX ? len : NMAX; + len -= k; + while (k >= 16) { + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + s1 += buf[index++] & 0xff; + s2 += s1; + k -= 16; + } + if (k != 0) { + do { + s1 += buf[index++] & 0xff; + s2 += s1; + } while (--k != 0); + } + s1 %= BASE; + s2 %= BASE; + } + return (s2 << 16) | s1; + } +} diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/BufferDeflater.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/BufferDeflater.java new file mode 100644 index 0000000..31636fa --- /dev/null +++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/BufferDeflater.java @@ -0,0 +1,145 @@ +package org.xbib.io.compress.zlib; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.util.zip.CRC32; +import java.util.zip.Deflater; + +public class BufferDeflater { + + + /** + * The standard 10 byte GZIP header + */ + private static final byte[] GZIP_HEADER = new byte[]{0x1f, (byte) 0x8b, + Deflater.DEFLATED, 0, 0, 0, 0, 0, 0, 0}; + + /** + * CRC-32 of uncompressed data. + */ + private final CRC32 crc = new CRC32(); + + /** + * Deflater to deflate data + */ + private final Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION, + true); + + /** + * Output buffer building area + */ + private final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + + /** + * Internal transfer space + */ + private final byte[] transfer = new byte[1000]; + + /** + * The flush mode to use at the end of each buffer + */ + private final int flushMode; + + + /** + * New buffer deflater + * + * @param syncFlush if true, all data in buffer can be immediately decompressed + * from output buffer + */ + public BufferDeflater(boolean syncFlush) { + flushMode = syncFlush ? 
Deflater.SYNC_FLUSH : Deflater.NO_FLUSH;
+        buffer.write(GZIP_HEADER, 0, GZIP_HEADER.length);
+    }
+
+    /**
+     * Deflate the buffer
+     *
+     * @param in the buffer to deflate
+     * @return deflated representation of the buffer
+     */
+    public ByteBuffer deflate(ByteBuffer in) {
+        byte[] inBytes;
+        int off = in.position();
+        int len = in.remaining();
+        if (in.hasArray()) {
+            inBytes = in.array();
+            off += in.arrayOffset(); // honor the backing-array offset of sliced or wrapped buffers
+        } else {
+            off = 0;
+            inBytes = new byte[len];
+            in.get(inBytes);
+        }
+        crc.update(inBytes, off, len);
+        deflater.setInput(inBytes, off, len);
+        while (!deflater.needsInput()) {
+            int r = deflater.deflate(transfer, 0, transfer.length, flushMode);
+            buffer.write(transfer, 0, r);
+        }
+        byte[] outBytes = buffer.toByteArray();
+        buffer.reset();
+        return ByteBuffer.wrap(outBytes);
+    }
+
+    /**
+     * Write the final buffer. This writes any remaining compressed data and the GZIP trailer.
+     *
+     * @return the final buffer
+     */
+    public ByteBuffer doFinal() {
+        deflater.finish();
+        int r;
+        do {
+            r = deflater.deflate(transfer, 0, transfer.length, Deflater.FULL_FLUSH);
+            buffer.write(transfer, 0, r);
+        } while (r == transfer.length);
+        writeInt((int) crc.getValue());
+        writeInt((int) deflater.getBytesRead());
+        deflater.reset();
+        byte[] outBytes = buffer.toByteArray();
+        buffer.reset();
+        return ByteBuffer.wrap(outBytes);
+    }
+
+    /**
+     * Write a 32 bit value in little-endian order
+     *
+     * @param v the value to write
+     */
+    private void writeInt(int v) {
+        buffer.write(v & 0xff);
+        buffer.write((v >> 8) & 0xff);
+        buffer.write((v >> 16) & 0xff);
+        buffer.write((v >> 24) & 0xff);
+    }
+
+    /**
+     * For testing. Pass in the name of a file to GZIP compress.
+     * @param args name of the file to compress
+     * @throws IOException on I/O failure
+     */
+    /*public static void main(String[] args) throws IOException {
+        File inFile = new File(args[0]);
+        File outFile = new File(args[0]+".test.gz");
+        FileChannel inChan = (new FileInputStream(inFile)).getChannel();
+        FileChannel outChan = (new FileOutputStream(outFile)).getChannel();
+
+        BufferDeflater def = new BufferDeflater(false);
+
+        ByteBuffer buf = ByteBuffer.allocate(500);
+        while( true ) {
+            buf.clear();
+            int r = inChan.read(buf);
+            if( r==-1 ) break;
+            buf.flip();
+            ByteBuffer compBuf = def.deflate(buf);
+            outChan.write(compBuf);
+        }
+
+        ByteBuffer compBuf = def.doFinal();
+        outChan.write(compBuf);
+
+        inChan.close();
+        outChan.close();
+    }*/
+}
\ No newline at end of file
diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/Deflate.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/Deflate.java
new file mode 100644
index 0000000..5087c51
--- /dev/null
+++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/Deflate.java
@@ -0,0 +1,1611 @@
+
+package org.xbib.io.compress.zlib;
+
+public final class Deflate {
+
+    private static final int MAX_MEM_LEVEL = 9;
+    private static final int Z_DEFAULT_COMPRESSION = -1;
+    private static final int MAX_WBITS = 15; // 32K LZ77 window
+    private static final int DEF_MEM_LEVEL = 8;
+
+    static final class Config {
+
+        private int goodlength; // reduce lazy search above this match length
+        private int maxlazy; // do not perform lazy search above this match length
+        private int nicelength; // quit search above this match length
+        private int maxchain; // maximum hash chain length to search
+        private int func; // STORED, FAST or SLOW
+
+        private Config(int goodlength, int maxlazy,
+                       int nicelength, int maxchain, int func) {
+            this.goodlength = goodlength;
+            this.maxlazy = maxlazy;
+            this.nicelength = nicelength;
+            this.maxchain = maxchain;
+            this.func = func;
+        }
+    }
+
+    private static final int STORED = 0;
+
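+    // Editor's note (illustration added for clarity; not part of the original
+    // sources): the CONFIG_TABLE below maps a compression level to its search
+    // parameters. Level 6, which deflateInit2() also substitutes for
+    // Z_DEFAULT_COMPRESSION, uses Config(8, 16, 128, 128, SLOW): fall back to
+    // a cheaper search once a match of length 8 exists (goodlength), stop
+    // lazy evaluation above length 16 (maxlazy), accept a match of length 128
+    // or more outright (nicelength), and follow at most 128 hash-chain links
+    // (maxchain), all driven by deflateSlow().
+    /*
+    Config c = CONFIG_TABLE[6]; // good=8, lazy=16, nice=128, chain=128, func=SLOW
+    */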
private static final int FAST = 1; + private static final int SLOW = 2; + private static final Config[] CONFIG_TABLE; + + static { + CONFIG_TABLE = new Config[10]; + // good lazy nice chain + CONFIG_TABLE[0] = new Config(0, 0, 0, 0, STORED); + CONFIG_TABLE[1] = new Config(4, 4, 8, 4, FAST); + CONFIG_TABLE[2] = new Config(4, 5, 16, 8, FAST); + CONFIG_TABLE[3] = new Config(4, 6, 32, 32, FAST); + + CONFIG_TABLE[4] = new Config(4, 4, 16, 16, SLOW); + CONFIG_TABLE[5] = new Config(8, 16, 32, 32, SLOW); + CONFIG_TABLE[6] = new Config(8, 16, 128, 128, SLOW); + CONFIG_TABLE[7] = new Config(8, 32, 128, 256, SLOW); + CONFIG_TABLE[8] = new Config(32, 128, 258, 1024, SLOW); + CONFIG_TABLE[9] = new Config(32, 258, 258, 4096, SLOW); + } + + private static final String[] zerrmsg = { + "need dictionary", // Z_NEED_DICT 2 + "stream end", // Z_STREAM_END 1 + "", // Z_OK 0 + "file error", // Z_ERRNO (-1) + "stream error", // Z_STREAM_ERROR (-2) + "data error", // Z_DATA_ERROR (-3) + "insufficient memory", // Z_MEM_ERROR (-4) + "buffer error", // Z_BUF_ERROR (-5) + "incompatible version",// Z_VERSION_ERROR (-6) + "" + }; + // block not completed, need more input or more output + private static final int NEED_MORE = 0; + // block flush performed + private static final int BLOCK_DONE = 1; + // finish started, need only more output at next deflate + private static final int FINISH_STARTED = 2; + // finish done, accept no more input or output + private static final int FINISH_DONE = 3; + // preset dictionary flag in zlib header + private static final int PRESET_DICT = 0x20; + private static final int Z_FILTERED = 1; + private static final int Z_HUFFMAN_ONLY = 2; + private static final int Z_DEFAULT_STRATEGY = 0; + private static final int Z_NO_FLUSH = 0; + private static final int Z_PARTIAL_FLUSH = 1; + private static final int Z_FULL_FLUSH = 3; + private static final int Z_FINISH = 4; + private static final int Z_OK = 0; + private static final int Z_STREAM_END = 1; + private static final int Z_NEED_DICT = 2; + private static final int Z_STREAM_ERROR = -2; + private static final int Z_DATA_ERROR = -3; + private static final int Z_BUF_ERROR = -5; + private static final int INIT_STATE = 42; + private static final int BUSY_STATE = 113; + private static final int FINISH_STATE = 666; + // The deflate compression method + private static final int Z_DEFLATED = 8; + private static final int STORED_BLOCK = 0; + private static final int STATIC_TREES = 1; + private static final int DYN_TREES = 2; + // The three kinds of block type + private static final int Z_BINARY = 0; + private static final int Z_ASCII = 1; + private static final int Z_UNKNOWN = 2; + private static final int BUF_SIZE = 8 * 2; + // repeat previous bit length 3-6 times (2 bits of repeat count) + private static final int REP_3_6 = 16; + // repeat a zero length 3-10 times (3 bits of repeat count) + private static final int REPZ_3_10 = 17; + // repeat a zero length 11-138 times (7 bits of repeat count) + private static final int REPZ_11_138 = 18; + private static final int MIN_MATCH = 3; + private static final int MAX_MATCH = 258; + private static final int MIN_LOOKAHEAD = (MAX_MATCH + MIN_MATCH + 1); + private static final int MAX_BITS = 15; + private static final int D_CODES = 30; + private static final int BL_CODES = 19; + private static final int LENGTH_CODES = 29; + private static final int LITERALS = 256; + private static final int L_CODES = (LITERALS + 1 + LENGTH_CODES); + private static final int HEAP_SIZE = (2 * L_CODES + 1); + private static final 
int END_BLOCK = 256; + private ZStream strm; // pointer back to this zlib stream + private int status; // as the name implies + private byte[] pendingBuf; // output still pending + private int pendingBufSize; // size of pending_buf + private int pendingOut; // next pending byte to output to the stream + private int pending; // nb of bytes in the pending buffer + private int noheader; // suppress zlib header and adler32 + private byte dataType; // UNKNOWN, BINARY or ASCII + private int lastFlush; // value of flush param for previous deflate call + private int wSize; // LZ77 window size (32K by default) + private int wBits; // log2(w_size) (8..16) + private int wMask; // w_size - 1 + private byte[] window; + // Sliding window. Input bytes are read into the second half of the window, + // and move to the first half later to keep a dictionary of at least wSize + // bytes. With this organization, matches are limited to a distance of + // wSize-MAX_MATCH bytes, but this ensures that IO is always + // performed with a length multiple of the block size. Also, it limits + // the window size to 64K, which is quite useful on MSDOS. + // To do: use the user input buffer as sliding window. + private int windowSize; + // Actual size of window: 2*wSize, except when the user input buffer + // is directly used as sliding window. + private short[] prev; + // Link to older string with same hash index. To limit the size of this + // array to 64K, this link is maintained only for the last 32K strings. + // An index in this array is thus a window index modulo 32K. + private short[] head; // Heads of the hash chains or NIL. + private int insh; // hash index of string to be inserted + private int hashSize; // number of elements in hash table + private int hashBits; // log2(hash_size) + private int hashMask; // hash_size-1 + // Number of bits by which ins_h must be shifted at each input + // step. It must be such that after MIN_MATCH steps, the oldest + // byte no longer takes part in the hash key, that is: + // hash_shift * MIN_MATCH >= hash_bits + private int hashShift; + // Window position at the beginning of the current output block. Gets + // negative when the window is moved backwards. + private int blockStart; + private int matchLength; // length of best match + private int prevMatch; // previous match + private int matchAvailable; // set if previous match exists + private int strstart; // start of string to insert + private int matchStart; // start of matching string + private int lookahead; // number of valid bytes ahead in window + // Length of the best match at previous step. Matches not greater than this + // are discarded. This is used in the lazy match evaluation. + private int prevLength; + // To speed up deflation, hash chains are never searched beyond this + // length. A higher limit improves compression ratio but degrades the speed. + private int maxChainLength; + // Attempt to find a better match only when the current match is strictly + // smaller than this value. This mechanism is used only for compression + // levels >= 4. + private int maxLazyMatch; + // Insert new strings in the hash table only if the match length is not + // greater than this length. This saves time but degrades compression. + // max_insert_length is used only for compression levels <= 3. 
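+    // Editor's note (worked example, derived from deflateInit2() below): with
+    // the default memLevel of 8, hashBits = 8 + 7 = 15, hashSize = 1 << 15 =
+    // 32768, and hashShift = (15 + MIN_MATCH - 1) / MIN_MATCH = 5, so the
+    // required invariant holds: hashShift * MIN_MATCH = 15 >= hashBits = 15,
+    // i.e. after three updates the oldest byte has left the hash key.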
+    private int level; // compression level (1..9)
+    private int strategy; // favor or force Huffman coding
+    // Use a faster search when the previous match is longer than this
+    private int goodMatch;
+    // Stop searching when current match exceeds this
+    private int niceMatch;
+    private short[] dynLtree; // literal and length tree
+    private short[] dynDtree; // distance tree
+    private short[] blTree; // Huffman tree for bit lengths
+    private Tree lDesc = new Tree(); // desc for literal tree
+    private Tree dDesc = new Tree(); // desc for distance tree
+    private Tree blDesc = new Tree(); // desc for bit length tree
+    // number of codes at each bit length for an optimal tree
+    protected short[] blCount = new short[MAX_BITS + 1];
+    // heap used to build the Huffman trees
+    protected int[] heap = new int[2 * L_CODES + 1];
+    protected int heapLen; // number of elements in the heap
+    protected int heapMax; // element of largest frequency
+    // The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
+    // The same heap array is used to build all trees.
+    // Depth of each subtree used as tie breaker for trees of equal frequency
+    protected byte[] depth = new byte[2 * L_CODES + 1];
+    private int lBuf; // index for literals or lengths
+    // Size of match buffer for literals/lengths. There are 4 reasons for
+    // limiting lit_bufsize to 64K:
+    // - frequencies can be kept in 16 bit counters
+    // - if compression is not successful for the first block, all input
+    // data is still in the window so we can still emit a stored block even
+    // when input comes from standard input. (This can also be done for
+    // all blocks if lit_bufsize is not greater than 32K.)
+    // - if compression is not successful for a file smaller than 64K, we can
+    // even emit a stored file instead of a stored block (saving 5 bytes).
+    // This is applicable only for zip (not gzip or zlib).
+    // - creating new Huffman trees less frequently may not provide fast
+    // adaptation to changes in the input data statistics. (Take for
+    // example a binary file with poorly compressible code followed by
+    // a highly compressible string table.) Smaller buffer sizes give
+    // fast adaptation but have of course the overhead of transmitting
+    // trees more frequently.
+    // - I can't count above 4
+    private int litBufsize;
+    private int lastLit; // running index in l_buf
+    // Buffer for distances. To simplify the code, d_buf and l_buf have
+    // the same number of elements. To use different lengths, an extra flag
+    // array would be necessary.
+    private int dBuf; // index into pending_buf
+    protected int optLen; // bit length of current block with optimal trees
+    protected int staticLen; // bit length of current block with static trees
+    private int matches; // number of string matches in current block
+    private int lastEobLen; // bit length of EOB code for last block
+    // Output buffer. Bits are inserted starting at the bottom (least
+    // significant bits).
+    private short biBuf;
+    // Number of valid bits in bi_buf. All bits above the last valid bit
+    // are always zero.
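+    // Editor's note (worked example, not in the original comments): starting
+    // from an empty buffer, sendBits(0b101, 3) leaves biBuf = 0b101 and
+    // biValid = 3; a following sendBits(0b01, 2) ORs the new bits above the
+    // old ones:
+    /*
+    int biBuf = 0, biValid = 0;
+    biBuf |= 0b101 << biValid; biValid += 3; // biBuf == 0b00101
+    biBuf |= 0b01 << biValid;  biValid += 2; // biBuf == 0b01101, biValid == 5
+    */
+    // Only when the next write would no longer fit into BUF_SIZE = 16 bits
+    // are the low 16 bits flushed with putShort() and the remainder carried
+    // over (see sendBits() below).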
+ private int biValid; + + protected Deflate() { + dynLtree = new short[HEAP_SIZE * 2]; + dynDtree = new short[(2 * D_CODES + 1) * 2]; // distance tree + blTree = new short[(2 * BL_CODES + 1) * 2]; // Huffman tree for bit lengths + } + + protected int getPending() { + return pending; + } + + protected void setPending(int n) { + pending = n; + } + + protected byte[] getPendingBuf() { + return pendingBuf; + } + + protected int getPendingOut() { + return pendingOut; + } + + protected void setPendingOut(int n) { + pendingOut = n; + } + + protected int getNoHeader() { + return noheader; + } + + private void lminit() { + windowSize = 2 * wSize; + + head[hashSize - 1] = 0; + for (int i = 0; i < hashSize - 1; i++) { + head[i] = 0; + } + + // Set the default configuration parameters: + maxLazyMatch = Deflate.CONFIG_TABLE[level].maxlazy; + goodMatch = Deflate.CONFIG_TABLE[level].goodlength; + niceMatch = Deflate.CONFIG_TABLE[level].nicelength; + maxChainLength = Deflate.CONFIG_TABLE[level].maxchain; + + strstart = 0; + blockStart = 0; + lookahead = 0; + matchLength = prevLength = MIN_MATCH - 1; + matchAvailable = 0; + insh = 0; + } + + // Initialize the tree data structures for a new zlib stream. + private void trinit() { + + lDesc.dynTree = dynLtree; + lDesc.statDesc = StaticTree.getLDesc(); + + dDesc.dynTree = dynDtree; + dDesc.statDesc = StaticTree.getDDesc(); + + blDesc.dynTree = blTree; + blDesc.statDesc = StaticTree.getBLDesc(); + + biBuf = 0; + biValid = 0; + lastEobLen = 8; // enough lookahead for inflate + + // Initialize the first block of the first file: + initBlock(); + } + + private void initBlock() { + // Initialize the trees. + for (int i = 0; i < L_CODES; i++) { + dynLtree[i * 2] = 0; + } + for (int i = 0; i < D_CODES; i++) { + dynDtree[i * 2] = 0; + } + for (int i = 0; i < BL_CODES; i++) { + blTree[i * 2] = 0; + } + + dynLtree[END_BLOCK * 2] = 1; + optLen = staticLen = 0; + lastLit = matches = 0; + } + + // Restore the heap property by moving down the tree starting at node k, + // exchanging a node with the smallest of its two sons if necessary, stopping + // when the heap property is re-established (each father smaller than its + // two sons). + protected void pqdownheap(short[] tree, // the tree to restore + int kk // node to move down + ) { + int k = kk; + int v = heap[k]; + int j = k << 1; // left son of k + while (j <= heapLen) { + // Set j to the smallest of the two sons: + if (j < heapLen + && smaller(tree, heap[j + 1], heap[j], depth)) { + j++; + } + // Exit if v is smaller than both sons + if (smaller(tree, v, heap[j], depth)) { + break; + } + + // Exchange v with the smallest son + heap[k] = heap[j]; + k = j; + // And continue down the tree, setting j to the left son of k + j <<= 1; + } + heap[k] = v; + } + + private static boolean smaller(short[] tree, int n, int m, byte[] depth) { + short tn2 = tree[n * 2]; + short tm2 = tree[m * 2]; + return (tn2 < tm2 + || (tn2 == tm2 && depth[n] <= depth[m])); + } + + // Scan a literal or distance tree to determine the frequencies of the codes + // in the bit length tree. 
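+    // Editor's note (worked example): for the code lengths
+    // {3, 3, 3, 3, 3, 0, 0, 0, 0} this counts blTree[3 * 2] once, REP_3_6
+    // once (one explicit length 3 followed by "repeat previous length 4
+    // times"), and REPZ_3_10 once for the trailing run of four zero lengths.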
+ private void scanTree(short[] tree,// the tree to be scanned + int maxcode // and its largest code of non zero frequency + ) { + int n; // iterates over all tree elements + int prevlen = -1; // last emitted length + int curlen; // length of current code + int nextlen = tree[0 * 2 + 1]; // length of next code + int count = 0; // repeat count of the current code + int maxcount = 7; // max repeat count + int mincount = 4; // min repeat count + + if (nextlen == 0) { + maxcount = 138; + mincount = 3; + } + tree[(maxcode + 1) * 2 + 1] = (short) 0xffff; // guard + + for (n = 0; n <= maxcode; n++) { + curlen = nextlen; + nextlen = tree[(n + 1) * 2 + 1]; + if (++count < maxcount && curlen == nextlen) { + continue; + } else if (count < mincount) { + blTree[curlen * 2] += count; + } else if (curlen != 0) { + if (curlen != prevlen) { + blTree[curlen * 2]++; + } + blTree[REP_3_6 * 2]++; + } else if (count <= 10) { + blTree[REPZ_3_10 * 2]++; + } else { + blTree[REPZ_11_138 * 2]++; + } + count = 0; + prevlen = curlen; + if (nextlen == 0) { + maxcount = 138; + mincount = 3; + } else if (curlen == nextlen) { + maxcount = 6; + mincount = 3; + } else { + maxcount = 7; + mincount = 4; + } + } + } + + // Construct the Huffman tree for the bit lengths and return the index in + // BL_ORDER of the last bit length code to send. + private int buildBLTree() { + int maxblindex; // index of last bit length code of non zero freq + + // Determine the bit length frequencies for literal and distance trees + scanTree(dynLtree, lDesc.maxCode); + scanTree(dynDtree, dDesc.maxCode); + + // Build the bit length tree: + blDesc.buildTree(this); + // opt_len now includes the length of the tree representations, except + // the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + + // Determine the number of bit length codes to send. The pkzip format + // requires that at least 4 bit length codes be sent. (appnote.txt says + // 3 but the actual value used is 4.) + for (maxblindex = BL_CODES - 1; maxblindex >= 3; maxblindex--) { + if (blTree[Tree.BL_ORDER[maxblindex] * 2 + 1] != 0) { + break; + } + } + // Update opt_len to include the bit length tree and counts + optLen += 3 * (maxblindex + 1) + 5 + 5 + 4; + + return maxblindex; + } + + // Send the header for a block using dynamic Huffman trees: the counts, the + // lengths of the bit length codes, the literal tree and the distance tree. + // IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. + private void sendAllTree(int lcodes, int dcodes, int blcodes) { + int rank; // index in BL_ORDER + + sendBits(lcodes - 257, 5); // not +255 as stated in appnote.txt + sendBits(dcodes - 1, 5); + sendBits(blcodes - 4, 4); // not -3 as stated in appnote.txt + for (rank = 0; rank < blcodes; rank++) { + sendBits(blTree[Tree.BL_ORDER[rank] * 2 + 1], 3); + } + sendTree(dynLtree, lcodes - 1); // literal tree + sendTree(dynDtree, dcodes - 1); // distance tree + } + + // Send a literal or distance tree in compressed form, using the codes in + // bl_tree. 
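+    // Editor's note (worked example for sendAllTree() above): with
+    // lcodes = 260, dcodes = 20 and blcodes = 15, the header is written as
+    // 260 - 257 = 3 in 5 bits, 20 - 1 = 19 in 5 bits and 15 - 4 = 11 in
+    // 4 bits, followed by 3 bits per bit-length code in BL_ORDER and the two
+    // trees themselves, emitted via sendTree() below.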
+ private void sendTree(short[] tree,// the tree to be sent + int max_code // and its largest code of non zero frequency + ) { + int n; // iterates over all tree elements + int prevlen = -1; // last emitted length + int curlen; // length of current code + int nextlen = tree[0 * 2 + 1]; // length of next code + int count = 0; // repeat count of the current code + int maxcount = 7; // max repeat count + int mincount = 4; // min repeat count + + if (nextlen == 0) { + maxcount = 138; + mincount = 3; + } + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; + nextlen = tree[(n + 1) * 2 + 1]; + if (++count < maxcount && curlen == nextlen) { + continue; + } else if (count < mincount) { + do { + sendCode(curlen, blTree); + } while (--count != 0); + } else if (curlen != 0) { + if (curlen != prevlen) { + sendCode(curlen, blTree); + count--; + } + sendCode(REP_3_6, blTree); + sendBits(count - 3, 2); + } else if (count <= 10) { + sendCode(REPZ_3_10, blTree); + sendBits(count - 3, 3); + } else { + sendCode(REPZ_11_138, blTree); + sendBits(count - 11, 7); + } + count = 0; + prevlen = curlen; + if (nextlen == 0) { + maxcount = 138; + mincount = 3; + } else if (curlen == nextlen) { + maxcount = 6; + mincount = 3; + } else { + maxcount = 7; + mincount = 4; + } + } + } + + // Output a byte on the stream. + // IN assertion: there is enough room in pending_buf. + private void putByte(byte[] p, int start, int len) { + System.arraycopy(p, start, pendingBuf, pending, len); + pending += len; + } + + private void putByte(byte c) { + pendingBuf[pending++] = c; + } + + private void putShort(int w) { + putByte((byte) (w/*&0xff*/)); + putByte((byte) (w >>> 8)); + } + + private void putShortMSB(int b) { + putByte((byte) (b >> 8)); + putByte((byte) (b/*&0xff*/)); + } + + private void sendCode(int c, short[] tree) { + int c2 = c * 2; + sendBits((tree[c2] & 0xffff), (tree[c2 + 1] & 0xffff)); + } + + private void sendBits(int value, int length) { + int len = length; + if (biValid > BUF_SIZE - len) { + int val = value; +// bi_buf |= (val << bi_valid); + biBuf |= ((val << biValid) & 0xffff); + putShort(biBuf); + biBuf = (short) (val >>> (BUF_SIZE - biValid)); + biValid += len - BUF_SIZE; + } else { +// bi_buf |= (value) << bi_valid; + biBuf |= (((value) << biValid) & 0xffff); + biValid += len; + } + } + + // Send one empty static block to give enough lookahead for inflate. + // This takes 10 bits, of which 7 may remain in the bit buffer. + // The current inflate code requires 9 bits of lookahead. If the + // last two codes for the previous block (real code plus EOB) were coded + // on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode + // the last real code. In this case we send two empty static blocks instead + // of one. (There are no problems if the previous block is stored or fixed.) + // To simplify the code, we assume the worst case of last real code encoded + // on one bit only. + private void trAlign() { + sendBits(STATIC_TREES << 1, 3); + sendCode(END_BLOCK, StaticTree.STATIC_LTREE); + + biFlush(); + + // Of the 10 bits for the empty block, we have already sent + // (10 - bi_valid) bits. The lookahead for the last real code (before + // the EOB of the previous block) was thus at least one plus the length + // of the EOB plus what we have just sent of the empty static block. + if (1 + lastEobLen + 10 - biValid < 9) { + sendBits(STATIC_TREES << 1, 3); + sendCode(END_BLOCK, StaticTree.STATIC_LTREE); + biFlush(); + } + lastEobLen = 7; + } + + // Save the match info and tally the frequency counts. 
Return true if + // the current block must be flushed. + private boolean trTally(int dist, // distance of matched string + int lc // match length-MIN_MATCH or unmatched char (if dist==0) + ) { + + pendingBuf[dBuf + lastLit * 2] = (byte) (dist >>> 8); + pendingBuf[dBuf + lastLit * 2 + 1] = (byte) dist; + + pendingBuf[lBuf + lastLit] = (byte) lc; + lastLit++; + + if (dist == 0) { + // lc is the unmatched char + dynLtree[lc * 2]++; + } else { + matches++; + // Here, lc is the match length - MIN_MATCH + dist--; // dist = match distance - 1 + dynLtree[(Tree.LENGTH_CODE[lc] + LITERALS + 1) * 2]++; + dynDtree[Tree.distanceCode(dist) * 2]++; + } + + if ((lastLit & 0x1fff) == 0 && level > 2) { + // Compute an upper bound for the compressed length + int out_length = lastLit * 8; + int in_length = strstart - blockStart; + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (int) dynDtree[dcode * 2] + * (5L + Tree.EXTRA_DBITS[dcode]); + } + out_length >>>= 3; + if ((matches < (lastLit / 2)) && out_length < in_length / 2) { + return true; + } + } + + return (lastLit == litBufsize - 1); + // We avoid equality with lit_bufsize because of wraparound at 64K + // on 16 bit machines and because stored blocks are restricted to + // 64K-1 bytes. + } + + // Send the block data compressed using the given Huffman trees + private void compressBlock(short[] ltree, short[] dtree) { + int dist; // distance of matched string + int lc; // match length or unmatched char (if dist == 0) + int lx = 0; // running index in l_buf + int code; // the code to send + int extra; // number of extra bits to send + + if (lastLit != 0) { + do { + dist = ((pendingBuf[dBuf + lx * 2] << 8) & 0xff00) + | (pendingBuf[dBuf + lx * 2 + 1] & 0xff); + lc = (pendingBuf[lBuf + lx]) & 0xff; + lx++; + + if (dist == 0) { + sendCode(lc, ltree); // send a literal byte + } else { + // Here, lc is the match length - MIN_MATCH + code = Tree.LENGTH_CODE[lc]; + + sendCode(code + LITERALS + 1, ltree); // send the length code + extra = Tree.EXTRA_LBITS[code]; + if (extra != 0) { + lc -= Tree.BASE_LENGTH[code]; + sendBits(lc, extra); // send the extra length bits + } + dist--; // dist is now the match distance - 1 + code = Tree.distanceCode(dist); + + sendCode(code, dtree); // send the distance code + extra = Tree.EXTRA_DBITS[code]; + if (extra != 0) { + dist -= Tree.BASE_DIST[code]; + sendBits(dist, extra); // send the extra distance bits + } + } // literal or match pair ? + + // Check that the overlay between pending_buf and d_buf+l_buf is ok: + } while (lx < lastLit); + } + + sendCode(END_BLOCK, ltree); + lastEobLen = ltree[END_BLOCK * 2 + 1]; + } + + // Set the data type to ASCII or BINARY, using a crude approximation: + // binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. + // IN assertion: the fields freq of dyn_ltree are set and the total of all + // frequencies does not exceed 64K (to fit in an int on 16 bit machines). + private void setDataType() { + int n = 0; + int asciifreq = 0; + int binfreq = 0; + while (n < 7) { + binfreq += dynLtree[n * 2]; + n++; + } + while (n < 128) { + asciifreq += dynLtree[n * 2]; + n++; + } + while (n < LITERALS) { + binfreq += dynLtree[n * 2]; + n++; + } + dataType = (byte) (binfreq > (asciifreq >>> 2) ? Z_BINARY : Z_ASCII); + } + + // Flush the bit buffer, keeping at most 7 bits in it. 
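+    // Editor's note (worked example): if biValid == 13 on entry, biFlush()
+    // writes the low byte with putByte(), shifts biBuf right by 8 and leaves
+    // biValid = 5; if biValid == 16 the whole buffer is written with
+    // putShort() and reset. Either way at most 7 bits remain pending.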
+ private void biFlush() { + if (biValid == 16) { + putShort(biBuf); + biBuf = 0; + biValid = 0; + } else if (biValid >= 8) { + putByte((byte) biBuf); + biBuf >>>= 8; + biValid -= 8; + } + } + + // Flush the bit buffer and align the output on a byte boundary + private void biWindup() { + if (biValid > 8) { + putShort(biBuf); + } else if (biValid > 0) { + putByte((byte) biBuf); + } + biBuf = 0; + biValid = 0; + } + + // Copy a stored block, storing first the length and its + // one's complement if requested. + private void copyBlock(int buf, // the input data + int len, // its length + boolean header // true if block header must be written + ) { + biWindup(); // align on byte boundary + lastEobLen = 8; // enough lookahead for inflate + + if (header) { + putShort((short) len); + putShort((short) ~len); + } + putByte(window, buf, len); + } + + private void flushBlockOnly(boolean eof) { + trFlushBlock(blockStart >= 0 ? blockStart : -1, + strstart - blockStart, + eof); + blockStart = strstart; + strm.flushPending(); + } + + // Copy without compression as much as possible from the input stream, return + // the current block state. + // This function does not insert new strings in the dictionary since + // uncompressible data is probably not useful. This function is used + // only for the level=0 compression option. + // NOTE: this function should be optimized to avoid extra copying from + // window to pending_buf. + private int deflateStored(int flush) { + // Stored blocks are limited to 0xffff bytes, pending_buf is limited + // to pending_buf_size, and each stored block has a 5 byte header: + + int maxblocksize = 0xffff; + int maxstart; + + if (maxblocksize > pendingBufSize - 5) { + maxblocksize = pendingBufSize - 5; + } + + // Copy as much as possible from input to output: + while (true) { + // Fill the window as much as possible: + if (lookahead <= 1) { + fillWindow(); + if (lookahead == 0 && flush == Z_NO_FLUSH) { + return NEED_MORE; + } + if (lookahead == 0) { + break; // flush the current block + } + } + + strstart += lookahead; + lookahead = 0; + + // Emit a stored block if pending_buf will be full: + maxstart = blockStart + maxblocksize; + if (strstart == 0 || strstart >= maxstart) { + // strstart == 0 is possible when wraparound on 16-bit machine + lookahead = (strstart - maxstart); + strstart = maxstart; + + flushBlockOnly(false); + if (strm.availout == 0) { + return NEED_MORE; + } + + } + + // Flush if we may have to slide, otherwise block_start may become + // negative and the data will be gone: + if (strstart - blockStart >= wSize - MIN_LOOKAHEAD) { + flushBlockOnly(false); + if (strm.availout == 0) { + return NEED_MORE; + } + } + } + + flushBlockOnly(flush == Z_FINISH); + if (strm.availout == 0) { + return (flush == Z_FINISH) ? FINISH_STARTED : NEED_MORE; + } + + return flush == Z_FINISH ? FINISH_DONE : BLOCK_DONE; + } + + // Send a stored block + private void trStoredBlock(int buf, // input block + int storedlen, // length of input block + boolean eof // true if this is the last block for a file + ) { + sendBits((STORED_BLOCK << 1) + (eof ? 1 : 0), 3); // send block type + copyBlock(buf, storedlen, true); // with header + } + + // Determine the best encoding for the current block: dynamic trees, static + // trees or store, and output the encoded block to the zip file. 
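+    // Editor's note (worked example): for a 1000-byte block, the stored form
+    // costs storedlen + 5 = 1005 bytes (type bits plus the two length words).
+    // If the dynamic trees come to optlenb = 980 bytes and the static trees
+    // to staticlenb = 1010 bytes, the dynamic encoding is chosen; a stored
+    // block is used only when storedlen + 4 <= optlenb and the raw data is
+    // still available in the window (buf != -1).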
+ private void trFlushBlock(int buf, // input block, or NULL if too old + int storedlen, // length of input block + boolean eof // true if this is the last block for a file + ) { + int optlenb, staticlenb;// opt_len and static_len in bytes + int maxblindex = 0; // index of last bit length code of non zero freq + + // Build the Huffman trees unless a stored block is forced + if (level > 0) { + // Check if the file is ascii or binary + if (dataType == Z_UNKNOWN) { + setDataType(); + } + + // Construct the literal and distance trees + lDesc.buildTree(this); + + dDesc.buildTree(this); + + // At this point, opt_len and static_len are the total bit lengths of + // the compressed block data, excluding the tree representations. + + // Build the bit length tree for the above two trees, and get the index + // in BL_ORDER of the last bit length code to send. + maxblindex = buildBLTree(); + + // Determine the best encoding. Compute first the block length in bytes + optlenb = (optLen + 3 + 7) >>> 3; + staticlenb = (staticLen + 3 + 7) >>> 3; + + if (staticlenb <= optlenb) { + optlenb = staticlenb; + } + } else { + optlenb = staticlenb = storedlen + 5; // force a stored block + } + + if (storedlen + 4 <= optlenb && buf != -1) { + // 4: two words for the lengths + // The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + // Otherwise we can't have processed more than WSIZE input bytes since + // the last block flush, because compression would have been + // successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + // transform a block into a stored block. + trStoredBlock(buf, storedlen, eof); + } else if (staticlenb == optlenb) { + sendBits((STATIC_TREES << 1) + (eof ? 1 : 0), 3); + compressBlock(StaticTree.STATIC_LTREE, StaticTree.STATIC_DTREE); + } else { + sendBits((DYN_TREES << 1) + (eof ? 1 : 0), 3); + sendAllTree(lDesc.maxCode + 1, dDesc.maxCode + 1, maxblindex + 1); + compressBlock(dynLtree, dynDtree); + } + + // The above check is made mod 2^32, for files larger than 512 MB + // and uLong implemented on 32 bits. + + initBlock(); + + if (eof) { + biWindup(); + } + } + + // Fill the window when the lookahead becomes insufficient. + // Updates strstart and lookahead. + // + // IN assertion: lookahead < MIN_LOOKAHEAD + // OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + // At least one byte has been read, or avail_in == 0; reads are + // performed for at least two bytes (required for the zip translate_eol + // option -- not supported here). + private void fillWindow() { + int n, m; + int p; + int more; // Amount of free space at the end of the window. + + do { + more = (windowSize - lookahead - strstart); + + // Deal with !@#$% 64K limit: + if (more == 0 && strstart == 0 && lookahead == 0) { + more = wSize; + } else if (more == -1) { + // Very unlikely, but possible on 16 bit machine if strstart == 0 + // and lookahead == 1 (input done one byte at time) + more--; + + // If the window is almost full and there is insufficient lookahead, + // move the upper half to the lower one to make room in the upper half. + } else if (strstart >= wSize + wSize - MIN_LOOKAHEAD) { + System.arraycopy(window, wSize, window, 0, wSize); + matchStart -= wSize; + strstart -= wSize; // we now have strstart >= MAX_DIST + blockStart -= wSize; + + // Slide the hash table (could be avoided with 32 bit values + // at the expense of memory usage). We slide even when level == 0 + // to keep the hash table consistent if we switch back to level > 0 + // later. 
(Using level 0 permanently is not an optimal usage of + // zlib, so we don't care about this pathological case.) + + n = hashSize; + p = n; + do { + m = (head[--p] & 0xffff); + head[p] = (m >= wSize ? (short) (m - wSize) : 0); + } while (--n != 0); + + n = wSize; + p = n; + do { + m = (prev[--p] & 0xffff); + prev[p] = (m >= wSize ? (short) (m - wSize) : 0); + // If n is not on any hash chain, prev[n] is garbage but + // its value will never be used. + } while (--n != 0); + more += wSize; + } + + if (strm.availin == 0) { + return; + } + + // If there was no sliding: + // strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + // more == window_size - lookahead - strstart + // => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + // => more >= window_size - 2*WSIZE + 2 + // In the BIG_MEM or MMAP case (not yet supported), + // window_size == input_size + MIN_LOOKAHEAD && + // strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + // Otherwise, window_size == 2*WSIZE so more >= 2. + // If there was sliding, more >= WSIZE. So in all cases, more >= 2. + + n = strm.readBuf(window, strstart + lookahead, more); + lookahead += n; + + // Initialize the hash value now that we have some input: + if (lookahead >= MIN_MATCH) { + insh = window[strstart] & 0xff; + insh = (((insh) << hashShift) ^ (window[strstart + 1] & 0xff)) & hashMask; + } + // If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + // but this is not important since only literal bytes will be emitted. + } while (lookahead < MIN_LOOKAHEAD && strm.availin != 0); + } + + // Compress as much as possible from the input stream, return the current + // block state. + // This function does not perform lazy evaluation of matches and inserts + // new strings in the dictionary only for unmatched strings or for short + // matches. It is used only for the fast compression options. + private int deflateFast(int flush) { +// short hash_head = 0; // head of the hash chain + int hashhead = 0; // head of the hash chain + boolean bflush; // set if current block must be flushed + + while (true) { + // Make sure that we always have enough lookahead, except + // at the end of the input file. We need MAX_MATCH bytes + // for the next match, plus MIN_MATCH bytes to insert the + // string following the next match. + if (lookahead < MIN_LOOKAHEAD) { + fillWindow(); + if (lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return NEED_MORE; + } + if (lookahead == 0) { + break; // flush the current block + } + } + + // Insert the string window[strstart .. strstart+2] in the + // dictionary, and set hash_head to the head of the hash chain: + if (lookahead >= MIN_MATCH) { + insh = (((insh) << hashShift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hashMask; + +// prev[strstart&w_mask]=hash_head=head[ins_h]; + hashhead = (head[insh] & 0xffff); + prev[strstart & wMask] = head[insh]; + head[insh] = (short) strstart; + } + + // Find the longest match, discarding those <= prev_length. + // At this point we have always match_length < MIN_MATCH + + // To simplify the code, we prevent matches with the string + // of window index 0 (in particular we have to avoid a match + // of the string with itself at the start of the input file). 
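+            // Editor's note: with the default wSize = 32768 and
+            // MIN_LOOKAHEAD = MAX_MATCH + MIN_MATCH + 1 = 262, the distance
+            // test below admits candidates at most 32768 - 262 = 32506 bytes
+            // behind strstart, keeping every match inside the sliding window
+            // with room for a maximum-length comparison.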
+ if (hashhead != 0L + && ((strstart - hashhead) & 0xffff) <= wSize - MIN_LOOKAHEAD + && (strategy != Z_HUFFMAN_ONLY)) { + matchLength = longestMatch(hashhead); + } + // longest_match() sets match_start + + if (matchLength >= MIN_MATCH) { + // check_match(strstart, match_start, match_length); + + bflush = trTally(strstart - matchStart, matchLength - MIN_MATCH); + + lookahead -= matchLength; + + // Insert new strings in the hash table only if the match length + // is not too large. This saves time but degrades compression. + if (matchLength <= maxLazyMatch + && lookahead >= MIN_MATCH) { + matchLength--; // string at strstart already in hash table + do { + strstart++; + + insh = ((insh << hashShift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hashMask; +// prev[strstart&w_mask]=hash_head=head[ins_h]; + hashhead = (head[insh] & 0xffff); + prev[strstart & wMask] = head[insh]; + head[insh] = (short) strstart; + + // strstart never exceeds WSIZE-MAX_MATCH, so there are + // always MIN_MATCH bytes ahead. + } while (--matchLength != 0); + strstart++; + } else { + strstart += matchLength; + matchLength = 0; + insh = window[strstart] & 0xff; + + insh = (((insh) << hashShift) ^ (window[strstart + 1] & 0xff)) & hashMask; + // If lookahead < MIN_MATCH, ins_h is garbage, but it does not + // matter since it will be recomputed at next deflate call. + } + } else { + // No match, output a literal byte + + bflush = trTally(0, window[strstart] & 0xff); + lookahead--; + strstart++; + } + if (bflush) { + + flushBlockOnly(false); + if (strm.availout == 0) { + return NEED_MORE; + } + } + } + + flushBlockOnly(flush == Z_FINISH); + if (strm.availout == 0) { + if (flush == Z_FINISH) { + return FINISH_STARTED; + } else { + return NEED_MORE; + } + } + return flush == Z_FINISH ? FINISH_DONE : BLOCK_DONE; + } + + // Same as above, but achieves better compression. We use a lazy + // evaluation for matches: a match is finally adopted only if there is + // no better match at the next window position. + private int deflateSlow(int flush) { +// short hash_head = 0; // head of hash chain + int hashhead = 0; // head of hash chain + boolean bflush; // set if current block must be flushed + + // Process the input block. + while (true) { + // Make sure that we always have enough lookahead, except + // at the end of the input file. We need MAX_MATCH bytes + // for the next match, plus MIN_MATCH bytes to insert the + // string following the next match. + + if (lookahead < MIN_LOOKAHEAD) { + fillWindow(); + if (lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return NEED_MORE; + } + if (lookahead == 0) { + break; // flush the current block + } + } + + // Insert the string window[strstart .. strstart+2] in the + // dictionary, and set hash_head to the head of the hash chain: + + if (lookahead >= MIN_MATCH) { + insh = (((insh) << hashShift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hashMask; +// prev[strstart&w_mask]=hash_head=head[ins_h]; + hashhead = (head[insh] & 0xffff); + prev[strstart & wMask] = head[insh]; + head[insh] = (short) strstart; + } + + // Find the longest match, discarding those <= prev_length. + prevLength = matchLength; + prevMatch = matchStart; + matchLength = MIN_MATCH - 1; + + if (hashhead != 0 && prevLength < maxLazyMatch + && ((strstart - hashhead) & 0xffff) <= wSize - MIN_LOOKAHEAD) { + // To simplify the code, we prevent matches with the string + // of window index 0 (in particular we have to avoid a match + // of the string with itself at the start of the input file). 
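+                // Editor's note (lazy evaluation, illustrated): if a 3-byte
+                // match is found at strstart but a 5-byte match starts at
+                // strstart + 1, the shorter match is kept only as prevLength;
+                // the next iteration emits the intervening byte as a literal
+                // via trTally(0, ...) and adopts the longer match instead.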
+ + if (strategy != Z_HUFFMAN_ONLY) { + matchLength = longestMatch(hashhead); + } + // longest_match() sets match_start + + if (matchLength <= 5 && (strategy == Z_FILTERED + || (matchLength == MIN_MATCH + && strstart - matchStart > 4096))) { + + // If prev_match is also MIN_MATCH, match_start is garbage + // but we will ignore the current match anyway. + matchLength = MIN_MATCH - 1; + } + } + + // If there was a match at the previous step and the current + // match is not better, output the previous match: + if (prevLength >= MIN_MATCH && matchLength <= prevLength) { + int max_insert = strstart + lookahead - MIN_MATCH; + // Do not insert strings in hash table beyond this. + + // check_match(strstart-1, prev_match, prev_length); + + bflush = trTally(strstart - 1 - prevMatch, prevLength - MIN_MATCH); + + // Insert in hash table all strings up to the end of the match. + // strstart-1 and strstart are already inserted. If there is not + // enough lookahead, the last two strings are not inserted in + // the hash table. + lookahead -= prevLength - 1; + prevLength -= 2; + do { + if (++strstart <= max_insert) { + insh = (((insh) << hashShift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hashMask; + //prev[strstart&w_mask]=hash_head=head[ins_h]; + hashhead = (head[insh] & 0xffff); + prev[strstart & wMask] = head[insh]; + head[insh] = (short) strstart; + } + } while (--prevLength != 0); + matchAvailable = 0; + matchLength = MIN_MATCH - 1; + strstart++; + + if (bflush) { + flushBlockOnly(false); + if (strm.availout == 0) { + return NEED_MORE; + } + } + } else if (matchAvailable != 0) { + + // If there was no match at the previous position, output a + // single literal. If there was a match but the current match + // is longer, truncate the previous match to a single literal. + + bflush = trTally(0, window[strstart - 1] & 0xff); + + if (bflush) { + flushBlockOnly(false); + } + strstart++; + lookahead--; + if (strm.availout == 0) { + return NEED_MORE; + } + } else { + // There is no previous match to compare with, wait for + // the next step to decide. + + matchAvailable = 1; + strstart++; + lookahead--; + } + } + + if (matchAvailable != 0) { + bflush = trTally(0, window[strstart - 1] & 0xff); + matchAvailable = 0; + } + flushBlockOnly(flush == Z_FINISH); + + if (strm.availout == 0) { + if (flush == Z_FINISH) { + return FINISH_STARTED; + } else { + return NEED_MORE; + } + } + + return flush == Z_FINISH ? FINISH_DONE : BLOCK_DONE; + } + + private int longestMatch(int cmatch) { + int curmatch = cmatch; + int chainlength = maxChainLength; // max hash chain length + int scan = strstart; // current string + int match; // matched string + int len; // length of current match + int bestlen = prevLength; // best match length so far + int limit = strstart > (wSize - MIN_LOOKAHEAD) ? strstart - (wSize - MIN_LOOKAHEAD) : 0; + int nmatch = this.niceMatch; + + // Stop when cur_match becomes <= limit. To simplify the code, + // we prevent matches with the string of window index 0. + + int wmask = wMask; + + int strend = strstart + MAX_MATCH; + byte scanend1 = window[scan + bestlen - 1]; + byte scanend = window[scan + bestlen]; + + // The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + // It is easy to get rid of this optimization if necessary. + + // Do not waste too much time if we already have a good match: + if (prevLength >= goodMatch) { + chainlength >>= 2; + } + + // Do not look for matches beyond the end of the input. This is necessary + // to make deflate deterministic. 
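+        // Editor's note: clamping niceMatch to the remaining lookahead means
+        // a match is never accepted beyond the bytes actually read into the
+        // window; e.g. with niceMatch = 128 (level 6) and only 100 bytes of
+        // lookahead, the chain walk stops as soon as a 100-byte match is
+        // found, and the result below is likewise capped at lookahead.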
+ if (nmatch > lookahead) { + nmatch = lookahead; + } + + do { + match = curmatch; + + // Skip to next match if the match length cannot increase + // or if the match length is less than 2: + if (window[match + bestlen] != scanend + || window[match + bestlen - 1] != scanend1 + || window[match] != window[scan] + || window[++match] != window[scan + 1]) { + continue; + } + + // The check at best_len-1 can be removed because it will be made + // again later. (This heuristic is not always a win.) + // It is not necessary to compare scan[2] and match[2] since they + // are always equal when the other bytes match, given that + // the hash keys are equal and that HASH_BITS >= 8. + scan += 2; + match++; + + // We check for insufficient lookahead only every 8th comparison; + // the 256th check will be made at strstart+258. + do { + } while (window[++scan] == window[++match] + && window[++scan] == window[++match] + && window[++scan] == window[++match] + && window[++scan] == window[++match] + && window[++scan] == window[++match] + && window[++scan] == window[++match] + && window[++scan] == window[++match] + && window[++scan] == window[++match] + && scan < strend); + + len = MAX_MATCH - (strend - scan); + scan = strend - MAX_MATCH; + + if (len > bestlen) { + matchStart = curmatch; + bestlen = len; + if (len >= nmatch) { + break; + } + scanend1 = window[scan + bestlen - 1]; + scanend = window[scan + bestlen]; + } + + } while ((curmatch = (prev[curmatch & wmask] & 0xffff)) > limit && --chainlength != 0); + + if (bestlen <= lookahead) { + return bestlen; + } + return lookahead; + } + + public int deflateInit(ZStream strm, int level, int bits) { + return deflateInit2(strm, level, Z_DEFLATED, bits, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY); + } + + public int deflateInit(ZStream strm, int level) { + return deflateInit(strm, level, MAX_WBITS); + } + + private int deflateInit2(ZStream strm, int level, int method, int windowBits, + int memLevel, int strategy) { + int nheader = 0; + + strm.msg = null; + + if (level == Z_DEFAULT_COMPRESSION) { + level = 6; + } + + if (windowBits < 0) { // undocumented feature: suppress zlib header + nheader = 1; + windowBits = -windowBits; + } + + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL + || method != Z_DEFLATED + || windowBits < 9 || windowBits > 15 || level < 0 || level > 9 + || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { + return Z_STREAM_ERROR; + } + + strm.dstate = this; + + this.noheader = nheader; + wBits = windowBits; + wSize = 1 << wBits; + wMask = wSize - 1; + + hashBits = memLevel + 7; + hashSize = 1 << hashBits; + hashMask = hashSize - 1; + hashShift = ((hashBits + MIN_MATCH - 1) / MIN_MATCH); + + window = new byte[wSize * 2]; + prev = new short[wSize]; + head = new short[hashSize]; + + litBufsize = 1 << (memLevel + 6); // 16K elements by default + + // We overlay pending_buf and d_buf+l_buf. This works since the average + // output size for (length,distance) codes is <= 24 bits. + pendingBuf = new byte[litBufsize * 4]; + pendingBufSize = litBufsize * 4; + + dBuf = litBufsize / 2; + lBuf = (1 + 2) * litBufsize; + + this.level = level; + + this.strategy = strategy; + return deflateReset(strm); + } + + private int deflateReset(ZStream strm) { + strm.totalin = strm.totalout = 0; + strm.msg = null; // + strm.dataType = Z_UNKNOWN; + + pending = 0; + pendingOut = 0; + + if (noheader < 0) { + noheader = 0; // was set to -1 by deflate(..., Z_FINISH); + } + status = (noheader != 0) ? 
BUSY_STATE : INIT_STATE; + strm.adler = Adler32.adler32(0, null, 0, 0); + + lastFlush = Z_NO_FLUSH; + + trinit(); + lminit(); + return Z_OK; + } + + public int deflateEnd() { + if (status != INIT_STATE && status != BUSY_STATE && status != FINISH_STATE) { + return Z_STREAM_ERROR; + } + // Deallocate in reverse order of allocations: + pendingBuf = null; + head = null; + prev = null; + window = null; + // free + // dstate=null; + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; + } + + public int deflateParams(ZStream strm, int _level, int _strategy) { + int err = Z_OK; + + if (_level == Z_DEFAULT_COMPRESSION) { + _level = 6; + } + if (_level < 0 || _level > 9 + || _strategy < 0 || _strategy > Z_HUFFMAN_ONLY) { + return Z_STREAM_ERROR; + } + + if (CONFIG_TABLE[level].func != CONFIG_TABLE[_level].func + && strm.totalin != 0) { + // Flush the last buffer: + err = strm.deflate(Z_PARTIAL_FLUSH); + } + + if (level != _level) { + level = _level; + maxLazyMatch = CONFIG_TABLE[level].maxlazy; + goodMatch = CONFIG_TABLE[level].goodlength; + niceMatch = CONFIG_TABLE[level].nicelength; + maxChainLength = CONFIG_TABLE[level].maxchain; + } + strategy = _strategy; + return err; + } + + public int deflateSetDictionary(ZStream strm, byte[] dictionary, int dictLength) { + int length = dictLength; + int index = 0; + + if (dictionary == null || status != INIT_STATE) { + return Z_STREAM_ERROR; + } + + strm.adler = Adler32.adler32(strm.adler, dictionary, 0, dictLength); + + if (length < MIN_MATCH) { + return Z_OK; + } + if (length > wSize - MIN_LOOKAHEAD) { + length = wSize - MIN_LOOKAHEAD; + index = dictLength - length; // use the tail of the dictionary + } + System.arraycopy(dictionary, index, window, 0, length); + strstart = length; + blockStart = length; + + // Insert all strings in the hash table (except for the last two bytes). + // s->lookahead stays null, so s->ins_h will be recomputed at the next + // call of fill_window. 
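+        // Sketch of the rolling hash applied below. With
+        // hashShift = (hashBits + MIN_MATCH - 1) / MIN_MATCH (see deflateInit2),
+        // each update step is
+        //     h = ((h << hashShift) ^ (nextByte & 0xff)) & hashMask;
+        // so after MIN_MATCH (= 3) updates the oldest byte has been shifted
+        // out of the hash value. ("nextByte" is a placeholder name, not a
+        // field of this class.)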
+ + insh = window[0] & 0xff; + insh = (((insh) << hashShift) ^ (window[1] & 0xff)) & hashMask; + + for (int n = 0; n <= length - MIN_MATCH; n++) { + insh = (((insh) << hashShift) ^ (window[(n) + (MIN_MATCH - 1)] & 0xff)) & hashMask; + prev[n & wMask] = head[insh]; + head[insh] = (short) n; + } + return Z_OK; + } + + public int deflate(ZStream strm, int flush) { + int oldflush; + + if (flush > Z_FINISH || flush < 0) { + return Z_STREAM_ERROR; + } + + if (strm.nextout == null + || (strm.nextin == null && strm.availin != 0) + || (status == FINISH_STATE && flush != Z_FINISH)) { + strm.msg = zerrmsg[Z_NEED_DICT - (Z_STREAM_ERROR)]; + return Z_STREAM_ERROR; + } + if (strm.availout == 0) { + strm.msg = zerrmsg[Z_NEED_DICT - (Z_BUF_ERROR)]; + return Z_BUF_ERROR; + } + + this.strm = strm; // just in case + oldflush = lastFlush; + lastFlush = flush; + + // Write the zlib header + if (status == INIT_STATE) { + int header = (Z_DEFLATED + ((wBits - 8) << 4)) << 8; + int level_flags = ((level - 1) & 0xff) >> 1; + + if (level_flags > 3) { + level_flags = 3; + } + header |= (level_flags << 6); + if (strstart != 0) { + header |= PRESET_DICT; + } + header += 31 - (header % 31); + + status = BUSY_STATE; + putShortMSB(header); + + + // Save the adler32 of the preset dictionary: + if (strstart != 0) { + putShortMSB((int) (strm.adler >>> 16)); + putShortMSB((int) (strm.adler & 0xffff)); + } + strm.adler = Adler32.adler32(0, null, 0, 0); + } + + // Flush as much pending output as possible + if (pending != 0) { + strm.flushPending(); + if (strm.availout == 0) { + // Since avail_out is 0, deflate will be called again with + // more output space, but possibly with both pending and + // avail_in equal to zero. There won't be anything to do, + // but this is not an error situation so make sure we + // return OK instead of BUF_ERROR at next call of deflate: + lastFlush = -1; + return Z_OK; + } + + // Make sure there is something to do and avoid duplicate consecutive + // flushes. For repeated and useless calls with Z_FINISH, we keep + // returning Z_STREAM_END instead of Z_BUFF_ERROR. + } else if (strm.availin == 0 && flush <= oldflush + && flush != Z_FINISH) { + strm.msg = zerrmsg[Z_NEED_DICT - (Z_BUF_ERROR)]; + return Z_BUF_ERROR; + } + + // User must not provide more input after the first FINISH: + if (status == FINISH_STATE && strm.availin != 0) { + strm.msg = zerrmsg[Z_NEED_DICT - (Z_BUF_ERROR)]; + return Z_BUF_ERROR; + } + + // Start a new block or continue the current one. + if (strm.availin != 0 || lookahead != 0 + || (flush != Z_NO_FLUSH && status != FINISH_STATE)) { + int bstate = -1; + switch (CONFIG_TABLE[level].func) { + case STORED: + bstate = deflateStored(flush); + break; + case FAST: + bstate = deflateFast(flush); + break; + case SLOW: + bstate = deflateSlow(flush); + break; + default: + } + + if (bstate == FINISH_STARTED || bstate == FINISH_DONE) { + status = FINISH_STATE; + } + if (bstate == NEED_MORE || bstate == FINISH_STARTED) { + if (strm.availout == 0) { + lastFlush = -1; // avoid BUF_ERROR next call, see above + } + return Z_OK; + // If flush != Z_NO_FLUSH && avail_out == 0, the next call + // of deflate should use the same flush parameter to make sure + // that the flush is complete. So we don't have to output an + // empty block here, this will be done at next call. This also + // ensures that for a very small output buffer, we emit at most + // one empty block. 
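+                // Example of the contract described above: a caller draining
+                // into a very small output buffer keeps calling deflate() with
+                // the same flush value until availout stays non-zero; at most
+                // one empty block is emitted for the whole flush.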
+ } + + if (bstate == BLOCK_DONE) { + if (flush == Z_PARTIAL_FLUSH) { + trAlign(); + } else { // FULL_FLUSH or SYNC_FLUSH + trStoredBlock(0, 0, false); + // For a full flush, this empty block will be recognized + // as a special marker by inflate_sync(). + if (flush == Z_FULL_FLUSH) { + //state.head[s.hash_size-1]=0; + for (int i = 0; i < hashSize/*-1*/; i++) // forget history + { + head[i] = 0; + } + } + } + strm.flushPending(); + if (strm.availout == 0) { + lastFlush = -1; // avoid BUF_ERROR at next call, see above + return Z_OK; + } + } + } + + if (flush != Z_FINISH) { + return Z_OK; + } + if (noheader != 0) { + return Z_STREAM_END; + } + + // Write the zlib trailer (adler32) + putShortMSB((int) (strm.adler >>> 16)); + putShortMSB((int) (strm.adler & 0xffff)); + strm.flushPending(); + + // If avail_out is zero, the application will call deflate again + // to flush the rest. + noheader = -1; // write the trailer only once! + return pending != 0 ? Z_OK : Z_STREAM_END; + } +} diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/InfBlocks.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/InfBlocks.java new file mode 100644 index 0000000..c903e1d --- /dev/null +++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/InfBlocks.java @@ -0,0 +1,607 @@ + +package org.xbib.io.compress.zlib; + +final class InfBlocks { + + private final static int MANY = 1440; + // And'ing with mask[n] masks the lower n bits + private final static int[] inflate_mask = { + 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000f, + 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, 0x000001ff, + 0x000003ff, 0x000007ff, 0x00000fff, 0x00001fff, 0x00003fff, + 0x00007fff, 0x0000ffff + }; + // Table for deflate from PKZIP's appnote.txt. + private final static int[] border = { // Order of the bit length code lengths + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + }; + private final static int Z_OK = 0; + private final static int Z_STREAM_END = 1; + private final static int Z_STREAM_ERROR = -2; + private final static int Z_DATA_ERROR = -3; + private final static int Z_BUF_ERROR = -5; + private final static int TYPE = 0; // get type bits (3, including end bit) + private final static int LENS = 1; // get lengths for stored + private final static int STORED = 2;// processing stored block + private final static int TABLE = 3; // get table lengths + private final static int BTREE = 4; // get bit lengths tree for a dynamic block + private final static int DTREE = 5; // get length, distance trees for a dynamic block + private final static int CODES = 6; // processing fixed or dynamic block + private final static int DRY = 7; // output remaining window bytes + private final static int DONE = 8; // finished last block, done + private final static int BAD = 9; // ot a data error--stuck here + private int mode; // current inflate_block mode + private int left; // if STORED, bytes left to copy + private int table; // table lengths (14 bits) + private int index; // index into blens (or border) + private int[] blens; // bit lengths of codes + private int[] bb = new int[1]; // bit length tree depth + private int[] tb = new int[1]; // bit length decoding tree + private InfCodes codes = new InfCodes(); // if CODES, current state + private int last; // true if this block is the last block + // mode independent information + protected int bitk; // bits in bit buffer + protected int bitb; // bit buffer + private int[] hufts; // single malloc for tree space + protected byte[] window; // 
sliding window + protected int end; // one byte after sliding window + protected int read; // window read pointer + protected int write; // window write pointer + private Object checkfn; // check function + private long check; // check on output + private InfTree inftree = new InfTree(); + + InfBlocks(ZStream z, Object checkfn, int w) { + hufts = new int[MANY * 3]; + window = new byte[w]; + end = w; + this.checkfn = checkfn; + mode = TYPE; + reset(z, null); + } + + protected void reset(ZStream z, long[] c) { + if (c != null) { + c[0] = check; + } + if (mode == CODES) { + codes.free(z); + } + mode = TYPE; + bitk = 0; + bitb = 0; + read = write = 0; + if (checkfn != null) { + z.adler = check = Adler32.adler32(0L, null, 0, 0); + } + } + + protected int proc(ZStream z, int r) { + int t; // temporary storage + int b; // bit buffer + int k; // bits in bit buffer + int p; // input data pointer + int n; // bytes available there + int q; // output window write pointer + int m; // bytes to end of window or read pointer + + // copy input/output information to locals (UPDATE macro restores) + { + p = z.nextinindex; + n = z.availin; + b = bitb; + k = bitk; + } + { + q = write; + m = q < read ? read - q - 1 : end - q; + } + + // process input based on current state + while (true) { + switch (mode) { + case TYPE: + while (k < (3)) { + if (n != 0) { + r = Z_OK; + } else { + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + t = b & 7; + last = t & 1; + switch (t >>> 1) { + case 0: // stored + b >>>= (3); + k -= (3); + t = k & 7; // go to byte boundary + b >>>= (t); + k -= (t); + mode = LENS; // get length of stored block + break; + case 1: // fixed + int[] bl = new int[1]; + int[] bd = new int[1]; + int[][] tl = new int[1][]; + int[][] td = new int[1][]; + InfTree.inflate_trees_fixed(bl, bd, tl, td, z); + codes.init(bl[0], bd[0], tl[0], 0, td[0], 0, z); + b >>>= (3); + k -= (3); + mode = CODES; + break; + case 2: // dynamic + b >>>= (3); + k -= (3); + mode = TABLE; + break; + case 3: // illegal + b >>>= (3); + k -= (3); + mode = BAD; + z.msg = "invalid block type"; + r = Z_DATA_ERROR; + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + break; + case LENS: + while (k < (32)) { + if (n != 0) { + r = Z_OK; + } else { + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + if ((((~b) >>> 16) & 0xffff) != (b & 0xffff)) { + mode = BAD; + z.msg = "invalid stored block lengths"; + r = Z_DATA_ERROR; + + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + left = (b & 0xffff); + b = k = 0; // dump bits + mode = left != 0 ? STORED : (last != 0 ? DRY : TYPE); + break; + case STORED: + if (n == 0) { + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + if (m == 0) { + if (q == end && read != 0) { + q = 0; + m = q < read ? read - q - 1 : end - q; + } + if (m == 0) { + write = q; + r = inflateFlush(z, r); + q = write; + m = q < read ? read - q - 1 : end - q; + if (q == end && read != 0) { + q = 0; + m = q < read ? 
read - q - 1 : end - q; + } + if (m == 0) { + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + } + } + r = Z_OK; + t = left; + if (t > n) { + t = n; + } + if (t > m) { + t = m; + } + System.arraycopy(z.nextin, p, window, q, t); + p += t; + n -= t; + q += t; + m -= t; + if ((left -= t) != 0) { + break; + } + mode = last != 0 ? DRY : TYPE; + break; + case TABLE: + while (k < (14)) { + if (n != 0) { + r = Z_OK; + } else { + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + table = t = (b & 0x3fff); + if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29) { + mode = BAD; + z.msg = "too many length or distance symbols"; + r = Z_DATA_ERROR; + + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); + if (blens == null || blens.length < t) { + blens = new int[t]; + } else { + for (int i = 0; i < t; i++) { + blens[i] = 0; + } + } + b >>>= (14); + k -= (14); + index = 0; + mode = BTREE; + break; + case BTREE: + while (index < 4 + (table >>> 10)) { + while (k < (3)) { + if (n != 0) { + r = Z_OK; + } else { + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + blens[border[index++]] = b & 7; + b >>>= 3; + k -= 3; + } + while (index < 19) { + blens[border[index++]] = 0; + } + bb[0] = 7; + t = inftree.inflateTreesBits(blens, bb, tb, hufts, z); + if (t != Z_OK) { + r = t; + if (r == Z_DATA_ERROR) { + blens = null; + mode = BAD; + } + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + index = 0; + mode = DTREE; + break; + case DTREE: + while (true) { + t = table; + if (!(index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f))) { + break; + } + //int[] h; + int i, j, c; + t = bb[0]; + while (k < (t)) { + if (n != 0) { + r = Z_OK; + } else { + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + t = hufts[(tb[0] + (b & inflate_mask[t])) * 3 + 1]; + c = hufts[(tb[0] + (b & inflate_mask[t])) * 3 + 2]; + if (c < 16) { + b >>>= (t); + k -= (t); + blens[index++] = c; + } else { // c == 16..18 + i = c == 18 ? 7 : c - 14; + j = c == 18 ? 11 : 3; + while (k < (t + i)) { + if (n != 0) { + r = Z_OK; + } else { + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + b >>>= (t); + k -= (t); + j += (b & inflate_mask[i]); + b >>>= (i); + k -= (i); + i = index; + t = table; + if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) || + (c == 16 && i < 1)) { + blens = null; + mode = BAD; + z.msg = "invalid bit length repeat"; + r = Z_DATA_ERROR; + + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + c = c == 16 ? 
blens[i - 1] : 0; + do { + blens[i++] = c; + } while (--j != 0); + index = i; + } + } + tb[0] = -1; + { + int[] bl = new int[1]; + int[] bd = new int[1]; + int[] tl = new int[1]; + int[] td = new int[1]; + bl[0] = 9; // must be <= 9 for lookahead assumptions + bd[0] = 6; // must be <= 9 for lookahead assumptions + t = table; + t = inftree.inflate_trees_dynamic(257 + (t & 0x1f), + 1 + ((t >> 5) & 0x1f), + blens, bl, bd, tl, td, hufts, z); + if (t != Z_OK) { + if (t == Z_DATA_ERROR) { + blens = null; + mode = BAD; + } + r = t; + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + codes.init(bl[0], bd[0], hufts, tl[0], hufts, td[0], z); + } + mode = CODES; + break; + case CODES: + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + if ((r = codes.proc(this, z, r)) != Z_STREAM_END) { + return inflateFlush(z, r); + } + r = Z_OK; + codes.free(z); + p = z.nextinindex; + n = z.availin; + b = bitb; + k = bitk; + q = write; + m = q < read ? read - q - 1 : end - q; + if (last == 0) { + mode = TYPE; + break; + } + mode = DRY; + break; + case DRY: + write = q; + r = inflateFlush(z, r); + q = write; + m = q < read ? read - q - 1 : end - q; + if (read != write) { + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + mode = DONE; + break; + case DONE: + r = Z_STREAM_END; + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + case BAD: + r = Z_DATA_ERROR; + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + default: + r = Z_STREAM_ERROR; + bitb = b; + bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + write = q; + return inflateFlush(z, r); + } + } + } + + protected void free(ZStream z) { + reset(z, null); + window = null; + hufts = null; + //ZFREE(z, s); + } + + protected void setDictionary(byte[] d, int start, int n) { + System.arraycopy(d, start, window, 0, n); + read = write = n; + } + + // Returns true if inflate is currently at the end of a block generated + // by Z_SYNC_FLUSH or Z_FULL_FLUSH. + protected int syncPoint() { + return mode == LENS ? 1 : 0; + } + + // copy as much as possible from the sliding window to the output area + protected int inflateFlush(ZStream z, int r) { + int n; + int p; + int q; + + // local copies of source and destination pointers + p = z.nextoutindex; + q = read; + + // compute number of bytes to copy as far as end of window + n = (q <= write ? 
write : end) - q; + if (n > z.availout) { + n = z.availout; + } + if (n != 0 && r == Z_BUF_ERROR) { + r = Z_OK; + } + + // update counters + z.availout -= n; + z.totalout += n; + + // update check information + if (checkfn != null) { + z.adler = check = Adler32.adler32(check, window, q, n); + } + + // copy as far as end of window + System.arraycopy(window, q, z.nextout, p, n); + p += n; + q += n; + + // see if more to copy at beginning of window + if (q == end) { + // wrap pointers + q = 0; + if (write == end) { + write = 0; + } + + // compute bytes to copy + n = write - q; + if (n > z.availout) { + n = z.availout; + } + if (n != 0 && r == Z_BUF_ERROR) { + r = Z_OK; + } + + // update counters + z.availout -= n; + z.totalout += n; + + // update check information + if (checkfn != null) { + z.adler = check = Adler32.adler32(check, window, q, n); + } + + // copy + System.arraycopy(window, q, z.nextout, p, n); + p += n; + q += n; + } + + // update pointers + z.nextoutindex = p; + read = q; + + // done + return r; + } +} diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/InfCodes.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/InfCodes.java new file mode 100644 index 0000000..2273f53 --- /dev/null +++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/InfCodes.java @@ -0,0 +1,677 @@ + +package org.xbib.io.compress.zlib; + +class InfCodes { + + private static final int[] inflate_mask = { + 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000f, + 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, 0x000001ff, + 0x000003ff, 0x000007ff, 0x00000fff, 0x00001fff, 0x00003fff, + 0x00007fff, 0x0000ffff + }; + private static final int Z_OK = 0; + private static final int Z_STREAM_END = 1; + private static final int Z_STREAM_ERROR = -2; + private static final int Z_DATA_ERROR = -3; + + // waiting for "i:"=input, + // "o:"=output, + // "x:"=nothing + private static final int START = 0; // x: set up for LEN + private static final int LEN = 1; // i: get length/literal/eob next + private static final int LENEXT = 2; // i: getting length extra (have base) + private static final int DIST = 3; // i: get distance next + private static final int DISTEXT = 4;// i: getting distance extra + private static final int COPY = 5; // o: copying bytes in window, waiting for space + private static final int LIT = 6; // o: got literal, waiting for output space + private static final int WASH = 7; // o: got eob, possibly still output waiting + private static final int END = 8; // x: got eob and all data flushed + private static final int BADCODE = 9;// x: got error + private int mode; // current inflate_codes mode + + // mode dependent information + private int len; + private int[] tree; // pointer into tree + private int tree_index = 0; + private int need; // bits needed + private int lit; + + // if EXT or COPY, where and how much + private int get; // bits to get for extra + private int dist; // distance back to copy from + private byte lbits; // ltree bits decoded per branch + private byte dbits; // dtree bits decoder per branch + private int[] ltree; // literal/length/eob tree + private int ltree_index; // literal/length/eob tree + private int[] dtree; // distance tree + private int dtree_index; // distance tree + + + protected void init(int bl, int bd, + int[] tl, int tl_index, + int[] td, int td_index, ZStream z) { + mode = START; + lbits = (byte) bl; + dbits = (byte) bd; + ltree = tl; + ltree_index = tl_index; + dtree = td; + dtree_index = td_index; + tree = null; + } + + 
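+    // Overview comment (added here; not in the original source): proc()
+    // advances the state machine declared above, roughly
+    //     START -> LEN -> (LENEXT) -> DIST -> (DISTEXT) -> COPY -> START
+    // for each length/distance pair, LEN -> LIT -> START for a literal, and
+    // LEN -> WASH -> END once the end-of-block code is decoded.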
protected int proc(InfBlocks s, ZStream z, int r) { + int j; // temporary storage + //int[] t; // temporary pointer + int tindex; // temporary pointer + int e; // extra bits or operation + int b = 0; // bit buffer + int k = 0; // bits in bit buffer + int p = 0; // input data pointer + int n; // bytes available there + int q; // output window write pointer + int m; // bytes to end of window or read pointer + int f; // pointer to copy strings from + + // copy input/output information to locals (UPDATE macro restores) + p = z.nextinindex; + n = z.availin; + b = s.bitb; + k = s.bitk; + q = s.write; + m = q < s.read ? s.read - q - 1 : s.end - q; + + // process input and output based on current state + while (true) { + switch (mode) { + // waiting for "i:"=input, "o:"=output, "x:"=nothing + case START: // x: set up for LEN + if (m >= 258 && n >= 10) { + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + r = inflate_fast(lbits, dbits, + ltree, ltree_index, + dtree, dtree_index, + s, z); + + p = z.nextinindex; + n = z.availin; + b = s.bitb; + k = s.bitk; + q = s.write; + m = q < s.read ? s.read - q - 1 : s.end - q; + + if (r != Z_OK) { + mode = r == Z_STREAM_END ? WASH : BADCODE; + break; + } + } + need = lbits; + tree = ltree; + tree_index = ltree_index; + + mode = LEN; + break; + case LEN: // i: get length/literal/eob next + j = need; + + while (k < (j)) { + if (n != 0) { + r = Z_OK; + } else { + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + } + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + + tindex = (tree_index + (b & inflate_mask[j])) * 3; + + b >>>= (tree[tindex + 1]); + k -= (tree[tindex + 1]); + + e = tree[tindex]; + + if (e == 0) { // literal + lit = tree[tindex + 2]; + mode = LIT; + break; + } + if ((e & 16) != 0) { // length + get = e & 15; + len = tree[tindex + 2]; + mode = LENEXT; + break; + } + if ((e & 64) == 0) { // next table + need = e; + tree_index = tindex / 3 + tree[tindex + 2]; + break; + } + if ((e & 32) != 0) { // end of block + mode = WASH; + break; + } + mode = BADCODE; // invalid code + z.msg = "invalid literal/length code"; + r = Z_DATA_ERROR; + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + + case LENEXT: // i: getting length extra (have base) + j = get; + + while (k < (j)) { + if (n != 0) { + r = Z_OK; + } else { + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + } + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + + len += (b & inflate_mask[j]); + + b >>= j; + k -= j; + + need = dbits; + tree = dtree; + tree_index = dtree_index; + mode = DIST; + break; + case DIST: // i: get distance next + j = need; + + while (k < (j)) { + if (n != 0) { + r = Z_OK; + } else { + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + } + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + + tindex = (tree_index + (b & inflate_mask[j])) * 3; + + b >>= tree[tindex + 1]; + k -= tree[tindex + 1]; + + e = (tree[tindex]); + if ((e & 16) != 0) { // distance + get = e & 15; + dist = tree[tindex + 2]; + mode = DISTEXT; + break; + } + if ((e & 64) == 0) { // next table + need = e; + tree_index = tindex / 3 + 
tree[tindex + 2]; + break; + } + mode = BADCODE; // invalid code + z.msg = "invalid distance code"; + r = Z_DATA_ERROR; + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + + case DISTEXT: // i: getting distance extra + j = get; + + while (k < (j)) { + if (n != 0) { + r = Z_OK; + } else { + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + } + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + + dist += (b & inflate_mask[j]); + + b >>= j; + k -= j; + + mode = COPY; + break; + case COPY: // o: copying bytes in window, waiting for space + f = q - dist; + while (f < 0) { // modulo window size-"while" instead + f += s.end; // of "if" handles invalid distances + } + while (len != 0) { + + if (m == 0) { + if (q == s.end && s.read != 0) { + q = 0; + m = q < s.read ? s.read - q - 1 : s.end - q; + } + if (m == 0) { + s.write = q; + r = s.inflateFlush(z, r); + q = s.write; + m = q < s.read ? s.read - q - 1 : s.end - q; + + if (q == s.end && s.read != 0) { + q = 0; + m = q < s.read ? s.read - q - 1 : s.end - q; + } + + if (m == 0) { + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + } + } + } + + s.window[q++] = s.window[f++]; + m--; + + if (f == s.end) { + f = 0; + } + len--; + } + mode = START; + break; + case LIT: // o: got literal, waiting for output space + if (m == 0) { + if (q == s.end && s.read != 0) { + q = 0; + m = q < s.read ? s.read - q - 1 : s.end - q; + } + if (m == 0) { + s.write = q; + r = s.inflateFlush(z, r); + q = s.write; + m = q < s.read ? s.read - q - 1 : s.end - q; + + if (q == s.end && s.read != 0) { + q = 0; + m = q < s.read ? s.read - q - 1 : s.end - q; + } + if (m == 0) { + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + } + } + } + r = Z_OK; + + s.window[q++] = (byte) lit; + m--; + + mode = START; + break; + case WASH: // o: got eob, possibly more output + if (k > 7) { // return unused byte, if any + k -= 8; + n++; + p--; // can always return one + } + + s.write = q; + r = s.inflateFlush(z, r); + q = s.write; + m = q < s.read ? s.read - q - 1 : s.end - q; + + if (s.read != s.write) { + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + } + mode = END; + break; + case END: + r = Z_STREAM_END; + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + + case BADCODE: // x: got error + + r = Z_DATA_ERROR; + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + + default: + r = Z_STREAM_ERROR; + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + return s.inflateFlush(z, r); + } + } + } + + protected void free(ZStream z) { + // ZFREE(z, c); + } + + // Called with number of bytes left to write in window at least 258 + // (the maximum string length) and number of input bytes available + // at least ten. The ten bytes are six bytes for the longest length/ + // distance pair plus four bytes for overloading the bit buffer. 
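+    // (Arithmetic behind the "ten bytes": a length/distance pair needs at
+    // most 15 bits for the length code plus 5 extra bits, and 15 bits for the
+    // distance code plus 13 extra bits -- 48 bits, or six bytes -- and up to
+    // four further bytes may already be loaded in the bit buffer.)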
+ protected int inflate_fast(int bl, int bd, + int[] tl, int tl_index, + int[] td, int td_index, + InfBlocks s, ZStream z) { + int t; // temporary pointer + int[] tp; // temporary pointer + int tp_index; // temporary pointer + int e; // extra bits or operation + int b; // bit buffer + int k; // bits in bit buffer + int p; // input data pointer + int n; // bytes available there + int q; // output window write pointer + int m; // bytes to end of window or read pointer + int ml; // mask for literal/length tree + int md; // mask for distance tree + int c; // bytes to copy + int d; // distance back to copy from + int r; // copy source pointer + + int tp_index_t_3; // (tp_index+t)*3 + + // load input, output, bit values + p = z.nextinindex; + n = z.availin; + b = s.bitb; + k = s.bitk; + q = s.write; + m = q < s.read ? s.read - q - 1 : s.end - q; + + // initialize masks + ml = inflate_mask[bl]; + md = inflate_mask[bd]; + + // do until not enough input or output space for fast loop + do { // assume called with m >= 258 && n >= 10 + // get literal/length code + while (k < (20)) { // max bits for literal/length code + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + + t = b & ml; + tp = tl; + tp_index = tl_index; + tp_index_t_3 = (tp_index + t) * 3; + if ((e = tp[tp_index_t_3]) == 0) { + b >>= (tp[tp_index_t_3 + 1]); + k -= (tp[tp_index_t_3 + 1]); + + s.window[q++] = (byte) tp[tp_index_t_3 + 2]; + m--; + continue; + } + do { + + b >>= (tp[tp_index_t_3 + 1]); + k -= (tp[tp_index_t_3 + 1]); + + if ((e & 16) != 0) { + e &= 15; + c = tp[tp_index_t_3 + 2] + (b & inflate_mask[e]); + + b >>= e; + k -= e; + + // decode distance base of block to copy + while (k < (15)) { // max bits for distance code + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + + t = b & md; + tp = td; + tp_index = td_index; + tp_index_t_3 = (tp_index + t) * 3; + e = tp[tp_index_t_3]; + + do { + + b >>= (tp[tp_index_t_3 + 1]); + k -= (tp[tp_index_t_3 + 1]); + + if ((e & 16) != 0) { + // get extra bits to add to distance base + e &= 15; + while (k < (e)) { // get extra bits (up to 13) + n--; + b |= (z.nextin[p++] & 0xff) << k; + k += 8; + } + + d = tp[tp_index_t_3 + 2] + (b & inflate_mask[e]); + + b >>= (e); + k -= (e); + + // do the copy + m -= c; + if (q >= d) { // offset before dest + // just copy + r = q - d; + if (q - r > 0 && 2 > (q - r)) { + s.window[q++] = s.window[r++]; // minimum count is three, + s.window[q++] = s.window[r++]; // so unroll loop a little + c -= 2; + } else { + System.arraycopy(s.window, r, s.window, q, 2); + q += 2; + r += 2; + c -= 2; + } + } else { // else offset after destination + r = q - d; + do { + r += s.end; // force pointer in window + } while (r < 0); // covers invalid distances + e = s.end - r; + if (c > e) { // if source crosses, + c -= e; // wrapped copy + if (q - r > 0 && e > (q - r)) { + do { + s.window[q++] = s.window[r++]; + } while (--e != 0); + } else { + System.arraycopy(s.window, r, s.window, q, e); + q += e; + r += e; + e = 0; + } + r = 0; // copy rest from start of window + } + + } + + // copy all or what's left + if (q - r > 0 && c > (q - r)) { + do { + s.window[q++] = s.window[r++]; + } while (--c != 0); + } else { + System.arraycopy(s.window, r, s.window, q, c); + q += c; + r += c; + c = 0; + } + break; + } else if ((e & 64) == 0) { + t += tp[tp_index_t_3 + 2]; + t += (b & inflate_mask[e]); + tp_index_t_3 = (tp_index + t) * 3; + e = tp[tp_index_t_3]; + } else { + z.msg = "invalid distance code"; + + c = z.availin - n; + c = (k >> 3) < c ? 
k >> 3 : c; + n += c; + p -= c; + k -= c << 3; + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + + return Z_DATA_ERROR; + } + } while (true); + break; + } + + if ((e & 64) == 0) { + t += tp[tp_index_t_3 + 2]; + t += (b & inflate_mask[e]); + tp_index_t_3 = (tp_index + t) * 3; + if ((e = tp[tp_index_t_3]) == 0) { + + b >>= (tp[tp_index_t_3 + 1]); + k -= (tp[tp_index_t_3 + 1]); + + s.window[q++] = (byte) tp[tp_index_t_3 + 2]; + m--; + break; + } + } else if ((e & 32) != 0) { + + c = z.availin - n; + c = (k >> 3) < c ? k >> 3 : c; + n += c; + p -= c; + k -= c << 3; + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + + return Z_STREAM_END; + } else { + z.msg = "invalid literal/length code"; + + c = z.availin - n; + c = (k >> 3) < c ? k >> 3 : c; + n += c; + p -= c; + k -= c << 3; + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + + return Z_DATA_ERROR; + } + } while (true); + } while (m >= 258 && n >= 10); + + // not enough input or output--restore pointers and return + c = z.availin - n; + c = (k >> 3) < c ? k >> 3 : c; + n += c; + p -= c; + k -= c << 3; + + s.bitb = b; + s.bitk = k; + z.availin = n; + z.totalin += p - z.nextinindex; + z.nextinindex = p; + s.write = q; + + return Z_OK; + } +} diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/InfTree.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/InfTree.java new file mode 100644 index 0000000..5b3bb95 --- /dev/null +++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/InfTree.java @@ -0,0 +1,488 @@ + +package org.xbib.io.compress.zlib; + +public class InfTree { + + private static final int MANY = 1440; + private static final int Z_OK = 0; + //private static final int Z_STREAM_END = 1; + //private static final int Z_NEED_DICT = 2; + //private static final int Z_ERRNO = -1; + //private static final int Z_STREAM_ERROR = -2; + private static final int Z_DATA_ERROR = -3; + private static final int Z_MEM_ERROR = -4; + private static final int Z_BUF_ERROR = -5; + //private static final int Z_VERSION_ERROR = -6; + private static final int FIXED_BL = 9; + private static final int FIXED_BD = 5; + private static final int[] FIXED_TL = { + 96, 7, 256, 0, 8, 80, 0, 8, 16, 84, 8, 115, + 82, 7, 31, 0, 8, 112, 0, 8, 48, 0, 9, 192, + 80, 7, 10, 0, 8, 96, 0, 8, 32, 0, 9, 160, + 0, 8, 0, 0, 8, 128, 0, 8, 64, 0, 9, 224, + 80, 7, 6, 0, 8, 88, 0, 8, 24, 0, 9, 144, + 83, 7, 59, 0, 8, 120, 0, 8, 56, 0, 9, 208, + 81, 7, 17, 0, 8, 104, 0, 8, 40, 0, 9, 176, + 0, 8, 8, 0, 8, 136, 0, 8, 72, 0, 9, 240, + 80, 7, 4, 0, 8, 84, 0, 8, 20, 85, 8, 227, + 83, 7, 43, 0, 8, 116, 0, 8, 52, 0, 9, 200, + 81, 7, 13, 0, 8, 100, 0, 8, 36, 0, 9, 168, + 0, 8, 4, 0, 8, 132, 0, 8, 68, 0, 9, 232, + 80, 7, 8, 0, 8, 92, 0, 8, 28, 0, 9, 152, + 84, 7, 83, 0, 8, 124, 0, 8, 60, 0, 9, 216, + 82, 7, 23, 0, 8, 108, 0, 8, 44, 0, 9, 184, + 0, 8, 12, 0, 8, 140, 0, 8, 76, 0, 9, 248, + 80, 7, 3, 0, 8, 82, 0, 8, 18, 85, 8, 163, + 83, 7, 35, 0, 8, 114, 0, 8, 50, 0, 9, 196, + 81, 7, 11, 0, 8, 98, 0, 8, 34, 0, 9, 164, + 0, 8, 2, 0, 8, 130, 0, 8, 66, 0, 9, 228, + 80, 7, 7, 0, 8, 90, 0, 8, 26, 0, 9, 148, + 84, 7, 67, 0, 8, 122, 0, 8, 58, 0, 9, 212, + 82, 7, 19, 0, 8, 106, 0, 8, 42, 0, 9, 180, + 0, 8, 10, 0, 8, 138, 0, 8, 74, 0, 9, 244, + 80, 7, 5, 0, 8, 86, 0, 8, 22, 192, 8, 0, + 83, 7, 51, 0, 8, 118, 0, 8, 54, 0, 9, 204, + 81, 7, 15, 0, 8, 102, 0, 8, 38, 0, 9, 172, + 
0, 8, 6, 0, 8, 134, 0, 8, 70, 0, 9, 236, + 80, 7, 9, 0, 8, 94, 0, 8, 30, 0, 9, 156, + 84, 7, 99, 0, 8, 126, 0, 8, 62, 0, 9, 220, + 82, 7, 27, 0, 8, 110, 0, 8, 46, 0, 9, 188, + 0, 8, 14, 0, 8, 142, 0, 8, 78, 0, 9, 252, + 96, 7, 256, 0, 8, 81, 0, 8, 17, 85, 8, 131, + 82, 7, 31, 0, 8, 113, 0, 8, 49, 0, 9, 194, + 80, 7, 10, 0, 8, 97, 0, 8, 33, 0, 9, 162, + 0, 8, 1, 0, 8, 129, 0, 8, 65, 0, 9, 226, + 80, 7, 6, 0, 8, 89, 0, 8, 25, 0, 9, 146, + 83, 7, 59, 0, 8, 121, 0, 8, 57, 0, 9, 210, + 81, 7, 17, 0, 8, 105, 0, 8, 41, 0, 9, 178, + 0, 8, 9, 0, 8, 137, 0, 8, 73, 0, 9, 242, + 80, 7, 4, 0, 8, 85, 0, 8, 21, 80, 8, 258, + 83, 7, 43, 0, 8, 117, 0, 8, 53, 0, 9, 202, + 81, 7, 13, 0, 8, 101, 0, 8, 37, 0, 9, 170, + 0, 8, 5, 0, 8, 133, 0, 8, 69, 0, 9, 234, + 80, 7, 8, 0, 8, 93, 0, 8, 29, 0, 9, 154, + 84, 7, 83, 0, 8, 125, 0, 8, 61, 0, 9, 218, + 82, 7, 23, 0, 8, 109, 0, 8, 45, 0, 9, 186, + 0, 8, 13, 0, 8, 141, 0, 8, 77, 0, 9, 250, + 80, 7, 3, 0, 8, 83, 0, 8, 19, 85, 8, 195, + 83, 7, 35, 0, 8, 115, 0, 8, 51, 0, 9, 198, + 81, 7, 11, 0, 8, 99, 0, 8, 35, 0, 9, 166, + 0, 8, 3, 0, 8, 131, 0, 8, 67, 0, 9, 230, + 80, 7, 7, 0, 8, 91, 0, 8, 27, 0, 9, 150, + 84, 7, 67, 0, 8, 123, 0, 8, 59, 0, 9, 214, + 82, 7, 19, 0, 8, 107, 0, 8, 43, 0, 9, 182, + 0, 8, 11, 0, 8, 139, 0, 8, 75, 0, 9, 246, + 80, 7, 5, 0, 8, 87, 0, 8, 23, 192, 8, 0, + 83, 7, 51, 0, 8, 119, 0, 8, 55, 0, 9, 206, + 81, 7, 15, 0, 8, 103, 0, 8, 39, 0, 9, 174, + 0, 8, 7, 0, 8, 135, 0, 8, 71, 0, 9, 238, + 80, 7, 9, 0, 8, 95, 0, 8, 31, 0, 9, 158, + 84, 7, 99, 0, 8, 127, 0, 8, 63, 0, 9, 222, + 82, 7, 27, 0, 8, 111, 0, 8, 47, 0, 9, 190, + 0, 8, 15, 0, 8, 143, 0, 8, 79, 0, 9, 254, + 96, 7, 256, 0, 8, 80, 0, 8, 16, 84, 8, 115, + 82, 7, 31, 0, 8, 112, 0, 8, 48, 0, 9, 193, + 80, 7, 10, 0, 8, 96, 0, 8, 32, 0, 9, 161, + 0, 8, 0, 0, 8, 128, 0, 8, 64, 0, 9, 225, + 80, 7, 6, 0, 8, 88, 0, 8, 24, 0, 9, 145, + 83, 7, 59, 0, 8, 120, 0, 8, 56, 0, 9, 209, + 81, 7, 17, 0, 8, 104, 0, 8, 40, 0, 9, 177, + 0, 8, 8, 0, 8, 136, 0, 8, 72, 0, 9, 241, + 80, 7, 4, 0, 8, 84, 0, 8, 20, 85, 8, 227, + 83, 7, 43, 0, 8, 116, 0, 8, 52, 0, 9, 201, + 81, 7, 13, 0, 8, 100, 0, 8, 36, 0, 9, 169, + 0, 8, 4, 0, 8, 132, 0, 8, 68, 0, 9, 233, + 80, 7, 8, 0, 8, 92, 0, 8, 28, 0, 9, 153, + 84, 7, 83, 0, 8, 124, 0, 8, 60, 0, 9, 217, + 82, 7, 23, 0, 8, 108, 0, 8, 44, 0, 9, 185, + 0, 8, 12, 0, 8, 140, 0, 8, 76, 0, 9, 249, + 80, 7, 3, 0, 8, 82, 0, 8, 18, 85, 8, 163, + 83, 7, 35, 0, 8, 114, 0, 8, 50, 0, 9, 197, + 81, 7, 11, 0, 8, 98, 0, 8, 34, 0, 9, 165, + 0, 8, 2, 0, 8, 130, 0, 8, 66, 0, 9, 229, + 80, 7, 7, 0, 8, 90, 0, 8, 26, 0, 9, 149, + 84, 7, 67, 0, 8, 122, 0, 8, 58, 0, 9, 213, + 82, 7, 19, 0, 8, 106, 0, 8, 42, 0, 9, 181, + 0, 8, 10, 0, 8, 138, 0, 8, 74, 0, 9, 245, + 80, 7, 5, 0, 8, 86, 0, 8, 22, 192, 8, 0, + 83, 7, 51, 0, 8, 118, 0, 8, 54, 0, 9, 205, + 81, 7, 15, 0, 8, 102, 0, 8, 38, 0, 9, 173, + 0, 8, 6, 0, 8, 134, 0, 8, 70, 0, 9, 237, + 80, 7, 9, 0, 8, 94, 0, 8, 30, 0, 9, 157, + 84, 7, 99, 0, 8, 126, 0, 8, 62, 0, 9, 221, + 82, 7, 27, 0, 8, 110, 0, 8, 46, 0, 9, 189, + 0, 8, 14, 0, 8, 142, 0, 8, 78, 0, 9, 253, + 96, 7, 256, 0, 8, 81, 0, 8, 17, 85, 8, 131, + 82, 7, 31, 0, 8, 113, 0, 8, 49, 0, 9, 195, + 80, 7, 10, 0, 8, 97, 0, 8, 33, 0, 9, 163, + 0, 8, 1, 0, 8, 129, 0, 8, 65, 0, 9, 227, + 80, 7, 6, 0, 8, 89, 0, 8, 25, 0, 9, 147, + 83, 7, 59, 0, 8, 121, 0, 8, 57, 0, 9, 211, + 81, 7, 17, 0, 8, 105, 0, 8, 41, 0, 9, 179, + 0, 8, 9, 0, 8, 137, 0, 8, 73, 0, 9, 243, + 80, 7, 4, 0, 8, 85, 0, 8, 21, 80, 8, 258, + 83, 7, 43, 0, 8, 117, 0, 8, 53, 0, 9, 203, + 81, 7, 13, 0, 8, 101, 0, 8, 37, 0, 9, 171, + 0, 8, 5, 0, 8, 
133, 0, 8, 69, 0, 9, 235, + 80, 7, 8, 0, 8, 93, 0, 8, 29, 0, 9, 155, + 84, 7, 83, 0, 8, 125, 0, 8, 61, 0, 9, 219, + 82, 7, 23, 0, 8, 109, 0, 8, 45, 0, 9, 187, + 0, 8, 13, 0, 8, 141, 0, 8, 77, 0, 9, 251, + 80, 7, 3, 0, 8, 83, 0, 8, 19, 85, 8, 195, + 83, 7, 35, 0, 8, 115, 0, 8, 51, 0, 9, 199, + 81, 7, 11, 0, 8, 99, 0, 8, 35, 0, 9, 167, + 0, 8, 3, 0, 8, 131, 0, 8, 67, 0, 9, 231, + 80, 7, 7, 0, 8, 91, 0, 8, 27, 0, 9, 151, + 84, 7, 67, 0, 8, 123, 0, 8, 59, 0, 9, 215, + 82, 7, 19, 0, 8, 107, 0, 8, 43, 0, 9, 183, + 0, 8, 11, 0, 8, 139, 0, 8, 75, 0, 9, 247, + 80, 7, 5, 0, 8, 87, 0, 8, 23, 192, 8, 0, + 83, 7, 51, 0, 8, 119, 0, 8, 55, 0, 9, 207, + 81, 7, 15, 0, 8, 103, 0, 8, 39, 0, 9, 175, + 0, 8, 7, 0, 8, 135, 0, 8, 71, 0, 9, 239, + 80, 7, 9, 0, 8, 95, 0, 8, 31, 0, 9, 159, + 84, 7, 99, 0, 8, 127, 0, 8, 63, 0, 9, 223, + 82, 7, 27, 0, 8, 111, 0, 8, 47, 0, 9, 191, + 0, 8, 15, 0, 8, 143, 0, 8, 79, 0, 9, 255 + }; + private static final int[] FIXED_TD = { + 80, 5, 1, 87, 5, 257, 83, 5, 17, 91, 5, 4097, + 81, 5, 5, 89, 5, 1025, 85, 5, 65, 93, 5, 16385, + 80, 5, 3, 88, 5, 513, 84, 5, 33, 92, 5, 8193, + 82, 5, 9, 90, 5, 2049, 86, 5, 129, 192, 5, 24577, + 80, 5, 2, 87, 5, 385, 83, 5, 25, 91, 5, 6145, + 81, 5, 7, 89, 5, 1537, 85, 5, 97, 93, 5, 24577, + 80, 5, 4, 88, 5, 769, 84, 5, 49, 92, 5, 12289, + 82, 5, 13, 90, 5, 3073, 86, 5, 193, 192, 5, 24577 + }; + // Tables for deflate from PKZIP's appnote.txt. + private static final int[] cplens = { // Copy lengths for literal codes 257..285 + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 + }; + // see note #13 above about 258 + private static final int[] cplext = { // Extra bits for literal codes 257..285 + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112 // 112==invalid + }; + private static final int[] cpdist = { // Copy offsets for distance codes 0..29 + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577 + }; + private static final int[] cpdext = { // Extra bits for distance codes + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13}; + // If BMAX needs to be larger than 16, then h and x[] should be uLong. + private static final int BMAX = 15; // maximum bit length of any code + private int[] hn = null; // hufts used in space + private int[] v = null; // work area for huft_build + private int[] c = null; // bit length count table + private int[] r = null; // table entry for structure assignment + private int[] u = null; // table stack + private int[] x = null; // bit offsets, then code stack + + private int huft_build(int[] b, // code lengths in bits (all assumed <= BMAX) + int bindex, + int n, // number of codes (assumed <= 288) + int s, // number of simple-valued codes (0..s-1) + int[] d, // list of base values for non-simple codes + int[] e, // list of extra bits for non-simple codes + int[] t, // result: starting table + int[] m, // maximum lookup bits, returns actual + int[] hp,// space for trees + int[] hn,// hufts used in space + int[] v // working area: values in order of bit length + ) { + // Given a list of code lengths and a maximum table size, make a set of + // tables to decode that set of codes. 
Return Z_OK on success, Z_BUF_ERROR + // if the given code set is incomplete (the tables are still built in this + // case), Z_DATA_ERROR if the input is invalid (an over-subscribed set of + // lengths), or Z_MEM_ERROR if not enough memory. + + int a; // counter for codes of length k + int f; // i repeats in table every f entries + int g; // maximum code length + int h; // table level + int i; // counter, current code + int j; // counter + int k; // number of bits in current code + int l; // bits per table (returned in m) + int mask; // (1 << w) - 1, to avoid cc -O bug on HP + int p; // pointer into c[], b[], or v[] + int q; // points to current table + int w; // bits before this table == (l * h) + int xp; // pointer into x + int y; // number of dummy codes added + int z; // number of entries in current table + + // Generate counts for each bit length + + p = 0; + i = n; + do { + c[b[bindex + p]]++; + p++; + i--; // assume all entries <= BMAX + } while (i != 0); + + if (c[0] == n) { // null input--all zero length codes + t[0] = -1; + m[0] = 0; + return Z_OK; + } + + // Find minimum and maximum length, bound *m by those + l = m[0]; + for (j = 1; j <= BMAX; j++) { + if (c[j] != 0) { + break; + } + } + k = j; // minimum code length + if (l < j) { + l = j; + } + for (i = BMAX; i != 0; i--) { + if (c[i] != 0) { + break; + } + } + g = i; // maximum code length + if (l > i) { + l = i; + } + m[0] = l; + + // Adjust last length count to fill out codes, if needed + for (y = 1 << j; j < i; j++, y <<= 1) { + if ((y -= c[j]) < 0) { + return Z_DATA_ERROR; + } + } + if ((y -= c[i]) < 0) { + return Z_DATA_ERROR; + } + c[i] += y; + + // Generate starting offsets into the value table for each length + x[1] = j = 0; + p = 1; + xp = 2; + while (--i != 0) { // note that i == g from above + x[xp] = (j += c[p]); + xp++; + p++; + } + + // Make a table of values in order of bit lengths + i = 0; + p = 0; + do { + if ((j = b[bindex + p]) != 0) { + v[x[j]++] = i; + } + p++; + } while (++i < n); + n = x[g]; // set n to length of v + + // Generate the Huffman codes and for each, make the table entries + x[0] = i = 0; // first Huffman code is zero + p = 0; // grab values in bit order + h = -1; // no tables yet--level -1 + w = -l; // bits decoded == (l * h) + u[0] = 0; // just to keep compilers happy + q = 0; // ditto + z = 0; // ditto + + // go through the bit lengths (k already is bits in shortest code) + for (; k <= g; k++) { + a = c[k]; + while (a-- != 0) { + // here i is the Huffman code of length k bits for value *p + // make tables up to required level + while (k > w + l) { + h++; + w += l; // previous table always l bits + // compute minimum size table less than or equal to l bits + z = g - w; + z = (z > l) ? 
l : z; // table size upper limit + if ((f = 1 << (j = k - w)) > a + 1) { // try a k-w bit table + // too few codes for k-w bit table + f -= a + 1; // deduct codes from patterns left + xp = k; + if (j < z) { + while (++j < z) { // try smaller tables up to z bits + if ((f <<= 1) <= c[++xp]) { + break; // enough codes to use up j bits + } + f -= c[xp]; // else deduct codes from patterns + } + } + } + z = 1 << j; // table entries for j-bit table + + // allocate new table + if (hn[0] + z > MANY) { // (note: doesn't matter for fixed) + return Z_DATA_ERROR; // overflow of MANY + } + u[h] = q = /*hp+*/ hn[0]; // DEBUG + hn[0] += z; + + // connect to last table, if there is one + if (h != 0) { + x[h] = i; // save pattern for backing up + r[0] = (byte) j; // bits in this table + r[1] = (byte) l; // bits to dump before this table + j = i >>> (w - l); + r[2] = (q - u[h - 1] - j); // offset to this table + System.arraycopy(r, 0, hp, (u[h - 1] + j) * 3, 3); // connect to last table + } else { + t[0] = q; // first table is returned result + } + } + + // set up table entry in r + r[1] = (byte) (k - w); + if (p >= n) { + r[0] = 128 + 64; // out of values--invalid code + } else if (v[p] < s) { + r[0] = (byte) (v[p] < 256 ? 0 : 32 + 64); // 256 is end-of-block + r[2] = v[p++]; // simple code is just the value + } else { + r[0] = (byte) (e[v[p] - s] + 16 + 64); // non-simple--look up in lists + r[2] = d[v[p++] - s]; + } + + // fill code-like entries with r + f = 1 << (k - w); + for (j = i >>> w; j < z; j += f) { + System.arraycopy(r, 0, hp, (q + j) * 3, 3); + } + + // backwards increment the k-bit code i + for (j = 1 << (k - 1); (i & j) != 0; j >>>= 1) { + i ^= j; + } + i ^= j; + + // backup over finished tables + mask = (1 << w) - 1; // needed on HP, cc -O bug + while ((i & mask) != x[h]) { + h--; // don't need to update q + w -= l; + mask = (1 << w) - 1; + } + } + } + // Return Z_BUF_ERROR if we were given an incomplete table + return y != 0 && g != 1 ? 
Z_BUF_ERROR : Z_OK; + } + + protected int inflateTreesBits(int[] c, // 19 code lengths + int[] bb, // bits tree desired/actual depth + int[] tb, // bits tree result + int[] hp, // space for trees + ZStream z // for messages + ) { + int result; + initWorkArea(19); + hn[0] = 0; + result = huft_build(c, 0, 19, 19, null, null, tb, bb, hp, hn, v); + + if (result == Z_DATA_ERROR) { + z.msg = "oversubscribed dynamic bit lengths tree"; + } else if (result == Z_BUF_ERROR || bb[0] == 0) { + z.msg = "incomplete dynamic bit lengths tree"; + result = Z_DATA_ERROR; + } + return result; + } + + int inflate_trees_dynamic(int nl, // number of literal/length codes + int nd, // number of distance codes + int[] c, // that many (total) code lengths + int[] bl, // literal desired/actual bit depth + int[] bd, // distance desired/actual bit depth + int[] tl, // literal/length tree result + int[] td, // distance tree result + int[] hp, // space for trees + ZStream z // for messages + ) { + int result; + + // build literal/length tree + initWorkArea(288); + hn[0] = 0; + result = huft_build(c, 0, nl, 257, cplens, cplext, tl, bl, hp, hn, v); + if (result != Z_OK || bl[0] == 0) { + if (result == Z_DATA_ERROR) { + z.msg = "oversubscribed literal/length tree"; + } else if (result != Z_MEM_ERROR) { + z.msg = "incomplete literal/length tree"; + result = Z_DATA_ERROR; + } + return result; + } + + // build distance tree + initWorkArea(288); + result = huft_build(c, nl, nd, 0, cpdist, cpdext, td, bd, hp, hn, v); + + if (result != Z_OK || (bd[0] == 0 && nl > 257)) { + if (result == Z_DATA_ERROR) { + z.msg = "oversubscribed distance tree"; + } else if (result == Z_BUF_ERROR) { + z.msg = "incomplete distance tree"; + result = Z_DATA_ERROR; + } else if (result != Z_MEM_ERROR) { + z.msg = "empty distance tree with lengths"; + result = Z_DATA_ERROR; + } + return result; + } + + return Z_OK; + } + + static int inflate_trees_fixed(int[] bl, //literal desired/actual bit depth + int[] bd, //distance desired/actual bit depth + int[][] tl,//literal/length tree result + int[][] td,//distance tree result + ZStream z //for memory allocation + ) { + bl[0] = FIXED_BL; + bd[0] = FIXED_BD; + tl[0] = FIXED_TL; + td[0] = FIXED_TD; + return Z_OK; + } + + private void initWorkArea(int vsize) { + if (hn == null) { + hn = new int[1]; + v = new int[vsize]; + c = new int[BMAX + 1]; + r = new int[3]; + u = new int[BMAX]; + x = new int[BMAX + 1]; + } + if (v.length < vsize) { + v = new int[vsize]; + } + for (int i = 0; i < vsize; i++) { + v[i] = 0; + } + for (int i = 0; i < BMAX + 1; i++) { + c[i] = 0; + } + for (int i = 0; i < 3; i++) { + r[i] = 0; + } +// for(int i=0; istate); + return Z_OK; + } + + protected int inflateInit(ZStream z, int w) { + z.msg = null; + blocks = null; + + // handle undocumented nowrap option (no zlib header or check) + nowrap = 0; + if (w < 0) { + w = -w; + nowrap = 1; + } + + // set window size + if (w < 8 || w > 15) { + inflateEnd(z); + return Z_STREAM_ERROR; + } + wbits = w; + + z.istate.blocks = new InfBlocks(z, + z.istate.nowrap != 0 ? null : this, + 1 << w); + + // reset state + inflateReset(z); + return Z_OK; + } + + protected int inflate(ZStream z, int f) { + int r; + int b; + + if (z == null || z.istate == null || z.nextin == null) { + return Z_STREAM_ERROR; + } + f = f == Z_FINISH ? 
Z_BUF_ERROR : Z_OK; + r = Z_BUF_ERROR; + while (true) { + switch (z.istate.mode) { + case METHOD: + if (z.availin == 0) { + return r; + } + r = f; + z.availin--; + z.totalin++; + if (((z.istate.method = z.nextin[z.nextinindex++]) & 0xf) != Z_DEFLATED) { + z.istate.mode = BAD; + z.msg = "unknown compression method"; + z.istate.marker = 5; // can't try inflateSync + break; + } + if ((z.istate.method >> 4) + 8 > z.istate.wbits) { + z.istate.mode = BAD; + z.msg = "invalid window size"; + z.istate.marker = 5; // can't try inflateSync + break; + } + z.istate.mode = FLAG; + break; + case FLAG: + if (z.availin == 0) { + return r; + } + r = f; + z.availin--; + z.totalin++; + b = (z.nextin[z.nextinindex++]) & 0xff; + if ((((z.istate.method << 8) + b) % 31) != 0) { + z.istate.mode = BAD; + z.msg = "incorrect header check"; + z.istate.marker = 5; // can't try inflateSync + break; + } + + if ((b & PRESET_DICT) == 0) { + z.istate.mode = BLOCKS; + break; + } + z.istate.mode = DICT4; + break; + case DICT4: + if (z.availin == 0) { + return r; + } + r = f; + z.availin--; + z.totalin++; + z.istate.need = ((z.nextin[z.nextinindex++] & 0xff) << 24) & 0xff000000L; + z.istate.mode = DICT3; + break; + case DICT3: + if (z.availin == 0) { + return r; + } + r = f; + z.availin--; + z.totalin++; + z.istate.need += ((z.nextin[z.nextinindex++] & 0xff) << 16) & 0xff0000L; + z.istate.mode = DICT2; + break; + case DICT2: + if (z.availin == 0) { + return r; + } + r = f; + z.availin--; + z.totalin++; + z.istate.need += ((z.nextin[z.nextinindex++] & 0xff) << 8) & 0xff00L; + z.istate.mode = DICT1; + break; + case DICT1: + if (z.availin == 0) { + return r; + } + r = f; + z.availin--; + z.totalin++; + z.istate.need += (z.nextin[z.nextinindex++] & 0xffL); + z.adler = z.istate.need; + z.istate.mode = DICT0; + return Z_NEED_DICT; + case DICT0: + z.istate.mode = BAD; + z.msg = "need dictionary"; + z.istate.marker = 0; // can try inflateSync + return Z_STREAM_ERROR; + case BLOCKS: + r = z.istate.blocks.proc(z, r); + if (r == Z_DATA_ERROR) { + z.istate.mode = BAD; + z.istate.marker = 0; // can try inflateSync + break; + } + if (r == Z_OK) { + r = f; + } + if (r != Z_STREAM_END) { + return r; + } + r = f; + z.istate.blocks.reset(z, z.istate.was); + if (z.istate.nowrap != 0) { + z.istate.mode = DONE; + break; + } + z.istate.mode = CHECK4; + break; + case CHECK4: + if (z.availin == 0) { + return r; + } + r = f; + z.availin--; + z.totalin++; + z.istate.need = ((z.nextin[z.nextinindex++] & 0xff) << 24) & 0xff000000L; + z.istate.mode = CHECK3; + break; + case CHECK3: + if (z.availin == 0) { + return r; + } + r = f; + z.availin--; + z.totalin++; + z.istate.need += ((z.nextin[z.nextinindex++] & 0xff) << 16) & 0xff0000L; + z.istate.mode = CHECK2; + break; + case CHECK2: + if (z.availin == 0) { + return r; + } + r = f; + z.availin--; + z.totalin++; + z.istate.need += ((z.nextin[z.nextinindex++] & 0xff) << 8) & 0xff00L; + z.istate.mode = CHECK1; + break; + case CHECK1: + if (z.availin == 0) { + return r; + } + r = f; + z.availin--; + z.totalin++; + z.istate.need += (z.nextin[z.nextinindex++] & 0xffL); + if (((int) (z.istate.was[0])) != ((int) (z.istate.need))) { + z.istate.mode = BAD; + z.msg = "incorrect data check"; + z.istate.marker = 5; // can't try inflateSync + break; + } + z.istate.mode = DONE; + break; + case DONE: + return Z_STREAM_END; + case BAD: + return Z_DATA_ERROR; + default: + return Z_STREAM_ERROR; + } + } + } + + protected int inflateSetDictionary(ZStream z, byte[] dictionary, int dictLength) { + int index = 0; + int length = 
dictLength; + if (z == null || z.istate == null || z.istate.mode != DICT0) { + return Z_STREAM_ERROR; + } + + if (Adler32.adler32(1L, dictionary, 0, dictLength) != z.adler) { + return Z_DATA_ERROR; + } + + z.adler = Adler32.adler32(0, null, 0, 0); + + if (length >= (1 << z.istate.wbits)) { + length = (1 << z.istate.wbits) - 1; + index = dictLength - length; + } + z.istate.blocks.setDictionary(dictionary, index, length); + z.istate.mode = BLOCKS; + return Z_OK; + } + + private static byte[] mark = {(byte) 0, (byte) 0, (byte) 0xff, (byte) 0xff}; + + protected int inflateSync(ZStream z) { + int n; // number of bytes to look at + int p; // pointer to bytes + int m; // number of marker bytes found in a row + long r, w; // temporaries to save total_in and total_out + + // set up + if (z == null || z.istate == null) { + return Z_STREAM_ERROR; + } + if (z.istate.mode != BAD) { + z.istate.mode = BAD; + z.istate.marker = 0; + } + if ((n = z.availin) == 0) { + return Z_BUF_ERROR; + } + p = z.nextinindex; + m = z.istate.marker; + + // search + while (n != 0 && m < 4) { + if (z.nextin[p] == mark[m]) { + m++; + } else if (z.nextin[p] != 0) { + m = 0; + } else { + m = 4 - m; + } + p++; + n--; + } + + // restore + z.totalin += p - z.nextinindex; + z.nextinindex = p; + z.availin = n; + z.istate.marker = m; + + // return no joy or set up to restart on a new block + if (m != 4) { + return Z_DATA_ERROR; + } + r = z.totalin; + w = z.totalout; + inflateReset(z); + z.totalin = r; + z.totalout = w; + z.istate.mode = BLOCKS; + return Z_OK; + } + +} diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/StaticTree.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/StaticTree.java new file mode 100644 index 0000000..c1de461 --- /dev/null +++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/StaticTree.java @@ -0,0 +1,121 @@ + +package org.xbib.io.compress.zlib; + +public class StaticTree { + + private static final int MAX_BITS = 15; + private static final int BL_CODES = 19; + private static final int D_CODES = 30; + private static final int LITERALS = 256; + private static final int LENGTH_CODES = 29; + private static final int L_CODES = (LITERALS + 1 + LENGTH_CODES); + // Bit length codes must not exceed MAX_BL_BITS bits + private static final int MAX_BL_BITS = 7; + protected static final short[] STATIC_LTREE = { + 12, 8, 140, 8, 76, 8, 204, 8, 44, 8, + 172, 8, 108, 8, 236, 8, 28, 8, 156, 8, + 92, 8, 220, 8, 60, 8, 188, 8, 124, 8, + 252, 8, 2, 8, 130, 8, 66, 8, 194, 8, + 34, 8, 162, 8, 98, 8, 226, 8, 18, 8, + 146, 8, 82, 8, 210, 8, 50, 8, 178, 8, + 114, 8, 242, 8, 10, 8, 138, 8, 74, 8, + 202, 8, 42, 8, 170, 8, 106, 8, 234, 8, + 26, 8, 154, 8, 90, 8, 218, 8, 58, 8, + 186, 8, 122, 8, 250, 8, 6, 8, 134, 8, + 70, 8, 198, 8, 38, 8, 166, 8, 102, 8, + 230, 8, 22, 8, 150, 8, 86, 8, 214, 8, + 54, 8, 182, 8, 118, 8, 246, 8, 14, 8, + 142, 8, 78, 8, 206, 8, 46, 8, 174, 8, + 110, 8, 238, 8, 30, 8, 158, 8, 94, 8, + 222, 8, 62, 8, 190, 8, 126, 8, 254, 8, + 1, 8, 129, 8, 65, 8, 193, 8, 33, 8, + 161, 8, 97, 8, 225, 8, 17, 8, 145, 8, + 81, 8, 209, 8, 49, 8, 177, 8, 113, 8, + 241, 8, 9, 8, 137, 8, 73, 8, 201, 8, + 41, 8, 169, 8, 105, 8, 233, 8, 25, 8, + 153, 8, 89, 8, 217, 8, 57, 8, 185, 8, + 121, 8, 249, 8, 5, 8, 133, 8, 69, 8, + 197, 8, 37, 8, 165, 8, 101, 8, 229, 8, + 21, 8, 149, 8, 85, 8, 213, 8, 53, 8, + 181, 8, 117, 8, 245, 8, 13, 8, 141, 8, + 77, 8, 205, 8, 45, 8, 173, 8, 109, 8, + 237, 8, 29, 8, 157, 8, 93, 8, 221, 8, + 61, 8, 189, 8, 125, 8, 253, 8, 19, 9, + 275, 9, 147, 9, 403, 9, 83, 9, 
339, 9, + 211, 9, 467, 9, 51, 9, 307, 9, 179, 9, + 435, 9, 115, 9, 371, 9, 243, 9, 499, 9, + 11, 9, 267, 9, 139, 9, 395, 9, 75, 9, + 331, 9, 203, 9, 459, 9, 43, 9, 299, 9, + 171, 9, 427, 9, 107, 9, 363, 9, 235, 9, + 491, 9, 27, 9, 283, 9, 155, 9, 411, 9, + 91, 9, 347, 9, 219, 9, 475, 9, 59, 9, + 315, 9, 187, 9, 443, 9, 123, 9, 379, 9, + 251, 9, 507, 9, 7, 9, 263, 9, 135, 9, + 391, 9, 71, 9, 327, 9, 199, 9, 455, 9, + 39, 9, 295, 9, 167, 9, 423, 9, 103, 9, + 359, 9, 231, 9, 487, 9, 23, 9, 279, 9, + 151, 9, 407, 9, 87, 9, 343, 9, 215, 9, + 471, 9, 55, 9, 311, 9, 183, 9, 439, 9, + 119, 9, 375, 9, 247, 9, 503, 9, 15, 9, + 271, 9, 143, 9, 399, 9, 79, 9, 335, 9, + 207, 9, 463, 9, 47, 9, 303, 9, 175, 9, + 431, 9, 111, 9, 367, 9, 239, 9, 495, 9, + 31, 9, 287, 9, 159, 9, 415, 9, 95, 9, + 351, 9, 223, 9, 479, 9, 63, 9, 319, 9, + 191, 9, 447, 9, 127, 9, 383, 9, 255, 9, + 511, 9, 0, 7, 64, 7, 32, 7, 96, 7, + 16, 7, 80, 7, 48, 7, 112, 7, 8, 7, + 72, 7, 40, 7, 104, 7, 24, 7, 88, 7, + 56, 7, 120, 7, 4, 7, 68, 7, 36, 7, + 100, 7, 20, 7, 84, 7, 52, 7, 116, 7, + 3, 8, 131, 8, 67, 8, 195, 8, 35, 8, + 163, 8, 99, 8, 227, 8 + }; + protected static final short[] STATIC_DTREE = { + 0, 5, 16, 5, 8, 5, 24, 5, 4, 5, + 20, 5, 12, 5, 28, 5, 2, 5, 18, 5, + 10, 5, 26, 5, 6, 5, 22, 5, 14, 5, + 30, 5, 1, 5, 17, 5, 9, 5, 25, 5, + 5, 5, 21, 5, 13, 5, 29, 5, 3, 5, + 19, 5, 11, 5, 27, 5, 7, 5, 23, 5 + }; + private final static StaticTree LDESC_TREE = + new StaticTree(STATIC_LTREE, Tree.EXTRA_LBITS, + LITERALS + 1, L_CODES, MAX_BITS); + private final static StaticTree DDESC_TREE = + new StaticTree(STATIC_DTREE, Tree.EXTRA_DBITS, + 0, D_CODES, MAX_BITS); + private final static StaticTree BLDESC_TREE = + new StaticTree(null, Tree.EXTRA_BLBITS, + 0, BL_CODES, MAX_BL_BITS); + protected short[] statictree; // static tree or null + protected int[] extrabits; // extra bits for each code or null + protected int extrabase; // base index for extra_bits + protected int elems; // max number of elements in the tree + protected int maxlength; // max bit length for the codes + + public StaticTree(short[] statictree, + int[] extrabits, + int extrabase, + int elems, + int maxlength) { + this.statictree = statictree; + this.extrabits = extrabits; + this.extrabase = extrabase; + this.elems = elems; + this.maxlength = maxlength; + } + + protected static StaticTree getLDesc() { + return LDESC_TREE; + } + + protected static StaticTree getDDesc() { + return DDESC_TREE; + } + + protected static StaticTree getBLDesc() { + return BLDESC_TREE; + } + +} diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/Tree.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/Tree.java new file mode 100644 index 0000000..f8a1d08 --- /dev/null +++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/Tree.java @@ -0,0 +1,316 @@ + +package org.xbib.io.compress.zlib; + +public class Tree { + + private static final int MAX_BITS = 15; + private static final int LITERALS = 256; + private static final int LENGTH_CODES = 29; + private static final int L_CODES = (LITERALS + 1 + LENGTH_CODES); + private static final int HEAP_SIZE = (2 * L_CODES + 1); + // extra bits for each length code + protected static final int[] EXTRA_LBITS = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 + }; + // extra bits for each distance code + protected static final int[] EXTRA_DBITS = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 + }; + // extra bits for each bit length 
code + protected static final int[] EXTRA_BLBITS = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7 + }; + protected static final byte[] BL_ORDER = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + private static final byte[] DIST_CODE = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, + 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 + }; + protected static final byte[] LENGTH_CODE = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 + }; + protected static final int[] BASE_LENGTH = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 
+ 64, 80, 96, 112, 128, 160, 192, 224, 0 + }; + protected static final int[] BASE_DIST = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 + }; + + // Mapping from a distance to a distance code. dist is the distance - 1 and + // must not have side effects. DIST_CODE[256] and DIST_CODE[257] are never + // used. + protected static int distanceCode(int dist) { + return ((dist) < 256 ? DIST_CODE[dist] : DIST_CODE[256 + ((dist) >>> 7)]); + } + + protected short[] dynTree; // the dynamic tree + protected int maxCode; // largest code with non zero frequency + protected StaticTree statDesc; // the corresponding static tree + + // Compute the optimal bit lengths for a tree and update the total bit length + // for the current block. + // IN assertion: the fields freq and dad are set, heap[heap_max] and + // above are the tree nodes sorted by increasing frequency. + // OUT assertions: the field len is set to the optimal bit length, the + // array bl_count contains the frequencies for each bit length. + // The length opt_len is updated; static_len is also updated if stree is + // not null. + private void genBitLen(Deflate s) { + short[] tree = dynTree; + short[] stree = statDesc.statictree; + int[] extra = statDesc.extrabits; + int base = statDesc.extrabase; + int max_length = statDesc.maxlength; + int h; // heap index + int n, m; // iterate over the tree elements + int bits; // bit length + int xbits; // extra bits + short f; // frequency + int overflow = 0; // number of elements with bit length too large + + for (bits = 0; bits <= MAX_BITS; bits++) { + s.blCount[bits] = 0; + } + + // In a first pass, compute the optimal bit lengths (which may + // overflow in the case of the bit length tree). + tree[s.heap[s.heapMax] * 2 + 1] = 0; // root of the heap + + for (h = s.heapMax + 1; h < HEAP_SIZE; h++) { + n = s.heap[h]; + bits = tree[tree[n * 2 + 1] * 2 + 1] + 1; + if (bits > max_length) { + bits = max_length; + overflow++; + } + tree[n * 2 + 1] = (short) bits; + // We overwrite tree[n*2+1] which is no longer needed + + if (n > maxCode) { + continue; // not a leaf node + } + s.blCount[bits]++; + xbits = 0; + if (n >= base) { + xbits = extra[n - base]; + } + f = tree[n * 2]; + s.optLen += f * (bits + xbits); + if (stree != null) { + s.staticLen += f * (stree[n * 2 + 1] + xbits); + } + } + if (overflow == 0) { + return; + } + + // This happens for example on obj2 and pic of the Calgary corpus + // Find the first bit length which could increase: + do { + bits = max_length - 1; + while (s.blCount[bits] == 0) { + bits--; + } + s.blCount[bits]--; // move one leaf down the tree + s.blCount[bits + 1] += 2; // move one overflow item as its brother + s.blCount[max_length]--; + // The brother of the overflow item also moves one step up, + // but this does not affect bl_count[max_length] + overflow -= 2; + } while (overflow > 0); + + for (bits = max_length; bits != 0; bits--) { + n = s.blCount[bits]; + while (n != 0) { + m = s.heap[--h]; + if (m > maxCode) { + continue; + } + if (tree[m * 2 + 1] != bits) { + s.optLen += ((long) bits - (long) tree[m * 2 + 1]) * (long) tree[m * 2]; + tree[m * 2 + 1] = (short) bits; + } + n--; + } + } + } + + // Construct one Huffman tree and assigns the code bit strings and lengths. + // Update the total bit length for the current block. + // IN assertion: the field freq is set for all tree elements. 
+ // OUT assertions: the fields len and code are set to the optimal bit length + // and corresponding code. The length opt_len is updated; static_len is + // also updated if stree is not null. The field max_code is set. + protected void buildTree(Deflate s) { + short[] tree = dynTree; + short[] stree = statDesc.statictree; + int elems = statDesc.elems; + int n, m; // iterate over heap elements + int maxCode = -1; // largest code with non zero frequency + int node; // new node being created + + // Construct the initial heap, with least frequent element in + // heap[1]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + // heap[0] is not used. + s.heapLen = 0; + s.heapMax = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n * 2] != 0) { + s.heap[++s.heapLen] = maxCode = n; + s.depth[n] = 0; + } else { + tree[n * 2 + 1] = 0; + } + } + + // The pkzip format requires that at least one distance code exists, + // and that at least one bit should be sent even if there is only one + // possible code. So to avoid special checks later on we force at least + // two codes of non zero frequency. + while (s.heapLen < 2) { + node = s.heap[++s.heapLen] = (maxCode < 2 ? ++maxCode : 0); + tree[node * 2] = 1; + s.depth[node] = 0; + s.optLen--; + if (stree != null) { + s.staticLen -= stree[node * 2 + 1]; + } + // node is 0 or 1 so it does not have extra bits + } + this.maxCode = maxCode; + + // The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + // establish sub-heaps of increasing lengths: + + for (n = s.heapLen / 2; n >= 1; n--) { + s.pqdownheap(tree, n); + } + + // Construct the Huffman tree by repeatedly combining the least two + // frequent nodes. + + node = elems; // next internal node of the tree + do { + // n = node of least frequency + n = s.heap[1]; + s.heap[1] = s.heap[s.heapLen--]; + s.pqdownheap(tree, 1); + m = s.heap[1]; // m = node of next least frequency + + s.heap[--s.heapMax] = n; // keep the nodes sorted by frequency + s.heap[--s.heapMax] = m; + + // Create a new node father of n and m + tree[node * 2] = (short) (tree[n * 2] + tree[m * 2]); + s.depth[node] = (byte) (Math.max(s.depth[n], s.depth[m]) + 1); + tree[n * 2 + 1] = tree[m * 2 + 1] = (short) node; + + // and insert the new node in the heap + s.heap[1] = node++; + s.pqdownheap(tree, 1); + } while (s.heapLen >= 2); + + s.heap[--s.heapMax] = s.heap[1]; + + // At this point, the fields freq and dad are set. We can now + // generate the bit lengths. + + genBitLen(s); + + // The field len is now set, we can generate the bit codes + genCodes(tree, maxCode, s.blCount); + } + + // Generate the codes for a given tree and bit counts (which need not be + // optimal). + // IN assertion: the array bl_count contains the bit length statistics for + // the given tree and the field len is set for all tree elements. + // OUT assertion: the field code is set for all tree elements of non + // zero code length. + private void genCodes(short[] tree, // the tree to decorate + int max_code, // largest code with non zero frequency + short[] bl_count // number of codes at each bit length + ) { + short[] next_code = new short[MAX_BITS + 1]; // next code value for each bit length + short code = 0; // running code value + int bits; // bit index + int n; // code index + + // The distribution counts are first used to generate the code values + // without bit reversal. 
+        for (bits = 1; bits <= MAX_BITS; bits++) {
+            next_code[bits] = code = (short) ((code + bl_count[bits - 1]) << 1);
+        }
+
+        // Check that the bit counts in bl_count are consistent. The last code
+        // must be all ones.
+        //Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1, "inconsistent bit counts");
+
+        for (n = 0; n <= max_code; n++) {
+            int len = tree[n * 2 + 1];
+            if (len == 0) {
+                continue;
+            }
+            // Now reverse the bits
+            tree[n * 2] = (short) biReverse(next_code[len]++, len);
+        }
+    }
+
+    // Reverse the first len bits of a code, using straightforward code.
+    // IN assertion: 1 <= len <= 15
+    private static int biReverse(int code, int len) {
+        int res = 0;
+        do {
+            res |= code & 1;
+            code >>>= 1;
+            res <<= 1;
+        } while (--len > 0);
+        return res >>> 1;
+    }
+}
+
diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZConstants.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZConstants.java
new file mode 100644
index 0000000..8699b1b
--- /dev/null
+++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZConstants.java
@@ -0,0 +1,30 @@
+
+package org.xbib.io.compress.zlib;
+
+public interface ZConstants {
+
+    // compression levels
+    int Z_NO_COMPRESSION = 0;
+    int Z_BEST_SPEED = 1;
+    int Z_BEST_COMPRESSION = 9;
+    int Z_DEFAULT_COMPRESSION = (-1);
+
+    // compression strategy
+    int Z_FILTERED = 1;
+    int Z_HUFFMAN_ONLY = 2;
+    int Z_DEFAULT_STRATEGY = 0;
+    int Z_NO_FLUSH = 0;
+    int Z_PARTIAL_FLUSH = 1;
+    int Z_SYNC_FLUSH = 2;
+    int Z_FULL_FLUSH = 3;
+    int Z_FINISH = 4;
+    int Z_OK = 0;
+    int Z_STREAM_END = 1;
+    int Z_NEED_DICT = 2;
+    int Z_ERRNO = -1;
+    int Z_STREAM_ERROR = -2;
+    int Z_DATA_ERROR = -3;
+    int Z_MEM_ERROR = -4;
+    int Z_BUF_ERROR = -5;
+    int Z_VERSION_ERROR = -6;
+}
diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZInputStream.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZInputStream.java
new file mode 100644
index 0000000..c516b90
--- /dev/null
+++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZInputStream.java
@@ -0,0 +1,128 @@
+
+package org.xbib.io.compress.zlib;
+
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+public class ZInputStream extends FilterInputStream {
+
+    protected ZStream z = new ZStream();
+    protected int flush = ZConstants.Z_NO_FLUSH;
+    protected byte[] buf;
+    protected byte[] buf1 = new byte[1];
+    protected boolean compress;
+    private int bufsize;
+
+    public ZInputStream(InputStream in) {
+        this(in, false);
+    }
+
+    public ZInputStream(InputStream in, boolean nowrap) {
+        super(in);
+        this.bufsize = 512;
+        this.buf = new byte[bufsize];
+        z.inflateInit(nowrap);
+        compress = false;
+        z.nextin = buf;
+        z.nextinindex = 0;
+        z.availin = 0;
+    }
+
+    public ZInputStream(InputStream in, int bufsize) {
+        super(in);
+        this.bufsize = bufsize;
+        this.buf = new byte[bufsize];
+        z.inflateInit(false);
+        compress = false;
+        z.nextin = buf;
+        z.nextinindex = 0;
+        z.availin = 0;
+    }
+
+    public void level(int level) {
+        z.deflateInit(level);
+    }
+
+    @Override
+    public int read() throws IOException {
+        if (read(buf1, 0, 1) == -1) {
+            return (-1);
+        }
+        return (buf1[0] & 0xFF);
+    }
+
+    private boolean nomoreinput = false;
+
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        if (len == 0) {
+            return (0);
+        }
+        int err;
+        z.nextout = b;
+        z.nextoutindex = off;
+        z.availout = len;
+        do {
+            if ((z.availin == 0) && (!nomoreinput)) { // if buffer is empty and more input is available, refill it
+                z.nextinindex = 0;
+                z.availin = in.read(buf, 0, bufsize); //(bufsize<z.availout ? bufsize : z.availout));
+                if (z.availin == -1) {
+                    z.availin = 0;
+                    nomoreinput = true;
+                }
+            }
+            if (compress) {
+                err = z.deflate(flush);
+            } else {
+                err = z.inflate(flush);
+            }
+            if (nomoreinput && (err == ZConstants.Z_BUF_ERROR)) {
+                return (-1);
+            }
+            if (err != ZConstants.Z_OK && err != ZConstants.Z_STREAM_END) {
+                throw new IOException((compress ? "de" : "in") + "flating: " + z.msg);
+            }
+            if ((nomoreinput || err == ZConstants.Z_STREAM_END) && (z.availout == len)) {
+                return (-1);
+            }
+        } while (z.availout == len && err == ZConstants.Z_OK);
+        return len - z.availout;
+    }
+
+    public long skip(long n) throws IOException {
+        int len = 512;
+        if (n < len) {
+            len = (int) n;
+        }
+        byte[] tmp = new byte[len];
+        return read(tmp, 0, len);
+    }
+
+    public int getFlushMode() {
+        return (flush);
+    }
+
+    public void setFlushMode(int flush) {
+        this.flush = flush;
+    }
+
+    /**
+     * Returns the total number of bytes input so far.
+     */
+    public long getTotalIn() {
+        return z.totalin;
+    }
+
+    /**
+     * Returns the total number of bytes output so far.
+     */
+    public long getTotalOut() {
+        return z.totalout;
+    }
+
+    @Override
+    public void close() throws IOException {
+        in.close();
+    }
+}
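ZInputStream gives the usual java.io filter-stream surface over the inflate machinery above: wrap any InputStream and read plain bytes out. A minimal decompression sketch (the class name InflateExample and the 512-byte chunk size are illustrative, not part of the library):

    import org.xbib.io.compress.zlib.ZInputStream;

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    public class InflateExample {

        // Inflate a zlib-wrapped byte[] back into plain bytes.
        public static byte[] inflate(byte[] compressed) throws IOException {
            try (ZInputStream zin = new ZInputStream(new ByteArrayInputStream(compressed))) {
                ByteArrayOutputStream plain = new ByteArrayOutputStream();
                byte[] chunk = new byte[512];
                int n;
                // read() returns -1 once the underlying zlib stream is exhausted
                while ((n = zin.read(chunk, 0, chunk.length)) != -1) {
                    plain.write(chunk, 0, n);
                }
                return plain.toByteArray();
            }
        }
    }

ZOutputStream, next, is the mirror image on the write side: bytes written to it are deflated into the wrapped OutputStream.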
diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZOutputStream.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZOutputStream.java
new file mode 100644
--- /dev/null
+++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZOutputStream.java
+
+package org.xbib.io.compress.zlib;
+
+import java.io.FilterOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+public class ZOutputStream extends FilterOutputStream {
+
+    protected ZStream z = new ZStream();
+    protected int bufsize = 512;
+    protected int flush = ZConstants.Z_NO_FLUSH;
+    protected byte[] buf = new byte[bufsize];
+    protected byte[] buf1 = new byte[1];
+    protected boolean compress;
+
+    public ZOutputStream(OutputStream out) {
+        super(out);
+        z.inflateInit();
+        compress = false;
+    }
+
+    public ZOutputStream(OutputStream out, int level) {
+        this(out, level, false);
+    }
+
+    public ZOutputStream(OutputStream out, int level, boolean nowrap) {
+        super(out);
+        z.deflateInit(level, nowrap);
+        compress = true;
+    }
+
+    @Override
+    public void write(int b) throws IOException {
+        buf1[0] = (byte) b;
+        write(buf1, 0, 1);
+    }
+
+    @Override
+    public void write(byte[] b, int off, int len) throws IOException {
+        if (len == 0) {
+            return;
+        }
+        int err;
+        byte[] b1 = new byte[len];
+        System.arraycopy(b, off, b1, 0, len);
+        z.nextin = b1;
+        z.nextinindex = 0;
+        z.availin = len;
+        do {
+            z.nextout = buf;
+            z.nextoutindex = 0;
+            z.availout = bufsize;
+            if (compress) {
+                err = z.deflate(flush);
+            } else {
+                err = z.inflate(flush);
+            }
+            if (err != ZConstants.Z_OK) {
+                throw new IOException((compress ? "de" : "in") + "flating: " + z.msg);
+            }
+            out.write(buf, 0, bufsize - z.availout);
+        } while (z.availin > 0 || z.availout == 0);
+    }
+
+    public int getFlushMode() {
+        return (flush);
+    }
+
+    public void setFlushMode(int flush) {
+        this.flush = flush;
+    }
+
+    public void finish() throws IOException {
+        int err;
+        do {
+            z.nextout = buf;
+            z.nextoutindex = 0;
+            z.availout = bufsize;
+            if (compress) {
+                err = z.deflate(ZConstants.Z_FINISH);
+            } else {
+                err = z.inflate(ZConstants.Z_FINISH);
+            }
+            if (err != ZConstants.Z_STREAM_END && err != ZConstants.Z_OK) {
+                throw new IOException((compress ? "de" : "in") + "flating: " + z.msg);
+            }
+            if (bufsize - z.availout > 0) {
+                out.write(buf, 0, bufsize - z.availout);
+            }
+        } while (z.availin > 0 || z.availout == 0);
+        flush();
+    }
+
+    public void end() {
+        if (z == null) {
+            return;
+        }
+        if (compress) {
+            z.deflateEnd();
+        } else {
+            z.inflateEnd();
+        }
+        z.free();
+        z = null;
+    }
+
+    @Override
+    public void close() throws IOException {
+        try {
+            try {
+                finish();
+            } catch (IOException ignored) {
+            }
+        } finally {
+            end();
+            out.close();
+            out = null;
+        }
+    }
+
+    /**
+     * Returns the total number of bytes input so far.
+     */
+    public long getTotalIn() {
+        return z.totalin;
+    }
+
+    /**
+     * Returns the total number of bytes output so far.
+     */
+    public long getTotalOut() {
+        return z.totalout;
+    }
+
+    @Override
+    public void flush() throws IOException {
+        out.flush();
+    }
+}
diff --git a/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZStream.java b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZStream.java
new file mode 100644
index 0000000..4988f1d
--- /dev/null
+++ b/io-compress-zlib/src/main/java/org/xbib/io/compress/zlib/ZStream.java
@@ -0,0 +1,175 @@
+
+package org.xbib.io.compress.zlib;
+
+public class ZStream {
+
+    private final static int MAX_WBITS = 15; // 32K LZ77 window
+    private final static int DEF_WBITS = MAX_WBITS;
+    private final static int Z_STREAM_ERROR = -2;
+    protected byte[] nextin; // next input byte
+    protected int nextinindex;
+    protected int availin; // number of bytes available at next_in
+    protected long totalin; // total nb of input bytes read so far
+    protected byte[] nextout; // next output byte should be put there
+    protected int nextoutindex;
+    protected int availout; // remaining free space at next_out
+    protected long totalout; // total nb of bytes output so far
+    protected String msg;
+    protected Deflate dstate;
+    protected Inflate istate;
+    protected int dataType; // best guess about the data type: ascii or binary
+    protected long adler;
+
+    final public int inflateInit() {
+        return inflateInit(DEF_WBITS);
+    }
+
+    final public int inflateInit(boolean nowrap) {
+        return inflateInit(DEF_WBITS, nowrap);
+    }
+
+    final public int inflateInit(int w) {
+        return inflateInit(w, false);
+    }
+
+    final public int inflateInit(int w, boolean nowrap) {
+        istate = new Inflate();
+        return istate.inflateInit(this, nowrap ? -w : w);
+    }
+
+    final public int inflate(int f) {
+        if (istate == null) {
+            return Z_STREAM_ERROR;
+        }
+        return istate.inflate(this, f);
+    }
+
+    final public int inflateEnd() {
+        if (istate == null) {
+            return Z_STREAM_ERROR;
+        }
+        int ret = istate.inflateEnd(this);
+        istate = null;
+        return ret;
+    }
+
+    final public int inflateSync() {
+        if (istate == null) {
+            return Z_STREAM_ERROR;
+        }
+        return istate.inflateSync(this);
+    }
+
+    final public int inflateSetDictionary(byte[] dictionary, int dictLength) {
+        if (istate == null) {
+            return Z_STREAM_ERROR;
+        }
+        return istate.inflateSetDictionary(this, dictionary, dictLength);
+    }
+
+    final public int deflateInit(int level) {
+        return deflateInit(level, MAX_WBITS);
+    }
+
+    final public int deflateInit(int level, boolean nowrap) {
+        return deflateInit(level, MAX_WBITS, nowrap);
+    }
+
+    final public int deflateInit(int level, int bits) {
+        return deflateInit(level, bits, false);
+    }
+
+    final public int deflateInit(int level, int bits, boolean nowrap) {
+        dstate = new Deflate();
+        return dstate.deflateInit(this, level, nowrap ?
-bits : bits); + } + + final public int deflate(int flush) { + if (dstate == null) { + return Z_STREAM_ERROR; + } + return dstate.deflate(this, flush); + } + + final public int deflateEnd() { + if (dstate == null) { + return Z_STREAM_ERROR; + } + int ret = dstate.deflateEnd(); + dstate = null; + return ret; + } + + final public int deflateParams(int level, int strategy) { + if (dstate == null) { + return Z_STREAM_ERROR; + } + return dstate.deflateParams(this, level, strategy); + } + + final public int deflateSetDictionary(byte[] dictionary, int dictLength) { + if (dstate == null) { + return Z_STREAM_ERROR; + } + return dstate.deflateSetDictionary(this, dictionary, dictLength); + } + + // Flush as much pending output as possible. All deflate() output goes + // through this function so some applications may wish to modify it + // to avoid allocating a large strm->next_out buffer and copying into it. + // (See also read_buf()). + protected void flushPending() { + int len = dstate.getPending(); + + if (len > availout) { + len = availout; + } + if (len == 0) { + return; + } + + System.arraycopy(dstate.getPendingBuf(), dstate.getPendingOut(), + nextout, nextoutindex, len); + + nextoutindex += len; + dstate.setPendingOut(dstate.getPendingOut() + len); + totalout += len; + availout -= len; + dstate.setPending(dstate.getPending() - len); + if (dstate.getPending() == 0) { + dstate.setPendingOut(0); + } + } + + // Read a new buffer from the current input stream, update the adler32 + // and total number of bytes read. All deflate() input goes through + // this function so some applications may wish to modify it to avoid + // allocating a large strm->next_in buffer and copying from it. + // (See also flush_pending()). + protected int readBuf(byte[] buf, int start, int size) { + int len = availin; + + if (len > size) { + len = size; + } + if (len == 0) { + return 0; + } + + availin -= len; + + if (dstate.getNoHeader() == 0) { + adler = Adler32.adler32(adler, nextin, nextinindex, len); + } + System.arraycopy(nextin, nextinindex, buf, start, len); + nextinindex += len; + totalin += len; + return len; + } + + public void free() { + nextin = null; + nextout = null; + msg = null; + } +} diff --git a/io-compress-zlib/src/test/java/org/xbib/io/compress/zlib/ZlibTest.java b/io-compress-zlib/src/test/java/org/xbib/io/compress/zlib/ZlibTest.java new file mode 100644 index 0000000..c145bee --- /dev/null +++ b/io-compress-zlib/src/test/java/org/xbib/io/compress/zlib/ZlibTest.java @@ -0,0 +1,350 @@ +package org.xbib.io.compress.zlib; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import org.junit.jupiter.api.Test; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; + +public class ZlibTest { + + @Test + public void helloWorld() throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + ZOutputStream zOut = new ZOutputStream(out, ZConstants.Z_BEST_COMPRESSION, false); + ObjectOutputStream objOut = new ObjectOutputStream(zOut); + String helloWorld = "Hello World!"; + objOut.writeObject(helloWorld); + zOut.close(); + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + ZInputStream zIn = new ZInputStream(in); + ObjectInputStream objIn = new ObjectInputStream(zIn); + assertEquals("Hello World!", objIn.readObject()); + } + private final static byte[] hello = "hello, hello! 
".getBytes(); + + + static { + hello[hello.length - 1] = 0; + } + + private void checkError(ZStream z, int err, String msg) throws Exception { + if (err != ZConstants.Z_OK) { + throw new RuntimeException(msg + " " + z.msg + " error: " + err); + } + } + + @Test + public void testDeflateInflate() throws Exception { + int err; + int comprLen = 40000; + int uncomprLen = comprLen; + byte[] compr = new byte[comprLen]; + byte[] uncompr = new byte[uncomprLen]; + + ZStream stream = new ZStream(); + + err = stream.deflateInit(ZConstants.Z_DEFAULT_COMPRESSION); + checkError(stream, err, "deflateInit"); + + stream.nextin = hello; + stream.nextinindex = 0; + + stream.nextout = compr; + stream.nextoutindex = 0; + + while (stream.totalin != hello.length && + stream.totalout < comprLen) { + stream.availin = stream.availout = 1; // force small buffers + err = stream.deflate(ZConstants.Z_NO_FLUSH); + checkError(stream, err, "deflate"); + } + + while (true) { + stream.availout = 1; + err = stream.deflate(ZConstants.Z_FINISH); + if (err == ZConstants.Z_STREAM_END) { + break; + } + checkError(stream, err, "deflate"); + } + + err = stream.deflateEnd(); + checkError(stream, err, "deflateEnd"); + + ZStream d_stream = new ZStream(); + + d_stream.nextin = compr; + d_stream.nextinindex = 0; + d_stream.nextout = uncompr; + d_stream.nextoutindex = 0; + + err = d_stream.inflateInit(); + checkError(d_stream, err, "inflateInit"); + + while (d_stream.totalout < uncomprLen && + d_stream.totalin < comprLen) { + d_stream.availin = d_stream.availout = 1; /* force small buffers */ + err = d_stream.inflate(ZConstants.Z_NO_FLUSH); + if (err == ZConstants.Z_STREAM_END) { + break; + } + checkError(d_stream, err, "inflate"); + } + + err = d_stream.inflateEnd(); + checkError(d_stream, err, "inflateEnd"); + + int i = 0; + for (; i < hello.length; i++) { + if (hello[i] == 0) { + break; + } + } + int j = 0; + for (; j < uncompr.length; j++) { + if (uncompr[j] == 0) { + break; + } + } + + if (i == j) { + for (i = 0; i < j; i++) { + if (hello[i] != uncompr[i]) { + break; + } + } + if (i == j) { + return; + } + } else { + throw new RuntimeException("bad inflate"); + } + } + private final static byte[] dictionary = "hello ".getBytes(); + + + static { + dictionary[dictionary.length - 1] = 0; + } + + @Test + public void testDictDeflateInflate() throws Exception { + int err; + int comprLen = 40000; + int uncomprLen = comprLen; + byte[] uncompr = new byte[uncomprLen]; + byte[] compr = new byte[comprLen]; + long dictId; + + ZStream stream = new ZStream(); + err = stream.deflateInit(ZConstants.Z_BEST_COMPRESSION); + checkError(stream, err, "deflateInit"); + + err = stream.deflateSetDictionary(dictionary, dictionary.length); + checkError(stream, err, "deflateSetDictionary"); + + dictId = stream.adler; + + stream.nextout = compr; + stream.nextoutindex = 0; + stream.availout = comprLen; + + stream.nextin = hello; + stream.nextinindex = 0; + stream.availin = hello.length; + + err = stream.deflate(ZConstants.Z_FINISH); + if (err != ZConstants.Z_STREAM_END) { + throw new RuntimeException("deflate should report Z_STREAM_END"); + } + err = stream.deflateEnd(); + checkError(stream, err, "deflateEnd"); + + ZStream d_stream = new ZStream(); + + d_stream.nextin = compr; + d_stream.nextinindex = 0; + d_stream.availin = comprLen; + + err = d_stream.inflateInit(); + checkError(d_stream, err, "inflateInit"); + d_stream.nextout = uncompr; + d_stream.nextoutindex = 0; + d_stream.availout = uncomprLen; + + while (true) { + err = 
d_stream.inflate(ZConstants.Z_NO_FLUSH); + if (err == ZConstants.Z_STREAM_END) { + break; + } + if (err == ZConstants.Z_NEED_DICT) { + if ((int) d_stream.adler != (int) dictId) { + throw new RuntimeException("unexpected dictionary"); + } + err = d_stream.inflateSetDictionary(dictionary, dictionary.length); + } + checkError(d_stream, err, "inflate with dict"); + } + + err = d_stream.inflateEnd(); + checkError(d_stream, err, "inflateEnd"); + + int j = 0; + for (; j < uncompr.length; j++) { + if (uncompr[j] == 0) { + break; + } + } + } + + @Test + public void testFlushSync() throws Exception { + int err; + int comprLen = 40000; + int uncomprLen = comprLen; + byte[] compr = new byte[comprLen]; + byte[] uncompr = new byte[uncomprLen]; + int len = hello.length; + + ZStream stream = new ZStream(); + + err = stream.deflateInit(ZConstants.Z_DEFAULT_COMPRESSION); + checkError(stream, err, "deflate"); + + stream.nextin = hello; + stream.nextinindex = 0; + stream.nextout = compr; + stream.nextoutindex = 0; + stream.availin = 3; + stream.availout = comprLen; + + err = stream.deflate(ZConstants.Z_FULL_FLUSH); + checkError(stream, err, "deflate"); + + compr[3]++; // force an error in first compressed block + stream.availin = len - 3; + + err = stream.deflate(ZConstants.Z_FINISH); + if (err != ZConstants.Z_STREAM_END) { + checkError(stream, err, "deflate"); + } + err = stream.deflateEnd(); + checkError(stream, err, "deflateEnd"); + comprLen = (int) (stream.totalout); + + ZStream d_stream = new ZStream(); + + d_stream.nextin = compr; + d_stream.nextinindex = 0; + d_stream.availin = 2; + + err = d_stream.inflateInit(); + checkError(d_stream, err, "inflateInit"); + d_stream.nextout = uncompr; + d_stream.nextoutindex = 0; + d_stream.availout = uncomprLen; + + err = d_stream.inflate(ZConstants.Z_NO_FLUSH); + checkError(d_stream, err, "inflate"); + + d_stream.availin = comprLen - 2; + + err = d_stream.inflateSync(); + checkError(d_stream, err, "inflateSync"); + + err = d_stream.inflate(ZConstants.Z_FINISH); + if (err != ZConstants.Z_DATA_ERROR) { + throw new RuntimeException("inflate should report DATA_ERROR"); + } + + err = d_stream.inflateEnd(); + checkError(d_stream, err, "inflateEnd"); + + int j = 0; + for (; j < uncompr.length; j++) { + if (uncompr[j] == 0) { + break; + } + } + } + + @Test + public void testLargeDeflateInflate() throws Exception { + int err; + int comprLen = 40000; + int uncomprLen = comprLen; + byte[] compr = new byte[comprLen]; + byte[] uncompr = new byte[uncomprLen]; + + ZStream stream = new ZStream(); + + err = stream.deflateInit(ZConstants.Z_BEST_SPEED); + checkError(stream, err, "deflateInit"); + + stream.nextout = compr; + stream.nextoutindex = 0; + stream.availout = comprLen; + + // At this point, uncompr is still mostly zeroes, so it should compress + // very well: + stream.nextin = uncompr; + stream.availin = uncomprLen; + err = stream.deflate(ZConstants.Z_NO_FLUSH); + checkError(stream, err, "deflate"); + if (stream.availin != 0) { + throw new RuntimeException("deflate not greedy"); + } + + // Feed in already compressed data and switch to no compression: + stream.deflateParams(ZConstants.Z_NO_COMPRESSION, ZConstants.Z_DEFAULT_STRATEGY); + stream.nextin = compr; + stream.nextinindex = 0; + stream.availin = comprLen / 2; + err = stream.deflate(ZConstants.Z_NO_FLUSH); + checkError(stream, err, "deflate"); + + // Switch back to compressing mode: + stream.deflateParams(ZConstants.Z_BEST_COMPRESSION, ZConstants.Z_FILTERED); + stream.nextin = uncompr; + stream.nextinindex = 0; + 
stream.availin = uncomprLen; + err = stream.deflate(ZConstants.Z_NO_FLUSH); + checkError(stream, err, "deflate"); + + err = stream.deflate(ZConstants.Z_FINISH); + if (err != ZConstants.Z_STREAM_END) { + throw new RuntimeException("deflate should report Z_STREAM_END"); + } + err = stream.deflateEnd(); + checkError(stream, err, "deflateEnd"); + + ZStream d_stream = new ZStream(); + + d_stream.nextin = compr; + d_stream.nextinindex = 0; + d_stream.availin = comprLen; + + err = d_stream.inflateInit(); + checkError(d_stream, err, "inflateInit"); + + while (true) { + d_stream.nextout = uncompr; + d_stream.nextoutindex = 0; + d_stream.availout = uncomprLen; + err = d_stream.inflate(ZConstants.Z_NO_FLUSH); + if (err == ZConstants.Z_STREAM_END) { + break; + } + checkError(d_stream, err, "inflate large"); + } + + err = d_stream.inflateEnd(); + checkError(d_stream, err, "inflateEnd"); + + if (d_stream.totalout != 2 * uncomprLen + comprLen / 2) { + throw new RuntimeException("bad large inflate: " + d_stream.totalout); + } + } +} diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 0000000..6d59c8e --- /dev/null +++ b/settings.gradle @@ -0,0 +1,12 @@ +include 'io-compress-bzip2' +include 'io-compress-lzf' +include 'io-compress-xz' +include 'io-compress-zlib' +include 'io-archive' +include 'io-archive-ar' +include 'io-archive-cpio' +include 'io-archive-dump' +include 'io-archive-jar' +include 'io-archive-tar' +include 'io-archive-zip' +include 'io-codec' \ No newline at end of file
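The tests above drive ZStream through the classic zlib calling convention: point nextin/nextinindex/availin at the input, give nextout/nextoutindex/availout free output space, and call deflate or inflate until Z_STREAM_END comes back. A condensed round trip in that style is sketched below; it has to live in the org.xbib.io.compress.zlib package, because the ZStream buffer fields are protected, and the RoundTrip class name and the 64 KiB buffers are illustrative assumptions sized for a short input:

    package org.xbib.io.compress.zlib;

    public class RoundTrip {

        public static void main(String[] args) {
            byte[] input = "hello, hello!".getBytes();
            byte[] compr = new byte[65536];
            byte[] uncompr = new byte[65536];

            // Deflate in one call; Z_FINISH flushes and terminates the stream.
            ZStream def = new ZStream();
            def.deflateInit(ZConstants.Z_DEFAULT_COMPRESSION);
            def.nextin = input;
            def.nextinindex = 0;
            def.availin = input.length;
            def.nextout = compr;
            def.nextoutindex = 0;
            def.availout = compr.length;
            if (def.deflate(ZConstants.Z_FINISH) != ZConstants.Z_STREAM_END) {
                throw new RuntimeException("deflate: " + def.msg);
            }
            int comprLen = (int) def.totalout;
            def.deflateEnd();

            // Inflate it back the same way; with all input present and enough
            // output space, a single call reaches the end of stream.
            ZStream inf = new ZStream();
            inf.inflateInit();
            inf.nextin = compr;
            inf.nextinindex = 0;
            inf.availin = comprLen;
            inf.nextout = uncompr;
            inf.nextoutindex = 0;
            inf.availout = uncompr.length;
            if (inf.inflate(ZConstants.Z_NO_FLUSH) != ZConstants.Z_STREAM_END) {
                throw new RuntimeException("inflate: " + inf.msg);
            }
            int uncomprLen = (int) inf.totalout;
            inf.inflateEnd();

            System.out.println(new String(uncompr, 0, uncomprLen)); // hello, hello!
        }
    }

For callers outside the package, ZOutputStream and ZInputStream wrap the same machinery behind ordinary streams, as the helloWorld() test demonstrates.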