initial import

This commit is contained in:
Jörg Prante 2016-08-18 20:39:56 +02:00
commit 931e2b9cda
41 changed files with 104466 additions and 0 deletions

15
.gitignore vendored Normal file
View file

@ -0,0 +1,15 @@
/data
/work
/logs
/.idea
/target
.DS_Store
*.iml
/.settings
/.classpath
/.project
/.gradle
/build
/plugins
/sessions
*~

8
.travis.yml Normal file
View file

@ -0,0 +1,8 @@
# Travis CI configuration: container-based build on Oracle JDK 8.
sudo: false
language: java
jdk:
  - oraclejdk8
# The build runs through the Gradle wrapper, so the Gradle caches are what must
# be preserved between builds. Lock files and plugin-resolution data change on
# every run; remove them so the cache is not re-uploaded each time.
before_cache:
  - rm -f $HOME/.gradle/caches/modules-2/modules-2.lock
  - rm -fr $HOME/.gradle/caches/*/plugin-resolution/
cache:
  directories:
    # Still needed because build.gradle resolves from mavenLocal().
    - $HOME/.m2
    - $HOME/.gradle/caches/
    - $HOME/.gradle/wrapper/

39
CREDITS.txt Normal file
View file

@ -0,0 +1,39 @@
These bibliographic character sets were collected and improved
by Jörg Prante <joergprante@gmail.com>
Thanks to:
Library of Congress
The Library of Congress provides an ANSEL code table file,
https://www.loc.gov/marc/specifications/codetables.xml
(see https://www.loc.gov/marc/specifications/specchartables.html), which made the
implementation of the ANSEL/Z39.47 character set possible, including the East
Asian Character Code (EACC) characters.
US-ASCII
The US-ASCII re-implementation was taken from the GNU classpath project.
It is provided as a demonstration of a simple single-byte character set.
The original code was licensed under the GNU General Public License, version 2 or later, with the GNU Classpath linking exception.
Simple ANSEL
The ANSEL charset implementation by Piotr Andzel http://anselcharset.sourceforge.net/
has been included as "simple ANSEL". The original code was licensed under the
GNU Lesser General Public License 3 (LGPL 3.0).
MAB
The MAB Charset Java implementation was developed by Jürgen Kett of
Deutsche Nationalbibliothek (DNB) in 2004 and was licensed under the GNU General Public License 2 (GPL).
MAB-Diskette
This is a CP850 variant which could only be implemented with the help of
Thomas Berger http://www.gymel.com/charsets/MAB-Diskette.html
PicaCharset
An alternative PICA character set implementation of Deutsche Nationalbibliothek (DNB)
The original license was the GNU General Public License 2 (GPL).
ISO 5428:1984, "Greek alphabet coded character set for bibliographic information interchange",
has been implemented with the help of https://en.wikipedia.org/wiki/ISO_5428

76
README.md Normal file
View file

@ -0,0 +1,76 @@
# Bibliographic character sets
This is a collection of bibliographic character sets implemented in
Java.
These character sets are not included in the standard Java
distribution. Most of them predate Unicode and are no longer
developed, but they are still in active use in library application
software.
The reason to provide these character sets is to assist the public
in migrating library data to Unicode, and UTF-8, respectively.
You can include this jar in the classpath; the Java CharsetProvider and
ServiceLoader APIs will then make the character sets available,
e.g. via `Charset.forName(name)`.
This is free software.
Please follow the AGPL license, which requires you to offer the source code
of your project to the public if you make modifications to this program.
All contributions and pull requests are welcome.
If you have questions or find issues, please post them at
https://github.com/xbib/bibliographic-character-sets/issues
## List of character sets included
### ANSEL "ANSI/NISO Z39.47-1993 (R2003) Extended Latin Alphabet Coded Character Set for Bibliographic Use (ANSEL)"
This implementation can only decode from ANSEL / Z39.47.
Included are the following sets specified by the Library of Congress at
https://www.loc.gov/marc/specifications/specchartables.html
Basic Latin (ASCII), Extended Latin (ANSEL), Greek Symbols,
Subscripts, Superscripts, Basic Hebrew, Basic Cyrillic,
Extended Cyrillic, Basic Arabic, Extended Arabic,
Basic Greek, Chinese, Japanese, Korean (EACC)
Usage:
Charset.forName("ANSEL")
### ISO 5426 "Extension of the Latin alphabet coded character set for bibliographic information interchange"
Usage:
Charset.forName("x-MAB")
### ISO 5428 "Greek alphabet coded character set for bibliographic information interchange"
### Pica (a variant of the INTERMARC character set, a 1979 French/Danish adoption of USMARC/UKMARC)
### MAB-Diskette (a variant of CP850 character set)
### US-ASCII (re-implementation for demonstration purposes, disabled by default)
See also the CREDITS.txt for acknowledgements.
# License
Copyright (C) 2016 Jörg Prante and the xbib organization
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

67
build.gradle Normal file
View file

@ -0,0 +1,67 @@
// Print some diagnostics about the build environment at configuration time.
println "Host: " + java.net.InetAddress.getLocalHost()
println "Gradle: " + gradle.gradleVersion + " JVM: " + org.gradle.internal.jvm.Jvm.current() + " Groovy: " + GroovySystem.getVersion()
println "Build: group: '${project.group}', name: '${project.name}', version: '${project.version}'"

apply plugin: 'java'
apply plugin: 'maven'
apply plugin: 'signing'
apply plugin: 'findbugs'
apply plugin: 'pmd'
apply plugin: 'checkstyle'

repositories {
    mavenLocal()
    mavenCentral()
    jcenter()
    maven {
        url "http://xbib.org/repository"
    }
}

configurations {
    // Holds the Maven wagon used by the upload task in gradle/publish.gradle.
    wagon
}

dependencies {
    testCompile "org.apache.logging.log4j:log4j-core:2.5"
    testCompile('junit:junit:4.12')
    wagon 'org.apache.maven.wagon:wagon-ssh-external:2.10'
}

sourceCompatibility = JavaVersion.VERSION_1_8
targetCompatibility = JavaVersion.VERSION_1_8

// Sources contain non-ASCII characters, so the compiler encoding is fixed to UTF-8.
[compileJava, compileTestJava]*.options*.encoding = 'UTF-8'

tasks.withType(JavaCompile) {
    options.compilerArgs << "-Xlint:deprecation,unchecked"
}

test {
    testLogging {
        showStandardStreams = false
        exceptionFormat = 'full'
    }
}

// FindBugs findings are reported as HTML but never fail the build.
tasks.withType(FindBugs) {
    ignoreFailures = true
    reports {
        xml.enabled = false
        html.enabled = true
    }
}

task sourcesJar(type: Jar, dependsOn: classes) {
    classifier 'sources'
    from sourceSets.main.allSource
}

task javadocJar(type: Jar, dependsOn: javadoc) {
    classifier 'javadoc'
    // Without a source the jar would be empty; package the generated Javadoc.
    from javadoc.destinationDir
}

artifacts {
    archives sourcesJar, javadocJar
}

// Artifacts are signed only when a signing key has been configured.
if (project.hasProperty('signing.keyId')) {
    signing {
        sign configurations.archives
    }
}

apply from: 'gradle/publish.gradle'

View file

@ -0,0 +1,323 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE module PUBLIC
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<!-- This is a checkstyle configuration file. For descriptions of
what the following rules do, please see the checkstyle configuration
page at http://checkstyle.sourceforge.net/config.html -->
<module name="Checker">
<module name="FileTabCharacter">
<!-- Checks that there are no tab characters in the file.
-->
</module>
<module name="NewlineAtEndOfFile">
<property name="lineSeparator" value="lf"/>
</module>
<module name="RegexpSingleline">
<!-- Checks that FIXME is not used in comments. TODO is preferred.
-->
<property name="format" value="((//.*)|(\*.*))FIXME" />
<property name="message" value='TODO is preferred to FIXME. e.g. "TODO(johndoe): Refactor when v2 is released."' />
</module>
<module name="RegexpSingleline">
<!-- Checks that TODOs are named. (Actually, just that they are followed
by an open paren.)
-->
<property name="format" value="((//.*)|(\*.*))TODO[^(]" />
<property name="message" value='All TODOs should be named. e.g. "TODO(johndoe): Refactor when v2 is released."' />
</module>
<module name="JavadocPackage">
<!-- Checks that each Java package has a Javadoc file used for commenting.
Only allows a package-info.java, not package.html. -->
</module>
<!-- All Java AST specific tests live under TreeWalker module. -->
<module name="TreeWalker">
<!--
IMPORT CHECKS
-->
<module name="RedundantImport">
<!-- Checks for redundant import statements. -->
<property name="severity" value="error"/>
</module>
<module name="ImportOrder">
<!-- Checks for out of order import statements. -->
<property name="severity" value="warning"/>
<property name="groups" value="com.google,android,junit,net,org,java,javax"/>
<!-- This ensures that static imports go first. -->
<property name="option" value="top"/>
<property name="tokens" value="STATIC_IMPORT, IMPORT"/>
</module>
<!--
JAVADOC CHECKS
-->
<!-- Checks for Javadoc comments. -->
<!-- See http://checkstyle.sf.net/config_javadoc.html -->
<module name="JavadocMethod">
<property name="scope" value="protected"/>
<property name="severity" value="warning"/>
<property name="allowMissingJavadoc" value="true"/>
<property name="allowMissingParamTags" value="true"/>
<property name="allowMissingReturnTag" value="true"/>
<property name="allowMissingThrowsTags" value="true"/>
<property name="allowThrowsTagsForSubclasses" value="true"/>
<property name="allowUndeclaredRTE" value="true"/>
</module>
<module name="JavadocType">
<property name="scope" value="protected"/>
<property name="severity" value="error"/>
</module>
<module name="JavadocStyle">
<property name="severity" value="warning"/>
</module>
<!--
NAMING CHECKS
-->
<!-- Item 38 - Adhere to generally accepted naming conventions -->
<module name="PackageName">
<!-- Validates identifiers for package names against the
supplied expression. -->
<!-- Here the default checkstyle rule restricts package name parts to
seven characters, this is not in line with common practice at Google.
-->
<property name="format" value="^[a-z]+(\.[a-z][a-z0-9]{1,})*$"/>
<property name="severity" value="warning"/>
</module>
<module name="TypeNameCheck">
<!-- Validates type (class, interface, enum) names against the
expression "^[A-Z][a-zA-Z0-9]*$". -->
<metadata name="altname" value="TypeName"/>
<property name="severity" value="warning"/>
</module>
<module name="ConstantNameCheck">
<!-- Validates non-private, static, final fields against the
expression "^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$". -->
<metadata name="altname" value="ConstantName"/>
<property name="applyToPublic" value="true"/>
<property name="applyToProtected" value="true"/>
<property name="applyToPackage" value="true"/>
<property name="applyToPrivate" value="false"/>
<property name="format" value="^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$"/>
<message key="name.invalidPattern"
value="Variable ''{0}'' should be in ALL_CAPS (if it is a constant) or be private (otherwise)."/>
<property name="severity" value="warning"/>
</module>
<module name="StaticVariableNameCheck">
<!-- Validates static, non-final fields against the supplied
expression "^[a-z][a-zA-Z0-9]*_?$". -->
<metadata name="altname" value="StaticVariableName"/>
<property name="applyToPublic" value="true"/>
<property name="applyToProtected" value="true"/>
<property name="applyToPackage" value="true"/>
<property name="applyToPrivate" value="true"/>
<property name="format" value="^[a-z][a-zA-Z0-9]*_?$"/>
<property name="severity" value="warning"/>
</module>
<module name="MemberNameCheck">
<!-- Validates non-static members against the supplied expression. -->
<metadata name="altname" value="MemberName"/>
<property name="applyToPublic" value="true"/>
<property name="applyToProtected" value="true"/>
<property name="applyToPackage" value="true"/>
<property name="applyToPrivate" value="true"/>
<property name="format" value="^[a-z][a-zA-Z0-9]*$"/>
<property name="severity" value="warning"/>
</module>
<module name="MethodNameCheck">
<!-- Validates identifiers for method names. -->
<metadata name="altname" value="MethodName"/>
<property name="format" value="^[a-z][a-zA-Z0-9]*(_[a-zA-Z0-9]+)*$"/>
<property name="severity" value="warning"/>
</module>
<module name="ParameterName">
<!-- Validates identifiers for method parameters against the
expression "^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning"/>
</module>
<module name="LocalFinalVariableName">
<!-- Validates identifiers for local final variables against the
expression "^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning"/>
</module>
<module name="LocalVariableName">
<!-- Validates identifiers for local variables against the
expression "^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning"/>
</module>
<!--
LENGTH and CODING CHECKS
-->
<module name="LineLength">
<!-- Checks if a line is too long. -->
<property name="max" value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.max}" default="128"/>
<property name="severity" value="error"/>
<!--
The default ignore pattern exempts the following elements:
- import statements
- long URLs inside comments
-->
<property name="ignorePattern"
value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.ignorePattern}"
default="^(package .*;\s*)|(import .*;\s*)|( *(\*|//).*https?://.*)$"/>
</module>
<module name="LeftCurly">
<!-- Checks for placement of the left curly brace ('{'). -->
<property name="severity" value="warning"/>
</module>
<module name="RightCurly">
<!-- Checks right curlies on CATCH, ELSE, and TRY blocks are on
the same line. e.g., the following example is fine:
<pre>
if {
...
} else
</pre>
-->
<!-- This next example is not fine:
<pre>
if {
...
}
else
</pre>
-->
<property name="option" value="same"/>
<property name="severity" value="warning"/>
</module>
<!-- Checks for braces around if, else, for, while and do blocks -->
<module name="NeedBraces">
<property name="severity" value="warning"/>
<property name="tokens" value="LITERAL_IF, LITERAL_ELSE, LITERAL_FOR, LITERAL_WHILE, LITERAL_DO"/>
</module>
<module name="UpperEll">
<!-- Checks that long constants are defined with an upper ell.-->
<property name="severity" value="error"/>
</module>
<module name="FallThrough">
<!-- Warn about falling through to the next case statement. Similar to
javac -Xlint:fallthrough, but the check is suppressed if a single-line comment
on the last non-blank line preceding the fallen-into case contains 'fall through' (or
some other variants which we don't publicize to promote consistency).
-->
<property name="reliefPattern"
value="fall through|Fall through|fallthru|Fallthru|falls through|Falls through|fallthrough|Fallthrough|No break|NO break|no break|continue on"/>
<property name="severity" value="error"/>
</module>
<!--
MODIFIERS CHECKS
-->
<module name="ModifierOrder">
<!-- Warn if modifier order is inconsistent with JLS3 8.1.1, 8.3.1, and
8.4.3. The prescribed order is:
public, protected, private, abstract, static, final, transient, volatile,
synchronized, native, strictfp
-->
</module>
<!--
WHITESPACE CHECKS
-->
<module name="WhitespaceAround">
<!-- Checks that various tokens are surrounded by whitespace.
This includes most binary operators and keywords followed
by regular or curly braces.
-->
<property name="tokens" value="ASSIGN, BAND, BAND_ASSIGN, BOR,
BOR_ASSIGN, BSR, BSR_ASSIGN, BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN,
EQUAL, GE, GT, LAND, LE, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE, LOR, LT, MINUS,
MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL, PLUS, PLUS_ASSIGN, QUESTION,
SL, SL_ASSIGN, SR_ASSIGN, STAR, STAR_ASSIGN"/>
<property name="severity" value="error"/>
</module>
<module name="WhitespaceAfter">
<!-- Checks that commas, semicolons and typecasts are followed by
whitespace.
-->
<property name="tokens" value="COMMA, SEMI, TYPECAST"/>
</module>
<module name="NoWhitespaceAfter">
<!-- Checks that there is no whitespace after various unary operators.
Linebreaks are allowed.
-->
<property name="tokens" value="BNOT, DEC, DOT, INC, LNOT, UNARY_MINUS,
UNARY_PLUS"/>
<property name="allowLineBreaks" value="true"/>
<property name="severity" value="error"/>
</module>
<module name="NoWhitespaceBefore">
<!-- Checks that there is no whitespace before semicolons, dots and the
postfix increment/decrement operators.
Linebreaks are allowed.
-->
<property name="tokens" value="SEMI, DOT, POST_DEC, POST_INC"/>
<property name="allowLineBreaks" value="true"/>
<property name="severity" value="error"/>
</module>
<module name="ParenPad">
<!-- Checks that there is no whitespace before close parens or after
open parens.
-->
<property name="severity" value="warning"/>
</module>
</module>
</module>

9
gradle.properties Normal file
View file

@ -0,0 +1,9 @@
# Project coordinates.
group = org.xbib
version = 1.0.0
org.gradle.daemon = true
# Values in a properties file are taken literally: quotes become part of the
# value and '+' does not concatenate, so every value is written out in full.
name = bibliographic-character-sets
description = Bibliographic character sets
user = xbib
scmUrl = https://github.com/xbib/bibliographic-character-sets
scmConnection = scm:git:git://github.com/xbib/bibliographic-character-sets.git
scmDeveloperConnection = scm:git:git://github.com/xbib/bibliographic-character-sets.git

62
gradle/publish.gradle Normal file
View file

@ -0,0 +1,62 @@
// Uploads the 'archives' configuration, including its descriptor, to the
// repository at scpexe://xbib.org/repository.
// The deployer is only configured when the project property 'xbibUsername'
// is defined; 'xbibPrivateKey' is then read as well.
task xbibUpload(type: Upload) {
configuration = configurations.archives
uploadDescriptor = true
repositories {
if (project.hasProperty("xbibUsername")) {
mavenDeployer {
// The 'wagon' configuration supplies the transport used for the scpexe URL.
configuration = configurations.wagon
repository(url: uri('scpexe://xbib.org/repository')) {
authentication(userName: xbibUsername, privateKey: xbibPrivateKey)
}
}
}
}
}
// Uploads the 'archives' configuration, including its descriptor, to the
// release and snapshot repositories given by 'ossrhReleaseUrl' and
// 'ossrhSnapshotUrl'.
// The deployer is only configured when the project property 'ossrhUsername'
// is defined; 'ossrhPassword' and the two URL properties are then read as well.
task mavenCentralUpload(type: Upload) {
configuration = configurations.archives
uploadDescriptor = true
repositories {
if (project.hasProperty('ossrhUsername')) {
mavenDeployer {
// The generated POM is signed before each deployment.
beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) }
repository(url: uri(ossrhReleaseUrl)) {
authentication(userName: ossrhUsername, password: ossrhPassword)
}
snapshotRepository(url: uri(ossrhSnapshotUrl)) {
authentication(userName: ossrhUsername, password: ossrhPassword)
}
// POM metadata; name, description, user and the scm* values are not
// defined in this script and must be provided by the project.
pom.project {
name name
description description
packaging 'jar'
inceptionYear '2016'
url scmUrl
organization {
name 'xbib'
url 'http://xbib.org'
}
developers {
developer {
id user
name 'Jörg Prante'
email 'joergprante@gmail.com'
url 'https://github.com/jprante'
}
}
scm {
url scmUrl
connection scmConnection
developerConnection scmDeveloperConnection
}
licenses {
license {
name 'Affero GNU Public License Version 3'
url 'http://www.gnu.org/licenses/agpl-3.0.html'
}
}
}
}
}
}
}

BIN
gradle/wrapper/gradle-wrapper.jar vendored Normal file

Binary file not shown.

View file

@ -0,0 +1,6 @@
#Thu Aug 18 20:34:33 CEST 2016
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-3.0-bin.zip

169
gradlew vendored Executable file
View file

@ -0,0 +1,169 @@
#!/usr/bin/env bash
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
warn ( ) {
echo "$*"
}
die ( ) {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
function splitJvmOpts() {
JVM_OPTS=("$@")
}
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
cd "$(dirname "$0")"
fi
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"

84
gradlew.bat vendored Normal file
View file

@ -0,0 +1,84 @@
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
if not "%OS%" == "Windows_NT" goto win9xME_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

1
settings.gradle Normal file
View file

@ -0,0 +1 @@
// Sets the name of the root project of this build.
rootProject.name = 'bibliographic-character-sets'

View file

@ -0,0 +1,160 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
/*
Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version.
*/
package org.xbib.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
/**
 * US-ASCII charset: a simple single-byte character set in which every byte
 * value 0..127 maps to the Unicode code point with the same value.
 *
 * <p>Bytes with the high bit set are reported as malformed input when decoding;
 * characters above 127 are reported as unmappable when encoding.
 */
final class ASCII extends Charset {

    ASCII() {
        /*
         * Canonical charset name chosen according to:
         * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
         */
        super("US-ASCII", new String[]{
            /*
             * These names are provided by
             * http://www.iana.org/assignments/character-sets
             */
            "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "ASCII", "ISO646-US", "us",
            "IBM367", "cp367", "csASCII",
            /*
             * These names are provided by
             * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
             */
            "ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646", "windows-20127"});
    }

    /**
     * Tells whether the given charset is contained in this one.
     *
     * @param cs the charset to test
     * @return {@code true} only if {@code cs} is itself an {@link ASCII} instance
     */
    @Override
    public boolean contains(Charset cs) {
        return cs instanceof ASCII;
    }

    /**
     * @return a new decoder for this charset
     */
    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    /**
     * @return a new encoder for this charset
     */
    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * Decodes one byte into one char; negative bytes (0x80..0xFF) are malformed.
     */
    private static final class Decoder extends CharsetDecoder {

        Decoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            while (in.hasRemaining()) {
                byte b = in.get();
                if (b < 0) {
                    // Push the offending byte back so the caller sees it at the buffer position.
                    in.position(in.position() - 1);
                    return CoderResult.malformedForLength(1);
                }
                if (!out.hasRemaining()) {
                    // Nothing was written for this byte; un-read it before reporting overflow.
                    in.position(in.position() - 1);
                    return CoderResult.OVERFLOW;
                }
                out.put((char) b);
            }
            return CoderResult.UNDERFLOW;
        }
    }

    /**
     * Encodes one char into one byte; chars above {@link Byte#MAX_VALUE} are unmappable.
     */
    private static final class Encoder extends CharsetEncoder {

        Encoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            while (in.hasRemaining()) {
                char c = in.get();
                if (c > Byte.MAX_VALUE) {
                    // Push the offending char back so the caller sees it at the buffer position.
                    in.position(in.position() - 1);
                    return CoderResult.unmappableForLength(1);
                }
                if (!out.hasRemaining()) {
                    // Nothing was written for this char; un-read it before reporting overflow.
                    in.position(in.position() - 1);
                    return CoderResult.OVERFLOW;
                }
                out.put((byte) c);
            }
            return CoderResult.UNDERFLOW;
        }
    }
}

View file

@ -0,0 +1,280 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.io.CharArrayWriter;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.Map;
import javax.xml.stream.XMLStreamException;
/**
 * Decode-only charset named "ANSEL" for MARC-8 / ANSI Z39.47 encoded data.
 *
 * <p>The character sets are read once, when the class is initialized, from the
 * class path resource {@code org/xbib/charset/codetables.xml}. If that resource
 * cannot be loaded the charset still works, but bytes are passed through
 * unmapped. Encoding is not supported.</p>
 */
public class AnselCharset extends Charset {

    private static final String CODE_TABLES_RESOURCE = "org/xbib/charset/codetables.xml";

    private static final String BASIC_LATIN = "Basic Latin (ASCII)";

    private static final String EXTENDED_LATIN = "Extended Latin (ANSEL)";

    private static final String EACC = "Chinese, Japanese, Korean (EACC)";

    /** Character sets of all code tables, keyed by character set name. */
    private static final Map<String, AnselCodeTableParser.CharacterSet> characterSetMap;

    static {
        characterSetMap = new LinkedHashMap<>();
        try (InputStream inputStream = openCodeTables()) {
            if (inputStream == null) {
                throw new IOException("resource not found: " + CODE_TABLES_RESOURCE);
            }
            AnselCodeTableParser anselCodeTableParser = new AnselCodeTableParser(inputStream);
            Iterable<AnselCodeTableParser.CodeTable> codeTables = anselCodeTableParser.getCodeTables();
            if (codeTables == null) {
                throw new IOException("unable to parse " + CODE_TABLES_RESOURCE);
            }
            for (AnselCodeTableParser.CodeTable codeTable : codeTables) {
                for (AnselCodeTableParser.CharacterSet characterSet : codeTable.getCharacterSets()) {
                    characterSetMap.put(characterSet.getName(), characterSet);
                }
            }
        } catch (Exception e) {
            // best effort: keep the class usable, the decoder then passes bytes through unmapped
            java.util.logging.Logger.getLogger(AnselCharset.class.getName())
                    .log(java.util.logging.Level.WARNING,
                            "unable to load code tables from " + CODE_TABLES_RESOURCE, e);
        }
    }

    private final Charset encodeCharset;

    /**
     * Creates the charset with the canonical name "ANSEL" and the aliases
     * registered for it in {@link BibliographicCharsetProvider}.
     *
     * @throws XMLStreamException kept for source compatibility with existing callers
     */
    public AnselCharset() throws XMLStreamException {
        super("ANSEL", BibliographicCharsetProvider.aliasesFor("ANSEL"));
        this.encodeCharset = StandardCharsets.UTF_8;
    }

    /**
     * Opens the code table resource, trying the thread context class loader
     * first and the class loader of this class second.
     *
     * @return the resource stream, or {@code null} if the resource was not found
     */
    private static InputStream openCodeTables() {
        ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
        InputStream stream = contextLoader != null
                ? contextLoader.getResourceAsStream(CODE_TABLES_RESOURCE) : null;
        if (stream == null) {
            stream = AnselCharset.class.getResourceAsStream("/" + CODE_TABLES_RESOURCE);
        }
        return stream;
    }

    @Override
    public boolean contains(Charset charset) {
        return charset instanceof AnselCharset;
    }

    /**
     * Tells callers that this charset cannot encode, so they can avoid the
     * exception thrown by {@link #newEncoder()}.
     *
     * @return always {@code false}
     */
    @Override
    public boolean canEncode() {
        return false;
    }

    /**
     * Encoding is not supported.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public CharsetEncoder newEncoder() {
        throw new UnsupportedOperationException();
    }

    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this, encodeCharset.newDecoder());
    }

    /**
     * Stateful decoder. The designated G0/G1 character sets, diacritics waiting
     * for their base character and decoded chars that did not fit into the
     * output buffer are all kept between {@code decodeLoop} calls, so input may
     * be supplied in chunks of any size.
     */
    private static class Decoder extends CharsetDecoder {

        private static final int ESCAPE = 0x1B;

        /** Name of the character set used for bytes 0x21-0x7E. */
        private String g0 = BASIC_LATIN;

        /** Name of the character set used for bytes 0xA1-0xFE. */
        private String g1 = EXTENDED_LATIN;

        /** Diacritics read so far that still wait for their base character. */
        private final StringBuilder diacritics = new StringBuilder();

        /** Decoded chars not yet written to an output buffer. */
        private final StringBuilder ready = new StringBuilder();

        /** Index of the first char in {@link #ready} that is still unwritten. */
        private int readyOffset;

        Decoder(Charset cs, CharsetDecoder baseDecoder) {
            super(cs, baseDecoder.averageCharsPerByte(), baseDecoder.maxCharsPerByte());
        }

        /**
         * Decodes one escape sequence or one character per iteration.
         *
         * <p>A sequence that is cut off by the end of the buffer is left
         * unconsumed and reported as underflow, so it is completed by the next
         * chunk or flagged as malformed by the framework at end of input.</p>
         *
         * @param in the bytes to decode
         * @param out the buffer receiving the decoded chars
         * @return {@code OVERFLOW} if {@code out} is full, otherwise {@code UNDERFLOW}
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            for (;;) {
                if (!drain(out)) {
                    return CoderResult.OVERFLOW;
                }
                if (!in.hasRemaining()) {
                    return CoderResult.UNDERFLOW;
                }
                int mark = in.position();
                char first = (char) (in.get() & 0xFF);
                if (first == ESCAPE) {
                    if (!handleEscapeSequence(in)) {
                        in.position(mark);
                        return CoderResult.UNDERFLOW;
                    }
                    // re-enter the loop: another escape sequence may follow directly
                    continue;
                }
                AnselCodeTableParser.CharacterSet characterSet = null;
                if (isG0(first)) {
                    characterSet = characterSetMap.get(g0);
                } else if (isG1(first)) {
                    characterSet = characterSetMap.get(g1);
                }
                String key = String.valueOf(first);
                if (characterSet != null && characterSet.getLength() != 1) {
                    // multi-byte sets are looked up by three bytes
                    if (in.remaining() < 2) {
                        in.position(mark);
                        return CoderResult.UNDERFLOW;
                    }
                    key = key + (char) (in.get() & 0xFF) + (char) (in.get() & 0xFF);
                }
                AnselCodeTableParser.Code code = characterSet != null ? characterSet.getMarc().get(key) : null;
                char ch = code != null ? code.getUcs() : first;
                if (ch == '\u0000') {
                    // FB, EC - see http://memory.loc.gov/diglib/codetables/45.html#Note1
                    // and http://memory.loc.gov/diglib/codetables/45.html#Note2
                    continue;
                }
                if (isDiacritic(first) || (code != null && code.isCombining())) {
                    diacritics.append(ch);
                } else {
                    // diacritics must be appended in Unicode, but are prepended in MARC-8 / Z39.47
                    ready.append(ch).append(diacritics);
                    diacritics.setLength(0);
                }
            }
        }

        /**
         * Writes chars that are still queued. Diacritics that were never
         * followed by a base character are discarded.
         *
         * @param out the buffer receiving the queued chars
         * @return {@code OVERFLOW} if {@code out} is full, otherwise {@code UNDERFLOW}
         */
        @Override
        protected CoderResult implFlush(CharBuffer out) {
            if (!drain(out)) {
                return CoderResult.OVERFLOW;
            }
            diacritics.setLength(0);
            return CoderResult.UNDERFLOW;
        }

        /** Restores the initial G0/G1 designation and drops all queued chars. */
        @Override
        protected void implReset() {
            g0 = BASIC_LATIN;
            g1 = EXTENDED_LATIN;
            diacritics.setLength(0);
            ready.setLength(0);
            readyOffset = 0;
        }

        /**
         * Moves queued chars into {@code out}.
         *
         * @param out the buffer receiving the queued chars
         * @return {@code false} if {@code out} filled up before the queue was empty
         */
        private boolean drain(CharBuffer out) {
            while (readyOffset < ready.length()) {
                if (!out.hasRemaining()) {
                    return false;
                }
                out.put(ready.charAt(readyOffset++));
            }
            ready.setLength(0);
            readyOffset = 0;
            return true;
        }

        private boolean isDiacritic(char ch) {
            return ch >= 0xE0 && ch <= 0xFF;
        }

        private boolean isG0(char ch) {
            return ch >= 0x21 && ch <= 0x7E;
        }

        private boolean isG1(char ch) {
            return ch >= 0xA1 && ch <= 0xFE;
        }

        /**
         * ANSI X3.41 or ISO 2022 escape technique.
         * See procedures in ISO 2375-1985.
         *
         * <p>Called with {@code in} positioned just behind the escape byte.
         * Unknown sequences are consumed without changing the designation.</p>
         *
         * @param in byte buffer
         * @return {@code false} if the buffer ended inside the sequence; the
         *         caller is then responsible for restoring the buffer position
         */
        private boolean handleEscapeSequence(ByteBuffer in) {
            if (!in.hasRemaining()) {
                return false;
            }
            byte oneByte = in.get();
            switch (oneByte) {
                case 's':
                    g0 = BASIC_LATIN;
                    return true;
                case 'g':
                    g0 = "Greek Symbols";
                    return true;
                case 'b':
                    g0 = "Subscripts";
                    return true;
                case 'p':
                    g0 = "Superscripts";
                    return true;
                case '(':
                case ',':
                    return designate(in, true);
                case ')':
                case '-':
                    return designate(in, false);
                case '$':
                    return designateMultiByte(in);
                case '!':
                    // short form ESC ! E
                    if (!in.hasRemaining()) {
                        return false;
                    }
                    if (in.get() == 'E') {
                        g0 = EXTENDED_LATIN;
                    }
                    return true;
                default:
                    return true;
            }
        }

        /**
         * Reads the final byte of a single-byte set designation, including the
         * two-byte form {@code ! E} for Extended Latin.
         *
         * @param in byte buffer positioned behind the intermediate byte
         * @param toG0 {@code true} to designate G0, {@code false} for G1
         * @return {@code false} if the buffer ended inside the sequence
         */
        private boolean designate(ByteBuffer in, boolean toG0) {
            if (!in.hasRemaining()) {
                return false;
            }
            byte finalByte = in.get();
            String name;
            if (finalByte == '!') {
                if (!in.hasRemaining()) {
                    return false;
                }
                name = in.get() == 'E' ? EXTENDED_LATIN : null;
            } else {
                name = characterSetName(finalByte);
            }
            assign(name, toG0);
            return true;
        }

        /**
         * Reads the remainder of a multi-byte designation: {@code ESC $ 1} and
         * {@code ESC $ , 1} select G0, {@code ESC $ ) 1} and {@code ESC $ - 1}
         * select G1.
         *
         * @param in byte buffer positioned behind the {@code $}
         * @return {@code false} if the buffer ended inside the sequence
         */
        private boolean designateMultiByte(ByteBuffer in) {
            if (!in.hasRemaining()) {
                return false;
            }
            byte oneByte = in.get();
            boolean toG0 = true;
            if (oneByte == ',' || oneByte == ')' || oneByte == '-') {
                toG0 = oneByte == ',';
                if (!in.hasRemaining()) {
                    return false;
                }
                oneByte = in.get();
            }
            if (oneByte == '1') {
                assign(EACC, toG0);
            }
            return true;
        }

        /** Stores {@code name} as G0 or G1; a {@code null} name changes nothing. */
        private void assign(String name, boolean toG0) {
            if (name == null) {
                return;
            }
            if (toG0) {
                g0 = name;
            } else {
                g1 = name;
            }
        }

        /**
         * Maps the final byte of a designation to a character set name.
         *
         * @param finalByte the final byte of the escape sequence
         * @return the character set name, or {@code null} for an unknown byte
         */
        private String characterSetName(byte finalByte) {
            switch (finalByte) {
                case '1':
                    return EACC;
                case '2':
                    return "Basic Hebrew";
                case '3':
                    return "Basic Arabic";
                case '4':
                    return "Extended Arabic";
                case 'B':
                    return BASIC_LATIN;
                case 'N':
                    return "Basic Cyrillic";
                case 'Q':
                    return "Extended Cyrillic";
                case 'S':
                    return "Basic Greek";
                default:
                    return null;
            }
        }
    }
}

View file

@ -0,0 +1,280 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
class AnselCodeTableParser {
private final List<CodeTable> codeTables;
private CodeTable codeTable;
private CharacterSet characterSet;
private Code code;
private StringBuilder content;
/**
 * Parses the code tables from the given XML stream.
 *
 * <p>Parsing is best effort: if the stream is not well-formed XML, the
 * parser ends up with an empty list of code tables instead of {@code null},
 * so callers can iterate over the result without a null check.</p>
 *
 * @param inputStream the XML document to read; it is not closed here
 */
AnselCodeTableParser(InputStream inputStream) {
    List<CodeTable> parsed;
    try {
        parsed = createCodeTables(inputStream);
    } catch (XMLStreamException e) {
        // deliberately tolerated: an unreadable document yields no code tables
        parsed = new LinkedList<>();
    }
    this.codeTables = parsed;
}
/**
 * Returns the code tables that were collected by the constructor.
 *
 * @return the list held by this parser, not a copy
 */
public List<CodeTable> getCodeTables() {
    return this.codeTables;
}
/**
 * Reads all XML events from the stream and hands each one to
 * {@code processEvent}, which fills the returned list.
 *
 * <p>The event reader is closed when reading ends, whether normally or with
 * an exception. Closing the reader does not close {@code inputStream}.</p>
 *
 * @param inputStream the XML document to read
 * @return the list passed to {@code processEvent} for every event
 * @throws XMLStreamException if the document cannot be read as XML
 */
private List<CodeTable> createCodeTables(InputStream inputStream) throws XMLStreamException {
    List<CodeTable> codetables = new LinkedList<>();
    XMLInputFactory factory = XMLInputFactory.newInstance();
    XMLEventReader xmlReader = factory.createXMLEventReader(inputStream);
    try {
        while (xmlReader.hasNext()) {
            processEvent(codetables, xmlReader.nextEvent());
        }
    } finally {
        xmlReader.close();
    }
    return codetables;
}
private void processEvent(List<CodeTable> codetables, XMLEvent event) {
if (event.isStartDocument()) {
this.code = new Code();
this.content = new StringBuilder();
}
if (event.isStartElement()) {
StartElement element = (StartElement) event;
String name = element.getName().getLocalPart();
switch (name) {
case "codeTables": {
// ignore
break;
}
case "codeTable": {
this.codeTable = new CodeTable();
break;
}
case "characterSet": {
this.characterSet = new CharacterSet();
@SuppressWarnings("unchecked")
Iterator<Attribute> it = element.getAttributes();
while (it.hasNext()) {
Attribute attr = it.next();
QName attributeName = attr.getName();
String attributeLocalName = attributeName.getLocalPart();
String attributeValue = attr.getValue();
if ("name".equals(attributeLocalName)) {
characterSet.name = attributeValue;
} else if ("isoCode".equals(attributeLocalName)) {
characterSet.isoCode = attributeValue;
}
}
break;
}
case "code": {
code = new Code();
break;
}
default:
break;
}
} else if (event.isCharacters()) {
Characters c = (Characters) event;
if (!c.isIgnorableWhiteSpace()) {
// character events may come more than once (e.g. because of XML entities like &quot;)
// concatenate with values that might exist
content.append(c.getData());
}
} else if (event.isEndElement()) {
EndElement element = (EndElement)