initial import
This commit is contained in:
commit
931e2b9cda
41 changed files with 104466 additions and 0 deletions
15
.gitignore
vendored
Normal file
15
.gitignore
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
/data
|
||||
/work
|
||||
/logs
|
||||
/.idea
|
||||
/target
|
||||
.DS_Store
|
||||
*.iml
|
||||
/.settings
|
||||
/.classpath
|
||||
/.project
|
||||
/.gradle
|
||||
/build
|
||||
/plugins
|
||||
/sessions
|
||||
*~
|
8
.travis.yml
Normal file
8
.travis.yml
Normal file
|
@ -0,0 +1,8 @@
|
|||
sudo: false
language: java
jdk:
  - oraclejdk8

# Gradle rewrites this lock file on every run; removing it before the cache is
# stored keeps the cache from being re-uploaded after every build.
before_cache:
  - rm -f $HOME/.gradle/caches/modules-2/modules-2.lock

# The build runs through the Gradle wrapper, so cache Gradle's dependency cache
# and the downloaded wrapper distribution. $HOME/.m2 is kept because the build
# also resolves artifacts from mavenLocal().
cache:
  directories:
    - $HOME/.m2
    - $HOME/.gradle/caches/
    - $HOME/.gradle/wrapper/
|
39
CREDITS.txt
Normal file
39
CREDITS.txt
Normal file
|
@ -0,0 +1,39 @@
|
|||
These bibliographic character sets are collected and improved
|
||||
by Jörg Prante <joergprante@gmail.com>
|
||||
|
||||
Thanks to:
|
||||
|
||||
Library of Congress
|
||||
The Library of Congress provides an ANSEL code table file
|
||||
|
||||
https://www.loc.gov/marc/specifications/codetables.xml
|
||||
|
||||
at https://www.loc.gov/marc/specifications/specchartables.html for making the
|
||||
character set implementation of ANSEL/Z39.47 possible, including east
|
||||
asian code characters (EACC).
|
||||
|
||||
US-ASCII
|
||||
The US-ASCII re-implementation was taken from the GNU classpath project.
|
||||
It is provided as a demonstration of a simple single-byte character set.
|
||||
The original code was licensed by the GNU Public License 2.1 (GPL)
|
||||
|
||||
Simple ANSEL
|
||||
The ANSEL charset implementation by Piotr Andzel http://anselcharset.sourceforge.net/
|
||||
has been included as "simple ANSEL". The original code was licensed under Lesser
|
||||
GNU Public License 3 (LGPL 3.0)
|
||||
|
||||
MAB
|
||||
The MAB Charset Java implementation was developed by Jürgen Kett of
|
||||
Deutsche Nationalbibliothek (DNB) in 2004 and was licensed by GNU Public License 2 (GPL)
|
||||
|
||||
MAB-Diskette
|
||||
This is a CP850 variant which could only be implemented with the help of
|
||||
Thomas Berger http://www.gymel.com/charsets/MAB-Diskette.html
|
||||
|
||||
PicaCharset
|
||||
An alternative PICA character set implementation of Deutsche Nationalbibliothek (DNB)
|
||||
The original licence was GNU Public License 2 (GPL).
|
||||
|
||||
ISO 5428:1984, "Greek alphabet coded character set for bibliographic information interchange",
|
||||
has been implemented with the help of https://en.wikipedia.org/wiki/ISO_5428
|
||||
|
76
README.md
Normal file
76
README.md
Normal file
|
@ -0,0 +1,76 @@
|
|||
# Bibliographic character sets
|
||||
|
||||
This is a collection of bibliographic character sets implemented in
|
||||
Java.
|
||||
|
||||
These character sets have not been included in the standard Java
|
||||
distribution. Most of the character sets predate Unicode and are
|
||||
dormant now but are still in active use in library application
|
||||
system software.
|
||||
|
||||
The reason to provide these character sets is to assist the public
|
||||
in migrating library data to Unicode, and UTF-8, respectively.
|
||||
|
||||
You can include this jar in the classpath, the Java CharsetProvider and
|
||||
ServiceLoader API will then make the character sets available,
|
||||
e.g. by `Charset.forName(name)`
|
||||
|
||||
This is free software.
|
||||
Please follow the AGPL license, which requires you to offer the source code
|
||||
of your project to the public if you make modifications to this program.
|
||||
|
||||
All contributions and pull requests are welcome.
|
||||
|
||||
If you have questions or find issues, please post them at
|
||||
https://github.com/xbib/bibliographic-character-sets/issues
|
||||
|
||||
## List of character sets included
|
||||
|
||||
### ANSEL "ANSI/NISO Z39.47-1993 (R2003) Extended Latin Alphabet Coded Character Set for Bibliographic Use (ANSEL)"
|
||||
|
||||
This implementation can only decode from ANSEL / Z39.47.
|
||||
|
||||
Included are the following sets specified by the Library of Congress at
|
||||
https://www.loc.gov/marc/specifications/specchartables.html
|
||||
|
||||
Basic Latin (ASCII), Extended Latin (ANSEL), Greek Symbols,
|
||||
Subscripts, Superscripts, Basic Hebrew, Basic Cyrillic,
|
||||
Extended Cyrillic, Basic Arabic, Extended Arabic,
|
||||
Basic Greek, Chinese, Japanese, Korean (EACC)
|
||||
|
||||
Usage:
|
||||
|
||||
Charset.forName("ANSEL")
|
||||
|
||||
### ISO 5426 "Extension of the Latin alphabet coded character set for bibliographic information interchange"
|
||||
|
||||
Usage:
|
||||
|
||||
Charset.forName("x-MAB")
|
||||
|
||||
### ISO 5428 "Greek alphabet coded character set for bibliographic information interchange"
|
||||
|
||||
### Pica (a variant of the INTERMARC character set, a 1979 French/Danish adoption of USMARC/UKMARC)
|
||||
|
||||
### MAB-Diskette (a variant of CP850 character set)
|
||||
|
||||
### US-ASCII (re-implementation for demonstration purpose, disabled by default)
|
||||
|
||||
See also the CREDITS.txt for acknowledgements.
|
||||
|
||||
# License
|
||||
|
||||
Copyright (C) 2016 Jörg Prante and the xbib organization
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
67
build.gradle
Normal file
67
build.gradle
Normal file
|
@ -0,0 +1,67 @@
|
|||
|
||||
println "Host: " + java.net.InetAddress.getLocalHost()
|
||||
println "Gradle: " + gradle.gradleVersion + " JVM: " + org.gradle.internal.jvm.Jvm.current() + " Groovy: " + GroovySystem.getVersion()
|
||||
println "Build: group: '${project.group}', name: '${project.name}', version: '${project.version}'"
|
||||
|
||||
apply plugin: 'java'
|
||||
apply plugin: 'maven'
|
||||
apply plugin: 'signing'
|
||||
apply plugin: 'findbugs'
|
||||
apply plugin: 'pmd'
|
||||
apply plugin: 'checkstyle'
|
||||
|
||||
repositories {
|
||||
mavenLocal()
|
||||
mavenCentral()
|
||||
jcenter()
|
||||
maven {
|
||||
url "http://xbib.org/repository"
|
||||
}
|
||||
}
|
||||
|
||||
configurations {
|
||||
wagon
|
||||
}
|
||||
|
||||
dependencies {
|
||||
testCompile "org.apache.logging.log4j:log4j-core:2.5"
|
||||
testCompile('junit:junit:4.12')
|
||||
wagon 'org.apache.maven.wagon:wagon-ssh-external:2.10'
|
||||
}
|
||||
|
||||
sourceCompatibility = JavaVersion.VERSION_1_8
|
||||
targetCompatibility = JavaVersion.VERSION_1_8
|
||||
|
||||
[compileJava, compileTestJava]*.options*.encoding = 'UTF-8'
|
||||
tasks.withType(JavaCompile) {
|
||||
options.compilerArgs << "-Xlint:deprecation,unchecked"
|
||||
}
|
||||
test {
|
||||
testLogging {
|
||||
showStandardStreams = false
|
||||
exceptionFormat = 'full'
|
||||
}
|
||||
}
|
||||
tasks.withType(FindBugs) {
|
||||
ignoreFailures = true
|
||||
reports {
|
||||
xml.enabled = false
|
||||
html.enabled = true
|
||||
}
|
||||
}
|
||||
task sourcesJar(type: Jar, dependsOn: classes) {
|
||||
classifier 'sources'
|
||||
from sourceSets.main.allSource
|
||||
}
|
||||
// Packages the HTML produced by the 'javadoc' task as the '-javadoc' artifact.
// The 'from' clause is required: without it the jar is built with nothing but
// a manifest, and the published javadoc artifact is effectively empty.
task javadocJar(type: Jar, dependsOn: javadoc) {
    classifier 'javadoc'
    from javadoc.destinationDir
}
|
||||
artifacts {
|
||||
archives sourcesJar, javadocJar
|
||||
}
|
||||
if (project.hasProperty('signing.keyId')) {
|
||||
signing {
|
||||
sign configurations.archives
|
||||
}
|
||||
}
|
||||
apply from: 'gradle/publish.gradle'
|
323
config/checkstyle/checkstyle.xml
Normal file
323
config/checkstyle/checkstyle.xml
Normal file
|
@ -0,0 +1,323 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE module PUBLIC
|
||||
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
||||
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
||||
|
||||
<!-- This is a checkstyle configuration file. For descriptions of
|
||||
what the following rules do, please see the checkstyle configuration
|
||||
page at http://checkstyle.sourceforge.net/config.html -->
|
||||
|
||||
<module name="Checker">
|
||||
|
||||
<module name="FileTabCharacter">
|
||||
<!-- Checks that there are no tab characters in the file.
|
||||
-->
|
||||
</module>
|
||||
|
||||
<module name="NewlineAtEndOfFile">
|
||||
<property name="lineSeparator" value="lf"/>
|
||||
</module>
|
||||
|
||||
<module name="RegexpSingleline">
|
||||
<!-- Checks that FIXME is not used in comments. TODO is preferred.
|
||||
-->
|
||||
<property name="format" value="((//.*)|(\*.*))FIXME" />
|
||||
<property name="message" value='TODO is preferred to FIXME. e.g. "TODO(johndoe): Refactor when v2 is released."' />
|
||||
</module>
|
||||
|
||||
<module name="RegexpSingleline">
|
||||
<!-- Checks that TODOs are named. (Actually, just that they are followed
|
||||
by an open paren.)
|
||||
-->
|
||||
<property name="format" value="((//.*)|(\*.*))TODO[^(]" />
|
||||
<property name="message" value='All TODOs should be named. e.g. "TODO(johndoe): Refactor when v2 is released."' />
|
||||
</module>
|
||||
|
||||
<module name="JavadocPackage">
|
||||
<!-- Checks that each Java package has a Javadoc file used for commenting.
|
||||
Only allows a package-info.java, not package.html. -->
|
||||
</module>
|
||||
|
||||
<!-- All Java AST specific tests live under TreeWalker module. -->
|
||||
<module name="TreeWalker">
|
||||
|
||||
<!--
|
||||
|
||||
IMPORT CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<module name="RedundantImport">
|
||||
<!-- Checks for redundant import statements. -->
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="ImportOrder">
|
||||
<!-- Checks for out of order import statements. -->
|
||||
|
||||
<property name="severity" value="warning"/>
|
||||
<property name="groups" value="com.google,android,junit,net,org,java,javax"/>
|
||||
<!-- This ensures that static imports go first. -->
|
||||
<property name="option" value="top"/>
|
||||
<property name="tokens" value="STATIC_IMPORT, IMPORT"/>
|
||||
</module>
|
||||
|
||||
<!--
|
||||
|
||||
JAVADOC CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<!-- Checks for Javadoc comments. -->
|
||||
<!-- See http://checkstyle.sf.net/config_javadoc.html -->
|
||||
<module name="JavadocMethod">
|
||||
<property name="scope" value="protected"/>
|
||||
<property name="severity" value="warning"/>
|
||||
<property name="allowMissingJavadoc" value="true"/>
|
||||
<property name="allowMissingParamTags" value="true"/>
|
||||
<property name="allowMissingReturnTag" value="true"/>
|
||||
<property name="allowMissingThrowsTags" value="true"/>
|
||||
<property name="allowThrowsTagsForSubclasses" value="true"/>
|
||||
<property name="allowUndeclaredRTE" value="true"/>
|
||||
</module>
|
||||
|
||||
<module name="JavadocType">
|
||||
<property name="scope" value="protected"/>
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="JavadocStyle">
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<!--
|
||||
|
||||
NAMING CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<!-- Item 38 - Adhere to generally accepted naming conventions -->
|
||||
|
||||
<module name="PackageName">
|
||||
<!-- Validates identifiers for package names against the
|
||||
supplied expression. -->
|
||||
<!-- Here the default checkstyle rule restricts package name parts to
|
||||
seven characters, this is not in line with common practice at Google.
|
||||
-->
|
||||
<property name="format" value="^[a-z]+(\.[a-z][a-z0-9]{1,})*$"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="TypeNameCheck">
|
||||
<!-- Validates type names (classes, interfaces) against the
|
||||
expression "^[A-Z][a-zA-Z0-9]*$". -->
|
||||
<metadata name="altname" value="TypeName"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="ConstantNameCheck">
|
||||
<!-- Validates non-private, static, final fields against the supplied
|
||||
expression "^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$". -->
|
||||
<metadata name="altname" value="ConstantName"/>
|
||||
<property name="applyToPublic" value="true"/>
|
||||
<property name="applyToProtected" value="true"/>
|
||||
<property name="applyToPackage" value="true"/>
|
||||
<property name="applyToPrivate" value="false"/>
|
||||
<property name="format" value="^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$"/>
|
||||
<message key="name.invalidPattern"
|
||||
value="Variable ''{0}'' should be in ALL_CAPS (if it is a constant) or be private (otherwise)."/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="StaticVariableNameCheck">
|
||||
<!-- Validates static, non-final fields against the supplied
|
||||
expression "^[a-z][a-zA-Z0-9]*_?$". -->
|
||||
<metadata name="altname" value="StaticVariableName"/>
|
||||
<property name="applyToPublic" value="true"/>
|
||||
<property name="applyToProtected" value="true"/>
|
||||
<property name="applyToPackage" value="true"/>
|
||||
<property name="applyToPrivate" value="true"/>
|
||||
<property name="format" value="^[a-z][a-zA-Z0-9]*_?$"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="MemberNameCheck">
|
||||
<!-- Validates non-static members against the supplied expression. -->
|
||||
<metadata name="altname" value="MemberName"/>
|
||||
<property name="applyToPublic" value="true"/>
|
||||
<property name="applyToProtected" value="true"/>
|
||||
<property name="applyToPackage" value="true"/>
|
||||
<property name="applyToPrivate" value="true"/>
|
||||
<property name="format" value="^[a-z][a-zA-Z0-9]*$"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="MethodNameCheck">
|
||||
<!-- Validates identifiers for method names. -->
|
||||
<metadata name="altname" value="MethodName"/>
|
||||
<property name="format" value="^[a-z][a-zA-Z0-9]*(_[a-zA-Z0-9]+)*$"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="ParameterName">
|
||||
<!-- Validates identifiers for method parameters against the
|
||||
expression "^[a-z][a-zA-Z0-9]*$". -->
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="LocalFinalVariableName">
|
||||
<!-- Validates identifiers for local final variables against the
|
||||
expression "^[a-z][a-zA-Z0-9]*$". -->
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="LocalVariableName">
|
||||
<!-- Validates identifiers for local variables against the
|
||||
expression "^[a-z][a-zA-Z0-9]*$". -->
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
|
||||
<!--
|
||||
|
||||
LENGTH and CODING CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<module name="LineLength">
|
||||
<!-- Checks if a line is too long. -->
|
||||
<property name="max" value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.max}" default="128"/>
|
||||
<property name="severity" value="error"/>
|
||||
|
||||
<!--
|
||||
The default ignore pattern exempts the following elements:
|
||||
- import statements
|
||||
- long URLs inside comments
|
||||
-->
|
||||
|
||||
<property name="ignorePattern"
|
||||
value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.ignorePattern}"
|
||||
default="^(package .*;\s*)|(import .*;\s*)|( *(\*|//).*https?://.*)$"/>
|
||||
</module>
|
||||
|
||||
<module name="LeftCurly">
|
||||
<!-- Checks for placement of the left curly brace ('{'). -->
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="RightCurly">
|
||||
<!-- Checks right curlies on CATCH, ELSE, and TRY blocks are on
|
||||
the same line. e.g., the following example is fine:
|
||||
<pre>
|
||||
if {
|
||||
...
|
||||
} else
|
||||
</pre>
|
||||
-->
|
||||
<!-- This next example is not fine:
|
||||
<pre>
|
||||
if {
|
||||
...
|
||||
}
|
||||
else
|
||||
</pre>
|
||||
-->
|
||||
<property name="option" value="same"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<!-- Checks for braces around if, else, for, while and do blocks -->
|
||||
<module name="NeedBraces">
|
||||
<property name="severity" value="warning"/>
|
||||
<property name="tokens" value="LITERAL_IF, LITERAL_ELSE, LITERAL_FOR, LITERAL_WHILE, LITERAL_DO"/>
|
||||
</module>
|
||||
|
||||
<module name="UpperEll">
|
||||
<!-- Checks that long constants are defined with an upper ell.-->
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="FallThrough">
|
||||
<!-- Warn about falling through to the next case statement. Similar to
|
||||
javac -Xlint:fallthrough, but the check is suppressed if a single-line comment
|
||||
on the last non-blank line preceding the fallen-into case contains 'fall through' (or
|
||||
some other variants which we don't publicize to promote consistency).
|
||||
-->
|
||||
<property name="reliefPattern"
|
||||
value="fall through|Fall through|fallthru|Fallthru|falls through|Falls through|fallthrough|Fallthrough|No break|NO break|no break|continue on"/>
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
|
||||
<!--
|
||||
|
||||
MODIFIERS CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<module name="ModifierOrder">
|
||||
<!-- Warn if modifier order is inconsistent with JLS3 8.1.1, 8.3.1, and
|
||||
8.4.3. The prescribed order is:
|
||||
public, protected, private, abstract, static, final, transient, volatile,
|
||||
synchronized, native, strictfp
|
||||
-->
|
||||
</module>
|
||||
|
||||
|
||||
<!--
|
||||
|
||||
WHITESPACE CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<module name="WhitespaceAround">
|
||||
<!-- Checks that various tokens are surrounded by whitespace.
|
||||
This includes most binary operators and keywords followed
|
||||
by regular or curly braces.
|
||||
-->
|
||||
<property name="tokens" value="ASSIGN, BAND, BAND_ASSIGN, BOR,
|
||||
BOR_ASSIGN, BSR, BSR_ASSIGN, BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN,
|
||||
EQUAL, GE, GT, LAND, LE, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
||||
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
||||
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE, LOR, LT, MINUS,
|
||||
MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL, PLUS, PLUS_ASSIGN, QUESTION,
|
||||
SL, SL_ASSIGN, SR_ASSIGN, STAR, STAR_ASSIGN"/>
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="WhitespaceAfter">
|
||||
<!-- Checks that commas, semicolons and typecasts are followed by
|
||||
whitespace.
|
||||
-->
|
||||
<property name="tokens" value="COMMA, SEMI, TYPECAST"/>
|
||||
</module>
|
||||
|
||||
<module name="NoWhitespaceAfter">
|
||||
<!-- Checks that there is no whitespace after various unary operators.
|
||||
Linebreaks are allowed.
|
||||
-->
|
||||
<property name="tokens" value="BNOT, DEC, DOT, INC, LNOT, UNARY_MINUS,
|
||||
UNARY_PLUS"/>
|
||||
<property name="allowLineBreaks" value="true"/>
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="NoWhitespaceBefore">
|
||||
<!-- Checks that there is no whitespace before semicolons, dots and postfix operators.
|
||||
Linebreaks are allowed.
|
||||
-->
|
||||
<property name="tokens" value="SEMI, DOT, POST_DEC, POST_INC"/>
|
||||
<property name="allowLineBreaks" value="true"/>
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="ParenPad">
|
||||
<!-- Checks that there is no whitespace before close parens or after
|
||||
open parens.
|
||||
-->
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
</module>
|
||||
</module>
|
||||
|
9
gradle.properties
Normal file
9
gradle.properties
Normal file
|
@ -0,0 +1,9 @@
|
|||
group = org.xbib
version = 1.0.0
org.gradle.daemon = true
# Values in a .properties file are literal text: surrounding quotes are kept
# verbatim and '+' is never evaluated. Every value below is therefore written
# out in full so the generated POM gets real names and URLs.
name = bibliographic-character-sets
description = Bibliographic character sets
user = xbib
scmUrl = https://github.com/xbib/bibliographic-character-sets
scmConnection = scm:git:git://github.com/xbib/bibliographic-character-sets.git
scmDeveloperConnection = scm:git:git://github.com/xbib/bibliographic-character-sets.git
|
62
gradle/publish.gradle
Normal file
62
gradle/publish.gradle
Normal file
|
@ -0,0 +1,62 @@
|
|||
task xbibUpload(type: Upload) {
|
||||
configuration = configurations.archives
|
||||
uploadDescriptor = true
|
||||
repositories {
|
||||
if (project.hasProperty("xbibUsername")) {
|
||||
mavenDeployer {
|
||||
configuration = configurations.wagon
|
||||
repository(url: uri('scpexe://xbib.org/repository')) {
|
||||
authentication(userName: xbibUsername, privateKey: xbibPrivateKey)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
task mavenCentralUpload(type: Upload) {
|
||||
configuration = configurations.archives
|
||||
uploadDescriptor = true
|
||||
repositories {
|
||||
if (project.hasProperty('ossrhUsername')) {
|
||||
mavenDeployer {
|
||||
beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) }
|
||||
repository(url: uri(ossrhReleaseUrl)) {
|
||||
authentication(userName: ossrhUsername, password: ossrhPassword)
|
||||
}
|
||||
snapshotRepository(url: uri(ossrhSnapshotUrl)) {
|
||||
authentication(userName: ossrhUsername, password: ossrhPassword)
|
||||
}
|
||||
pom.project {
|
||||
name name
|
||||
description description
|
||||
packaging 'jar'
|
||||
inceptionYear '2016'
|
||||
url scmUrl
|
||||
organization {
|
||||
name 'xbib'
|
||||
url 'http://xbib.org'
|
||||
}
|
||||
developers {
|
||||
developer {
|
||||
id user
|
||||
name 'Jörg Prante'
|
||||
email 'joergprante@gmail.com'
|
||||
url 'https://github.com/jprante'
|
||||
}
|
||||
}
|
||||
scm {
|
||||
url scmUrl
|
||||
connection scmConnection
|
||||
developerConnection scmDeveloperConnection
|
||||
}
|
||||
licenses {
|
||||
license {
|
||||
name 'Affero GNU Public License Version 3'
|
||||
url 'http://www.gnu.org/licenses/agpl-3.0.html'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
Binary file not shown.
6
gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
6
gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
#Thu Aug 18 20:34:33 CEST 2016
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-3.0-bin.zip
|
169
gradlew
vendored
Executable file
169
gradlew
vendored
Executable file
|
@ -0,0 +1,169 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
##############################################################################
|
||||
##
|
||||
## Gradle start up script for UN*X
|
||||
##
|
||||
##############################################################################
|
||||
|
||||
# Attempt to set APP_HOME
|
||||
# Resolve links: $0 may be a link
|
||||
PRG="$0"
|
||||
# Need this for relative symlinks.
|
||||
while [ -h "$PRG" ] ; do
|
||||
ls=`ls -ld "$PRG"`
|
||||
link=`expr "$ls" : '.*-> \(.*\)$'`
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
PRG="$link"
|
||||
else
|
||||
PRG=`dirname "$PRG"`"/$link"
|
||||
fi
|
||||
done
|
||||
SAVED="`pwd`"
|
||||
cd "`dirname \"$PRG\"`/" >/dev/null
|
||||
APP_HOME="`pwd -P`"
|
||||
cd "$SAVED" >/dev/null
|
||||
|
||||
APP_NAME="Gradle"
|
||||
APP_BASE_NAME=`basename "$0"`
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS=""
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD="maximum"
|
||||
|
||||
warn ( ) {
|
||||
echo "$*"
|
||||
}
|
||||
|
||||
die ( ) {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
exit 1
|
||||
}
|
||||
|
||||
# OS specific support (must be 'true' or 'false').
|
||||
cygwin=false
|
||||
msys=false
|
||||
darwin=false
|
||||
nonstop=false
|
||||
case "`uname`" in
|
||||
CYGWIN* )
|
||||
cygwin=true
|
||||
;;
|
||||
Darwin* )
|
||||
darwin=true
|
||||
;;
|
||||
MINGW* )
|
||||
msys=true
|
||||
;;
|
||||
NONSTOP* )
|
||||
nonstop=true
|
||||
;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD="$JAVA_HOME/jre/sh/java"
|
||||
else
|
||||
JAVACMD="$JAVA_HOME/bin/java"
|
||||
fi
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD="java"
|
||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
||||
MAX_FD_LIMIT=`ulimit -H -n`
|
||||
if [ $? -eq 0 ] ; then
|
||||
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
||||
MAX_FD="$MAX_FD_LIMIT"
|
||||
fi
|
||||
ulimit -n $MAX_FD
|
||||
if [ $? -ne 0 ] ; then
|
||||
warn "Could not set maximum file descriptor limit: $MAX_FD"
|
||||
fi
|
||||
else
|
||||
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
|
||||
fi
|
||||
fi
|
||||
|
||||
# For Darwin, add options to specify how the application appears in the dock
|
||||
if $darwin; then
|
||||
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
|
||||
fi
|
||||
|
||||
# For Cygwin, switch paths to Windows format before running java
|
||||
if $cygwin ; then
|
||||
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
||||
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
||||
JAVACMD=`cygpath --unix "$JAVACMD"`
|
||||
|
||||
# We build the pattern for arguments to be converted via cygpath
|
||||
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
||||
SEP=""
|
||||
for dir in $ROOTDIRSRAW ; do
|
||||
ROOTDIRS="$ROOTDIRS$SEP$dir"
|
||||
SEP="|"
|
||||
done
|
||||
OURCYGPATTERN="(^($ROOTDIRS))"
|
||||
# Add a user-defined pattern to the cygpath arguments
|
||||
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
|
||||
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
|
||||
fi
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
i=0
|
||||
for arg in "$@" ; do
|
||||
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
|
||||
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
|
||||
|
||||
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
|
||||
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
|
||||
else
|
||||
eval `echo args$i`="\"$arg\""
|
||||
fi
|
||||
i=$((i+1))
|
||||
done
|
||||
case $i in
|
||||
(0) set -- ;;
|
||||
(1) set -- "$args0" ;;
|
||||
(2) set -- "$args0" "$args1" ;;
|
||||
(3) set -- "$args0" "$args1" "$args2" ;;
|
||||
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
|
||||
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
|
||||
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
|
||||
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
|
||||
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
|
||||
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
|
||||
function splitJvmOpts() {
|
||||
JVM_OPTS=("$@")
|
||||
}
|
||||
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
|
||||
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
|
||||
|
||||
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
||||
if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
|
||||
cd "$(dirname "$0")"
|
||||
fi
|
||||
|
||||
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
|
84
gradlew.bat
vendored
Normal file
84
gradlew.bat
vendored
Normal file
|
@ -0,0 +1,84 @@
|
|||
@if "%DEBUG%" == "" @echo off
|
||||
@rem ##########################################################################
|
||||
@rem
|
||||
@rem Gradle startup script for Windows
|
||||
@rem
|
||||
@rem ##########################################################################
|
||||
|
||||
@rem Set local scope for the variables with windows NT shell
|
||||
if "%OS%"=="Windows_NT" setlocal
|
||||
|
||||
set DIRNAME=%~dp0
|
||||
if "%DIRNAME%" == "" set DIRNAME=.
|
||||
set APP_BASE_NAME=%~n0
|
||||
set APP_HOME=%DIRNAME%
|
||||
|
||||
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
set DEFAULT_JVM_OPTS=
|
||||
|
||||
@rem Find java.exe
|
||||
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||
|
||||
set JAVA_EXE=java.exe
|
||||
%JAVA_EXE% -version >NUL 2>&1
|
||||
if "%ERRORLEVEL%" == "0" goto init
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:findJavaFromJavaHome
|
||||
set JAVA_HOME=%JAVA_HOME:"=%
|
||||
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||
|
||||
if exist "%JAVA_EXE%" goto init
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:init
|
||||
@rem Get command-line arguments, handling Windows variants
|
||||
|
||||
if not "%OS%" == "Windows_NT" goto win9xME_args
|
||||
|
||||
:win9xME_args
|
||||
@rem Slurp the command line arguments.
|
||||
set CMD_LINE_ARGS=
|
||||
set _SKIP=2
|
||||
|
||||
:win9xME_args_slurp
|
||||
if "x%~1" == "x" goto execute
|
||||
|
||||
set CMD_LINE_ARGS=%*
|
||||
|
||||
:execute
|
||||
@rem Setup the command line
|
||||
|
||||
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||
|
||||
@rem Execute Gradle
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
||||
|
||||
:end
|
||||
@rem End local scope for the variables with windows NT shell
|
||||
if "%ERRORLEVEL%"=="0" goto mainEnd
|
||||
|
||||
:fail
|
||||
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||
rem the _cmd.exe /c_ return code!
|
||||
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
||||
exit /b 1
|
||||
|
||||
:mainEnd
|
||||
if "%OS%"=="Windows_NT" endlocal
|
||||
|
||||
:omega
|
1
settings.gradle
Normal file
1
settings.gradle
Normal file
|
@ -0,0 +1 @@
|
|||
rootProject.name = 'bibliographic-character-sets'
|
160
src/main/java/org/xbib/charset/ASCII.java
Normal file
160
src/main/java/org/xbib/charset/ASCII.java
Normal file
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
|
||||
/*
|
||||
Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version.
|
||||
*/
|
||||
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
 * US-ASCII charset.
 *
 * <p>A single-byte charset: the bytes {@code 0x00}-{@code 0x7F} map one-to-one onto the
 * characters {@code U+0000}-{@code U+007F}. Bytes with the high bit set are malformed
 * input for the decoder; characters above {@code U+007F} cannot be encoded.
 */
final class ASCII extends Charset {

    ASCII() {
        /*
         * Canonical charset name chosen according to:
         * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
         */
        super("US-ASCII", new String[]{
            /*
             * These names are provided by
             * http://www.iana.org/assignments/character-sets
             */
            "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "ASCII", "ISO646-US", "us",
            "IBM367", "cp367", "csASCII",
            /*
             * These names are provided by
             * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
             */
            "ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646", "windows-20127"});
    }

    /**
     * Tells whether the given charset is contained in this one.
     *
     * @param cs the charset to test
     * @return {@code true} only if {@code cs} is an instance of this class
     */
    @Override
    public boolean contains(Charset cs) {
        return cs instanceof ASCII;
    }

    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * Decodes one byte into one character; a byte with the high bit set is malformed.
     */
    private static final class Decoder extends CharsetDecoder {

        Decoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            while (in.hasRemaining()) {
                byte b = in.get();
                if (b < 0) {
                    // leave the offending byte unread so the caller can skip or replace it
                    in.position(in.position() - 1);
                    return CoderResult.malformedForLength(1);
                }
                if (!out.hasRemaining()) {
                    in.position(in.position() - 1);
                    return CoderResult.OVERFLOW;
                }
                out.put((char) b);
            }
            return CoderResult.UNDERFLOW;
        }
    }

    /**
     * Encodes one character into one byte; anything above U+007F is rejected.
     */
    private static final class Encoder extends CharsetEncoder {

        Encoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            while (in.hasRemaining()) {
                char c = in.get();
                if (c > Byte.MAX_VALUE) {
                    // leave the offending character(s) unread for the caller's error action
                    in.position(in.position() - 1);
                    return rejectNonAscii(in, c);
                }
                if (!out.hasRemaining()) {
                    in.position(in.position() - 1);
                    return CoderResult.OVERFLOW;
                }
                out.put((byte) c);
            }
            return CoderResult.UNDERFLOW;
        }

        /**
         * Classifies a non-ASCII character found at the current position of {@code in}.
         * A surrogate pair is one unmappable character of length two, so that it is
         * replaced or skipped as a unit; an unpaired surrogate is malformed input.
         */
        private static CoderResult rejectNonAscii(CharBuffer in, char c) {
            if (Character.isHighSurrogate(c)) {
                if (in.remaining() < 2) {
                    // the low surrogate may arrive with the next chunk of input; at
                    // end of input the framework reports the leftover as malformed
                    return CoderResult.UNDERFLOW;
                }
                char next = in.get(in.position() + 1);
                return Character.isLowSurrogate(next)
                        ? CoderResult.unmappableForLength(2)
                        : CoderResult.malformedForLength(1);
            }
            if (Character.isLowSurrogate(c)) {
                return CoderResult.malformedForLength(1);
            }
            return CoderResult.unmappableForLength(1);
        }
    }
}
|
280
src/main/java/org/xbib/charset/AnselCharset.java
Normal file
280
src/main/java/org/xbib/charset/AnselCharset.java
Normal file
|
@ -0,0 +1,280 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.io.CharArrayWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class AnselCharset extends Charset {
|
||||
|
||||
private static final Map<String, AnselCodeTableParser.CharacterSet> characterSetMap;
|
||||
|
||||
static {
|
||||
characterSetMap = new LinkedHashMap<>();
|
||||
ClassLoader cl = Thread.currentThread().getContextClassLoader();
|
||||
try (InputStream inputStream = cl.getResource("org/xbib/charset/codetables.xml").openStream()) {
|
||||
AnselCodeTableParser anselCodeTableParser = new AnselCodeTableParser(inputStream);
|
||||
for (AnselCodeTableParser.CodeTable codeTable : anselCodeTableParser.getCodeTables()) {
|
||||
for (AnselCodeTableParser.CharacterSet characterSet : codeTable.getCharacterSets()) {
|
||||
characterSetMap.put(characterSet.getName(), characterSet);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private Charset encodeCharset;
|
||||
|
||||
public AnselCharset() throws XMLStreamException {
|
||||
super("ANSEL", BibliographicCharsetProvider.aliasesFor("ANSEL"));
|
||||
this.encodeCharset = StandardCharsets.UTF_8;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Charset charset) {
|
||||
return charset instanceof AnselCharset;
|
||||
}
|
||||
|
||||
public CharsetEncoder newEncoder() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public CharsetDecoder newDecoder() {
|
||||
return new Decoder(this, encodeCharset.newDecoder());
|
||||
}
|
||||
|
||||
private static class Decoder extends CharsetDecoder {
|
||||
|
||||
String g0;
|
||||
String g1;
|
||||
|
||||
Decoder(Charset cs, CharsetDecoder baseDecoder) {
|
||||
super(cs, baseDecoder.averageCharsPerByte(), baseDecoder.maxCharsPerByte());
|
||||
}
|
||||
|
||||
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
|
||||
g0 = "Basic Latin (ASCII)";
|
||||
g1 = "Extended Latin (ANSEL)";
|
||||
CharArrayWriter w = new CharArrayWriter();
|
||||
CharArrayWriter diacritics = new CharArrayWriter();
|
||||
int pos = in.position();
|
||||
while (in.hasRemaining()) {
|
||||
byte b = in.get();
|
||||
char oldChar = (char) (b & 0xFF);
|
||||
if (oldChar == '\u001b') {
|
||||
handleEscapeSequence(in);
|
||||
if (in.hasRemaining()) {
|
||||
b = in.get();
|
||||
oldChar = (char) (b & 0xFF);
|
||||
} else {
|
||||
// premature end of escape sequence, no data following
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
}
|
||||
AnselCodeTableParser.CharacterSet characterSet = isG0(oldChar) ? characterSetMap.get(g0) :
|
||||
isG1(oldChar) ? characterSetMap.get(g1) : null;
|
||||
int len = characterSet != null ? characterSet.getLength() : 1;
|
||||
String str = len == 1 ? "" + oldChar : "" + oldChar + (char) (in.get() & 0xFF) + (char) (in.get() & 0xFF);
|
||||
AnselCodeTableParser.Code code = characterSet != null ? characterSet.getMarc().get(str) : null;
|
||||
char ch = code != null ? code.getUcs() : oldChar;
|
||||
if (ch == '\u0000') {
|
||||
// FB, EC - see http://memory.loc.gov/diglib/codetables/45.html#Note1 and http://memory.loc.gov/diglib/codetables/45.html#Note2
|
||||
continue;
|
||||
}
|
||||
boolean isDiacritic = code != null ? isDiacritic(oldChar) || code.isCombining() : isDiacritic(oldChar);
|
||||
if (isDiacritic) {
|
||||
diacritics.write(ch);
|
||||
} else {
|
||||
w.write(ch);
|
||||
// diacritics must be appended in Unicode, but are prepended in MARC-8 / Z39.47
|
||||
if (diacritics.toCharArray().length > 0) {
|
||||
try {
|
||||
w.write(diacritics.toCharArray());
|
||||
} catch (IOException e) {
|
||||
// dummy
|
||||
w.flush();
|
||||
}
|
||||
diacritics = new CharArrayWriter();
|
||||
}
|
||||
}
|
||||
}
|
||||
for (char ch : w.toCharArray()) {
|
||||
if (!out.hasRemaining()) {
|
||||
in.position(pos - 1);
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
out.put(ch);
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
|
||||
private boolean isDiacritic(char ch) {
|
||||
return ch >= 0xE0 && ch <= 0xFF;
|
||||
}
|
||||
|
||||
private boolean isG0(char ch) {
|
||||
return ch >= 0x21 && ch <= 0x7E;
|
||||
}
|
||||
|
||||
private boolean isG1(char ch) {
|
||||
return ch >= 0xA1 && ch <= 0xFE;
|
||||
}
|
||||
|
||||
/**
|
||||
* ANSI X3.41 or ISO 2022 escape technique.
|
||||
* See procedures in IS0 2375-1985.
|
||||
*
|
||||
* @param in byte buffer
|
||||
*/
|
||||
private void handleEscapeSequence(ByteBuffer in) {
|
||||
byte oneByte = in.get();
|
||||
switch (oneByte) {
|
||||
case 's':
|
||||
g0 = "Basic Latin (ASCII)";
|
||||
break;
|
||||
case 'g':
|
||||
g0 = "Greek Symbols";
|
||||
break;
|
||||
case 'b':
|
||||
g0 = "Subscripts";
|
||||
break;
|
||||
case 'p':
|
||||
g0 = "Superscripts";
|
||||
break;
|
||||
case '(':
|
||||
case ',':
|
||||
oneByte = in.get();
|
||||
switch (oneByte) {
|
||||
case '1':
|
||||
g0 = "Chinese, Japanese, Korean (EACC)";
|
||||
break;
|
||||
case '2':
|
||||
g0 = "Basic Hebrew";
|
||||
break;
|
||||
case '3':
|
||||
g0 = "Basic Arabic";
|
||||
break;
|
||||
case '4':
|
||||
g0 = "Extended Arabic";
|
||||
break;
|
||||
case 'B':
|
||||
g0 = "Basic Latin (ASCII)";
|
||||
break;
|
||||
case 'N':
|
||||
g0 = "Basic Cyrillic";
|
||||
break;
|
||||
case 'Q':
|
||||
g0 = "Extended Cyrillic";
|
||||
break;
|
||||
case 'S':
|
||||
g0 = "Basic Greek";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case ')':
|
||||
case '-':
|
||||
oneByte = in.get();
|
||||
switch (oneByte) {
|
||||
case '1':
|
||||
g1 = "Chinese, Japanese, Korean (EACC)";
|
||||
break;
|
||||
case '2':
|
||||
g1 = "Basic Hebrew";
|
||||
break;
|
||||
case '3':
|
||||
g1 = "Basic Arabic";
|
||||
break;
|
||||
case '4':
|
||||
g1 = "Extended Arabic";
|
||||
break;
|
||||
case 'B':
|
||||
g1 = "Basic Latin (ASCII)";
|
||||
break;
|
||||
case 'N':
|
||||
g1 = "Basic Cyrillic";
|
||||
break;
|
||||
case 'Q':
|
||||
g1 = "Extended Cyrillic";
|
||||
break;
|
||||
case 'S':
|
||||
g1 = "Basic Greek";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '$':
|
||||
oneByte = in.get();
|
||||
switch (oneByte) {
|
||||
case '1':
|
||||
g0 = "Chinese, Japanese, Korean (EACC)";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '!':
|
||||
oneByte = in.get();
|
||||
switch (oneByte) {
|
||||
case 'E':
|
||||
g0 = "Extended Latin (ANSEL)";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
280
src/main/java/org/xbib/charset/AnselCodeTableParser.java
Normal file
280
src/main/java/org/xbib/charset/AnselCodeTableParser.java
Normal file
|
@ -0,0 +1,280 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import javax.xml.namespace.QName;
|
||||
import javax.xml.stream.XMLEventReader;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.events.Attribute;
|
||||
import javax.xml.stream.events.Characters;
|
||||
import javax.xml.stream.events.EndElement;
|
||||
import javax.xml.stream.events.StartElement;
|
||||
import javax.xml.stream.events.XMLEvent;
|
||||
|
||||
class AnselCodeTableParser {
|
||||
|
||||
private final List<CodeTable> codeTables;
|
||||
|
||||
private CodeTable codeTable;
|
||||
|
||||
private CharacterSet characterSet;
|
||||
|
||||
private Code code;
|
||||
|
||||
private StringBuilder content;
|
||||
|
||||
AnselCodeTableParser(InputStream inputStream) {
|
||||
List<CodeTable> codeTables;
|
||||
try {
|
||||
codeTables = createCodeTables(inputStream);
|
||||
} catch (XMLStreamException e) {
|
||||
codeTables = null;
|
||||
// ignore
|
||||
}
|
||||
this.codeTables = codeTables;
|
||||
}
|
||||
|
||||
public List<CodeTable> getCodeTables() {
|
||||
return codeTables;
|
||||
}
|
||||
|
||||
private List<CodeTable> createCodeTables(InputStream inputStream) throws XMLStreamException {
|
||||
List<CodeTable> codetables = new LinkedList<>();
|
||||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
XMLEventReader xmlReader = factory.createXMLEventReader(inputStream);
|
||||
while (xmlReader.hasNext()) {
|
||||
processEvent(codetables, xmlReader.peek());
|
||||
xmlReader.nextEvent();
|
||||
}
|
||||
return codetables;
|
||||
}
|
||||
|
||||
private void processEvent(List<CodeTable> codetables, XMLEvent event) {
|
||||
if (event.isStartDocument()) {
|
||||
this.code = new Code();
|
||||
this.content = new StringBuilder();
|
||||
}
|
||||
if (event.isStartElement()) {
|
||||
StartElement element = (StartElement) event;
|
||||
String name = element.getName().getLocalPart();
|
||||
switch (name) {
|
||||
case "codeTables": {
|
||||
// ignore
|
||||
break;
|
||||
}
|
||||
case "codeTable": {
|
||||
this.codeTable = new CodeTable();
|
||||
break;
|
||||
}
|
||||
case "characterSet": {
|
||||
this.characterSet = new CharacterSet();
|
||||
@SuppressWarnings("unchecked")
|
||||
Iterator<Attribute> it = element.getAttributes();
|
||||
while (it.hasNext()) {
|
||||
Attribute attr = it.next();
|
||||
QName attributeName = attr.getName();
|
||||
String attributeLocalName = attributeName.getLocalPart();
|
||||
String attributeValue = attr.getValue();
|
||||
if ("name".equals(attributeLocalName)) {
|
||||
characterSet.name = attributeValue;
|
||||
} else if ("isoCode".equals(attributeLocalName)) {
|
||||
characterSet.isoCode = attributeValue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "code": {
|
||||
code = new Code();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else if (event.isCharacters()) {
|
||||
Characters c = (Characters) event;
|
||||
if (!c.isIgnorableWhiteSpace()) {
|
||||
// character events may come more than once (e.g. because of XML entities like ")
|
||||
// concatenate with values that might exist
|
||||
content.append(c.getData());
|
||||
}
|
||||
} else if (event.isEndElement()) {
|
||||
EndElement element = (EndElement) |