initial import

2016-08-18 20:39:56 +02:00 · 2016-08-18 20:39:56 +02:00 · 931e2b9cda
commit 931e2b9cda
41 changed files with 104466 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,15 @@
+/data
+/work
+/logs
+/.idea
+/target
+.DS_Store
+*.iml
+/.settings
+/.classpath
+/.project
+/.gradle
+/build
+/plugins
+/sessions
+*~
--- a/.travis.yml
+++ b/.travis.yml
@ -0,0 +1,8 @@
+sudo: false
+language: java
+jdk:
+  - oraclejdk8
+
+cache:
+  directories:
+    - $HOME/.m2
--- a/CREDITS.txt
+++ b/CREDITS.txt
@ -0,0 +1,39 @@
+These bibliographic character sets are collected and improved
+by Jörg Prante <joergprante@gmail.com>
+
+Thanks to:
+
+Library of Congress
+The Library of Congress provides an ANSEL code table file
+
+https://www.loc.gov/marc/specifications/codetables.xml
+
+at https://www.loc.gov/marc/specifications/specchartables.html for making the
+character set implementation of ANSEL/Z39.47 possible, including east
+asian code characters (EACC).
+
+US-ASCII
+The US-ASCII re-implementation was taken from the GNU classpath project.
+It is provided as a demonstration of a simple single-byte character set.
+The original code was licensed under the GNU General Public License, version 2 or later (GPL)
+
+Simple ANSEL
+The ANSEL charset implementation by Piotr Andzel http://anselcharset.sourceforge.net/
+has been included as "simple ANSEL". The original code was licensed under the
+GNU Lesser General Public License 3 (LGPL 3.0)
+
+MAB
+The MAB Charset Java implementation was developed by Jürgen Kett of
+Deutsche Nationalbibliothek (DNB) in 2004 and was licensed under the GNU General Public License 2 (GPL)
+
+MAB-Diskette
+This is a CP850 variant which could only be implemented with the help of
+Thomas Berger http://www.gymel.com/charsets/MAB-Diskette.html
+
+PicaCharset
+An alternative PICA character set implementation of Deutsche Nationalbibliothek (DNB)
+The original licence was the GNU General Public License 2 (GPL).
+
+ISO 5428:1984, "Greek alphabet coded character set for bibliographic information interchange",
+has been implemented with the help of https://en.wikipedia.org/wiki/ISO_5428
+
--- a/README.md
+++ b/README.md
@ -0,0 +1,76 @@
+# Bibliographic character sets
+
+This is a collection of bibliographic character sets implemented in 
+Java.
+
+These character sets have not been included in the standard Java 
+distribution. Most of the character sets predate Unicode and are 
+dormant now but are still in active use in library application 
+system software.
+
+The reason to provide these character sets is to assist the public 
+in migrating library data to Unicode, and UTF-8, respectively.
+
+You can include this jar in the classpath, the Java CharsetProvider and
+ServiceLoader API will then make the character sets available, 
+e.g. by `Charset.forName(name)`
+
+This is free software. 
+Please follow the AGPL license, which requires you to offer the source code
+of your project to the public if you make modifications to this program.
+
+All contributions and pull requests are welcome.
+
+If you have questions or find issues, please post them at
+https://github.com/xbib/bibliographic-character-sets/issues
+
+## List of character sets included
+
+### ANSEL "ANSI/NISO Z39.47-1993 (R2003) Extended Latin Alphabet Coded Character Set for Bibliographic Use (ANSEL)"
+
+This implementation can only decode from ANSEL / Z39.47.
+
+Included are the following sets specified by the Library of Congress at
+https://www.loc.gov/marc/specifications/specchartables.html 
+
+Basic Latin (ASCII), Extended Latin (ANSEL),  Greek Symbols,
+Subscripts, Superscripts, Basic Hebrew, Basic Cyrillic,
+Extended Cyrillic, Basic Arabic, Extended Arabic,
+Basic Greek, Chinese, Japanese, Korean (EACC)
+ 
+Usage:
+ 
+     Charset.forName("ANSEL")
+ 
+### ISO 5426 "Extension of the Latin alphabet coded character set for bibliographic information interchange"
+
+Usage:
+ 
+     Charset.forName("x-MAB")
+
+### ISO 5428 "Greek alphabet coded character set for bibliographic information interchange"
+
+### Pica (a variant of the INTERMARC character set, a 1979 French/Danish adoption of USMARC/UKMARC)
+
+### MAB-Diskette (a variant of CP850 character set)
+
+### US-ASCII (re-implementation for demonstration purposes, disabled by default)
+
+See also the CREDITS.txt for acknowledgements.
+
+# License
+
+Copyright (C) 2016 Jörg Prante and the xbib organization
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
--- a/build.gradle
+++ b/build.gradle
@ -0,0 +1,67 @@
+
+println "Host: " + java.net.InetAddress.getLocalHost()
+println "Gradle: " + gradle.gradleVersion + " JVM: " + org.gradle.internal.jvm.Jvm.current() + " Groovy: " + GroovySystem.getVersion()
+println "Build: group: '${project.group}', name: '${project.name}', version: '${project.version}'"
+
+apply plugin: 'java'
+apply plugin: 'maven'
+apply plugin: 'signing'
+apply plugin: 'findbugs'
+apply plugin: 'pmd'
+apply plugin: 'checkstyle'
+
+repositories {
+    mavenLocal()
+    mavenCentral()
+    jcenter()
+    maven {
+        url "http://xbib.org/repository"
+    }
+}
+
+configurations {
+    wagon
+}
+
+dependencies {
+    testCompile "org.apache.logging.log4j:log4j-core:2.5"
+    testCompile('junit:junit:4.12')
+    wagon 'org.apache.maven.wagon:wagon-ssh-external:2.10'
+}
+
+sourceCompatibility = JavaVersion.VERSION_1_8
+targetCompatibility = JavaVersion.VERSION_1_8
+
+[compileJava, compileTestJava]*.options*.encoding = 'UTF-8'
+tasks.withType(JavaCompile) {
+    options.compilerArgs << "-Xlint:deprecation,unchecked"
+}
+test {
+    testLogging {
+        showStandardStreams = false
+        exceptionFormat = 'full'
+    }
+}
+tasks.withType(FindBugs) {
+    ignoreFailures = true
+    reports {
+        xml.enabled = false
+        html.enabled = true
+    }
+}
+task sourcesJar(type: Jar, dependsOn: classes) {
+    classifier 'sources'
+    from sourceSets.main.allSource
+}
+// Packages the generated Javadoc HTML as the '-javadoc' classifier artifact.
+// The explicit 'from' is required: 'dependsOn: javadoc' only orders the tasks,
+// it does not add the Javadoc output to the archive, so without it the jar
+// would contain nothing but a manifest.
+task javadocJar(type: Jar, dependsOn: javadoc) {
+    classifier 'javadoc'
+    from javadoc.destinationDir
+}
+artifacts {
+    archives sourcesJar, javadocJar
+}
+if (project.hasProperty('signing.keyId')) {
+    signing {
+        sign configurations.archives
+    }
+}
+apply from: 'gradle/publish.gradle'
--- a/config/checkstyle/checkstyle.xml
+++ b/config/checkstyle/checkstyle.xml
@ -0,0 +1,323 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE module PUBLIC
+        "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
+        "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
+
+<!-- This is a checkstyle configuration file. For descriptions of
+what the following rules do, please see the checkstyle configuration
+page at http://checkstyle.sourceforge.net/config.html -->
+
+<module name="Checker">
+
+    <module name="FileTabCharacter">
+        <!-- Checks that there are no tab characters in the file.
+        -->
+    </module>
+
+    <module name="NewlineAtEndOfFile">
+        <property name="lineSeparator" value="lf"/>
+    </module>
+
+    <module name="RegexpSingleline">
+        <!-- Checks that FIXME is not used in comments.  TODO is preferred.
+        -->
+        <property name="format" value="((//.*)|(\*.*))FIXME" />
+        <property name="message" value='TODO is preferred to FIXME.  e.g. "TODO(johndoe): Refactor when v2 is released."' />
+    </module>
+
+    <module name="RegexpSingleline">
+        <!-- Checks that TODOs are named.  (Actually, just that they are followed
+             by an open paren.)
+        -->
+        <property name="format" value="((//.*)|(\*.*))TODO[^(]" />
+        <property name="message" value='All TODOs should be named.  e.g. "TODO(johndoe): Refactor when v2 is released."' />
+    </module>
+
+    <module name="JavadocPackage">
+        <!-- Checks that each Java package has a Javadoc file used for commenting.
+          Only allows a package-info.java, not package.html. -->
+    </module>
+
+    <!-- All Java AST specific tests live under TreeWalker module. -->
+    <module name="TreeWalker">
+
+        <!--
+
+        IMPORT CHECKS
+
+        -->
+
+        <module name="RedundantImport">
+            <!-- Checks for redundant import statements. -->
+            <property name="severity" value="error"/>
+        </module>
+
+        <module name="ImportOrder">
+            <!-- Checks for out of order import statements. -->
+
+            <property name="severity" value="warning"/>
+            <property name="groups" value="com.google,android,junit,net,org,java,javax"/>
+            <!-- This ensures that static imports go first. -->
+            <property name="option" value="top"/>
+            <property name="tokens" value="STATIC_IMPORT, IMPORT"/>
+        </module>
+
+        <!--
+
+        JAVADOC CHECKS
+
+        -->
+
+        <!-- Checks for Javadoc comments.                     -->
+        <!-- See http://checkstyle.sf.net/config_javadoc.html -->
+        <module name="JavadocMethod">
+            <property name="scope" value="protected"/>
+            <property name="severity" value="warning"/>
+            <property name="allowMissingJavadoc" value="true"/>
+            <property name="allowMissingParamTags" value="true"/>
+            <property name="allowMissingReturnTag" value="true"/>
+            <property name="allowMissingThrowsTags" value="true"/>
+            <property name="allowThrowsTagsForSubclasses" value="true"/>
+            <property name="allowUndeclaredRTE" value="true"/>
+        </module>
+
+        <module name="JavadocType">
+            <property name="scope" value="protected"/>
+            <property name="severity" value="error"/>
+        </module>
+
+        <module name="JavadocStyle">
+            <property name="severity" value="warning"/>
+        </module>
+
+        <!--
+
+        NAMING CHECKS
+
+        -->
+
+        <!-- Item 38 - Adhere to generally accepted naming conventions -->
+
+        <module name="PackageName">
+            <!-- Validates identifiers for package names against the
+              supplied expression. -->
+            <!-- Here the default checkstyle rule restricts package name parts to
+              seven characters, this is not in line with common practice at Google.
+            -->
+            <property name="format" value="^[a-z]+(\.[a-z][a-z0-9]{1,})*$"/>
+            <property name="severity" value="warning"/>
+        </module>
+
+        <module name="TypeNameCheck">
+            <!-- Validates type names (classes, interfaces, enums, annotations)
+            against the expression "^[A-Z][a-zA-Z0-9]*$". -->
+            <metadata name="altname" value="TypeName"/>
+            <property name="severity" value="warning"/>
+        </module>
+
+        <module name="ConstantNameCheck">
+            <!-- Validates non-private, static, final fields against the supplied
+            expression "^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$". -->
+            <metadata name="altname" value="ConstantName"/>
+            <property name="applyToPublic" value="true"/>
+            <property name="applyToProtected" value="true"/>
+            <property name="applyToPackage" value="true"/>
+            <property name="applyToPrivate" value="false"/>
+            <property name="format" value="^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$"/>
+            <message key="name.invalidPattern"
+                     value="Variable ''{0}'' should be in ALL_CAPS (if it is a constant) or be private (otherwise)."/>
+            <property name="severity" value="warning"/>
+        </module>
+
+        <module name="StaticVariableNameCheck">
+            <!-- Validates static, non-final fields against the supplied
+            expression "^[a-z][a-zA-Z0-9]*_?$". -->
+            <metadata name="altname" value="StaticVariableName"/>
+            <property name="applyToPublic" value="true"/>
+            <property name="applyToProtected" value="true"/>
+            <property name="applyToPackage" value="true"/>
+            <property name="applyToPrivate" value="true"/>
+            <property name="format" value="^[a-z][a-zA-Z0-9]*_?$"/>
+            <property name="severity" value="warning"/>
+        </module>
+
+        <module name="MemberNameCheck">
+            <!-- Validates non-static members against the supplied expression. -->
+            <metadata name="altname" value="MemberName"/>
+            <property name="applyToPublic" value="true"/>
+            <property name="applyToProtected" value="true"/>
+            <property name="applyToPackage" value="true"/>
+            <property name="applyToPrivate" value="true"/>
+            <property name="format" value="^[a-z][a-zA-Z0-9]*$"/>
+            <property name="severity" value="warning"/>
+        </module>
+
+        <module name="MethodNameCheck">
+            <!-- Validates identifiers for method names. -->
+            <metadata name="altname" value="MethodName"/>
+            <property name="format" value="^[a-z][a-zA-Z0-9]*(_[a-zA-Z0-9]+)*$"/>
+            <property name="severity" value="warning"/>
+        </module>
+
+        <module name="ParameterName">
+            <!-- Validates identifiers for method parameters against the
+              expression "^[a-z][a-zA-Z0-9]*$". -->
+            <property name="severity" value="warning"/>
+        </module>
+
+        <module name="LocalFinalVariableName">
+            <!-- Validates identifiers for local final variables against the
+              expression "^[a-z][a-zA-Z0-9]*$". -->
+            <property name="severity" value="warning"/>
+        </module>
+
+        <module name="LocalVariableName">
+            <!-- Validates identifiers for local variables against the
+              expression "^[a-z][a-zA-Z0-9]*$". -->
+            <property name="severity" value="warning"/>
+        </module>
+
+
+        <!--
+
+        LENGTH and CODING CHECKS
+
+        -->
+
+        <module name="LineLength">
+            <!-- Checks if a line is too long. -->
+            <property name="max" value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.max}" default="128"/>
+            <property name="severity" value="error"/>
+
+            <!--
+              The default ignore pattern exempts the following elements:
+                - import statements
+                - long URLs inside comments
+            -->
+
+            <property name="ignorePattern"
+                      value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.ignorePattern}"
+                      default="^(package .*;\s*)|(import .*;\s*)|( *(\*|//).*https?://.*)$"/>
+        </module>
+
+        <module name="LeftCurly">
+            <!-- Checks for placement of the left curly brace ('{'). -->
+            <property name="severity" value="warning"/>
+        </module>
+
+        <module name="RightCurly">
+            <!-- Checks right curlies on CATCH, ELSE, and TRY blocks are on
+            the same line. e.g., the following example is fine:
+            <pre>
+              if {
+                ...
+              } else
+            </pre>
+            -->
+            <!-- This next example is not fine:
+            <pre>
+              if {
+                ...
+              }
+              else
+            </pre>
+            -->
+            <property name="option" value="same"/>
+            <property name="severity" value="warning"/>
+        </module>
+
+        <!-- Checks for braces around if, else, for, while and do blocks -->
+        <module name="NeedBraces">
+            <property name="severity" value="warning"/>
+            <property name="tokens" value="LITERAL_IF, LITERAL_ELSE, LITERAL_FOR, LITERAL_WHILE, LITERAL_DO"/>
+        </module>
+
+        <module name="UpperEll">
+            <!-- Checks that long constants are defined with an upper ell.-->
+            <property name="severity" value="error"/>
+        </module>
+
+        <module name="FallThrough">
+            <!-- Warn about falling through to the next case statement.  Similar to
+            javac -Xlint:fallthrough, but the check is suppressed if a single-line comment
+            on the last non-blank line preceding the fallen-into case contains 'fall through' (or
+            some other variants which we don't publicize to promote consistency).
+            -->
+            <property name="reliefPattern"
+                      value="fall through|Fall through|fallthru|Fallthru|falls through|Falls through|fallthrough|Fallthrough|No break|NO break|no break|continue on"/>
+            <property name="severity" value="error"/>
+        </module>
+
+
+        <!--
+
+        MODIFIERS CHECKS
+
+        -->
+
+        <module name="ModifierOrder">
+            <!-- Warn if modifier order is inconsistent with JLS3 8.1.1, 8.3.1, and
+                 8.4.3.  The prescribed order is:
+                 public, protected, private, abstract, static, final, transient, volatile,
+                 synchronized, native, strictfp
+              -->
+        </module>
+
+
+        <!--
+
+        WHITESPACE CHECKS
+
+        -->
+
+        <module name="WhitespaceAround">
+            <!-- Checks that various tokens are surrounded by whitespace.
+                 This includes most binary operators and keywords followed
+                 by regular or curly braces.
+            -->
+            <property name="tokens" value="ASSIGN, BAND, BAND_ASSIGN, BOR,
+        BOR_ASSIGN, BSR, BSR_ASSIGN, BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN,
+        EQUAL, GE, GT, LAND, LE, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
+        LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
+        LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE, LOR, LT, MINUS,
+        MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL, PLUS, PLUS_ASSIGN, QUESTION,
+        SL, SL_ASSIGN, SR_ASSIGN, STAR, STAR_ASSIGN"/>
+            <property name="severity" value="error"/>
+        </module>
+
+        <module name="WhitespaceAfter">
+            <!-- Checks that commas, semicolons and typecasts are followed by
+                 whitespace.
+            -->
+            <property name="tokens" value="COMMA, SEMI, TYPECAST"/>
+        </module>
+
+        <module name="NoWhitespaceAfter">
+            <!-- Checks that there is no whitespace after various unary operators.
+                 Linebreaks are allowed.
+            -->
+            <property name="tokens" value="BNOT, DEC, DOT, INC, LNOT, UNARY_MINUS,
+        UNARY_PLUS"/>
+            <property name="allowLineBreaks" value="true"/>
+            <property name="severity" value="error"/>
+        </module>
+
+        <module name="NoWhitespaceBefore">
+            <!-- Checks that there is no whitespace before various unary operators.
+                 Linebreaks are allowed.
+            -->
+            <property name="tokens" value="SEMI, DOT, POST_DEC, POST_INC"/>
+            <property name="allowLineBreaks" value="true"/>
+            <property name="severity" value="error"/>
+        </module>
+
+        <module name="ParenPad">
+            <!-- Checks that there is no whitespace before close parens or after
+                 open parens.
+            -->
+            <property name="severity" value="warning"/>
+        </module>
+
+    </module>
+</module>
+
--- a/gradle.properties
+++ b/gradle.properties
@ -0,0 +1,9 @@
+# Values in a .properties file are taken literally: quotes become part of the
+# value and '+' does not concatenate, so every value is written out in full.
+group = org.xbib
+version = 1.0.0
+org.gradle.daemon = true
+name = bibliographic-character-sets
+description = Bibliographic character sets
+user = xbib
+scmUrl = https://github.com/xbib/bibliographic-character-sets
+scmConnection = scm:git:git://github.com/xbib/bibliographic-character-sets.git
+scmDeveloperConnection = scm:git:git://github.com/xbib/bibliographic-character-sets.git
--- a/gradle/publish.gradle
+++ b/gradle/publish.gradle
@ -0,0 +1,62 @@
+// Uploads the 'archives' artifacts, including the generated descriptor, to the
+// repository at scpexe://xbib.org/repository.
+// The deployer is only configured when the 'xbibUsername' project property
+// exists; without it this task has no repository configured in this script.
+task xbibUpload(type: Upload) {
+    configuration = configurations.archives
+    uploadDescriptor = true
+    repositories {
+        if (project.hasProperty("xbibUsername")) {
+            mavenDeployer {
+                // Uses the 'wagon' configuration for the deployer; presumably this
+                // supplies the scpexe transport classes - confirm in build.gradle.
+                configuration = configurations.wagon
+                repository(url: uri('scpexe://xbib.org/repository')) {
+                    // NOTE(review): xbibPrivateKey is not guarded by hasProperty;
+                    // it is assumed to be supplied together with xbibUsername.
+                    authentication(userName: xbibUsername, privateKey: xbibPrivateKey)
+                }
+            }
+        }
+    }
+}
+
+// Uploads the 'archives' artifacts, including the generated descriptor, to the
+// release or snapshot repository given by ossrhReleaseUrl / ossrhSnapshotUrl,
+// signing the POM before each deployment.
+// The deployer is only configured when the 'ossrhUsername' project property
+// exists; without it this task has no repository configured in this script.
+task mavenCentralUpload(type: Upload) {
+    configuration = configurations.archives
+    uploadDescriptor = true
+    repositories {
+        if (project.hasProperty('ossrhUsername')) {
+            mavenDeployer {
+                // Sign the POM itself, in addition to whatever artifact signing is configured elsewhere.
+                beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) }
+                // NOTE(review): ossrhReleaseUrl, ossrhSnapshotUrl and ossrhPassword are not
+                // defined in this script; presumably supplied as project properties
+                // alongside ossrhUsername - confirm.
+                repository(url: uri(ossrhReleaseUrl)) {
+                    authentication(userName: ossrhUsername, password: ossrhPassword)
+                }
+                snapshotRepository(url: uri(ossrhSnapshotUrl)) {
+                    authentication(userName: ossrhUsername, password: ossrhPassword)
+                }
+                // POM metadata. The bare identifiers used as values below (name, description,
+                // user, scmUrl, scmConnection, scmDeveloperConnection) are not defined in
+                // this script; presumably they resolve to project properties - confirm.
+                pom.project {
+                    name name
+                    description description
+                    packaging 'jar'
+                    inceptionYear '2016'
+                    url scmUrl
+                    organization {
+                        name 'xbib'
+                        url 'http://xbib.org'
+                    }
+                    developers {
+                        developer {
+                            id user
+                            name 'Jörg Prante'
+                            email 'joergprante@gmail.com'
+                            url 'https://github.com/jprante'
+                        }
+                    }
+                    scm {
+                        url scmUrl
+                        connection scmConnection
+                        developerConnection scmDeveloperConnection
+                    }
+                    licenses {
+                        license {
+                            name 'Affero GNU Public License Version 3'
+                            url 'http://www.gnu.org/licenses/agpl-3.0.html'
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
--- a/gradle/wrapper/gradle-wrapper.jar
+++ b/gradle/wrapper/gradle-wrapper.jar
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@ -0,0 +1,6 @@
+#Thu Aug 18 20:34:33 CEST 2016
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-3.0-bin.zip
--- a/gradlew
+++ b/gradlew
@ -0,0 +1,169 @@
+#!/usr/bin/env bash
+
+##############################################################################
+##
+##  Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+    ls=`ls -ld "$PRG"`
+    link=`expr "$ls" : '.*-> \(.*\)$'`
+    if expr "$link" : '/.*' > /dev/null; then
+        PRG="$link"
+    else
+        PRG=`dirname "$PRG"`"/$link"
+    fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS=""
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn ( ) {
+    echo "$*"
+}
+
+die ( ) {
+    echo
+    echo "$*"
+    echo
+    exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+  CYGWIN* )
+    cygwin=true
+    ;;
+  Darwin* )
+    darwin=true
+    ;;
+  MINGW* )
+    msys=true
+    ;;
+  NONSTOP* )
+    nonstop=true
+    ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+        JAVACMD="$JAVA_HOME/bin/java"
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD="java"
+    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+    MAX_FD_LIMIT=`ulimit -H -n`
+    if [ $? -eq 0 ] ; then
+        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+            MAX_FD="$MAX_FD_LIMIT"
+        fi
+        ulimit -n $MAX_FD
+        if [ $? -ne 0 ] ; then
+            warn "Could not set maximum file descriptor limit: $MAX_FD"
+        fi
+    else
+        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+    fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin ; then
+    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+    JAVACMD=`cygpath --unix "$JAVACMD"`
+
+    # We build the pattern for arguments to be converted via cygpath
+    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+    SEP=""
+    for dir in $ROOTDIRSRAW ; do
+        ROOTDIRS="$ROOTDIRS$SEP$dir"
+        SEP="|"
+    done
+    OURCYGPATTERN="(^($ROOTDIRS))"
+    # Add a user-defined pattern to the cygpath arguments
+    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+    fi
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    i=0
+    for arg in "$@" ; do
+        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
+
+        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
+            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+        else
+            eval `echo args$i`="\"$arg\""
+        fi
+        i=$((i+1))
+    done
+    case $i in
+        (0) set -- ;;
+        (1) set -- "$args0" ;;
+        (2) set -- "$args0" "$args1" ;;
+        (3) set -- "$args0" "$args1" "$args2" ;;
+        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+    esac
+fi
+
+# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
+function splitJvmOpts() {
+    JVM_OPTS=("$@")
+}
+eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
+JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
+
+# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
+if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
+  cd "$(dirname "$0")"
+fi
+
+exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
--- a/gradlew.bat
+++ b/gradlew.bat
@ -0,0 +1,84 @@
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem  Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS=
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto init
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto init
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:init
+@rem Get command-line arguments, handling Windows variants
+
+if not "%OS%" == "Windows_NT" goto win9xME_args
+
+:win9xME_args
+@rem Slurp the command line arguments.
+set CMD_LINE_ARGS=
+set _SKIP=2
+
+:win9xME_args_slurp
+if "x%~1" == "x" goto execute
+
+set CMD_LINE_ARGS=%*
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
--- a/settings.gradle
+++ b/settings.gradle
@ -0,0 +1 @@
+rootProject.name = 'bibliographic-character-sets'
--- a/src/main/java/org/xbib/charset/ASCII.java
+++ b/src/main/java/org/xbib/charset/ASCII.java
@ -0,0 +1,160 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+
+/*
+   Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
+
+    This file is part of GNU Classpath.
+
+    GNU Classpath is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2, or (at your option)
+    any later version.
+
+    GNU Classpath is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with GNU Classpath; see the file COPYING.  If not, write to the
+    Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+    02111-1307 USA.
+
+    Linking this library statically or dynamically with other modules is
+    making a combined work based on this library.  Thus, the terms and
+    conditions of the GNU General Public License cover the whole
+    combination.
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent
+    modules, and to copy and distribute the resulting executable under
+    terms of your choice, provided that you also meet, for each linked
+    independent module, the terms and conditions of the license of that
+    module.  An independent module is a module which is not derived from
+    or based on this library.  If you modify this library, you may extend
+    this exception to your version of the library, but you are not
+    obligated to do so.  If you do not wish to do so, delete this
+    exception statement from your version.
+ */
+
+package org.xbib.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * US-ASCII charset.
+ * <p>
+ * Single-byte charset that maps the bytes 0x00-0x7F one-to-one onto the characters
+ * U+0000-U+007F. The decoder reports bytes with the high bit set as malformed input,
+ * the encoder reports characters above U+007F as unmappable.
+ */
+final class ASCII extends Charset {
+    ASCII() {
+        /*
+         * Canonical charset name chosen according to:
+         * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+         */
+        super("US-ASCII", new String[]{
+        /*
+         * These names are provided by
+         * http://www.iana.org/assignments/character-sets
+         */
+                "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "ASCII", "ISO646-US", "us",
+                "IBM367", "cp367", "csASCII",
+        /*
+         * These names are provided by
+         * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+         */
+                "ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646", "windows-20127"});
+    }
+
+    /**
+     * Tells whether the given charset is contained in this one.
+     *
+     * @param cs the charset to test
+     * @return {@code true} only if {@code cs} is itself an {@code ASCII} instance
+     */
+    @Override
+    public boolean contains(Charset cs) {
+        return cs instanceof ASCII;
+    }
+
+    /**
+     * @return a new decoder turning 7-bit bytes into characters
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        return new Decoder(this);
+    }
+
+    /**
+     * @return a new encoder turning characters up to U+007F into bytes
+     */
+    @Override
+    public CharsetEncoder newEncoder() {
+        return new Encoder(this);
+    }
+
+    /**
+     * Decoder producing exactly one character per input byte.
+     */
+    private static final class Decoder extends CharsetDecoder {
+        Decoder(Charset cs) {
+            super(cs, 1.0f, 1.0f);
+        }
+
+        @Override
+        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+            while (in.hasRemaining()) {
+                byte b = in.get();
+                if (b < 0) {
+                    // high bit set: not ASCII; un-read the byte so the caller sees it as malformed
+                    in.position(in.position() - 1);
+                    return CoderResult.malformedForLength(1);
+                }
+                if (!out.hasRemaining()) {
+                    // no room left: un-read the byte so it is decoded again on the next call
+                    in.position(in.position() - 1);
+                    return CoderResult.OVERFLOW;
+                }
+                out.put((char) b);
+            }
+            return CoderResult.UNDERFLOW;
+        }
+    }
+
+    /**
+     * Encoder producing exactly one byte per input character.
+     */
+    private static final class Encoder extends CharsetEncoder {
+        Encoder(Charset cs) {
+            super(cs, 1.0f, 1.0f);
+        }
+
+        @Override
+        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+            while (in.hasRemaining()) {
+                char c = in.get();
+                if (c > Byte.MAX_VALUE) {
+                    // beyond U+007F: un-read the character and report it as unmappable
+                    in.position(in.position() - 1);
+                    return CoderResult.unmappableForLength(1);
+                }
+                if (!out.hasRemaining()) {
+                    // no room left: un-read the character so it is encoded again on the next call
+                    in.position(in.position() - 1);
+                    return CoderResult.OVERFLOW;
+                }
+                out.put((byte) c);
+            }
+            return CoderResult.UNDERFLOW;
+        }
+    }
+}
--- a/src/main/java/org/xbib/charset/AnselCharset.java
+++ b/src/main/java/org/xbib/charset/AnselCharset.java
@ -0,0 +1,280 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import java.io.CharArrayWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.StandardCharsets;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import javax.xml.stream.XMLStreamException;
+
+/**
+ * Decode-only charset registered under the canonical name "ANSEL".
+ * <p>
+ * The character mappings are read once, when this class is initialized, from the
+ * classpath resource {@code org/xbib/charset/codetables.xml} by means of
+ * {@link AnselCodeTableParser}. If that resource cannot be loaded, a warning is logged
+ * and bytes are passed through unmapped.
+ * <p>
+ * Encoding is not supported: {@link #newEncoder()} throws
+ * {@link UnsupportedOperationException} and {@link #canEncode()} returns {@code false}.
+ */
+public class AnselCharset extends Charset {
+
+    /** Classpath location of the code table definitions. */
+    private static final String CODE_TABLES_RESOURCE = "org/xbib/charset/codetables.xml";
+
+    /** Character sets keyed by their name in the code table resource; empty if loading failed. */
+    private static final Map<String, AnselCodeTableParser.CharacterSet> characterSetMap;
+
+    static {
+        characterSetMap = new LinkedHashMap<>();
+        try (InputStream inputStream = openCodeTables()) {
+            if (inputStream == null) {
+                warn("ANSEL code table resource not found: " + CODE_TABLES_RESOURCE, null);
+            } else {
+                Iterable<AnselCodeTableParser.CodeTable> codeTables =
+                        new AnselCodeTableParser(inputStream).getCodeTables();
+                if (codeTables == null) {
+                    warn("ANSEL code tables could not be parsed: " + CODE_TABLES_RESOURCE, null);
+                } else {
+                    for (AnselCodeTableParser.CodeTable codeTable : codeTables) {
+                        for (AnselCodeTableParser.CharacterSet characterSet : codeTable.getCharacterSets()) {
+                            characterSetMap.put(characterSet.getName(), characterSet);
+                        }
+                    }
+                }
+            }
+        } catch (Exception e) {
+            // Deliberately broad: an exception escaping a static initializer would make
+            // the whole class unusable. Report the problem and continue with what was loaded.
+            warn("unable to load ANSEL code tables from " + CODE_TABLES_RESOURCE, e);
+        }
+    }
+
+    private final Charset encodeCharset;
+
+    /**
+     * Creates the charset with the aliases registered for "ANSEL" in
+     * {@link BibliographicCharsetProvider}.
+     *
+     * @throws XMLStreamException declared for compatibility with existing callers;
+     *                            this constructor itself does not parse XML
+     */
+    public AnselCharset() throws XMLStreamException {
+        super("ANSEL", BibliographicCharsetProvider.aliasesFor("ANSEL"));
+        this.encodeCharset = StandardCharsets.UTF_8;
+    }
+
+    /**
+     * Opens the code table resource, trying the thread context class loader first
+     * and the class loader of this class second.
+     *
+     * @return the resource stream, or {@code null} if the resource was not found
+     */
+    private static InputStream openCodeTables() {
+        ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
+        InputStream stream = contextLoader != null
+                ? contextLoader.getResourceAsStream(CODE_TABLES_RESOURCE) : null;
+        return stream != null ? stream : AnselCharset.class.getResourceAsStream("/" + CODE_TABLES_RESOURCE);
+    }
+
+    /** Logs a warning; fully qualified names keep the import list of this file unchanged. */
+    private static void warn(String message, Throwable cause) {
+        java.util.logging.Logger.getLogger(AnselCharset.class.getName())
+                .log(java.util.logging.Level.WARNING, message, cause);
+    }
+
+    @Override
+    public boolean contains(Charset charset) {
+        return charset instanceof AnselCharset;
+    }
+
+    /**
+     * @return always {@code false}, because this charset cannot create an encoder
+     */
+    @Override
+    public boolean canEncode() {
+        return false;
+    }
+
+    /**
+     * Encoding is not supported.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public CharsetEncoder newEncoder() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public CharsetDecoder newDecoder() {
+        return new Decoder(this, encodeCharset.newDecoder());
+    }
+
+    /**
+     * Stateful decoder. The designated G0/G1 character sets, diacritics waiting for
+     * their base character, and decoded characters that did not fit into the output
+     * buffer are kept between {@code decodeLoop} calls and cleared by {@code reset()}.
+     */
+    private static class Decoder extends CharsetDecoder {
+
+        private static final String DEFAULT_G0 = "Basic Latin (ASCII)";
+        private static final String DEFAULT_G1 = "Extended Latin (ANSEL)";
+        private static final char ESCAPE = '\u001b';
+
+        /** Name of the character set currently designated as G0 (bytes 0x21-0x7E). */
+        private String g0 = DEFAULT_G0;
+        /** Name of the character set currently designated as G1 (bytes 0xA1-0xFE). */
+        private String g1 = DEFAULT_G1;
+        /** Diacritics read so far that still wait for their base character. */
+        private final StringBuilder diacritics = new StringBuilder();
+        /** Decoded characters not yet delivered to the output buffer. */
+        private final StringBuilder ready = new StringBuilder();
+
+        Decoder(Charset cs, CharsetDecoder baseDecoder) {
+            super(cs, baseDecoder.averageCharsPerByte(), baseDecoder.maxCharsPerByte());
+        }
+
+        @Override
+        protected void implReset() {
+            g0 = DEFAULT_G0;
+            g1 = DEFAULT_G1;
+            diacritics.setLength(0);
+            ready.setLength(0);
+        }
+
+        @Override
+        protected CoderResult implFlush(CharBuffer out) {
+            return drain(out) ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
+        }
+
+        @Override
+        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+            if (!drain(out)) {
+                return CoderResult.OVERFLOW;
+            }
+            while (in.hasRemaining()) {
+                int start = in.position();
+                char first = (char) (in.get() & 0xFF);
+                if (first == ESCAPE) {
+                    if (!handleEscapeSequence(in)) {
+                        // escape sequence is cut off: leave it unread until more input arrives
+                        in.position(start);
+                        return CoderResult.UNDERFLOW;
+                    }
+                    continue;
+                }
+                AnselCodeTableParser.CharacterSet characterSet = isG0(first) ? characterSetMap.get(g0) :
+                        isG1(first) ? characterSetMap.get(g1) : null;
+                String key;
+                if (characterSet == null || characterSet.getLength() == 1) {
+                    key = String.valueOf(first);
+                } else {
+                    if (in.remaining() < 2) {
+                        // three-byte code is cut off: leave it unread until more input arrives
+                        in.position(start);
+                        return CoderResult.UNDERFLOW;
+                    }
+                    key = new StringBuilder(3).append(first)
+                            .append((char) (in.get() & 0xFF))
+                            .append((char) (in.get() & 0xFF))
+                            .toString();
+                }
+                AnselCodeTableParser.Code code = characterSet != null ? characterSet.getMarc().get(key) : null;
+                char ch = code != null ? code.getUcs() : first;
+                if (ch == '\u0000') {
+                    // FB, EC - see http://memory.loc.gov/diglib/codetables/45.html#Note1 and http://memory.loc.gov/diglib/codetables/45.html#Note2
+                    continue;
+                }
+                if (isDiacritic(first) || (code != null && code.isCombining())) {
+                    diacritics.append(ch);
+                    continue;
+                }
+                // diacritics must be appended in Unicode, but are prepended in MARC-8 / Z39.47
+                ready.append(ch).append(diacritics);
+                diacritics.setLength(0);
+                if (!drain(out)) {
+                    return CoderResult.OVERFLOW;
+                }
+            }
+            return CoderResult.UNDERFLOW;
+        }
+
+        /**
+         * Moves as many pending characters as fit into the output buffer.
+         *
+         * @param out the output buffer
+         * @return {@code true} if nothing is left pending
+         */
+        private boolean drain(CharBuffer out) {
+            int count = Math.min(ready.length(), out.remaining());
+            if (count > 0) {
+                out.append(ready, 0, count);
+                ready.delete(0, count);
+            }
+            return ready.length() == 0;
+        }
+
+        private static boolean isDiacritic(char ch) {
+            return ch >= 0xE0 && ch <= 0xFF;
+        }
+
+        private static boolean isG0(char ch) {
+            return ch >= 0x21 && ch <= 0x7E;
+        }
+
+        private static boolean isG1(char ch) {
+            return ch >= 0xA1 && ch <= 0xFE;
+        }
+
+        /**
+         * Maps the final byte of a designation escape sequence to a character set name.
+         *
+         * @param finalByte the final byte of the sequence
+         * @return the character set name, or {@code null} for an unknown final byte
+         */
+        private static String characterSetFor(byte finalByte) {
+            switch (finalByte) {
+                case '1':
+                    return "Chinese, Japanese, Korean (EACC)";
+                case '2':
+                    return "Basic Hebrew";
+                case '3':
+                    return "Basic Arabic";
+                case '4':
+                    return "Extended Arabic";
+                case 'B':
+                    return "Basic Latin (ASCII)";
+                case 'N':
+                    return "Basic Cyrillic";
+                case 'Q':
+                    return "Extended Cyrillic";
+                case 'S':
+                    return "Basic Greek";
+                default:
+                    return null;
+            }
+        }
+
+        /**
+         * ANSI X3.41 or ISO 2022 escape technique.
+         * See procedures in IS0 2375-1985.
+         * <p>
+         * Reads the bytes following an ESC byte and updates the G0/G1 designation.
+         * Unknown sequences are consumed without changing the designation.
+         *
+         * @param in byte buffer, positioned directly after the ESC byte
+         * @return {@code false} if the buffer ended before the sequence was complete;
+         *         in that case the designation is left unchanged
+         */
+        private boolean handleEscapeSequence(ByteBuffer in) {
+            if (!in.hasRemaining()) {
+                return false;
+            }
+            byte intermediate = in.get();
+            switch (intermediate) {
+                case 's':
+                    g0 = "Basic Latin (ASCII)";
+                    return true;
+                case 'g':
+                    g0 = "Greek Symbols";
+                    return true;
+                case 'b':
+                    g0 = "Subscripts";
+                    return true;
+                case 'p':
+                    g0 = "Superscripts";
+                    return true;
+                case '(':
+                case ',':
+                case ')':
+                case '-':
+                case '$':
+                case '!':
+                    // these need one more byte, handled below
+                    break;
+                default:
+                    return true;
+            }
+            if (!in.hasRemaining()) {
+                return false;
+            }
+            byte finalByte = in.get();
+            switch (intermediate) {
+                case '(':
+                case ',': {
+                    String name = characterSetFor(finalByte);
+                    if (name != null) {
+                        g0 = name;
+                    }
+                    break;
+                }
+                case ')':
+                case '-': {
+                    String name = characterSetFor(finalByte);
+                    if (name != null) {
+                        g1 = name;
+                    }
+                    break;
+                }
+                case '$':
+                    if (finalByte == '1') {
+                        g0 = "Chinese, Japanese, Korean (EACC)";
+                    }
+                    break;
+                default:
+                    // intermediate byte is '!'
+                    if (finalByte == 'E') {
+                        g0 = "Extended Latin (ANSEL)";
+                    }
+                    break;
+            }
+            return true;
+        }
+    }
+
+}
+
+
--- a/src/main/java/org/xbib/charset/AnselCodeTableParser.java
+++ b/src/main/java/org/xbib/charset/AnselCodeTableParser.java
@ -0,0 +1,280 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import javax.xml.namespace.QName;
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.events.Attribute;
+import javax.xml.stream.events.Characters;
+import javax.xml.stream.events.EndElement;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+
+/**
+ * StAX based reader for code table XML documents made of the elements
+ * {@code codeTable}, {@code characterSet} and {@code code}, where each {@code code}
+ * carries {@code marc}, {@code ucs}, {@code utf-8}, {@code name} and optionally
+ * {@code isCombining} child elements.
+ * <p>
+ * Parsing happens in the constructor. If the document cannot be parsed, a warning is
+ * logged and {@link #getCodeTables()} returns an empty list.
+ */
+class AnselCodeTableParser {
+
+    private final List<CodeTable> codeTables;
+
+    private CodeTable codeTable = new CodeTable();
+
+    private CharacterSet characterSet = new CharacterSet();
+
+    private Code code = new Code();
+
+    /** Text collected since the most recent start or end tag. */
+    private final StringBuilder content = new StringBuilder();
+
+    /**
+     * Parses the given XML stream. The stream is not closed by this constructor.
+     *
+     * @param inputStream the XML document
+     */
+    AnselCodeTableParser(InputStream inputStream) {
+        List<CodeTable> parsed;
+        try {
+            parsed = createCodeTables(inputStream);
+        } catch (XMLStreamException e) {
+            // fully qualified names keep the import list of this file unchanged
+            java.util.logging.Logger.getLogger(AnselCodeTableParser.class.getName())
+                    .log(java.util.logging.Level.WARNING, "unable to parse code tables", e);
+            parsed = new LinkedList<>();
+        }
+        this.codeTables = parsed;
+    }
+
+    /**
+     * @return the parsed code tables, never {@code null}; empty if parsing failed
+     */
+    public List<CodeTable> getCodeTables() {
+        return codeTables;
+    }
+
+    private List<CodeTable> createCodeTables(InputStream inputStream) throws XMLStreamException {
+        List<CodeTable> codetables = new LinkedList<>();
+        XMLInputFactory factory = XMLInputFactory.newInstance();
+        // the code tables are self-contained, external entities are never needed
+        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
+        XMLEventReader xmlReader = factory.createXMLEventReader(inputStream);
+        try {
+            while (xmlReader.hasNext()) {
+                processEvent(codetables, xmlReader.nextEvent());
+            }
+        } finally {
+            xmlReader.close();
+        }
+        return codetables;
+    }
+
+    private void processEvent(List<CodeTable> codetables, XMLEvent event) {
+        if (event.isStartElement()) {
+            // drop whitespace collected between the previous tag and this one, otherwise
+            // it would become part of this element's text
+            content.setLength(0);
+            startElement((StartElement) event);
+        } else if (event.isCharacters()) {
+            Characters c = (Characters) event;
+            if (!c.isIgnorableWhiteSpace()) {
+                // character events may come more than once (e.g. because of XML entities like &quot;)
+                // concatenate with values that might exist
+                content.append(c.getData());
+            }
+        } else if (event.isEndElement()) {
+            endElement(codetables, ((EndElement) event).getName().getLocalPart());
+            content.setLength(0);
+        }
+    }
+
+    /** Creates the object that the opened element is going to fill. */
+    private void startElement(StartElement element) {
+        switch (element.getName().getLocalPart()) {
+            case "codeTable": {
+                this.codeTable = new CodeTable();
+                break;
+            }
+            case "characterSet": {
+                this.characterSet = new CharacterSet();
+                @SuppressWarnings("unchecked")
+                Iterator<Attribute> it = element.getAttributes();
+                while (it.hasNext()) {
+                    Attribute attr = it.next();
+                    QName attributeName = attr.getName();
+                    String attributeLocalName = attributeName.getLocalPart();
+                    String attributeValue = attr.getValue();
+                    if ("name".equals(attributeLocalName)) {
+                        characterSet.name = attributeValue;
+                    } else if ("isoCode".equalsIgnoreCase(attributeLocalName)) {
+                        // NOTE(review): matched case-insensitively because the attribute may be
+                        // spelled "ISOcode" in the code table file - confirm against the resource
+                        characterSet.isoCode = attributeValue;
+                    }
+                }
+                break;
+            }
+            case "code": {
+                code = new Code();
+                break;
+            }
+            default:
+                // "codeTables" and all other elements need no preparation
+                break;
+        }
+    }
+
+    /** Stores the collected text, or the finished object, of the closed element. */
+    private void endElement(List<CodeTable> codetables, String name) {
+        switch (name) {
+            case "codeTable": {
+                codetables.add(codeTable);
+                codeTable = new CodeTable();
+                break;
+            }
+            case "characterSet": {
+                codeTable.add(characterSet);
+                characterSet = new CharacterSet();
+                break;
+            }
+            case "code": {
+                characterSet.add(code);
+                code = new Code();
+                break;
+            }
+            case "marc": {
+                code.marc = decodeHexPairs(content.toString().trim());
+                break;
+            }
+            case "ucs": {
+                String s = content.toString().trim();
+                // two chars have no ucs equivalent...
+                if (!s.isEmpty()) {
+                    code.ucs = (char) (Integer.parseInt(s, 16) & 0xFFFF);
+                }
+                break;
+            }
+            case "utf-8": {
+                code.utf8 = decodeHexPairs(content.toString().trim());
+                break;
+            }
+            case "name": {
+                code.name = content.toString();
+                break;
+            }
+            case "isCombining": {
+                code.isCombining = "true".equals(content.toString().trim());
+                break;
+            }
+            default:
+                break;
+        }
+    }
+
+    /**
+     * Converts a string of hexadecimal digit pairs into a string holding one character
+     * (value 0x00-0xFF) per pair. A trailing unpaired digit is ignored.
+     *
+     * @param hex the hexadecimal digits, e.g. {@code "CC89"}
+     * @return the decoded characters, e.g. {@code "\u00CC\u0089"}
+     */
+    private static String decodeHexPairs(String hex) {
+        char[] ch = new char[hex.length() / 2];
+        for (int i = 0; i + 1 < hex.length(); i += 2) {
+            ch[i / 2] = (char) ((Character.digit(hex.charAt(i), 16) << 4)
+                    + Character.digit(hex.charAt(i + 1), 16));
+        }
+        return new String(ch);
+    }
+
+    /** One {@code codeTable} element: an ordered list of character sets. */
+    static class CodeTable {
+        private final List<CharacterSet> characterSets = new LinkedList<>();
+
+        void add(CharacterSet characterSet) {
+            characterSets.add(characterSet);
+        }
+
+        List<CharacterSet> getCharacterSets() {
+            return characterSets;
+        }
+    }
+
+    /** One {@code characterSet} element with lookup maps for its codes. */
+    static class CharacterSet {
+        String name;
+        String isoCode;
+        // number of characters in the MARC key of the most recently added code
+        int length;
+        Map<String, Code> marc = new HashMap<>();
+        Map<Character, Code> unicode = new HashMap<>();
+
+        /**
+         * Registers a code under its MARC key and its UCS character; the first code
+         * registered for a key wins. A code without MARC key is registered by UCS only.
+         */
+        void add(Code code) {
+            if (code.marc != null) {
+                marc.putIfAbsent(code.marc, code);
+                length = code.marc.length();
+            }
+            unicode.putIfAbsent(code.ucs, code);
+        }
+
+        String getName() {
+            return name;
+        }
+
+        int getLength() {
+            return length;
+        }
+
+        String getIsoCode() {
+            return isoCode;
+        }
+
+        Map<String, Code> getMarc() {
+            return marc;
+        }
+
+        Map<Character, Code> getUnicode() {
+            return unicode;
+        }
+    }
+
+    /** One {@code code} element. */
+    static class Code {
+        // Universal Character Set (UCS, ISO-IEC 10646)/Unicode, always 16 bit
+        char ucs;
+        // MARC-8 standard (single char) or EACC 24-bit code (three chars)
+        String marc;
+        // UTF-8 code, one char (0x00-0xFF) per byte, 1-3 bytes
+        String utf8;
+        // name
+        String name;
+        boolean isCombining;
+
+        char getUcs() {
+            return ucs;
+        }
+
+        String getMarc() {
+            return marc;
+        }
+
+        String getUtf8() {
+            return utf8;
+        }
+
+        String getName() {
+            return name;
+        }
+
+        boolean isCombining() {
+            return isCombining;
+        }
+
+        @Override
+        public String toString() {
+            return "marc=" + marc + " isCombining=" + isCombining + " ucs=" + ucs;
+        }
+    }
+}
+
--- a/src/main/java/org/xbib/charset/BibliographicCharsetProvider.java
+++ b/src/main/java/org/xbib/charset/BibliographicCharsetProvider.java
@ -0,0 +1,173 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import java.lang.ref.SoftReference;
+import java.nio.charset.Charset;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Extra bibliographic character sets.
+ * <p>
+ * Maps canonical charset names and their aliases (both compared case-insensitively)
+ * to charset classes in the package of this provider. The classes are instantiated
+ * on first request and cached through soft references.
+ */
+public class BibliographicCharsetProvider extends CharsetProvider {
+
+    private static final Logger logger = Logger.getLogger(BibliographicCharsetProvider.class.getName());
+
+    /**
+     * Soft reference to the most recently created provider, used by
+     * {@link #aliasesFor(String)}. The garbage collector may clear it, in which case
+     * a new provider is created on demand.
+     */
+    private static volatile SoftReference<BibliographicCharsetProvider> instance = null;
+    /** Canonical charset name to simple class name. */
+    private final Map<String, String> classMap;
+    /** Alias to canonical charset name. */
+    private final Map<String, String> aliasMap;
+    /** Canonical charset name to its aliases. */
+    private final Map<String, String[]> aliasNameMap;
+    /** Canonical charset name to the instantiated charset. */
+    private final Map<String, SoftReference<Charset>> cache;
+    /** Package of the charset classes; empty for the unnamed package. */
+    private final String packagePrefix;
+
+    /**
+     * Constructor must be public.
+     */
+    public BibliographicCharsetProvider() {
+        classMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
+        aliasMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
+        aliasNameMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
+        // The cache is the only map modified after construction and lookups may come from
+        // several threads, so it is synchronized. Fully qualified to keep the imports unchanged.
+        cache = java.util.Collections.synchronizedMap(
+                new TreeMap<String, SoftReference<Charset>>(String.CASE_INSENSITIVE_ORDER));
+        // getPackage() may return null for classes in the unnamed package
+        Package pkg = getClass().getPackage();
+        packagePrefix = pkg != null ? pkg.getName() : "";
+
+        charset("ANSEL", "AnselCharset",
+                new String[]{"ANSI_Z39_47", "ANSI-Z39-47", "Z39_47", "Z39-47", "ansel", "usmarc", "usm94"});
+        charset("ISO-5426", "ISO5426", new String[]{"x-mab", "x-MAB", "ISO-5426", "ISO_5426", "ISO_5426:1983", "MAB2"});
+        charset("ISO-5428", "ISO5428", new String[]{"ISO_5428", "ISO-5428:1984", "iso-ir-55"});
+        charset("MAB-Diskette", "MabDisketteCharset", new String[]{});
+        charset("PICA", "Pica", new String[]{"Pica", "pica"});
+        charset("x-PICA", "PicaCharset", new String[]{"x-pica"});
+        charset("SIMPLE_ANSEL", "SimpleAnselCharset", new String[]{});
+        instance = new SoftReference<>(this);
+    }
+
+    /**
+     * List all aliases defined for a character set.
+     *
+     * @param s the name of the character set
+     * @return an alias string array, or {@code null} if the name is not registered
+     */
+    static String[] aliasesFor(String s) {
+        SoftReference<BibliographicCharsetProvider> softreference = instance;
+        BibliographicCharsetProvider charsets = softreference != null ? softreference.get() : null;
+        if (charsets == null) {
+            // the constructor publishes the new provider in the static reference
+            charsets = new BibliographicCharsetProvider();
+        }
+        return charsets.aliases(s);
+    }
+
+    /**
+     * Looks up a charset by canonical name or alias.
+     *
+     * @param s the charset name or alias
+     * @return the charset, or {@code null} if the name is unknown or the charset class
+     *         could not be instantiated
+     */
+    @Override
+    public final Charset charsetForName(String s) {
+        return lookup(canonicalize(s));
+    }
+
+    /**
+     * @return a read-only iterator over the registered charsets, in case-insensitive
+     *         order of their canonical names
+     */
+    @Override
+    public final Iterator<Charset> charsets() {
+        return new Iterator<Charset>() {
+
+            private final Iterator<String> iterator = classMap.keySet().iterator();
+
+            @Override
+            public boolean hasNext() {
+                return iterator.hasNext();
+            }
+
+            @Override
+            public Charset next() {
+                return lookup(iterator.next());
+            }
+
+            @Override
+            public void remove() {
+                throw new UnsupportedOperationException();
+            }
+        };
+    }
+
+    /** Registers a charset class under a canonical name and aliases; first registration wins. */
+    private void charset(String name, String className, String[] aliases) {
+        classMap.putIfAbsent(name, className);
+        for (String alias : aliases) {
+            aliasMap.putIfAbsent(alias, name);
+        }
+        aliasNameMap.putIfAbsent(name, aliases);
+    }
+
+    /** Resolves an alias to its canonical name; other names are returned unchanged. */
+    private String canonicalize(String charsetName) {
+        String aliasCharsetName = aliasMap.get(charsetName);
+        return aliasCharsetName != null ? aliasCharsetName : charsetName;
+    }
+
+    /**
+     * Returns the cached charset for a canonical name, instantiating it if necessary.
+     *
+     * @param charsetName the canonical charset name
+     * @return the charset, or {@code null} if the name is not registered or the
+     *         charset class could not be instantiated (a warning is logged then)
+     */
+    private Charset lookup(String charsetName) {
+        SoftReference<Charset> softreference = cache.get(charsetName);
+        if (softreference != null) {
+            Charset charset = softreference.get();
+            if (charset != null) {
+                return charset;
+            }
+        }
+        String className = classMap.get(charsetName);
+        if (className == null) {
+            return null;
+        }
+        String qualifiedName = packagePrefix.isEmpty() ? className : packagePrefix + "." + className;
+        try {
+            Class<?> cl = Class.forName(qualifiedName, true, getClass().getClassLoader());
+            // Constructor.newInstance wraps exceptions thrown by the constructor, whereas the
+            // deprecated Class.newInstance would let checked exceptions escape undeclared
+            Charset charset = cl.asSubclass(Charset.class).getDeclaredConstructor().newInstance();
+            cache.put(charsetName, new SoftReference<>(charset));
+            return charset;
+        } catch (ClassNotFoundException e) {
+            logger.log(Level.WARNING, "Class not found: " + qualifiedName, e);
+        } catch (IllegalAccessException e) {
+            logger.log(Level.WARNING, "Illegal access: " + qualifiedName, e);
+        } catch (ReflectiveOperationException | ClassCastException e) {
+            logger.log(Level.WARNING, "Instantiation failed: " + qualifiedName, e);
+        }
+        return null;
+    }
+
+    /** Returns the aliases registered for a canonical name, or {@code null} if there are none. */
+    private String[] aliases(String s) {
+        return aliasNameMap.get(s);
+    }
+}
--- a/src/main/java/org/xbib/charset/BibliographicCharsets.java
+++ b/src/main/java/org/xbib/charset/BibliographicCharsets.java
@ -0,0 +1,52 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import java.nio.charset.Charset;
+
+/**
+ *
+ */
+public final class BibliographicCharsets {
+
+    public static final Charset ANSEL = Charset.forName("ANSEL");
+
+    public static final Charset ISO5426 = Charset.forName("ISO-5426");
+
+    public static final Charset ISO5428 = Charset.forName("ISO-5428");
+
+    public static final Charset MAB = Charset.forName("x-MAB");
+
+    public static final Charset MAB_DISKETTE = Charset.forName("MAB-DISKETTE");
+
+    public static final Charset PICA = Charset.forName("Pica");
+}
--- a/src/main/java/org/xbib/charset/ByteCharset.java
+++ b/src/main/java/org/xbib/charset/ByteCharset.java
@ -0,0 +1,222 @@
+/**
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ *
+ *
+ * Derived from
+ *
+ * ByteCharset.java -- Abstract class for generic 1-byte encodings.
+ * Copyright (C) 2005 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Classpath.
+ *
+ * GNU Classpath is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * GNU Classpath is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Classpath; see the file COPYING.  If not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA.
+ *
+ * Linking this library statically or dynamically with other modules is
+ * making a combined work based on this library.  Thus, the terms and
+ * conditions of the GNU General Public License cover the whole
+ * combination.
+ *
+ * As a special exception, the copyright holders of this library give you
+ * permission to link this library with independent modules to produce an
+ * executable, regardless of the license terms of these independent
+ * modules, and to copy and distribute the resulting executable under
+ * terms of your choice, provided that you also meet, for each linked
+ * independent module, the terms and conditions of the license of that
+ * module.  An independent module is a module which is not derived from
+ * or based on this library.  If you modify this library, you may extend
+ * this exception to your version of the library, but you are not
+ * obligated to do so.  If you do not wish to do so, delete this
+ * exception statement from your version.
+ *//**
+ *
+ * Derived from
+ *
+ * ByteCharset.java -- Abstract class for generic 1-byte encodings.
+ * Copyright (C) 2005 Free Software Foundation, Inc.
+ *
+ *  This file is part of GNU Classpath.
+ *
+ *  GNU Classpath is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ *  GNU Classpath is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ * along with GNU Classpath; see the file COPYING.  If not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA.
+ *
+ *  Linking this library statically or dynamically with other modules is
+ * making a combined work based on this library.  Thus, the terms and
+ * conditions of the GNU General Public License cover the whole
+ * combination.
+ *
+ *  As a special exception, the copyright holders of this library give you
+ * permission to link this library with independent modules to produce an
+ * executable, regardless of the license terms of these independent
+ * modules, and to copy and distribute the resulting executable under
+ * terms of your choice, provided that you also meet, for each linked
+ * independent module, the terms and conditions of the license of that
+ * module.  An independent module is a module which is not derived from
+ * or based on this library.  If you modify this library, you may extend
+ * this exception to your version of the library, but you are not
+ * obligated to do so.  If you do not wish to do so, delete this
+ * exception statement from your version.
+ */
+package org.xbib.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * A generic encoding framework for single-byte encodings, utilizing a look-up
+ * table. This replaces the gnu.java.io.EncoderEightBitLookup class, created by
+ * Aron Renn.
+ */
+abstract class ByteCharset extends Charset {
+
+    /**
+     * Char to signify the character in the table is undefined.
+     */
+    private static final char NONE = (char) 0xFFFD;
+    char[] lookupTable;
+
+    ByteCharset(String canonicalName, String[] aliases) {
+        super(canonicalName, aliases);
+    }
+
+    /**
+     * Most western charsets include ASCII, but this should be overloaded for
+     * others.
+     */
+    public boolean contains(Charset cs) {
+        return cs instanceof ASCII || cs.getClass() == getClass();
+    }
+
+    private char[] getLookupTable() {
+        return lookupTable;
+    }
+
+    public CharsetDecoder newDecoder() {
+        return new Decoder(this);
+    }
+
+    public CharsetEncoder newEncoder() {
+        return new Encoder(this);
+    }
+
+    private static final class Decoder extends CharsetDecoder {
+
+        private char[] lookup;
+
+        Decoder(ByteCharset cs) {
+            super(cs, 1.0f, 1.0f);
+            lookup = cs.getLookupTable();
+        }
+
+        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+            while (in.hasRemaining()) {
+                byte b = in.get();
+                char c;
+                if (!out.hasRemaining()) {
+                    in.position(in.position() - 1);
+                    return CoderResult.OVERFLOW;
+                }
+                c = lookup[b & 0xFF];
+                out.put(c);
+            }
+
+            return CoderResult.UNDERFLOW;
+        }
+    }
+
+    private static final class Encoder extends CharsetEncoder {
+
+        private byte[] lookup;
+
+        Encoder(ByteCharset cs) {
+            super(cs, 1.0f, 1.0f);
+            char[] lookuptable = cs.getLookupTable();
+            int max = 0;
+            for (char ch : lookuptable) {
+                int c = (int) ch;
+                max = c > max && c < NONE ? c : max;
+            }
+            lookup = new byte[max + 1];
+            for (int i = 0; i < lookuptable.length; i++) {
+                int c = (int) lookuptable[i];
+                if (c != 0 && c < NONE) {
+                    lookup[c] = (byte) i;
+                }
+            }
+        }
+
+        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+            while (in.hasRemaining()) {
+                int c = (int) in.get();
+                if (!out.hasRemaining()) {
+                    in.position(in.position() - 1);
+                    return CoderResult.OVERFLOW;
+                }
+                byte b = c < lookup.length ? lookup[c] : (byte) 0;
+                if ((int) b != 0 || c == 0) {
+                    out.put(b);
+                } else {
+                    in.position(in.position() - 1);
+                    return CoderResult.unmappableForLength(1);
+                }
+            }
+            return CoderResult.UNDERFLOW;
+        }
+    }
+}
--- a/src/main/java/org/xbib/charset/ISO5426.java
+++ b/src/main/java/org/xbib/charset/ISO5426.java
@ -0,0 +1,241 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2012 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ *
+ * Copyright (C) 2004  Jürgen Kett, Die Deutsche Bibliothek,
+ * (http://www.ddb.de, mailto:kett@dbf.ddb.de)
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, 
+ * Boston, MA  02111-1307, USA.
+ *
+ */
+package org.xbib.charset;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * ISO 5426 character set, registered under the canonical name
+ * {@code "ISO-5426"}. Derived from a MAB character set implementation, with
+ * some minor additions in blocks A-D.
+ *
+ * <p>Bytes 0x00-0x7F map to the same code points; bytes without a table entry
+ * map to U+0000. Bytes 0xC0-0xDF are combining marks.
+ */
+public class ISO5426 extends Charset {
+
+    /** Marks the begin of a non-sorting part ("Nichtsortierbeginnzeichen"). */
+    public static final char NICHTSORTIERBEGINNZEICHEN = '\u0098';
+    /** Marks the end of a non-sorting part ("Nichtsortierendezeichen"). */
+    public static final char NICHTSORTIERENDEZEICHEN = '\u009C';
+    /** Subfield separator ("Teilfeldtrennzeichen"). */
+    public static final char TEILFELDTRENNZEICHEN = '\u2021';
+
+    // must be initialized before charToByteTable, which is derived from it
+    private static final char[] byteToCharTable = newByteToCharTable();
+
+    private static final Map<Character, Byte> charToByteTable = newCharToByteMap();
+
+    private final boolean isNFCOutput;
+
+    /**
+     * Creates the charset; its decoders are asked to compose characters after
+     * conversion.
+     */
+    public ISO5426() {
+        this(true);
+    }
+
+    private ISO5426(boolean isNFCOutput) {
+        super("ISO-5426", null);
+        this.isNFCOutput = isNFCOutput;
+    }
+
+    /**
+     * Builds the encoding map as the inverse of the decoding table.
+     * Unmapped bytes (U+0000) are skipped. When two bytes decode to the same
+     * character, the higher byte value wins (for example U+0308 encodes as 0xC9).
+     */
+    private static Map<Character, Byte> newCharToByteMap() {
+        Map<Character, Byte> ret = new HashMap<>(byteToCharTable.length);
+        for (int i = 0; i < byteToCharTable.length; i++) {
+            if (byteToCharTable[i] != 0) {
+                ret.put(byteToCharTable[i], (byte) i);
+            }
+        }
+        return ret;
+    }
+
+    /**
+     * Builds the decoding table, indexed by the unsigned byte value.
+     */
+    private static char[] newByteToCharTable() {
+        char[] map = new char[256];
+
+        for (int i = 0; i < 128; i++) {
+            map[i] = (char) i;
+        }
+        map[0x88] = ISO5426.NICHTSORTIERBEGINNZEICHEN;
+        map[0x89] = ISO5426.NICHTSORTIERENDEZEICHEN;
+
+        // A-Block
+        map[0xA1] = '\u00A1'; // INVERTED EXCLAMATION MARK
+        map[0xA2] = '\u201E'; // Double Low-9 Quotation Mark
+        map[0xA3] = '\u00A3'; // Pound Sign
+        map[0xA4] = '\u0024'; // Dollar Sign
+        map[0xA5] = '\u00A5'; // YEN SIGN
+        map[0xA6] = '\u2020'; // Dagger
+        map[0xA7] = '\u00A7'; // SECTION SIGN
+        map[0xA8] = '\u2032'; // Prime
+        map[0xA9] = '\u2018'; // Left Single Quotation Mark
+        map[0xAA] = '\u201C'; // Left Double Quotation Mark
+        map[0xAB] = '\u00AB'; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (LEFT POINTING GUILLEMET)
+        map[0xAC] = '\u266D'; // Music Flat Sign
+        map[0xAD] = '\u00A9'; // Copyright Sign
+        map[0xAE] = '\u2117'; // Sound Recording Copyright
+        map[0xAF] = '\u00AE'; // Registered Sign
+
+        // B-Block
+        map[0xB0] = '\u02BB'; // Modifier Letter Turned Comma
+        map[0xB1] = '\u02BC'; // Modifier Letter Apostrophe
+        map[0xB2] = '\u201A'; // Single Low-9 Quotation Mark
+        map[0xB6] = ISO5426.TEILFELDTRENNZEICHEN;
+        map[0xB7] = '\u00B7'; // Middle Dot
+        map[0xB8] = '\u2033'; // Double Prime
+        map[0xB9] = '\u2019'; // Right Single Quotation Mark
+        map[0xBA] = '\u201D'; // Right Double Quotation Mark
+        map[0xBB] = '\u00BB'; // Right-Pointing Double Angle Quotation Mark
+        map[0xBC] = '\u266F'; // Music Sharp Sign (TODO: mapping still to be confirmed)
+        map[0xBD] = '\u02B9'; // Modifier Letter Prime
+        map[0xBE] = '\u02BA'; // Modifier Letter Double Prime
+        map[0xBF] = '\u00BF'; // Inverted Question Mark
+
+        // C-Block
+        map[0xC0] = '\u0309'; // Combining Hook above
+        map[0xC1] = '\u0300'; // Combining Grave Accent
+        map[0xC2] = '\u0301'; // Combining Acute Accent
+        map[0xC3] = '\u0302'; // Combining Circumflex Accent
+        map[0xC4] = '\u0303'; // Combining Tilde
+        map[0xC5] = '\u0304'; // Combining Macron
+        map[0xC6] = '\u0306'; // Combining Breve
+        map[0xC7] = '\u0307'; // Combining Dot Above
+        map[0xC8] = '\u0308'; // Trema -> Combining Diaeresis
+        map[0xC9] = '\u0308'; // Umlaut -> Combining Diaeresis
+        map[0xCA] = '\u030A'; // Combining Ring Above
+        map[0xCB] = '\u0315'; // Combining Comma Above Right
+        map[0xCC] = '\u0312'; // Combining Turned Comma Above
+        map[0xCD] = '\u030B'; // Combining Double Acute Accent
+        map[0xCE] = '\u031B'; // Combining Horn
+        map[0xCF] = '\u030C'; // Combining Caron
+
+        // D-Block
+        map[0xD0] = '\u0327'; // Combining Cedilla
+        map[0xD1] = '\u031C'; // Combining Left Half Ring Below
+        map[0xD2] = '\u0326'; // Combining Comma Below
+        map[0xD3] = '\u0328'; // Combining Ogonek
+        map[0xD4] = '\u0325'; // Combining Ring Below
+        map[0xD5] = '\u032E'; // Combining Breve Below
+        map[0xD6] = '\u0323'; // Combining Dot Below
+        map[0xD7] = '\u0324'; // Combining Diaeresis Below
+        map[0xD8] = '\u0332'; // Combining Low Line
+        map[0xD9] = '\u0333'; // Combining Double Low Line
+        map[0xDA] = '\u0329'; // Combining Vertical Line Below
+        map[0xDB] = '\u032D'; // Combining Circumflex Accent Below
+        map[0xDD] = '\uFE20'; // Combining Ligature Left Half
+        map[0xDE] = '\uFE21'; // Combining Ligature Right Half
+        map[0xDF] = '\uFE23'; // Combining Double Tilde Right Half
+
+        // E-Block
+        map[0xE1] = '\u00C6'; // Latin Capital Letter AE
+        map[0xE2] = '\u0110'; // Latin Capital Letter D with Stroke
+        map[0xE6] = '\u0132'; // Latin Capital Ligature IJ
+        map[0xE8] = '\u0141'; // Latin Capital Letter L with Stroke
+        map[0xE9] = '\u00D8'; // Latin Capital Letter O with Stroke
+        map[0xEA] = '\u0152'; // Latin Capital Ligature OE
+        map[0xEC] = '\u00DE'; // Latin Capital Letter Thorn
+
+        // F-Block
+        map[0xF1] = '\u00E6'; // Latin Small Letter AE
+        map[0xF2] = '\u0111'; // Latin Small Letter D with Stroke
+        map[0xF3] = '\u00F0'; // Latin Small Letter ETH
+        map[0xF5] = '\u0131'; // Latin Small Letter Dotless I
+        map[0xF6] = '\u0133'; // Latin Small Ligature IJ
+        map[0xF8] = '\u0142'; // Latin Small Letter L with Stroke
+        map[0xF9] = '\u00F8'; // Latin Small Letter O with Stroke
+        map[0xFA] = '\u0153'; // Latin Small Ligature OE
+        map[0xFB] = '\u00DF'; // Latin Small Letter Sharp S
+        map[0xFC] = '\u00FE'; // Latin Small Letter Thorn
+        return map;
+    }
+
+    @Override
+    public boolean contains(Charset cs) {
+        return false;
+    }
+
+    @Override
+    public CharsetDecoder newDecoder() {
+        MabDecoder ret = new MabDecoder(this);
+        ret.setComposeCharactersAfterConversion(this.isNFCOutput);
+        return ret;
+    }
+
+    @Override
+    public CharsetEncoder newEncoder() {
+        return new MabEncoder(this);
+    }
+
+    /**
+     * Decoder backed by the static byte-to-character table.
+     */
+    private static class MabDecoder extends SingleByteDecoder {
+
+        MabDecoder(Charset cs) {
+            super(cs);
+        }
+
+        @Override
+        public char byteToChar(byte b) {
+            return byteToCharTable[b & 0xFF];
+        }
+
+        /**
+         * Tells whether the byte lies in the combining-mark blocks C and D.
+         * Both ends are inclusive: 0xC0 (combining hook above) and 0xDF
+         * (combining double tilde right half) are combining marks as well.
+         */
+        @Override
+        public boolean isCombiningCharacter(byte b) {
+            int code = b & 0xFF;
+            return code >= 0xC0 && code <= 0xDF;
+        }
+    }
+
+    /**
+     * Encoder backed by the static character-to-byte map.
+     */
+    private static class MabEncoder extends SingleByteEncoder {
+
+        MabEncoder(Charset cs) {
+            super(cs);
+        }
+
+        /**
+         * Returns the byte for the character, or 0 if the character has no mapping.
+         */
+        @Override
+        public byte charToByte(char c) {
+            Byte b = charToByteTable.get(c);
+            if (b == null) {
+                return 0;
+            }
+            return b;
+        }
+    }
+
+}
--- a/src/main/java/org/xbib/charset/ISO5428.java
+++ b/src/main/java/org/xbib/charset/ISO5428.java
@ -0,0 +1,390 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Decode-only implementation of the ISO 5428 Greek bibliographic character
+ * set, registered under the canonical name {@code "ISO_5428"}.
+ *
+ * <p>The bytes 0xA2 (tonos) and 0xA3 (dialytika) are modifiers that precede the
+ * letter they belong to. The decoder folds them into the precomposed Greek
+ * character where the mapping defines one. Encoding is not supported.
+ */
+public class ISO5428 extends Charset {
+
+    public ISO5428() {
+        super("ISO_5428", BibliographicCharsetProvider.aliasesFor("ISO_5428"));
+    }
+
+    @Override
+    public boolean contains(Charset cs) {
+        return false;
+    }
+
+    @Override
+    public CharsetDecoder newDecoder() {
+        return new Decoder(this);
+    }
+
+    /**
+     * Always {@code false}: this charset can only decode.
+     */
+    @Override
+    public boolean canEncode() {
+        return false;
+    }
+
+    /**
+     * Encoding is not implemented for this charset.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public CharsetEncoder newEncoder() {
+        throw new UnsupportedOperationException("ISO_5428 does not support encoding");
+    }
+
+    /**
+     * Decoder that remembers pending modifiers until the next character is read.
+     */
+    private static class Decoder extends CharsetDecoder {
+
+        private static final int TONOS = 0xa2;
+
+        private static final int DIALYTIKA = 0xa3;
+
+        // pending modifiers; kept as fields so they survive a buffer boundary
+        private boolean tonos;
+
+        private boolean dialytika;
+
+        Decoder(Charset cs) {
+            super(cs, 1.0f, 1.0f);
+        }
+
+        @Override
+        protected void implReset() {
+            tonos = false;
+            dialytika = false;
+        }
+
+        /**
+         * Decodes one byte at a time. A modifier byte produces no output and is
+         * applied to the next character only; it is dropped when that character
+         * has no precomposed form for it.
+         */
+        @Override
+        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+            while (in.hasRemaining()) {
+                if (!out.hasRemaining()) {
+                    return CoderResult.OVERFLOW;
+                }
+                int code = in.get() & 0xFF;
+                if (code == TONOS) {
+                    tonos = true;
+                    continue;
+                }
+                if (code == DIALYTIKA) {
+                    dialytika = true;
+                    continue;
+                }
+                out.put(toChar(code, tonos, dialytika));
+                // modifiers apply to a single character
+                tonos = false;
+                dialytika = false;
+            }
+            return CoderResult.UNDERFLOW;
+        }
+
+        /**
+         * Maps an unsigned byte value to its character.
+         *
+         * @param code the unsigned byte value (0-255)
+         * @param tonos whether a tonos modifier is pending
+         * @param dialytika whether a dialytika modifier is pending
+         * @return the Greek character, or the code point equal to {@code code}
+         *         for bytes without a Greek mapping
+         */
+        private static char toChar(int code, boolean tonos, boolean dialytika) {
+            switch (code) {
+                case 0xe1: // alpha small
+                    return tonos ? '\u03ac' : '\u03b1';
+                case 0xc1: // alpha capital
+                    return tonos ? '\u0386' : '\u0391';
+                case 0xe2: // beta small
+                    return '\u03b2';
+                case 0xc2: // beta capital
+                    return '\u0392';
+                case 0xe4: // gamma small
+                    return '\u03b3';
+                case 0xc4: // gamma capital
+                    return '\u0393';
+                case 0xe5: // delta small
+                    return '\u03b4';
+                case 0xc5: // delta capital
+                    return '\u0394';
+                case 0xe6: // epsilon small
+                    return tonos ? '\u03ad' : '\u03b5';
+                case 0xc6: // epsilon capital
+                    return tonos ? '\u0388' : '\u0395';
+                case 0xe9: // zeta small
+                    return '\u03b6';
+                case 0xc9: // zeta capital
+                    return '\u0396';
+                case 0xea: // eta small
+                    return tonos ? '\u03ae' : '\u03b7';
+                case 0xca: // eta capital
+                    return tonos ? '\u0389' : '\u0397';
+                case 0xeb: // theta small
+                    return '\u03b8';
+                case 0xcb: // theta capital
+                    return '\u0398';
+                case 0xec: // iota small
+                    if (tonos) {
+                        return dialytika ? '\u0390' : '\u03af';
+                    }
+                    return dialytika ? '\u03ca' : '\u03b9';
+                case 0xcc: // iota capital
+                    if (tonos) {
+                        return '\u038a';
+                    }
+                    return dialytika ? '\u03aa' : '\u0399';
+                case 0xed: // kappa small
+                    return '\u03ba';
+                case 0xcd: // kappa capital
+                    return '\u039a';
+                case 0xee: // lambda small
+                    return '\u03bb';
+                case 0xce: // lambda capital
+                    return '\u039b';
+                case 0xef: // mu small
+                    return '\u03bc';
+                case 0xcf: // mu capital
+                    return '\u039c';
+                case 0xf0: // nu small
+                    return '\u03bd';
+                case 0xd0: // nu capital
+                    return '\u039d';
+                case 0xf1: // xi small
+                    return '\u03be';
+                case 0xd1: // xi capital
+                    return '\u039e';
+                case 0xf2: // omicron small
+                    return tonos ? '\u03cc' : '\u03bf';
+                case 0xd2: // omicron capital
+                    return tonos ? '\u038c' : '\u039f';
+                case 0xf3: // pi small
+                    return '\u03c0';
+                case 0xd3: // pi capital
+                    return '\u03a0';
+                case 0xf5: // rho small
+                    return '\u03c1';
+                case 0xd5: // rho capital
+                    return '\u03a1';
+                case 0xf7: // sigma small (end of words)
+                    return '\u03c2';
+                case 0xf6: // sigma small
+                    return '\u03c3';
+                case 0xd6: // sigma capital
+                    return '\u03a3';
+                case 0xf8: // tau small
+                    return '\u03c4';
+                case 0xd8: // tau capital
+                    return '\u03a4';
+                case 0xf9: // upsilon small
+                    if (tonos) {
+                        return dialytika ? '\u03b0' : '\u03cd';
+                    }
+                    return dialytika ? '\u03cb' : '\u03c5';
+                case 0xd9: // upsilon capital
+                    if (tonos) {
+                        return '\u038e';
+                    }
+                    return dialytika ? '\u03ab' : '\u03a5';
+                case 0xfa: // phi small
+                    return '\u03c6';
+                case 0xda: // phi capital
+                    return '\u03a6';
+                case 0xfb: // chi small
+                    return '\u03c7';
+                case 0xdb: // chi capital
+                    return '\u03a7';
+                case 0xfc: // psi small
+                    return '\u03c8';
+                case 0xdc: // psi capital
+                    return '\u03a8';
+                case 0xfd: // omega small
+                    return tonos ? '\u03ce' : '\u03c9';
+                case 0xdd: // omega capital
+                    return tonos ? '\u038f' : '\u03a9';
+                default:
+                    // code is already unsigned, so high bytes are not sign-extended
+                    return (char) code;
+            }
+        }
+    }
+}
--- a/src/main/java/org/xbib/charset/MabDisketteCharset.java
+++ b/src/main/java/org/xbib/charset/MabDisketteCharset.java
@ -0,0 +1,89 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+/**
+ * Implementation of the MAB-Diskette character set. With a few exceptions
+ * it is identical to Cp850.
+ */
+public class MabDisketteCharset extends ByteCharset {
+
+    /* Decoding:
+     * Deviations from CP850: the non-sorting character and the subfield
+     * delimiter must be preserved. Non-sorting character: 00aa -> 00aa,
+     * subfield delimiter: 00ce -> 2021.
+     * Note that U+00AA therefore appears twice in the table below
+     * (at index 0xA6, as in CP850, and at index 0xAA).
+     */
+    /* Encoding:
+     * Deviations from CP850: the non-sorting character and the subfield
+     * delimiter must be preserved. Non-sorting character: 00aa -> 00aa,
+     * subfield delimiter: 2021 -> 00ce, 00b6 -> 00ce.
+     * The table below only lists the decoding direction; how the encoding
+     * direction is derived is up to ByteCharset (not shown here).
+     */
+    private static final char[] lookup = {
+            0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+            0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
+            0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+            0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
+            0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+            0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+            0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+            0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+            0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+            0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+            0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+            0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+            0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+            0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+            0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+            0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
+            0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
+            0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
+            0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
+            0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
+            0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
+            0x00BF, 0x00AE, 0x00AA, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
+            0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
+            0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
+            0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
+            0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x2021, 0x00A4,
+            0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
+            0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
+            0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
+            0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
+            0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
+            0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
+    };
+
+    public MabDisketteCharset() {
+        super("x-MAB-Diskette", BibliographicCharsetProvider.aliasesFor("x-MAB-Diskette"));
+        lookupTable = lookup; // presumably a field inherited from ByteCharset -- verify there
+    }
+}
--- a/src/main/java/org/xbib/charset/Pica.java
+++ b/src/main/java/org/xbib/charset/Pica.java
@ -0,0 +1,228 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * PICA character set implementation.
+ *
+ * This character set is a modified version of the 'InterMARC' character set
+ * and contains 256 tokens.
+ *
+ * Bytes below 0xA0 are passed through unchanged; bytes 0xA0..0xFF are
+ * translated through the static table defined in this class. Encoding
+ * applies the reverse table and then delegates to an ISO-8859-1 encoder.
+ *
+ * A description can be found at
+ * <a href="http://www.pica.nl/ne/docu/dn010/html/t07.shtml">the Pica website</a>.
+ */
+public class Pica extends Charset {
+
+    /** Unicode character to Pica code (as a char in the range 0xA0..0xFF). */
+    private static final Map<Character, Character> encodeMap = new HashMap<>();
+    /** Pica code (as a char in the range 0xA0..0xFF) to Unicode character. */
+    private static final Map<Character, Character> decodeMap = new HashMap<>();
+
+    /*
+     * Pica character mapping for index subset \u00a0..\u00ff.
+     * Pica is equal to US-ASCII but not ISO-8859-1.
+     * These are the definitions for Pica characters
+     * which are different from ISO-8859-1.
+     * '\u003f' ('?') marks a position that decodes to a question mark,
+     * '\u0000' marks a position that gets no mapping at all.
+     */
+    static {
+        Pica.charTable(encodeMap, decodeMap, '\u00a0', '\u00ff',
+                new char[]{
+                        '\u00a0', '\u0141', '\u00d8', '\u0110', '\u00de', '\u00c6',
+                        '\u0152', '\u02b9', '\u00b7', '\u266d', '\u00ae', '\u00b1',
+                        '\u01a0', '\u01af', '\u02be', '\u00c5', '\u02bf', '\u0142',
+                        '\u00f8', '\u0111', '\u00fe', '\u00e6', '\u0153', '\u02ba',
+                        '\u0131', '\u00a3', '\u00f0', '\u03b1', '\u01a1', '\u01b0',
+                        '\u00df', '\u00e5', '\u0132', '\u00c4', '\u00d6', '\u00dc',
+                        '\u0186', '\u018e', '\u2260', '\u2192', '\u2264', '\u221e',
+                        '\u222b', '\u00d7', '\u00a7', '\u22a1', '\u21d4', '\u2265',
+                        '\u0133', '\u00e4', '\u00f6', '\u00fc', '\u0254', '\u0258',
+                        '\u00bf', '\u00a1', '\u03b2', '\u003f', '\u03b3', '\u03c0',
+                        '\u003f', '\u003f', '\u003f', '\u003f', '\u0341', '\u0300',
+                        '\u0301', '\u0302', '\u0303', '\u0304', '\u0306', '\u0307',
+                        '\u0308', '\u030c', '\u030a', '\ufe20', '\ufe21', '\u0315',
+                        '\u030b', '\u0310', '\u0327', '\u0000', '\u0323', '\u0324',
+                        '\u0325', '\u0333', '\u0332', '\u003f', '\u031c', '\u032e',
+                        '\ufe23', '\ufe22', '\u003f', '\u0000', '\u0313', '\u003f'
+                });
+    }
+
+    // Handle to the real charset we'll use for transcoding between
+    // characters and bytes.
+    private final Charset encodeCharset;
+
+    /**
+     * Constructor for the Pica charset.  Call the superclass
+     * constructor to pass along the name(s) we'll be known by.
+     * Then save a reference to the delegate Charset (ISO-8859-1).
+     */
+    public Pica() {
+        super("PICA", BibliographicCharsetProvider.aliasesFor("PICA"));
+        encodeCharset = StandardCharsets.ISO_8859_1;
+    }
+
+    /**
+     * Fill the conversion tables.
+     *
+     * For every code position {@code c} in {@code from..to} the matching
+     * entry of {@code code} is registered in {@code decodeMap}, unless the
+     * entry is {@code '\u0000'} (no mapping). The reverse direction is only
+     * registered for non-ASCII targets: ASCII characters are encoded as
+     * themselves, and registering the {@code '?'} placeholder entries would
+     * make a plain question mark encode to the last placeholder position
+     * (0xFF) instead of 0x3F.
+     *
+     * @param encodeMap map to fill with Unicode character to code position
+     * @param decodeMap map to fill with code position to Unicode character
+     * @param from first code position covered by {@code code}
+     * @param to last code position covered by {@code code} (inclusive)
+     * @param code the target characters, one per code position
+     */
+    private static void charTable(Map<Character, Character> encodeMap, Map<Character, Character> decodeMap, char from, char to,
+                                  char[] code) {
+        int i = 0;
+
+        for (char c = from; c <= to; c++) {
+            char target = code[i];
+            if (target != '\u0000') {
+                decodeMap.put(c, target);
+                if (target > '\u007f') {
+                    encodeMap.put(target, c);
+                }
+            }
+
+            i++;
+        }
+    }
+
+    /**
+     * This method must be implemented by concrete Charsets.  We allow
+     * subclasses of the Pica charset.
+     */
+    @Override
+    public boolean contains(Charset charset) {
+        return charset instanceof Pica;
+    }
+
+    /**
+     * Called by users of this Charset to obtain an encoder.
+     * This implementation instantiates an instance of a private class
+     * (defined below) and passes it an encoder from the base Charset.
+     */
+    @Override
+    public CharsetEncoder newEncoder() {
+        return new PicaEncoder(this, encodeCharset.newEncoder());
+    }
+
+    /**
+     * Called by users of this Charset to obtain a decoder.
+     * This implementation instantiates an instance of a private class
+     * (defined below) and passes it a decoder from the base Charset.
+     */
+    @Override
+    public CharsetDecoder newDecoder() {
+        return new PicaDecoder(this, encodeCharset.newDecoder());
+    }
+
+    private static class PicaEncoder extends CharsetEncoder {
+
+        private final CharsetEncoder baseEncoder;
+
+        /**
+         * Constructor, call the superclass constructor with the
+         * Charset object and the encodings sizes from the
+         * delegate encoder.
+         */
+        PicaEncoder(Charset cs, CharsetEncoder baseEncoder) {
+            super(cs, baseEncoder.averageBytesPerChar(),
+                    baseEncoder.maxBytesPerChar());
+            this.baseEncoder = baseEncoder;
+        }
+
+        /**
+         * Implementation of the encoding loop.  First, we apply
+         * the Pica charset mapping to a copy of the CharBuffer, then
+         * reset the encoder for the base Charset and call its
+         * encode() method to do the actual encoding. The CharBuffer
+         * passed in may be read-only or re-used by the caller for
+         * other purposes so we copy it and apply the Pica
+         * encoding to the copy.  We do want to advance the position
+         * of the input buffer to reflect the chars consumed.
+         *
+         * NOTE(review): characters in 0xA0..0xFF that have no entry in
+         * encodeMap are handed to the base encoder unchanged and end up as
+         * a byte that decodes to a different Pica character -- confirm
+         * whether these should be reported as unmappable instead.
+         */
+        @Override
+        protected CoderResult encodeLoop(CharBuffer cb, ByteBuffer bb) {
+            CharBuffer tmpcb = CharBuffer.allocate(cb.remaining());
+            while (cb.hasRemaining()) {
+                tmpcb.put(cb.get());
+            }
+            tmpcb.rewind();
+            for (int pos = tmpcb.position(); pos < tmpcb.limit(); pos++) {
+                char c = tmpcb.get(pos);
+                Character mapChar = encodeMap.get(c);
+                if (mapChar != null) {
+                    tmpcb.put(pos, mapChar);
+                }
+            }
+            baseEncoder.reset();
+            CoderResult cr = baseEncoder.encode(tmpcb, bb, true);
+            // If error or output overflow, we need to adjust
+            // the position of the input buffer to match what
+            // was really consumed from the temp buffer.  If
+            // underflow (all input consumed) this is a no-op.
+            cb.position(cb.position() - tmpcb.remaining());
+            return cr;
+        }
+    }
+
+    /**
+     * The decoder implementation for the Pica Charset.
+     */
+    private static class PicaDecoder extends CharsetDecoder {
+
+        /**
+         * Constructor, call the superclass constructor with the
+         * Charset object and pass along the chars/byte values
+         * from the delegate decoder.
+         */
+        PicaDecoder(Charset cs, CharsetDecoder baseDecoder) {
+            // base decoder only needed for size hints
+            super(cs, baseDecoder.averageCharsPerByte(),
+                    baseDecoder.maxCharsPerByte());
+        }
+
+        /**
+         * Implementation of the decoding loop. Every byte yields exactly
+         * one char: the decodeMap entry if there is one, otherwise the
+         * unsigned byte value itself.
+         */
+        @Override
+        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+            while (in.hasRemaining()) {
+                // check for room first so that no byte has to be pushed back
+                if (!out.hasRemaining()) {
+                    return CoderResult.OVERFLOW;
+                }
+                char oldChar = (char) (in.get() & 0xFF);
+                Character mapChar = decodeMap.get(oldChar);
+                out.put(mapChar != null ? mapChar : oldChar);
+            }
+            return CoderResult.UNDERFLOW;
+        }
+    }
+}
+
--- a/src/main/java/org/xbib/charset/PicaCharset.java
+++ b/src/main/java/org/xbib/charset/PicaCharset.java
@ -0,0 +1,315 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A Charset for the OCLC|PICA-character-encoding (x-PICA). It decodes
+ * x-PICA to Unicode and encodes Unicode to x-PICA.
+ */
+public class PicaCharset extends Charset {
+
+    /** Decoding table, indexed by the unsigned byte value; 0 means "no mapping". */
+    private static final char[] BYTE_TO_CHAR_MAP = newPicaToUnicodeMap();
+
+    /** Encoding table, derived from {@link #BYTE_TO_CHAR_MAP}. */
+    private static final Map<Character, Byte> CHAR_TO_BYTE_MAP = newCharToByteMap();
+
+    /** Handed to every new decoder via setComposeCharactersAfterConversion. */
+    private final boolean isNFCOutput;
+
+    public PicaCharset() {
+        this(true);
+    }
+
+    private PicaCharset(boolean isNFCOutput) {
+        super("x-PICA", null);
+        this.isNFCOutput = isNFCOutput;
+    }
+
+    /**
+     * Builds the byte-to-char table: 0x00..0x7F are identical to US-ASCII,
+     * 0x80..0x85 carry DNB-internal control characters and 0xA1..0xFE the
+     * Pica specific characters. Unassigned positions stay 0.
+     */
+    private static char[] newPicaToUnicodeMap() {
+        char[] map = new char[256];
+        for (int i = 0; i < 128; i++) {
+            map[i] = (char) i;
+        }
+        /*
+         * DNB-internal definitions, needed for conversion from pica+ to mab2
+         */
+        map[0x80] = ISO5426.TEILFELDTRENNZEICHEN;
+        map[0x81] = ISO5426.NICHTSORTIERBEGINNZEICHEN;
+        map[0x82] = ISO5426.NICHTSORTIERENDEZEICHEN;
+        map[0x83] = '|'; // fill character
+        map[0x84] = 'u'; // character code
+        map[0x85] = 'z'; // character repertoire
+
+        /* L with stroke */
+        map[0xA1] = '\u0141';
+        /* O with stroke */
+        map[0xA2] = '\u00D8';
+        /* D with stroke */
+        map[0xA3] = '\u0110';
+        /* Capital thorn */
+        map[0xA4] = '\u00DE';
+        /* Capital ligature AE */
+        map[0xA5] = '\u00C6';
+        /* Capital ligature OE */
+        map[0xA6] = '\u0152';
+        /* Modifier letter prime */
+        map[0xA7] = '\u02B9';
+        /* Middle dot */
+        map[0xA8] = '\u00B7';
+        /* MUSIC FLAT SIGN */
+        map[0xA9] = '\u266D';
+        /* Registered sign */
+        map[0xAA] = '\u00AE';
+        /* Plus-minus sign */
+        map[0xAB] = '\u00B1';
+        /* Capital letter O with horn */
+        map[0xAC] = '\u01A0';
+        /* Capital letter U with horn */
+        map[0xAD] = '\u01AF';
+        /* Modifier letter apostrophe */
+        map[0xAE] = '\u02BC';
+        /* LATIN CAPITAL LETTER A WITH RING ABOVE */
+        map[0xAF] = '\u00C5';
+        /* Modifier letter turned comma */
+        map[0xB0] = '\u02BB';
+        /* Latin small letter l with stroke */
+        map[0xB1] = '\u0142';
+        /* Latin small letter o with stroke */
+        map[0xB2] = '\u00F8';
+        /* Latin small letter d with stroke */
+        map[0xB3] = '\u0111';
+        /* Latin small letter thorn */
+        map[0xB4] = '\u00FE';
+        /* Latin small ligature ae */
+        map[0xB5] = '\u00E6';
+        /* Latin small ligature oe */
+        map[0xB6] = '\u0153';
+        /* modifier letter double prime */
+        map[0xB7] = '\u02BA';
+        /* latin small letter dotless i */
+        map[0xB8] = '\u0131';
+        /* pound sign */
+        map[0xB9] = '\u00A3';
+        /* latin small letter eth */
+        map[0xBA] = '\u00F0';
+        /* greek small letter alpha */
+        map[0xBB] = '\u03B1';
+        /* latin small letter o with horn */
+        map[0xBC] = '\u01A1';
+        /* latin small letter u with horn */
+        map[0xBD] = '\u01B0';
+        /* latin small letter sharp s */
+        map[0xBE] = '\u00DF';
+        /* LATIN SMALL LETTER A WITH RING ABOVE */
+        map[0xBF] = '\u00E5';
+        /* Latin capital ligature IJ */
+        map[0xC0] = '\u0132';
+        /* Ä */
+        map[0xC1] = '\u00C4';
+        /* Ö */
+        map[0xC2] = '\u00D6';
+        /* Ü */
+        map[0xC3] = '\u00DC';
+        /* LATIN CAPITAL LETTER OPEN O */
+        map[0xC4] = '\u0186';
+        /* latin capital letter reversed E */
+        map[0xC5] = '\u018E';
+        /* NOT EQUAL TO */
+        map[0xC6] = '\u2260';
+        /* RIGHTWARDS ARROW */
+        map[0xC7] = '\u2192';
+        /* LESS-THAN OR EQUAL TO */
+        map[0xC8] = '\u2264';
+        /* INFINITY */
+        map[0xC9] = '\u221E';
+        /* INTEGRAL */
+        map[0xCA] = '\u222B';
+        /* Multiplication sign */
+        map[0xCB] = '\u00D7';
+        /* Section sign */
+        map[0xCC] = '\u00A7';
+        /* SQUARE ROOT */
+        map[0xCD] = '\u221A';
+        /* GREATER-THAN OR LESS-THAN */
+        map[0xCE] = '\u2277';
+        /* GREATER-THAN OR EQUAL TO */
+        map[0xCF] = '\u2265';
+        /* Latin small ligature ij */
+        map[0xD0] = '\u0133';
+        /* ä */
+        map[0xD1] = '\u00E4';
+        /* ö */
+        map[0xD2] = '\u00F6';
+        /* ü */
+        map[0xD3] = '\u00FC';
+        /* LATIN SMALL LETTER OPEN O */
+        map[0xD4] = '\u0254';
+        /* Latin small letter reversed e */
+        map[0xD5] = '\u01DD';
+        /* inverted question mark */
+        map[0xD6] = '\u00BF';
+        /* inverted exclamation mark */
+        map[0xD7] = '\u00A1';
+        /* Greek small letter beta */
+        map[0xD8] = '\u03B2';
+        /* Greek small letter gamma */
+        map[0xDA] = '\u03B3';
+        /* Greek small letter pi */
+        map[0xDB] = '\u03C0';
+        /* Combining hook above */
+        map[0xE0] = '\u0309';
+        /* COMBINING GRAVE ACCENT */
+        map[0xE1] = '\u0300';
+        /* COMBINING ACUTE ACCENT */
+        map[0xE2] = '\u0301';
+        /* COMBINING CIRCUMFLEX ACCENT */
+        map[0xE3] = '\u0302';
+        /* COMBINING TILDE */
+        map[0xE4] = '\u0303';
+        /* Macron */
+        map[0xE5] = '\u0304';
+        /* Combining breve */
+        map[0xE6] = '\u0306';
+        /* Combining dot above */
+        map[0xE7] = '\u0307';
+        /* COMBINING DIAERESIS */
+        map[0xE8] = '\u0308';
+        /* Combining caron */
+        map[0xE9] = '\u030C';
+        /* Combining ring above */
+        map[0xEA] = '\u030A';
+        /* COMBINING LIGATURE LEFT HALF */
+        map[0xEB] = '\uFE20';
+        /* COMBINING LIGATURE RIGHT HALF */
+        map[0xEC] = '\uFE21';
+        /* combining comma above */
+        map[0xED] = '\u0313';
+        /* combining double acute accent */
+        map[0xEE] = '\u030B';
+        /* combining candrabindu */
+        map[0xEF] = '\u0310';
+        /* Combining cedilla */
+        map[0xF0] = '\u0327';
+        /* Combining dot below */
+        map[0xF2] = '\u0323';
+        /* Combining diaeresis below */
+        map[0xF3] = '\u0324';
+        /* Combining ring below */
+        map[0xF4] = '\u0325';
+        /* Combining double low line */
+        map[0xF5] = '\u0333';
+        /* Combining macron below */
+        map[0xF6] = '\u0331';
+        /* Combining ogonek */
+        map[0xF8] = '\u0328';
+        /* Combining breve below */
+        map[0xF9] = '\u032E';
+        /* Combining DOUBLE TILDE RIGHT HALF */
+        map[0xFA] = '\uFE23';
+        /* Combining DOUBLE TILDE LEFT HALF */
+        map[0xFB] = '\uFE22';
+        /* Combining comma above right */
+        map[0xFE] = '\u0315';
+        return map;
+    }
+
+    /**
+     * Builds the char-to-byte table as the inverse of
+     * {@link #BYTE_TO_CHAR_MAP}. The DNB-internal positions 0x80..0x85 are
+     * decode-only and therefore skipped; otherwise e.g. 'u' would be encoded
+     * as 0x84 instead of 0x75. The decoding table itself is left untouched:
+     * the array is shared with the decoder, so clearing entries in it would
+     * also remove them from decoding.
+     */
+    private static Map<Character, Byte> newCharToByteMap() {
+        Map<Character, Byte> ret = new HashMap<>(BYTE_TO_CHAR_MAP.length);
+        for (int i = 0; i < BYTE_TO_CHAR_MAP.length; i++) {
+            if (i >= 0x80 && i <= 0x85) {
+                continue;
+            }
+            char c = BYTE_TO_CHAR_MAP[i];
+            if (c != 0) {
+                ret.put(c, (byte) i);
+            }
+        }
+        return ret;
+    }
+
+    /**
+     * Every charset contains itself; no statement is made about any other
+     * charset.
+     */
+    @Override
+    public boolean contains(Charset cs) {
+        return cs instanceof PicaCharset;
+    }
+
+    @Override
+    public CharsetDecoder newDecoder() {
+        PicaDecoder ret = new PicaDecoder(this);
+        ret.setComposeCharactersAfterConversion(isNFCOutput);
+        return ret;
+    }
+
+    @Override
+    public CharsetEncoder newEncoder() {
+        return new PicaEncoder(this);
+    }
+
+    private static class PicaDecoder extends SingleByteDecoder {
+
+        PicaDecoder(Charset cs) {
+            super(cs);
+        }
+
+        /** Looks the unsigned byte value up in the decoding table. */
+        @Override
+        public char byteToChar(byte b) {
+            return BYTE_TO_CHAR_MAP[b & 0xFF];
+        }
+
+        /** The combining characters occupy the byte range 0xE0..0xFE. */
+        @Override
+        public boolean isCombiningCharacter(byte b) {
+            int code = b & 0xFF;
+            return code >= 0xE0 && code <= 0xFE;
+        }
+    }
+
+    private static class PicaEncoder extends SingleByteEncoder {
+
+        PicaEncoder(Charset cs) {
+            super(cs);
+        }
+
+        /** Returns the byte for the given char, or 0 if there is no mapping. */
+        @Override
+        public byte charToByte(char c) {
+            Byte b = CHAR_TO_BYTE_MAP.get(c);
+            if (b == null) {
+                return 0;
+            }
+            return b;
+        }
+    }
+}
--- a/src/main/java/org/xbib/charset/SimpleAnselCharset.java
+++ b/src/main/java/org/xbib/charset/SimpleAnselCharset.java
@ -0,0 +1,264 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * This is a simplified version of "ANSEL charset" at http://anselcharset.sourceforge.net/
+ * by Piotr Andzel.
+ * Original code licensed under LGPL http://www.gnu.org/licenses/lgpl.html
+ *
+ * The Unicode to ANSEL mapping is read from the class path resource
+ * "ansel-mapping.txt"; the reverse direction is a byte trie built from it.
+ */
+public class SimpleAnselCharset extends Charset {
+
+    /** Unicode character to ANSEL byte sequence. */
+    private final Map<Character, byte[]> mapping;
+    /** Root level of the byte trie used for decoding. */
+    private final Map<Byte, ReverseMappingEntity> reverseMapping;
+
+    public SimpleAnselCharset() {
+        super("SIMPLE_ANSEL", BibliographicCharsetProvider.aliasesFor("SIMPLE_ANSEL"));
+        mapping = createMapping(getClass().getResourceAsStream("ansel-mapping.txt"));
+        reverseMapping = createReverseMapping(mapping);
+    }
+
+    /**
+     * Parses the mapping resource. Each line has the form
+     * {@code <unicode hex, optional leading u/U>=<hex byte>[ <hex byte>...]};
+     * everything after the first ';' (or, if there is none, the first '#')
+     * is ignored, as are lines that do not contain exactly one '='.
+     * An I/O error is logged and the entries read so far are returned.
+     */
+    private static Map<Character, byte[]> createMapping(InputStream mappingStream) {
+        Map<Character, byte[]> mapping = new HashMap<>();
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(mappingStream, StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                int i = line.indexOf(";");
+                if (i < 0) {
+                    i = line.indexOf("#");
+                }
+                if (i >= 0) {
+                    line = line.substring(0, i);
+                }
+                String[] kvp = line.split("=");
+                if (kvp.length == 2) {
+                    String uni = kvp[0];
+                    String ans = kvp[1];
+                    Character uniCode = (char) Integer.parseInt(uni.replaceFirst("^[uU]", ""), 16);
+                    String[] ansSeq = ans.split(" ");
+                    byte[] ansCodes = new byte[ansSeq.length];
+                    for (int j = 0; j < ansSeq.length; j++) {
+                        ansCodes[j] = (byte) (Integer.parseInt(ansSeq[j].replaceFirst("^0[xX]", ""), 16) & 0xFF);
+                    }
+                    mapping.put(uniCode, ansCodes);
+                }
+            }
+        } catch (IOException e) {
+            Logger.getLogger(SimpleAnselCharset.class.getName()).log(Level.WARNING, e.getMessage(), e);
+        }
+        return mapping;
+    }
+
+    /**
+     * Builds the decoding trie: every byte sequence of the mapping becomes a
+     * path whose last node carries the Unicode character.
+     */
+    private static Map<Byte, ReverseMappingEntity> createReverseMapping(Map<Character, byte[]> mapping) {
+        Map<Byte, ReverseMappingEntity> rev = new TreeMap<>();
+        for (Map.Entry<Character, byte[]> e : mapping.entrySet()) {
+            Map<Byte, ReverseMappingEntity> ptr = rev;
+            Character ch = e.getKey();
+            byte[] sequence = e.getValue();
+            for (int i = 0; i < sequence.length; i++) {
+                Byte b = sequence[i];
+                ReverseMappingEntity ent = ptr.get(b);
+                if (ent == null) {
+                    ent = new ReverseMappingEntity();
+                    ptr.put(b, ent);
+                }
+                if (i + 1 == sequence.length) {
+                    ent.setCharacter(ch);
+                } else {
+                    ptr = ent.getMapping();
+                }
+            }
+        }
+        return rev;
+    }
+
+    @Override
+    public boolean canEncode() {
+        return true;
+    }
+
+    @Override
+    public CharsetDecoder newDecoder() {
+        return new Decoder(this);
+    }
+
+    @Override
+    public CharsetEncoder newEncoder() {
+        return new Encoder(this);
+    }
+
+    @Override
+    public boolean contains(Charset cs) {
+        return displayName().equals(cs.displayName());
+    }
+
+    /** A trie node: the character ending here (if any) plus the child nodes. */
+    private static class ReverseMappingEntity {
+        private final TreeMap<Byte, ReverseMappingEntity> mapping = new TreeMap<>();
+        private Character character;
+
+        public Character getCharacter() {
+            return character;
+        }
+
+        public void setCharacter(Character ch) {
+            this.character = ch;
+        }
+
+        public Map<Byte, ReverseMappingEntity> getMapping() {
+            return mapping;
+        }
+    }
+
+    private class Decoder extends CharsetDecoder {
+        /** Bytes already taken from the input but not yet turned into a char. */
+        private final LinkedList<Byte> buffer = new LinkedList<>();
+
+        Decoder(Charset charset) {
+            super(charset, 2.2f, 3.0f);
+        }
+
+        /**
+         * Decodes as many characters as possible. If the input ends inside
+         * a multi-byte sequence, the bytes seen so far stay in the pending
+         * buffer until more input arrives or the decoder is flushed.
+         */
+        @Override
+        protected CoderResult decodeLoop(final ByteBuffer in, CharBuffer out) {
+            ReverseMappingBuffer rmb = new ReverseMappingBuffer(reverseMapping, buffer) {
+                @Override
+                protected Byte onNextByte() {
+                    return in.hasRemaining() ? in.get() : null;
+                }
+            };
+            while (in.hasRemaining() || rmb.hasRemaining()) {
+                if (!out.hasRemaining()) {
+                    return CoderResult.OVERFLOW;
+                }
+                Character ch = rmb.nextCharacter();
+                if (ch == null) {
+                    // incomplete sequence, all input is consumed into the pending buffer
+                    return CoderResult.UNDERFLOW;
+                }
+                out.put(ch.charValue());
+            }
+            return CoderResult.UNDERFLOW;
+        }
+
+        /**
+         * Emits whatever is left in the pending buffer at the end of the
+         * input. Bytes that do not form a mapped sequence are emitted as
+         * the char with the same unsigned value.
+         */
+        @Override
+        protected CoderResult implFlush(CharBuffer out) {
+            ReverseMappingBuffer rmb = new ReverseMappingBuffer(reverseMapping, buffer) {
+                @Override
+                protected Byte onNextByte() {
+                    return null;
+                }
+            };
+            while (rmb.hasRemaining()) {
+                if (!out.hasRemaining()) {
+                    return CoderResult.OVERFLOW;
+                }
+                Character ch = rmb.nextCharacter();
+                if (ch == null) {
+                    // no further input will come: give up on the sequence start
+                    ch = (char) (buffer.pollFirst() & 0xFF);
+                }
+                out.put(ch.charValue());
+            }
+            return CoderResult.UNDERFLOW;
+        }
+
+        @Override
+        protected void implReset() {
+            buffer.clear();
+        }
+    }
+
+    private class Encoder extends CharsetEncoder {
+
+        Encoder(Charset charset) {
+            super(charset, 2.2f, 3.0f);
+        }
+
+        /**
+         * Encodes char by char: US-ASCII is written as a single byte, all
+         * other characters are looked up in the mapping. Leading zero bytes
+         * of a mapped sequence are not written (the last byte always is).
+         * On overflow or an unmappable character the input position is
+         * left at the character that could not be processed.
+         */
+        @Override
+        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+            while (in.hasRemaining()) {
+                int start = in.position();
+                char unicode = in.get();
+                byte[] ansel;
+                if (unicode <= 0x7f) {
+                    ansel = new byte[]{(byte) unicode};
+                } else {
+                    ansel = mapping.get(unicode);
+                    if (ansel == null) {
+                        in.position(start);
+                        return CoderResult.unmappableForLength(1);
+                    }
+                }
+                int first = 0;
+                while (first < ansel.length - 1 && ansel[first] == 0) {
+                    first++;
+                }
+                int count = ansel.length - first;
+                if (out.remaining() < count) {
+                    in.position(start);
+                    return CoderResult.OVERFLOW;
+                }
+                out.put(ansel, first, count);
+            }
+            return CoderResult.UNDERFLOW;
+        }
+    }
+
+    /**
+     * Walks the decoding trie over a byte source. Bytes come from the
+     * pending buffer first and then from {@link #onNextByte()}.
+     */
+    abstract class ReverseMappingBuffer {
+        private final Map<Byte, ReverseMappingEntity> rm;
+        private final LinkedList<Byte> buffer;
+
+        ReverseMappingBuffer(Map<Byte, ReverseMappingEntity> rm, LinkedList<Byte> buffer) {
+            this.rm = rm;
+            this.buffer = buffer;
+        }
+
+        boolean hasRemaining() {
+            return !buffer.isEmpty();
+        }
+
+        /**
+         * Returns the character for the longest mapped byte sequence at the
+         * current position. A byte that starts no mapped sequence is
+         * returned as the char with the same unsigned value. Bytes read
+         * beyond the returned character are pushed back to the front of
+         * the pending buffer.
+         *
+         * @return the next character, or {@code null} if the bytes
+         * available so far do not yet form a character (they are kept in
+         * the pending buffer)
+         */
+        Character nextCharacter() {
+            LinkedList<Byte> queue = new LinkedList<>();
+            ReverseMappingEntity rme = null;
+            Character ch = null;
+            for (Byte b = nextByte(); b != null; b = nextByte()) {
+                queue.addLast(b);
+                rme = rme != null ? rme.getMapping().get(b) : rm.get(b);
+                if (rme == null) {
+                    // push back in front so that the byte order is preserved
+                    buffer.addAll(0, queue);
+                    // mask the byte, a plain cast would sign-extend 0x80..0xFF
+                    return ch != null ? ch : Character.valueOf((char) (nextByte() & 0xFF));
+                }
+                if (rme.getCharacter() != null) {
+                    ch = rme.getCharacter();
+                    queue.clear();
+                }
+            }
+            // out of input: keep the unfinished tail instead of dropping it
+            buffer.addAll(0, queue);
+            return ch;
+        }
+
+        protected abstract Byte onNextByte();
+
+        private Byte nextByte() {
+            if (!buffer.isEmpty()) {
+                return buffer.pollFirst();
+            } else {
+                return onNextByte();
+            }
+        }
+    }
+}
--- a/src/main/java/org/xbib/charset/SingleByteDecoder.java
+++ b/src/main/java/org/xbib/charset/SingleByteDecoder.java
@ -0,0 +1,98 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.text.Normalizer;
+
+/**
+ * Base decoder for single-byte character sets in which a run of combining bytes
+ * is followed by the byte of the character they belong to.
+ *
+ * Each such run plus its terminating non-combining byte is converted in reverse
+ * order (so the non-combining character comes first) and, unless disabled,
+ * composed with Unicode normalization form NFC.
+ */
+abstract class SingleByteDecoder extends CharsetDecoder {
+
+    private boolean composeCharactersAfterConversion = true;
+
+    SingleByteDecoder(Charset cs) {
+        super(cs, 1.0f, 1.0f);
+    }
+
+    /**
+     * @param composeCharactersAfterConversion The composeCharactersAfterConversion to set.
+     */
+    void setComposeCharactersAfterConversion(boolean composeCharactersAfterConversion) {
+        this.composeCharactersAfterConversion = composeCharactersAfterConversion;
+    }
+
+    /**
+     * Decodes as many complete sequences (combining bytes plus their terminating
+     * byte) as fit into {@code out}.
+     *
+     * Bytes of an incomplete sequence are never left consumed: on return the
+     * position of {@code in} points at the first byte that has not been written to
+     * {@code out}. This lets the caller re-offer a trailing run of combining bytes
+     * together with the bytes that follow, or have it reported as malformed input
+     * at the end of the stream, instead of silently losing it.
+     *
+     * @param in  the input bytes
+     * @param out the output characters
+     * @return {@link CoderResult#OVERFLOW} if {@code out} cannot take the next
+     * converted sequence, otherwise {@link CoderResult#UNDERFLOW}
+     */
+    @Override
+    protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+        ByteBuffer pending = ByteBuffer.allocate(30);
+        while (in.hasRemaining()) {
+            byte c = in.get();
+            if (!pending.hasRemaining()) {
+                // Grow instead of failing on unusually long runs of combining bytes.
+                ByteBuffer larger = ByteBuffer.allocate(pending.capacity() * 2);
+                pending.flip();
+                larger.put(pending);
+                pending = larger;
+            }
+            pending.put(c);
+            if (isCombiningCharacter(c)) {
+                // Keep collecting until the terminating non-combining byte arrives.
+                continue;
+            }
+            StringBuilder converted = new StringBuilder();
+            // Reverse order: the terminating byte first, then the combining bytes.
+            for (int i = pending.position() - 1; i >= 0; i--) {
+                char convertedCharacter = byteToChar(pending.get(i));
+                if (convertedCharacter == 0) {
+                    // byteToChar signals "no mapping" with 0.
+                    converted.append(replacement());
+                } else {
+                    converted.append(convertedCharacter);
+                }
+            }
+            CharSequence result = composeCharactersAfterConversion
+                    ? Normalizer.normalize(converted, Normalizer.Form.NFC)
+                    : converted;
+            if (out.remaining() < result.length()) {
+                // Un-read the whole sequence so that it is decoded again later.
+                in.position(in.position() - pending.position());
+                return CoderResult.OVERFLOW;
+            }
+            out.append(result);
+            pending.clear();
+        }
+        // Hand back combining bytes that are still waiting for their terminating
+        // byte; the staging buffer does not survive this call.
+        in.position(in.position() - pending.position());
+        return CoderResult.UNDERFLOW;
+    }
+
+    /**
+     * @param c a byte of the encoded input
+     * @return {@code true} if the byte is a combining byte, that is, it has to be
+     * held back until a non-combining byte follows
+     */
+    public abstract boolean isCombiningCharacter(byte c);
+
+    /**
+     * @param b a byte of the encoded input
+     * @return the character for the byte, or 0 if the byte has no mapping
+     */
+    public abstract char byteToChar(byte b);
+
+}
--- a/src/main/java/org/xbib/charset/SingleByteEncoder.java
+++ b/src/main/java/org/xbib/charset/SingleByteEncoder.java
@ -0,0 +1,103 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.text.Normalizer;
+
+/**
+ * Base encoder for single-byte character sets in which combining characters are
+ * written before the character they follow in Unicode text.
+ *
+ * Input characters are optionally decomposed (NFD). A character and the combining
+ * characters after it are staged and then written in reverse order.
+ */
+abstract class SingleByteEncoder extends CharsetEncoder {
+
+    /**
+     * Upper bound of bytes written per input character. Decomposition turns one
+     * character into several, each of which becomes one byte.
+     * NOTE(review): 4 is assumed to cover the longest canonical decomposition of a
+     * single UTF-16 code unit - confirm against the Unicode data in use.
+     */
+    private static final float MAX_BYTES_PER_CHAR = 4.0f;
+
+    private boolean decomposeCharactersBeforeConversion = true;
+
+    SingleByteEncoder(Charset cs) {
+        super(cs, 1.0f, MAX_BYTES_PER_CHAR);
+    }
+
+    /**
+     * @param decomposeCharactersBeforeConversion The decomposeCharactersBeforeConversion to set.
+     */
+    public void setDecomposeCharactersBeforeConversion(boolean decomposeCharactersBeforeConversion) {
+        this.decomposeCharactersBeforeConversion = decomposeCharactersBeforeConversion;
+    }
+
+    /**
+     * Encodes the remaining characters of {@code in}.
+     *
+     * Staged characters are written when the next non-combining character arrives
+     * and at the end of the input of this call. Staging does not survive the call,
+     * so a combining character that arrives first in a later call is not moved in
+     * front of a character written by an earlier one.
+     *
+     * @param in  the input characters
+     * @param out the output bytes
+     * @return {@link CoderResult#OVERFLOW} if {@code out} cannot take the staged
+     * characters plus the current one (the position of {@code in} is then reset to
+     * the first character that has not been written), otherwise
+     * {@link CoderResult#UNDERFLOW}
+     */
+    @Override
+    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+        StringBuilder pending = new StringBuilder();
+        // Input characters represented in 'pending'. This differs from
+        // pending.length() as soon as a character has been decomposed.
+        int pendingInputChars = 0;
+        while (in.hasRemaining()) {
+            char c = in.get();
+            String charAsString;
+            if (decomposeCharactersBeforeConversion) {
+                charAsString = Normalizer.normalize(String.valueOf(c), Normalizer.Form.NFD);
+            } else {
+                charAsString = String.valueOf(c);
+            }
+            if (out.remaining() < pending.length() + charAsString.length()) {
+                // Un-read the current character and every staged input character.
+                in.position(in.position() - pendingInputChars - 1);
+                return CoderResult.OVERFLOW;
+            }
+            if (pending.length() > 0 && !isCombiningCharacter(c)) {
+                flushReversed(pending, out);
+                pendingInputChars = 0;
+            }
+            pending.append(charAsString);
+            pendingInputChars++;
+        }
+        flushReversed(pending, out);
+        return CoderResult.UNDERFLOW;
+    }
+
+    /**
+     * @param c a character to encode
+     * @return the byte for the character, or 0 if the character has no mapping
+     */
+    public abstract byte charToByte(char c);
+
+    /**
+     * @param c a character of the input
+     * @return {@code true} if the character lies in the range U+0300..U+036F
+     */
+    public boolean isCombiningCharacter(char c) {
+        return c >= '\u0300' && c <= '\u036F';
+    }
+
+    /** Writes the staged characters last-to-first and empties the staging area. */
+    private void flushReversed(StringBuilder pending, ByteBuffer out) {
+        for (int i = pending.length() - 1; i >= 0; i--) {
+            convert(pending.charAt(i), out);
+        }
+        pending.setLength(0);
+    }
+
+    /** Writes the byte for {@code c}, or the replacement bytes if it has no mapping. */
+    private void convert(char c, ByteBuffer out) {
+        byte b = charToByte(c);
+        if (b != 0) {
+            out.put(b);
+        } else {
+            out.put(replacement());
+        }
+    }
+}
--- a/src/main/java/org/xbib/charset/package-info.java
+++ b/src/main/java/org/xbib/charset/package-info.java
@ -0,0 +1,4 @@
+/**
+ * Bibliographic character set implementations.
+ */
+package org.xbib.charset;
--- a/src/main/resources/META-INF/services/java.nio.charset.spi.CharsetProvider
+++ b/src/main/resources/META-INF/services/java.nio.charset.spi.CharsetProvider
@ -0,0 +1 @@
+org.xbib.charset.BibliographicCharsetProvider
--- a/src/main/resources/org/xbib/charset/ansel-mapping.txt
+++ b/src/main/resources/org/xbib/charset/ansel-mapping.txt
@ -0,0 +1,598 @@
+U001b=0x1b
+U001d=0x1d
+U001e=0x1e
+U001f=0x1f
+U0020=0x20
+U0021=0x21
+U0022=0x22
+U0023=0x23
+U0024=0x24
+U0025=0x25
+U0026=0x26
+U0027=0x27
+U0028=0x28
+U0029=0x29
+U002a=0x2a
+U002b=0x2b
+U002c=0x2c
+U002d=0x2d
+U002e=0x2e
+U002f=0x2f
+U0030=0x30
+U0031=0x31
+U0032=0x32
+U0033=0x33
+U0034=0x34
+U0035=0x35
+U0036=0x36
+U0037=0x37
+U0038=0x38
+U0039=0x39
+U003a=0x3a
+U003b=0x3b
+U003c=0x3c
+U003d=0x3d
+U003e=0x3e
+U003f=0x3f
+U0040=0x40
+U0041=0x41
+U0042=0x42
+U0043=0x43
+U0044=0x44
+U0045=0x45
+U0046=0x46
+U0047=0x47
+U0048=0x48
+U0049=0x49
+U004a=0x4a
+U004b=0x4b
+U004c=0x4c
+U004d=0x4d
+U004e=0x4e
+U004f=0x4f
+U0050=0x50
+U0051=0x51
+U0052=0x52
+U0053=0x53
+U0054=0x54
+U0055=0x55
+U0056=0x56
+U0057=0x57
+U0058=0x58
+U0059=0x59
+U005a=0x5a
+U005b=0x5b
+U005c=0x5c
+U005d=0x5d
+U005e=0x5e
+U005f=0x5f
+U0060=0x60
+U0061=0x61
+U0062=0x62
+U0063=0x63
+U0064=0x64
+U0065=0x65
+U0066=0x66
+U0067=0x67
+U0068=0x68
+U0069=0x69
+U006a=0x6a
+U006b=0x6b
+U006c=0x6c
+U006d=0x6d
+U006e=0x6e
+U006f=0x6f
+U0070=0x70
+U0071=0x71
+U0072=0x72
+U0073=0x73
+U0074=0x74
+U0075=0x75
+U0076=0x76
+U0077=0x77
+U0078=0x78
+U0079=0x79
+U007a=0x7a
+U007b=0x7b
+U007c=0x7c
+U007d=0x7d
+U007e=0x7e
+U0098=0x88
+U009c=0x89
+U200d=0x8d
+U200c=0x8e
+U0141=0xa1
+U00d8=0xa2
+U0110=0xa3
+U00de=0xa4
+U00c6=0xa5
+U0152=0xa6
+U02b9=0xa7
+U00b7=0xa8
+U266d=0xa9
+U00ae=0xaa
+U00b1=0xab
+U01a0=0xac
+U01af=0xad
+U02bc=0xae
+U02bb=0xb0
+U0142=0xb1
+U00f8=0xb2
+U0111=0xb3
+U00fe=0xb4
+U00e6=0xb5
+U0153=0xb6
+U02ba=0xb7
+U0131=0xb8
+U00a3=0xb9
+U00f0=0xba
+U01a1=0xbc
+U01b0=0xbd
+U00b0=0xc0
+U2113=0xc1
+U2117=0xc2
+U00a9=0xc3
+U266f=0xc4
+U00bf=0xc5
+U00a1=0xc6
+U00df=0xc7
+U20ac=0xc8
+U0309=0xe0
+U1ea2=0xe0 0x41
+U1eba=0xe0 0x45
+U1ec8=0xe0 0x49
+U1ece=0xe0 0x4f
+U1ee6=0xe0 0x55
+U1ef6=0xe0 0x59
+U1ea3=0xe0 0x61
+U1ebb=0xe0 0x65
+U1ec9=0xe0 0x69
+U1ecf=0xe0 0x6f
+U1ee7=0xe0 0x75
+U1ef7=0xe0 0x79
+U1ede=0xe0 0xac
+U1eec=0xe0 0xad
+U1edf=0xe0 0xbc
+U1eed=0xe0 0xbd
+U1ea8=0xe0 0xe3 0x41
+U1ec2=0xe0 0xe3 0x45
+U1ed4=0xe0 0xe3 0x4f
+U1ea9=0xe0 0xe3 0x61
+U1ec3=0xe0 0xe3 0x65
+U1ed5=0xe0 0xe3 0x6f
+U1eb2=0xe0 0xe6 0x41
+U1eb3=0xe0 0xe6 0x61
+U0300=0xe1
+U00c0=0xe1 0x41
+U00c8=0xe1 0x45
+U00cc=0xe1 0x49
+U01f8=0xe1 0x4e
+U00d2=0xe1 0x4f
+U00d9=0xe1 0x55
+U1e80=0xe1 0x57
+U1ef2=0xe1 0x59
+U00e0=0xe1 0x61
+U00e8=0xe1 0x65
+U00ec=0xe1 0x69
+U01f9=0xe1 0x6e
+U00f2=0xe1 0x6f
+U00f9=0xe1 0x75
+U1e81=0xe1 0x77
+U1ef3=0xe1 0x79
+U1edc=0xe1 0xac
+U1eea=0xe1 0xad
+U1edd=0xe1 0xbc
+U1eeb=0xe1 0xbd
+U1ea6=0xe1 0xe3 0x41
+U1ec0=0xe1 0xe3 0x45
+U1ed2=0xe1 0xe3 0x4f
+U1ea7=0xe1 0xe3 0x61
+U1ec1=0xe1 0xe3 0x65
+U1ed3=0xe1 0xe3 0x6f
+U1e14=0xe1 0xe5 0x45
+U1e50=0xe1 0xe5 0x4f
+U1e15=0xe1 0xe5 0x65
+U1e51=0xe1 0xe5 0x6f
+U1eb0=0xe1 0xe6 0x41
+U1eb1=0xe1 0xe6 0x61
+U01db=0xe1 0xe8 0x55
+U01dc=0xe1 0xe8 0x75
+U0301=0xe2
+U00c1=0xe2 0x41
+U0106=0xe2 0x43
+U00c9=0xe2 0x45
+U01f4=0xe2 0x47
+U00cd=0xe2 0x49
+U1e30=0xe2 0x4b
+U0139=0xe2 0x4c
+U1e3e=0xe2 0x4d
+U0143=0xe2 0x4e
+U00d3=0xe2 0x4f
+U1e54=0xe2 0x50
+U0154=0xe2 0x52
+U015a=0xe2 0x53
+U00da=0xe2 0x55
+U1e82=0xe2 0x57
+U00dd=0xe2 0x59
+U0179=0xe2 0x5a
+U00e1=0xe2 0x61
+U0107=0xe2 0x63
+U00e9=0xe2 0x65
+U01f5=0xe2 0x67
+U00ed=0xe2 0x69
+U1e31=0xe2 0x6b
+U013a=0xe2 0x6c
+U1e3f=0xe2 0x6d
+U0144=0xe2 0x6e
+U00f3=0xe2 0x6f
+U1e55=0xe2 0x70
+U0155=0xe2 0x72
+U015b=0xe2 0x73
+U00fa=0xe2 0x75
+U1e83=0xe2 0x77
+U00fd=0xe2 0x79
+U017a=0xe2 0x7a
+U01fe=0xe2 0xa2
+U01fc=0xe2 0xa5
+U1eda=0xe2 0xac
+U1ee8=0xe2 0xad
+U01ff=0xe2 0xb2
+U01fd=0xe2 0xb5
+U1edb=0xe2 0xbc
+U1ee9=0xe2 0xbd
+U1ea4=0xe2 0xe3 0x41
+U1ebe=0xe2 0xe3 0x45
+U1ed0=0xe2 0xe3 0x4f
+U1ea5=0xe2 0xe3 0x61
+U1ebf=0xe2 0xe3 0x65
+U1ed1=0xe2 0xe3 0x6f
+U1e4c=0xe2 0xe4 0x4f
+U1e78=0xe2 0xe4 0x55
+U1e4d=0xe2 0xe4 0x6f
+U1e79=0xe2 0xe4 0x75
+U1e16=0xe2 0xe5 0x45
+U1e52=0xe2 0xe5 0x4f
+U1e17=0xe2 0xe5 0x65
+U1e53=0xe2 0xe5 0x6f
+U1eae=0xe2 0xe6 0x41
+U1eaf=0xe2 0xe6 0x61
+U0344=0xe2 0xe8
+U1e2e=0xe2 0xe8 0x49
+U01d7=0xe2 0xe8 0x55
+U1e2f=0xe2 0xe8 0x69
+U01d8=0xe2 0xe8 0x75
+U01fa=0xe2 0xea 0x41
+U01fb=0xe2 0xea 0x61
+U1e08=0xe2 0xf0 0x43
+U1e09=0xe2 0xf0 0x63
+U0302=0xe3
+U00c2=0xe3 0x41
+U0108=0xe3 0x43
+U00ca=0xe3 0x45
+U011c=0xe3 0x47
+U0124=0xe3 0x48
+U00ce=0xe3 0x49
+U0134=0xe3 0x4a
+U00d4=0xe3 0x4f
+U015c=0xe3 0x53
+U00db=0xe3 0x55
+U0174=0xe3 0x57
+U0176=0xe3 0x59
+U1e90=0xe3 0x5a
+U00e2=0xe3 0x61
+U0109=0xe3 0x63
+U00ea=0xe3 0x65
+U011d=0xe3 0x67
+U0125=0xe3 0x68
+U00ee=0xe3 0x69
+U0135=0xe3 0x6a
+U00f4=0xe3 0x6f
+U015d=0xe3 0x73
+U00fb=0xe3 0x75
+U0175=0xe3 0x77
+U0177=0xe3 0x79
+U1e91=0xe3 0x7a
+U1eac=0xe3 0xf2 0x41
+U1ec6=0xe3 0xf2 0x45
+U1ed8=0xe3 0xf2 0x4f
+U1ead=0xe3 0xf2 0x61
+U1ec7=0xe3 0xf2 0x65
+U1ed9=0xe3 0xf2 0x6f
+U0303=0xe4
+U00c3=0xe4 0x41
+U1ebc=0xe4 0x45
+U0128=0xe4 0x49
+U00d1=0xe4 0x4e
+U00d5=0xe4 0x4f
+U0168=0xe4 0x55
+U1e7c=0xe4 0x56
+U1ef8=0xe4 0x59
+U00e3=0xe4 0x61
+U1ebd=0xe4 0x65
+U0129=0xe4 0x69
+U00f1=0xe4 0x6e
+U00f5=0xe4 0x6f
+U0169=0xe4 0x75
+U1e7d=0xe4 0x76
+U1ef9=0xe4 0x79
+U1ee0=0xe4 0xac
+U1eee=0xe4 0xad
+U1ee1=0xe4 0xbc
+U1eef=0xe4 0xbd
+U1eaa=0xe4 0xe3 0x41
+U1ec4=0xe4 0xe3 0x45
+U1ed6=0xe4 0xe3 0x4f
+U1eab=0xe4 0xe3 0x61
+U1ec5=0xe4 0xe3 0x65
+U1ed7=0xe4 0xe3 0x6f
+U1eb4=0xe4 0xe6 0x41
+U1eb5=0xe4 0xe6 0x61
+U0304=0xe5
+U0100=0xe5 0x41
+U0112=0xe5 0x45
+U1e20=0xe5 0x47
+U012a=0xe5 0x49
+U014c=0xe5 0x4f
+U016a=0xe5 0x55
+U0232=0xe5 0x59
+U0101=0xe5 0x61
+U0113=0xe5 0x65
+U1e21=0xe5 0x67
+U012b=0xe5 0x69
+U014d=0xe5 0x6f
+U016b=0xe5 0x75
+U0233=0xe5 0x79
+U01e2=0xe5 0xa5
+U01e3=0xe5 0xb5
+U022c=0xe5 0xe4 0x4f
+U022d=0xe5 0xe4 0x6f
+U01e0=0xe5 0xe7 0x41
+U0230=0xe5 0xe7 0x4f
+U01e1=0xe5 0xe7 0x61
+U0231=0xe5 0xe7 0x6f
+U01de=0xe5 0xe8 0x41
+U022a=0xe5 0xe8 0x4f
+U01d5=0xe5 0xe8 0x55
+U01df=0xe5 0xe8 0x61
+U022b=0xe5 0xe8 0x6f
+U01d6=0xe5 0xe8 0x75
+U01ec=0xe5 0xf1 0x4f
+U01ed=0xe5 0xf1 0x6f
+U1e38=0xe5 0xf2 0x4c
+U1e5c=0xe5 0xf2 0x52
+U1e39=0xe5 0xf2 0x6c
+U1e5d=0xe5 0xf2 0x72
+U0306=0xe6
+U0102=0xe6 0x41
+U0114=0xe6 0x45
+U011e=0xe6 0x47
+U012c=0xe6 0x49
+U014e=0xe6 0x4f
+U016c=0xe6 0x55
+U0103=0xe6 0x61
+U0115=0xe6 0x65
+U011f=0xe6 0x67
+U012d=0xe6 0x69
+U014f=0xe6 0x6f
+U016d=0xe6 0x75
+U1e1c=0xe6 0xf0 0x45
+U1e1d=0xe6 0xf0 0x65
+U1eb6=0xe6 0xf2 0x41
+U1eb7=0xe6 0xf2 0x61
+U0307=0xe7
+U0226=0xe7 0x41
+U1e02=0xe7 0x42
+U010a=0xe7 0x43
+U1e0a=0xe7 0x44
+U0116=0xe7 0x45
+U1e1e=0xe7 0x46
+U0120=0xe7 0x47
+U1e22=0xe7 0x48
+U0130=0xe7 0x49
+U1e40=0xe7 0x4d
+U1e44=0xe7 0x4e
+U022e=0xe7 0x4f
+U1e56=0xe7 0x50
+U1e58=0xe7 0x52
+U1e60=0xe7 0x53
+U1e6a=0xe7 0x54
+U1e86=0xe7 0x57
+U1e8a=0xe7 0x58
+U1e8e=0xe7 0x59
+U017b=0xe7 0x5a
+U0227=0xe7 0x61
+U1e03=0xe7 0x62
+U010b=0xe7 0x63
+U1e0b=0xe7 0x64
+U0117=0xe7 0x65
+U1e1f=0xe7 0x66
+U0121=0xe7 0x67
+U1e23=0xe7 0x68
+U1e41=0xe7 0x6d
+U1e45=0xe7 0x6e
+U022f=0xe7 0x6f
+U1e57=0xe7 0x70
+U1e59=0xe7 0x72
+U1e61=0xe7 0x73
+U1e6b=0xe7 0x74
+U1e87=0xe7 0x77
+U1e8b=0xe7 0x78
+U1e8f=0xe7 0x79
+U017c=0xe7 0x7a
+U1e64=0xe7 0xe2 0x53
+U1e65=0xe7 0xe2 0x73
+U1e66=0xe7 0xe9 0x53
+U1e67=0xe7 0xe9 0x73
+U1e68=0xe7 0xf2 0x53
+U1e69=0xe7 0xf2 0x73
+U0308=0xe8
+U00c4=0xe8 0x41
+U00cb=0xe8 0x45
+U1e26=0xe8 0x48
+U00cf=0xe8 0x49
+U00d6=0xe8 0x4f
+U00dc=0xe8 0x55
+U1e84=0xe8 0x57
+U1e8c=0xe8 0x58
+U0178=0xe8 0x59
+U00e4=0xe8 0x61
+U00eb=0xe8 0x65
+U1e27=0xe8 0x68
+U00ef=0xe8 0x69
+U00f6=0xe8 0x6f
+U1e97=0xe8 0x74
+U00fc=0xe8 0x75
+U1e85=0xe8 0x77
+U1e8d=0xe8 0x78
+U00ff=0xe8 0x79
+U1e4e=0xe8 0xe4 0x4f
+U1e4f=0xe8 0xe4 0x6f
+U1e7a=0xe8 0xe5 0x55
+U1e7b=0xe8 0xe5 0x75
+U030c=0xe9
+U01cd=0xe9 0x41
+U010c=0xe9 0x43
+U010e=0xe9 0x44
+U011a=0xe9 0x45
+U01e6=0xe9 0x47
+U021e=0xe9 0x48
+U01cf=0xe9 0x49
+U01e8=0xe9 0x4b
+U013d=0xe9 0x4c
+U0147=0xe9 0x4e
+U01d1=0xe9 0x4f
+U0158=0xe9 0x52
+U0160=0xe9 0x53
+U0164=0xe9 0x54
+U01d3=0xe9 0x55
+U017d=0xe9 0x5a
+U01ce=0xe9 0x61
+U010d=0xe9 0x63
+U010f=0xe9 0x64
+U011b=0xe9 0x65
+U01e7=0xe9 0x67
+U021f=0xe9 0x68
+U01d0=0xe9 0x69
+U01f0=0xe9 0x6a
+U01e9=0xe9 0x6b
+U013e=0xe9 0x6c
+U0148=0xe9 0x6e
+U01d2=0xe9 0x6f
+U0159=0xe9 0x72
+U0161=0xe9 0x73
+U0165=0xe9 0x74
+U01d4=0xe9 0x75
+U017e=0xe9 0x7a
+U01d9=0xe9 0xe8 0x55
+U01da=0xe9 0xe8 0x75
+U030a=0xea
+U00c5=0xea 0x41
+U016e=0xea 0x55
+U00e5=0xea 0x61
+U016f=0xea 0x75
+U1e98=0xea 0x77
+U1e99=0xea 0x79
+U0361=0xeb
+U0315=0xed
+U030b=0xee
+U0150=0xee 0x4f
+U0170=0xee 0x55
+U0151=0xee 0x6f
+U0171=0xee 0x75
+U0310=0xef
+U0327=0xf0
+U00c7=0xf0 0x43
+U1e10=0xf0 0x44
+U0228=0xf0 0x45
+U0122=0xf0 0x47
+U1e28=0xf0 0x48
+U0136=0xf0 0x4b
+U013b=0xf0 0x4c
+U0145=0xf0 0x4e
+U0156=0xf0 0x52
+U015e=0xf0 0x53
+U0162=0xf0 0x54
+U00e7=0xf0 0x63
+U1e11=0xf0 0x64
+U0229=0xf0 0x65
+U0123=0xf0 0x67
+U1e29=0xf0 0x68
+U0137=0xf0 0x6b
+U013c=0xf0 0x6c
+U0146=0xf0 0x6e
+U0157=0xf0 0x72
+U015f=0xf0 0x73
+U0163=0xf0 0x74
+U0328=0xf1
+U0104=0xf1 0x41
+U0118=0xf1 0x45
+U012e=0xf1 0x49
+U01ea=0xf1 0x4f
+U0172=0xf1 0x55
+U0105=0xf1 0x61
+U0119=0xf1 0x65
+U012f=0xf1 0x69
+U01eb=0xf1 0x6f
+U0173=0xf1 0x75
+U0323=0xf2
+U1ea0=0xf2 0x41
+U1e04=0xf2 0x42
+U1e0c=0xf2 0x44
+U1eb8=0xf2 0x45
+U1e24=0xf2 0x48
+U1eca=0xf2 0x49
+U1e32=0xf2 0x4b
+U1e36=0xf2 0x4c
+U1e42=0xf2 0x4d
+U1e46=0xf2 0x4e
+U1ecc=0xf2 0x4f
+U1e5a=0xf2 0x52
+U1e62=0xf2 0x53
+U1e6c=0xf2 0x54
+U1ee4=0xf2 0x55
+U1e7e=0xf2 0x56
+U1e88=0xf2 0x57
+U1ef4=0xf2 0x59
+U1e92=0xf2 0x5a
+U1ea1=0xf2 0x61
+U1e05=0xf2 0x62
+U1e0d=0xf2 0x64
+U1eb9=0xf2 0x65
+U1e25=0xf2 0x68
+U1ecb=0xf2 0x69
+U1e33=0xf2 0x6b
+U1e37=0xf2 0x6c
+U1e43=0xf2 0x6d
+U1e47=0xf2 0x6e
+U1ecd=0xf2 0x6f
+U1e5b=0xf2 0x72
+U1e63=0xf2 0x73
+U1e6d=0xf2 0x74
+U1ee5=0xf2 0x75
+U1e7f=0xf2 0x76
+U1e89=0xf2 0x77
+U1ef5=0xf2 0x79
+U1e93=0xf2 0x7a
+U1ee2=0xf2 0xac
+U1ef0=0xf2 0xad
+U1ee3=0xf2 0xbc
+U1ef1=0xf2 0xbd
+U0324=0xf3
+U1e72=0xf3 0x55
+U1e73=0xf3 0x75
+U0325=0xf4
+U1e00=0xf4 0x41
+U1e01=0xf4 0x61
+U0333=0xf5
+U0332=0xf6
+U0326=0xf7
+U0218=0xf7 0x53
+U021a=0xf7 0x54
+U0219=0xf7 0x73
+U021b=0xf7 0x74
+U031c=0xf8
+U032e=0xf9
+U1e2a=0xf9 0x48
+U1e2b=0xf9 0x68
+U0360=0xfa
+U0313=0xfe
--- a/src/main/resources/org/xbib/charset/codetables.xml
+++ b/src/main/resources/org/xbib/charset/codetables.xml
--- a/src/main/resources/org/xbib/charset/pica.txt
+++ b/src/main/resources/org/xbib/charset/pica.txt
@ -0,0 +1,212 @@
+/*
+
+De PICA characterset is een enigszins gemodificeerde versie van de INTERMARC characterset.
+
+Deze characterset omvat in totaal 256 tekens.
+
+Kolommen:
+(1) = Octaal
+(2) = Teken
+(3) = Omschrijving
+
+(1)  (2)  (3)
+
+-----------------------------------------------------------------
+
+000-177   Standaard ASCII (eerste groep van 128 tekens)
+
+200-237   niet gebruikt
+
+240     diacritische spatie
+
+241     Poolse L
+
+242     Deense O
+
+243   Ð   Servische D
+
+244   Þ   Thorn (groot)
+
+245   Æ   Ligatuur AE
+
+246   Œ   Ligatuur OE
+
+247   ¢   Cyrillische zachtteken (translitt.)
+
+250   ×   Griekse half-hoge punt
+
+251     Mol
+
+252   ®   Registratie-teken
+
+253   ±   Plusminus
+
+254   O   Vietnamese O-haak
+
+255   U   Vietnamese U-haak
+
+256   ?   Alif
+
+257   Å   Angstrom A
+
+260   `   Ayn
+
+261     Poolse l
+
+262     Deense o
+
+263     Servische d
+
+264   þ   Thorn (klein)
+
+265   æ   Ligatuur ae
+
+266   œ   Ligatuur oe
+
+267     Cyrillische hardteken (translitt.)
+
+270     Turkse i (zonder punt)
+
+271   £   Brits pond-teken
+
+272     Eth
+
+273   α   Alfa
+
+274     Vietnamese o-haak
+
+275     Vietnamese u-haak
+
+276   ß   Duitse dubbele S
+
+277   å   Angstrom a
+
+300     Nederlandse IJ
+
+301   Ä   Umlaut A
+
+302   Ö   Umlaut O
+
+303   Ü   Umlaut U
+
+304     Omgekeerde C
+
+305     Omgekeerde E
+
+306   ≠   Ongelijk-teken
+
+307   →   Fleche
+
+310   ≤   Kleiner dan/is-gelijk-teken
+
+311   ∞   Oneindig-teken
+
+312   ∫   Integraal-teken
+
+313     Vermenigvuldiging-teken
+
+314   §   Paragraaf
+
+315   √   Vierkantswortel-teken
+
+316     Reaction
+
+317   ≥   Groter dan/is-gelijk-teken
+
+320     Nederlandse ij
+
+321   ä   Umlaut a
+
+322   ö   Umlaut o
+
+323   ü   Umlaut u
+
+324     Omgekeerde c
+
+325     Omgekeerde e
+
+326   ¿   Spaans omgekeerd vraagteken
+
+327   ¡   Spaans omgekeerd uitroepteken
+
+330   β   Beta
+
+331
+
+332   γ   Gamma
+
+333   π   Pi
+
+334
+
+335
+
+336
+
+337
+
+340   `   Vietnamese rijzende toon
+
+341   `   Accent grave (zie ook octaal 140)
+
+342   ?   Accent aigu
+
+343   ?   Accent circonflexe (zie ook 140)
+
+344   ~   Tilde
+
+345   ¯   Bovenstreepje (lang)
+
+346     Bovenstreepje (kort)
+
+347   ×   Punt boven
+
+350   ?   Trema (geen umlaut)
+
+351     Hacek
+
+352   ?   Angstrom
+
+353     Ligatuur links
+
+354     Ligatuur rechts
+
+355   '   Komma als accent (bovenaan)
+
+356   ²   Dubbele aigu
+
+357     Candrabindu
+
+360   ?   Cedille
+
+361     Hoek boven links
+
+362   ¢   Punt als accent (onderaan)
+
+363   ²   Twee punten als accent (onderaan)
+
+364     Cirkeltje onderaan
+
+365     Dubbele onderstreping als accent
+
+366   _   Onderstreping als accent
+
+367     Hoek boven rechts
+
+370     Omgekeerde cedille
+
+371     Upadhmaniya (geen accent)
+
+372     Halve tilde rechts
+
+373     Halve tilde links
+
+374
+
+375
+
+376   ?   Komma rechts (op middelhoogte)
+
+377
+
+ */
--- a/src/main/resources/org/xbib/charset/z3947.txt
+++ b/src/main/resources/org/xbib/charset/z3947.txt
@ -0,0 +1,547 @@
+#step 1
+#created: 2001-03-19
+
+A1=0141#latin capital letter L with stroke
+A2=00D8#latin capital letter O with stroke
+A3=0110#latin capital letter D with stroke
+A4=00DE#latin capital letter thorn
+A5=00C6#latin capital letter AE
+A6=0152#latin capital ligature OE
+A7=02B9#modifier letter prime
+A8=00B7#middle dot
+A9=266D#music flat sign
+AA=00AE#registered sign
+AB=00B1#plus-minus sign
+AC=01A0#latin capital letter O with horn
+AD=01AF#latin capital letter U with horn
+AE=02BC#modifier letter apostrophe
+B0=02BB#modifier letter turned comma
+B1=0142#latin small letter L with stroke
+B2=00F8#latin small letter O with stroke
+B3=0111#latin small letter D with stroke
+B4=00FE#latin small letter thorn
+B5=00E6#latin small letter AE
+B6=0153#latin small ligature OE
+B7=02BA#modifier letter double prime
+B8=0131#latin small letter dotless i
+B9=00A3#pound sign
+BA=00F0#latin small letter eth
+BC=01A1#latin small letter O with horn
+BD=01B0#latin small letter U with horn
+C0=00B0#degree sign
+C1=2113#script small L
+C2=2117#sound recording copyright
+C3=00A9#copyright sign
+C4=266F#music sharp sign
+C5=00BF#inverted question mark
+C6=00A1#inverted exclamation mark
+CF=00DF#latin small letter sharp S
+E0=0309#combining hook above
+E1=0300#combining grave accent
+E2=0301#combining acute accent
+E3=0302#combining circumflex accent
+E4=0303#combining tilde
+E5=0304#combining macron
+E6=0306#combining breve
+E7=0307#combining dot above
+E8=0308#combining diaeresis
+E9=030C#combining caron
+EA=030A#combining ring above
+EB=FE20#combining ligature left half
+EC=FE21#combining ligature right half
+ED=0315#combining comma above right
+EE=030B#combining double acute accent
+EF=0310#combining candrabindu
+F0=0327#combining cedilla
+F1=0328#combining ogonek
+F2=0323#combining dot below
+F3=0324#combining diaeresis below
+F4=0325#combining ring below
+F5=0333#combining double low line
+F6=0332#combining low line
+F7=0326#combining comma below
+F8=0321#combining palatalized hook below
+F9=032E#combining breve below
+FA=FE22#combining double tilde left half
+FB=FE23#combining double tilde right half
+FE=0313#combining comma above
+
+#step 2
+#created: 20 january 1998
+
+0041+0300=00C0# LATIN CAPITAL LETTER A WITH GRAVE = LATIN CAPITAL LETTER A + COMBINING GRAVE ACCENT
+0041+0301=00C1# LATIN CAPITAL LETTER A WITH ACUTE = LATIN CAPITAL LETTER A + COMBINING ACUTE ACCENT
+0041+0302=00C2# LATIN CAPITAL LETTER A WITH CIRCUMFLEX = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT
+0041+0303=00C3# LATIN CAPITAL LETTER A WITH TILDE = LATIN CAPITAL LETTER A + COMBINING TILDE
+0041+0308=00C4# LATIN CAPITAL LETTER A WITH DIAERESIS = LATIN CAPITAL LETTER A + COMBINING DIAERESIS
+0041+030A=00C5# LATIN CAPITAL LETTER A WITH RING ABOVE = LATIN CAPITAL LETTER A + COMBINING RING ABOVE
+0043+0327=00C7# LATIN CAPITAL LETTER C WITH CEDILLA = LATIN CAPITAL LETTER C + COMBINING CEDILLA
+0045+0300=00C8# LATIN CAPITAL LETTER E WITH GRAVE = LATIN CAPITAL LETTER E + COMBINING GRAVE ACCENT
+0045+0301=00C9# LATIN CAPITAL LETTER E WITH ACUTE = LATIN CAPITAL LETTER E + COMBINING ACUTE ACCENT
+0045+0302=00CA# LATIN CAPITAL LETTER E WITH CIRCUMFLEX = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT
+0045+0308=00CB# LATIN CAPITAL LETTER E WITH DIAERESIS = LATIN CAPITAL LETTER E + COMBINING DIAERESIS
+0049+0300=00CC# LATIN CAPITAL LETTER I WITH GRAVE = LATIN CAPITAL LETTER I + COMBINING GRAVE ACCENT
+0049+0301=00CD# LATIN CAPITAL LETTER I WITH ACUTE = LATIN CAPITAL LETTER I + COMBINING ACUTE ACCENT
+0049+0302=00CE# LATIN CAPITAL LETTER I WITH CIRCUMFLEX = LATIN CAPITAL LETTER I + COMBINING CIRCUMFLEX ACCENT
+0049+0308=00CF# LATIN CAPITAL LETTER I WITH DIAERESIS = LATIN CAPITAL LETTER I + COMBINING DIAERESIS
+004E+0303=00D1# LATIN CAPITAL LETTER N WITH TILDE = LATIN CAPITAL LETTER N + COMBINING TILDE
+004F+0300=00D2# LATIN CAPITAL LETTER O WITH GRAVE = LATIN CAPITAL LETTER O + COMBINING GRAVE ACCENT
+004F+0301=00D3# LATIN CAPITAL LETTER O WITH ACUTE = LATIN CAPITAL LETTER O + COMBINING ACUTE ACCENT
+004F+0302=00D4# LATIN CAPITAL LETTER O WITH CIRCUMFLEX = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT
+004F+0303=00D5# LATIN CAPITAL LETTER O WITH TILDE = LATIN CAPITAL LETTER O + COMBINING TILDE
+004F+0308=00D6# LATIN CAPITAL LETTER O WITH DIAERESIS = LATIN CAPITAL LETTER O + COMBINING DIAERESIS
+0055+0300=00D9# LATIN CAPITAL LETTER U WITH GRAVE = LATIN CAPITAL LETTER U + COMBINING GRAVE ACCENT
+0055+0301=00DA# LATIN CAPITAL LETTER U WITH ACUTE = LATIN CAPITAL LETTER U + COMBINING ACUTE ACCENT
+0055+0302=00DB# LATIN CAPITAL LETTER U WITH CIRCUMFLEX = LATIN CAPITAL LETTER U + COMBINING CIRCUMFLEX ACCENT
+0055+0308=00DC# LATIN CAPITAL LETTER U WITH DIAERESIS = LATIN CAPITAL LETTER U + COMBINING DIAERESIS
+0059+0301=00DD# LATIN CAPITAL LETTER Y WITH ACUTE = LATIN CAPITAL LETTER Y + COMBINING ACUTE ACCENT
+0061+0300=00E0# LATIN SMALL LETTER A WITH GRAVE = LATIN SMALL LETTER A + COMBINING GRAVE ACCENT
+0061+0301=00E1# LATIN SMALL LETTER A WITH ACUTE = LATIN SMALL LETTER A + COMBINING ACUTE ACCENT
+0061+0302=00E2# LATIN SMALL LETTER A WITH CIRCUMFLEX = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT
+0061+0303=00E3# LATIN SMALL LETTER A WITH TILDE = LATIN SMALL LETTER A + COMBINING TILDE
+0061+0308=00E4# LATIN SMALL LETTER A WITH DIAERESIS = LATIN SMALL LETTER A + COMBINING DIAERESIS
+0061+030A=00E5# LATIN SMALL LETTER A WITH RING ABOVE = LATIN SMALL LETTER A + COMBINING RING ABOVE
+0063+0327=00E7# LATIN SMALL LETTER C WITH CEDILLA = LATIN SMALL LETTER C + COMBINING CEDILLA
+0065+0300=00E8# LATIN SMALL LETTER E WITH GRAVE = LATIN SMALL LETTER E + COMBINING GRAVE ACCENT
+0065+0301=00E9# LATIN SMALL LETTER E WITH ACUTE = LATIN SMALL LETTER E + COMBINING ACUTE ACCENT
+0065+0302=00EA# LATIN SMALL LETTER E WITH CIRCUMFLEX = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT
+0065+0308=00EB# LATIN SMALL LETTER E WITH DIAERESIS = LATIN SMALL LETTER E + COMBINING DIAERESIS
+0069+0300=00EC# LATIN SMALL LETTER I WITH GRAVE = LATIN SMALL LETTER I + COMBINING GRAVE ACCENT
+0069+0301=00ED# LATIN SMALL LETTER I WITH ACUTE = LATIN SMALL LETTER I + COMBINING ACUTE ACCENT
+0069+0302=00EE# LATIN SMALL LETTER I WITH CIRCUMFLEX = LATIN SMALL LETTER I + COMBINING CIRCUMFLEX ACCENT
+0069+0308=00EF# LATIN SMALL LETTER I WITH DIAERESIS = LATIN SMALL LETTER I + COMBINING DIAERESIS
+006E+0303=00F1# LATIN SMALL LETTER N WITH TILDE = LATIN SMALL LETTER N + COMBINING TILDE
+006F+0300=00F2# LATIN SMALL LETTER O WITH GRAVE = LATIN SMALL LETTER O + COMBINING GRAVE ACCENT
+006F+0301=00F3# LATIN SMALL LETTER O WITH ACUTE = LATIN SMALL LETTER O + COMBINING ACUTE ACCENT
+006F+0302=00F4# LATIN SMALL LETTER O WITH CIRCUMFLEX = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT
+006F+0303=00F5# LATIN SMALL LETTER O WITH TILDE = LATIN SMALL LETTER O + COMBINING TILDE
+006F+0308=00F6# LATIN SMALL LETTER O WITH DIAERESIS = LATIN SMALL LETTER O + COMBINING DIAERESIS
+0075+0300=00F9# LATIN SMALL LETTER U WITH GRAVE = LATIN SMALL LETTER U + COMBINING GRAVE ACCENT
+0075+0301=00FA# LATIN SMALL LETTER U WITH ACUTE = LATIN SMALL LETTER U + COMBINING ACUTE ACCENT
+0075+0302=00FB# LATIN SMALL LETTER U WITH CIRCUMFLEX = LATIN SMALL LETTER U + COMBINING CIRCUMFLEX ACCENT
+0075+0308=00FC# LATIN SMALL LETTER U WITH DIAERESIS = LATIN SMALL LETTER U + COMBINING DIAERESIS
+0079+0301=00FD# LATIN SMALL LETTER Y WITH ACUTE = LATIN SMALL LETTER Y + COMBINING ACUTE ACCENT
+0079+0308=00FF# LATIN SMALL LETTER Y WITH DIAERESIS = LATIN SMALL LETTER Y + COMBINING DIAERESIS
+0041+0304=0100# LATIN CAPITAL LETTER A WITH MACRON = LATIN CAPITAL LETTER A + COMBINING MACRON
+0061+0304=0101# LATIN SMALL LETTER A WITH MACRON = LATIN SMALL LETTER A + COMBINING MACRON
+0041+0306=0102# LATIN CAPITAL LETTER A WITH BREVE = LATIN CAPITAL LETTER A + COMBINING BREVE
+0061+0306=0103# LATIN SMALL LETTER A WITH BREVE = LATIN SMALL LETTER A + COMBINING BREVE
+0041+0328=0104# LATIN CAPITAL LETTER A WITH OGONEK = LATIN CAPITAL LETTER A + COMBINING OGONEK
+0061+0328=0105# LATIN SMALL LETTER A WITH OGONEK = LATIN SMALL LETTER A + COMBINING OGONEK
+0043+0301=0106# LATIN CAPITAL LETTER C WITH ACUTE = LATIN CAPITAL LETTER C + COMBINING ACUTE ACCENT
+0063+0301=0107# LATIN SMALL LETTER C WITH ACUTE = LATIN SMALL LETTER C + COMBINING ACUTE ACCENT
+0043+0302=0108# LATIN CAPITAL LETTER C WITH CIRCUMFLEX = LATIN CAPITAL LETTER C + COMBINING CIRCUMFLEX ACCENT
+0063+0302=0109# LATIN SMALL LETTER C WITH CIRCUMFLEX = LATIN SMALL LETTER C + COMBINING CIRCUMFLEX ACCENT
+0043+0307=010A# LATIN CAPITAL LETTER C WITH DOT ABOVE = LATIN CAPITAL LETTER C + COMBINING DOT ABOVE
+0063+0307=010B# LATIN SMALL LETTER C WITH DOT ABOVE = LATIN SMALL LETTER C + COMBINING DOT ABOVE
+0043+030C=010C# LATIN CAPITAL LETTER C WITH CARON = LATIN CAPITAL LETTER C + COMBINING CARON
+0063+030C=010D# LATIN SMALL LETTER C WITH CARON = LATIN SMALL LETTER C + COMBINING CARON
+0044+030C=010E# LATIN CAPITAL LETTER D WITH CARON = LATIN CAPITAL LETTER D + COMBINING CARON
+0064+030C=010F# LATIN SMALL LETTER D WITH CARON = LATIN SMALL LETTER D + COMBINING CARON
+0045+0304=0112# LATIN CAPITAL LETTER E WITH MACRON = LATIN CAPITAL LETTER E + COMBINING MACRON
+0065+0304=0113# LATIN SMALL LETTER E WITH MACRON = LATIN SMALL LETTER E + COMBINING MACRON
+0045+0306=0114# LATIN CAPITAL LETTER E WITH BREVE = LATIN CAPITAL LETTER E + COMBINING BREVE
+0065+0306=0115# LATIN SMALL LETTER E WITH BREVE = LATIN SMALL LETTER E + COMBINING BREVE
+0045+0307=0116# LATIN CAPITAL LETTER E WITH DOT ABOVE = LATIN CAPITAL LETTER E + COMBINING DOT ABOVE
+0065+0307=0117# LATIN SMALL LETTER E WITH DOT ABOVE = LATIN SMALL LETTER E + COMBINING DOT ABOVE
+0045+0328=0118# LATIN CAPITAL LETTER E WITH OGONEK = LATIN CAPITAL LETTER E + COMBINING OGONEK
+0065+0328=0119# LATIN SMALL LETTER E WITH OGONEK = LATIN SMALL LETTER E + COMBINING OGONEK
+0045+030C=011A# LATIN CAPITAL LETTER E WITH CARON = LATIN CAPITAL LETTER E + COMBINING CARON
+0065+030C=011B# LATIN SMALL LETTER E WITH CARON = LATIN SMALL LETTER E + COMBINING CARON
+0047+0302=011C# LATIN CAPITAL LETTER G WITH CIRCUMFLEX = LATIN CAPITAL LETTER G + COMBINING CIRCUMFLEX ACCENT
+0067+0302=011D# LATIN SMALL LETTER G WITH CIRCUMFLEX = LATIN SMALL LETTER G + COMBINING CIRCUMFLEX ACCENT
+0047+0306=011E# LATIN CAPITAL LETTER G WITH BREVE = LATIN CAPITAL LETTER G + COMBINING BREVE
+0067+0306=011F# LATIN SMALL LETTER G WITH BREVE = LATIN SMALL LETTER G + COMBINING BREVE
+0047+0307=0120# LATIN CAPITAL LETTER G WITH DOT ABOVE = LATIN CAPITAL LETTER G + COMBINING DOT ABOVE
+0067+0307=0121# LATIN SMALL LETTER G WITH DOT ABOVE = LATIN SMALL LETTER G + COMBINING DOT ABOVE
+0047+0327=0122# LATIN CAPITAL LETTER G WITH CEDILLA = LATIN CAPITAL LETTER G + COMBINING CEDILLA
+0067+0327=0123# LATIN SMALL LETTER G WITH CEDILLA = LATIN SMALL LETTER G + COMBINING CEDILLA
+0048+0302=0124# LATIN CAPITAL LETTER H WITH CIRCUMFLEX = LATIN CAPITAL LETTER H + COMBINING CIRCUMFLEX ACCENT
+0068+0302=0125# LATIN SMALL LETTER H WITH CIRCUMFLEX = LATIN SMALL LETTER H + COMBINING CIRCUMFLEX ACCENT
+0049+0303=0128# LATIN CAPITAL LETTER I WITH TILDE = LATIN CAPITAL LETTER I + COMBINING TILDE
+0069+0303=0129# LATIN SMALL LETTER I WITH TILDE = LATIN SMALL LETTER I + COMBINING TILDE
+0049+0304=012A# LATIN CAPITAL LETTER I WITH MACRON = LATIN CAPITAL LETTER I + COMBINING MACRON
+0069+0304=012B# LATIN SMALL LETTER I WITH MACRON = LATIN SMALL LETTER I + COMBINING MACRON
+0049+0306=012C# LATIN CAPITAL LETTER I WITH BREVE = LATIN CAPITAL LETTER I + COMBINING BREVE
+0069+0306=012D# LATIN SMALL LETTER I WITH BREVE = LATIN SMALL LETTER I + COMBINING BREVE
+0049+0328=012E# LATIN CAPITAL LETTER I WITH OGONEK = LATIN CAPITAL LETTER I + COMBINING OGONEK
+0069+0328=012F# LATIN SMALL LETTER I WITH OGONEK = LATIN SMALL LETTER I + COMBINING OGONEK
+0049+0307=0130# LATIN CAPITAL LETTER I WITH DOT ABOVE = LATIN CAPITAL LETTER I + COMBINING DOT ABOVE
+004A+0302=0134# LATIN CAPITAL LETTER J WITH CIRCUMFLEX = LATIN CAPITAL LETTER J + COMBINING CIRCUMFLEX ACCENT
+006A+0302=0135# LATIN SMALL LETTER J WITH CIRCUMFLEX = LATIN SMALL LETTER J + COMBINING CIRCUMFLEX ACCENT
+004B+0327=0136# LATIN CAPITAL LETTER K WITH CEDILLA = LATIN CAPITAL LETTER K + COMBINING CEDILLA
+006B+0327=0137# LATIN SMALL LETTER K WITH CEDILLA = LATIN SMALL LETTER K + COMBINING CEDILLA
+004C+0301=0139# LATIN CAPITAL LETTER L WITH ACUTE = LATIN CAPITAL LETTER L + COMBINING ACUTE ACCENT
+006C+0301=013A# LATIN SMALL LETTER L WITH ACUTE = LATIN SMALL LETTER L + COMBINING ACUTE ACCENT
+004C+0327=013B# LATIN CAPITAL LETTER L WITH CEDILLA = LATIN CAPITAL LETTER L + COMBINING CEDILLA
+006C+0327=013C# LATIN SMALL LETTER L WITH CEDILLA = LATIN SMALL LETTER L + COMBINING CEDILLA
+004C+030C=013D# LATIN CAPITAL LETTER L WITH CARON = LATIN CAPITAL LETTER L + COMBINING CARON
+006C+030C=013E# LATIN SMALL LETTER L WITH CARON = LATIN SMALL LETTER L + COMBINING CARON
+004E+0301=0143# LATIN CAPITAL LETTER N WITH ACUTE = LATIN CAPITAL LETTER N + COMBINING ACUTE ACCENT
+006E+0301=0144# LATIN SMALL LETTER N WITH ACUTE = LATIN SMALL LETTER N + COMBINING ACUTE ACCENT
+004E+0327=0145# LATIN CAPITAL LETTER N WITH CEDILLA = LATIN CAPITAL LETTER N + COMBINING CEDILLA
+006E+0327=0146# LATIN SMALL LETTER N WITH CEDILLA = LATIN SMALL LETTER N + COMBINING CEDILLA
+004E+030C=0147# LATIN CAPITAL LETTER N WITH CARON = LATIN CAPITAL LETTER N + COMBINING CARON
+006E+030C=0148# LATIN SMALL LETTER N WITH CARON = LATIN SMALL LETTER N + COMBINING CARON
+004F+0304=014C# LATIN CAPITAL LETTER O WITH MACRON = LATIN CAPITAL LETTER O + COMBINING MACRON
+006F+0304=014D# LATIN SMALL LETTER O WITH MACRON = LATIN SMALL LETTER O + COMBINING MACRON
+004F+0306=014E# LATIN CAPITAL LETTER O WITH BREVE = LATIN CAPITAL LETTER O + COMBINING BREVE
+006F+0306=014F# LATIN SMALL LETTER O WITH BREVE = LATIN SMALL LETTER O + COMBINING BREVE
+004F+030B=0150# LATIN CAPITAL LETTER O WITH DOUBLE ACUTE = LATIN CAPITAL LETTER O + COMBINING DOUBLE ACUTE ACCENT
+006F+030B=0151# LATIN SMALL LETTER O WITH DOUBLE ACUTE = LATIN SMALL LETTER O + COMBINING DOUBLE ACUTE ACCENT
+0052+0301=0154# LATIN CAPITAL LETTER R WITH ACUTE = LATIN CAPITAL LETTER R + COMBINING ACUTE ACCENT
+0072+0301=0155# LATIN SMALL LETTER R WITH ACUTE = LATIN SMALL LETTER R + COMBINING ACUTE ACCENT
+0052+0327=0156# LATIN CAPITAL LETTER R WITH CEDILLA = LATIN CAPITAL LETTER R + COMBINING CEDILLA
+0072+0327=0157# LATIN SMALL LETTER R WITH CEDILLA = LATIN SMALL LETTER R + COMBINING CEDILLA
+0052+030C=0158# LATIN CAPITAL LETTER R WITH CARON = LATIN CAPITAL LETTER R + COMBINING CARON
+0072+030C=0159# LATIN SMALL LETTER R WITH CARON = LATIN SMALL LETTER R + COMBINING CARON
+0053+0301=015A# LATIN CAPITAL LETTER S WITH ACUTE = LATIN CAPITAL LETTER S + COMBINING ACUTE ACCENT
+0073+0301=015B# LATIN SMALL LETTER S WITH ACUTE = LATIN SMALL LETTER S + COMBINING ACUTE ACCENT
+0053+0302=015C# LATIN CAPITAL LETTER S WITH CIRCUMFLEX = LATIN CAPITAL LETTER S + COMBINING CIRCUMFLEX ACCENT
+0073+0302=015D# LATIN SMALL LETTER S WITH CIRCUMFLEX = LATIN SMALL LETTER S + COMBINING CIRCUMFLEX ACCENT
+0053+0327=015E# LATIN CAPITAL LETTER S WITH CEDILLA = LATIN CAPITAL LETTER S + COMBINING CEDILLA
+0073+0327=015F# LATIN SMALL LETTER S WITH CEDILLA = LATIN SMALL LETTER S + COMBINING CEDILLA
+0053+030C=0160# LATIN CAPITAL LETTER S WITH CARON = LATIN CAPITAL LETTER S + COMBINING CARON
+0073+030C=0161# LATIN SMALL LETTER S WITH CARON = LATIN SMALL LETTER S + COMBINING CARON
+0054+0327=0162# LATIN CAPITAL LETTER T WITH CEDILLA = LATIN CAPITAL LETTER T + COMBINING CEDILLA
+0074+0327=0163# LATIN SMALL LETTER T WITH CEDILLA = LATIN SMALL LETTER T + COMBINING CEDILLA
+0054+030C=0164# LATIN CAPITAL LETTER T WITH CARON = LATIN CAPITAL LETTER T + COMBINING CARON
+0074+030C=0165# LATIN SMALL LETTER T WITH CARON = LATIN SMALL LETTER T + COMBINING CARON
+0055+0303=0168# LATIN CAPITAL LETTER U WITH TILDE = LATIN CAPITAL LETTER U + COMBINING TILDE
+0075+0303=0169# LATIN SMALL LETTER U WITH TILDE = LATIN SMALL LETTER U + COMBINING TILDE
+0055+0304=016A# LATIN CAPITAL LETTER U WITH MACRON = LATIN CAPITAL LETTER U + COMBINING MACRON
+0075+0304=016B# LATIN SMALL LETTER U WITH MACRON = LATIN SMALL LETTER U + COMBINING MACRON
+0055+0306=016C# LATIN CAPITAL LETTER U WITH BREVE = LATIN CAPITAL LETTER U + COMBINING BREVE
+0075+0306=016D# LATIN SMALL LETTER U WITH BREVE = LATIN SMALL LETTER U + COMBINING BREVE
+0055+030A=016E# LATIN CAPITAL LETTER U WITH RING ABOVE = LATIN CAPITAL LETTER U + COMBINING RING ABOVE
+0075+030A=016F# LATIN SMALL LETTER U WITH RING ABOVE = LATIN SMALL LETTER U + COMBINING RING ABOVE
+0055+030B=0170# LATIN CAPITAL LETTER U WITH DOUBLE ACUTE = LATIN CAPITAL LETTER U + COMBINING DOUBLE ACUTE ACCENT
+0075+030B=0171# LATIN SMALL LETTER U WITH DOUBLE ACUTE = LATIN SMALL LETTER U + COMBINING DOUBLE ACUTE ACCENT
+0055+0328=0172# LATIN CAPITAL LETTER U WITH OGONEK = LATIN CAPITAL LETTER U + COMBINING OGONEK
+0075+0328=0173# LATIN SMALL LETTER U WITH OGONEK = LATIN SMALL LETTER U + COMBINING OGONEK
+0057+0302=0174# LATIN CAPITAL LETTER W WITH CIRCUMFLEX = LATIN CAPITAL LETTER W + COMBINING CIRCUMFLEX ACCENT
+0077+0302=0175# LATIN SMALL LETTER W WITH CIRCUMFLEX = LATIN SMALL LETTER W + COMBINING CIRCUMFLEX ACCENT
+0059+0302=0176# LATIN CAPITAL LETTER Y WITH CIRCUMFLEX = LATIN CAPITAL LETTER Y + COMBINING CIRCUMFLEX ACCENT
+0079+0302=0177# LATIN SMALL LETTER Y WITH CIRCUMFLEX = LATIN SMALL LETTER Y + COMBINING CIRCUMFLEX ACCENT
+0059+0308=0178# LATIN CAPITAL LETTER Y WITH DIAERESIS = LATIN CAPITAL LETTER Y + COMBINING DIAERESIS
+005A+0301=0179# LATIN CAPITAL LETTER Z WITH ACUTE = LATIN CAPITAL LETTER Z + COMBINING ACUTE ACCENT
+007A+0301=017A# LATIN SMALL LETTER Z WITH ACUTE = LATIN SMALL LETTER Z + COMBINING ACUTE ACCENT
+005A+0307=017B# LATIN CAPITAL LETTER Z WITH DOT ABOVE = LATIN CAPITAL LETTER Z + COMBINING DOT ABOVE
+007A+0307=017C# LATIN SMALL LETTER Z WITH DOT ABOVE = LATIN SMALL LETTER Z + COMBINING DOT ABOVE
+005A+030C=017D# LATIN CAPITAL LETTER Z WITH CARON = LATIN CAPITAL LETTER Z + COMBINING CARON
+007A+030C=017E# LATIN SMALL LETTER Z WITH CARON = LATIN SMALL LETTER Z + COMBINING CARON
+004F+031B=01A0# LATIN CAPITAL LETTER O WITH HORN = LATIN CAPITAL LETTER O + COMBINING HORN
+006F+031B=01A1# LATIN SMALL LETTER O WITH HORN = LATIN SMALL LETTER O + COMBINING HORN
+0055+031B=01AF# LATIN CAPITAL LETTER U WITH HORN = LATIN CAPITAL LETTER U + COMBINING HORN
+0075+031B=01B0# LATIN SMALL LETTER U WITH HORN = LATIN SMALL LETTER U + COMBINING HORN
+01F1+030C=01C4# LATIN CAPITAL LETTER DZ WITH CARON = LATIN CAPITAL LETTER DZ + COMBINING CARON
+01F3+030C=01C6# LATIN SMALL LETTER DZ WITH CARON = LATIN SMALL LETTER DZ + COMBINING CARON
+0041+030C=01CD# LATIN CAPITAL LETTER A WITH CARON = LATIN CAPITAL LETTER A + COMBINING CARON
+0061+030C=01CE# LATIN SMALL LETTER A WITH CARON = LATIN SMALL LETTER A + COMBINING CARON
+0049+030C=01CF# LATIN CAPITAL LETTER I WITH CARON = LATIN CAPITAL LETTER I + COMBINING CARON
+0069+030C=01D0# LATIN SMALL LETTER I WITH CARON = LATIN SMALL LETTER I + COMBINING CARON
+004F+030C=01D1# LATIN CAPITAL LETTER O WITH CARON = LATIN CAPITAL LETTER O + COMBINING CARON
+006F+030C=01D2# LATIN SMALL LETTER O WITH CARON = LATIN SMALL LETTER O + COMBINING CARON
+0055+030C=01D3# LATIN CAPITAL LETTER U WITH CARON = LATIN CAPITAL LETTER U + COMBINING CARON
+0075+030C=01D4# LATIN SMALL LETTER U WITH CARON = LATIN SMALL LETTER U + COMBINING CARON
+0055+0308+0304=01D5# LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING MACRON
+0075+0308+0304=01D6# LATIN SMALL LETTER U WITH DIAERESIS AND MACRON = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING MACRON
+0055+0308+0301=01D7# LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
+0075+0308+0301=01D8# LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
+0055+0308+030C=01D9# LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING CARON
+0075+0308+030C=01DA# LATIN SMALL LETTER U WITH DIAERESIS AND CARON = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING CARON
+0055+0308+0300=01DB# LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING GRAVE ACCENT
+0075+0308+0300=01DC# LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING GRAVE ACCENT
+0041+0308+0304=01DE# LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON = LATIN CAPITAL LETTER A + COMBINING DIAERESIS + COMBINING MACRON
+0061+0308+0304=01DF# LATIN SMALL LETTER A WITH DIAERESIS AND MACRON = LATIN SMALL LETTER A + COMBINING DIAERESIS + COMBINING MACRON
+0041+0307+0304=01E0# LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON = LATIN CAPITAL LETTER A + COMBINING DOT ABOVE + COMBINING MACRON
+0061+0307+0304=01E1# LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON = LATIN SMALL LETTER A + COMBINING DOT ABOVE + COMBINING MACRON
+00C6+0304=01E2# LATIN CAPITAL LETTER AE WITH MACRON = LATIN CAPITAL LETTER AE + COMBINING MACRON
+00E6+0304=01E3# LATIN SMALL LETTER AE WITH MACRON = LATIN SMALL LETTER AE + COMBINING MACRON
+0047+030C=01E6# LATIN CAPITAL LETTER G WITH CARON = LATIN CAPITAL LETTER G + COMBINING CARON
+0067+030C=01E7# LATIN SMALL LETTER G WITH CARON = LATIN SMALL LETTER G + COMBINING CARON
+004B+030C=01E8# LATIN CAPITAL LETTER K WITH CARON = LATIN CAPITAL LETTER K + COMBINING CARON
+006B+030C=01E9# LATIN SMALL LETTER K WITH CARON = LATIN SMALL LETTER K + COMBINING CARON
+004F+0328=01EA# LATIN CAPITAL LETTER O WITH OGONEK = LATIN CAPITAL LETTER O + COMBINING OGONEK
+006F+0328=01EB# LATIN SMALL LETTER O WITH OGONEK = LATIN SMALL LETTER O + COMBINING OGONEK
+004F+0328+0304=01EC# LATIN CAPITAL LETTER O WITH OGONEK AND MACRON = LATIN CAPITAL LETTER O + COMBINING OGONEK + COMBINING MACRON
+006F+0328+0304=01ED# LATIN SMALL LETTER O WITH OGONEK AND MACRON = LATIN SMALL LETTER O + COMBINING OGONEK + COMBINING MACRON
+01B7+030C=01EE# LATIN CAPITAL LETTER EZH WITH CARON = LATIN CAPITAL LETTER EZH + COMBINING CARON
+0292+030C=01EF# LATIN SMALL LETTER EZH WITH CARON = LATIN SMALL LETTER EZH + COMBINING CARON
+006A+030C=01F0# LATIN SMALL LETTER J WITH CARON = LATIN SMALL LETTER J + COMBINING CARON
+0047+0301=01F4# LATIN CAPITAL LETTER G WITH ACUTE = LATIN CAPITAL LETTER G + COMBINING ACUTE ACCENT
+0067+0301=01F5# LATIN SMALL LETTER G WITH ACUTE = LATIN SMALL LETTER G + COMBINING ACUTE ACCENT
+0041+030A+0301=01FA# LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE = LATIN CAPITAL LETTER A + COMBINING RING ABOVE + COMBINING ACUTE ACCENT
+0061+030A+0301=01FB# LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE = LATIN SMALL LETTER A + COMBINING RING ABOVE + COMBINING ACUTE ACCENT
+00C6+0301=01FC# LATIN CAPITAL LETTER AE WITH ACUTE = LATIN CAPITAL LETTER AE + COMBINING ACUTE ACCENT
+00E6+0301=01FD# LATIN SMALL LETTER AE WITH ACUTE = LATIN SMALL LETTER AE + COMBINING ACUTE ACCENT
+0041+030F=0200# LATIN CAPITAL LETTER A WITH DOUBLE GRAVE = LATIN CAPITAL LETTER A + COMBINING DOUBLE GRAVE ACCENT
+0061+030F=0201# LATIN SMALL LETTER A WITH DOUBLE GRAVE = LATIN SMALL LETTER A + COMBINING DOUBLE GRAVE ACCENT
+0041+0311=0202# LATIN CAPITAL LETTER A WITH INVERTED BREVE = LATIN CAPITAL LETTER A + COMBINING INVERTED BREVE
+0061+0311=0203# LATIN SMALL LETTER A WITH INVERTED BREVE = LATIN SMALL LETTER A + COMBINING INVERTED BREVE
+0045+030F=0204# LATIN CAPITAL LETTER E WITH DOUBLE GRAVE = LATIN CAPITAL LETTER E + COMBINING DOUBLE GRAVE ACCENT
+0065+030F=0205# LATIN SMALL LETTER E WITH DOUBLE GRAVE = LATIN SMALL LETTER E + COMBINING DOUBLE GRAVE ACCENT
+0045+0311=0206# LATIN CAPITAL LETTER E WITH INVERTED BREVE = LATIN CAPITAL LETTER E + COMBINING INVERTED BREVE
+0065+0311=0207# LATIN SMALL LETTER E WITH INVERTED BREVE = LATIN SMALL LETTER E + COMBINING INVERTED BREVE
+0049+030F=0208# LATIN CAPITAL LETTER I WITH DOUBLE GRAVE = LATIN CAPITAL LETTER I + COMBINING DOUBLE GRAVE ACCENT
+0069+030F=0209# LATIN SMALL LETTER I WITH DOUBLE GRAVE = LATIN SMALL LETTER I + COMBINING DOUBLE GRAVE ACCENT
+0049+0311=020A# LATIN CAPITAL LETTER I WITH INVERTED BREVE = LATIN CAPITAL LETTER I + COMBINING INVERTED BREVE
+0069+0311=020B# LATIN SMALL LETTER I WITH INVERTED BREVE = LATIN SMALL LETTER I + COMBINING INVERTED BREVE
+004F+030F=020C# LATIN CAPITAL LETTER O WITH DOUBLE GRAVE = LATIN CAPITAL LETTER O + COMBINING DOUBLE GRAVE ACCENT
+006F+030F=020D# LATIN SMALL LETTER O WITH DOUBLE GRAVE = LATIN SMALL LETTER O + COMBINING DOUBLE GRAVE ACCENT
+004F+0311=020E# LATIN CAPITAL LETTER O WITH INVERTED BREVE = LATIN CAPITAL LETTER O + COMBINING INVERTED BREVE
+006F+0311=020F# LATIN SMALL LETTER O WITH INVERTED BREVE = LATIN SMALL LETTER O + COMBINING INVERTED BREVE
+0052+030F=0210# LATIN CAPITAL LETTER R WITH DOUBLE GRAVE = LATIN CAPITAL LETTER R + COMBINING DOUBLE GRAVE ACCENT
+0072+030F=0211# LATIN SMALL LETTER R WITH DOUBLE GRAVE = LATIN SMALL LETTER R + COMBINING DOUBLE GRAVE ACCENT
+0052+0311=0212# LATIN CAPITAL LETTER R WITH INVERTED BREVE = LATIN CAPITAL LETTER R + COMBINING INVERTED BREVE
+0072+0311=0213# LATIN SMALL LETTER R WITH INVERTED BREVE = LATIN SMALL LETTER R + COMBINING INVERTED BREVE
+0055+030F=0214# LATIN CAPITAL LETTER U WITH DOUBLE GRAVE = LATIN CAPITAL LETTER U + COMBINING DOUBLE GRAVE ACCENT
+0075+030F=0215# LATIN SMALL LETTER U WITH DOUBLE GRAVE = LATIN SMALL LETTER U + COMBINING DOUBLE GRAVE ACCENT
+0055+0311=0216# LATIN CAPITAL LETTER U WITH INVERTED BREVE = LATIN CAPITAL LETTER U + COMBINING INVERTED BREVE
+0075+0311=0217# LATIN SMALL LETTER U WITH INVERTED BREVE = LATIN SMALL LETTER U + COMBINING INVERTED BREVE
+0041+0325=1E00# LATIN CAPITAL LETTER A WITH RING BELOW = LATIN CAPITAL LETTER A + COMBINING RING BELOW
+0061+0325=1E01# LATIN SMALL LETTER A WITH RING BELOW = LATIN SMALL LETTER A + COMBINING RING BELOW
+0042+0307=1E02# LATIN CAPITAL LETTER B WITH DOT ABOVE = LATIN CAPITAL LETTER B + COMBINING DOT ABOVE
+0062+0307=1E03# LATIN SMALL LETTER B WITH DOT ABOVE = LATIN SMALL LETTER B + COMBINING DOT ABOVE
+0042+0323=1E04# LATIN CAPITAL LETTER B WITH DOT BELOW = LATIN CAPITAL LETTER B + COMBINING DOT BELOW
+0062+0323=1E05# LATIN SMALL LETTER B WITH DOT BELOW = LATIN SMALL LETTER B + COMBINING DOT BELOW
+0042+0332=1E06# LATIN CAPITAL LETTER B WITH LINE BELOW = LATIN CAPITAL LETTER B + COMBINING LOW LINE
+0062+0332=1E07# LATIN SMALL LETTER B WITH LINE BELOW = LATIN SMALL LETTER B + COMBINING LOW LINE
+0043+0327+0301=1E08# LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE = LATIN CAPITAL LETTER C + COMBINING CEDILLA + COMBINING ACUTE ACCENT
+0063+0327+0301=1E09# LATIN SMALL LETTER C WITH CEDILLA AND ACUTE = LATIN SMALL LETTER C + COMBINING CEDILLA + COMBINING ACUTE ACCENT
+0044+0307=1E0A# LATIN CAPITAL LETTER D WITH DOT ABOVE = LATIN CAPITAL LETTER D + COMBINING DOT ABOVE
+0064+0307=1E0B# LATIN SMALL LETTER D WITH DOT ABOVE = LATIN SMALL LETTER D + COMBINING DOT ABOVE
+0044+0323=1E0C# LATIN CAPITAL LETTER D WITH DOT BELOW = LATIN CAPITAL LETTER D + COMBINING DOT BELOW
+0064+0323=1E0D# LATIN SMALL LETTER D WITH DOT BELOW = LATIN SMALL LETTER D + COMBINING DOT BELOW
+0044+0332=1E0E# LATIN CAPITAL LETTER D WITH LINE BELOW = LATIN CAPITAL LETTER D + COMBINING LOW LINE
+0064+0332=1E0F# LATIN SMALL LETTER D WITH LINE BELOW = LATIN SMALL LETTER D + COMBINING LOW LINE
+0044+0327=1E10# LATIN CAPITAL LETTER D WITH CEDILLA = LATIN CAPITAL LETTER D + COMBINING CEDILLA
+0064+0327=1E11# LATIN SMALL LETTER D WITH CEDILLA = LATIN SMALL LETTER D + COMBINING CEDILLA
+0044+032D=1E12# LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER D + COMBINING CIRCUMFLEX ACCENT BELOW
+0064+032D=1E13# LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER D + COMBINING CIRCUMFLEX ACCENT BELOW
+0045+0304+0300=1E14# LATIN CAPITAL LETTER E WITH MACRON AND GRAVE = LATIN CAPITAL LETTER E + COMBINING MACRON + COMBINING GRAVE ACCENT
+0065+0304+0300=1E15# LATIN SMALL LETTER E WITH MACRON AND GRAVE = LATIN SMALL LETTER E + COMBINING MACRON + COMBINING GRAVE ACCENT
+0045+0304+0301=1E16# LATIN CAPITAL LETTER E WITH MACRON AND ACUTE = LATIN CAPITAL LETTER E + COMBINING MACRON + COMBINING ACUTE ACCENT
+0065+0304+0301=1E17# LATIN SMALL LETTER E WITH MACRON AND ACUTE = LATIN SMALL LETTER E + COMBINING MACRON + COMBINING ACUTE ACCENT
+0045+032D=1E18# LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT BELOW
+0065+032D=1E19# LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT BELOW
+0045+0330=1E1A# LATIN CAPITAL LETTER E WITH TILDE BELOW = LATIN CAPITAL LETTER E + COMBINING TILDE BELOW
+0065+0330=1E1B# LATIN SMALL LETTER E WITH TILDE BELOW = LATIN SMALL LETTER E + COMBINING TILDE BELOW
+0045+0327+0306=1E1C# LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE = LATIN CAPITAL LETTER E + COMBINING CEDILLA + COMBINING BREVE
+0065+0327+0306=1E1D# LATIN SMALL LETTER E WITH CEDILLA AND BREVE = LATIN SMALL LETTER E + COMBINING CEDILLA + COMBINING BREVE
+0046+0307=1E1E# LATIN CAPITAL LETTER F WITH DOT ABOVE = LATIN CAPITAL LETTER F + COMBINING DOT ABOVE
+0066+0307=1E1F# LATIN SMALL LETTER F WITH DOT ABOVE = LATIN SMALL LETTER F + COMBINING DOT ABOVE
+0047+0304=1E20# LATIN CAPITAL LETTER G WITH MACRON = LATIN CAPITAL LETTER G + COMBINING MACRON
+0067+0304=1E21# LATIN SMALL LETTER G WITH MACRON = LATIN SMALL LETTER G + COMBINING MACRON
+0048+0307=1E22# LATIN CAPITAL LETTER H WITH DOT ABOVE = LATIN CAPITAL LETTER H + COMBINING DOT ABOVE
+0068+0307=1E23# LATIN SMALL LETTER H WITH DOT ABOVE = LATIN SMALL LETTER H + COMBINING DOT ABOVE
+0048+0323=1E24# LATIN CAPITAL LETTER H WITH DOT BELOW = LATIN CAPITAL LETTER H + COMBINING DOT BELOW
+0068+0323=1E25# LATIN SMALL LETTER H WITH DOT BELOW = LATIN SMALL LETTER H + COMBINING DOT BELOW
+0048+0308=1E26# LATIN CAPITAL LETTER H WITH DIAERESIS = LATIN CAPITAL LETTER H + COMBINING DIAERESIS
+0068+0308=1E27# LATIN SMALL LETTER H WITH DIAERESIS = LATIN SMALL LETTER H + COMBINING DIAERESIS
+0048+0327=1E28# LATIN CAPITAL LETTER H WITH CEDILLA = LATIN CAPITAL LETTER H + COMBINING CEDILLA
+0068+0327=1E29# LATIN SMALL LETTER H WITH CEDILLA = LATIN SMALL LETTER H + COMBINING CEDILLA
+0048+032E=1E2A# LATIN CAPITAL LETTER H WITH BREVE BELOW = LATIN CAPITAL LETTER H + COMBINING BREVE BELOW
+0068+032E=1E2B# LATIN SMALL LETTER H WITH BREVE BELOW = LATIN SMALL LETTER H + COMBINING BREVE BELOW
+0049+0330=1E2C# LATIN CAPITAL LETTER I WITH TILDE BELOW = LATIN CAPITAL LETTER I + COMBINING TILDE BELOW
+0069+0330=1E2D# LATIN SMALL LETTER I WITH TILDE BELOW = LATIN SMALL LETTER I + COMBINING TILDE BELOW
+0049+0308+0301=1E2E# LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE = LATIN CAPITAL LETTER I + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
+0069+0308+0301=1E2F# LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE = LATIN SMALL LETTER I + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
+004B+0301=1E30# LATIN CAPITAL LETTER K WITH ACUTE = LATIN CAPITAL LETTER K + COMBINING ACUTE ACCENT
+006B+0301=1E31# LATIN SMALL LETTER K WITH ACUTE = LATIN SMALL LETTER K + COMBINING ACUTE ACCENT
+004B+0323=1E32# LATIN CAPITAL LETTER K WITH DOT BELOW = LATIN CAPITAL LETTER K + COMBINING DOT BELOW
+006B+0323=1E33# LATIN SMALL LETTER K WITH DOT BELOW = LATIN SMALL LETTER K + COMBINING DOT BELOW
+004B+0332=1E34# LATIN CAPITAL LETTER K WITH LINE BELOW = LATIN CAPITAL LETTER K + COMBINING LOW LINE
+006B+0332=1E35# LATIN SMALL LETTER K WITH LINE BELOW = LATIN SMALL LETTER K + COMBINING LOW LINE
+004C+0323=1E36# LATIN CAPITAL LETTER L WITH DOT BELOW = LATIN CAPITAL LETTER L + COMBINING DOT BELOW
+006C+0323=1E37# LATIN SMALL LETTER L WITH DOT BELOW = LATIN SMALL LETTER L + COMBINING DOT BELOW
+004C+0323+0304=1E38# LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON = LATIN CAPITAL LETTER L + COMBINING DOT BELOW + COMBINING MACRON
+006C+0323+0304=1E39# LATIN SMALL LETTER L WITH DOT BELOW AND MACRON = LATIN SMALL LETTER L + COMBINING DOT BELOW + COMBINING MACRON
+004C+0332=1E3A# LATIN CAPITAL LETTER L WITH LINE BELOW = LATIN CAPITAL LETTER L + COMBINING LOW LINE
+006C+0332=1E3B# LATIN SMALL LETTER L WITH LINE BELOW = LATIN SMALL LETTER L + COMBINING LOW LINE
+004C+032D=1E3C# LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER L + COMBINING CIRCUMFLEX ACCENT BELOW
+006C+032D=1E3D# LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER L + COMBINING CIRCUMFLEX ACCENT BELOW
+004D+0301=1E3E# LATIN CAPITAL LETTER M WITH ACUTE = LATIN CAPITAL LETTER M + COMBINING ACUTE ACCENT
+006D+0301=1E3F# LATIN SMALL LETTER M WITH ACUTE = LATIN SMALL LETTER M + COMBINING ACUTE ACCENT
+004D+0307=1E40# LATIN CAPITAL LETTER M WITH DOT ABOVE = LATIN CAPITAL LETTER M + COMBINING DOT ABOVE
+006D+0307=1E41# LATIN SMALL LETTER M WITH DOT ABOVE = LATIN SMALL LETTER M + COMBINING DOT ABOVE
+004D+0323=1E42# LATIN CAPITAL LETTER M WITH DOT BELOW = LATIN CAPITAL LETTER M + COMBINING DOT BELOW
+006D+0323=1E43# LATIN SMALL LETTER M WITH DOT BELOW = LATIN SMALL LETTER M + COMBINING DOT BELOW
+004E+0307=1E44# LATIN CAPITAL LETTER N WITH DOT ABOVE = LATIN CAPITAL LETTER N + COMBINING DOT ABOVE
+006E+0307=1E45# LATIN SMALL LETTER N WITH DOT ABOVE = LATIN SMALL LETTER N + COMBINING DOT ABOVE
+004E+0323=1E46# LATIN CAPITAL LETTER N WITH DOT BELOW = LATIN CAPITAL LETTER N + COMBINING DOT BELOW
+006E+0323=1E47# LATIN SMALL LETTER N WITH DOT BELOW = LATIN SMALL LETTER N + COMBINING DOT BELOW
+004E+0332=1E48# LATIN CAPITAL LETTER N WITH LINE BELOW = LATIN CAPITAL LETTER N + COMBINING LOW LINE
+006E+0332=1E49# LATIN SMALL LETTER N WITH LINE BELOW = LATIN SMALL LETTER N + COMBINING LOW LINE
+004E+032D=1E4A# LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER N + COMBINING CIRCUMFLEX ACCENT BELOW
+006E+032D=1E4B# LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER N + COMBINING CIRCUMFLEX ACCENT BELOW
+004F+0303+0301=1E4C# LATIN CAPITAL LETTER O WITH TILDE AND ACUTE = LATIN CAPITAL LETTER O + COMBINING TILDE + COMBINING ACUTE ACCENT
+006F+0303+0301=1E4D# LATIN SMALL LETTER O WITH TILDE AND ACUTE = LATIN SMALL LETTER O + COMBINING TILDE + COMBINING ACUTE ACCENT
+004F+0303+0308=1E4E# LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS = LATIN CAPITAL LETTER O + COMBINING TILDE + COMBINING DIAERESIS
+006F+0303+0308=1E4F# LATIN SMALL LETTER O WITH TILDE AND DIAERESIS = LATIN SMALL LETTER O + COMBINING TILDE + COMBINING DIAERESIS
+004F+0304+0300=1E50# LATIN CAPITAL LETTER O WITH MACRON AND GRAVE = LATIN CAPITAL LETTER O + COMBINING MACRON + COMBINING GRAVE ACCENT
+006F+0304+0300=1E51# LATIN SMALL LETTER O WITH MACRON AND GRAVE = LATIN SMALL LETTER O + COMBINING MACRON + COMBINING GRAVE ACCENT
+004F+0304+0301=1E52# LATIN CAPITAL LETTER O WITH MACRON AND ACUTE = LATIN CAPITAL LETTER O + COMBINING MACRON + COMBINING ACUTE ACCENT
+006F+0304+0301=1E53# LATIN SMALL LETTER O WITH MACRON AND ACUTE = LATIN SMALL LETTER O + COMBINING MACRON + COMBINING ACUTE ACCENT
+0050+0301=1E54# LATIN CAPITAL LETTER P WITH ACUTE = LATIN CAPITAL LETTER P + COMBINING ACUTE ACCENT
+0070+0301=1E55# LATIN SMALL LETTER P WITH ACUTE = LATIN SMALL LETTER P + COMBINING ACUTE ACCENT
+0050+0307=1E56# LATIN CAPITAL LETTER P WITH DOT ABOVE = LATIN CAPITAL LETTER P + COMBINING DOT ABOVE
+0070+0307=1E57# LATIN SMALL LETTER P WITH DOT ABOVE = LATIN SMALL LETTER P + COMBINING DOT ABOVE
+0052+0307=1E58# LATIN CAPITAL LETTER R WITH DOT ABOVE = LATIN CAPITAL LETTER R + COMBINING DOT ABOVE
+0072+0307=1E59# LATIN SMALL LETTER R WITH DOT ABOVE = LATIN SMALL LETTER R + COMBINING DOT ABOVE
+0052+0323=1E5A# LATIN CAPITAL LETTER R WITH DOT BELOW = LATIN CAPITAL LETTER R + COMBINING DOT BELOW
+0072+0323=1E5B# LATIN SMALL LETTER R WITH DOT BELOW = LATIN SMALL LETTER R + COMBINING DOT BELOW
+0052+0323+0304=1E5C# LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON = LATIN CAPITAL LETTER R + COMBINING DOT BELOW + COMBINING MACRON
+0072+0323+0304=1E5D# LATIN SMALL LETTER R WITH DOT BELOW AND MACRON = LATIN SMALL LETTER R + COMBINING DOT BELOW + COMBINING MACRON
+0052+0332=1E5E# LATIN CAPITAL LETTER R WITH LINE BELOW = LATIN CAPITAL LETTER R + COMBINING LOW LINE
+0072+0332=1E5F# LATIN SMALL LETTER R WITH LINE BELOW = LATIN SMALL LETTER R + COMBINING LOW LINE
+0053+0307=1E60# LATIN CAPITAL LETTER S WITH DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING DOT ABOVE
+0073+0307=1E61# LATIN SMALL LETTER S WITH DOT ABOVE = LATIN SMALL LETTER S + COMBINING DOT ABOVE
+0053+0323=1E62# LATIN CAPITAL LETTER S WITH DOT BELOW = LATIN CAPITAL LETTER S + COMBINING DOT BELOW
+0073+0323=1E63# LATIN SMALL LETTER S WITH DOT BELOW = LATIN SMALL LETTER S + COMBINING DOT BELOW
+0053+0301+0307=1E64# LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING ACUTE ACCENT + COMBINING DOT ABOVE
+0073+0301+0307=1E65# LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE = LATIN SMALL LETTER S + COMBINING ACUTE ACCENT + COMBINING DOT ABOVE
+0053+030C+0307=1E66# LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING CARON + COMBINING DOT ABOVE
+0073+030C+0307=1E67# LATIN SMALL LETTER S WITH CARON AND DOT ABOVE = LATIN SMALL LETTER S + COMBINING CARON + COMBINING DOT ABOVE
+0053+0323+0307=1E68# LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING DOT BELOW + COMBINING DOT ABOVE
+0073+0323+0307=1E69# LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE = LATIN SMALL LETTER S + COMBINING DOT BELOW + COMBINING DOT ABOVE
+0054+0307=1E6A# LATIN CAPITAL LETTER T WITH DOT ABOVE = LATIN CAPITAL LETTER T + COMBINING DOT ABOVE
+0074+0307=1E6B# LATIN SMALL LETTER T WITH DOT ABOVE = LATIN SMALL LETTER T + COMBINING DOT ABOVE
+0054+0323=1E6C# LATIN CAPITAL LETTER T WITH DOT BELOW = LATIN CAPITAL LETTER T + COMBINING DOT BELOW
+0074+0323=1E6D# LATIN SMALL LETTER T WITH DOT BELOW = LATIN SMALL LETTER T + COMBINING DOT BELOW
+0054+0332=1E6E# LATIN CAPITAL LETTER T WITH LINE BELOW = LATIN CAPITAL LETTER T + COMBINING LOW LINE
+0074+0332=1E6F# LATIN SMALL LETTER T WITH LINE BELOW = LATIN SMALL LETTER T + COMBINING LOW LINE
+0054+032D=1E70# LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER T + COMBINING CIRCUMFLEX ACCENT BELOW
+0074+032D=1E71# LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER T + COMBINING CIRCUMFLEX ACCENT BELOW
+0055+0324=1E72# LATIN CAPITAL LETTER U WITH DIAERESIS BELOW = LATIN CAPITAL LETTER U + COMBINING DIAERESIS BELOW
+0075+0324=1E73# LATIN SMALL LETTER U WITH DIAERESIS BELOW = LATIN SMALL LETTER U + COMBINING DIAERESIS BELOW
+0055+0330=1E74# LATIN CAPITAL LETTER U WITH TILDE BELOW = LATIN CAPITAL LETTER U + COMBINING TILDE BELOW
+0075+0330=1E75# LATIN SMALL LETTER U WITH TILDE BELOW = LATIN SMALL LETTER U + COMBINING TILDE BELOW
+0055+032D=1E76# LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER U + COMBINING CIRCUMFLEX ACCENT BELOW
+0075+032D=1E77# LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER U + COMBINING CIRCUMFLEX ACCENT BELOW
+0055+0303+0301=1E78# LATIN CAPITAL LETTER U WITH TILDE AND ACUTE = LATIN CAPITAL LETTER U + COMBINING TILDE + COMBINING ACUTE ACCENT
+0075+0303+0301=1E79# LATIN SMALL LETTER U WITH TILDE AND ACUTE = LATIN SMALL LETTER U + COMBINING TILDE + COMBINING ACUTE ACCENT
+0055+0304+0308=1E7A# LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS = LATIN CAPITAL LETTER U + COMBINING MACRON + COMBINING DIAERESIS
+0075+0304+0308=1E7B# LATIN SMALL LETTER U WITH MACRON AND DIAERESIS = LATIN SMALL LETTER U + COMBINING MACRON + COMBINING DIAERESIS
+0056+0303=1E7C# LATIN CAPITAL LETTER V WITH TILDE = LATIN CAPITAL LETTER V + COMBINING TILDE
+0076+0303=1E7D# LATIN SMALL LETTER V WITH TILDE = LATIN SMALL LETTER V + COMBINING TILDE
+0056+0323=1E7E# LATIN CAPITAL LETTER V WITH DOT BELOW = LATIN CAPITAL LETTER V + COMBINING DOT BELOW
+0076+0323=1E7F# LATIN SMALL LETTER V WITH DOT BELOW = LATIN SMALL LETTER V + COMBINING DOT BELOW
+0057+0300=1E80# LATIN CAPITAL LETTER W WITH GRAVE = LATIN CAPITAL LETTER W + COMBINING GRAVE ACCENT
+0077+0300=1E81# LATIN SMALL LETTER W WITH GRAVE = LATIN SMALL LETTER W + COMBINING GRAVE ACCENT
+0057+0301=1E82# LATIN CAPITAL LETTER W WITH ACUTE = LATIN CAPITAL LETTER W + COMBINING ACUTE ACCENT
+0077+0301=1E83# LATIN SMALL LETTER W WITH ACUTE = LATIN SMALL LETTER W + COMBINING ACUTE ACCENT
+0057+0308=1E84# LATIN CAPITAL LETTER W WITH DIAERESIS = LATIN CAPITAL LETTER W + COMBINING DIAERESIS
+0077+0308=1E85# LATIN SMALL LETTER W WITH DIAERESIS = LATIN SMALL LETTER W + COMBINING DIAERESIS
+0057+0307=1E86# LATIN CAPITAL LETTER W WITH DOT ABOVE = LATIN CAPITAL LETTER W + COMBINING DOT ABOVE
+0077+0307=1E87# LATIN SMALL LETTER W WITH DOT ABOVE = LATIN SMALL LETTER W + COMBINING DOT ABOVE
+0057+0323=1E88# LATIN CAPITAL LETTER W WITH DOT BELOW = LATIN CAPITAL LETTER W + COMBINING DOT BELOW
+0077+0323=1E89# LATIN SMALL LETTER W WITH DOT BELOW = LATIN SMALL LETTER W + COMBINING DOT BELOW
+0058+0307=1E8A# LATIN CAPITAL LETTER X WITH DOT ABOVE = LATIN CAPITAL LETTER X + COMBINING DOT ABOVE
+0078+0307=1E8B# LATIN SMALL LETTER X WITH DOT ABOVE = LATIN SMALL LETTER X + COMBINING DOT ABOVE
+0058+0308=1E8C# LATIN CAPITAL LETTER X WITH DIAERESIS = LATIN CAPITAL LETTER X + COMBINING DIAERESIS
+0078+0308=1E8D# LATIN SMALL LETTER X WITH DIAERESIS = LATIN SMALL LETTER X + COMBINING DIAERESIS
+0059+0307=1E8E# LATIN CAPITAL LETTER Y WITH DOT ABOVE = LATIN CAPITAL LETTER Y + COMBINING DOT ABOVE
+0079+0307=1E8F# LATIN SMALL LETTER Y WITH DOT ABOVE = LATIN SMALL LETTER Y + COMBINING DOT ABOVE
+005A+0302=1E90# LATIN CAPITAL LETTER Z WITH CIRCUMFLEX = LATIN CAPITAL LETTER Z + COMBINING CIRCUMFLEX ACCENT
+007A+0302=1E91# LATIN SMALL LETTER Z WITH CIRCUMFLEX = LATIN SMALL LETTER Z + COMBINING CIRCUMFLEX ACCENT
+005A+0323=1E92# LATIN CAPITAL LETTER Z WITH DOT BELOW = LATIN CAPITAL LETTER Z + COMBINING DOT BELOW
+007A+0323=1E93# LATIN SMALL LETTER Z WITH DOT BELOW = LATIN SMALL LETTER Z + COMBINING DOT BELOW
+005A+0332=1E94# LATIN CAPITAL LETTER Z WITH LINE BELOW = LATIN CAPITAL LETTER Z + COMBINING LOW LINE
+007A+0332=1E95# LATIN SMALL LETTER Z WITH LINE BELOW = LATIN SMALL LETTER Z + COMBINING LOW LINE
+0068+0332=1E96# LATIN SMALL LETTER H WITH LINE BELOW = LATIN SMALL LETTER H + COMBINING LOW LINE
+0074+0308=1E97# LATIN SMALL LETTER T WITH DIAERESIS = LATIN SMALL LETTER T + COMBINING DIAERESIS
+0077+030A=1E98# LATIN SMALL LETTER W WITH RING ABOVE = LATIN SMALL LETTER W + COMBINING RING ABOVE
+0079+030A=1E99# LATIN SMALL LETTER Y WITH RING ABOVE = LATIN SMALL LETTER Y + COMBINING RING ABOVE
+017F+0307=1E9B# LATIN SMALL LETTER LONG S WITH DOT ABOVE = LATIN SMALL LETTER LONG S + COMBINING DOT ABOVE
+0041+0323=1EA0# LATIN CAPITAL LETTER A WITH DOT BELOW = LATIN CAPITAL LETTER A + COMBINING DOT BELOW
+0061+0323=1EA1# LATIN SMALL LETTER A WITH DOT BELOW = LATIN SMALL LETTER A + COMBINING DOT BELOW
+0041+0309=1EA2# LATIN CAPITAL LETTER A WITH HOOK ABOVE = LATIN CAPITAL LETTER A + COMBINING HOOK ABOVE
+0061+0309=1EA3# LATIN SMALL LETTER A WITH HOOK ABOVE = LATIN SMALL LETTER A + COMBINING HOOK ABOVE
+0041+0302+0301=1EA4# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
+0061+0302+0301=1EA5# LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
+0041+0302+0300=1EA6# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
+0061+0302+0300=1EA7# LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
+0041+0302+0309=1EA8# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
+0061+0302+0309=1EA9# LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
+0041+0302+0303=1EAA# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
+0061+0302+0303=1EAB# LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
+0041+0302+0323=1EAC# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
+0061+0302+0323=1EAD# LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
+0041+0306+0301=1EAE# LATIN CAPITAL LETTER A WITH BREVE AND ACUTE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING ACUTE ACCENT
+0061+0306+0301=1EAF# LATIN SMALL LETTER A WITH BREVE AND ACUTE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING ACUTE ACCENT
+0041+0306+0300=1EB0# LATIN CAPITAL LETTER A WITH BREVE AND GRAVE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING GRAVE ACCENT
+0061+0306+0300=1EB1# LATIN SMALL LETTER A WITH BREVE AND GRAVE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING GRAVE ACCENT
+0041+0306+0309=1EB2# LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING HOOK ABOVE
+0061+0306+0309=1EB3# LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING HOOK ABOVE
+0041+0306+0303=1EB4# LATIN CAPITAL LETTER A WITH BREVE AND TILDE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING TILDE
+0061+0306+0303=1EB5# LATIN SMALL LETTER A WITH BREVE AND TILDE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING TILDE
+0041+0306+0323=1EB6# LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING DOT BELOW
+0061+0306+0323=1EB7# LATIN SMALL LETTER A WITH BREVE AND DOT BELOW = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING DOT BELOW
+0045+0323=1EB8# LATIN CAPITAL LETTER E WITH DOT BELOW = LATIN CAPITAL LETTER E + COMBINING DOT BELOW
+0065+0323=1EB9# LATIN SMALL LETTER E WITH DOT BELOW = LATIN SMALL LETTER E + COMBINING DOT BELOW
+0045+0309=1EBA# LATIN CAPITAL LETTER E WITH HOOK ABOVE = LATIN CAPITAL LETTER E + COMBINING HOOK ABOVE
+0065+0309=1EBB# LATIN SMALL LETTER E WITH HOOK ABOVE = LATIN SMALL LETTER E + COMBINING HOOK ABOVE
+0045+0303=1EBC# LATIN CAPITAL LETTER E WITH TILDE = LATIN CAPITAL LETTER E + COMBINING TILDE
+0065+0303=1EBD# LATIN SMALL LETTER E WITH TILDE = LATIN SMALL LETTER E + COMBINING TILDE
+0045+0302+0301=1EBE# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
+0065+0302+0301=1EBF# LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
+0045+0302+0300=1EC0# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
+0065+0302+0300=1EC1# LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
+0045+0302+0309=1EC2# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
+0065+0302+0309=1EC3# LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
+0045+0302+0303=1EC4# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
+0065+0302+0303=1EC5# LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
+0045+0302+0323=1EC6# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
+0065+0302+0323=1EC7# LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
+0049+0309=1EC8# LATIN CAPITAL LETTER I WITH HOOK ABOVE = LATIN CAPITAL LETTER I + COMBINING HOOK ABOVE
+0069+0309=1EC9# LATIN SMALL LETTER I WITH HOOK ABOVE = LATIN SMALL LETTER I + COMBINING HOOK ABOVE
+0049+0323=1ECA# LATIN CAPITAL LETTER I WITH DOT BELOW = LATIN CAPITAL LETTER I + COMBINING DOT BELOW
+0069+0323=1ECB# LATIN SMALL LETTER I WITH DOT BELOW = LATIN SMALL LETTER I + COMBINING DOT BELOW
+004F+0323=1ECC# LATIN CAPITAL LETTER O WITH DOT BELOW = LATIN CAPITAL LETTER O + COMBINING DOT BELOW
+006F+0323=1ECD# LATIN SMALL LETTER O WITH DOT BELOW = LATIN SMALL LETTER O + COMBINING DOT BELOW
+004F+0309=1ECE# LATIN CAPITAL LETTER O WITH HOOK ABOVE = LATIN CAPITAL LETTER O + COMBINING HOOK ABOVE
+006F+0309=1ECF# LATIN SMALL LETTER O WITH HOOK ABOVE = LATIN SMALL LETTER O + COMBINING HOOK ABOVE
+004F+0302+0301=1ED0# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
+006F+0302+0301=1ED1# LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
+004F+0302+0300=1ED2# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
+006F+0302+0300=1ED3# LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
+004F+0302+0309=1ED4# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
+006F+0302+0309=1ED5# LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
+004F+0302+0303=1ED6# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
+006F+0302+0303=1ED7# LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
+004F+0302+0323=1ED8# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
+006F+0302+0323=1ED9# LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
+004F+031B+0301=1EDA# LATIN CAPITAL LETTER O WITH HORN AND ACUTE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING ACUTE ACCENT
+006F+031B+0301=1EDB# LATIN SMALL LETTER O WITH HORN AND ACUTE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING ACUTE ACCENT
+004F+031B+0300=1EDC# LATIN CAPITAL LETTER O WITH HORN AND GRAVE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING GRAVE ACCENT
+006F+031B+0300=1EDD# LATIN SMALL LETTER O WITH HORN AND GRAVE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING GRAVE ACCENT
+004F+031B+0309=1EDE# LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING HOOK ABOVE
+006F+031B+0309=1EDF# LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING HOOK ABOVE
+004F+031B+0303=1EE0# LATIN CAPITAL LETTER O WITH HORN AND TILDE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING TILDE
+006F+031B+0303=1EE1# LATIN SMALL LETTER O WITH HORN AND TILDE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING TILDE
+004F+031B+0323=1EE2# LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING DOT BELOW
+006F+031B+0323=1EE3# LATIN SMALL LETTER O WITH HORN AND DOT BELOW = LATIN SMALL LETTER O + COMBINING HORN + COMBINING DOT BELOW
+0055+0323=1EE4# LATIN CAPITAL LETTER U WITH DOT BELOW = LATIN CAPITAL LETTER U + COMBINING DOT BELOW
+0075+0323=1EE5# LATIN SMALL LETTER U WITH DOT BELOW = LATIN SMALL LETTER U + COMBINING DOT BELOW
+0055+0309=1EE6# LATIN CAPITAL LETTER U WITH HOOK ABOVE = LATIN CAPITAL LETTER U + COMBINING HOOK ABOVE
+0075+0309=1EE7# LATIN SMALL LETTER U WITH HOOK ABOVE = LATIN SMALL LETTER U + COMBINING HOOK ABOVE
+0055+031B+0301=1EE8# LATIN CAPITAL LETTER U WITH HORN AND ACUTE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING ACUTE ACCENT
+0075+031B+0301=1EE9# LATIN SMALL LETTER U WITH HORN AND ACUTE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING ACUTE ACCENT
+0055+031B+0300=1EEA# LATIN CAPITAL LETTER U WITH HORN AND GRAVE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING GRAVE ACCENT
+0075+031B+0300=1EEB# LATIN SMALL LETTER U WITH HORN AND GRAVE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING GRAVE ACCENT
+0055+031B+0309=1EEC# LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING HOOK ABOVE
+0075+031B+0309=1EED# LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING HOOK ABOVE
+0055+031B+0303=1EEE# LATIN CAPITAL LETTER U WITH HORN AND TILDE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING TILDE
+0075+031B+0303=1EEF# LATIN SMALL LETTER U WITH HORN AND TILDE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING TILDE
+0055+031B+0323=1EF0# LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING DOT BELOW
+0075+031B+0323=1EF1# LATIN SMALL LETTER U WITH HORN AND DOT BELOW = LATIN SMALL LETTER U + COMBINING HORN + COMBINING DOT BELOW
+0059+0300=1EF2# LATIN CAPITAL LETTER Y WITH GRAVE = LATIN CAPITAL LETTER Y + COMBINING GRAVE ACCENT
+0079+0300=1EF3# LATIN SMALL LETTER Y WITH GRAVE = LATIN SMALL LETTER Y + COMBINING GRAVE ACCENT
+0059+0323=1EF4# LATIN CAPITAL LETTER Y WITH DOT BELOW = LATIN CAPITAL LETTER Y + COMBINING DOT BELOW
+0079+0323=1EF5# LATIN SMALL LETTER Y WITH DOT BELOW = LATIN SMALL LETTER Y + COMBINING DOT BELOW
+0059+0309=1EF6# LATIN CAPITAL LETTER Y WITH HOOK ABOVE = LATIN CAPITAL LETTER Y + COMBINING HOOK ABOVE
+0079+0309=1EF7# LATIN SMALL LETTER Y WITH HOOK ABOVE = LATIN SMALL LETTER Y + COMBINING HOOK ABOVE
+0059+0303=1EF8# LATIN CAPITAL LETTER Y WITH TILDE = LATIN CAPITAL LETTER Y + COMBINING TILDE
+0079+0303=1EF9# LATIN SMALL LETTER Y WITH TILDE = LATIN SMALL LETTER Y + COMBINING TILDE
+
--- a/src/test/java/org/xbib/charset/AnselCharsetTest.java
+++ b/src/test/java/org/xbib/charset/AnselCharsetTest.java
@ -0,0 +1,73 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.text.Normalizer;
+
+/**
+ *
+ */
+public class AnselCharsetTest extends Assert {
+
+    @Test
+    public void testAnsel() throws Exception {
+        ByteBuffer buf = ByteBuffer.wrap("\u00e8\u0075".getBytes("ISO-8859-1"));
+        Charset charset = Charset.forName("ANSEL");
+        CharsetDecoder decoder = charset.newDecoder();
+        CharBuffer cbuf = decoder.decode(buf);
+        String s = cbuf.toString();
+        assertEquals("\u0075\u0308", s);
+        s = Normalizer.normalize(s, Normalizer.Form.NFC);
+        assertEquals("ü", s);
+    }
+
+    @Test
+    public void testAnsel2() throws Exception {
+        ByteBuffer buf = ByteBuffer.wrap("\u00AC\u00E2\u0041\u00ED\u0042\u00E2\u0043\u00E2\u0044".getBytes("ISO-8859-1"));
+        Charset charset = Charset.forName("ANSEL");
+        CharsetDecoder decoder = charset.newDecoder();
+        CharBuffer cbuf = decoder.decode(buf);
+        String s = cbuf.toString();
+        assertEquals(9, s.length());
+        s = Normalizer.normalize(s, Normalizer.Form.NFC);
+        assertEquals("ƠÁB̕ĆD́", s);
+        assertEquals(7, s.length());
+    }
+
+}
--- a/src/test/java/org/xbib/charset/AnselCodeTableParserTest.java
+++ b/src/test/java/org/xbib/charset/AnselCodeTableParserTest.java
@ -0,0 +1,56 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.junit.Test;
+
+import javax.xml.stream.XMLStreamException;
+
+/** Smoke test for {@link AnselCodeTableParser} against the {@code codetables.xml} classpath resource. */
+public class AnselCodeTableParserTest {
+
+    // static final: one logger for the class rather than a mutable field per test instance
+    private static final Logger logger = LogManager.getLogger(AnselCodeTableParserTest.class);
+
+    /** Parses the code tables and logs name and length of each character set; asserts nothing. */
+    @Test
+    public void test() throws XMLStreamException {
+        AnselCodeTableParser parser = new AnselCodeTableParser(getClass().getResourceAsStream("codetables.xml"));
+        for (AnselCodeTableParser.CodeTable table : parser.getCodeTables()) {
+            for (AnselCodeTableParser.CharacterSet set : table.getCharacterSets()) {
+                logger.info("{} {}", set.getName(), set.getLength());
+            }
+        }
+    }
+}
--- a/src/test/java/org/xbib/charset/BibliographicCharsetsTest.java
+++ b/src/test/java/org/xbib/charset/BibliographicCharsetsTest.java
@ -0,0 +1,52 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import org.junit.Test;
+
+import java.nio.charset.Charset;
+
+/**
+ *
+ */
+public class BibliographicCharsetsTest {
+
+    @Test
+    public void testAvailability() {
+        Charset charset = BibliographicCharsets.ANSEL;
+        charset = BibliographicCharsets.ISO5426;
+        charset = BibliographicCharsets.ISO5428;
+        charset = BibliographicCharsets.MAB;
+        charset = BibliographicCharsets.MAB_DISKETTE;
+        charset = BibliographicCharsets.PICA;
+    }
+}
--- a/src/test/java/org/xbib/charset/ISO5426Test.java
+++ b/src/test/java/org/xbib/charset/ISO5426Test.java
@ -0,0 +1,85 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.StandardCharsets;
+import java.util.SortedMap;
+
+/**
+ * Checks that {@code ISO-5426} is a registered charset and that the charsets named
+ * {@code MAB2} and {@code x-MAB} decode sample bytes to the expected characters.
+ */
+public class ISO5426Test extends Assert {
+
+    /** The name {@code ISO-5426} must be among the charsets the JVM reports as available. */
+    @Test
+    public void listCharsets() throws Exception {
+        SortedMap<String, Charset> map = Charset.availableCharsets();
+        assertTrue(map.containsKey("ISO-5426"));
+    }
+
+    /** The ISO-8859-1 bytes of "Éa", decoded via {@code MAB2}, must yield "ä". */
+    @Test
+    public void testMAB2() throws Exception {
+        assertEquals("ä", decode("MAB2", "Éa"));
+    }
+
+    /** Same input as {@link #testMAB2()}, but looked up under the name {@code x-MAB}. */
+    @Test
+    public void testXMAB() throws Exception {
+        assertEquals("ä", decode("x-MAB", "Éa"));
+    }
+
+    /** Byte A3, decoded via {@code x-MAB}, must yield the pound sign. */
+    @Test
+    public void testPound() throws Exception {
+        assertEquals("£", decode("x-MAB", "\u00A3"));
+    }
+
+    /**
+     * Decodes the ISO-8859-1 bytes of {@code latin1} with the charset registered as {@code charsetName}.
+     */
+    private static String decode(String charsetName, String latin1) throws Exception {
+        ByteBuffer buf = ByteBuffer.wrap(latin1.getBytes(StandardCharsets.ISO_8859_1));
+        CharsetDecoder decoder = Charset.forName(charsetName).newDecoder();
+        CharBuffer cbuf = decoder.decode(buf);
+        return cbuf.toString();
+    }
+
+}
--- a/src/test/java/org/xbib/charset/NormalizerTest.java
+++ b/src/test/java/org/xbib/charset/NormalizerTest.java
@ -0,0 +1,60 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.text.Normalizer;
+
+/**
+ * Exercises {@link Normalizer} NFC normalization on German sample text.
+ */
+public class NormalizerTest extends Assert {
+
+    /** UTF-8 input holding "a" followed by U+0308 must be composed by NFC, giving "gemäß". */
+    @Test
+    public void testNormalizer() throws Exception {
+        byte[] utf8 = {103, 101, 109, 97, (byte) 204, (byte) 136, (byte) 195, (byte) 159};
+        String composed = Normalizer.normalize(new String(utf8, "UTF-8"), Normalizer.Form.NFC);
+        assertEquals("gemäß", composed);
+    }
+
+    /** NFC must leave the length of this 56-character sentence unchanged. */
+    @Test
+    public void tesNFC() {
+        final String sentence = "Für Bandanzeige bitte zugehörige Publikationen anklicken";
+        assertEquals(56, sentence.length());
+        assertEquals(56, Normalizer.normalize(sentence, Normalizer.Form.NFC).length());
+    }
+
+}
--- a/src/test/java/org/xbib/charset/SimpleAnselCharsetTest.java
+++ b/src/test/java/org/xbib/charset/SimpleAnselCharsetTest.java
@ -0,0 +1,64 @@
+/*
+ * Licensed to Jörg Prante and xbib under one or more contributor
+ * license agreements. See the NOTICE.txt file distributed with this work
+ * for additional information regarding copyright ownership.
+ *
+ * Copyright (C) 2016 Jörg Prante and xbib
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program; if not, see http://www.gnu.org/licenses
+ * or write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The interactive user interfaces in modified source and object code
+ * versions of this program must display Appropriate Legal Notices,
+ * as required under Section 5 of the GNU Affero General Public License.
+ *
+ * In accordance with Section 7(b) of the GNU Affero General Public
+ * License, these Appropriate Legal Notices must retain the display of the
+ * "Powered by xbib" logo. If the display of the logo is not reasonably
+ * feasible for technical reasons, the Appropriate Legal Notices must display
+ * the words "Powered by xbib".
+ */
+package org.xbib.charset;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.util.SortedMap;
+
+/**
+ *
+ */
+public class SimpleAnselCharsetTest extends Assert {
+
+    @Test
+    public void listCharsets() throws Exception {
+        SortedMap<String, Charset> map = Charset.availableCharsets();
+        assertTrue(map.keySet().contains("SIMPLE_ANSEL"));
+    }
+
+    @Test
+    public void testSimpleAnsel() throws Exception {
+        ByteBuffer buf = ByteBuffer.wrap("\u00e8\u0075".getBytes("ISO-8859-1"));
+        Charset charset = Charset.forName("SIMPLE_ANSEL");
+        CharsetDecoder decoder = charset.newDecoder();
+        CharBuffer cbuf = decoder.decode(buf);
+        String output = cbuf.toString();
+        assertEquals("\u00fc", output);
+    }
+
+}
--- a/src/test/java/org/xbib/charset/package-info.java
+++ b/src/test/java/org/xbib/charset/package-info.java
@ -0,0 +1,4 @@
+/**
+ * Tests for the bibliographic character set implementations.
+ */
+package org.xbib.charset;
--- a/src/test/resources/log4j2.xml
+++ b/src/test/resources/log4j2.xml
@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration status="OFF"><!-- Log4j 2 configuration for the test run; status="OFF" sets Log4j's internal status logging to OFF -->
+    <appenders>
+        <Console name="Console" target="SYSTEM_OUT"><!-- console appender named "Console", targeting SYSTEM_OUT -->
+            <PatternLayout pattern="[%d{ABSOLUTE}][%-5p][%-25c][%t] %m%n"/><!-- [time][level][logger][thread] message, then newline -->
+        </Console>
+    </appenders>
+    <Loggers>
+        <Root level="info"><!-- root logger at level info, routed to the "Console" appender -->
+            <AppenderRef ref="Console" />
+        </Root>
+    </Loggers>
+</configuration>
				`@ -0,0 +1 @@`
				`rootProject.name = 'bibliographic-character-sets'`
				`@ -0,0 +1 @@`
				`org.xbib.charset.BibliographicCharsetProvider`