initial import

This commit is contained in:
Jörg Prante 2016-08-18 20:39:56 +02:00
commit 931e2b9cda
41 changed files with 104466 additions and 0 deletions

15
.gitignore vendored Normal file
View file

@ -0,0 +1,15 @@
/data
/work
/logs
/.idea
/target
.DS_Store
*.iml
/.settings
/.classpath
/.project
/.gradle
/build
/plugins
/sessions
*~

8
.travis.yml Normal file
View file

@ -0,0 +1,8 @@
sudo: false
language: java
jdk:
- oraclejdk8
cache:
directories:
- $HOME/.m2

39
CREDITS.txt Normal file
View file

@ -0,0 +1,39 @@
These bibliographic character sets are collected and improved
by Jörg Prante <joergprante@gmail.com>
Thanks to:
Library of Congress
The Library of Congress provides an ANSEL code table file
https://www.loc.gov/marc/specifications/codetables.xml
at https://www.loc.gov/marc/specifications/specchartables.html, which made the
character set implementation of ANSEL/Z39.47 possible, including the East
Asian Character Code (EACC).
US-ASCII
The US-ASCII re-implementation was taken from the GNU classpath project.
It is provided as a demonstration of a simple single-byte character set.
The original code was licensed under the GNU General Public License, version 2 or later (GPL), with the GNU Classpath linking exception.
Simple ANSEL
The ANSEL charset implementation by Piotr Andzel http://anselcharset.sourceforge.net/
has been included as "simple ANSEL". The original code was licensed under the
GNU Lesser General Public License 3 (LGPL 3.0)
MAB
The MAB Charset Java implementation was developed by Jürgen Kett of
Deutsche Nationalbibliothek (DNB) in 2004 and was licensed by GNU Public License 2 (GPL)
MAB-Diskette
This is a CP850 variant which could only be implemented with the help of
Thomas Berger http://www.gymel.com/charsets/MAB-Diskette.html
PicaCharset
An alternative PICA character set implementation of Deutsche Nationalbibliothek (DNB)
The original licence was GNU Public License 2 (GPL).
ISO 5428:1984, "Greek alphabet coded character set for bibliographic information interchange",
has been implemented with the help of https://en.wikipedia.org/wiki/ISO_5428

76
README.md Normal file
View file

@ -0,0 +1,76 @@
# Bibliographic character sets
This is a collection of bibliographic character sets implemented in
Java.
These character sets have not been included in the standard Java
distribution. Most of the character sets predate Unicode and are
dormant now but are still in active use in library application
system software.
The reason to provide these character sets is to assist the public
in migrating library data to Unicode, and to UTF-8 in particular.
You can include this jar in the classpath; the Java CharsetProvider and
ServiceLoader API will then make the character sets available,
e.g. by `Charset.forName(name)`.
This is free software.
Please follow the AGPL license, which requires you to offer the source code
of your project to the public if you make modifications to this program.
All contributions and pull requests are welcome.
If you have questions or find issues, please post them at
https://github.com/xbib/bibliographic-character-sets/issues
## List of character sets included
### ANSEL "ANSI/NISO Z39.47-1993 (R2003) Extended Latin Alphabet Coded Character Set for Bibliographic Use (ANSEL)"
This implementation can only decode from ANSEL / Z39.47.
Included are the following sets specified by the Library of Congress at
https://www.loc.gov/marc/specifications/specchartables.html
Basic Latin (ASCII), Extended Latin (ANSEL), Greek Symbols,
Subscripts, Superscripts, Basic Hebrew, Basic Cyrillic,
Extended Cyrillic, Basic Arabic, Extended Arabic,
Basic Greek, Chinese, Japanese, Korean (EACC)
Usage:
Charset.forName("ANSEL")
### ISO 5426 "Extension of the Latin alphabet coded character set for bibliographic information interchange"
Usage:
Charset.forName("x-MAB")
### ISO 5428 "Greek alphabet coded character set for bibliographic information interchange"
### Pica (a variant of the INTERMARC character set, a 1979 French/Danish adoption of USMARC/UKMARC)
### MAB-Diskette (a variant of CP850 character set)
### US-ASCII (re-implementation for demonstration purpose, disabled by default)
See also the CREDITS.txt for acknowledgements.
# License
Copyright (C) 2016 Jörg Prante and the xbib organization
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

67
build.gradle Normal file
View file

@ -0,0 +1,67 @@
println "Host: " + java.net.InetAddress.getLocalHost()
println "Gradle: " + gradle.gradleVersion + " JVM: " + org.gradle.internal.jvm.Jvm.current() + " Groovy: " + GroovySystem.getVersion()
println "Build: group: '${project.group}', name: '${project.name}', version: '${project.version}'"
apply plugin: 'java'
apply plugin: 'maven'
apply plugin: 'signing'
apply plugin: 'findbugs'
apply plugin: 'pmd'
apply plugin: 'checkstyle'
repositories {
mavenLocal()
mavenCentral()
jcenter()
maven {
url "http://xbib.org/repository"
}
}
configurations {
wagon
}
dependencies {
testCompile "org.apache.logging.log4j:log4j-core:2.5"
testCompile('junit:junit:4.12')
wagon 'org.apache.maven.wagon:wagon-ssh-external:2.10'
}
sourceCompatibility = JavaVersion.VERSION_1_8
targetCompatibility = JavaVersion.VERSION_1_8
[compileJava, compileTestJava]*.options*.encoding = 'UTF-8'
tasks.withType(JavaCompile) {
options.compilerArgs << "-Xlint:deprecation,unchecked"
}
test {
testLogging {
showStandardStreams = false
exceptionFormat = 'full'
}
}
tasks.withType(FindBugs) {
ignoreFailures = true
reports {
xml.enabled = false
html.enabled = true
}
}
task sourcesJar(type: Jar, dependsOn: classes) {
classifier 'sources'
from sourceSets.main.allSource
}
// Packages the generated Javadoc HTML into the '-javadoc' artifact.
// The 'from' is required: a Jar task without any source produces a jar that
// contains nothing but a manifest.
task javadocJar(type: Jar, dependsOn: javadoc) {
    classifier 'javadoc'
    from javadoc.destinationDir
}
artifacts {
archives sourcesJar, javadocJar
}
if (project.hasProperty('signing.keyId')) {
signing {
sign configurations.archives
}
}
apply from: 'gradle/publish.gradle'

View file

@ -0,0 +1,323 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE module PUBLIC
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<!-- This is a checkstyle configuration file. For descriptions of
what the following rules do, please see the checkstyle configuration
page at http://checkstyle.sourceforge.net/config.html -->
<module name="Checker">
<module name="FileTabCharacter">
<!-- Checks that there are no tab characters in the file.
-->
</module>
<module name="NewlineAtEndOfFile">
<property name="lineSeparator" value="lf"/>
</module>
<module name="RegexpSingleline">
<!-- Checks that FIXME is not used in comments. TODO is preferred.
-->
<property name="format" value="((//.*)|(\*.*))FIXME" />
<property name="message" value='TODO is preferred to FIXME. e.g. "TODO(johndoe): Refactor when v2 is released."' />
</module>
<module name="RegexpSingleline">
<!-- Checks that TODOs are named. (Actually, just that they are followed
by an open paren.)
-->
<property name="format" value="((//.*)|(\*.*))TODO[^(]" />
<property name="message" value='All TODOs should be named. e.g. "TODO(johndoe): Refactor when v2 is released."' />
</module>
<module name="JavadocPackage">
<!-- Checks that each Java package has a Javadoc file used for commenting.
Only allows a package-info.java, not package.html. -->
</module>
<!-- All Java AST specific tests live under TreeWalker module. -->
<module name="TreeWalker">
<!--
IMPORT CHECKS
-->
<module name="RedundantImport">
<!-- Checks for redundant import statements. -->
<property name="severity" value="error"/>
</module>
<module name="ImportOrder">
<!-- Checks for out of order import statements. -->
<property name="severity" value="warning"/>
<property name="groups" value="com.google,android,junit,net,org,java,javax"/>
<!-- This ensures that static imports go first. -->
<property name="option" value="top"/>
<property name="tokens" value="STATIC_IMPORT, IMPORT"/>
</module>
<!--
JAVADOC CHECKS
-->
<!-- Checks for Javadoc comments. -->
<!-- See http://checkstyle.sf.net/config_javadoc.html -->
<module name="JavadocMethod">
<property name="scope" value="protected"/>
<property name="severity" value="warning"/>
<property name="allowMissingJavadoc" value="true"/>
<property name="allowMissingParamTags" value="true"/>
<property name="allowMissingReturnTag" value="true"/>
<property name="allowMissingThrowsTags" value="true"/>
<property name="allowThrowsTagsForSubclasses" value="true"/>
<property name="allowUndeclaredRTE" value="true"/>
</module>
<module name="JavadocType">
<property name="scope" value="protected"/>
<property name="severity" value="error"/>
</module>
<module name="JavadocStyle">
<property name="severity" value="warning"/>
</module>
<!--
NAMING CHECKS
-->
<!-- Item 38 - Adhere to generally accepted naming conventions -->
<module name="PackageName">
<!-- Validates identifiers for package names against the
supplied expression. -->
<!-- Here the default checkstyle rule restricts package name parts to
seven characters, this is not in line with common practice at Google.
-->
<property name="format" value="^[a-z]+(\.[a-z][a-z0-9]{1,})*$"/>
<property name="severity" value="warning"/>
</module>
<module name="TypeNameCheck">
<!-- Validates type names (classes, interfaces, enums) against the
expression "^[A-Z][a-zA-Z0-9]*$". -->
<metadata name="altname" value="TypeName"/>
<property name="severity" value="warning"/>
</module>
<module name="ConstantNameCheck">
<!-- Validates non-private, static, final fields against the supplied
expression "^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$". -->
<metadata name="altname" value="ConstantName"/>
<property name="applyToPublic" value="true"/>
<property name="applyToProtected" value="true"/>
<property name="applyToPackage" value="true"/>
<property name="applyToPrivate" value="false"/>
<property name="format" value="^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$"/>
<message key="name.invalidPattern"
value="Variable ''{0}'' should be in ALL_CAPS (if it is a constant) or be private (otherwise)."/>
<property name="severity" value="warning"/>
</module>
<module name="StaticVariableNameCheck">
<!-- Validates static, non-final fields against the supplied
expression "^[a-z][a-zA-Z0-9]*_?$". -->
<metadata name="altname" value="StaticVariableName"/>
<property name="applyToPublic" value="true"/>
<property name="applyToProtected" value="true"/>
<property name="applyToPackage" value="true"/>
<property name="applyToPrivate" value="true"/>
<property name="format" value="^[a-z][a-zA-Z0-9]*_?$"/>
<property name="severity" value="warning"/>
</module>
<module name="MemberNameCheck">
<!-- Validates non-static members against the supplied expression. -->
<metadata name="altname" value="MemberName"/>
<property name="applyToPublic" value="true"/>
<property name="applyToProtected" value="true"/>
<property name="applyToPackage" value="true"/>
<property name="applyToPrivate" value="true"/>
<property name="format" value="^[a-z][a-zA-Z0-9]*$"/>
<property name="severity" value="warning"/>
</module>
<module name="MethodNameCheck">
<!-- Validates identifiers for method names. -->
<metadata name="altname" value="MethodName"/>
<property name="format" value="^[a-z][a-zA-Z0-9]*(_[a-zA-Z0-9]+)*$"/>
<property name="severity" value="warning"/>
</module>
<module name="ParameterName">
<!-- Validates identifiers for method parameters against the
expression "^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning"/>
</module>
<module name="LocalFinalVariableName">
<!-- Validates identifiers for local final variables against the
expression "^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning"/>
</module>
<module name="LocalVariableName">
<!-- Validates identifiers for local variables against the
expression "^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning"/>
</module>
<!--
LENGTH and CODING CHECKS
-->
<module name="LineLength">
<!-- Checks if a line is too long. -->
<property name="max" value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.max}" default="128"/>
<property name="severity" value="error"/>
<!--
The default ignore pattern exempts the following elements:
- import statements
- long URLs inside comments
-->
<property name="ignorePattern"
value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.ignorePattern}"
default="^(package .*;\s*)|(import .*;\s*)|( *(\*|//).*https?://.*)$"/>
</module>
<module name="LeftCurly">
<!-- Checks for placement of the left curly brace ('{'). -->
<property name="severity" value="warning"/>
</module>
<module name="RightCurly">
<!-- Checks right curlies on CATCH, ELSE, and TRY blocks are on
the same line. e.g., the following example is fine:
<pre>
if {
...
} else
</pre>
-->
<!-- This next example is not fine:
<pre>
if {
...
}
else
</pre>
-->
<property name="option" value="same"/>
<property name="severity" value="warning"/>
</module>
<!-- Checks for braces around if and else blocks -->
<module name="NeedBraces">
<property name="severity" value="warning"/>
<property name="tokens" value="LITERAL_IF, LITERAL_ELSE, LITERAL_FOR, LITERAL_WHILE, LITERAL_DO"/>
</module>
<module name="UpperEll">
<!-- Checks that long constants are defined with an upper ell.-->
<property name="severity" value="error"/>
</module>
<module name="FallThrough">
<!-- Warn about falling through to the next case statement. Similar to
javac -Xlint:fallthrough, but the check is suppressed if a single-line comment
on the last non-blank line preceding the fallen-into case contains 'fall through' (or
some other variants which we don't publicize, to promote consistency).
-->
<property name="reliefPattern"
value="fall through|Fall through|fallthru|Fallthru|falls through|Falls through|fallthrough|Fallthrough|No break|NO break|no break|continue on"/>
<property name="severity" value="error"/>
</module>
<!--
MODIFIERS CHECKS
-->
<module name="ModifierOrder">
<!-- Warn if modifier order is inconsistent with JLS3 8.1.1, 8.3.1, and
8.4.3. The prescribed order is:
public, protected, private, abstract, static, final, transient, volatile,
synchronized, native, strictfp
-->
</module>
<!--
WHITESPACE CHECKS
-->
<module name="WhitespaceAround">
<!-- Checks that various tokens are surrounded by whitespace.
This includes most binary operators and keywords followed
by regular or curly braces.
-->
<property name="tokens" value="ASSIGN, BAND, BAND_ASSIGN, BOR,
BOR_ASSIGN, BSR, BSR_ASSIGN, BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN,
EQUAL, GE, GT, LAND, LE, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE, LOR, LT, MINUS,
MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL, PLUS, PLUS_ASSIGN, QUESTION,
SL, SL_ASSIGN, SR_ASSIGN, STAR, STAR_ASSIGN"/>
<property name="severity" value="error"/>
</module>
<module name="WhitespaceAfter">
<!-- Checks that commas, semicolons and typecasts are followed by
whitespace.
-->
<property name="tokens" value="COMMA, SEMI, TYPECAST"/>
</module>
<module name="NoWhitespaceAfter">
<!-- Checks that there is no whitespace after various unary operators.
Linebreaks are allowed.
-->
<property name="tokens" value="BNOT, DEC, DOT, INC, LNOT, UNARY_MINUS,
UNARY_PLUS"/>
<property name="allowLineBreaks" value="true"/>
<property name="severity" value="error"/>
</module>
<module name="NoWhitespaceBefore">
<!-- Checks that there is no whitespace before semicolons, dots and postfix operators.
Linebreaks are allowed.
-->
<property name="tokens" value="SEMI, DOT, POST_DEC, POST_INC"/>
<property name="allowLineBreaks" value="true"/>
<property name="severity" value="error"/>
</module>
<module name="ParenPad">
<!-- Checks that there is no whitespace before close parens or after
open parens.
-->
<property name="severity" value="warning"/>
</module>
</module>
</module>

9
gradle.properties Normal file
View file

@ -0,0 +1,9 @@
# NOTE: a .properties file stores values literally. Quotes and '+' are NOT
# evaluated as Groovy, so every value is written out in full and unquoted.
group = org.xbib
version = 1.0.0
org.gradle.daemon = true
name = bibliographic-character-sets
description = Bibliographic character sets
user = xbib
scmUrl = https://github.com/xbib/bibliographic-character-sets
scmConnection = scm:git:git://github.com/xbib/bibliographic-character-sets.git
scmDeveloperConnection = scm:git:git://github.com/xbib/bibliographic-character-sets.git

62
gradle/publish.gradle Normal file
View file

@ -0,0 +1,62 @@
// Uploads the 'archives' artifacts (with descriptor) to scpexe://xbib.org/repository.
// The deployer is only configured when the 'xbibUsername' project property exists;
// 'xbibPrivateKey' is read as well but its presence is not checked here.
task xbibUpload(type: Upload) {
configuration = configurations.archives
uploadDescriptor = true
repositories {
if (project.hasProperty("xbibUsername")) {
mavenDeployer {
// NOTE(review): the 'wagon' configuration presumably supplies the scpexe transport - confirm in build.gradle
configuration = configurations.wagon
repository(url: uri('scpexe://xbib.org/repository')) {
authentication(userName: xbibUsername, privateKey: xbibPrivateKey)
}
}
}
}
}
// Uploads the 'archives' artifacts (with descriptor) to the OSSRH release or snapshot
// repository. The deployer is only configured when the 'ossrhUsername' project property
// exists; 'ossrhPassword', 'ossrhReleaseUrl' and 'ossrhSnapshotUrl' are read without a check.
task mavenCentralUpload(type: Upload) {
configuration = configurations.archives
uploadDescriptor = true
repositories {
if (project.hasProperty('ossrhUsername')) {
mavenDeployer {
// Sign the generated POM before it is deployed.
beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) }
repository(url: uri(ossrhReleaseUrl)) {
authentication(userName: ossrhUsername, password: ossrhPassword)
}
snapshotRepository(url: uri(ossrhSnapshotUrl)) {
authentication(userName: ossrhUsername, password: ossrhPassword)
}
// POM metadata; name, description, user and the scm* values are project properties.
pom.project {
name name
description description
packaging 'jar'
inceptionYear '2016'
url scmUrl
organization {
name 'xbib'
url 'http://xbib.org'
}
developers {
developer {
id user
name 'Jörg Prante'
email 'joergprante@gmail.com'
url 'https://github.com/jprante'
}
}
scm {
url scmUrl
connection scmConnection
developerConnection scmDeveloperConnection
}
licenses {
license {
name 'Affero GNU Public License Version 3'
url 'http://www.gnu.org/licenses/agpl-3.0.html'
}
}
}
}
}
}
}

BIN
gradle/wrapper/gradle-wrapper.jar vendored Normal file

Binary file not shown.

View file

@ -0,0 +1,6 @@
#Thu Aug 18 20:34:33 CEST 2016
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-3.0-bin.zip

169
gradlew vendored Executable file
View file

@ -0,0 +1,169 @@
#!/usr/bin/env bash
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
# Prints all arguments, joined into one line, to standard output.
warn ( ) {
echo "$*"
}
# Prints all arguments as one message framed by blank lines, then
# terminates the script with exit status 1.
die ( ) {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
# Stores every argument it receives as one element of the global JVM_OPTS array.
function splitJvmOpts() {
JVM_OPTS=("$@")
}
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
cd "$(dirname "$0")"
fi
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"

84
gradlew.bat vendored Normal file
View file

@ -0,0 +1,84 @@
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
if not "%OS%" == "Windows_NT" goto win9xME_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

1
settings.gradle Normal file
View file

@ -0,0 +1 @@
rootProject.name = 'bibliographic-character-sets'

View file

@ -0,0 +1,160 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
/*
Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version.
*/
package org.xbib.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
/**
 * US-ASCII charset: a simple single-byte character set in which every byte
 * value 0..127 maps to the char with the same numeric value.
 *
 * <p>Bytes with the high bit set are reported as malformed input when decoding;
 * chars above {@link Byte#MAX_VALUE} are reported as unmappable when encoding.
 */
final class ASCII extends Charset {

    /**
     * Creates the charset with the canonical name {@code US-ASCII} and its aliases.
     * The canonical name is chosen according to
     * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
     */
    ASCII() {
        super("US-ASCII", new String[]{
                /*
                 * These names are provided by
                 * http://www.iana.org/assignments/character-sets
                 */
                "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "ASCII", "ISO646-US", "us",
                "IBM367", "cp367", "csASCII",
                /*
                 * These names are provided by
                 * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
                 */
                "ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646", "windows-20127"});
    }

    /**
     * Tells whether this charset contains the given charset.
     *
     * @param cs the charset to test
     * @return {@code true} only if {@code cs} is itself an {@code ASCII} instance
     */
    @Override
    public boolean contains(Charset cs) {
        return cs instanceof ASCII;
    }

    /**
     * @return a new decoder for this charset
     */
    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    /**
     * @return a new encoder for this charset
     */
    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * Decodes one byte into one char; negative bytes (high bit set) are malformed.
     */
    private static final class Decoder extends CharsetDecoder {

        Decoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            while (in.hasRemaining()) {
                byte b = in.get();
                if (b < 0) {
                    // Un-read the offending byte so the caller sees it at the buffer position.
                    in.position(in.position() - 1);
                    return CoderResult.malformedForLength(1);
                }
                if (!out.hasRemaining()) {
                    // Un-read the byte; it is decoded again once the caller supplies more room.
                    in.position(in.position() - 1);
                    return CoderResult.OVERFLOW;
                }
                out.put((char) b);
            }
            return CoderResult.UNDERFLOW;
        }
    }

    /**
     * Encodes one char into one byte; chars above {@link Byte#MAX_VALUE} are unmappable.
     */
    private static final class Encoder extends CharsetEncoder {

        Encoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            while (in.hasRemaining()) {
                char c = in.get();
                if (c > Byte.MAX_VALUE) {
                    // Un-read the offending char so the caller sees it at the buffer position.
                    in.position(in.position() - 1);
                    return CoderResult.unmappableForLength(1);
                }
                if (!out.hasRemaining()) {
                    // Un-read the char; it is encoded again once the caller supplies more room.
                    in.position(in.position() - 1);
                    return CoderResult.OVERFLOW;
                }
                out.put((byte) c);
            }
            return CoderResult.UNDERFLOW;
        }
    }
}

View file

@ -0,0 +1,280 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.io.CharArrayWriter;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.Map;
import javax.xml.stream.XMLStreamException;
/**
 * Decode-only {@link Charset} for ANSEL / MARC-8 (ANSI Z39.47) data.
 *
 * <p>The code tables are read once, at class initialization, from the class
 * path resource {@code org/xbib/charset/codetables.xml} and indexed by
 * character set name. If the resource cannot be loaded a warning is logged
 * and bytes are passed through as their unsigned value.
 *
 * <p>Encoding is not supported: {@link #canEncode()} returns {@code false}
 * and {@link #newEncoder()} throws {@link UnsupportedOperationException}.
 */
public class AnselCharset extends Charset {

    private static final String CODE_TABLES_RESOURCE = "org/xbib/charset/codetables.xml";
    private static final String BASIC_LATIN = "Basic Latin (ASCII)";
    private static final String EXTENDED_LATIN = "Extended Latin (ANSEL)";
    private static final String EACC = "Chinese, Japanese, Korean (EACC)";
    private static final char ESCAPE = '\u001b';

    /** Character sets from the code table resource, keyed by their name. */
    private static final Map<String, AnselCodeTableParser.CharacterSet> characterSetMap;

    static {
        characterSetMap = new LinkedHashMap<>();
        try (InputStream inputStream = openCodeTables()) {
            if (inputStream == null) {
                warn("ANSEL code tables not found on class path: " + CODE_TABLES_RESOURCE, null);
            } else {
                Iterable<AnselCodeTableParser.CodeTable> codeTables =
                        new AnselCodeTableParser(inputStream).getCodeTables();
                // the parser may hand back null when the XML could not be read
                if (codeTables != null) {
                    for (AnselCodeTableParser.CodeTable codeTable : codeTables) {
                        for (AnselCodeTableParser.CharacterSet characterSet : codeTable.getCharacterSets()) {
                            characterSetMap.put(characterSet.getName(), characterSet);
                        }
                    }
                }
            }
        } catch (IOException | RuntimeException e) {
            // best effort: class initialization must not fail because of a broken table file
            warn("unable to load ANSEL code tables from " + CODE_TABLES_RESOURCE, e);
        }
    }

    /** Charset whose decoder supplies the chars-per-byte estimates for our decoder. */
    private final Charset encodeCharset;

    /**
     * Creates the charset under the canonical name {@code ANSEL} with the
     * aliases registered in {@link BibliographicCharsetProvider}.
     *
     * @throws XMLStreamException declared for compatibility with existing callers
     */
    public AnselCharset() throws XMLStreamException {
        super("ANSEL", BibliographicCharsetProvider.aliasesFor("ANSEL"));
        this.encodeCharset = StandardCharsets.UTF_8;
    }

    /**
     * Opens the code table resource, trying the thread context class loader
     * first and the loader of this class second.
     *
     * @return the resource stream, or {@code null} if it cannot be found
     */
    private static InputStream openCodeTables() {
        InputStream stream = null;
        ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
        if (contextLoader != null) {
            stream = contextLoader.getResourceAsStream(CODE_TABLES_RESOURCE);
        }
        if (stream == null) {
            stream = AnselCharset.class.getResourceAsStream("/" + CODE_TABLES_RESOURCE);
        }
        return stream;
    }

    private static void warn(String message, Throwable cause) {
        java.util.logging.Logger.getLogger(AnselCharset.class.getName())
                .log(java.util.logging.Level.WARNING, message, cause);
    }

    @Override
    public boolean contains(Charset charset) {
        return charset instanceof AnselCharset;
    }

    /**
     * @return always {@code false}; this charset can only decode
     */
    @Override
    public boolean canEncode() {
        return false;
    }

    /**
     * @throws UnsupportedOperationException always; encoding is not implemented
     */
    @Override
    public CharsetEncoder newEncoder() {
        throw new UnsupportedOperationException("ANSEL encoding is not supported");
    }

    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this, encodeCharset.newDecoder());
    }

    /**
     * Stateful decoder. The G0/G1 designations and the combining characters
     * waiting for their base character are kept between {@code decodeLoop}
     * calls, so input may arrive in arbitrary chunks; {@code reset()}
     * restores the initial state.
     */
    private static class Decoder extends CharsetDecoder {

        /** Name of the character set currently designated for bytes 0x21..0x7E. */
        private String g0 = BASIC_LATIN;
        /** Name of the character set currently designated for bytes 0xA1..0xFE. */
        private String g1 = EXTENDED_LATIN;
        /** Combining characters already read that still wait for their base character. */
        private final StringBuilder diacritics = new StringBuilder();

        Decoder(Charset cs, CharsetDecoder baseDecoder) {
            super(cs, baseDecoder.averageCharsPerByte(), baseDecoder.maxCharsPerByte());
        }

        @Override
        protected void implReset() {
            g0 = BASIC_LATIN;
            g1 = EXTENDED_LATIN;
            // combining characters without a following base character are dropped
            diacritics.setLength(0);
        }

        /**
         * Decodes as many complete units (escape sequence, single byte or
         * multi-byte code) as fit. A unit is consumed from {@code in} only
         * when it is complete and its output fits into {@code out};
         * otherwise the input position is restored to the start of that unit.
         *
         * @param in  MARC-8 bytes
         * @param out decoded characters; combining characters are written
         *            after their base character
         * @return {@code UNDERFLOW} when the input is exhausted or ends inside
         *         a unit, {@code OVERFLOW} when {@code out} is too small for
         *         the next base character and its pending diacritics
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            while (in.hasRemaining()) {
                int mark = in.position();
                char first = (char) (in.get() & 0xFF);
                if (first == ESCAPE) {
                    if (!handleEscapeSequence(in)) {
                        // escape sequence is cut off: wait for more input
                        in.position(mark);
                        return CoderResult.UNDERFLOW;
                    }
                    // re-enter the loop so that a directly following escape is recognized
                    continue;
                }
                AnselCodeTableParser.CharacterSet characterSet = null;
                if (isG0(first)) {
                    characterSet = characterSetMap.get(g0);
                } else if (isG1(first)) {
                    characterSet = characterSetMap.get(g1);
                }
                // a set without codes reports length 0; treat it as single-byte
                int length = characterSet != null ? Math.max(1, characterSet.getLength()) : 1;
                if (in.remaining() < length - 1) {
                    // multi-byte code is cut off: wait for more input
                    in.position(mark);
                    return CoderResult.UNDERFLOW;
                }
                String marc = String.valueOf(first);
                if (length > 1) {
                    StringBuilder key = new StringBuilder(length).append(first);
                    for (int i = 1; i < length; i++) {
                        key.append((char) (in.get() & 0xFF));
                    }
                    marc = key.toString();
                }
                AnselCodeTableParser.Code code = characterSet != null ? characterSet.getMarc().get(marc) : null;
                char ch = code != null ? code.getUcs() : first;
                if (ch == '\u0000') {
                    // FB, EC - see http://memory.loc.gov/diglib/codetables/45.html#Note1
                    // and http://memory.loc.gov/diglib/codetables/45.html#Note2
                    continue;
                }
                boolean combining = isDiacritic(first) || (code != null && code.isCombining());
                if (combining) {
                    diacritics.append(ch);
                    continue;
                }
                if (out.remaining() < 1 + diacritics.length()) {
                    // un-read the base character; pending diacritics stay buffered
                    in.position(mark);
                    return CoderResult.OVERFLOW;
                }
                // diacritics must be appended in Unicode, but are prepended in MARC-8 / Z39.47
                out.put(ch);
                if (diacritics.length() > 0) {
                    out.put(diacritics.toString());
                    diacritics.setLength(0);
                }
            }
            return CoderResult.UNDERFLOW;
        }

        private boolean isDiacritic(char ch) {
            return ch >= 0xE0 && ch <= 0xFF;
        }

        private boolean isG0(char ch) {
            return ch >= 0x21 && ch <= 0x7E;
        }

        private boolean isG1(char ch) {
            return ch >= 0xA1 && ch <= 0xFE;
        }

        /**
         * ANSI X3.41 or ISO 2022 escape technique.
         * See procedures in IS0 2375-1985.
         *
         * <p>Reads the bytes following an escape character and switches
         * {@code g0} or {@code g1} accordingly. Unknown sequences are consumed
         * without effect. Designations change only once the whole sequence
         * has been read.
         *
         * <p>NOTE(review): the forms {@code ESC ( ! E}, {@code ESC ) ! E} and
         * {@code ESC $ ) 1} end up in the no-op branches, as they did before;
         * confirm against the MARC-8 specification whether they must be supported.
         *
         * @param in byte buffer positioned directly after the escape character
         * @return {@code false} if the buffer ends inside the sequence; the
         *         caller must then restore the input position
         */
        private boolean handleEscapeSequence(ByteBuffer in) {
            if (!in.hasRemaining()) {
                return false;
            }
            byte oneByte = in.get();
            switch (oneByte) {
                case 's':
                    g0 = BASIC_LATIN;
                    return true;
                case 'g':
                    g0 = "Greek Symbols";
                    return true;
                case 'b':
                    g0 = "Subscripts";
                    return true;
                case 'p':
                    g0 = "Superscripts";
                    return true;
                case '(':
                case ',': {
                    if (!in.hasRemaining()) {
                        return false;
                    }
                    String name = characterSetFor(in.get());
                    if (name != null) {
                        g0 = name;
                    }
                    return true;
                }
                case ')':
                case '-': {
                    if (!in.hasRemaining()) {
                        return false;
                    }
                    String name = characterSetFor(in.get());
                    if (name != null) {
                        g1 = name;
                    }
                    return true;
                }
                case '$':
                    if (!in.hasRemaining()) {
                        return false;
                    }
                    if (in.get() == '1') {
                        g0 = EACC;
                    }
                    return true;
                case '!':
                    if (!in.hasRemaining()) {
                        return false;
                    }
                    if (in.get() == 'E') {
                        g0 = EXTENDED_LATIN;
                    }
                    return true;
                default:
                    return true;
            }
        }

        /**
         * Maps the final byte of a designation sequence to a character set name.
         *
         * @param finalByte the byte following the intermediate character
         * @return the character set name, or {@code null} for an unknown byte
         */
        private String characterSetFor(byte finalByte) {
            switch (finalByte) {
                case '1':
                    return EACC;
                case '2':
                    return "Basic Hebrew";
                case '3':
                    return "Basic Arabic";
                case '4':
                    return "Extended Arabic";
                case 'B':
                    return BASIC_LATIN;
                case 'N':
                    return "Basic Cyrillic";
                case 'Q':
                    return "Extended Cyrillic";
                case 'S':
                    return "Basic Greek";
                default:
                    return null;
            }
        }
    }
}

View file

@ -0,0 +1,280 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
/**
 * StAX reader for a code table XML document made of {@code codeTable},
 * {@code characterSet} and {@code code} elements. The whole document is
 * parsed in the constructor; the result is available from
 * {@link #getCodeTables()}.
 */
class AnselCodeTableParser {

    private final List<CodeTable> codeTables;
    private CodeTable codeTable;
    private CharacterSet characterSet;
    private Code code = new Code();
    /** Text collected since the last end element. */
    private final StringBuilder content = new StringBuilder();

    /**
     * Parses the given stream. The stream is not closed.
     *
     * @param inputStream XML document to read
     * @throws IllegalArgumentException if a {@code marc} or {@code utf-8}
     *                                  value has an odd number of hex digits
     */
    AnselCodeTableParser(InputStream inputStream) {
        List<CodeTable> parsed;
        try {
            parsed = createCodeTables(inputStream);
        } catch (XMLStreamException ignored) {
            // best effort: an unreadable document yields no tables rather than
            // a null list that every caller would have to guard against
            parsed = new LinkedList<>();
        }
        this.codeTables = parsed;
    }

    /**
     * @return the parsed code tables; empty, never {@code null}, if the
     *         document could not be parsed
     */
    public List<CodeTable> getCodeTables() {
        return codeTables;
    }

    private List<CodeTable> createCodeTables(InputStream inputStream) throws XMLStreamException {
        List<CodeTable> codetables = new LinkedList<>();
        XMLInputFactory factory = XMLInputFactory.newInstance();
        XMLEventReader xmlReader = factory.createXMLEventReader(inputStream);
        try {
            while (xmlReader.hasNext()) {
                processEvent(codetables, xmlReader.nextEvent());
            }
        } finally {
            // releases parser resources; the underlying stream stays open
            xmlReader.close();
        }
        return codetables;
    }

    private void processEvent(List<CodeTable> codetables, XMLEvent event) {
        if (event.isStartDocument()) {
            this.code = new Code();
            this.content.setLength(0);
        }
        if (event.isStartElement()) {
            startElement(event.asStartElement());
        } else if (event.isCharacters()) {
            Characters c = event.asCharacters();
            if (!c.isIgnorableWhiteSpace()) {
                // character events may come more than once (e.g. because of XML entities like &quot;)
                // concatenate with values that might exist
                content.append(c.getData());
            }
        } else if (event.isEndElement()) {
            endElement(codetables, event.asEndElement());
            content.setLength(0);
        }
    }

    /** Opens a new table, character set or code depending on the element name. */
    private void startElement(StartElement element) {
        switch (element.getName().getLocalPart()) {
            case "codeTable":
                this.codeTable = new CodeTable();
                break;
            case "characterSet":
                this.characterSet = new CharacterSet();
                @SuppressWarnings("unchecked")
                Iterator<Attribute> it = element.getAttributes();
                while (it.hasNext()) {
                    Attribute attr = it.next();
                    QName attributeName = attr.getName();
                    String attributeLocalName = attributeName.getLocalPart();
                    if ("name".equals(attributeLocalName)) {
                        characterSet.name = attr.getValue();
                    } else if ("isoCode".equals(attributeLocalName)) {
                        characterSet.isoCode = attr.getValue();
                    }
                }
                break;
            case "code":
                code = new Code();
                break;
            default:
                // "codeTables" and unknown elements carry no state
                break;
        }
    }

    /** Stores the collected text or closes the current table, character set or code. */
    private void endElement(List<CodeTable> codetables, EndElement element) {
        // text may be preceded by the indentation that followed the previous element
        String text = content.toString().trim();
        switch (element.getName().getLocalPart()) {
            case "codeTable":
                codetables.add(codeTable);
                codeTable = new CodeTable();
                break;
            case "characterSet":
                codeTable.add(characterSet);
                characterSet = new CharacterSet();
                break;
            case "code":
                characterSet.add(code);
                code = new Code();
                break;
            case "marc":
                code.marc = hexToChars(text);
                break;
            case "ucs":
                // two chars have no ucs equivalent...
                if (!text.isEmpty()) {
                    code.ucs = (char) (Integer.parseInt(text, 16) & 0xFFFF);
                }
                break;
            case "utf-8":
                code.utf8 = hexToChars(text);
                break;
            case "name":
                code.name = text;
                break;
            case "isCombining":
                code.isCombining = "true".equals(text);
                break;
            default:
                break;
        }
    }

    /**
     * Turns pairs of hex digits into characters, one character per pair.
     *
     * @param hex string of hex digits
     * @return a string with {@code hex.length() / 2} characters
     * @throws IllegalArgumentException if the number of digits is odd
     */
    private static String hexToChars(String hex) {
        if ((hex.length() & 1) != 0) {
            throw new IllegalArgumentException("odd number of hex digits: " + hex);
        }
        char[] chars = new char[hex.length() / 2];
        for (int i = 0; i < hex.length(); i += 2) {
            chars[i / 2] = (char) ((Character.digit(hex.charAt(i), 16) << 4)
                    + Character.digit(hex.charAt(i + 1), 16));
        }
        return new String(chars);
    }

    /** A list of character sets. */
    static class CodeTable {
        private final List<CharacterSet> characterSets = new LinkedList<>();

        void add(CharacterSet characterSet) {
            characterSets.add(characterSet);
        }

        List<CharacterSet> getCharacterSets() {
            return characterSets;
        }
    }

    /** A named set of codes, indexed both by MARC code and by UCS character. */
    static class CharacterSet {
        String name;
        String isoCode;
        // number of chars in the MARC code of the most recently added code
        int length;
        Map<String, Code> marc = new HashMap<>();
        Map<Character, Code> unicode = new HashMap<>();

        /**
         * Registers a code; the first code seen for a MARC value or UCS
         * character wins.
         */
        void add(Code code) {
            // a code without a marc element cannot be looked up by MARC value
            if (code.marc != null) {
                marc.putIfAbsent(code.marc, code);
                length = code.marc.length();
            }
            unicode.putIfAbsent(code.ucs, code);
        }

        String getName() {
            return name;
        }

        int getLength() {
            return length;
        }

        String getIsoCode() {
            return isoCode;
        }

        Map<String, Code> getMarc() {
            return marc;
        }

        Map<Character, Code> getUnicode() {
            return unicode;
        }
    }

    /** One entry of a character set. */
    static class Code {
        // Universal Character Set (UCS, ISO-IEC 10646)/Unicode, always 16 bit
        char ucs;
        // MARC-8 standard (single char) or EACC 24-bit code (three chars)
        String marc;
        // UTF-8 code (in hex), 1-3 bytes
        String utf8;
        // name
        String name;
        boolean isCombining;

        char getUcs() {
            return ucs;
        }

        String getMarc() {
            return marc;
        }

        String getUtf8() {
            return utf8;
        }

        String getName() {
            return name;
        }

        boolean isCombining() {
            return isCombining;
        }

        @Override
        public String toString() {
            return "marc=" + marc + " isCombining=" + isCombining + " ucs=" + ucs;
        }
    }
}

View file

@ -0,0 +1,173 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.lang.ref.SoftReference;
import java.nio.charset.Charset;
import java.nio.charset.spi.CharsetProvider;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Extra bibliographic character sets.
 *
 * <p>Maps canonical charset names and their aliases (both compared
 * case-insensitively) to implementation classes located in the same package
 * as this provider. Charsets are instantiated on first request through their
 * no-argument constructor and cached behind soft references.
 */
public class BibliographicCharsetProvider extends CharsetProvider {
    private static final Logger logger = Logger.getLogger(BibliographicCharsetProvider.class.getName());
    /**
     * Soft reference to the most recently constructed provider, used by
     * {@link #aliasesFor(String)}. Once the garbage collector has cleared it,
     * a new provider is created on demand.
     */
    private static volatile SoftReference<BibliographicCharsetProvider> instance = null;
    /** Canonical charset name to simple class name. */
    private final Map<String, String> classMap;
    /** Alias to canonical charset name. */
    private final Map<String, String> aliasMap;
    /** Canonical charset name to its aliases. */
    private final Map<String, String[]> aliasNameMap;
    /** Already instantiated charsets by canonical name; guarded by its own monitor. */
    private final Map<String, SoftReference<Charset>> cache;
    /** Package of the implementation classes; empty for the unnamed package. */
    private final String packagePrefix;

    /**
     * Constructor must be public.
     */
    public BibliographicCharsetProvider() {
        classMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
        aliasMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
        aliasNameMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
        cache = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
        // derived from the class name because Class.getPackage() may return null
        packagePrefix = packageNameOf(getClass());
        charset("ANSEL", "AnselCharset",
                new String[]{"ANSI_Z39_47", "ANSI-Z39-47", "Z39_47", "Z39-47", "ansel", "usmarc", "usm94"});
        charset("ISO-5426", "ISO5426", new String[]{"x-mab", "x-MAB", "ISO-5426", "ISO_5426", "ISO_5426:1983", "MAB2"});
        charset("ISO-5428", "ISO5428", new String[]{"ISO_5428", "ISO-5428:1984", "iso-ir-55"});
        charset("MAB-Diskette", "MabDisketteCharset", new String[]{});
        charset("PICA", "Pica", new String[]{"Pica", "pica"});
        charset("x-PICA", "PicaCharset", new String[]{"x-pica"});
        charset("SIMPLE_ANSEL", "SimpleAnselCharset", new String[]{});
        instance = new SoftReference<>(this);
    }

    /**
     * List all aliases defined for a character set.
     *
     * @param s the name of the character set
     * @return a copy of the alias array, or {@code null} if the name is not registered
     */
    static String[] aliasesFor(String s) {
        SoftReference<BibliographicCharsetProvider> softreference = instance;
        BibliographicCharsetProvider charsets = softreference != null ? softreference.get() : null;
        if (charsets == null) {
            // the constructor publishes the new provider in 'instance'
            charsets = new BibliographicCharsetProvider();
        }
        return charsets.aliases(s);
    }

    private static String packageNameOf(Class<?> type) {
        String name = type.getName();
        int lastDot = name.lastIndexOf('.');
        return lastDot < 0 ? "" : name.substring(0, lastDot);
    }

    /**
     * @param s canonical name or alias
     * @return the charset, or {@code null} if the name is unknown or the
     *         implementation could not be instantiated
     */
    @Override
    public final Charset charsetForName(String s) {
        return lookup(canonicalize(s));
    }

    /**
     * Iterates over all registered charsets that can be instantiated.
     * Charsets whose class cannot be loaded are skipped, so the iterator
     * never yields {@code null}.
     */
    @Override
    public final Iterator<Charset> charsets() {
        return new Iterator<Charset>() {
            private final Iterator<String> names = classMap.keySet().iterator();
            private Charset upcoming;

            @Override
            public boolean hasNext() {
                while (upcoming == null && names.hasNext()) {
                    upcoming = lookup(names.next());
                }
                return upcoming != null;
            }

            @Override
            public Charset next() {
                if (!hasNext()) {
                    throw new java.util.NoSuchElementException();
                }
                Charset result = upcoming;
                upcoming = null;
                return result;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }

    private void charset(String name, String className, String[] aliases) {
        classMap.putIfAbsent(name, className);
        for (String alias : aliases) {
            aliasMap.putIfAbsent(alias, name);
        }
        aliasNameMap.putIfAbsent(name, aliases);
    }

    private String canonicalize(String charsetName) {
        String aliasCharsetName = aliasMap.get(charsetName);
        return aliasCharsetName != null ? aliasCharsetName : charsetName;
    }

    /**
     * Returns the cached charset for a canonical name or instantiates it.
     * The cache lock is not held while the charset class is loaded and
     * constructed; a concurrent duplicate instantiation is harmless.
     */
    private Charset lookup(String charsetName) {
        synchronized (cache) {
            SoftReference<Charset> softreference = cache.get(charsetName);
            Charset cached = softreference != null ? softreference.get() : null;
            if (cached != null) {
                return cached;
            }
        }
        String className = classMap.get(charsetName);
        if (className == null) {
            return null;
        }
        String qualifiedName = packagePrefix.isEmpty() ? className : packagePrefix + "." + className;
        try {
            Class<?> cl = Class.forName(qualifiedName, true, getClass().getClassLoader());
            // unlike Class.newInstance(), exceptions thrown by the constructor
            // arrive wrapped in InvocationTargetException and are handled below
            Charset charset = (Charset) cl.getDeclaredConstructor().newInstance();
            synchronized (cache) {
                cache.put(charsetName, new SoftReference<>(charset));
            }
            return charset;
        } catch (ClassNotFoundException e) {
            logger.log(Level.WARNING, "Class not found: " + qualifiedName, e);
        } catch (IllegalAccessException e) {
            logger.log(Level.WARNING, "Illegal access: " + qualifiedName, e);
        } catch (ReflectiveOperationException e) {
            logger.log(Level.WARNING, "Instantiation failed: " + qualifiedName, e);
        }
        return null;
    }

    private String[] aliases(String s) {
        String[] names = aliasNameMap.get(s);
        // hand out a copy so callers cannot modify the registered aliases
        return names != null ? names.clone() : null;
    }
}

View file

@ -0,0 +1,52 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.nio.charset.Charset;
/**
 * Constants for the bibliographic character sets. Every constant is
 * resolved by name through {@link Charset#forName(String)} when this class
 * is initialized.
 */
public final class BibliographicCharsets {
    public static final Charset ANSEL = named("ANSEL");
    public static final Charset ISO5426 = named("ISO-5426");
    public static final Charset ISO5428 = named("ISO-5428");
    public static final Charset MAB = named("x-MAB");
    public static final Charset MAB_DISKETTE = named("MAB-DISKETTE");
    public static final Charset PICA = named("Pica");

    /** Resolves a charset by canonical name or alias. */
    private static Charset named(String charsetName) {
        return Charset.forName(charsetName);
    }
}

View file

@ -0,0 +1,222 @@
/**
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*
*
* Derived from
*
* ByteCharset.java -- Abstract class for generic 1-byte encodings.
* Copyright (C) 2005 Free Software Foundation, Inc.
*
* This file is part of GNU Classpath.
*
* GNU Classpath is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* GNU Classpath is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Classpath; see the file COPYING. If not, write to the
* Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
* 02111-1307 USA.
*
* Linking this library statically or dynamically with other modules is
* making a combined work based on this library. Thus, the terms and
* conditions of the GNU General Public License cover the whole
* combination.
*
* As a special exception, the copyright holders of this library give you
* permission to link this library with independent modules to produce an
* executable, regardless of the license terms of these independent
* modules, and to copy and distribute the resulting executable under
* terms of your choice, provided that you also meet, for each linked
* independent module, the terms and conditions of the license of that
* module. An independent module is a module which is not derived from
* or based on this library. If you modify this library, you may extend
* this exception to your version of the library, but you are not
* obligated to do so. If you do not wish to do so, delete this
* exception statement from your version.
*//**
*
* Derived from
*
* ByteCharset.java -- Abstract class for generic 1-byte encodings.
* Copyright (C) 2005 Free Software Foundation, Inc.
*
* This file is part of GNU Classpath.
*
* GNU Classpath is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* GNU Classpath is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Classpath; see the file COPYING. If not, write to the
* Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
* 02111-1307 USA.
*
* Linking this library statically or dynamically with other modules is
* making a combined work based on this library. Thus, the terms and
* conditions of the GNU General Public License cover the whole
* combination.
*
* As a special exception, the copyright holders of this library give you
* permission to link this library with independent modules to produce an
* executable, regardless of the license terms of these independent
* modules, and to copy and distribute the resulting executable under
* terms of your choice, provided that you also meet, for each linked
* independent module, the terms and conditions of the license of that
* module. An independent module is a module which is not derived from
* or based on this library. If you modify this library, you may extend
* this exception to your version of the library, but you are not
* obligated to do so. If you do not wish to do so, delete this
* exception statement from your version.
*/
package org.xbib.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
/**
 * A generic encoding framework for single-byte encodings, utilizing a look-up
 * table that is indexed by the unsigned byte value. This replaces the
 * gnu.java.io.EncoderEightBitLookup class, created by Aron Renn.
 */
abstract class ByteCharset extends Charset {
    /**
     * Char to signify the character in the table is undefined.
     */
    private static final char NONE = (char) 0xFFFD;

    /** Byte-to-char table; read when a decoder or encoder is created. */
    char[] lookupTable;

    ByteCharset(String canonicalName, String[] aliases) {
        super(canonicalName, aliases);
    }

    /**
     * Most western charsets include ASCII, but this should be overloaded for
     * others.
     */
    @Override
    public boolean contains(Charset cs) {
        if (cs instanceof ASCII) {
            return true;
        }
        return cs.getClass() == getClass();
    }

    private char[] getLookupTable() {
        return lookupTable;
    }

    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /** Decoder translating each byte through the charset's table. */
    private static final class Decoder extends CharsetDecoder {
        private final char[] table;

        Decoder(ByteCharset cs) {
            super(cs, 1.0f, 1.0f);
            table = cs.getLookupTable();
        }

        /**
         * Writes one table entry per input byte until the input is used up
         * or the output is full.
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            for (;;) {
                if (!in.hasRemaining()) {
                    return CoderResult.UNDERFLOW;
                }
                if (!out.hasRemaining()) {
                    // the next byte stays unread
                    return CoderResult.OVERFLOW;
                }
                out.put(table[in.get() & 0xFF]);
            }
        }
    }

    /** Encoder using a reverse (char-to-byte) table derived from the charset's table. */
    private static final class Encoder extends CharsetEncoder {
        private final byte[] reverse;

        Encoder(ByteCharset cs) {
            super(cs, 1.0f, 1.0f);
            char[] forward = cs.getLookupTable();
            // size the reverse table by the largest defined character
            int highest = 0;
            for (int i = 0; i < forward.length; i++) {
                int value = forward[i];
                if (value < NONE && value > highest) {
                    highest = value;
                }
            }
            byte[] table = new byte[highest + 1];
            int index = 0;
            for (char ch : forward) {
                // later table positions overwrite earlier ones for the same character
                if (ch != 0 && ch < NONE) {
                    table[ch] = (byte) index;
                }
                index++;
            }
            reverse = table;
        }

        /**
         * Writes one byte per input character. A character whose reverse
         * table entry is 0 is unmappable unless the character itself is 0.
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            while (in.hasRemaining()) {
                if (!out.hasRemaining()) {
                    return CoderResult.OVERFLOW;
                }
                int at = in.position();
                int c = in.get(at);
                byte encoded = c < reverse.length ? reverse[c] : (byte) 0;
                if (encoded == 0 && c != 0) {
                    // the character stays unread
                    return CoderResult.unmappableForLength(1);
                }
                in.position(at + 1);
                out.put(encoded);
            }
            return CoderResult.UNDERFLOW;
        }
    }
}

View file

@ -0,0 +1,241 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2012 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*
* Copyright (C) 2004 Jürgen Kett, Die Deutsche Bibliothek,
* (http://www.ddb.de, mailto:kett@dbf.ddb.de)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*
*/
package org.xbib.charset;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.util.HashMap;
import java.util.Map;
/**
* MAB-Character-Set-Implementation.
* Some minor additions in blocks A-D
*/
public class ISO5426 extends Charset {
public static final char NICHTSORTIERBEGINNZEICHEN = '\u0098';
public static final char NICHTSORTIERENDEZEICHEN = '\u009C';
public static final char TEILFELDTRENNZEICHEN = '\u2021';
private static final char[] byteToCharTable = newMabCharsetMap();
private static final Map<Character, Byte> charToByteTable = newMabByteToCharMap();
private boolean isNFCOutput;
/**
 * Creates the charset by delegating to the private constructor with
 * {@code isNFCOutput} set to {@code true}.
 */
public ISO5426() {
this(true);
}
/**
 * Registers the charset under the canonical name {@code ISO-5426} without
 * aliases and stores the output flag.
 *
 * @param isNFCOutput value kept in the {@code isNFCOutput} field;
 *                    presumably selects NFC-normalized decoder output -
 *                    TODO confirm against the decoder
 */
private ISO5426(boolean isNFCOutput) {
super("ISO-5426", null);
this.isNFCOutput = isNFCOutput;
}
/**
 * Builds the reverse (character to byte) mapping of {@code byteToCharTable}.
 * Table positions holding the character 0 are left out; when a character
 * occurs at several positions the highest position wins.
 *
 * @return a new map from character to the byte value of its table position
 */
private static Map<Character, Byte> newMabByteToCharMap() {
    final char[] table = byteToCharTable;
    Map<Character, Byte> reverse = new HashMap<>(table.length);
    int position = 0;
    for (char mapped : table) {
        if (mapped != 0) {
            reverse.put(mapped, (byte) position);
        }
        position++;
    }
    return reverse;
}
private static char[] newMabCharsetMap() {
char[] map = new char[256];
for (int i = 0; i < 128; i++) {
map[i] = (char) i;
}
map[0x88] = ISO5426.NICHTSORTIERBEGINNZEICHEN;
map[0x89] = ISO5426.NICHTSORTIERENDEZEICHEN;
// A-Block
map[0xA1] = '\u00A1'; // INVERTED EXCLAMATION MARK
map[0xA2] = '\u201E'; // Double Low-9 Quotation Mark
map[0xA3] = '\u00A3'; // Pound Sign
map[0xA4] = '\u0024'; // Dollar Sign
map[0xA5] = '\u00A5'; // YEN SIGN
map[0xA6] = '\u2020'; // Dagger
map[0xA7] = '\u00A7'; // SECTION SIGN
map[0xA8] = '\u2032'; // Prime
map[0xA9] = '\u2018'; // Left Single Quotation Mark
map[0xAA] = '\u201C'; // Left Double Quotation Mark
map[0xAB] = '\u00AB'; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (LEFT POINTING GUILLEMET)
map[0xAC] = '\u266D'; // Music Flat Sign
map[0xAD] = '\u00A9'; // Copyright Sign
map[0xAE] = '\u2117'; // Sound Recording Copyright
map[0xAF] = '\u00AE'; // Registered Sign
// B-Block
map[0xB0] = '\u02BB'; // Modifier Letter Turned Comma
map[0xB1] = '\u02BC'; // Modifier Letter Apostrophe
map[0xB2] = '\u201A'; // Single Low-9 Quotation Mark
map[0xB6] = ISO5426.TEILFELDTRENNZEICHEN;
map[0xB7] = '\u00B7'; //
map[0xB8] = '\u2033'; // Double Prime
map[0xB9] = '\u2019'; // Right Single Quotation Mark
map[0xBA] = '\u201D'; // Right Double Quotation Mark
map[0xBB] = '\u00BB'; //
map[0xBC] = '\u266F'; // Music Sharp Sign !!!!NACHFRAGEN
map[0xBD] = '\u02B9'; // Modifier Letter Prime
map[0xBE] = '\u02BA'; // Modifier Letter Double Prime
map[0xBF] = '\u00BF'; //
// C-Block
map[0xC0] = '\u0309'; // Combining Hook above
map[0xC1] = '\u0300'; // Combining Grave Accent
map[0xC2] = '\u0301'; // Combining Acute Accent
map[0xC3] = '\u0302'; // Combining Circumflex Accent
map[0xC4] = '\u0303'; // Combining Tilde
map[0xC5] = '\u0304'; // Combining Macron
map[0xC6] = '\u0306'; // Combining Breve
map[0xC7] = '\u0307'; // Combining Dot Above
map[0xC8] = '\u0308'; // Trema -> Combining Diaeresis
map[0xC9] = '\u0308'; // Umlaut -> Combining Diaeresis
map[0xCA] = '\u030A'; // Combining Ring Above
map[0xCB] = '\u0315'; // Combining Comma Above Right
map[0xCC] = '\u0312'; // Combining Turned Comma Above
map[0xCD] = '\u030B'; // Combining Double Acute Accent
map[0xCE] = '\u031B'; // Combining Horn
map[0xCF] = '\u030C'; // Combining Caron
// D-Block
map[0xD0] = '\u0327'; // Combining Cedilla
map[0xD1] = '\u031C'; // Combining Left Half Ring Below
map[0xD2] = '\u0326'; // Combining Comma Below
map[0xD3] = '\u0328'; // Combining Ogonek
map[0xD4] = '\u0325'; // Combining Ring Below
map[0xD5] = '\u032E'; // Combining Breve Below
map[0xD6] = '\u0323'; // Combining Dot Below
map[0xD7] = '\u0324'; // Combining Diaeresis Below
map[0xD8] = '\u0332'; // Combining Low Line
map[0xD9] = '\u0333'; // Combining Double Low Line
map[0xDA] = '\u0329'; // Combining Vertical Line Below
map[0xDB] = '\u032D'; // Combining Circumflex Accent Below
map[0xDD] = '\uFE20'; // Combining Ligature Left Half
map[0xDE] = '\uFE21'; // Combining Ligature Right Half
map[0xDF] = '\uFE23'; // Combining Double Tilde Right Half
// E-Block
map[0xE1] = '\u00C6'; // Latin Capital Letter AE
map[0xE2] = '\u0110'; // Latin Capital Letter D with Stroke
map[0xE6] = '\u0132'; // Latin Capital Ligature IJ
map[0xE8] = '\u0141'; // Latin Capital Letter L with Stroke
map[0xE9] = '\u00D8'; // Latin Capital Letter O with Stroke
map[0xEA] = '\u0152'; // Latin Capital Ligature OE
map[0xEC] = '\u00DE'; // Latin Capital Letter Thorn
// F-Block
map[0xF1] = '\u00E6'; // Latin Small Letter AE
map[0xF2] = '\u0111'; // Latin Small Letter D with Stroke
map[0xF3] = '\u00F0'; // Latin Small Letter ETH
map[0xF5] = '\u0131'; // Latin Small Letter Dotless I
map[0xF6] = '\u0133'; // Latin Small Ligature IJ
map[0xF8] = '\u0142'; // Latin Small Letter L with Stroke
map[0xF9] = '\u00F8'; // Latin Small Letter O with Stroke
map[0xFA] = '\u0153'; // Latin Small Ligature OE
map[0xFB] = '\u00DF'; // Latin Small Letter Sharp S
map[0xFC] = '\u00FE'; // Latin Small Letter Thorn
return map;
}
@Override
public boolean contains(Charset cs) {
return false;
}
@Override
public CharsetDecoder newDecoder() {
MabDecoder ret = new MabDecoder(this);
ret.setComposeCharactersAfterConversion(this.isNFCOutput);
return ret;
}
@Override
public CharsetEncoder newEncoder() {
return new MabEncoder(this);
}
private static class MabDecoder extends SingleByteDecoder {
MabDecoder(Charset cs) {
super(cs);
}
@Override
public char byteToChar(byte b) {
return byteToCharTable[b & 0xFF];
}
@Override
public boolean isCombiningCharacter(byte b) {
return (b & 0xFF) > 0xC0 && (b & 0xFF) < 0xDF;
}
}
private static class MabEncoder extends SingleByteEncoder {
MabEncoder(Charset cs) {
super(cs);
}
@Override
public byte charToByte(char c) {
Byte b = charToByteTable.get(c);
if (b == null) {
return 0;
}
return b;
}
}
}

View file

@ -0,0 +1,390 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
/**
 * Charset for ISO 5428:1984, "Greek alphabet coded character set for
 * bibliographic information interchange". Only decoding is supported.
 *
 * <p>The bytes 0xA2 (tonos) and 0xA3 (dialytika) are modifiers that precede
 * the letter they belong to. The decoder folds them into the precomposed
 * Greek letter where the mapping defines one.</p>
 */
public class ISO5428 extends Charset {

    public ISO5428() {
        super("ISO_5428", BibliographicCharsetProvider.aliasesFor("ISO_5428"));
    }

    /**
     * This charset does not claim to contain any other charset.
     *
     * @param cs the charset to test
     * @return always {@code false}
     */
    @Override
    public boolean contains(Charset cs) {
        return false;
    }

    /**
     * Encoding is not implemented for this charset.
     *
     * @return always {@code false}
     */
    @Override
    public boolean canEncode() {
        return false;
    }

    /**
     * Creates a new decoder.
     *
     * @return a new decoder
     */
    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    /**
     * Encoding is not supported. Following the contract of
     * {@link Charset#newEncoder()}, this throws instead of returning
     * {@code null}.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public CharsetEncoder newEncoder() {
        throw new UnsupportedOperationException("ISO_5428 does not support encoding");
    }

    private static class Decoder extends CharsetDecoder {

        // Pending modifiers. Kept as decoder state so that a modifier and its
        // letter may arrive in different decodeLoop invocations.
        private boolean tonos;

        private boolean dialitika;

        Decoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        @Override
        protected void implReset() {
            tonos = false;
            dialitika = false;
        }

        /**
         * Decodes bytes one at a time. Modifier bytes are consumed without
         * producing output and apply to the next emitted character only.
         * A modifier in front of a character that has no accented form in
         * {@link #toChar(int, boolean, boolean)} is dropped, as is a modifier
         * that is still pending when the input ends.
         *
         * @param in the input bytes
         * @param out the output characters
         * @return {@code OVERFLOW} if {@code out} is full, otherwise {@code UNDERFLOW}
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            while (in.hasRemaining()) {
                // Mask so that bytes >= 0x80 are not sign-extended.
                int code = in.get() & 0xFF;
                if (code == 0xa2) {
                    tonos = true;
                    continue;
                }
                if (code == 0xa3) {
                    dialitika = true;
                    continue;
                }
                if (!out.hasRemaining()) {
                    // Un-read the byte so it is decoded on the next call.
                    in.position(in.position() - 1);
                    return CoderResult.OVERFLOW;
                }
                out.put(toChar(code, tonos, dialitika));
                // Modifiers affect exactly one character.
                tonos = false;
                dialitika = false;
            }
            return CoderResult.UNDERFLOW;
        }

        /**
         * Maps one non-modifier byte to its character.
         *
         * @param code the unsigned byte value
         * @param tonos whether a tonos modifier is pending
         * @param dialitika whether a dialytika modifier is pending
         * @return the Greek letter, or the code point equal to {@code code}
         *         for bytes without a Greek mapping
         */
        private static char toChar(int code, boolean tonos, boolean dialitika) {
            switch (code) {
                case 0xe1: /* alpha small */
                    return tonos ? '\u03ac' : '\u03b1';
                case 0xc1: /* alpha capital */
                    return tonos ? '\u0386' : '\u0391';
                case 0xe2: /* beta small */
                    return '\u03b2';
                case 0xc2: /* beta capital */
                    return '\u0392';
                case 0xe4: /* gamma small */
                    return '\u03b3';
                case 0xc4: /* gamma capital */
                    return '\u0393';
                case 0xe5: /* delta small */
                    return '\u03b4';
                case 0xc5: /* delta capital */
                    return '\u0394';
                case 0xe6: /* epsilon small */
                    return tonos ? '\u03ad' : '\u03b5';
                case 0xc6: /* epsilon capital */
                    return tonos ? '\u0388' : '\u0395';
                case 0xe9: /* zeta small */
                    return '\u03b6';
                case 0xc9: /* zeta capital */
                    return '\u0396';
                case 0xea: /* eta small */
                    return tonos ? '\u03ae' : '\u03b7';
                case 0xca: /* eta capital */
                    return tonos ? '\u0389' : '\u0397';
                case 0xeb: /* theta small */
                    return '\u03b8';
                case 0xcb: /* theta capital */
                    return '\u0398';
                case 0xec: /* iota small */
                    if (tonos) {
                        return dialitika ? '\u0390' : '\u03af';
                    }
                    return dialitika ? '\u03ca' : '\u03b9';
                case 0xcc: /* iota capital */
                    if (tonos) {
                        return '\u038a';
                    }
                    return dialitika ? '\u03aa' : '\u0399';
                case 0xed: /* kappa small */
                    return '\u03ba';
                case 0xcd: /* kappa capital */
                    return '\u039a';
                case 0xee: /* lambda small */
                    return '\u03bb';
                case 0xce: /* lambda capital */
                    return '\u039b';
                case 0xef: /* mu small */
                    return '\u03bc';
                case 0xcf: /* mu capital */
                    return '\u039c';
                case 0xf0: /* nu small */
                    return '\u03bd';
                case 0xd0: /* nu capital */
                    return '\u039d';
                case 0xf1: /* xi small */
                    return '\u03be';
                case 0xd1: /* xi capital */
                    return '\u039e';
                case 0xf2: /* omicron small */
                    return tonos ? '\u03cc' : '\u03bf';
                case 0xd2: /* omicron capital */
                    return tonos ? '\u038c' : '\u039f';
                case 0xf3: /* pi small */
                    return '\u03c0';
                case 0xd3: /* pi capital */
                    return '\u03a0';
                case 0xf5: /* rho small */
                    return '\u03c1';
                case 0xd5: /* rho capital */
                    return '\u03a1';
                case 0xf7: /* sigma small (end of words) */
                    return '\u03c2';
                case 0xf6: /* sigma small */
                    return '\u03c3';
                case 0xd6: /* sigma capital */
                    return '\u03a3';
                case 0xf8: /* tau small */
                    return '\u03c4';
                case 0xd8: /* tau capital */
                    return '\u03a4';
                case 0xf9: /* upsilon small */
                    if (tonos) {
                        return dialitika ? '\u03b0' : '\u03cd';
                    }
                    return dialitika ? '\u03cb' : '\u03c5';
                case 0xd9: /* upsilon capital */
                    if (tonos) {
                        return '\u038e';
                    }
                    return dialitika ? '\u03ab' : '\u03a5';
                case 0xfa: /* phi small */
                    return '\u03c6';
                case 0xda: /* phi capital */
                    return '\u03a6';
                case 0xfb: /* chi small */
                    return '\u03c7';
                case 0xdb: /* chi capital */
                    return '\u03a7';
                case 0xfc: /* psi small */
                    return '\u03c8';
                case 0xdc: /* psi capital */
                    return '\u03a8';
                case 0xfd: /* omega small */
                    return tonos ? '\u03ce' : '\u03c9';
                case 0xdd: /* omega capital */
                    return tonos ? '\u038f' : '\u03a9';
                default:
                    // Pass through unchanged, using the unsigned value.
                    return (char) code;
            }
        }
    }
}

View file

@ -0,0 +1,89 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
/**
* Implementation of the MAB-Diskette character set. Apart from a few
* exceptions it is identical to Cp850.
*/
public class MabDisketteCharset extends ByteCharset {
/* Decoding:
* Deviations from CP850: the non-sorting character and the subfield
* separator must be preserved. Non-sorting character: 00aa -> 00aa,
* subfield separator: 00ce -> 2021
*/
/* Encoding:
* Deviations from CP850: the non-sorting character and the subfield
* separator must be preserved. Non-sorting character: 00aa -> 00aa,
* subfield separator: 2021 -> 00ce, 00b6 -> 00ce
*/
// Table of 256 char values, one per byte value (index = byte value).
private static final char[] lookup = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
0x00BF, 0x00AE, 0x00AA, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x2021, 0x00A4,
0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
};
/**
* Registers the charset as "x-MAB-Diskette" with the aliases supplied by
* BibliographicCharsetProvider and installs the table above.
*/
public MabDisketteCharset() {
super("x-MAB-Diskette", BibliographicCharsetProvider.aliasesFor("x-MAB-Diskette"));
// lookupTable is not declared in this class; presumably an inherited ByteCharset field - confirm there.
lookupTable = lookup;
}
}

View file

@ -0,0 +1,228 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
/**
 * PICA character set implementation.
 *
 * This character set is a modified version of the 'InterMARC' character set
 * and contains 256 tokens.
 *
 * A description can be found at
 * <a href="http://www.pica.nl/ne/docu/dn010/html/t07.shtml">the Pica website</a>.
 */
public class Pica extends Charset {

    /** Unicode character to Pica position (expressed as a char in 0xA0..0xFF). */
    private static final Map<Character, Character> encodeMap = new HashMap<>();

    /** Pica position (expressed as a char in 0xA0..0xFF) to Unicode character. */
    private static final Map<Character, Character> decodeMap = new HashMap<>();

    /*
     * Pica character mapping for the positions 0xA0..0xFF.
     * Pica is equal to US-ASCII but not ISO-8859-1.
     * These are the definitions for Pica characters
     * which are different from ISO-8859-1.
     * A question mark stands in for positions decoded as '?'; a NUL entry
     * leaves the position unmapped.
     */
    static {
        Pica.charTable(encodeMap, decodeMap, '\u00a0', '\u00ff',
                new char[]{
                        '\u00a0', '\u0141', '\u00d8', '\u0110', '\u00de', '\u00c6',
                        '\u0152', '\u02b9', '\u00b7', '\u266d', '\u00ae', '\u00b1',
                        '\u01a0', '\u01af', '\u02be', '\u00c5', '\u02bf', '\u0142',
                        '\u00f8', '\u0111', '\u00fe', '\u00e6', '\u0153', '\u02ba',
                        '\u0131', '\u00a3', '\u00f0', '\u03b1', '\u01a1', '\u01b0',
                        '\u00df', '\u00e5', '\u0132', '\u00c4', '\u00d6', '\u00dc',
                        '\u0186', '\u018e', '\u2260', '\u2192', '\u2264', '\u221e',
                        '\u222b', '\u00d7', '\u00a7', '\u22a1', '\u21d4', '\u2265',
                        '\u0133', '\u00e4', '\u00f6', '\u00fc', '\u0254', '\u0258',
                        '\u00bf', '\u00a1', '\u03b2', '\u003f', '\u03b3', '\u03c0',
                        '\u003f', '\u003f', '\u003f', '\u003f', '\u0341', '\u0300',
                        '\u0301', '\u0302', '\u0303', '\u0304', '\u0306', '\u0307',
                        '\u0308', '\u030c', '\u030a', '\ufe20', '\ufe21', '\u0315',
                        '\u030b', '\u0310', '\u0327', '\u0000', '\u0323', '\u0324',
                        '\u0325', '\u0333', '\u0332', '\u003f', '\u031c', '\u032e',
                        '\ufe23', '\ufe22', '\u003f', '\u0000', '\u0313', '\u003f'
                });
    }

    // The charset that turns the already-mapped characters into bytes.
    private final Charset encodeCharset;

    /**
     * Constructor for the Pica charset. Passes the name and aliases to the
     * superclass and selects ISO-8859-1 as the delegate charset.
     */
    public Pica() {
        super("PICA", BibliographicCharsetProvider.aliasesFor("PICA"));
        encodeCharset = StandardCharsets.ISO_8859_1;
    }

    /**
     * Fills the conversion tables.
     *
     * <p>Every non-NUL entry gets a decode mapping. An encode mapping is only
     * registered for non-ASCII characters: ASCII characters (here the '?'
     * placeholders) already encode to themselves, and registering them would
     * make a plain question mark encode to the last placeholder position
     * instead of 0x3F.</p>
     *
     * @param encodeMap receives Unicode character to Pica position entries
     * @param decodeMap receives Pica position to Unicode character entries
     * @param from first Pica position covered by {@code code}
     * @param to last Pica position covered by {@code code} (inclusive)
     * @param code the Unicode character for each position from {@code from} to {@code to}
     */
    private static void charTable(Map<Character, Character> encodeMap, Map<Character, Character> decodeMap, char from, char to,
                                  char[] code) {
        int i = 0;
        for (char c = from; c <= to; c++) {
            char mapped = code[i];
            if (mapped != '\u0000') {
                decodeMap.put(c, mapped);
                if (mapped > '\u007f') {
                    encodeMap.put(mapped, c);
                }
            }
            i++;
        }
    }

    /**
     * Tells whether the given charset is a Pica charset; subclasses of Pica
     * are accepted.
     *
     * @param charset the charset to test
     * @return {@code true} if {@code charset} is an instance of Pica
     */
    @Override
    public boolean contains(Charset charset) {
        return charset instanceof Pica;
    }

    /**
     * Creates an encoder that applies the Pica mapping and then delegates to
     * an encoder of the base charset.
     *
     * @return a new encoder
     */
    @Override
    public CharsetEncoder newEncoder() {
        return new PicaEncoder(this, encodeCharset.newEncoder());
    }

    /**
     * Creates a decoder; the base charset's decoder only supplies the size
     * hints.
     *
     * @return a new decoder
     */
    @Override
    public CharsetDecoder newDecoder() {
        return new PicaDecoder(this, encodeCharset.newDecoder());
    }

    private static class PicaEncoder extends CharsetEncoder {

        private final CharsetEncoder baseEncoder;

        /**
         * Calls the superclass constructor with the charset and the size
         * hints of the delegate encoder.
         */
        PicaEncoder(Charset cs, CharsetEncoder baseEncoder) {
            super(cs, baseEncoder.averageBytesPerChar(),
                    baseEncoder.maxBytesPerChar());
            this.baseEncoder = baseEncoder;
        }

        /**
         * Implementation of the encoding loop. The remaining input is copied
         * into a temporary buffer (the caller's buffer may be read-only or
         * re-used), the Pica mapping is applied to the copy, and the base
         * encoder turns the copy into bytes. Afterwards the input position is
         * moved back by whatever the base encoder left unconsumed.
         *
         * @param cb the input characters
         * @param bb the output bytes
         * @return the result reported by the base encoder
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer cb, ByteBuffer bb) {
            CharBuffer tmpcb = CharBuffer.allocate(cb.remaining());
            while (cb.hasRemaining()) {
                tmpcb.put(cb.get());
            }
            tmpcb.rewind();
            for (int pos = tmpcb.position(); pos < tmpcb.limit(); pos++) {
                Character mapChar = encodeMap.get(tmpcb.get(pos));
                if (mapChar != null) {
                    tmpcb.put(pos, mapChar);
                }
            }
            baseEncoder.reset();
            CoderResult cr = baseEncoder.encode(tmpcb, bb, true);
            // On error or output overflow, rewind the input to what was
            // really consumed from the temp buffer. On underflow (all input
            // consumed) this is a no-op.
            cb.position(cb.position() - tmpcb.remaining());
            return cr;
        }
    }

    /**
     * The decoder implementation for the Pica Charset.
     */
    private static class PicaDecoder extends CharsetDecoder {

        /**
         * Calls the superclass constructor with the charset and the
         * chars-per-byte values of the delegate decoder.
         */
        PicaDecoder(Charset cs, CharsetDecoder baseDecoder) {
            // base decoder only needed for size hints
            super(cs, baseDecoder.averageCharsPerByte(),
                    baseDecoder.maxCharsPerByte());
        }

        /**
         * Implementation of the decoding loop. Each byte is widened to its
         * unsigned value and replaced by its Pica mapping if one exists.
         *
         * @param in the input bytes
         * @param out the output characters
         * @return {@code OVERFLOW} if {@code out} is full, otherwise {@code UNDERFLOW}
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            while (in.hasRemaining()) {
                byte b = in.get();
                if (!out.hasRemaining()) {
                    // Un-read the byte so it is decoded on the next call.
                    in.position(in.position() - 1);
                    return CoderResult.OVERFLOW;
                }
                char oldChar = (char) (b & 0xFF);
                Character mapChar = decodeMap.get(oldChar);
                out.put(mapChar != null ? mapChar : oldChar);
            }
            return CoderResult.UNDERFLOW;
        }
    }
}

View file

@ -0,0 +1,315 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.util.HashMap;
import java.util.Map;
/**
 * A Charset for the OCLC|PICA-character-encoding (x-PICA). It decodes
 * x-PICA to Unicode and encodes Unicode to x-PICA.
 *
 * <p>Bytes 0x80..0x85 carry DNB-internal definitions that are decoded but
 * never produced when encoding.</p>
 */
public class PicaCharset extends Charset {

    // Order matters: the decode table must be initialized before the reverse map is derived from it.
    private static final char[] BYTE_TO_CHAR_MAP = newPicaToUnicodeMap();

    private static final Map<Character, Byte> CHAR_TO_BYTE_MAP = newCharToByteMap();

    private final boolean isNFCOutput;

    /**
     * Creates the charset; decoders created from it are asked to compose
     * characters after conversion.
     */
    public PicaCharset() {
        this(true);
    }

    private PicaCharset(boolean isNFCOutput) {
        super("x-PICA", null);
        this.isNFCOutput = isNFCOutput;
    }

    /**
     * Builds the decode table (byte value to character).
     *
     * @return a 256-entry table; unassigned slots contain 0
     */
    private static char[] newPicaToUnicodeMap() {
        char[] map = new char[256];
        // Lower half: identity mapping.
        for (int i = 0; i < 128; i++) {
            map[i] = (char) i;
        }
        /*
         * DNB-internal definitions, needed for conversion from pica+ to mab2
         */
        map[0x80] = ISO5426.TEILFELDTRENNZEICHEN;
        map[0x81] = ISO5426.NICHTSORTIERBEGINNZEICHEN;
        map[0x82] = ISO5426.NICHTSORTIERENDEZEICHEN;
        map[0x83] = '|'; // fill character ("Fuellzeichen")
        map[0x84] = 'u'; // character code ("Zeichencode")
        map[0x85] = 'z'; // character repertoire ("Zeichenvorrat")
        map[0xA1] = '\u0141'; // Latin capital letter L with stroke
        map[0xA2] = '\u00D8'; // Latin capital letter O with stroke
        map[0xA3] = '\u0110'; // Latin capital letter D with stroke
        map[0xA4] = '\u00DE'; // Latin capital letter thorn
        map[0xA5] = '\u00C6'; // Latin capital ligature AE
        map[0xA6] = '\u0152'; // Latin capital ligature OE
        map[0xA7] = '\u02B9'; // Modifier letter prime
        map[0xA8] = '\u00B7'; // Middle dot
        map[0xA9] = '\u266D'; // Music flat sign
        map[0xAA] = '\u00AE'; // Registered sign
        map[0xAB] = '\u00B1'; // Plus-minus sign
        map[0xAC] = '\u01A0'; // Latin capital letter O with horn
        map[0xAD] = '\u01AF'; // Latin capital letter U with horn
        map[0xAE] = '\u02BC'; // Modifier letter apostrophe
        map[0xAF] = '\u00C5'; // Latin capital letter A with ring above
        map[0xB0] = '\u02BB'; // Modifier letter turned comma
        map[0xB1] = '\u0142'; // Latin small letter l with stroke
        map[0xB2] = '\u00F8'; // Latin small letter o with stroke
        map[0xB3] = '\u0111'; // Latin small letter d with stroke
        map[0xB4] = '\u00FE'; // Latin small letter thorn
        map[0xB5] = '\u00E6'; // Latin small ligature ae
        map[0xB6] = '\u0153'; // Latin small ligature oe
        map[0xB7] = '\u02BA'; // Modifier letter double prime
        map[0xB8] = '\u0131'; // Latin small letter dotless i
        map[0xB9] = '\u00A3'; // Pound sign
        map[0xBA] = '\u00F0'; // Latin small letter eth
        map[0xBB] = '\u03B1'; // Greek small letter alpha
        map[0xBC] = '\u01A1'; // Latin small letter o with horn
        map[0xBD] = '\u01B0'; // Latin small letter u with horn
        map[0xBE] = '\u00DF'; // Latin small letter sharp s
        map[0xBF] = '\u00E5'; // Latin small letter a with ring above
        map[0xC0] = '\u0132'; // Latin capital ligature IJ
        map[0xC1] = '\u00C4'; // Latin capital letter A with diaeresis
        map[0xC2] = '\u00D6'; // Latin capital letter O with diaeresis
        map[0xC3] = '\u00DC'; // Latin capital letter U with diaeresis
        map[0xC4] = '\u0186'; // Latin capital letter open O
        map[0xC5] = '\u018E'; // Latin capital letter reversed E
        map[0xC6] = '\u2260'; // Not equal to
        map[0xC7] = '\u2192'; // Rightwards arrow
        map[0xC8] = '\u2264'; // Less-than or equal to
        map[0xC9] = '\u221E'; // Infinity
        map[0xCA] = '\u222B'; // Integral
        map[0xCB] = '\u00D7'; // Multiplication sign
        map[0xCC] = '\u00A7'; // Section sign
        map[0xCD] = '\u221A'; // Square root
        map[0xCE] = '\u2277'; // Greater-than or less-than
        map[0xCF] = '\u2265'; // Greater-than or equal to
        map[0xD0] = '\u0133'; // Latin small ligature ij
        map[0xD1] = '\u00E4'; // Latin small letter a with diaeresis
        map[0xD2] = '\u00F6'; // Latin small letter o with diaeresis
        map[0xD3] = '\u00FC'; // Latin small letter u with diaeresis
        map[0xD4] = '\u0254'; // Latin small letter open o
        map[0xD5] = '\u01DD'; // Latin small letter reversed e
        map[0xD6] = '\u00BF'; // Inverted question mark
        map[0xD7] = '\u00A1'; // Inverted exclamation mark
        map[0xD8] = '\u03B2'; // Greek small letter beta
        map[0xDA] = '\u03B3'; // Greek small letter gamma
        map[0xDB] = '\u03C0'; // Greek small letter pi
        map[0xE0] = '\u0309'; // Combining hook above
        map[0xE1] = '\u0300'; // Combining grave accent
        map[0xE2] = '\u0301'; // Combining acute accent
        map[0xE3] = '\u0302'; // Combining circumflex accent
        map[0xE4] = '\u0303'; // Combining tilde
        map[0xE5] = '\u0304'; // Combining macron
        map[0xE6] = '\u0306'; // Combining breve
        map[0xE7] = '\u0307'; // Combining dot above
        map[0xE8] = '\u0308'; // Combining diaeresis
        map[0xE9] = '\u030C'; // Combining caron
        map[0xEA] = '\u030A'; // Combining ring above
        map[0xEB] = '\uFE20'; // Combining ligature left half
        map[0xEC] = '\uFE21'; // Combining ligature right half
        map[0xED] = '\u0313'; // Combining comma above
        map[0xEE] = '\u030B'; // Combining double acute accent
        map[0xEF] = '\u0310'; // Combining candrabindu
        map[0xF0] = '\u0327'; // Combining cedilla
        map[0xF2] = '\u0323'; // Combining dot below
        map[0xF3] = '\u0324'; // Combining diaeresis below
        map[0xF4] = '\u0325'; // Combining ring below
        map[0xF5] = '\u0333'; // Combining double low line
        map[0xF6] = '\u0331'; // Combining macron below
        map[0xF8] = '\u0328'; // Combining ogonek
        map[0xF9] = '\u032E'; // Combining breve below
        map[0xFA] = '\uFE23'; // Combining double tilde right half
        map[0xFB] = '\uFE22'; // Combining double tilde left half
        map[0xFE] = '\u0315'; // Combining comma above right
        return map;
    }

    /**
     * Derives the encode map (character to byte) from the decode table.
     *
     * <p>The DNB-internal bytes 0x80..0x85 are left out so that, for example,
     * '|', 'u' and 'z' keep encoding to their ASCII bytes. The decode table
     * itself is only read, never modified, so those bytes remain decodable.</p>
     *
     * @return the character-to-byte map
     */
    private static Map<Character, Byte> newCharToByteMap() {
        Map<Character, Byte> ret = new HashMap<>(BYTE_TO_CHAR_MAP.length);
        for (int i = 0; i < BYTE_TO_CHAR_MAP.length; i++) {
            if (i >= 0x80 && i <= 0x85) {
                // decode-only definitions
                continue;
            }
            if (BYTE_TO_CHAR_MAP[i] != 0) {
                ret.put(BYTE_TO_CHAR_MAP[i], (byte) i);
            }
        }
        return ret;
    }

    /**
     * This charset does not claim to contain any other charset.
     *
     * @param cs the charset to test
     * @return always {@code false}
     */
    @Override
    public boolean contains(Charset cs) {
        return false;
    }

    /**
     * Creates a decoder configured with this charset's composition setting.
     *
     * @return a new decoder
     */
    @Override
    public CharsetDecoder newDecoder() {
        PicaDecoder ret = new PicaDecoder(this);
        ret.setComposeCharactersAfterConversion(isNFCOutput);
        return ret;
    }

    /**
     * Creates an encoder backed by the reverse of the decode table.
     *
     * @return a new encoder
     */
    @Override
    public CharsetEncoder newEncoder() {
        return new PicaEncoder(this);
    }

    private static class PicaDecoder extends SingleByteDecoder {

        PicaDecoder(Charset cs) {
            super(cs);
        }

        @Override
        public char byteToChar(byte b) {
            // Mask to treat the byte as unsigned before indexing.
            return BYTE_TO_CHAR_MAP[b & 0xFF];
        }

        /**
         * Tells whether the byte lies in the range 0xE0..0xFE, where the
         * table holds the combining marks.
         */
        @Override
        public boolean isCombiningCharacter(byte b) {
            int code = b & 0xFF;
            return code >= 0xE0 && code <= 0xFE;
        }
    }

    private static class PicaEncoder extends SingleByteEncoder {

        PicaEncoder(Charset cs) {
            super(cs);
        }

        /**
         * Looks up the byte for a character; characters without a mapping
         * yield 0.
         */
        @Override
        public byte charToByte(char c) {
            Byte b = CHAR_TO_BYTE_MAP.get(c);
            if (b == null) {
                return 0;
            }
            return b;
        }
    }
}

View file

@ -0,0 +1,264 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * A simplified ANSEL charset, derived from "ANSEL charset" at http://anselcharset.sourceforge.net/
 * by Piotr Andzel. Original code licensed under LGPL http://www.gnu.org/licenses/lgpl.html
 *
 * <p>The Unicode-to-ANSEL table is read from the class path resource {@code ansel-mapping.txt}.
 * Each line has the form {@code Uxxxx=0xNN [0xNN ...]}; text after {@code ;} or {@code #} is
 * ignored. Encoding looks characters up in that table; decoding walks a byte trie built from
 * the same table and always prefers the longest byte sequence that maps to a character.
 */
public class SimpleAnselCharset extends Charset {

    /** Unicode character to ANSEL byte sequence. */
    private final Map<Character, byte[]> mapping;

    /** Trie over the ANSEL byte sequences, keyed by the first byte. */
    private final Map<Byte, ReverseMappingEntity> reverseMapping;

    public SimpleAnselCharset() {
        super("SIMPLE_ANSEL", BibliographicCharsetProvider.aliasesFor("SIMPLE_ANSEL"));
        mapping = createMapping(getClass().getResourceAsStream("ansel-mapping.txt"));
        reverseMapping = createReverseMapping(mapping);
    }

    /**
     * Parses the mapping table.
     *
     * @param mappingStream UTF-8 text of the mapping table
     * @return map from Unicode character to ANSEL bytes; incomplete if reading failed
     * @throws NullPointerException if the stream is {@code null} (resource not found)
     * @throws NumberFormatException if a code point or byte value is not valid hexadecimal
     */
    private static Map<Character, byte[]> createMapping(InputStream mappingStream) {
        if (mappingStream == null) {
            // Same exception type the reader constructor would raise, but with a usable message.
            throw new NullPointerException("ANSEL mapping resource ansel-mapping.txt not found");
        }
        Map<Character, byte[]> mapping = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(mappingStream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Strip a trailing comment: ';' wins, '#' is only looked for when no ';' exists.
                int commentStart = line.indexOf(';');
                if (commentStart < 0) {
                    commentStart = line.indexOf('#');
                }
                if (commentStart >= 0) {
                    line = line.substring(0, commentStart);
                }
                String[] kvp = line.split("=");
                if (kvp.length != 2) {
                    continue;
                }
                char uniCode = (char) Integer.parseInt(kvp[0].trim().replaceFirst("^[uU]", ""), 16);
                String[] ansSeq = kvp[1].trim().split("\\s+");
                byte[] ansCodes = new byte[ansSeq.length];
                for (int j = 0; j < ansSeq.length; j++) {
                    ansCodes[j] = (byte) (Integer.parseInt(ansSeq[j].replaceFirst("^0[xX]", ""), 16) & 0xFF);
                }
                mapping.put(uniCode, ansCodes);
            }
        } catch (IOException e) {
            Logger.getLogger(SimpleAnselCharset.class.getName()).log(Level.WARNING, e.getMessage(), e);
        }
        return mapping;
    }

    /**
     * Builds the decoding trie: every byte sequence of the forward mapping becomes a path,
     * and the node at the end of the path carries the mapped character.
     */
    private static Map<Byte, ReverseMappingEntity> createReverseMapping(Map<Character, byte[]> mapping) {
        Map<Byte, ReverseMappingEntity> rev = new TreeMap<>();
        for (Map.Entry<Character, byte[]> e : mapping.entrySet()) {
            Map<Byte, ReverseMappingEntity> ptr = rev;
            byte[] sequence = e.getValue();
            for (int i = 0; i < sequence.length; i++) {
                Byte b = sequence[i];
                ReverseMappingEntity ent = ptr.get(b);
                if (ent == null) {
                    ent = new ReverseMappingEntity();
                    ptr.put(b, ent);
                }
                if (i + 1 == sequence.length) {
                    ent.setCharacter(e.getKey());
                } else {
                    ptr = ent.getMapping();
                }
            }
        }
        return rev;
    }

    @Override
    public boolean canEncode() {
        return true;
    }

    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    @Override
    public boolean contains(Charset cs) {
        return displayName().equals(cs.displayName());
    }

    /** Trie node: an optional character for the path so far plus the child nodes. */
    private static class ReverseMappingEntity {

        private final TreeMap<Byte, ReverseMappingEntity> mapping = new TreeMap<>();

        private Character character;

        public Character getCharacter() {
            return character;
        }

        public void setCharacter(Character ch) {
            this.character = ch;
        }

        public Map<Byte, ReverseMappingEntity> getMapping() {
            return mapping;
        }
    }

    private class Decoder extends CharsetDecoder {

        /** Bytes already taken from the input but not yet turned into a character. */
        private final LinkedList<Byte> buffer = new LinkedList<>();

        Decoder(Charset charset) {
            super(charset, 2.2f, 3.0f);
        }

        @Override
        protected void implReset() {
            // Pending bytes belong to the previous input and must not leak into the next one.
            buffer.clear();
        }

        @Override
        protected CoderResult decodeLoop(final ByteBuffer in, CharBuffer out) {
            ReverseMappingBuffer rmb = new ReverseMappingBuffer(reverseMapping, buffer) {
                @Override
                protected Byte onNextByte() {
                    return in.hasRemaining() ? in.get() : null;
                }
            };
            while (in.hasRemaining() || rmb.hasRemaining()) {
                if (!out.hasRemaining()) {
                    return CoderResult.OVERFLOW;
                }
                Character ch = rmb.nextCharacter();
                if (ch == null) {
                    // Input ended inside a sequence that has no character yet; the bytes stay
                    // in the pending buffer until more input arrives.
                    break;
                }
                out.put(ch);
            }
            return CoderResult.UNDERFLOW;
        }
    }

    private class Encoder extends CharsetEncoder {

        Encoder(Charset charset) {
            super(charset, 2.2f, 3.0f);
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            while (in.hasRemaining()) {
                int start = in.position();
                char unicode = in.get();
                byte[] ansel;
                if (unicode <= 0x7f) {
                    // 7-bit characters pass through unchanged.
                    ansel = new byte[] {(byte) unicode};
                } else {
                    ansel = mapping.get(unicode);
                    if (ansel == null) {
                        // Leave the offending char unconsumed; the result length counts
                        // input chars starting at the current position.
                        in.position(start);
                        return CoderResult.unmappableForLength(1);
                    }
                }
                // Leading zero bytes are not written, but the last byte always is.
                int from = 0;
                while (from < ansel.length - 1 && ansel[from] == 0) {
                    from++;
                }
                int count = ansel.length - from;
                if (out.remaining() < count) {
                    // Not enough room for the whole sequence: retry this char later.
                    in.position(start);
                    return CoderResult.OVERFLOW;
                }
                out.put(ansel, from, count);
            }
            return CoderResult.UNDERFLOW;
        }
    }

    /**
     * Reads bytes (first from a pending buffer, then from {@link #onNextByte()}) and turns
     * them into characters by walking the reverse mapping trie.
     */
    abstract class ReverseMappingBuffer {

        private Map<Byte, ReverseMappingEntity> rm;

        private LinkedList<Byte> buffer;

        ReverseMappingBuffer(Map<Byte, ReverseMappingEntity> rm, LinkedList<Byte> buffer) {
            this.rm = rm;
            this.buffer = buffer;
        }

        boolean hasRemaining() {
            return !buffer.isEmpty();
        }

        /**
         * Decodes the next character using the longest matching byte sequence.
         *
         * @return the decoded character; a byte without any mapping is returned as the
         *     character with the same unsigned value; {@code null} if the input ended before
         *     any character could be determined
         */
        Character nextCharacter() {
            LinkedList<Byte> queue = new LinkedList<>();
            ReverseMappingEntity rme = null;
            Character ch = null;
            for (Byte b = nextByte(); b != null; b = nextByte()) {
                queue.addLast(b);
                rme = rme != null ? rme.getMapping().get(b) : rm.get(b);
                if (rme == null) {
                    // Dead end: un-read everything after the last complete match. The bytes
                    // go to the FRONT so they keep their order relative to pending bytes.
                    buffer.addAll(0, queue);
                    if (ch != null) {
                        return ch;
                    }
                    // No match at all: emit the byte itself, masked so that values
                    // 0x80..0xFF do not sign-extend into U+FF80..U+FFFF.
                    return Character.valueOf((char) (nextByte() & 0xFF));
                }
                if (rme.getCharacter() != null) {
                    ch = rme.getCharacter();
                    queue.clear();
                }
            }
            // Input exhausted: keep bytes read beyond the last complete match instead of
            // silently dropping them.
            if (!queue.isEmpty()) {
                buffer.addAll(0, queue);
            }
            return ch;
        }

        /** Supplies the next input byte, or {@code null} when no more input is available. */
        protected abstract Byte onNextByte();

        private Byte nextByte() {
            if (!buffer.isEmpty()) {
                return buffer.pollFirst();
            } else {
                return onNextByte();
            }
        }
    }
}

View file

@ -0,0 +1,98 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.text.Normalizer;
/**
 * Base decoder for single-byte character sets with combining bytes.
 *
 * <p>Bytes are collected until a byte arrives for which {@link #isCombiningCharacter(byte)}
 * is {@code false}. The collected bytes are then converted with {@link #byteToChar(byte)} and
 * written in reverse byte order, so the non-combining byte comes first. By default the
 * result is normalized to NFC before it is written.
 */
abstract class SingleByteDecoder extends CharsetDecoder {

    private boolean composeCharactersAfterConversion = true;

    SingleByteDecoder(Charset cs) {
        super(cs, 1.0f, 1.0f);
    }

    /**
     * @param composeCharactersAfterConversion {@code true} to normalize every decoded cluster
     *     to NFC, {@code false} to write the converted characters unchanged
     */
    void setComposeCharactersAfterConversion(boolean composeCharactersAfterConversion) {
        this.composeCharactersAfterConversion = composeCharactersAfterConversion;
    }

    /**
     * Decodes complete clusters (any number of combining bytes followed by one non-combining
     * byte). Combining bytes at the end of the input that are not yet followed by a
     * non-combining byte are left unconsumed in {@code in}, so they are neither lost at a
     * chunk boundary nor silently dropped at the end of the input.
     */
    @Override
    protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
        // Absolute index in 'in' where the cluster currently being collected begins.
        int clusterStart = in.position();
        while (in.hasRemaining()) {
            byte c = in.get();
            if (isCombiningCharacter(c)) {
                continue;
            }
            int clusterEnd = in.position();
            StringBuilder converted = new StringBuilder(clusterEnd - clusterStart);
            // Reverse order: the non-combining byte was read last but is written first.
            for (int i = clusterEnd - 1; i >= clusterStart; i--) {
                char convertedCharacter = byteToChar(in.get(i));
                if (convertedCharacter == 0) {
                    // byteToChar signals "no mapping" with the NUL character.
                    converted.append(replacement());
                } else {
                    converted.append(convertedCharacter);
                }
            }
            String result = composeCharactersAfterConversion
                    ? Normalizer.normalize(converted, Normalizer.Form.NFC)
                    : converted.toString();
            if (out.remaining() < result.length()) {
                // Give the whole cluster back so it is decoded again with more room.
                in.position(clusterStart);
                return CoderResult.OVERFLOW;
            }
            out.put(result);
            clusterStart = clusterEnd;
        }
        // Un-read an unfinished cluster of trailing combining bytes.
        in.position(clusterStart);
        return CoderResult.UNDERFLOW;
    }

    /**
     * @param c a byte of the encoded input
     * @return {@code true} if the byte belongs to the following non-combining byte
     */
    public abstract boolean isCombiningCharacter(byte c);

    /**
     * @param b a byte of the encoded input
     * @return the character for the byte, or the NUL character if there is no mapping
     */
    public abstract char byteToChar(byte b);
}

View file

@ -0,0 +1,103 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.text.Normalizer;
/**
 * Base encoder for single-byte character sets with combining bytes.
 *
 * <p>By default every input character is decomposed (NFD) first. A character that is not a
 * combining mark starts a new cluster; the cluster is written in reverse character order,
 * so the bytes of the combining marks are written before the byte of the character they
 * follow. Characters for which {@link #charToByte(char)} returns {@code 0} are written as
 * the replacement bytes.
 */
abstract class SingleByteEncoder extends CharsetEncoder {

    private boolean decomposeCharactersBeforeConversion = true;

    SingleByteEncoder(Charset cs) {
        // One input char can decompose into several chars and therefore several bytes, so
        // the maximum must be larger than the average; otherwise callers that size their
        // output buffer from maxBytesPerChar() run into an overflow.
        super(cs, 1.0f, 4.0f);
    }

    /**
     * @param decomposeCharactersBeforeConversion {@code true} to decompose every character to
     *     NFD before conversion, {@code false} to convert the characters as they are
     */
    public void setDecomposeCharactersBeforeConversion(boolean decomposeCharactersBeforeConversion) {
        this.decomposeCharactersBeforeConversion = decomposeCharactersBeforeConversion;
    }

    /**
     * Encodes the input cluster by cluster. The cluster still pending when the input is
     * exhausted is written before returning. On overflow the pending cluster is handed back
     * by rewinding {@code in} to the first input char of that cluster.
     */
    @Override
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        // Worst case per converted char: a single byte or the replacement sequence.
        int bytesPerChar = Math.max(1, replacement().length);
        StringBuilder pending = new StringBuilder();
        // Position in 'in' of the first char that contributed to 'pending'.
        int pendingStart = in.position();
        while (in.hasRemaining()) {
            int charStart = in.position();
            char c = in.get();
            String charAsString = decomposeCharactersBeforeConversion
                    ? Normalizer.normalize(String.valueOf(c), Normalizer.Form.NFD)
                    : String.valueOf(c);
            if (out.remaining() < (pending.length() + charAsString.length()) * bytesPerChar) {
                in.position(pendingStart);
                return CoderResult.OVERFLOW;
            }
            if (pending.length() > 0 && !isCombiningCharacter(c)) {
                writeReversed(pending, out);
                pendingStart = charStart;
            }
            pending.append(charAsString);
        }
        writeReversed(pending, out);
        return CoderResult.UNDERFLOW;
    }

    /**
     * @param c a (possibly decomposed) character
     * @return the byte for the character, or {@code 0} if there is no mapping
     */
    public abstract byte charToByte(char c);

    /**
     * @param c a character of the input
     * @return {@code true} if the character lies in the range U+0300 to U+036F
     */
    public boolean isCombiningCharacter(char c) {
        return c >= '\u0300' && c <= '\u036F';
    }

    /** Converts the pending cluster last-char-first, then empties it. */
    private void writeReversed(StringBuilder pending, ByteBuffer out) {
        for (int i = pending.length() - 1; i >= 0; i--) {
            convert(pending.charAt(i), out);
        }
        pending.setLength(0);
    }

    private void convert(char c, ByteBuffer out) {
        byte b = charToByte(c);
        if (b != 0) {
            out.put(b);
        } else {
            out.put(replacement());
        }
    }
}

View file

@ -0,0 +1,4 @@
/**
* Bibliographic character set implementations.
*/
package org.xbib.charset;

View file

@ -0,0 +1 @@
org.xbib.charset.BibliographicCharsetProvider

View file

@ -0,0 +1,598 @@
U001b=0x1b
U001d=0x1d
U001e=0x1e
U001f=0x1f
U0020=0x20
U0021=0x21
U0022=0x22
U0023=0x23
U0024=0x24
U0025=0x25
U0026=0x26
U0027=0x27
U0028=0x28
U0029=0x29
U002a=0x2a
U002b=0x2b
U002c=0x2c
U002d=0x2d
U002e=0x2e
U002f=0x2f
U0030=0x30
U0031=0x31
U0032=0x32
U0033=0x33
U0034=0x34
U0035=0x35
U0036=0x36
U0037=0x37
U0038=0x38
U0039=0x39
U003a=0x3a
U003b=0x3b
U003c=0x3c
U003d=0x3d
U003e=0x3e
U003f=0x3f
U0040=0x40
U0041=0x41
U0042=0x42
U0043=0x43
U0044=0x44
U0045=0x45
U0046=0x46
U0047=0x47
U0048=0x48
U0049=0x49
U004a=0x4a
U004b=0x4b
U004c=0x4c
U004d=0x4d
U004e=0x4e
U004f=0x4f
U0050=0x50
U0051=0x51
U0052=0x52
U0053=0x53
U0054=0x54
U0055=0x55
U0056=0x56
U0057=0x57
U0058=0x58
U0059=0x59
U005a=0x5a
U005b=0x5b
U005c=0x5c
U005d=0x5d
U005e=0x5e
U005f=0x5f
U0060=0x60
U0061=0x61
U0062=0x62
U0063=0x63
U0064=0x64
U0065=0x65
U0066=0x66
U0067=0x67
U0068=0x68
U0069=0x69
U006a=0x6a
U006b=0x6b
U006c=0x6c
U006d=0x6d
U006e=0x6e
U006f=0x6f
U0070=0x70
U0071=0x71
U0072=0x72
U0073=0x73
U0074=0x74
U0075=0x75
U0076=0x76
U0077=0x77
U0078=0x78
U0079=0x79
U007a=0x7a
U007b=0x7b
U007c=0x7c
U007d=0x7d
U007e=0x7e
U0098=0x88
U009c=0x89
U200d=0x8d
U200c=0x8e
U0141=0xa1
U00d8=0xa2
U0110=0xa3
U00de=0xa4
U00c6=0xa5
U0152=0xa6
U02b9=0xa7
U00b7=0xa8
U266d=0xa9
U00ae=0xaa
U00b1=0xab
U01a0=0xac
U01af=0xad
U02bc=0xae
U02bb=0xb0
U0142=0xb1
U00f8=0xb2
U0111=0xb3
U00fe=0xb4
U00e6=0xb5
U0153=0xb6
U02ba=0xb7
U0131=0xb8
U00a3=0xb9
U00f0=0xba
U01a1=0xbc
U01b0=0xbd
U00b0=0xc0
U2113=0xc1
U2117=0xc2
U00a9=0xc3
U266f=0xc4
U00bf=0xc5
U00a1=0xc6
U00df=0xc7
U20ac=0xc8
U0309=0xe0
U1ea2=0xe0 0x41
U1eba=0xe0 0x45
U1ec8=0xe0 0x49
U1ece=0xe0 0x4f
U1ee6=0xe0 0x55
U1ef6=0xe0 0x59
U1ea3=0xe0 0x61
U1ebb=0xe0 0x65
U1ec9=0xe0 0x69
U1ecf=0xe0 0x6f
U1ee7=0xe0 0x75
U1ef7=0xe0 0x79
U1ede=0xe0 0xac
U1eec=0xe0 0xad
U1edf=0xe0 0xbc
U1eed=0xe0 0xbd
U1ea8=0xe0 0xe3 0x41
U1ec2=0xe0 0xe3 0x45
U1ed4=0xe0 0xe3 0x4f
U1ea9=0xe0 0xe3 0x61
U1ec3=0xe0 0xe3 0x65
U1ed5=0xe0 0xe3 0x6f
U1eb2=0xe0 0xe6 0x41
U1eb3=0xe0 0xe6 0x61
U0300=0xe1
U00c0=0xe1 0x41
U00c8=0xe1 0x45
U00cc=0xe1 0x49
U01f8=0xe1 0x4e
U00d2=0xe1 0x4f
U00d9=0xe1 0x55
U1e80=0xe1 0x57
U1ef2=0xe1 0x59
U00e0=0xe1 0x61
U00e8=0xe1 0x65
U00ec=0xe1 0x69
U01f9=0xe1 0x6e
U00f2=0xe1 0x6f
U00f9=0xe1 0x75
U1e81=0xe1 0x77
U1ef3=0xe1 0x79
U1edc=0xe1 0xac
U1eea=0xe1 0xad
U1edd=0xe1 0xbc
U1eeb=0xe1 0xbd
U1ea6=0xe1 0xe3 0x41
U1ec0=0xe1 0xe3 0x45
U1ed2=0xe1 0xe3 0x4f
U1ea7=0xe1 0xe3 0x61
U1ec1=0xe1 0xe3 0x65
U1ed3=0xe1 0xe3 0x6f
U1e14=0xe1 0xe5 0x45
U1e50=0xe1 0xe5 0x4f
U1e15=0xe1 0xe5 0x65
U1e51=0xe1 0xe5 0x6f
U1eb0=0xe1 0xe6 0x41
U1eb1=0xe1 0xe6 0x61
U01db=0xe1 0xe8 0x55
U01dc=0xe1 0xe8 0x75
U0301=0xe2
U00c1=0xe2 0x41
U0106=0xe2 0x43
U00c9=0xe2 0x45
U01f4=0xe2 0x47
U00cd=0xe2 0x49
U1e30=0xe2 0x4b
U0139=0xe2 0x4c
U1e3e=0xe2 0x4d
U0143=0xe2 0x4e
U00d3=0xe2 0x4f
U1e54=0xe2 0x50
U0154=0xe2 0x52
U015a=0xe2 0x53
U00da=0xe2 0x55
U1e82=0xe2 0x57
U00dd=0xe2 0x59
U0179=0xe2 0x5a
U00e1=0xe2 0x61
U0107=0xe2 0x63
U00e9=0xe2 0x65
U01f5=0xe2 0x67
U00ed=0xe2 0x69
U1e31=0xe2 0x6b
U013a=0xe2 0x6c
U1e3f=0xe2 0x6d
U0144=0xe2 0x6e
U00f3=0xe2 0x6f
U1e55=0xe2 0x70
U0155=0xe2 0x72
U015b=0xe2 0x73
U00fa=0xe2 0x75
U1e83=0xe2 0x77
U00fd=0xe2 0x79
U017a=0xe2 0x7a
U01fe=0xe2 0xa2
U01fc=0xe2 0xa5
U1eda=0xe2 0xac
U1ee8=0xe2 0xad
U01ff=0xe2 0xb2
U01fd=0xe2 0xb5
U1edb=0xe2 0xbc
U1ee9=0xe2 0xbd
U1ea4=0xe2 0xe3 0x41
U1ebe=0xe2 0xe3 0x45
U1ed0=0xe2 0xe3 0x4f
U1ea5=0xe2 0xe3 0x61
U1ebf=0xe2 0xe3 0x65
U1ed1=0xe2 0xe3 0x6f
U1e4c=0xe2 0xe4 0x4f
U1e78=0xe2 0xe4 0x55
U1e4d=0xe2 0xe4 0x6f
U1e79=0xe2 0xe4 0x75
U1e16=0xe2 0xe5 0x45
U1e52=0xe2 0xe5 0x4f
U1e17=0xe2 0xe5 0x65
U1e53=0xe2 0xe5 0x6f
U1eae=0xe2 0xe6 0x41
U1eaf=0xe2 0xe6 0x61
U0344=0xe2 0xe8
U1e2e=0xe2 0xe8 0x49
U01d7=0xe2 0xe8 0x55
U1e2f=0xe2 0xe8 0x69
U01d8=0xe2 0xe8 0x75
U01fa=0xe2 0xea 0x41
U01fb=0xe2 0xea 0x61
U1e08=0xe2 0xf0 0x43
U1e09=0xe2 0xf0 0x63
U0302=0xe3
U00c2=0xe3 0x41
U0108=0xe3 0x43
U00ca=0xe3 0x45
U011c=0xe3 0x47
U0124=0xe3 0x48
U00ce=0xe3 0x49
U0134=0xe3 0x4a
U00d4=0xe3 0x4f
U015c=0xe3 0x53
U00db=0xe3 0x55
U0174=0xe3 0x57
U0176=0xe3 0x59
U1e90=0xe3 0x5a
U00e2=0xe3 0x61
U0109=0xe3 0x63
U00ea=0xe3 0x65
U011d=0xe3 0x67
U0125=0xe3 0x68
U00ee=0xe3 0x69
U0135=0xe3 0x6a
U00f4=0xe3 0x6f
U015d=0xe3 0x73
U00fb=0xe3 0x75
U0175=0xe3 0x77
U0177=0xe3 0x79
U1e91=0xe3 0x7a
U1eac=0xe3 0xf2 0x41
U1ec6=0xe3 0xf2 0x45
U1ed8=0xe3 0xf2 0x4f
U1ead=0xe3 0xf2 0x61
U1ec7=0xe3 0xf2 0x65
U1ed9=0xe3 0xf2 0x6f
U0303=0xe4
U00c3=0xe4 0x41
U1ebc=0xe4 0x45
U0128=0xe4 0x49
U00d1=0xe4 0x4e
U00d5=0xe4 0x4f
U0168=0xe4 0x55
U1e7c=0xe4 0x56
U1ef8=0xe4 0x59
U00e3=0xe4 0x61
U1ebd=0xe4 0x65
U0129=0xe4 0x69
U00f1=0xe4 0x6e
U00f5=0xe4 0x6f
U0169=0xe4 0x75
U1e7d=0xe4 0x76
U1ef9=0xe4 0x79
U1ee0=0xe4 0xac
U1eee=0xe4 0xad
U1ee1=0xe4 0xbc
U1eef=0xe4 0xbd
U1eaa=0xe4 0xe3 0x41
U1ec4=0xe4 0xe3 0x45
U1ed6=0xe4 0xe3 0x4f
U1eab=0xe4 0xe3 0x61
U1ec5=0xe4 0xe3 0x65
U1ed7=0xe4 0xe3 0x6f
U1eb4=0xe4 0xe6 0x41
U1eb5=0xe4 0xe6 0x61
U0304=0xe5
U0100=0xe5 0x41
U0112=0xe5 0x45
U1e20=0xe5 0x47
U012a=0xe5 0x49
U014c=0xe5 0x4f
U016a=0xe5 0x55
U0232=0xe5 0x59
U0101=0xe5 0x61
U0113=0xe5 0x65
U1e21=0xe5 0x67
U012b=0xe5 0x69
U014d=0xe5 0x6f
U016b=0xe5 0x75
U0233=0xe5 0x79
U01e2=0xe5 0xa5
U01e3=0xe5 0xb5
U022c=0xe5 0xe4 0x4f
U022d=0xe5 0xe4 0x6f
U01e0=0xe5 0xe7 0x41
U0230=0xe5 0xe7 0x4f
U01e1=0xe5 0xe7 0x61
U0231=0xe5 0xe7 0x6f
U01de=0xe5 0xe8 0x41
U022a=0xe5 0xe8 0x4f
U01d5=0xe5 0xe8 0x55
U01df=0xe5 0xe8 0x61
U022b=0xe5 0xe8 0x6f
U01d6=0xe5 0xe8 0x75
U01ec=0xe5 0xf1 0x4f
U01ed=0xe5 0xf1 0x6f
U1e38=0xe5 0xf2 0x4c
U1e5c=0xe5 0xf2 0x52
U1e39=0xe5 0xf2 0x6c
U1e5d=0xe5 0xf2 0x72
U0306=0xe6
U0102=0xe6 0x41
U0114=0xe6 0x45
U011e=0xe6 0x47
U012c=0xe6 0x49
U014e=0xe6 0x4f
U016c=0xe6 0x55
U0103=0xe6 0x61
U0115=0xe6 0x65
U011f=0xe6 0x67
U012d=0xe6 0x69
U014f=0xe6 0x6f
U016d=0xe6 0x75
U1e1c=0xe6 0xf0 0x45
U1e1d=0xe6 0xf0 0x65
U1eb6=0xe6 0xf2 0x41
U1eb7=0xe6 0xf2 0x61
U0307=0xe7
U0226=0xe7 0x41
U1e02=0xe7 0x42
U010a=0xe7 0x43
U1e0a=0xe7 0x44
U0116=0xe7 0x45
U1e1e=0xe7 0x46
U0120=0xe7 0x47
U1e22=0xe7 0x48
U0130=0xe7 0x49
U1e40=0xe7 0x4d
U1e44=0xe7 0x4e
U022e=0xe7 0x4f
U1e56=0xe7 0x50
U1e58=0xe7 0x52
U1e60=0xe7 0x53
U1e6a=0xe7 0x54
U1e86=0xe7 0x57
U1e8a=0xe7 0x58
U1e8e=0xe7 0x59
U017b=0xe7 0x5a
U0227=0xe7 0x61
U1e03=0xe7 0x62
U010b=0xe7 0x63
U1e0b=0xe7 0x64
U0117=0xe7 0x65
U1e1f=0xe7 0x66
U0121=0xe7 0x67
U1e23=0xe7 0x68
U1e41=0xe7 0x6d
U1e45=0xe7 0x6e
U022f=0xe7 0x6f
U1e57=0xe7 0x70
U1e59=0xe7 0x72
U1e61=0xe7 0x73
U1e6b=0xe7 0x74
U1e87=0xe7 0x77
U1e8b=0xe7 0x78
U1e8f=0xe7 0x79
U017c=0xe7 0x7a
U1e64=0xe7 0xe2 0x53
U1e65=0xe7 0xe2 0x73
U1e66=0xe7 0xe9 0x53
U1e67=0xe7 0xe9 0x73
U1e68=0xe7 0xf2 0x53
U1e69=0xe7 0xf2 0x73
U0308=0xe8
U00c4=0xe8 0x41
U00cb=0xe8 0x45
U1e26=0xe8 0x48
U00cf=0xe8 0x49
U00d6=0xe8 0x4f
U00dc=0xe8 0x55
U1e84=0xe8 0x57
U1e8c=0xe8 0x58
U0178=0xe8 0x59
U00e4=0xe8 0x61
U00eb=0xe8 0x65
U1e27=0xe8 0x68
U00ef=0xe8 0x69
U00f6=0xe8 0x6f
U1e97=0xe8 0x74
U00fc=0xe8 0x75
U1e85=0xe8 0x77
U1e8d=0xe8 0x78
U00ff=0xe8 0x79
U1e4e=0xe8 0xe4 0x4f
U1e4f=0xe8 0xe4 0x6f
U1e7a=0xe8 0xe5 0x55
U1e7b=0xe8 0xe5 0x75
U030c=0xe9
U01cd=0xe9 0x41
U010c=0xe9 0x43
U010e=0xe9 0x44
U011a=0xe9 0x45
U01e6=0xe9 0x47
U021e=0xe9 0x48
U01cf=0xe9 0x49
U01e8=0xe9 0x4b
U013d=0xe9 0x4c
U0147=0xe9 0x4e
U01d1=0xe9 0x4f
U0158=0xe9 0x52
U0160=0xe9 0x53
U0164=0xe9 0x54
U01d3=0xe9 0x55
U017d=0xe9 0x5a
U01ce=0xe9 0x61
U010d=0xe9 0x63
U010f=0xe9 0x64
U011b=0xe9 0x65
U01e7=0xe9 0x67
U021f=0xe9 0x68
U01d0=0xe9 0x69
U01f0=0xe9 0x6a
U01e9=0xe9 0x6b
U013e=0xe9 0x6c
U0148=0xe9 0x6e
U01d2=0xe9 0x6f
U0159=0xe9 0x72
U0161=0xe9 0x73
U0165=0xe9 0x74
U01d4=0xe9 0x75
U017e=0xe9 0x7a
U01d9=0xe9 0xe8 0x55
U01da=0xe9 0xe8 0x75
U030a=0xea
U00c5=0xea 0x41
U016e=0xea 0x55
U00e5=0xea 0x61
U016f=0xea 0x75
U1e98=0xea 0x77
U1e99=0xea 0x79
U0361=0xeb
U0315=0xed
U030b=0xee
U0150=0xee 0x4f
U0170=0xee 0x55
U0151=0xee 0x6f
U0171=0xee 0x75
U0310=0xef
U0327=0xf0
U00c7=0xf0 0x43
U1e10=0xf0 0x44
U0228=0xf0 0x45
U0122=0xf0 0x47
U1e28=0xf0 0x48
U0136=0xf0 0x4b
U013b=0xf0 0x4c
U0145=0xf0 0x4e
U0156=0xf0 0x52
U015e=0xf0 0x53
U0162=0xf0 0x54
U00e7=0xf0 0x63
U1e11=0xf0 0x64
U0229=0xf0 0x65
U0123=0xf0 0x67
U1e29=0xf0 0x68
U0137=0xf0 0x6b
U013c=0xf0 0x6c
U0146=0xf0 0x6e
U0157=0xf0 0x72
U015f=0xf0 0x73
U0163=0xf0 0x74
U0328=0xf1
U0104=0xf1 0x41
U0118=0xf1 0x45
U012e=0xf1 0x49
U01ea=0xf1 0x4f
U0172=0xf1 0x55
U0105=0xf1 0x61
U0119=0xf1 0x65
U012f=0xf1 0x69
U01eb=0xf1 0x6f
U0173=0xf1 0x75
U0323=0xf2
U1ea0=0xf2 0x41
U1e04=0xf2 0x42
U1e0c=0xf2 0x44
U1eb8=0xf2 0x45
U1e24=0xf2 0x48
U1eca=0xf2 0x49
U1e32=0xf2 0x4b
U1e36=0xf2 0x4c
U1e42=0xf2 0x4d
U1e46=0xf2 0x4e
U1ecc=0xf2 0x4f
U1e5a=0xf2 0x52
U1e62=0xf2 0x53
U1e6c=0xf2 0x54
U1ee4=0xf2 0x55
U1e7e=0xf2 0x56
U1e88=0xf2 0x57
U1ef4=0xf2 0x59
U1e92=0xf2 0x5a
U1ea1=0xf2 0x61
U1e05=0xf2 0x62
U1e0d=0xf2 0x64
U1eb9=0xf2 0x65
U1e25=0xf2 0x68
U1ecb=0xf2 0x69
U1e33=0xf2 0x6b
U1e37=0xf2 0x6c
U1e43=0xf2 0x6d
U1e47=0xf2 0x6e
U1ecd=0xf2 0x6f
U1e5b=0xf2 0x72
U1e63=0xf2 0x73
U1e6d=0xf2 0x74
U1ee5=0xf2 0x75
U1e7f=0xf2 0x76
U1e89=0xf2 0x77
U1ef5=0xf2 0x79
U1e93=0xf2 0x7a
U1ee2=0xf2 0xac
U1ef0=0xf2 0xad
U1ee3=0xf2 0xbc
U1ef1=0xf2 0xbd
U0324=0xf3
U1e72=0xf3 0x55
U1e73=0xf3 0x75
U0325=0xf4
U1e00=0xf4 0x41
U1e01=0xf4 0x61
U0333=0xf5
U0332=0xf6
U0326=0xf7
U0218=0xf7 0x53
U021a=0xf7 0x54
U0219=0xf7 0x73
U021b=0xf7 0x74
U031c=0xf8
U032e=0xf9
U1e2a=0xf9 0x48
U1e2b=0xf9 0x68
U0360=0xfa
U0313=0xfe

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,212 @@
/*
De PICA characterset is een enigszins gemodificeerde versie van de INTERMARC characterset.
Deze characterset omvat in totaal 256 tekens.
Kolommen:
(1) = Octaal
(2) = Teken
(3) = Omschrijving
(1) (2) (3)
-----------------------------------------------------------------
000-177 Standaard ASCII (eerste groep van 128 tekens)
200-237 niet gebruikt
240 diacritische spatie
241 Poolse L
242 Deense O
243 Ð Servische D
244 Þ Thorn (groot)
245 Æ Ligatuur AE
246 ¼ Ligatuur OE
247 ¢ Cyrillische zachtteken (translitt.)
250 × Griekse half-hoge punt
251 Mol
252 ® Registratie-teken
253 ± Plusminus
254 O Vietnamese O-haak
255 U Vietnamese U-haak
256 ? Alif
257 Å Angstrom A
260 ` Ayn
261 Poolse l
262 Deense o
263 Servische d
264 þ Thorn (klein)
265 æ Ligatuur ae
266 œ Ligatuur oe
267 Cyrillische hardteken (translitt.)
270 Turkse i (zonder punt)
271 £ Brits pond-teken
272 Eth
273 a Alfa
274 Vietnamese o-haak
275 Vietnamese u-haak
276 ß Duitse dubbele S
277 å Angstrom a
300 Nederlandse IJ
301 Ä Umlaut A
302 Ö Umlaut O
303 Ü Umlaut U
304 Omgekeerde C
305 Omgekeerde E
306 ≠ Ongelijk-teken
307 → Fleche
310 ≤ Kleiner dan/is-gelijk-teken
311 ∞ Oneindig-teken
312 ∫ Integraal-teken
313 Vermenigvuldiging-teken
314 § Paragraaf
315 √ Vierkantswortel-teken
316 Reaction
317 ≥ Groter dan/is-gelijk-teken
320 Nederlandse ij
321 ä Umlaut a
322 ö Umlaut o
323 ü Umlaut u
324 Omgekeerde c
325 Omgekeerde e
326 ¿ Spaans omgekeerd vraagteken
327 ¡ Spaans omgekeerd uitroepteken
330 b Beta
331
332 g Gamma
333 p Pi
334
335
336
337
340 ` Vietnamese rijzende toon
341 ` Accent grave (zie ook octaal 140)
342 ? Accent aigu
343 ? Accent circonflexe (zie ook 140)
344 ~ Tilde
345 ¯ Bovenstreepje (lang)
346 Bovenstreepje (kort)
347 × Punt boven
350 ? Trema (geen umlaut)
351 Hacek
352 ? Angstrom
353 Ligatuur links
354 Ligatuur rechts
355 ' Komma als accent (bovenaan)
356 ² Dubbele aigu
357 Candrabindu
360 ? Cedille
361 Hoek boven links
362 ¢ Punt als accent (onderaan)
363 ² Twee punten als accent (onderaan)
364 Cirkeltje onderaan
365 Dubbele onderstreping als accent
366 _ Onderstreping als accent
367 Hoek boven rechts
370 Omgekeerde cedille
371 Upadhmaniya (geen accent)
372 Halve tilde rechts
373 Halve tilde links
374
375
376 ? Komma rechts (op middelhoogte)
377
*/

View file

@ -0,0 +1,547 @@
#step 1
#created: 2001-03-19
A1=0141#latin capital letter L with stroke
A2=00D8#latin capital letter O with stroke
A3=0110#latin capital letter D with stroke
A4=00DE#latin capital letter thorn
A5=00C6#latin capital letter AE
A6=0152#latin capital ligature OE
A7=02B9#modifier letter prime
A8=00B7#middle dot
A9=266D#music flat sign
AA=00AE#registered sign
AB=00B1#plus-minus sign
AC=01A0#latin capital letter O with horn
AD=01AF#latin capital letter U with horn
AE=02BC#modifier letter apostrophe
B0=02BB#modifier letter turned comma
B1=0142#latin small letter L with stroke
B2=00F8#latin small letter O with stroke
B3=0111#latin small letter D with stroke
B4=00FE#latin small letter thorn
B5=00E6#latin small letter AE
B6=0153#latin small ligature OE
B7=02BA#modifier letter double prime
B8=0131#latin small letter dotless i
B9=00A3#pound sign
BA=00F0#latin small letter eth
BC=01A1#latin small letter O with horn
BD=01B0#latin small letter U with horn
C0=00B0#degree sign
C1=2113#script small L
C2=2117#sound recording copyright
C3=00A9#copyright sign
C4=266F#music sharp sign
C5=00BF#inverted question mark
C6=00A1#inverted exclamation mark
CF=00DF#latin small letter sharp S
E0=0309#combining hook above
E1=0300#combining grave accent
E2=0301#combining acute accent
E3=0302#combining circumflex accent
E4=0303#combining tilde
E5=0304#combining macron
E6=0306#combining breve
E7=0307#combining dot above
E8=0308#combining diaeresis
E9=030C#combining caron
EA=030A#combining ring above
EB=FE20#combining ligature left half
EC=FE21#combining ligature right half
ED=0315#combining comma above right
EE=030B#combining double acute accent
EF=0310#combining candrabindu
F0=0327#combining cedilla
F1=0328#combining ogonek
F2=0323#combining dot below
F3=0324#combining diaeresis below
F4=0325#combining ring below
F5=0333#combining double low line
F6=0332#combining low line
F7=0326#combining comma below
F8=0321#combining palatalized hook below
F9=032E#combining breve below
FA=FE22#combining double tilde left half
FB=FE23#combining double tilde right half
FE=0313#combining comma above
#step 2
#created: 20 january 1998
0041+0300=00C0# LATIN CAPITAL LETTER A WITH GRAVE = LATIN CAPITAL LETTER A + COMBINING GRAVE ACCENT
0041+0301=00C1# LATIN CAPITAL LETTER A WITH ACUTE = LATIN CAPITAL LETTER A + COMBINING ACUTE ACCENT
0041+0302=00C2# LATIN CAPITAL LETTER A WITH CIRCUMFLEX = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT
0041+0303=00C3# LATIN CAPITAL LETTER A WITH TILDE = LATIN CAPITAL LETTER A + COMBINING TILDE
0041+0308=00C4# LATIN CAPITAL LETTER A WITH DIAERESIS = LATIN CAPITAL LETTER A + COMBINING DIAERESIS
0041+030A=00C5# LATIN CAPITAL LETTER A WITH RING ABOVE = LATIN CAPITAL LETTER A + COMBINING RING ABOVE
0043+0327=00C7# LATIN CAPITAL LETTER C WITH CEDILLA = LATIN CAPITAL LETTER C + COMBINING CEDILLA
0045+0300=00C8# LATIN CAPITAL LETTER E WITH GRAVE = LATIN CAPITAL LETTER E + COMBINING GRAVE ACCENT
0045+0301=00C9# LATIN CAPITAL LETTER E WITH ACUTE = LATIN CAPITAL LETTER E + COMBINING ACUTE ACCENT
0045+0302=00CA# LATIN CAPITAL LETTER E WITH CIRCUMFLEX = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT
0045+0308=00CB# LATIN CAPITAL LETTER E WITH DIAERESIS = LATIN CAPITAL LETTER E + COMBINING DIAERESIS
0049+0300=00CC# LATIN CAPITAL LETTER I WITH GRAVE = LATIN CAPITAL LETTER I + COMBINING GRAVE ACCENT
0049+0301=00CD# LATIN CAPITAL LETTER I WITH ACUTE = LATIN CAPITAL LETTER I + COMBINING ACUTE ACCENT
0049+0302=00CE# LATIN CAPITAL LETTER I WITH CIRCUMFLEX = LATIN CAPITAL LETTER I + COMBINING CIRCUMFLEX ACCENT
0049+0308=00CF# LATIN CAPITAL LETTER I WITH DIAERESIS = LATIN CAPITAL LETTER I + COMBINING DIAERESIS
004E+0303=00D1# LATIN CAPITAL LETTER N WITH TILDE = LATIN CAPITAL LETTER N + COMBINING TILDE
004F+0300=00D2# LATIN CAPITAL LETTER O WITH GRAVE = LATIN CAPITAL LETTER O + COMBINING GRAVE ACCENT
004F+0301=00D3# LATIN CAPITAL LETTER O WITH ACUTE = LATIN CAPITAL LETTER O + COMBINING ACUTE ACCENT
004F+0302=00D4# LATIN CAPITAL LETTER O WITH CIRCUMFLEX = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT
004F+0303=00D5# LATIN CAPITAL LETTER O WITH TILDE = LATIN CAPITAL LETTER O + COMBINING TILDE
004F+0308=00D6# LATIN CAPITAL LETTER O WITH DIAERESIS = LATIN CAPITAL LETTER O + COMBINING DIAERESIS
0055+0300=00D9# LATIN CAPITAL LETTER U WITH GRAVE = LATIN CAPITAL LETTER U + COMBINING GRAVE ACCENT
0055+0301=00DA# LATIN CAPITAL LETTER U WITH ACUTE = LATIN CAPITAL LETTER U + COMBINING ACUTE ACCENT
0055+0302=00DB# LATIN CAPITAL LETTER U WITH CIRCUMFLEX = LATIN CAPITAL LETTER U + COMBINING CIRCUMFLEX ACCENT
0055+0308=00DC# LATIN CAPITAL LETTER U WITH DIAERESIS = LATIN CAPITAL LETTER U + COMBINING DIAERESIS
0059+0301=00DD# LATIN CAPITAL LETTER Y WITH ACUTE = LATIN CAPITAL LETTER Y + COMBINING ACUTE ACCENT
0061+0300=00E0# LATIN SMALL LETTER A WITH GRAVE = LATIN SMALL LETTER A + COMBINING GRAVE ACCENT
0061+0301=00E1# LATIN SMALL LETTER A WITH ACUTE = LATIN SMALL LETTER A + COMBINING ACUTE ACCENT
0061+0302=00E2# LATIN SMALL LETTER A WITH CIRCUMFLEX = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT
0061+0303=00E3# LATIN SMALL LETTER A WITH TILDE = LATIN SMALL LETTER A + COMBINING TILDE
0061+0308=00E4# LATIN SMALL LETTER A WITH DIAERESIS = LATIN SMALL LETTER A + COMBINING DIAERESIS
0061+030A=00E5# LATIN SMALL LETTER A WITH RING ABOVE = LATIN SMALL LETTER A + COMBINING RING ABOVE
0063+0327=00E7# LATIN SMALL LETTER C WITH CEDILLA = LATIN SMALL LETTER C + COMBINING CEDILLA
0065+0300=00E8# LATIN SMALL LETTER E WITH GRAVE = LATIN SMALL LETTER E + COMBINING GRAVE ACCENT
0065+0301=00E9# LATIN SMALL LETTER E WITH ACUTE = LATIN SMALL LETTER E + COMBINING ACUTE ACCENT
0065+0302=00EA# LATIN SMALL LETTER E WITH CIRCUMFLEX = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT
0065+0308=00EB# LATIN SMALL LETTER E WITH DIAERESIS = LATIN SMALL LETTER E + COMBINING DIAERESIS
0069+0300=00EC# LATIN SMALL LETTER I WITH GRAVE = LATIN SMALL LETTER I + COMBINING GRAVE ACCENT
0069+0301=00ED# LATIN SMALL LETTER I WITH ACUTE = LATIN SMALL LETTER I + COMBINING ACUTE ACCENT
0069+0302=00EE# LATIN SMALL LETTER I WITH CIRCUMFLEX = LATIN SMALL LETTER I + COMBINING CIRCUMFLEX ACCENT
0069+0308=00EF# LATIN SMALL LETTER I WITH DIAERESIS = LATIN SMALL LETTER I + COMBINING DIAERESIS
006E+0303=00F1# LATIN SMALL LETTER N WITH TILDE = LATIN SMALL LETTER N + COMBINING TILDE
006F+0300=00F2# LATIN SMALL LETTER O WITH GRAVE = LATIN SMALL LETTER O + COMBINING GRAVE ACCENT
006F+0301=00F3# LATIN SMALL LETTER O WITH ACUTE = LATIN SMALL LETTER O + COMBINING ACUTE ACCENT
006F+0302=00F4# LATIN SMALL LETTER O WITH CIRCUMFLEX = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT
006F+0303=00F5# LATIN SMALL LETTER O WITH TILDE = LATIN SMALL LETTER O + COMBINING TILDE
006F+0308=00F6# LATIN SMALL LETTER O WITH DIAERESIS = LATIN SMALL LETTER O + COMBINING DIAERESIS
0075+0300=00F9# LATIN SMALL LETTER U WITH GRAVE = LATIN SMALL LETTER U + COMBINING GRAVE ACCENT
0075+0301=00FA# LATIN SMALL LETTER U WITH ACUTE = LATIN SMALL LETTER U + COMBINING ACUTE ACCENT
0075+0302=00FB# LATIN SMALL LETTER U WITH CIRCUMFLEX = LATIN SMALL LETTER U + COMBINING CIRCUMFLEX ACCENT
0075+0308=00FC# LATIN SMALL LETTER U WITH DIAERESIS = LATIN SMALL LETTER U + COMBINING DIAERESIS
0079+0301=00FD# LATIN SMALL LETTER Y WITH ACUTE = LATIN SMALL LETTER Y + COMBINING ACUTE ACCENT
0079+0308=00FF# LATIN SMALL LETTER Y WITH DIAERESIS = LATIN SMALL LETTER Y + COMBINING DIAERESIS
0041+0304=0100# LATIN CAPITAL LETTER A WITH MACRON = LATIN CAPITAL LETTER A + COMBINING MACRON
0061+0304=0101# LATIN SMALL LETTER A WITH MACRON = LATIN SMALL LETTER A + COMBINING MACRON
0041+0306=0102# LATIN CAPITAL LETTER A WITH BREVE = LATIN CAPITAL LETTER A + COMBINING BREVE
0061+0306=0103# LATIN SMALL LETTER A WITH BREVE = LATIN SMALL LETTER A + COMBINING BREVE
0041+0328=0104# LATIN CAPITAL LETTER A WITH OGONEK = LATIN CAPITAL LETTER A + COMBINING OGONEK
0061+0328=0105# LATIN SMALL LETTER A WITH OGONEK = LATIN SMALL LETTER A + COMBINING OGONEK
0043+0301=0106# LATIN CAPITAL LETTER C WITH ACUTE = LATIN CAPITAL LETTER C + COMBINING ACUTE ACCENT
0063+0301=0107# LATIN SMALL LETTER C WITH ACUTE = LATIN SMALL LETTER C + COMBINING ACUTE ACCENT
0043+0302=0108# LATIN CAPITAL LETTER C WITH CIRCUMFLEX = LATIN CAPITAL LETTER C + COMBINING CIRCUMFLEX ACCENT
0063+0302=0109# LATIN SMALL LETTER C WITH CIRCUMFLEX = LATIN SMALL LETTER C + COMBINING CIRCUMFLEX ACCENT
0043+0307=010A# LATIN CAPITAL LETTER C WITH DOT ABOVE = LATIN CAPITAL LETTER C + COMBINING DOT ABOVE
0063+0307=010B# LATIN SMALL LETTER C WITH DOT ABOVE = LATIN SMALL LETTER C + COMBINING DOT ABOVE
0043+030C=010C# LATIN CAPITAL LETTER C WITH CARON = LATIN CAPITAL LETTER C + COMBINING CARON
0063+030C=010D# LATIN SMALL LETTER C WITH CARON = LATIN SMALL LETTER C + COMBINING CARON
0044+030C=010E# LATIN CAPITAL LETTER D WITH CARON = LATIN CAPITAL LETTER D + COMBINING CARON
0064+030C=010F# LATIN SMALL LETTER D WITH CARON = LATIN SMALL LETTER D + COMBINING CARON
0045+0304=0112# LATIN CAPITAL LETTER E WITH MACRON = LATIN CAPITAL LETTER E + COMBINING MACRON
0065+0304=0113# LATIN SMALL LETTER E WITH MACRON = LATIN SMALL LETTER E + COMBINING MACRON
0045+0306=0114# LATIN CAPITAL LETTER E WITH BREVE = LATIN CAPITAL LETTER E + COMBINING BREVE
0065+0306=0115# LATIN SMALL LETTER E WITH BREVE = LATIN SMALL LETTER E + COMBINING BREVE
0045+0307=0116# LATIN CAPITAL LETTER E WITH DOT ABOVE = LATIN CAPITAL LETTER E + COMBINING DOT ABOVE
0065+0307=0117# LATIN SMALL LETTER E WITH DOT ABOVE = LATIN SMALL LETTER E + COMBINING DOT ABOVE
0045+0328=0118# LATIN CAPITAL LETTER E WITH OGONEK = LATIN CAPITAL LETTER E + COMBINING OGONEK
0065+0328=0119# LATIN SMALL LETTER E WITH OGONEK = LATIN SMALL LETTER E + COMBINING OGONEK
0045+030C=011A# LATIN CAPITAL LETTER E WITH CARON = LATIN CAPITAL LETTER E + COMBINING CARON
0065+030C=011B# LATIN SMALL LETTER E WITH CARON = LATIN SMALL LETTER E + COMBINING CARON
0047+0302=011C# LATIN CAPITAL LETTER G WITH CIRCUMFLEX = LATIN CAPITAL LETTER G + COMBINING CIRCUMFLEX ACCENT
0067+0302=011D# LATIN SMALL LETTER G WITH CIRCUMFLEX = LATIN SMALL LETTER G + COMBINING CIRCUMFLEX ACCENT
0047+0306=011E# LATIN CAPITAL LETTER G WITH BREVE = LATIN CAPITAL LETTER G + COMBINING BREVE
0067+0306=011F# LATIN SMALL LETTER G WITH BREVE = LATIN SMALL LETTER G + COMBINING BREVE
0047+0307=0120# LATIN CAPITAL LETTER G WITH DOT ABOVE = LATIN CAPITAL LETTER G + COMBINING DOT ABOVE
0067+0307=0121# LATIN SMALL LETTER G WITH DOT ABOVE = LATIN SMALL LETTER G + COMBINING DOT ABOVE
0047+0327=0122# LATIN CAPITAL LETTER G WITH CEDILLA = LATIN CAPITAL LETTER G + COMBINING CEDILLA
0067+0327=0123# LATIN SMALL LETTER G WITH CEDILLA = LATIN SMALL LETTER G + COMBINING CEDILLA
0048+0302=0124# LATIN CAPITAL LETTER H WITH CIRCUMFLEX = LATIN CAPITAL LETTER H + COMBINING CIRCUMFLEX ACCENT
0068+0302=0125# LATIN SMALL LETTER H WITH CIRCUMFLEX = LATIN SMALL LETTER H + COMBINING CIRCUMFLEX ACCENT
0049+0303=0128# LATIN CAPITAL LETTER I WITH TILDE = LATIN CAPITAL LETTER I + COMBINING TILDE
0069+0303=0129# LATIN SMALL LETTER I WITH TILDE = LATIN SMALL LETTER I + COMBINING TILDE
0049+0304=012A# LATIN CAPITAL LETTER I WITH MACRON = LATIN CAPITAL LETTER I + COMBINING MACRON
0069+0304=012B# LATIN SMALL LETTER I WITH MACRON = LATIN SMALL LETTER I + COMBINING MACRON
0049+0306=012C# LATIN CAPITAL LETTER I WITH BREVE = LATIN CAPITAL LETTER I + COMBINING BREVE
0069+0306=012D# LATIN SMALL LETTER I WITH BREVE = LATIN SMALL LETTER I + COMBINING BREVE
0049+0328=012E# LATIN CAPITAL LETTER I WITH OGONEK = LATIN CAPITAL LETTER I + COMBINING OGONEK
0069+0328=012F# LATIN SMALL LETTER I WITH OGONEK = LATIN SMALL LETTER I + COMBINING OGONEK
0049+0307=0130# LATIN CAPITAL LETTER I WITH DOT ABOVE = LATIN CAPITAL LETTER I + COMBINING DOT ABOVE
004A+0302=0134# LATIN CAPITAL LETTER J WITH CIRCUMFLEX = LATIN CAPITAL LETTER J + COMBINING CIRCUMFLEX ACCENT
006A+0302=0135# LATIN SMALL LETTER J WITH CIRCUMFLEX = LATIN SMALL LETTER J + COMBINING CIRCUMFLEX ACCENT
004B+0327=0136# LATIN CAPITAL LETTER K WITH CEDILLA = LATIN CAPITAL LETTER K + COMBINING CEDILLA
006B+0327=0137# LATIN SMALL LETTER K WITH CEDILLA = LATIN SMALL LETTER K + COMBINING CEDILLA
004C+0301=0139# LATIN CAPITAL LETTER L WITH ACUTE = LATIN CAPITAL LETTER L + COMBINING ACUTE ACCENT
006C+0301=013A# LATIN SMALL LETTER L WITH ACUTE = LATIN SMALL LETTER L + COMBINING ACUTE ACCENT
004C+0327=013B# LATIN CAPITAL LETTER L WITH CEDILLA = LATIN CAPITAL LETTER L + COMBINING CEDILLA
006C+0327=013C# LATIN SMALL LETTER L WITH CEDILLA = LATIN SMALL LETTER L + COMBINING CEDILLA
004C+030C=013D# LATIN CAPITAL LETTER L WITH CARON = LATIN CAPITAL LETTER L + COMBINING CARON
006C+030C=013E# LATIN SMALL LETTER L WITH CARON = LATIN SMALL LETTER L + COMBINING CARON
004E+0301=0143# LATIN CAPITAL LETTER N WITH ACUTE = LATIN CAPITAL LETTER N + COMBINING ACUTE ACCENT
006E+0301=0144# LATIN SMALL LETTER N WITH ACUTE = LATIN SMALL LETTER N + COMBINING ACUTE ACCENT
004E+0327=0145# LATIN CAPITAL LETTER N WITH CEDILLA = LATIN CAPITAL LETTER N + COMBINING CEDILLA
006E+0327=0146# LATIN SMALL LETTER N WITH CEDILLA = LATIN SMALL LETTER N + COMBINING CEDILLA
004E+030C=0147# LATIN CAPITAL LETTER N WITH CARON = LATIN CAPITAL LETTER N + COMBINING CARON
006E+030C=0148# LATIN SMALL LETTER N WITH CARON = LATIN SMALL LETTER N + COMBINING CARON
004F+0304=014C# LATIN CAPITAL LETTER O WITH MACRON = LATIN CAPITAL LETTER O + COMBINING MACRON
006F+0304=014D# LATIN SMALL LETTER O WITH MACRON = LATIN SMALL LETTER O + COMBINING MACRON
004F+0306=014E# LATIN CAPITAL LETTER O WITH BREVE = LATIN CAPITAL LETTER O + COMBINING BREVE
006F+0306=014F# LATIN SMALL LETTER O WITH BREVE = LATIN SMALL LETTER O + COMBINING BREVE
004F+030B=0150# LATIN CAPITAL LETTER O WITH DOUBLE ACUTE = LATIN CAPITAL LETTER O + COMBINING DOUBLE ACUTE ACCENT
006F+030B=0151# LATIN SMALL LETTER O WITH DOUBLE ACUTE = LATIN SMALL LETTER O + COMBINING DOUBLE ACUTE ACCENT
0052+0301=0154# LATIN CAPITAL LETTER R WITH ACUTE = LATIN CAPITAL LETTER R + COMBINING ACUTE ACCENT
0072+0301=0155# LATIN SMALL LETTER R WITH ACUTE = LATIN SMALL LETTER R + COMBINING ACUTE ACCENT
0052+0327=0156# LATIN CAPITAL LETTER R WITH CEDILLA = LATIN CAPITAL LETTER R + COMBINING CEDILLA
0072+0327=0157# LATIN SMALL LETTER R WITH CEDILLA = LATIN SMALL LETTER R + COMBINING CEDILLA
0052+030C=0158# LATIN CAPITAL LETTER R WITH CARON = LATIN CAPITAL LETTER R + COMBINING CARON
0072+030C=0159# LATIN SMALL LETTER R WITH CARON = LATIN SMALL LETTER R + COMBINING CARON
0053+0301=015A# LATIN CAPITAL LETTER S WITH ACUTE = LATIN CAPITAL LETTER S + COMBINING ACUTE ACCENT
0073+0301=015B# LATIN SMALL LETTER S WITH ACUTE = LATIN SMALL LETTER S + COMBINING ACUTE ACCENT
0053+0302=015C# LATIN CAPITAL LETTER S WITH CIRCUMFLEX = LATIN CAPITAL LETTER S + COMBINING CIRCUMFLEX ACCENT
0073+0302=015D# LATIN SMALL LETTER S WITH CIRCUMFLEX = LATIN SMALL LETTER S + COMBINING CIRCUMFLEX ACCENT
0053+0327=015E# LATIN CAPITAL LETTER S WITH CEDILLA = LATIN CAPITAL LETTER S + COMBINING CEDILLA
0073+0327=015F# LATIN SMALL LETTER S WITH CEDILLA = LATIN SMALL LETTER S + COMBINING CEDILLA
0053+030C=0160# LATIN CAPITAL LETTER S WITH CARON = LATIN CAPITAL LETTER S + COMBINING CARON
0073+030C=0161# LATIN SMALL LETTER S WITH CARON = LATIN SMALL LETTER S + COMBINING CARON
0054+0327=0162# LATIN CAPITAL LETTER T WITH CEDILLA = LATIN CAPITAL LETTER T + COMBINING CEDILLA
0074+0327=0163# LATIN SMALL LETTER T WITH CEDILLA = LATIN SMALL LETTER T + COMBINING CEDILLA
0054+030C=0164# LATIN CAPITAL LETTER T WITH CARON = LATIN CAPITAL LETTER T + COMBINING CARON
0074+030C=0165# LATIN SMALL LETTER T WITH CARON = LATIN SMALL LETTER T + COMBINING CARON
0055+0303=0168# LATIN CAPITAL LETTER U WITH TILDE = LATIN CAPITAL LETTER U + COMBINING TILDE
0075+0303=0169# LATIN SMALL LETTER U WITH TILDE = LATIN SMALL LETTER U + COMBINING TILDE
0055+0304=016A# LATIN CAPITAL LETTER U WITH MACRON = LATIN CAPITAL LETTER U + COMBINING MACRON
0075+0304=016B# LATIN SMALL LETTER U WITH MACRON = LATIN SMALL LETTER U + COMBINING MACRON
0055+0306=016C# LATIN CAPITAL LETTER U WITH BREVE = LATIN CAPITAL LETTER U + COMBINING BREVE
0075+0306=016D# LATIN SMALL LETTER U WITH BREVE = LATIN SMALL LETTER U + COMBINING BREVE
0055+030A=016E# LATIN CAPITAL LETTER U WITH RING ABOVE = LATIN CAPITAL LETTER U + COMBINING RING ABOVE
0075+030A=016F# LATIN SMALL LETTER U WITH RING ABOVE = LATIN SMALL LETTER U + COMBINING RING ABOVE
0055+030B=0170# LATIN CAPITAL LETTER U WITH DOUBLE ACUTE = LATIN CAPITAL LETTER U + COMBINING DOUBLE ACUTE ACCENT
0075+030B=0171# LATIN SMALL LETTER U WITH DOUBLE ACUTE = LATIN SMALL LETTER U + COMBINING DOUBLE ACUTE ACCENT
0055+0328=0172# LATIN CAPITAL LETTER U WITH OGONEK = LATIN CAPITAL LETTER U + COMBINING OGONEK
0075+0328=0173# LATIN SMALL LETTER U WITH OGONEK = LATIN SMALL LETTER U + COMBINING OGONEK
0057+0302=0174# LATIN CAPITAL LETTER W WITH CIRCUMFLEX = LATIN CAPITAL LETTER W + COMBINING CIRCUMFLEX ACCENT
0077+0302=0175# LATIN SMALL LETTER W WITH CIRCUMFLEX = LATIN SMALL LETTER W + COMBINING CIRCUMFLEX ACCENT
0059+0302=0176# LATIN CAPITAL LETTER Y WITH CIRCUMFLEX = LATIN CAPITAL LETTER Y + COMBINING CIRCUMFLEX ACCENT
0079+0302=0177# LATIN SMALL LETTER Y WITH CIRCUMFLEX = LATIN SMALL LETTER Y + COMBINING CIRCUMFLEX ACCENT
0059+0308=0178# LATIN CAPITAL LETTER Y WITH DIAERESIS = LATIN CAPITAL LETTER Y + COMBINING DIAERESIS
005A+0301=0179# LATIN CAPITAL LETTER Z WITH ACUTE = LATIN CAPITAL LETTER Z + COMBINING ACUTE ACCENT
007A+0301=017A# LATIN SMALL LETTER Z WITH ACUTE = LATIN SMALL LETTER Z + COMBINING ACUTE ACCENT
005A+0307=017B# LATIN CAPITAL LETTER Z WITH DOT ABOVE = LATIN CAPITAL LETTER Z + COMBINING DOT ABOVE
007A+0307=017C# LATIN SMALL LETTER Z WITH DOT ABOVE = LATIN SMALL LETTER Z + COMBINING DOT ABOVE
005A+030C=017D# LATIN CAPITAL LETTER Z WITH CARON = LATIN CAPITAL LETTER Z + COMBINING CARON
007A+030C=017E# LATIN SMALL LETTER Z WITH CARON = LATIN SMALL LETTER Z + COMBINING CARON
004F+031B=01A0# LATIN CAPITAL LETTER O WITH HORN = LATIN CAPITAL LETTER O + COMBINING HORN
006F+031B=01A1# LATIN SMALL LETTER O WITH HORN = LATIN SMALL LETTER O + COMBINING HORN
0055+031B=01AF# LATIN CAPITAL LETTER U WITH HORN = LATIN CAPITAL LETTER U + COMBINING HORN
0075+031B=01B0# LATIN SMALL LETTER U WITH HORN = LATIN SMALL LETTER U + COMBINING HORN
01F1+030C=01C4# LATIN CAPITAL LETTER DZ WITH CARON = LATIN CAPITAL LETTER DZ + COMBINING CARON
01F3+030C=01C6# LATIN SMALL LETTER DZ WITH CARON = LATIN SMALL LETTER DZ + COMBINING CARON
0041+030C=01CD# LATIN CAPITAL LETTER A WITH CARON = LATIN CAPITAL LETTER A + COMBINING CARON
0061+030C=01CE# LATIN SMALL LETTER A WITH CARON = LATIN SMALL LETTER A + COMBINING CARON
0049+030C=01CF# LATIN CAPITAL LETTER I WITH CARON = LATIN CAPITAL LETTER I + COMBINING CARON
0069+030C=01D0# LATIN SMALL LETTER I WITH CARON = LATIN SMALL LETTER I + COMBINING CARON
004F+030C=01D1# LATIN CAPITAL LETTER O WITH CARON = LATIN CAPITAL LETTER O + COMBINING CARON
006F+030C=01D2# LATIN SMALL LETTER O WITH CARON = LATIN SMALL LETTER O + COMBINING CARON
0055+030C=01D3# LATIN CAPITAL LETTER U WITH CARON = LATIN CAPITAL LETTER U + COMBINING CARON
0075+030C=01D4# LATIN SMALL LETTER U WITH CARON = LATIN SMALL LETTER U + COMBINING CARON
0055+0308+0304=01D5# LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING MACRON
0075+0308+0304=01D6# LATIN SMALL LETTER U WITH DIAERESIS AND MACRON = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING MACRON
0055+0308+0301=01D7# LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
0075+0308+0301=01D8# LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
0055+0308+030C=01D9# LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING CARON
0075+0308+030C=01DA# LATIN SMALL LETTER U WITH DIAERESIS AND CARON = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING CARON
0055+0308+0300=01DB# LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING GRAVE ACCENT
0075+0308+0300=01DC# LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING GRAVE ACCENT
0041+0308+0304=01DE# LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON = LATIN CAPITAL LETTER A + COMBINING DIAERESIS + COMBINING MACRON
0061+0308+0304=01DF# LATIN SMALL LETTER A WITH DIAERESIS AND MACRON = LATIN SMALL LETTER A + COMBINING DIAERESIS + COMBINING MACRON
0041+0307+0304=01E0# LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON = LATIN CAPITAL LETTER A + COMBINING DOT ABOVE + COMBINING MACRON
0061+0307+0304=01E1# LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON = LATIN SMALL LETTER A + COMBINING DOT ABOVE + COMBINING MACRON
00C6+0304=01E2# LATIN CAPITAL LETTER AE WITH MACRON = LATIN CAPITAL LETTER AE + COMBINING MACRON
00E6+0304=01E3# LATIN SMALL LETTER AE WITH MACRON = LATIN SMALL LETTER AE + COMBINING MACRON
0047+030C=01E6# LATIN CAPITAL LETTER G WITH CARON = LATIN CAPITAL LETTER G + COMBINING CARON
0067+030C=01E7# LATIN SMALL LETTER G WITH CARON = LATIN SMALL LETTER G + COMBINING CARON
004B+030C=01E8# LATIN CAPITAL LETTER K WITH CARON = LATIN CAPITAL LETTER K + COMBINING CARON
006B+030C=01E9# LATIN SMALL LETTER K WITH CARON = LATIN SMALL LETTER K + COMBINING CARON
004F+0328=01EA# LATIN CAPITAL LETTER O WITH OGONEK = LATIN CAPITAL LETTER O + COMBINING OGONEK
006F+0328=01EB# LATIN SMALL LETTER O WITH OGONEK = LATIN SMALL LETTER O + COMBINING OGONEK
004F+0328+0304=01EC# LATIN CAPITAL LETTER O WITH OGONEK AND MACRON = LATIN CAPITAL LETTER O + COMBINING OGONEK + COMBINING MACRON
006F+0328+0304=01ED# LATIN SMALL LETTER O WITH OGONEK AND MACRON = LATIN SMALL LETTER O + COMBINING OGONEK + COMBINING MACRON
01B7+030C=01EE# LATIN CAPITAL LETTER EZH WITH CARON = LATIN CAPITAL LETTER EZH + COMBINING CARON
0292+030C=01EF# LATIN SMALL LETTER EZH WITH CARON = LATIN SMALL LETTER EZH + COMBINING CARON
006A+030C=01F0# LATIN SMALL LETTER J WITH CARON = LATIN SMALL LETTER J + COMBINING CARON
0047+0301=01F4# LATIN CAPITAL LETTER G WITH ACUTE = LATIN CAPITAL LETTER G + COMBINING ACUTE ACCENT
0067+0301=01F5# LATIN SMALL LETTER G WITH ACUTE = LATIN SMALL LETTER G + COMBINING ACUTE ACCENT
0041+030A+0301=01FA# LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE = LATIN CAPITAL LETTER A + COMBINING RING ABOVE + COMBINING ACUTE ACCENT
0061+030A+0301=01FB# LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE = LATIN SMALL LETTER A + COMBINING RING ABOVE + COMBINING ACUTE ACCENT
00C6+0301=01FC# LATIN CAPITAL LETTER AE WITH ACUTE = LATIN CAPITAL LETTER AE + COMBINING ACUTE ACCENT
00E6+0301=01FD# LATIN SMALL LETTER AE WITH ACUTE = LATIN SMALL LETTER AE + COMBINING ACUTE ACCENT
0041+030F=0200# LATIN CAPITAL LETTER A WITH DOUBLE GRAVE = LATIN CAPITAL LETTER A + COMBINING DOUBLE GRAVE ACCENT
0061+030F=0201# LATIN SMALL LETTER A WITH DOUBLE GRAVE = LATIN SMALL LETTER A + COMBINING DOUBLE GRAVE ACCENT
0041+0311=0202# LATIN CAPITAL LETTER A WITH INVERTED BREVE = LATIN CAPITAL LETTER A + COMBINING INVERTED BREVE
0061+0311=0203# LATIN SMALL LETTER A WITH INVERTED BREVE = LATIN SMALL LETTER A + COMBINING INVERTED BREVE
0045+030F=0204# LATIN CAPITAL LETTER E WITH DOUBLE GRAVE = LATIN CAPITAL LETTER E + COMBINING DOUBLE GRAVE ACCENT
0065+030F=0205# LATIN SMALL LETTER E WITH DOUBLE GRAVE = LATIN SMALL LETTER E + COMBINING DOUBLE GRAVE ACCENT
0045+0311=0206# LATIN CAPITAL LETTER E WITH INVERTED BREVE = LATIN CAPITAL LETTER E + COMBINING INVERTED BREVE
0065+0311=0207# LATIN SMALL LETTER E WITH INVERTED BREVE = LATIN SMALL LETTER E + COMBINING INVERTED BREVE
0049+030F=0208# LATIN CAPITAL LETTER I WITH DOUBLE GRAVE = LATIN CAPITAL LETTER I + COMBINING DOUBLE GRAVE ACCENT
0069+030F=0209# LATIN SMALL LETTER I WITH DOUBLE GRAVE = LATIN SMALL LETTER I + COMBINING DOUBLE GRAVE ACCENT
0049+0311=020A# LATIN CAPITAL LETTER I WITH INVERTED BREVE = LATIN CAPITAL LETTER I + COMBINING INVERTED BREVE
0069+0311=020B# LATIN SMALL LETTER I WITH INVERTED BREVE = LATIN SMALL LETTER I + COMBINING INVERTED BREVE
004F+030F=020C# LATIN CAPITAL LETTER O WITH DOUBLE GRAVE = LATIN CAPITAL LETTER O + COMBINING DOUBLE GRAVE ACCENT
006F+030F=020D# LATIN SMALL LETTER O WITH DOUBLE GRAVE = LATIN SMALL LETTER O + COMBINING DOUBLE GRAVE ACCENT
004F+0311=020E# LATIN CAPITAL LETTER O WITH INVERTED BREVE = LATIN CAPITAL LETTER O + COMBINING INVERTED BREVE
006F+0311=020F# LATIN SMALL LETTER O WITH INVERTED BREVE = LATIN SMALL LETTER O + COMBINING INVERTED BREVE
0052+030F=0210# LATIN CAPITAL LETTER R WITH DOUBLE GRAVE = LATIN CAPITAL LETTER R + COMBINING DOUBLE GRAVE ACCENT
0072+030F=0211# LATIN SMALL LETTER R WITH DOUBLE GRAVE = LATIN SMALL LETTER R + COMBINING DOUBLE GRAVE ACCENT
0052+0311=0212# LATIN CAPITAL LETTER R WITH INVERTED BREVE = LATIN CAPITAL LETTER R + COMBINING INVERTED BREVE
0072+0311=0213# LATIN SMALL LETTER R WITH INVERTED BREVE = LATIN SMALL LETTER R + COMBINING INVERTED BREVE
0055+030F=0214# LATIN CAPITAL LETTER U WITH DOUBLE GRAVE = LATIN CAPITAL LETTER U + COMBINING DOUBLE GRAVE ACCENT
0075+030F=0215# LATIN SMALL LETTER U WITH DOUBLE GRAVE = LATIN SMALL LETTER U + COMBINING DOUBLE GRAVE ACCENT
0055+0311=0216# LATIN CAPITAL LETTER U WITH INVERTED BREVE = LATIN CAPITAL LETTER U + COMBINING INVERTED BREVE
0075+0311=0217# LATIN SMALL LETTER U WITH INVERTED BREVE = LATIN SMALL LETTER U + COMBINING INVERTED BREVE
0041+0325=1E00# LATIN CAPITAL LETTER A WITH RING BELOW = LATIN CAPITAL LETTER A + COMBINING RING BELOW
0061+0325=1E01# LATIN SMALL LETTER A WITH RING BELOW = LATIN SMALL LETTER A + COMBINING RING BELOW
0042+0307=1E02# LATIN CAPITAL LETTER B WITH DOT ABOVE = LATIN CAPITAL LETTER B + COMBINING DOT ABOVE
0062+0307=1E03# LATIN SMALL LETTER B WITH DOT ABOVE = LATIN SMALL LETTER B + COMBINING DOT ABOVE
0042+0323=1E04# LATIN CAPITAL LETTER B WITH DOT BELOW = LATIN CAPITAL LETTER B + COMBINING DOT BELOW
0062+0323=1E05# LATIN SMALL LETTER B WITH DOT BELOW = LATIN SMALL LETTER B + COMBINING DOT BELOW
0042+0332=1E06# LATIN CAPITAL LETTER B WITH LINE BELOW = LATIN CAPITAL LETTER B + COMBINING LOW LINE
0062+0332=1E07# LATIN SMALL LETTER B WITH LINE BELOW = LATIN SMALL LETTER B + COMBINING LOW LINE
0043+0327+0301=1E08# LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE = LATIN CAPITAL LETTER C + COMBINING CEDILLA + COMBINING ACUTE ACCENT
0063+0327+0301=1E09# LATIN SMALL LETTER C WITH CEDILLA AND ACUTE = LATIN SMALL LETTER C + COMBINING CEDILLA + COMBINING ACUTE ACCENT
0044+0307=1E0A# LATIN CAPITAL LETTER D WITH DOT ABOVE = LATIN CAPITAL LETTER D + COMBINING DOT ABOVE
0064+0307=1E0B# LATIN SMALL LETTER D WITH DOT ABOVE = LATIN SMALL LETTER D + COMBINING DOT ABOVE
0044+0323=1E0C# LATIN CAPITAL LETTER D WITH DOT BELOW = LATIN CAPITAL LETTER D + COMBINING DOT BELOW
0064+0323=1E0D# LATIN SMALL LETTER D WITH DOT BELOW = LATIN SMALL LETTER D + COMBINING DOT BELOW
0044+0332=1E0E# LATIN CAPITAL LETTER D WITH LINE BELOW = LATIN CAPITAL LETTER D + COMBINING LOW LINE
0064+0332=1E0F# LATIN SMALL LETTER D WITH LINE BELOW = LATIN SMALL LETTER D + COMBINING LOW LINE
0044+0327=1E10# LATIN CAPITAL LETTER D WITH CEDILLA = LATIN CAPITAL LETTER D + COMBINING CEDILLA
0064+0327=1E11# LATIN SMALL LETTER D WITH CEDILLA = LATIN SMALL LETTER D + COMBINING CEDILLA
0044+032D=1E12# LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER D + COMBINING CIRCUMFLEX ACCENT BELOW
0064+032D=1E13# LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER D + COMBINING CIRCUMFLEX ACCENT BELOW
0045+0304+0300=1E14# LATIN CAPITAL LETTER E WITH MACRON AND GRAVE = LATIN CAPITAL LETTER E + COMBINING MACRON + COMBINING GRAVE ACCENT
0065+0304+0300=1E15# LATIN SMALL LETTER E WITH MACRON AND GRAVE = LATIN SMALL LETTER E + COMBINING MACRON + COMBINING GRAVE ACCENT
0045+0304+0301=1E16# LATIN CAPITAL LETTER E WITH MACRON AND ACUTE = LATIN CAPITAL LETTER E + COMBINING MACRON + COMBINING ACUTE ACCENT
0065+0304+0301=1E17# LATIN SMALL LETTER E WITH MACRON AND ACUTE = LATIN SMALL LETTER E + COMBINING MACRON + COMBINING ACUTE ACCENT
0045+032D=1E18# LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT BELOW
0065+032D=1E19# LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT BELOW
0045+0330=1E1A# LATIN CAPITAL LETTER E WITH TILDE BELOW = LATIN CAPITAL LETTER E + COMBINING TILDE BELOW
0065+0330=1E1B# LATIN SMALL LETTER E WITH TILDE BELOW = LATIN SMALL LETTER E + COMBINING TILDE BELOW
0045+0327+0306=1E1C# LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE = LATIN CAPITAL LETTER E + COMBINING CEDILLA + COMBINING BREVE
0065+0327+0306=1E1D# LATIN SMALL LETTER E WITH CEDILLA AND BREVE = LATIN SMALL LETTER E + COMBINING CEDILLA + COMBINING BREVE
0046+0307=1E1E# LATIN CAPITAL LETTER F WITH DOT ABOVE = LATIN CAPITAL LETTER F + COMBINING DOT ABOVE
0066+0307=1E1F# LATIN SMALL LETTER F WITH DOT ABOVE = LATIN SMALL LETTER F + COMBINING DOT ABOVE
0047+0304=1E20# LATIN CAPITAL LETTER G WITH MACRON = LATIN CAPITAL LETTER G + COMBINING MACRON
0067+0304=1E21# LATIN SMALL LETTER G WITH MACRON = LATIN SMALL LETTER G + COMBINING MACRON
0048+0307=1E22# LATIN CAPITAL LETTER H WITH DOT ABOVE = LATIN CAPITAL LETTER H + COMBINING DOT ABOVE
0068+0307=1E23# LATIN SMALL LETTER H WITH DOT ABOVE = LATIN SMALL LETTER H + COMBINING DOT ABOVE
0048+0323=1E24# LATIN CAPITAL LETTER H WITH DOT BELOW = LATIN CAPITAL LETTER H + COMBINING DOT BELOW
0068+0323=1E25# LATIN SMALL LETTER H WITH DOT BELOW = LATIN SMALL LETTER H + COMBINING DOT BELOW
0048+0308=1E26# LATIN CAPITAL LETTER H WITH DIAERESIS = LATIN CAPITAL LETTER H + COMBINING DIAERESIS
0068+0308=1E27# LATIN SMALL LETTER H WITH DIAERESIS = LATIN SMALL LETTER H + COMBINING DIAERESIS
0048+0327=1E28# LATIN CAPITAL LETTER H WITH CEDILLA = LATIN CAPITAL LETTER H + COMBINING CEDILLA
0068+0327=1E29# LATIN SMALL LETTER H WITH CEDILLA = LATIN SMALL LETTER H + COMBINING CEDILLA
0048+032E=1E2A# LATIN CAPITAL LETTER H WITH BREVE BELOW = LATIN CAPITAL LETTER H + COMBINING BREVE BELOW
0068+032E=1E2B# LATIN SMALL LETTER H WITH BREVE BELOW = LATIN SMALL LETTER H + COMBINING BREVE BELOW
0049+0330=1E2C# LATIN CAPITAL LETTER I WITH TILDE BELOW = LATIN CAPITAL LETTER I + COMBINING TILDE BELOW
0069+0330=1E2D# LATIN SMALL LETTER I WITH TILDE BELOW = LATIN SMALL LETTER I + COMBINING TILDE BELOW
0049+0308+0301=1E2E# LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE = LATIN CAPITAL LETTER I + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
0069+0308+0301=1E2F# LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE = LATIN SMALL LETTER I + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
004B+0301=1E30# LATIN CAPITAL LETTER K WITH ACUTE = LATIN CAPITAL LETTER K + COMBINING ACUTE ACCENT
006B+0301=1E31# LATIN SMALL LETTER K WITH ACUTE = LATIN SMALL LETTER K + COMBINING ACUTE ACCENT
004B+0323=1E32# LATIN CAPITAL LETTER K WITH DOT BELOW = LATIN CAPITAL LETTER K + COMBINING DOT BELOW
006B+0323=1E33# LATIN SMALL LETTER K WITH DOT BELOW = LATIN SMALL LETTER K + COMBINING DOT BELOW
004B+0332=1E34# LATIN CAPITAL LETTER K WITH LINE BELOW = LATIN CAPITAL LETTER K + COMBINING LOW LINE
006B+0332=1E35# LATIN SMALL LETTER K WITH LINE BELOW = LATIN SMALL LETTER K + COMBINING LOW LINE
004C+0323=1E36# LATIN CAPITAL LETTER L WITH DOT BELOW = LATIN CAPITAL LETTER L + COMBINING DOT BELOW
006C+0323=1E37# LATIN SMALL LETTER L WITH DOT BELOW = LATIN SMALL LETTER L + COMBINING DOT BELOW
004C+0323+0304=1E38# LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON = LATIN CAPITAL LETTER L + COMBINING DOT BELOW + COMBINING MACRON
006C+0323+0304=1E39# LATIN SMALL LETTER L WITH DOT BELOW AND MACRON = LATIN SMALL LETTER L + COMBINING DOT BELOW + COMBINING MACRON
004C+0332=1E3A# LATIN CAPITAL LETTER L WITH LINE BELOW = LATIN CAPITAL LETTER L + COMBINING LOW LINE
006C+0332=1E3B# LATIN SMALL LETTER L WITH LINE BELOW = LATIN SMALL LETTER L + COMBINING LOW LINE
004C+032D=1E3C# LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER L + COMBINING CIRCUMFLEX ACCENT BELOW
006C+032D=1E3D# LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER L + COMBINING CIRCUMFLEX ACCENT BELOW
004D+0301=1E3E# LATIN CAPITAL LETTER M WITH ACUTE = LATIN CAPITAL LETTER M + COMBINING ACUTE ACCENT
006D+0301=1E3F# LATIN SMALL LETTER M WITH ACUTE = LATIN SMALL LETTER M + COMBINING ACUTE ACCENT
004D+0307=1E40# LATIN CAPITAL LETTER M WITH DOT ABOVE = LATIN CAPITAL LETTER M + COMBINING DOT ABOVE
006D+0307=1E41# LATIN SMALL LETTER M WITH DOT ABOVE = LATIN SMALL LETTER M + COMBINING DOT ABOVE
004D+0323=1E42# LATIN CAPITAL LETTER M WITH DOT BELOW = LATIN CAPITAL LETTER M + COMBINING DOT BELOW
006D+0323=1E43# LATIN SMALL LETTER M WITH DOT BELOW = LATIN SMALL LETTER M + COMBINING DOT BELOW
004E+0307=1E44# LATIN CAPITAL LETTER N WITH DOT ABOVE = LATIN CAPITAL LETTER N + COMBINING DOT ABOVE
006E+0307=1E45# LATIN SMALL LETTER N WITH DOT ABOVE = LATIN SMALL LETTER N + COMBINING DOT ABOVE
004E+0323=1E46# LATIN CAPITAL LETTER N WITH DOT BELOW = LATIN CAPITAL LETTER N + COMBINING DOT BELOW
006E+0323=1E47# LATIN SMALL LETTER N WITH DOT BELOW = LATIN SMALL LETTER N + COMBINING DOT BELOW
004E+0332=1E48# LATIN CAPITAL LETTER N WITH LINE BELOW = LATIN CAPITAL LETTER N + COMBINING LOW LINE
006E+0332=1E49# LATIN SMALL LETTER N WITH LINE BELOW = LATIN SMALL LETTER N + COMBINING LOW LINE
004E+032D=1E4A# LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER N + COMBINING CIRCUMFLEX ACCENT BELOW
006E+032D=1E4B# LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER N + COMBINING CIRCUMFLEX ACCENT BELOW
004F+0303+0301=1E4C# LATIN CAPITAL LETTER O WITH TILDE AND ACUTE = LATIN CAPITAL LETTER O + COMBINING TILDE + COMBINING ACUTE ACCENT
006F+0303+0301=1E4D# LATIN SMALL LETTER O WITH TILDE AND ACUTE = LATIN SMALL LETTER O + COMBINING TILDE + COMBINING ACUTE ACCENT
004F+0303+0308=1E4E# LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS = LATIN CAPITAL LETTER O + COMBINING TILDE + COMBINING DIAERESIS
006F+0303+0308=1E4F# LATIN SMALL LETTER O WITH TILDE AND DIAERESIS = LATIN SMALL LETTER O + COMBINING TILDE + COMBINING DIAERESIS
004F+0304+0300=1E50# LATIN CAPITAL LETTER O WITH MACRON AND GRAVE = LATIN CAPITAL LETTER O + COMBINING MACRON + COMBINING GRAVE ACCENT
006F+0304+0300=1E51# LATIN SMALL LETTER O WITH MACRON AND GRAVE = LATIN SMALL LETTER O + COMBINING MACRON + COMBINING GRAVE ACCENT
004F+0304+0301=1E52# LATIN CAPITAL LETTER O WITH MACRON AND ACUTE = LATIN CAPITAL LETTER O + COMBINING MACRON + COMBINING ACUTE ACCENT
006F+0304+0301=1E53# LATIN SMALL LETTER O WITH MACRON AND ACUTE = LATIN SMALL LETTER O + COMBINING MACRON + COMBINING ACUTE ACCENT
0050+0301=1E54# LATIN CAPITAL LETTER P WITH ACUTE = LATIN CAPITAL LETTER P + COMBINING ACUTE ACCENT
0070+0301=1E55# LATIN SMALL LETTER P WITH ACUTE = LATIN SMALL LETTER P + COMBINING ACUTE ACCENT
0050+0307=1E56# LATIN CAPITAL LETTER P WITH DOT ABOVE = LATIN CAPITAL LETTER P + COMBINING DOT ABOVE
0070+0307=1E57# LATIN SMALL LETTER P WITH DOT ABOVE = LATIN SMALL LETTER P + COMBINING DOT ABOVE
0052+0307=1E58# LATIN CAPITAL LETTER R WITH DOT ABOVE = LATIN CAPITAL LETTER R + COMBINING DOT ABOVE
0072+0307=1E59# LATIN SMALL LETTER R WITH DOT ABOVE = LATIN SMALL LETTER R + COMBINING DOT ABOVE
0052+0323=1E5A# LATIN CAPITAL LETTER R WITH DOT BELOW = LATIN CAPITAL LETTER R + COMBINING DOT BELOW
0072+0323=1E5B# LATIN SMALL LETTER R WITH DOT BELOW = LATIN SMALL LETTER R + COMBINING DOT BELOW
0052+0323+0304=1E5C# LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON = LATIN CAPITAL LETTER R + COMBINING DOT BELOW + COMBINING MACRON
0072+0323+0304=1E5D# LATIN SMALL LETTER R WITH DOT BELOW AND MACRON = LATIN SMALL LETTER R + COMBINING DOT BELOW + COMBINING MACRON
0052+0332=1E5E# LATIN CAPITAL LETTER R WITH LINE BELOW = LATIN CAPITAL LETTER R + COMBINING LOW LINE
0072+0332=1E5F# LATIN SMALL LETTER R WITH LINE BELOW = LATIN SMALL LETTER R + COMBINING LOW LINE
0053+0307=1E60# LATIN CAPITAL LETTER S WITH DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING DOT ABOVE
0073+0307=1E61# LATIN SMALL LETTER S WITH DOT ABOVE = LATIN SMALL LETTER S + COMBINING DOT ABOVE
0053+0323=1E62# LATIN CAPITAL LETTER S WITH DOT BELOW = LATIN CAPITAL LETTER S + COMBINING DOT BELOW
0073+0323=1E63# LATIN SMALL LETTER S WITH DOT BELOW = LATIN SMALL LETTER S + COMBINING DOT BELOW
0053+0301+0307=1E64# LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING ACUTE ACCENT + COMBINING DOT ABOVE
0073+0301+0307=1E65# LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE = LATIN SMALL LETTER S + COMBINING ACUTE ACCENT + COMBINING DOT ABOVE
0053+030C+0307=1E66# LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING CARON + COMBINING DOT ABOVE
0073+030C+0307=1E67# LATIN SMALL LETTER S WITH CARON AND DOT ABOVE = LATIN SMALL LETTER S + COMBINING CARON + COMBINING DOT ABOVE
0053+0323+0307=1E68# LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING DOT BELOW + COMBINING DOT ABOVE
0073+0323+0307=1E69# LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE = LATIN SMALL LETTER S + COMBINING DOT BELOW + COMBINING DOT ABOVE
0054+0307=1E6A# LATIN CAPITAL LETTER T WITH DOT ABOVE = LATIN CAPITAL LETTER T + COMBINING DOT ABOVE
0074+0307=1E6B# LATIN SMALL LETTER T WITH DOT ABOVE = LATIN SMALL LETTER T + COMBINING DOT ABOVE
0054+0323=1E6C# LATIN CAPITAL LETTER T WITH DOT BELOW = LATIN CAPITAL LETTER T + COMBINING DOT BELOW
0074+0323=1E6D# LATIN SMALL LETTER T WITH DOT BELOW = LATIN SMALL LETTER T + COMBINING DOT BELOW
0054+0332=1E6E# LATIN CAPITAL LETTER T WITH LINE BELOW = LATIN CAPITAL LETTER T + COMBINING LOW LINE
0074+0332=1E6F# LATIN SMALL LETTER T WITH LINE BELOW = LATIN SMALL LETTER T + COMBINING LOW LINE
0054+032D=1E70# LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER T + COMBINING CIRCUMFLEX ACCENT BELOW
0074+032D=1E71# LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER T + COMBINING CIRCUMFLEX ACCENT BELOW
0055+0324=1E72# LATIN CAPITAL LETTER U WITH DIAERESIS BELOW = LATIN CAPITAL LETTER U + COMBINING DIAERESIS BELOW
0075+0324=1E73# LATIN SMALL LETTER U WITH DIAERESIS BELOW = LATIN SMALL LETTER U + COMBINING DIAERESIS BELOW
0055+0330=1E74# LATIN CAPITAL LETTER U WITH TILDE BELOW = LATIN CAPITAL LETTER U + COMBINING TILDE BELOW
0075+0330=1E75# LATIN SMALL LETTER U WITH TILDE BELOW = LATIN SMALL LETTER U + COMBINING TILDE BELOW
0055+032D=1E76# LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER U + COMBINING CIRCUMFLEX ACCENT BELOW
0075+032D=1E77# LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER U + COMBINING CIRCUMFLEX ACCENT BELOW
0055+0303+0301=1E78# LATIN CAPITAL LETTER U WITH TILDE AND ACUTE = LATIN CAPITAL LETTER U + COMBINING TILDE + COMBINING ACUTE ACCENT
0075+0303+0301=1E79# LATIN SMALL LETTER U WITH TILDE AND ACUTE = LATIN SMALL LETTER U + COMBINING TILDE + COMBINING ACUTE ACCENT
0055+0304+0308=1E7A# LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS = LATIN CAPITAL LETTER U + COMBINING MACRON + COMBINING DIAERESIS
0075+0304+0308=1E7B# LATIN SMALL LETTER U WITH MACRON AND DIAERESIS = LATIN SMALL LETTER U + COMBINING MACRON + COMBINING DIAERESIS
0056+0303=1E7C# LATIN CAPITAL LETTER V WITH TILDE = LATIN CAPITAL LETTER V + COMBINING TILDE
0076+0303=1E7D# LATIN SMALL LETTER V WITH TILDE = LATIN SMALL LETTER V + COMBINING TILDE
0056+0323=1E7E# LATIN CAPITAL LETTER V WITH DOT BELOW = LATIN CAPITAL LETTER V + COMBINING DOT BELOW
0076+0323=1E7F# LATIN SMALL LETTER V WITH DOT BELOW = LATIN SMALL LETTER V + COMBINING DOT BELOW
0057+0300=1E80# LATIN CAPITAL LETTER W WITH GRAVE = LATIN CAPITAL LETTER W + COMBINING GRAVE ACCENT
0077+0300=1E81# LATIN SMALL LETTER W WITH GRAVE = LATIN SMALL LETTER W + COMBINING GRAVE ACCENT
0057+0301=1E82# LATIN CAPITAL LETTER W WITH ACUTE = LATIN CAPITAL LETTER W + COMBINING ACUTE ACCENT
0077+0301=1E83# LATIN SMALL LETTER W WITH ACUTE = LATIN SMALL LETTER W + COMBINING ACUTE ACCENT
0057+0308=1E84# LATIN CAPITAL LETTER W WITH DIAERESIS = LATIN CAPITAL LETTER W + COMBINING DIAERESIS
0077+0308=1E85# LATIN SMALL LETTER W WITH DIAERESIS = LATIN SMALL LETTER W + COMBINING DIAERESIS
0057+0307=1E86# LATIN CAPITAL LETTER W WITH DOT ABOVE = LATIN CAPITAL LETTER W + COMBINING DOT ABOVE
0077+0307=1E87# LATIN SMALL LETTER W WITH DOT ABOVE = LATIN SMALL LETTER W + COMBINING DOT ABOVE
0057+0323=1E88# LATIN CAPITAL LETTER W WITH DOT BELOW = LATIN CAPITAL LETTER W + COMBINING DOT BELOW
0077+0323=1E89# LATIN SMALL LETTER W WITH DOT BELOW = LATIN SMALL LETTER W + COMBINING DOT BELOW
0058+0307=1E8A# LATIN CAPITAL LETTER X WITH DOT ABOVE = LATIN CAPITAL LETTER X + COMBINING DOT ABOVE
0078+0307=1E8B# LATIN SMALL LETTER X WITH DOT ABOVE = LATIN SMALL LETTER X + COMBINING DOT ABOVE
0058+0308=1E8C# LATIN CAPITAL LETTER X WITH DIAERESIS = LATIN CAPITAL LETTER X + COMBINING DIAERESIS
0078+0308=1E8D# LATIN SMALL LETTER X WITH DIAERESIS = LATIN SMALL LETTER X + COMBINING DIAERESIS
0059+0307=1E8E# LATIN CAPITAL LETTER Y WITH DOT ABOVE = LATIN CAPITAL LETTER Y + COMBINING DOT ABOVE
0079+0307=1E8F# LATIN SMALL LETTER Y WITH DOT ABOVE = LATIN SMALL LETTER Y + COMBINING DOT ABOVE
005A+0302=1E90# LATIN CAPITAL LETTER Z WITH CIRCUMFLEX = LATIN CAPITAL LETTER Z + COMBINING CIRCUMFLEX ACCENT
007A+0302=1E91# LATIN SMALL LETTER Z WITH CIRCUMFLEX = LATIN SMALL LETTER Z + COMBINING CIRCUMFLEX ACCENT
005A+0323=1E92# LATIN CAPITAL LETTER Z WITH DOT BELOW = LATIN CAPITAL LETTER Z + COMBINING DOT BELOW
007A+0323=1E93# LATIN SMALL LETTER Z WITH DOT BELOW = LATIN SMALL LETTER Z + COMBINING DOT BELOW
005A+0332=1E94# LATIN CAPITAL LETTER Z WITH LINE BELOW = LATIN CAPITAL LETTER Z + COMBINING LOW LINE
007A+0332=1E95# LATIN SMALL LETTER Z WITH LINE BELOW = LATIN SMALL LETTER Z + COMBINING LOW LINE
0068+0332=1E96# LATIN SMALL LETTER H WITH LINE BELOW = LATIN SMALL LETTER H + COMBINING LOW LINE
0074+0308=1E97# LATIN SMALL LETTER T WITH DIAERESIS = LATIN SMALL LETTER T + COMBINING DIAERESIS
0077+030A=1E98# LATIN SMALL LETTER W WITH RING ABOVE = LATIN SMALL LETTER W + COMBINING RING ABOVE
0079+030A=1E99# LATIN SMALL LETTER Y WITH RING ABOVE = LATIN SMALL LETTER Y + COMBINING RING ABOVE
017F+0307=1E9B# LATIN SMALL LETTER LONG S WITH DOT ABOVE = LATIN SMALL LETTER LONG S + COMBINING DOT ABOVE
0041+0323=1EA0# LATIN CAPITAL LETTER A WITH DOT BELOW = LATIN CAPITAL LETTER A + COMBINING DOT BELOW
0061+0323=1EA1# LATIN SMALL LETTER A WITH DOT BELOW = LATIN SMALL LETTER A + COMBINING DOT BELOW
0041+0309=1EA2# LATIN CAPITAL LETTER A WITH HOOK ABOVE = LATIN CAPITAL LETTER A + COMBINING HOOK ABOVE
0061+0309=1EA3# LATIN SMALL LETTER A WITH HOOK ABOVE = LATIN SMALL LETTER A + COMBINING HOOK ABOVE
0041+0302+0301=1EA4# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
0061+0302+0301=1EA5# LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
0041+0302+0300=1EA6# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
0061+0302+0300=1EA7# LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
0041+0302+0309=1EA8# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
0061+0302+0309=1EA9# LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
0041+0302+0303=1EAA# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
0061+0302+0303=1EAB# LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
0041+0302+0323=1EAC# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
0061+0302+0323=1EAD# LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
0041+0306+0301=1EAE# LATIN CAPITAL LETTER A WITH BREVE AND ACUTE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING ACUTE ACCENT
0061+0306+0301=1EAF# LATIN SMALL LETTER A WITH BREVE AND ACUTE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING ACUTE ACCENT
0041+0306+0300=1EB0# LATIN CAPITAL LETTER A WITH BREVE AND GRAVE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING GRAVE ACCENT
0061+0306+0300=1EB1# LATIN SMALL LETTER A WITH BREVE AND GRAVE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING GRAVE ACCENT
0041+0306+0309=1EB2# LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING HOOK ABOVE
0061+0306+0309=1EB3# LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING HOOK ABOVE
0041+0306+0303=1EB4# LATIN CAPITAL LETTER A WITH BREVE AND TILDE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING TILDE
0061+0306+0303=1EB5# LATIN SMALL LETTER A WITH BREVE AND TILDE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING TILDE
0041+0306+0323=1EB6# LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING DOT BELOW
0061+0306+0323=1EB7# LATIN SMALL LETTER A WITH BREVE AND DOT BELOW = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING DOT BELOW
0045+0323=1EB8# LATIN CAPITAL LETTER E WITH DOT BELOW = LATIN CAPITAL LETTER E + COMBINING DOT BELOW
0065+0323=1EB9# LATIN SMALL LETTER E WITH DOT BELOW = LATIN SMALL LETTER E + COMBINING DOT BELOW
0045+0309=1EBA# LATIN CAPITAL LETTER E WITH HOOK ABOVE = LATIN CAPITAL LETTER E + COMBINING HOOK ABOVE
0065+0309=1EBB# LATIN SMALL LETTER E WITH HOOK ABOVE = LATIN SMALL LETTER E + COMBINING HOOK ABOVE
0045+0303=1EBC# LATIN CAPITAL LETTER E WITH TILDE = LATIN CAPITAL LETTER E + COMBINING TILDE
0065+0303=1EBD# LATIN SMALL LETTER E WITH TILDE = LATIN SMALL LETTER E + COMBINING TILDE
0045+0302+0301=1EBE# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
0065+0302+0301=1EBF# LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
0045+0302+0300=1EC0# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
0065+0302+0300=1EC1# LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
0045+0302+0309=1EC2# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
0065+0302+0309=1EC3# LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
0045+0302+0303=1EC4# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
0065+0302+0303=1EC5# LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
0045+0302+0323=1EC6# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
0065+0302+0323=1EC7# LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
0049+0309=1EC8# LATIN CAPITAL LETTER I WITH HOOK ABOVE = LATIN CAPITAL LETTER I + COMBINING HOOK ABOVE
0069+0309=1EC9# LATIN SMALL LETTER I WITH HOOK ABOVE = LATIN SMALL LETTER I + COMBINING HOOK ABOVE
0049+0323=1ECA# LATIN CAPITAL LETTER I WITH DOT BELOW = LATIN CAPITAL LETTER I + COMBINING DOT BELOW
0069+0323=1ECB# LATIN SMALL LETTER I WITH DOT BELOW = LATIN SMALL LETTER I + COMBINING DOT BELOW
004F+0323=1ECC# LATIN CAPITAL LETTER O WITH DOT BELOW = LATIN CAPITAL LETTER O + COMBINING DOT BELOW
006F+0323=1ECD# LATIN SMALL LETTER O WITH DOT BELOW = LATIN SMALL LETTER O + COMBINING DOT BELOW
004F+0309=1ECE# LATIN CAPITAL LETTER O WITH HOOK ABOVE = LATIN CAPITAL LETTER O + COMBINING HOOK ABOVE
006F+0309=1ECF# LATIN SMALL LETTER O WITH HOOK ABOVE = LATIN SMALL LETTER O + COMBINING HOOK ABOVE
004F+0302+0301=1ED0# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
006F+0302+0301=1ED1# LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
004F+0302+0300=1ED2# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
006F+0302+0300=1ED3# LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
004F+0302+0309=1ED4# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
006F+0302+0309=1ED5# LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
004F+0302+0303=1ED6# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
006F+0302+0303=1ED7# LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
004F+0302+0323=1ED8# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
006F+0302+0323=1ED9# LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
004F+031B+0301=1EDA# LATIN CAPITAL LETTER O WITH HORN AND ACUTE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING ACUTE ACCENT
006F+031B+0301=1EDB# LATIN SMALL LETTER O WITH HORN AND ACUTE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING ACUTE ACCENT
004F+031B+0300=1EDC# LATIN CAPITAL LETTER O WITH HORN AND GRAVE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING GRAVE ACCENT
006F+031B+0300=1EDD# LATIN SMALL LETTER O WITH HORN AND GRAVE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING GRAVE ACCENT
004F+031B+0309=1EDE# LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING HOOK ABOVE
006F+031B+0309=1EDF# LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING HOOK ABOVE
004F+031B+0303=1EE0# LATIN CAPITAL LETTER O WITH HORN AND TILDE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING TILDE
006F+031B+0303=1EE1# LATIN SMALL LETTER O WITH HORN AND TILDE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING TILDE
004F+031B+0323=1EE2# LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING DOT BELOW
006F+031B+0323=1EE3# LATIN SMALL LETTER O WITH HORN AND DOT BELOW = LATIN SMALL LETTER O + COMBINING HORN + COMBINING DOT BELOW
0055+0323=1EE4# LATIN CAPITAL LETTER U WITH DOT BELOW = LATIN CAPITAL LETTER U + COMBINING DOT BELOW
0075+0323=1EE5# LATIN SMALL LETTER U WITH DOT BELOW = LATIN SMALL LETTER U + COMBINING DOT BELOW
0055+0309=1EE6# LATIN CAPITAL LETTER U WITH HOOK ABOVE = LATIN CAPITAL LETTER U + COMBINING HOOK ABOVE
0075+0309=1EE7# LATIN SMALL LETTER U WITH HOOK ABOVE = LATIN SMALL LETTER U + COMBINING HOOK ABOVE
0055+031B+0301=1EE8# LATIN CAPITAL LETTER U WITH HORN AND ACUTE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING ACUTE ACCENT
0075+031B+0301=1EE9# LATIN SMALL LETTER U WITH HORN AND ACUTE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING ACUTE ACCENT
0055+031B+0300=1EEA# LATIN CAPITAL LETTER U WITH HORN AND GRAVE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING GRAVE ACCENT
0075+031B+0300=1EEB# LATIN SMALL LETTER U WITH HORN AND GRAVE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING GRAVE ACCENT
0055+031B+0309=1EEC# LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING HOOK ABOVE
0075+031B+0309=1EED# LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING HOOK ABOVE
0055+031B+0303=1EEE# LATIN CAPITAL LETTER U WITH HORN AND TILDE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING TILDE
0075+031B+0303=1EEF# LATIN SMALL LETTER U WITH HORN AND TILDE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING TILDE
0055+031B+0323=1EF0# LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING DOT BELOW
0075+031B+0323=1EF1# LATIN SMALL LETTER U WITH HORN AND DOT BELOW = LATIN SMALL LETTER U + COMBINING HORN + COMBINING DOT BELOW
0059+0300=1EF2# LATIN CAPITAL LETTER Y WITH GRAVE = LATIN CAPITAL LETTER Y + COMBINING GRAVE ACCENT
0079+0300=1EF3# LATIN SMALL LETTER Y WITH GRAVE = LATIN SMALL LETTER Y + COMBINING GRAVE ACCENT
0059+0323=1EF4# LATIN CAPITAL LETTER Y WITH DOT BELOW = LATIN CAPITAL LETTER Y + COMBINING DOT BELOW
0079+0323=1EF5# LATIN SMALL LETTER Y WITH DOT BELOW = LATIN SMALL LETTER Y + COMBINING DOT BELOW
0059+0309=1EF6# LATIN CAPITAL LETTER Y WITH HOOK ABOVE = LATIN CAPITAL LETTER Y + COMBINING HOOK ABOVE
0079+0309=1EF7# LATIN SMALL LETTER Y WITH HOOK ABOVE = LATIN SMALL LETTER Y + COMBINING HOOK ABOVE
0059+0303=1EF8# LATIN CAPITAL LETTER Y WITH TILDE = LATIN CAPITAL LETTER Y + COMBINING TILDE
0079+0303=1EF9# LATIN SMALL LETTER Y WITH TILDE = LATIN SMALL LETTER Y + COMBINING TILDE

View file

@ -0,0 +1,73 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import org.junit.Assert;
import org.junit.Test;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.text.Normalizer;
/**
 * Decoding tests for the charset that is looked up under the name "ANSEL".
 *
 * Each test feeds a short byte sequence to the decoder and compares the
 * decoded string, and its NFC-normalized form, against fixed expectations.
 */
public class AnselCharsetTest extends Assert {

    /**
     * Decodes the two bytes 0xE8 0x75 and expects the letter "u" followed by
     * U+0308, which NFC normalization then folds into a single character.
     *
     * @throws Exception if the charset lookup or the decoding fails
     */
    @Test
    public void testAnsel() throws Exception {
        String decoded = decodeAnsel("\u00e8\u0075");
        assertEquals("\u0075\u0308", decoded);
        String composed = Normalizer.normalize(decoded, Normalizer.Form.NFC);
        assertEquals("ü", composed);
    }

    /**
     * Decodes a nine-byte sequence and checks the length of the decoded
     * string (9) as well as the content and length (7) of its NFC form.
     *
     * @throws Exception if the charset lookup or the decoding fails
     */
    @Test
    public void testAnsel2() throws Exception {
        String decoded = decodeAnsel("\u00AC\u00E2\u0041\u00ED\u0042\u00E2\u0043\u00E2\u0044");
        assertEquals(9, decoded.length());
        String composed = Normalizer.normalize(decoded, Normalizer.Form.NFC);
        assertEquals("ƠÁB̕ĆD́", composed);
        assertEquals(7, composed.length());
    }

    /**
     * Turns every char of {@code latin1} into one byte via ISO-8859-1 and
     * decodes the resulting bytes with a fresh "ANSEL" decoder.
     */
    private static String decodeAnsel(String latin1) throws Exception {
        byte[] raw = latin1.getBytes("ISO-8859-1");
        CharsetDecoder ansel = Charset.forName("ANSEL").newDecoder();
        CharBuffer chars = ansel.decode(ByteBuffer.wrap(raw));
        return chars.toString();
    }
}

View file

@ -0,0 +1,56 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.junit.Test;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.stream.XMLStreamException;
/**
 * Smoke test for {@link AnselCodeTableParser}: parses the "codetables.xml"
 * resource and logs the name and length of every character set found.
 */
public class AnselCodeTableParserTest {

    // One shared, immutable logger per class instead of one per test instance.
    private static final Logger logger = LogManager.getLogger(AnselCodeTableParserTest.class);

    /**
     * Walks all code tables and their character sets reported by the parser
     * and logs each character set's name and length.
     *
     * @throws XMLStreamException declared because the parser calls may raise it
     * @throws IOException if closing the resource stream fails
     */
    @Test
    public void test() throws XMLStreamException, IOException {
        // try-with-resources guarantees the resource stream is released even
        // when parsing fails; the stream stays open for the whole traversal.
        try (InputStream in = getClass().getResourceAsStream("codetables.xml")) {
            AnselCodeTableParser anselCodeTableParser = new AnselCodeTableParser(in);
            for (AnselCodeTableParser.CodeTable codeTable : anselCodeTableParser.getCodeTables()) {
                for (AnselCodeTableParser.CharacterSet characterSet : codeTable.getCharacterSets()) {
                    logger.info("{} {}", characterSet.getName(), characterSet.getLength());
                }
            }
        }
    }
}

View file

@ -0,0 +1,52 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import org.junit.Test;
import java.nio.charset.Charset;
/**
 * Checks that the charset constants exposed by {@link BibliographicCharsets}
 * can be read and are not {@code null}.
 */
public class BibliographicCharsetsTest {

    /**
     * Reads each constant in turn and fails if one of them is {@code null}.
     * Reading the constants also forces initialization of
     * {@link BibliographicCharsets}, so an error raised there fails the test.
     */
    @Test
    public void testAvailability() {
        requireAvailable("ANSEL", BibliographicCharsets.ANSEL);
        requireAvailable("ISO5426", BibliographicCharsets.ISO5426);
        requireAvailable("ISO5428", BibliographicCharsets.ISO5428);
        requireAvailable("MAB", BibliographicCharsets.MAB);
        requireAvailable("MAB_DISKETTE", BibliographicCharsets.MAB_DISKETTE);
        requireAvailable("PICA", BibliographicCharsets.PICA);
    }

    /**
     * Fails with an {@link AssertionError} naming the constant when
     * {@code charset} is {@code null}.
     */
    private static void requireAvailable(String name, Charset charset) {
        if (charset == null) {
            throw new AssertionError("BibliographicCharsets." + name + " is null");
        }
    }
}

View file

@ -0,0 +1,85 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import org.junit.Assert;
import org.junit.Test;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.StandardCharsets;
import java.util.SortedMap;
/**
 * Tests that a charset named "ISO-5426" is listed among the available
 * charsets and that the charset names "MAB2" and "x-MAB" decode short
 * sample inputs to the expected strings.
 */
public class ISO5426Test extends Assert {

    /**
     * Verifies that {@link Charset#availableCharsets()} has an entry keyed
     * "ISO-5426".
     *
     * @throws Exception never thrown by the current body; kept for signature compatibility
     */
    @Test
    public void listCharsets() throws Exception {
        SortedMap<String, Charset> map = Charset.availableCharsets();
        assertTrue(map.containsKey("ISO-5426"));
    }

    /**
     * Decodes a two-character sample through the "MAB2" charset name.
     *
     * @throws Exception if the charset lookup or the decoding fails
     */
    @Test
    public void testMAB2() throws Exception {
        // expected value first, so a failure message reports the right sides
        assertEquals("ä", decode("MAB2", "Éa"));
    }

    /**
     * Decodes the same sample through the "x-MAB" charset name.
     *
     * @throws Exception if the charset lookup or the decoding fails
     */
    @Test
    public void testXMAB() throws Exception {
        assertEquals("ä", decode("x-MAB", "Éa"));
    }

    /**
     * Decodes the single byte 0xA3 through "x-MAB" and expects a pound sign.
     *
     * @throws Exception if the charset lookup or the decoding fails
     */
    @Test
    public void testPound() throws Exception {
        assertEquals("£", decode("x-MAB", "\u00A3"));
    }

    /**
     * Encodes {@code latin1} as ISO-8859-1 bytes and decodes those bytes with
     * a new decoder of the charset registered under {@code charsetName}.
     */
    private static String decode(String charsetName, String latin1) throws Exception {
        ByteBuffer buf = ByteBuffer.wrap(latin1.getBytes(StandardCharsets.ISO_8859_1));
        CharsetDecoder decoder = Charset.forName(charsetName).newDecoder();
        CharBuffer cbuf = decoder.decode(buf);
        return cbuf.toString();
    }
}

View file

@ -0,0 +1,60 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import org.junit.Assert;
import org.junit.Test;
import java.text.Normalizer;
/**
 * Sanity checks for {@link Normalizer} with NFC normalization on German
 * sample text.
 */
public class NormalizerTest extends Assert {

    /**
     * Builds a string from UTF-8 bytes in which "a" is followed by the
     * two-byte sequence 0xCC 0x88 (U+0308) and checks that NFC normalization
     * yields the expected word.
     *
     * @throws Exception if the "UTF-8" charset name is not supported
     */
    @Test
    public void testNormalizer() throws Exception {
        byte[] utf8 = {
            'g', 'e', 'm', 'a',
            (byte) 0xCC, (byte) 0x88,
            (byte) 0xC3, (byte) 0x9F
        };
        String composed = Normalizer.normalize(new String(utf8, "UTF-8"), Normalizer.Form.NFC);
        assertEquals("gemäß", composed);
    }

    /**
     * Checks that a 56-character sentence keeps its length under NFC
     * normalization.
     */
    @Test
    public void tesNFC() {
        String sentence = "Für Bandanzeige bitte zugehörige Publikationen anklicken";
        assertEquals(56, sentence.length());
        assertEquals(56, Normalizer.normalize(sentence, Normalizer.Form.NFC).length());
    }
}

View file

@ -0,0 +1,64 @@
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import org.junit.Assert;
import org.junit.Test;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.SortedMap;
/**
 * Tests that the charset named {@code SIMPLE_ANSEL} is visible through
 * {@link Charset} and that its decoder combines a two-byte input into a
 * single precomposed character.
 */
public class SimpleAnselCharsetTest extends Assert {

    /**
     * Verifies that {@code SIMPLE_ANSEL} is listed among the charsets
     * reported by {@link Charset#availableCharsets()}.
     */
    @Test
    public void listCharsets() {
        SortedMap<String, Charset> map = Charset.availableCharsets();
        assertTrue("charset SIMPLE_ANSEL is not listed by Charset.availableCharsets()",
                map.containsKey("SIMPLE_ANSEL"));
    }

    /**
     * Decodes the bytes {@code E8 75} with the {@code SIMPLE_ANSEL} decoder and
     * expects the single character U+00FC (u with diaeresis).
     *
     * @throws Exception if the decoder rejects the input
     */
    @Test
    public void testSimpleAnsel() throws Exception {
        // Raw input bytes: 0xE8 followed by 0x75 ('u'). Written as a byte literal
        // so no intermediate string/charset round trip is needed to build them.
        ByteBuffer buf = ByteBuffer.wrap(new byte[]{(byte) 0xE8, (byte) 0x75});
        Charset charset = Charset.forName("SIMPLE_ANSEL");
        CharsetDecoder decoder = charset.newDecoder();
        CharBuffer cbuf = decoder.decode(buf);
        String output = cbuf.toString();
        assertEquals("\u00fc", output);
    }
}

View file

@ -0,0 +1,4 @@
/**
 * Bibliographic character set implementations.
 *
 * <p>The tests in this package look the charsets up by name through
 * {@link java.nio.charset.Charset}, for example
 * {@code Charset.forName("SIMPLE_ANSEL")}.
 */
package org.xbib.charset;

View file

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Log4j 2 configuration: the root logger writes events of level INFO and above to standard output. -->
<!-- status="OFF" suppresses Log4j's own internal status messages. -->
<Configuration status="OFF">
    <Appenders>
        <Console name="Console" target="SYSTEM_OUT">
            <!-- Layout: [time][level, left-aligned, min width 5][logger name, left-aligned, min width 25][thread] message, newline -->
            <PatternLayout pattern="[%d{ABSOLUTE}][%-5p][%-25c][%t] %m%n"/>
        </Console>
    </Appenders>
    <Loggers>
        <Root level="info">
            <AppenderRef ref="Console" />
        </Root>
    </Loggers>
</Configuration>