initial import
This commit is contained in:
commit
931e2b9cda
41 changed files with 104466 additions and 0 deletions
15
.gitignore
vendored
Normal file
15
.gitignore
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
/data
|
||||
/work
|
||||
/logs
|
||||
/.idea
|
||||
/target
|
||||
.DS_Store
|
||||
*.iml
|
||||
/.settings
|
||||
/.classpath
|
||||
/.project
|
||||
/.gradle
|
||||
/build
|
||||
/plugins
|
||||
/sessions
|
||||
*~
|
8
.travis.yml
Normal file
8
.travis.yml
Normal file
|
@ -0,0 +1,8 @@
|
|||
sudo: false
|
||||
language: java
|
||||
jdk:
|
||||
- oraclejdk8
|
||||
|
||||
cache:
|
||||
directories:
|
||||
- $HOME/.m2
|
39
CREDITS.txt
Normal file
39
CREDITS.txt
Normal file
|
@ -0,0 +1,39 @@
|
|||
These bibliographic character sets are collected and improved
|
||||
by Jörg Prante <joergprante@gmail.com>
|
||||
|
||||
Thanks to:
|
||||
|
||||
Library of Congress
|
||||
The Library of Congress provides an ANSEL code table file
|
||||
|
||||
https://www.loc.gov/marc/specifications/codetables.xml
|
||||
|
||||
at https://www.loc.gov/marc/specifications/specchartables.html for making the
|
||||
character set implementation of ANSEL/Z39.47 possible, including east
|
||||
asian code characters (EACC).
|
||||
|
||||
US-ASCII
|
||||
The US-ASCII re-implementation was taken from the GNU classpath project.
|
||||
It is provided as a demonstration of a simple single-byte character set.
|
||||
The original code was licensed by the GNU Public License 2.1 (GPL)
|
||||
|
||||
Simple ANSEL
|
||||
The ANSEL charset implementation by Piotr Andzel http://anselcharset.sourceforge.net/
|
||||
has been included as "simple ANSEL". The original code was licensed under the GNU Lesser
|
||||
General Public License 3 (LGPL 3.0)
|
||||
|
||||
MAB
|
||||
The MAB Charset Java implementation was developed by Jürgen Kett of
|
||||
Deutsche Nationalbibliothek (DNB) in 2004 and was licensed by GNU Public License 2 (GPL)
|
||||
|
||||
MAB-Diskette
|
||||
This is a CP850 variant which could only be implemented by the help of
|
||||
Thomas Berger http://www.gymel.com/charsets/MAB-Diskette.html
|
||||
|
||||
PicaCharset
|
||||
An alternative PICA character set implementation of Deutsche Nationalbibliothek (DNB)
|
||||
The original licence was GNU Public License 2 (GPL).
|
||||
|
||||
ISO 5428:1984, "Greek alphabet coded character set for bibliographic information interchange",
|
||||
has been implemented by the help of https://en.wikipedia.org/wiki/ISO_5428
|
||||
|
76
README.md
Normal file
76
README.md
Normal file
|
@ -0,0 +1,76 @@
|
|||
# Bibliographic character sets
|
||||
|
||||
This is a collection of bibliographic character sets implemented in
|
||||
Java.
|
||||
|
||||
These character sets have not been included in the standard Java
|
||||
distribution. Most of the character sets predate Unicode and are
|
||||
dormant now but are still in active use in library application
|
||||
system software.
|
||||
|
||||
The reason to provide these character sets is to assist the public
|
||||
in migrating library data to Unicode, and UTF-8, respectively.
|
||||
|
||||
You can include this jar in the classpath, the Java CharsetProvider and
|
||||
ServiceLoader API will then make the character sets available,
|
||||
e.g. by `Charset.forName(name)`
|
||||
|
||||
This is free software.
|
||||
Please follow the AGPL license, which requires you to offer the source code
|
||||
of your project to the public if you make modifications to this program.
|
||||
|
||||
All contributions and pull requests are welcome.
|
||||
|
||||
If you have questions or find issues, please post them at
|
||||
https://github.com/xbib/bibliographic-character-sets/issues
|
||||
|
||||
## List of character sets included
|
||||
|
||||
### ANSEL "ANSI/NISO Z39.47-1993 (R2003) Extended Latin Alphabet Coded Character Set for Bibliographic Use (ANSEL)"
|
||||
|
||||
This implementation can only decode from ANSEL / Z39.47.
|
||||
|
||||
Included are the following sets specified by the Library of Congress at
|
||||
https://www.loc.gov/marc/specifications/specchartables.html
|
||||
|
||||
Basic Latin (ASCII), Extended Latin (ANSEL), Greek Symbols,
|
||||
Subscripts, Superscripts, Basic Hebrew, Basic Cyrillic,
|
||||
Extended Cyrillic, Basic Arabic, Extended Arabic,
|
||||
Basic Greek, Chinese, Japanese, Korean (EACC)
|
||||
|
||||
Usage:
|
||||
|
||||
Charset.forName("ANSEL")
|
||||
|
||||
### ISO 5426 "Extension of the Latin alphabet coded character set for bibliographic information interchange"
|
||||
|
||||
Usage:
|
||||
|
||||
Charset.forName("x-MAB")
|
||||
|
||||
### ISO 5428 "Greek alphabet coded character set for bibliographic information interchange"
|
||||
|
||||
### Pica (a variant of the INTERMARC character set, a 1979 French/Danish adoption of USMARC/UKMARC)
|
||||
|
||||
### MAB-Diskette (a variant of CP850 character set)
|
||||
|
||||
### US-ASCII (re-implementation for demonstration purpose, disabled by default)
|
||||
|
||||
See also the CREDITS.txt for acknowledgements.
|
||||
|
||||
# License
|
||||
|
||||
Copyright (C) 2016 Jörg Prante and the xbib organization
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
67
build.gradle
Normal file
67
build.gradle
Normal file
|
@ -0,0 +1,67 @@
|
|||
|
||||
println "Host: " + java.net.InetAddress.getLocalHost()
|
||||
println "Gradle: " + gradle.gradleVersion + " JVM: " + org.gradle.internal.jvm.Jvm.current() + " Groovy: " + GroovySystem.getVersion()
|
||||
println "Build: group: '${project.group}', name: '${project.name}', version: '${project.version}'"
|
||||
|
||||
apply plugin: 'java'
|
||||
apply plugin: 'maven'
|
||||
apply plugin: 'signing'
|
||||
apply plugin: 'findbugs'
|
||||
apply plugin: 'pmd'
|
||||
apply plugin: 'checkstyle'
|
||||
|
||||
repositories {
|
||||
mavenLocal()
|
||||
mavenCentral()
|
||||
jcenter()
|
||||
maven {
|
||||
url "http://xbib.org/repository"
|
||||
}
|
||||
}
|
||||
|
||||
configurations {
|
||||
wagon
|
||||
}
|
||||
|
||||
dependencies {
|
||||
testCompile "org.apache.logging.log4j:log4j-core:2.5"
|
||||
testCompile('junit:junit:4.12')
|
||||
wagon 'org.apache.maven.wagon:wagon-ssh-external:2.10'
|
||||
}
|
||||
|
||||
sourceCompatibility = JavaVersion.VERSION_1_8
|
||||
targetCompatibility = JavaVersion.VERSION_1_8
|
||||
|
||||
[compileJava, compileTestJava]*.options*.encoding = 'UTF-8'
|
||||
tasks.withType(JavaCompile) {
|
||||
options.compilerArgs << "-Xlint:deprecation,unchecked"
|
||||
}
|
||||
test {
|
||||
testLogging {
|
||||
showStandardStreams = false
|
||||
exceptionFormat = 'full'
|
||||
}
|
||||
}
|
||||
tasks.withType(FindBugs) {
|
||||
ignoreFailures = true
|
||||
reports {
|
||||
xml.enabled = false
|
||||
html.enabled = true
|
||||
}
|
||||
}
|
||||
task sourcesJar(type: Jar, dependsOn: classes) {
|
||||
classifier 'sources'
|
||||
from sourceSets.main.allSource
|
||||
}
|
||||
// Packages the generated API documentation into the "-javadoc" classifier jar.
|
||||
// The javadoc output directory must be added explicitly; without a 'from',
|
||||
// the jar would contain only a manifest.
|
||||
task javadocJar(type: Jar, dependsOn: javadoc) {
|
||||
classifier 'javadoc'
|
||||
from javadoc.destinationDir
|
||||
}
|
||||
artifacts {
|
||||
archives sourcesJar, javadocJar
|
||||
}
|
||||
if (project.hasProperty('signing.keyId')) {
|
||||
signing {
|
||||
sign configurations.archives
|
||||
}
|
||||
}
|
||||
apply from: 'gradle/publish.gradle'
|
323
config/checkstyle/checkstyle.xml
Normal file
323
config/checkstyle/checkstyle.xml
Normal file
|
@ -0,0 +1,323 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE module PUBLIC
|
||||
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
||||
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
||||
|
||||
<!-- This is a checkstyle configuration file. For descriptions of
|
||||
what the following rules do, please see the checkstyle configuration
|
||||
page at http://checkstyle.sourceforge.net/config.html -->
|
||||
|
||||
<module name="Checker">
|
||||
|
||||
<module name="FileTabCharacter">
|
||||
<!-- Checks that there are no tab characters in the file.
|
||||
-->
|
||||
</module>
|
||||
|
||||
<module name="NewlineAtEndOfFile">
|
||||
<property name="lineSeparator" value="lf"/>
|
||||
</module>
|
||||
|
||||
<module name="RegexpSingleline">
|
||||
<!-- Checks that FIXME is not used in comments. TODO is preferred.
|
||||
-->
|
||||
<property name="format" value="((//.*)|(\*.*))FIXME" />
|
||||
<property name="message" value='TODO is preferred to FIXME. e.g. "TODO(johndoe): Refactor when v2 is released."' />
|
||||
</module>
|
||||
|
||||
<module name="RegexpSingleline">
|
||||
<!-- Checks that TODOs are named. (Actually, just that they are followed
|
||||
by an open paren.)
|
||||
-->
|
||||
<property name="format" value="((//.*)|(\*.*))TODO[^(]" />
|
||||
<property name="message" value='All TODOs should be named. e.g. "TODO(johndoe): Refactor when v2 is released."' />
|
||||
</module>
|
||||
|
||||
<module name="JavadocPackage">
|
||||
<!-- Checks that each Java package has a Javadoc file used for commenting.
|
||||
Only allows a package-info.java, not package.html. -->
|
||||
</module>
|
||||
|
||||
<!-- All Java AST specific tests live under TreeWalker module. -->
|
||||
<module name="TreeWalker">
|
||||
|
||||
<!--
|
||||
|
||||
IMPORT CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<module name="RedundantImport">
|
||||
<!-- Checks for redundant import statements. -->
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="ImportOrder">
|
||||
<!-- Checks for out of order import statements. -->
|
||||
|
||||
<property name="severity" value="warning"/>
|
||||
<property name="groups" value="com.google,android,junit,net,org,java,javax"/>
|
||||
<!-- This ensures that static imports go first. -->
|
||||
<property name="option" value="top"/>
|
||||
<property name="tokens" value="STATIC_IMPORT, IMPORT"/>
|
||||
</module>
|
||||
|
||||
<!--
|
||||
|
||||
JAVADOC CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<!-- Checks for Javadoc comments. -->
|
||||
<!-- See http://checkstyle.sf.net/config_javadoc.html -->
|
||||
<module name="JavadocMethod">
|
||||
<property name="scope" value="protected"/>
|
||||
<property name="severity" value="warning"/>
|
||||
<property name="allowMissingJavadoc" value="true"/>
|
||||
<property name="allowMissingParamTags" value="true"/>
|
||||
<property name="allowMissingReturnTag" value="true"/>
|
||||
<property name="allowMissingThrowsTags" value="true"/>
|
||||
<property name="allowThrowsTagsForSubclasses" value="true"/>
|
||||
<property name="allowUndeclaredRTE" value="true"/>
|
||||
</module>
|
||||
|
||||
<module name="JavadocType">
|
||||
<property name="scope" value="protected"/>
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="JavadocStyle">
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<!--
|
||||
|
||||
NAMING CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<!-- Item 38 - Adhere to generally accepted naming conventions -->
|
||||
|
||||
<module name="PackageName">
|
||||
<!-- Validates identifiers for package names against the
|
||||
supplied expression. -->
|
||||
<!-- Here the default checkstyle rule restricts package name parts to
|
||||
seven characters, this is not in line with common practice at Google.
|
||||
-->
|
||||
<property name="format" value="^[a-z]+(\.[a-z][a-z0-9]{1,})*$"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="TypeNameCheck">
|
||||
<!-- Validates type names (classes, interfaces, enums, annotations) against the
|
||||
expression "^[A-Z][a-zA-Z0-9]*$". -->
|
||||
<metadata name="altname" value="TypeName"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="ConstantNameCheck">
|
||||
<!-- Validates non-private, static, final fields against the format
|
||||
below: ALL_CAPS names, or any name starting with "FLAG_". -->
|
||||
<metadata name="altname" value="ConstantName"/>
|
||||
<property name="applyToPublic" value="true"/>
|
||||
<property name="applyToProtected" value="true"/>
|
||||
<property name="applyToPackage" value="true"/>
|
||||
<property name="applyToPrivate" value="false"/>
|
||||
<property name="format" value="^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$"/>
|
||||
<message key="name.invalidPattern"
|
||||
value="Variable ''{0}'' should be in ALL_CAPS (if it is a constant) or be private (otherwise)."/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="StaticVariableNameCheck">
|
||||
<!-- Validates static, non-final fields against the supplied
|
||||
expression "^[a-z][a-zA-Z0-9]*_?$". -->
|
||||
<metadata name="altname" value="StaticVariableName"/>
|
||||
<property name="applyToPublic" value="true"/>
|
||||
<property name="applyToProtected" value="true"/>
|
||||
<property name="applyToPackage" value="true"/>
|
||||
<property name="applyToPrivate" value="true"/>
|
||||
<property name="format" value="^[a-z][a-zA-Z0-9]*_?$"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="MemberNameCheck">
|
||||
<!-- Validates non-static members against the supplied expression. -->
|
||||
<metadata name="altname" value="MemberName"/>
|
||||
<property name="applyToPublic" value="true"/>
|
||||
<property name="applyToProtected" value="true"/>
|
||||
<property name="applyToPackage" value="true"/>
|
||||
<property name="applyToPrivate" value="true"/>
|
||||
<property name="format" value="^[a-z][a-zA-Z0-9]*$"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="MethodNameCheck">
|
||||
<!-- Validates identifiers for method names. -->
|
||||
<metadata name="altname" value="MethodName"/>
|
||||
<property name="format" value="^[a-z][a-zA-Z0-9]*(_[a-zA-Z0-9]+)*$"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="ParameterName">
|
||||
<!-- Validates identifiers for method parameters against the
|
||||
expression "^[a-z][a-zA-Z0-9]*$". -->
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="LocalFinalVariableName">
|
||||
<!-- Validates identifiers for local final variables against the
|
||||
expression "^[a-z][a-zA-Z0-9]*$". -->
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="LocalVariableName">
|
||||
<!-- Validates identifiers for local variables against the
|
||||
expression "^[a-z][a-zA-Z0-9]*$". -->
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
|
||||
<!--
|
||||
|
||||
LENGTH and CODING CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<module name="LineLength">
|
||||
<!-- Checks if a line is too long. -->
|
||||
<property name="max" value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.max}" default="128"/>
|
||||
<property name="severity" value="error"/>
|
||||
|
||||
<!--
|
||||
The default ignore pattern exempts the following elements:
|
||||
- import statements
|
||||
- long URLs inside comments
|
||||
-->
|
||||
|
||||
<property name="ignorePattern"
|
||||
value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.ignorePattern}"
|
||||
default="^(package .*;\s*)|(import .*;\s*)|( *(\*|//).*https?://.*)$"/>
|
||||
</module>
|
||||
|
||||
<module name="LeftCurly">
|
||||
<!-- Checks for placement of the left curly brace ('{'). -->
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<module name="RightCurly">
|
||||
<!-- Checks right curlies on CATCH, ELSE, and TRY blocks are on
|
||||
the same line. e.g., the following example is fine:
|
||||
<pre>
|
||||
if {
|
||||
...
|
||||
} else
|
||||
</pre>
|
||||
-->
|
||||
<!-- This next example is not fine:
|
||||
<pre>
|
||||
if {
|
||||
...
|
||||
}
|
||||
else
|
||||
</pre>
|
||||
-->
|
||||
<property name="option" value="same"/>
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
<!-- Checks for braces around if, else, for, while and do blocks -->
|
||||
<module name="NeedBraces">
|
||||
<property name="severity" value="warning"/>
|
||||
<property name="tokens" value="LITERAL_IF, LITERAL_ELSE, LITERAL_FOR, LITERAL_WHILE, LITERAL_DO"/>
|
||||
</module>
|
||||
|
||||
<module name="UpperEll">
|
||||
<!-- Checks that long constants are defined with an upper ell.-->
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="FallThrough">
|
||||
<!-- Warn about falling through to the next case statement. Similar to
|
||||
javac -Xlint:fallthrough, but the check is suppressed if a single-line comment
|
||||
on the last non-blank line preceding the fallen-into case contains 'fall through' (or
|
||||
some other variants which we don't publicize, to promote consistency).
|
||||
-->
|
||||
<property name="reliefPattern"
|
||||
value="fall through|Fall through|fallthru|Fallthru|falls through|Falls through|fallthrough|Fallthrough|No break|NO break|no break|continue on"/>
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
|
||||
<!--
|
||||
|
||||
MODIFIERS CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<module name="ModifierOrder">
|
||||
<!-- Warn if modifier order is inconsistent with JLS3 8.1.1, 8.3.1, and
|
||||
8.4.3. The prescribed order is:
|
||||
public, protected, private, abstract, static, final, transient, volatile,
|
||||
synchronized, native, strictfp
|
||||
-->
|
||||
</module>
|
||||
|
||||
|
||||
<!--
|
||||
|
||||
WHITESPACE CHECKS
|
||||
|
||||
-->
|
||||
|
||||
<module name="WhitespaceAround">
|
||||
<!-- Checks that various tokens are surrounded by whitespace.
|
||||
This includes most binary operators and keywords followed
|
||||
by regular or curly braces.
|
||||
-->
|
||||
<property name="tokens" value="ASSIGN, BAND, BAND_ASSIGN, BOR,
|
||||
BOR_ASSIGN, BSR, BSR_ASSIGN, BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN,
|
||||
EQUAL, GE, GT, LAND, LE, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
||||
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
||||
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE, LOR, LT, MINUS,
|
||||
MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL, PLUS, PLUS_ASSIGN, QUESTION,
|
||||
SL, SL_ASSIGN, SR_ASSIGN, STAR, STAR_ASSIGN"/>
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="WhitespaceAfter">
|
||||
<!-- Checks that commas, semicolons and typecasts are followed by
|
||||
whitespace.
|
||||
-->
|
||||
<property name="tokens" value="COMMA, SEMI, TYPECAST"/>
|
||||
</module>
|
||||
|
||||
<module name="NoWhitespaceAfter">
|
||||
<!-- Checks that there is no whitespace after various unary operators.
|
||||
Linebreaks are allowed.
|
||||
-->
|
||||
<property name="tokens" value="BNOT, DEC, DOT, INC, LNOT, UNARY_MINUS,
|
||||
UNARY_PLUS"/>
|
||||
<property name="allowLineBreaks" value="true"/>
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="NoWhitespaceBefore">
|
||||
<!-- Checks that there is no whitespace before semicolons, dots and postfix operators.
|
||||
Linebreaks are allowed.
|
||||
-->
|
||||
<property name="tokens" value="SEMI, DOT, POST_DEC, POST_INC"/>
|
||||
<property name="allowLineBreaks" value="true"/>
|
||||
<property name="severity" value="error"/>
|
||||
</module>
|
||||
|
||||
<module name="ParenPad">
|
||||
<!-- Checks that there is no whitespace before close parens or after
|
||||
open parens.
|
||||
-->
|
||||
<property name="severity" value="warning"/>
|
||||
</module>
|
||||
|
||||
</module>
|
||||
</module>
|
||||
|
9
gradle.properties
Normal file
9
gradle.properties
Normal file
|
@ -0,0 +1,9 @@
|
|||
# Gradle reads this file as a plain Java properties file: values are literal
|
||||
# strings, so quotes are kept verbatim and '+' does not concatenate.
|
||||
group = org.xbib
|
||||
version = 1.0.0
|
||||
org.gradle.daemon = true
|
||||
name = bibliographic-character-sets
|
||||
description = Bibliographic character sets
|
||||
user = xbib
|
||||
scmUrl = https://github.com/xbib/bibliographic-character-sets
|
||||
scmConnection = scm:git:git://github.com/xbib/bibliographic-character-sets.git
|
||||
scmDeveloperConnection = scm:git:git://github.com/xbib/bibliographic-character-sets.git
|
62
gradle/publish.gradle
Normal file
62
gradle/publish.gradle
Normal file
|
@ -0,0 +1,62 @@
|
|||
task xbibUpload(type: Upload) {
|
||||
configuration = configurations.archives
|
||||
uploadDescriptor = true
|
||||
repositories {
|
||||
if (project.hasProperty("xbibUsername")) {
|
||||
mavenDeployer {
|
||||
configuration = configurations.wagon
|
||||
repository(url: uri('scpexe://xbib.org/repository')) {
|
||||
authentication(userName: xbibUsername, privateKey: xbibPrivateKey)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
task mavenCentralUpload(type: Upload) {
|
||||
configuration = configurations.archives
|
||||
uploadDescriptor = true
|
||||
repositories {
|
||||
if (project.hasProperty('ossrhUsername')) {
|
||||
mavenDeployer {
|
||||
beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) }
|
||||
repository(url: uri(ossrhReleaseUrl)) {
|
||||
authentication(userName: ossrhUsername, password: ossrhPassword)
|
||||
}
|
||||
snapshotRepository(url: uri(ossrhSnapshotUrl)) {
|
||||
authentication(userName: ossrhUsername, password: ossrhPassword)
|
||||
}
|
||||
pom.project {
|
||||
name name
|
||||
description description
|
||||
packaging 'jar'
|
||||
inceptionYear '2016'
|
||||
url scmUrl
|
||||
organization {
|
||||
name 'xbib'
|
||||
url 'http://xbib.org'
|
||||
}
|
||||
developers {
|
||||
developer {
|
||||
id user
|
||||
name 'Jörg Prante'
|
||||
email 'joergprante@gmail.com'
|
||||
url 'https://github.com/jprante'
|
||||
}
|
||||
}
|
||||
scm {
|
||||
url scmUrl
|
||||
connection scmConnection
|
||||
developerConnection scmDeveloperConnection
|
||||
}
|
||||
licenses {
|
||||
license {
|
||||
name 'Affero GNU Public License Version 3'
|
||||
url 'http://www.gnu.org/licenses/agpl-3.0.html'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
Binary file not shown.
6
gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
6
gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
#Thu Aug 18 20:34:33 CEST 2016
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-3.0-bin.zip
|
169
gradlew
vendored
Executable file
169
gradlew
vendored
Executable file
|
@ -0,0 +1,169 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
##############################################################################
|
||||
##
|
||||
## Gradle start up script for UN*X
|
||||
##
|
||||
##############################################################################
|
||||
|
||||
# Attempt to set APP_HOME
|
||||
# Resolve links: $0 may be a link
|
||||
PRG="$0"
|
||||
# Need this for relative symlinks.
|
||||
while [ -h "$PRG" ] ; do
|
||||
ls=`ls -ld "$PRG"`
|
||||
link=`expr "$ls" : '.*-> \(.*\)$'`
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
PRG="$link"
|
||||
else
|
||||
PRG=`dirname "$PRG"`"/$link"
|
||||
fi
|
||||
done
|
||||
SAVED="`pwd`"
|
||||
cd "`dirname \"$PRG\"`/" >/dev/null
|
||||
APP_HOME="`pwd -P`"
|
||||
cd "$SAVED" >/dev/null
|
||||
|
||||
APP_NAME="Gradle"
|
||||
APP_BASE_NAME=`basename "$0"`
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS=""
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD="maximum"
|
||||
|
||||
warn ( ) {
|
||||
echo "$*"
|
||||
}
|
||||
|
||||
die ( ) {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
exit 1
|
||||
}
|
||||
|
||||
# OS specific support (must be 'true' or 'false').
|
||||
cygwin=false
|
||||
msys=false
|
||||
darwin=false
|
||||
nonstop=false
|
||||
case "`uname`" in
|
||||
CYGWIN* )
|
||||
cygwin=true
|
||||
;;
|
||||
Darwin* )
|
||||
darwin=true
|
||||
;;
|
||||
MINGW* )
|
||||
msys=true
|
||||
;;
|
||||
NONSTOP* )
|
||||
nonstop=true
|
||||
;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD="$JAVA_HOME/jre/sh/java"
|
||||
else
|
||||
JAVACMD="$JAVA_HOME/bin/java"
|
||||
fi
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD="java"
|
||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
||||
MAX_FD_LIMIT=`ulimit -H -n`
|
||||
if [ $? -eq 0 ] ; then
|
||||
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
||||
MAX_FD="$MAX_FD_LIMIT"
|
||||
fi
|
||||
ulimit -n $MAX_FD
|
||||
if [ $? -ne 0 ] ; then
|
||||
warn "Could not set maximum file descriptor limit: $MAX_FD"
|
||||
fi
|
||||
else
|
||||
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
|
||||
fi
|
||||
fi
|
||||
|
||||
# For Darwin, add options to specify how the application appears in the dock
|
||||
if $darwin; then
|
||||
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
|
||||
fi
|
||||
|
||||
# For Cygwin, switch paths to Windows format before running java
|
||||
if $cygwin ; then
|
||||
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
||||
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
||||
JAVACMD=`cygpath --unix "$JAVACMD"`
|
||||
|
||||
# We build the pattern for arguments to be converted via cygpath
|
||||
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
||||
SEP=""
|
||||
for dir in $ROOTDIRSRAW ; do
|
||||
ROOTDIRS="$ROOTDIRS$SEP$dir"
|
||||
SEP="|"
|
||||
done
|
||||
OURCYGPATTERN="(^($ROOTDIRS))"
|
||||
# Add a user-defined pattern to the cygpath arguments
|
||||
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
|
||||
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
|
||||
fi
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
i=0
|
||||
for arg in "$@" ; do
|
||||
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
|
||||
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
|
||||
|
||||
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
|
||||
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
|
||||
else
|
||||
eval `echo args$i`="\"$arg\""
|
||||
fi
|
||||
i=$((i+1))
|
||||
done
|
||||
case $i in
|
||||
(0) set -- ;;
|
||||
(1) set -- "$args0" ;;
|
||||
(2) set -- "$args0" "$args1" ;;
|
||||
(3) set -- "$args0" "$args1" "$args2" ;;
|
||||
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
|
||||
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
|
||||
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
|
||||
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
|
||||
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
|
||||
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
|
||||
function splitJvmOpts() {
|
||||
JVM_OPTS=("$@")
|
||||
}
|
||||
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
|
||||
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
|
||||
|
||||
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
||||
if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
|
||||
cd "$(dirname "$0")"
|
||||
fi
|
||||
|
||||
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
|
84
gradlew.bat
vendored
Normal file
84
gradlew.bat
vendored
Normal file
|
@ -0,0 +1,84 @@
|
|||
@if "%DEBUG%" == "" @echo off
|
||||
@rem ##########################################################################
|
||||
@rem
|
||||
@rem Gradle startup script for Windows
|
||||
@rem
|
||||
@rem ##########################################################################
|
||||
|
||||
@rem Set local scope for the variables with windows NT shell
|
||||
if "%OS%"=="Windows_NT" setlocal
|
||||
|
||||
set DIRNAME=%~dp0
|
||||
if "%DIRNAME%" == "" set DIRNAME=.
|
||||
set APP_BASE_NAME=%~n0
|
||||
set APP_HOME=%DIRNAME%
|
||||
|
||||
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
set DEFAULT_JVM_OPTS=
|
||||
|
||||
@rem Find java.exe
|
||||
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||
|
||||
set JAVA_EXE=java.exe
|
||||
%JAVA_EXE% -version >NUL 2>&1
|
||||
if "%ERRORLEVEL%" == "0" goto init
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:findJavaFromJavaHome
|
||||
set JAVA_HOME=%JAVA_HOME:"=%
|
||||
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||
|
||||
if exist "%JAVA_EXE%" goto init
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:init
|
||||
@rem Get command-line arguments, handling Windows variants
|
||||
|
||||
if not "%OS%" == "Windows_NT" goto win9xME_args
|
||||
|
||||
:win9xME_args
|
||||
@rem Slurp the command line arguments.
|
||||
set CMD_LINE_ARGS=
|
||||
set _SKIP=2
|
||||
|
||||
:win9xME_args_slurp
|
||||
if "x%~1" == "x" goto execute
|
||||
|
||||
set CMD_LINE_ARGS=%*
|
||||
|
||||
:execute
|
||||
@rem Setup the command line
|
||||
|
||||
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||
|
||||
@rem Execute Gradle
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
||||
|
||||
:end
|
||||
@rem End local scope for the variables with windows NT shell
|
||||
if "%ERRORLEVEL%"=="0" goto mainEnd
|
||||
|
||||
:fail
|
||||
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||
rem the _cmd.exe /c_ return code!
|
||||
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
||||
exit /b 1
|
||||
|
||||
:mainEnd
|
||||
if "%OS%"=="Windows_NT" endlocal
|
||||
|
||||
:omega
|
1
settings.gradle
Normal file
1
settings.gradle
Normal file
|
@ -0,0 +1 @@
|
|||
rootProject.name = 'bibliographic-character-sets'
|
160
src/main/java/org/xbib/charset/ASCII.java
Normal file
160
src/main/java/org/xbib/charset/ASCII.java
Normal file
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
|
||||
/*
|
||||
Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version.
|
||||
*/
|
||||
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
 * US-ASCII charset.
 *
 * <p>A simple single-byte charset: byte values {@code 0x00-0x7F} map one-to-one
 * to the characters {@code U+0000-U+007F}. Bytes with the high bit set are
 * reported as malformed input by the decoder, and characters above
 * {@code U+007F} are reported as unmappable by the encoder.
 */
final class ASCII extends Charset {

    /**
     * Creates the charset under the canonical name {@code US-ASCII} together
     * with its alias names.
     */
    ASCII() {
        /*
         * Canonical charset name chosen according to:
         * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
         */
        super("US-ASCII", new String[]{
            /*
             * These names are provided by
             * http://www.iana.org/assignments/character-sets
             */
            "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "ASCII", "ISO646-US", "us",
            "IBM367", "cp367", "csASCII",
            /*
             * These names are provided by
             * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
             */
            "ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646", "windows-20127"});
    }

    /**
     * Tells whether this charset contains the given charset.
     *
     * @param cs the charset to test
     * @return {@code true} only if {@code cs} is itself an {@code ASCII} instance
     */
    @Override
    public boolean contains(Charset cs) {
        return cs instanceof ASCII;
    }

    /**
     * @return a new decoder turning 7-bit bytes into characters
     */
    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    /**
     * @return a new encoder turning characters up to {@code U+007F} into bytes
     */
    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * Decoder producing exactly one character per input byte.
     */
    private static final class Decoder extends CharsetDecoder {

        Decoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            while (in.hasRemaining()) {
                byte b = in.get();
                if (b < 0) {
                    // high bit set: not ASCII; un-read the byte so the caller sees it
                    in.position(in.position() - 1);
                    return CoderResult.malformedForLength(1);
                }
                if (!out.hasRemaining()) {
                    // no room for the character: un-read the byte and ask for more space
                    in.position(in.position() - 1);
                    return CoderResult.OVERFLOW;
                }
                out.put((char) b);
            }
            return CoderResult.UNDERFLOW;
        }
    }

    /**
     * Encoder producing exactly one byte per input character.
     */
    private static final class Encoder extends CharsetEncoder {

        Encoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            while (in.hasRemaining()) {
                char c = in.get();
                if (c > Byte.MAX_VALUE) {
                    // outside the 7-bit range: un-read the character and report it
                    in.position(in.position() - 1);
                    return CoderResult.unmappableForLength(1);
                }
                if (!out.hasRemaining()) {
                    // no room for the byte: un-read the character and ask for more space
                    in.position(in.position() - 1);
                    return CoderResult.OVERFLOW;
                }
                out.put((byte) c);
            }
            return CoderResult.UNDERFLOW;
        }
    }
}
|
280
src/main/java/org/xbib/charset/AnselCharset.java
Normal file
280
src/main/java/org/xbib/charset/AnselCharset.java
Normal file
|
@ -0,0 +1,280 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.io.CharArrayWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class AnselCharset extends Charset {
|
||||
|
||||
private static final Map<String, AnselCodeTableParser.CharacterSet> characterSetMap;
|
||||
|
||||
static {
|
||||
characterSetMap = new LinkedHashMap<>();
|
||||
ClassLoader cl = Thread.currentThread().getContextClassLoader();
|
||||
try (InputStream inputStream = cl.getResource("org/xbib/charset/codetables.xml").openStream()) {
|
||||
AnselCodeTableParser anselCodeTableParser = new AnselCodeTableParser(inputStream);
|
||||
for (AnselCodeTableParser.CodeTable codeTable : anselCodeTableParser.getCodeTables()) {
|
||||
for (AnselCodeTableParser.CharacterSet characterSet : codeTable.getCharacterSets()) {
|
||||
characterSetMap.put(characterSet.getName(), characterSet);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private Charset encodeCharset;
|
||||
|
||||
public AnselCharset() throws XMLStreamException {
|
||||
super("ANSEL", BibliographicCharsetProvider.aliasesFor("ANSEL"));
|
||||
this.encodeCharset = StandardCharsets.UTF_8;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Charset charset) {
|
||||
return charset instanceof AnselCharset;
|
||||
}
|
||||
|
||||
public CharsetEncoder newEncoder() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public CharsetDecoder newDecoder() {
|
||||
return new Decoder(this, encodeCharset.newDecoder());
|
||||
}
|
||||
|
||||
private static class Decoder extends CharsetDecoder {
|
||||
|
||||
String g0;
|
||||
String g1;
|
||||
|
||||
Decoder(Charset cs, CharsetDecoder baseDecoder) {
|
||||
super(cs, baseDecoder.averageCharsPerByte(), baseDecoder.maxCharsPerByte());
|
||||
}
|
||||
|
||||
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
|
||||
g0 = "Basic Latin (ASCII)";
|
||||
g1 = "Extended Latin (ANSEL)";
|
||||
CharArrayWriter w = new CharArrayWriter();
|
||||
CharArrayWriter diacritics = new CharArrayWriter();
|
||||
int pos = in.position();
|
||||
while (in.hasRemaining()) {
|
||||
byte b = in.get();
|
||||
char oldChar = (char) (b & 0xFF);
|
||||
if (oldChar == '\u001b') {
|
||||
handleEscapeSequence(in);
|
||||
if (in.hasRemaining()) {
|
||||
b = in.get();
|
||||
oldChar = (char) (b & 0xFF);
|
||||
} else {
|
||||
// premature end of escape sequence, no data following
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
}
|
||||
AnselCodeTableParser.CharacterSet characterSet = isG0(oldChar) ? characterSetMap.get(g0) :
|
||||
isG1(oldChar) ? characterSetMap.get(g1) : null;
|
||||
int len = characterSet != null ? characterSet.getLength() : 1;
|
||||
String str = len == 1 ? "" + oldChar : "" + oldChar + (char) (in.get() & 0xFF) + (char) (in.get() & 0xFF);
|
||||
AnselCodeTableParser.Code code = characterSet != null ? characterSet.getMarc().get(str) : null;
|
||||
char ch = code != null ? code.getUcs() : oldChar;
|
||||
if (ch == '\u0000') {
|
||||
// FB, EC - see http://memory.loc.gov/diglib/codetables/45.html#Note1 and http://memory.loc.gov/diglib/codetables/45.html#Note2
|
||||
continue;
|
||||
}
|
||||
boolean isDiacritic = code != null ? isDiacritic(oldChar) || code.isCombining() : isDiacritic(oldChar);
|
||||
if (isDiacritic) {
|
||||
diacritics.write(ch);
|
||||
} else {
|
||||
w.write(ch);
|
||||
// diacritics must be appended in Unicode, but are prepended in MARC-8 / Z39.47
|
||||
if (diacritics.toCharArray().length > 0) {
|
||||
try {
|
||||
w.write(diacritics.toCharArray());
|
||||
} catch (IOException e) {
|
||||
// dummy
|
||||
w.flush();
|
||||
}
|
||||
diacritics = new CharArrayWriter();
|
||||
}
|
||||
}
|
||||
}
|
||||
for (char ch : w.toCharArray()) {
|
||||
if (!out.hasRemaining()) {
|
||||
in.position(pos - 1);
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
out.put(ch);
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
|
||||
private boolean isDiacritic(char ch) {
|
||||
return ch >= 0xE0 && ch <= 0xFF;
|
||||
}
|
||||
|
||||
private boolean isG0(char ch) {
|
||||
return ch >= 0x21 && ch <= 0x7E;
|
||||
}
|
||||
|
||||
private boolean isG1(char ch) {
|
||||
return ch >= 0xA1 && ch <= 0xFE;
|
||||
}
|
||||
|
||||
/**
|
||||
* ANSI X3.41 or ISO 2022 escape technique.
|
||||
* See procedures in IS0 2375-1985.
|
||||
*
|
||||
* @param in byte buffer
|
||||
*/
|
||||
private void handleEscapeSequence(ByteBuffer in) {
|
||||
byte oneByte = in.get();
|
||||
switch (oneByte) {
|
||||
case 's':
|
||||
g0 = "Basic Latin (ASCII)";
|
||||
break;
|
||||
case 'g':
|
||||
g0 = "Greek Symbols";
|
||||
break;
|
||||
case 'b':
|
||||
g0 = "Subscripts";
|
||||
break;
|
||||
case 'p':
|
||||
g0 = "Superscripts";
|
||||
break;
|
||||
case '(':
|
||||
case ',':
|
||||
oneByte = in.get();
|
||||
switch (oneByte) {
|
||||
case '1':
|
||||
g0 = "Chinese, Japanese, Korean (EACC)";
|
||||
break;
|
||||
case '2':
|
||||
g0 = "Basic Hebrew";
|
||||
break;
|
||||
case '3':
|
||||
g0 = "Basic Arabic";
|
||||
break;
|
||||
case '4':
|
||||
g0 = "Extended Arabic";
|
||||
break;
|
||||
case 'B':
|
||||
g0 = "Basic Latin (ASCII)";
|
||||
break;
|
||||
case 'N':
|
||||
g0 = "Basic Cyrillic";
|
||||
break;
|
||||
case 'Q':
|
||||
g0 = "Extended Cyrillic";
|
||||
break;
|
||||
case 'S':
|
||||
g0 = "Basic Greek";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case ')':
|
||||
case '-':
|
||||
oneByte = in.get();
|
||||
switch (oneByte) {
|
||||
case '1':
|
||||
g1 = "Chinese, Japanese, Korean (EACC)";
|
||||
break;
|
||||
case '2':
|
||||
g1 = "Basic Hebrew";
|
||||
break;
|
||||
case '3':
|
||||
g1 = "Basic Arabic";
|
||||
break;
|
||||
case '4':
|
||||
g1 = "Extended Arabic";
|
||||
break;
|
||||
case 'B':
|
||||
g1 = "Basic Latin (ASCII)";
|
||||
break;
|
||||
case 'N':
|
||||
g1 = "Basic Cyrillic";
|
||||
break;
|
||||
case 'Q':
|
||||
g1 = "Extended Cyrillic";
|
||||
break;
|
||||
case 'S':
|
||||
g1 = "Basic Greek";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '$':
|
||||
oneByte = in.get();
|
||||
switch (oneByte) {
|
||||
case '1':
|
||||
g0 = "Chinese, Japanese, Korean (EACC)";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '!':
|
||||
oneByte = in.get();
|
||||
switch (oneByte) {
|
||||
case 'E':
|
||||
g0 = "Extended Latin (ANSEL)";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
280
src/main/java/org/xbib/charset/AnselCodeTableParser.java
Normal file
280
src/main/java/org/xbib/charset/AnselCodeTableParser.java
Normal file
|
@ -0,0 +1,280 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import javax.xml.namespace.QName;
|
||||
import javax.xml.stream.XMLEventReader;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.events.Attribute;
|
||||
import javax.xml.stream.events.Characters;
|
||||
import javax.xml.stream.events.EndElement;
|
||||
import javax.xml.stream.events.StartElement;
|
||||
import javax.xml.stream.events.XMLEvent;
|
||||
|
||||
/**
 * Parser for code table XML documents.
 *
 * <p>The parser recognizes the elements {@code codeTable}, {@code characterSet}
 * (with the attributes {@code name} and {@code isoCode}) and {@code code} with
 * the child elements {@code marc}, {@code ucs}, {@code utf-8}, {@code name} and
 * {@code isCombining}. All other elements are ignored.
 */
class AnselCodeTableParser {

    private final List<CodeTable> codeTables;

    /** Text collected since the most recent start or end tag. */
    private final StringBuilder content = new StringBuilder();

    private CodeTable codeTable = new CodeTable();

    private CharacterSet characterSet = new CharacterSet();

    private Code code = new Code();

    /**
     * Parses the given XML stream. If the XML cannot be parsed, the list of
     * code tables is empty.
     *
     * @param inputStream the XML document to parse
     */
    AnselCodeTableParser(InputStream inputStream) {
        List<CodeTable> tables;
        try {
            tables = createCodeTables(inputStream);
        } catch (XMLStreamException e) {
            // best effort: an unreadable document yields no code tables at all
            tables = new LinkedList<>();
        }
        this.codeTables = tables;
    }

    /**
     * @return the parsed code tables, never {@code null}
     */
    public List<CodeTable> getCodeTables() {
        return codeTables;
    }

    private List<CodeTable> createCodeTables(InputStream inputStream) throws XMLStreamException {
        List<CodeTable> codetables = new LinkedList<>();
        XMLInputFactory factory = XMLInputFactory.newInstance();
        // the code tables never need external entities; do not resolve any
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        XMLEventReader xmlReader = factory.createXMLEventReader(inputStream);
        try {
            while (xmlReader.hasNext()) {
                processEvent(codetables, xmlReader.nextEvent());
            }
        } finally {
            try {
                xmlReader.close();
            } catch (XMLStreamException ignored) {
                // nothing useful can be done if releasing the reader fails
            }
        }
        return codetables;
    }

    private void processEvent(List<CodeTable> codetables, XMLEvent event) {
        if (event.isStartDocument()) {
            code = new Code();
            content.setLength(0);
        } else if (event.isStartElement()) {
            startElement(event.asStartElement());
        } else if (event.isCharacters()) {
            Characters c = event.asCharacters();
            if (!c.isIgnorableWhiteSpace()) {
                // character events may come more than once (e.g. because of XML entities like ")
                // concatenate with values that might exist
                content.append(c.getData());
            }
        } else if (event.isEndElement()) {
            endElement(codetables, event.asEndElement());
        }
    }

    /** Opens a new code table, character set or code. */
    private void startElement(StartElement element) {
        // drop white space collected between tags so an element only sees its own text
        content.setLength(0);
        switch (element.getName().getLocalPart()) {
            case "codeTable":
                codeTable = new CodeTable();
                break;
            case "characterSet":
                characterSet = new CharacterSet();
                @SuppressWarnings("unchecked")
                Iterator<Attribute> it = element.getAttributes();
                while (it.hasNext()) {
                    Attribute attr = it.next();
                    String attributeLocalName = attr.getName().getLocalPart();
                    if ("name".equals(attributeLocalName)) {
                        characterSet.name = attr.getValue();
                    } else if ("isoCode".equals(attributeLocalName)) {
                        characterSet.isoCode = attr.getValue();
                    }
                }
                break;
            case "code":
                code = new Code();
                break;
            default:
                break;
        }
    }

    /** Stores the collected text in the current code, or closes the current container. */
    private void endElement(List<CodeTable> codetables, EndElement element) {
        String text = content.toString().trim();
        switch (element.getName().getLocalPart()) {
            case "codeTable":
                codetables.add(codeTable);
                codeTable = new CodeTable();
                break;
            case "characterSet":
                codeTable.add(characterSet);
                characterSet = new CharacterSet();
                break;
            case "code":
                characterSet.add(code);
                code = new Code();
                break;
            case "marc":
                code.marc = decodeHex(text);
                break;
            case "ucs":
                // two chars have no ucs equivalent...
                if (!text.isEmpty()) {
                    code.ucs = (char) (Integer.parseInt(text, 16) & 0xFFFF);
                }
                break;
            case "utf-8":
                code.utf8 = decodeHex(text);
                break;
            case "name":
                code.name = text;
                break;
            case "isCombining":
                code.isCombining = "true".equals(text);
                break;
            default:
                break;
        }
        content.setLength(0);
    }

    /**
     * Converts a string of hex digits into a string with one character per
     * pair of digits. A trailing unpaired digit is ignored.
     */
    private static String decodeHex(String hex) {
        char[] ch = new char[hex.length() / 2];
        for (int i = 0; i < ch.length; i++) {
            ch[i] = (char) ((Character.digit(hex.charAt(2 * i), 16) << 4)
                    + Character.digit(hex.charAt(2 * i + 1), 16));
        }
        return new String(ch);
    }

    /**
     * A code table: a list of character sets.
     */
    static class CodeTable {
        private final List<CharacterSet> characterSets = new LinkedList<>();

        void add(CharacterSet characterSet) {
            characterSets.add(characterSet);
        }

        List<CharacterSet> getCharacterSets() {
            return characterSets;
        }
    }

    /**
     * A named character set with its codes, indexed by MARC code and by UCS character.
     */
    static class CharacterSet {
        String name;
        String isoCode;
        // number of chars in the MARC code of the most recently added code
        int length;
        Map<String, Code> marc = new HashMap<>();
        Map<Character, Code> unicode = new HashMap<>();

        /**
         * Registers a code. The first code seen for a MARC code or UCS character
         * wins. Codes without a MARC code are skipped.
         */
        void add(Code code) {
            if (code.marc == null) {
                return;
            }
            marc.putIfAbsent(code.marc, code);
            length = code.marc.length();
            unicode.putIfAbsent(code.ucs, code);
        }

        String getName() {
            return name;
        }

        int getLength() {
            return length;
        }

        String getIsoCode() {
            return isoCode;
        }

        Map<String, Code> getMarc() {
            return marc;
        }

        Map<Character, Code> getUnicode() {
            return unicode;
        }
    }

    /**
     * A single entry of a character set.
     */
    static class Code {
        // Universal Character Set (UCS, ISO-IEC 10646)/Unicode, always 16 bit
        char ucs;
        // MARC-8 standard (single char) or EACC 24-bit code (three chars)
        String marc;
        // UTF-8 code, one char per byte (decoded from the hex digits in the XML)
        String utf8;
        // name
        String name;
        boolean isCombining;

        char getUcs() {
            return ucs;
        }

        String getMarc() {
            return marc;
        }

        String getUtf8() {
            return utf8;
        }

        String getName() {
            return name;
        }

        boolean isCombining() {
            return isCombining;
        }

        @Override
        public String toString() {
            return "marc=" + marc + " isCombining=" + isCombining + " ucs=" + ucs;
        }
    }
}
|
||||
|
173
src/main/java/org/xbib/charset/BibliographicCharsetProvider.java
Normal file
173
src/main/java/org/xbib/charset/BibliographicCharsetProvider.java
Normal file
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.lang.ref.SoftReference;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.spi.CharsetProvider;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
/**
|
||||
* Extra bibliographic character sets.
|
||||
*/
|
||||
public class BibliographicCharsetProvider extends CharsetProvider {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(BibliographicCharsetProvider.class.getName());
|
||||
|
||||
/**
|
||||
* The reference to the character set instance.
|
||||
* If there are no remaining references to this instance,
|
||||
* the character set will be removed by the garbage collector.
|
||||
*/
|
||||
private static volatile SoftReference<BibliographicCharsetProvider> instance = null;
|
||||
private final Map<String, String> classMap;
|
||||
private final Map<String, String> aliasMap;
|
||||
private final Map<String, String[]> aliasNameMap;
|
||||
private final Map<String, SoftReference<Charset>> cache;
|
||||
private final String packagePrefix;
|
||||
|
||||
/**
|
||||
* Constructor must be public.
|
||||
*/
|
||||
public BibliographicCharsetProvider() {
|
||||
classMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
|
||||
aliasMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
|
||||
aliasNameMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
|
||||
cache = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
|
||||
packagePrefix = getClass().getPackage().getName();
|
||||
|
||||
charset("ANSEL", "AnselCharset",
|
||||
new String[]{"ANSI_Z39_47", "ANSI-Z39-47", "Z39_47", "Z39-47", "ansel", "usmarc", "usm94"});
|
||||
charset("ISO-5426", "ISO5426", new String[]{"x-mab", "x-MAB", "ISO-5426", "ISO_5426", "ISO_5426:1983", "MAB2"});
|
||||
charset("ISO-5428", "ISO5428", new String[]{"ISO_5428", "ISO-5428:1984", "iso-ir-55"});
|
||||
charset("MAB-Diskette", "MabDisketteCharset", new String[]{});
|
||||
charset("PICA", "Pica", new String[]{"Pica", "pica"});
|
||||
charset("x-PICA", "PicaCharset", new String[]{"x-pica"});
|
||||
charset("SIMPLE_ANSEL", "SimpleAnselCharset", new String[]{});
|
||||
instance = new SoftReference<>(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* List all aliases defined for a character set.
|
||||
*
|
||||
* @param s the name of the character set
|
||||
* @return an alias string array
|
||||
*/
|
||||
static String[] aliasesFor(String s) {
|
||||
SoftReference<BibliographicCharsetProvider> softreference = instance;
|
||||
BibliographicCharsetProvider charsets = null;
|
||||
if (softreference != null) {
|
||||
charsets = softreference.get();
|
||||
}
|
||||
if (charsets == null) {
|
||||
charsets = new BibliographicCharsetProvider();
|
||||
instance = new SoftReference<>(charsets);
|
||||
}
|
||||
return charsets.aliases(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Charset charsetForName(String s) {
|
||||
return lookup(canonicalize(s));
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Iterator<Charset> charsets() {
|
||||
return new Iterator<Charset>() {
|
||||
|
||||
Iterator<String> iterator = classMap.keySet().iterator();
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Charset next() {
|
||||
return lookup(iterator.next());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private void charset(String name, String className, String[] aliases) {
|
||||
classMap.putIfAbsent(name, className);
|
||||
for (String alias : aliases) {
|
||||
aliasMap.putIfAbsent(alias, name);
|
||||
}
|
||||
aliasNameMap.putIfAbsent(name, aliases);
|
||||
}
|
||||
|
||||
private String canonicalize(String charsetName) {
|
||||
String aliasCharsetName = aliasMap.get(charsetName);
|
||||
return aliasCharsetName != null ? aliasCharsetName : charsetName;
|
||||
}
|
||||
|
||||
private Charset lookup(String charsetName) {
|
||||
SoftReference<Charset> softreference = cache.get(charsetName);
|
||||
if (softreference != null) {
|
||||
Charset charset = softreference.get();
|
||||
if (charset != null) {
|
||||
return charset;
|
||||
}
|
||||
}
|
||||
String className = classMap.get(charsetName);
|
||||
if (className == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
Class<?> cl = Class.forName(packagePrefix + "." + className, true, getClass().getClassLoader());
|
||||
Charset charset = (Charset) cl.newInstance();
|
||||
cache.put(charsetName, new SoftReference<>(charset));
|
||||
return charset;
|
||||
} catch (ClassNotFoundException e1) {
|
||||
logger.log(Level.WARNING, "Class not found: " + packagePrefix + "." + className);
|
||||
} catch (IllegalAccessException e2) {
|
||||
logger.log(Level.WARNING, "Illegal access: " + packagePrefix + "." + className);
|
||||
} catch (InstantiationException e3) {
|
||||
logger.log(Level.WARNING, "Instantiation failed: " + packagePrefix + "." + className);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String[] aliases(String s) {
|
||||
return (String[]) aliasNameMap.get(s);
|
||||
}
|
||||
}
|
52
src/main/java/org/xbib/charset/BibliographicCharsets.java
Normal file
52
src/main/java/org/xbib/charset/BibliographicCharsets.java
Normal file
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
 * Constant definitions for the bibliographic charsets.
 * Each constant is resolved by name through {@link Charset#forName(String)}
 * when this class is initialized, so initialization fails if one of the
 * named charsets is not available in the running JVM.
 */
public final class BibliographicCharsets {

    public static final Charset ANSEL = Charset.forName("ANSEL");

    public static final Charset ISO5426 = Charset.forName("ISO-5426");

    public static final Charset ISO5428 = Charset.forName("ISO-5428");

    public static final Charset MAB = Charset.forName("x-MAB");

    public static final Charset MAB_DISKETTE = Charset.forName("MAB-DISKETTE");

    public static final Charset PICA = Charset.forName("Pica");

    private BibliographicCharsets() {
        // constants holder, not meant to be instantiated
    }
}
|
222
src/main/java/org/xbib/charset/ByteCharset.java
Normal file
222
src/main/java/org/xbib/charset/ByteCharset.java
Normal file
|
@ -0,0 +1,222 @@
|
|||
/**
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*
|
||||
*
|
||||
* Derived from
|
||||
*
|
||||
* ByteCharset.java -- Abstract class for generic 1-byte encodings.
|
||||
* Copyright (C) 2005 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Classpath.
|
||||
*
|
||||
* GNU Classpath is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Classpath is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
* 02111-1307 USA.
|
||||
*
|
||||
* Linking this library statically or dynamically with other modules is
|
||||
* making a combined work based on this library. Thus, the terms and
|
||||
* conditions of the GNU General Public License cover the whole
|
||||
* combination.
|
||||
*
|
||||
* As a special exception, the copyright holders of this library give you
|
||||
* permission to link this library with independent modules to produce an
|
||||
* executable, regardless of the license terms of these independent
|
||||
* modules, and to copy and distribute the resulting executable under
|
||||
* terms of your choice, provided that you also meet, for each linked
|
||||
* independent module, the terms and conditions of the license of that
|
||||
* module. An independent module is a module which is not derived from
|
||||
* or based on this library. If you modify this library, you may extend
|
||||
* this exception to your version of the library, but you are not
|
||||
* obligated to do so. If you do not wish to do so, delete this
|
||||
* exception statement from your version.
|
||||
*//**
|
||||
*
|
||||
* Derived from
|
||||
*
|
||||
* ByteCharset.java -- Abstract class for generic 1-byte encodings.
|
||||
* Copyright (C) 2005 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Classpath.
|
||||
*
|
||||
* GNU Classpath is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Classpath is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
* 02111-1307 USA.
|
||||
*
|
||||
* Linking this library statically or dynamically with other modules is
|
||||
* making a combined work based on this library. Thus, the terms and
|
||||
* conditions of the GNU General Public License cover the whole
|
||||
* combination.
|
||||
*
|
||||
* As a special exception, the copyright holders of this library give you
|
||||
* permission to link this library with independent modules to produce an
|
||||
* executable, regardless of the license terms of these independent
|
||||
* modules, and to copy and distribute the resulting executable under
|
||||
* terms of your choice, provided that you also meet, for each linked
|
||||
* independent module, the terms and conditions of the license of that
|
||||
* module. An independent module is a module which is not derived from
|
||||
* or based on this library. If you modify this library, you may extend
|
||||
* this exception to your version of the library, but you are not
|
||||
* obligated to do so. If you do not wish to do so, delete this
|
||||
* exception statement from your version.
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
|
||||
* A generic encoding framework for single-byte encodings, utilizing a look-up
|
||||
* table. This replaces the gnu.java.io.EncoderEightBitLookup class, created by
|
||||
* Aron Renn.
|
||||
*/
|
||||
abstract class ByteCharset extends Charset {
|
||||
|
||||
/**
|
||||
* Char to signify the character in the table is undefined.
|
||||
*/
|
||||
private static final char NONE = (char) 0xFFFD;
|
||||
char[] lookupTable;
|
||||
|
||||
ByteCharset(String canonicalName, String[] aliases) {
|
||||
super(canonicalName, aliases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Most western charsets include ASCII, but this should be overloaded for
|
||||
* others.
|
||||
*/
|
||||
public boolean contains(Charset cs) {
|
||||
return cs instanceof ASCII || cs.getClass() == getClass();
|
||||
}
|
||||
|
||||
private char[] getLookupTable() {
|
||||
return lookupTable;
|
||||
}
|
||||
|
||||
public CharsetDecoder newDecoder() {
|
||||
return new Decoder(this);
|
||||
}
|
||||
|
||||
public CharsetEncoder newEncoder() {
|
||||
return new Encoder(this);
|
||||
}
|
||||
|
||||
private static final class Decoder extends CharsetDecoder {
|
||||
|
||||
private char[] lookup;
|
||||
|
||||
Decoder(ByteCharset cs) {
|
||||
super(cs, 1.0f, 1.0f);
|
||||
lookup = cs.getLookupTable();
|
||||
}
|
||||
|
||||
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
|
||||
while (in.hasRemaining()) {
|
||||
byte b = in.get();
|
||||
char c;
|
||||
if (!out.hasRemaining()) {
|
||||
in.position(in.position() - 1);
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
c = lookup[b & 0xFF];
|
||||
out.put(c);
|
||||
}
|
||||
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class Encoder extends CharsetEncoder {
|
||||
|
||||
private byte[] lookup;
|
||||
|
||||
Encoder(ByteCharset cs) {
|
||||
super(cs, 1.0f, 1.0f);
|
||||
char[] lookuptable = cs.getLookupTable();
|
||||
int max = 0;
|
||||
for (char ch : lookuptable) {
|
||||
int c = (int) ch;
|
||||
max = c > max && c < NONE ? c : max;
|
||||
}
|
||||
lookup = new byte[max + 1];
|
||||
for (int i = 0; i < lookuptable.length; i++) {
|
||||
int c = (int) lookuptable[i];
|
||||
if (c != 0 && c < NONE) {
|
||||
lookup[c] = (byte) i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
|
||||
while (in.hasRemaining()) {
|
||||
int c = (int) in.get();
|
||||
if (!out.hasRemaining()) {
|
||||
in.position(in.position() - 1);
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
byte b = c < lookup.length ? lookup[c] : (byte) 0;
|
||||
if ((int) b != 0 || c == 0) {
|
||||
out.put(b);
|
||||
} else {
|
||||
in.position(in.position() - 1);
|
||||
return CoderResult.unmappableForLength(1);
|
||||
}
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
}
|
||||
}
|
241
src/main/java/org/xbib/charset/ISO5426.java
Normal file
241
src/main/java/org/xbib/charset/ISO5426.java
Normal file
|
@ -0,0 +1,241 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2012 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*
|
||||
* Copyright (C) 2004 Jürgen Kett, Die Deutsche Bibliothek,
|
||||
* (http://www.ddb.de, mailto:kett@dbf.ddb.de)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* MAB-Character-Set-Implementation.
|
||||
* Some minor additions in blocks A-D
|
||||
*/
|
||||
public class ISO5426 extends Charset {
|
||||
|
||||
public static final char NICHTSORTIERBEGINNZEICHEN = '\u0098';
|
||||
public static final char NICHTSORTIERENDEZEICHEN = '\u009C';
|
||||
public static final char TEILFELDTRENNZEICHEN = '\u2021';
|
||||
|
||||
private static final char[] byteToCharTable = newMabCharsetMap();
|
||||
|
||||
private static final Map<Character, Byte> charToByteTable = newMabByteToCharMap();
|
||||
|
||||
private boolean isNFCOutput;
|
||||
|
||||
public ISO5426() {
|
||||
this(true);
|
||||
}
|
||||
|
||||
private ISO5426(boolean isNFCOutput) {
|
||||
super("ISO-5426", null);
|
||||
this.isNFCOutput = isNFCOutput;
|
||||
}
|
||||
|
||||
private static Map<Character, Byte> newMabByteToCharMap() {
|
||||
Map<Character, Byte> ret = new HashMap<>(byteToCharTable.length);
|
||||
for (int i = 0; i < byteToCharTable.length; i++) {
|
||||
if (byteToCharTable[i] != 0) {
|
||||
ret.put(byteToCharTable[i], (byte) i);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
private static char[] newMabCharsetMap() {
|
||||
char[] map = new char[256];
|
||||
|
||||
for (int i = 0; i < 128; i++) {
|
||||
map[i] = (char) i;
|
||||
}
|
||||
map[0x88] = ISO5426.NICHTSORTIERBEGINNZEICHEN;
|
||||
map[0x89] = ISO5426.NICHTSORTIERENDEZEICHEN;
|
||||
|
||||
// A-Block
|
||||
map[0xA1] = '\u00A1'; // INVERTED EXCLAMATION MARK
|
||||
map[0xA2] = '\u201E'; // Double Low-9 Quotation Mark
|
||||
map[0xA3] = '\u00A3'; // Pound Sign
|
||||
map[0xA4] = '\u0024'; // Dollar Sign
|
||||
map[0xA5] = '\u00A5'; // YEN SIGN
|
||||
map[0xA6] = '\u2020'; // Dagger
|
||||
map[0xA7] = '\u00A7'; // SECTION SIGN
|
||||
map[0xA8] = '\u2032'; // Prime
|
||||
map[0xA9] = '\u2018'; // Left Single Quotation Mark
|
||||
map[0xAA] = '\u201C'; // Left Double Quotation Mark
|
||||
map[0xAB] = '\u00AB'; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (LEFT POINTING GUILLEMET)
|
||||
map[0xAC] = '\u266D'; // Music Flat Sign
|
||||
map[0xAD] = '\u00A9'; // Copyright Sign
|
||||
map[0xAE] = '\u2117'; // Sound Recording Copyright
|
||||
map[0xAF] = '\u00AE'; // Registered Sign
|
||||
|
||||
// B-Block
|
||||
map[0xB0] = '\u02BB'; // Modifier Letter Turned Comma
|
||||
map[0xB1] = '\u02BC'; // Modifier Letter Apostrophe
|
||||
map[0xB2] = '\u201A'; // Single Low-9 Quotation Mark
|
||||
map[0xB6] = ISO5426.TEILFELDTRENNZEICHEN;
|
||||
map[0xB7] = '\u00B7'; //
|
||||
map[0xB8] = '\u2033'; // Double Prime
|
||||
map[0xB9] = '\u2019'; // Right Single Quotation Mark
|
||||
map[0xBA] = '\u201D'; // Right Double Quotation Mark
|
||||
map[0xBB] = '\u00BB'; //
|
||||
map[0xBC] = '\u266F'; // Music Sharp Sign !!!!NACHFRAGEN
|
||||
map[0xBD] = '\u02B9'; // Modifier Letter Prime
|
||||
map[0xBE] = '\u02BA'; // Modifier Letter Double Prime
|
||||
map[0xBF] = '\u00BF'; //
|
||||
|
||||
// C-Block
|
||||
map[0xC0] = '\u0309'; // Combining Hook above
|
||||
map[0xC1] = '\u0300'; // Combining Grave Accent
|
||||
map[0xC2] = '\u0301'; // Combining Acute Accent
|
||||
map[0xC3] = '\u0302'; // Combining Circumflex Accent
|
||||
map[0xC4] = '\u0303'; // Combining Tilde
|
||||
map[0xC5] = '\u0304'; // Combining Macron
|
||||
map[0xC6] = '\u0306'; // Combining Breve
|
||||
map[0xC7] = '\u0307'; // Combining Dot Above
|
||||
map[0xC8] = '\u0308'; // Trema -> Combining Diaeresis
|
||||
map[0xC9] = '\u0308'; // Umlaut -> Combining Diaeresis
|
||||
map[0xCA] = '\u030A'; // Combining Ring Above
|
||||
map[0xCB] = '\u0315'; // Combining Comma Above Right
|
||||
map[0xCC] = '\u0312'; // Combining Turned Comma Above
|
||||
map[0xCD] = '\u030B'; // Combining Double Acute Accent
|
||||
map[0xCE] = '\u031B'; // Combining Horn
|
||||
map[0xCF] = '\u030C'; // Combining Caron
|
||||
|
||||
// D-Block
|
||||
map[0xD0] = '\u0327'; // Combining Cedilla
|
||||
map[0xD1] = '\u031C'; // Combining Left Half Ring Below
|
||||
map[0xD2] = '\u0326'; // Combining Comma Below
|
||||
map[0xD3] = '\u0328'; // Combining Ogonek
|
||||
map[0xD4] = '\u0325'; // Combining Ring Below
|
||||
map[0xD5] = '\u032E'; // Combining Breve Below
|
||||
map[0xD6] = '\u0323'; // Combining Dot Below
|
||||
map[0xD7] = '\u0324'; // Combining Diaeresis Below
|
||||
map[0xD8] = '\u0332'; // Combining Low Line
|
||||
map[0xD9] = '\u0333'; // Combining Double Low Line
|
||||
map[0xDA] = '\u0329'; // Combining Vertical Line Below
|
||||
map[0xDB] = '\u032D'; // Combining Circumflex Accent Below
|
||||
map[0xDD] = '\uFE20'; // Combining Ligature Left Half
|
||||
map[0xDE] = '\uFE21'; // Combining Ligature Right Half
|
||||
map[0xDF] = '\uFE23'; // Combining Double Tilde Right Half
|
||||
|
||||
// E-Block
|
||||
map[0xE1] = '\u00C6'; // Latin Capital Letter AE
|
||||
map[0xE2] = '\u0110'; // Latin Capital Letter D with Stroke
|
||||
map[0xE6] = '\u0132'; // Latin Capital Ligature IJ
|
||||
map[0xE8] = '\u0141'; // Latin Capital Letter L with Stroke
|
||||
map[0xE9] = '\u00D8'; // Latin Capital Letter O with Stroke
|
||||
map[0xEA] = '\u0152'; // Latin Capital Ligature OE
|
||||
map[0xEC] = '\u00DE'; // Latin Capital Letter Thorn
|
||||
|
||||
// F-Block
|
||||
map[0xF1] = '\u00E6'; // Latin Small Letter AE
|
||||
map[0xF2] = '\u0111'; // Latin Small Letter D with Stroke
|
||||
map[0xF3] = '\u00F0'; // Latin Small Letter ETH
|
||||
map[0xF5] = '\u0131'; // Latin Small Letter Dotless I
|
||||
map[0xF6] = '\u0133'; // Latin Small Ligature IJ
|
||||
map[0xF8] = '\u0142'; // Latin Small Letter L with Stroke
|
||||
map[0xF9] = '\u00F8'; // Latin Small Letter O with Stroke
|
||||
map[0xFA] = '\u0153'; // Latin Small Ligature OE
|
||||
map[0xFB] = '\u00DF'; // Latin Small Letter Sharp S
|
||||
map[0xFC] = '\u00FE'; // Latin Small Letter Thorn
|
||||
return map;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Charset cs) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharsetDecoder newDecoder() {
|
||||
MabDecoder ret = new MabDecoder(this);
|
||||
ret.setComposeCharactersAfterConversion(this.isNFCOutput);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharsetEncoder newEncoder() {
|
||||
return new MabEncoder(this);
|
||||
}
|
||||
|
||||
private static class MabDecoder extends SingleByteDecoder {
|
||||
|
||||
MabDecoder(Charset cs) {
|
||||
super(cs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public char byteToChar(byte b) {
|
||||
return byteToCharTable[b & 0xFF];
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCombiningCharacter(byte b) {
|
||||
return (b & 0xFF) > 0xC0 && (b & 0xFF) < 0xDF;
|
||||
}
|
||||
}
|
||||
|
||||
private static class MabEncoder extends SingleByteEncoder {
|
||||
|
||||
MabEncoder(Charset cs) {
|
||||
super(cs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte charToByte(char c) {
|
||||
Byte b = charToByteTable.get(c);
|
||||
if (b == null) {
|
||||
return 0;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
390
src/main/java/org/xbib/charset/ISO5428.java
Normal file
390
src/main/java/org/xbib/charset/ISO5428.java
Normal file
|
@ -0,0 +1,390 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class ISO5428 extends Charset {
|
||||
|
||||
public ISO5428() {
|
||||
super("ISO_5428", BibliographicCharsetProvider.aliasesFor("ISO_5428"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Charset cs) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharsetDecoder newDecoder() {
|
||||
return new Decoder(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharsetEncoder newEncoder() {
|
||||
return null;
|
||||
}
|
||||
|
||||
private static class Decoder extends CharsetDecoder {
|
||||
|
||||
Decoder(Charset cs) {
|
||||
super(cs, 1.0f, 1.0f);
|
||||
}
|
||||
|
||||
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
|
||||
boolean tonos = false;
|
||||
boolean dialitika = false;
|
||||
while (in.hasRemaining()) {
|
||||
byte b = in.get();
|
||||
if (!out.hasRemaining()) {
|
||||
in.position(in.position() - 1);
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
if (b == (byte) 0xa2) {
|
||||
tonos = true;
|
||||
} else if (b == (byte) 0xa3) {
|
||||
dialitika = true;
|
||||
}
|
||||
int i = (int) b & 0xFF;
|
||||
char c;
|
||||
switch (i) {
|
||||
case 0xe1: {
|
||||
/* alpha small */
|
||||
c = tonos ? '\u03ac' : '\u03b1';
|
||||
break;
|
||||
}
|
||||
case 0xc1: {
|
||||
/* alpha capital */
|
||||
c = tonos ? '\u0386' : '\u0391';
|
||||
break;
|
||||
}
|
||||
case 0xe2: {
|
||||
/* Beta small */
|
||||
c = '\u03b2';
|
||||
break;
|
||||
}
|
||||
case 0xc2: {
|
||||
/* Beta capital */
|
||||
c = '\u0392';
|
||||
break;
|
||||
}
|
||||
case 0xe4: {
|
||||
/* Gamma small */
|
||||
c = '\u03b3';
|
||||
break;
|
||||
}
|
||||
case 0xc4: {
|
||||
/* Gamma capital */
|
||||
c = '\u0393';
|
||||
break;
|
||||
}
|
||||
case 0xe5: {
|
||||
/* Delta small */
|
||||
c = '\u03b4';
|
||||
break;
|
||||
}
|
||||
case 0xc5: {
|
||||
/* Delta capital */
|
||||
c = '\u0394';
|
||||
break;
|
||||
}
|
||||
case 0xe6: {
|
||||
/* epsilon small */
|
||||
c = tonos ? '\u03ad' : '\u03b5';
|
||||
break;
|
||||
}
|
||||
case 0xc6: {
|
||||
/* epsilon capital */
|
||||
c = tonos ? '\u0388' : '\u0395';
|
||||
break;
|
||||
}
|
||||
case 0xe9: {
|
||||
/* Zeta small */
|
||||
c = '\u03b6';
|
||||
break;
|
||||
}
|
||||
case 0xc9: {
|
||||
/* Zeta capital */
|
||||
c = '\u0396';
|
||||
break;
|
||||
}
|
||||
case 0xea: {
|
||||
/* Eta small */
|
||||
c = tonos ? '\u03ae' : '\u03b7';
|
||||
break;
|
||||
}
|
||||
case 0xca: {
|
||||
/* Eta capital */
|
||||
c = tonos ? '\u0389' : '\u0397';
|
||||
break;
|
||||
}
|
||||
case 0xeb: {
|
||||
/* Theta small */
|
||||
c = '\u03b8';
|
||||
break;
|
||||
}
|
||||
case 0xcb: {
|
||||
/* Theta capital */
|
||||
c = '\u0398';
|
||||
break;
|
||||
}
|
||||
case 0xec: {
|
||||
/* Iota small */
|
||||
if (tonos) {
|
||||
if (dialitika) {
|
||||
c = '\u0390';
|
||||
} else {
|
||||
c = '\u03af';
|
||||
}
|
||||
} else if (dialitika) {
|
||||
c = '\u03ca';
|
||||
} else {
|
||||
c = '\u03b9';
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0xcc: {
|
||||
/* Iota capital */
|
||||
if (tonos) {
|
||||
c = '\u038a';
|
||||
} else if (dialitika) {
|
||||
c = '\u03aa';
|
||||
} else {
|
||||
c = '\u0399';
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0xed: {
|
||||
/* Kappa small */
|
||||
c = '\u03ba';
|
||||
break;
|
||||
}
|
||||
case 0xcd: {
|
||||
/* Kappa capital */
|
||||
c = '\u039a';
|
||||
break;
|
||||
}
|
||||
case 0xee: {
|
||||
/* Lambda small */
|
||||
c = '\u03bb';
|
||||
break;
|
||||
}
|
||||
case 0xce: {
|
||||
/* Lambda capital */
|
||||
c = '\u039b';
|
||||
break;
|
||||
}
|
||||
case 0xef: {
|
||||
/* Mu small */
|
||||
c = '\u03bc';
|
||||
break;
|
||||
}
|
||||
case 0xcf:
|
||||
/* Mu capital */
|
||||
c = '\u039c';
|
||||
break;
|
||||
case 0xf0: {
|
||||
/* Nu small */
|
||||
c = '\u03bd';
|
||||
break;
|
||||
}
|
||||
case 0xd0: {
|
||||
/* Nu capital */
|
||||
c = '\u039d';
|
||||
break;
|
||||
}
|
||||
case 0xf1: {
|
||||
/* Xi small */
|
||||
c = '\u03be';
|
||||
break;
|
||||
}
|
||||
case 0xd1: {
|
||||
/* Xi capital */
|
||||
c = '\u039e';
|
||||
break;
|
||||
}
|
||||
case 0xf2: {
|
||||
/* Omicron small */
|
||||
if (tonos) {
|
||||
c = '\u03cc';
|
||||
} else {
|
||||
c = '\u03bf';
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0xd2: {
|
||||
/* Omicron capital */
|
||||
if (tonos) {
|
||||
c = '\u038c';
|
||||
} else {
|
||||
c = '\u039f';
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0xf3: {
|
||||
/* Pi small */
|
||||
c = '\u03c0';
|
||||
break;
|
||||
}
|
||||
case 0xd3: {
|
||||
/* Pi capital */
|
||||
c = '\u03a0';
|
||||
break;
|
||||
}
|
||||
case 0xf5: {
|
||||
/* Rho small */
|
||||
c = '\u03c1';
|
||||
break;
|
||||
}
|
||||
case 0xd5: {
|
||||
/* Rho capital */
|
||||
c = '\u03a1';
|
||||
break;
|
||||
}
|
||||
case 0xf7: {
|
||||
/* Sigma small (end of words) */
|
||||
c = '\u03c2';
|
||||
break;
|
||||
}
|
||||
case 0xf6: {
|
||||
/* Sigma small */
|
||||
c = '\u03c3';
|
||||
break;
|
||||
}
|
||||
case 0xd6: {
|
||||
/* Sigma capital */
|
||||
c = '\u03a3';
|
||||
break;
|
||||
}
|
||||
case 0xf8: {
|
||||
/* Tau small */
|
||||
c = '\u03c4';
|
||||
break;
|
||||
}
|
||||
case 0xd8: {
|
||||
/* Tau capital */
|
||||
c = '\u03a4';
|
||||
break;
|
||||
}
|
||||
case 0xf9: {
|
||||
/* Upsilon small */
|
||||
if (tonos) {
|
||||
if (dialitika) {
|
||||
c = '\u03b0';
|
||||
} else {
|
||||
c = '\u03cd';
|
||||
}
|
||||
} else if (dialitika) {
|
||||
c = '\u03cb';
|
||||
} else {
|
||||
c = '\u03c5';
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0xd9: {
|
||||
/* Upsilon capital */
|
||||
if (tonos) {
|
||||
c = '\u038e';
|
||||
} else if (dialitika) {
|
||||
c = '\u03ab';
|
||||
} else {
|
||||
c = '\u03a5';
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0xfa: {
|
||||
/* Phi small */
|
||||
c = '\u03c6';
|
||||
break;
|
||||
}
|
||||
case 0xda: {
|
||||
/* Phi capital */
|
||||
c = '\u03a6';
|
||||
break;
|
||||
}
|
||||
case 0xfb: {
|
||||
/* Chi small */
|
||||
c = '\u03c7';
|
||||
break;
|
||||
}
|
||||
case 0xdb: {
|
||||
/* Chi capital */
|
||||
c = '\u03a7';
|
||||
break;
|
||||
}
|
||||
case 0xfc: {
|
||||
/* Psi small */
|
||||
c = '\u03c8';
|
||||
break;
|
||||
}
|
||||
case 0xdc: {
|
||||
/* Psi capital */
|
||||
c = '\u03a8';
|
||||
break;
|
||||
}
|
||||
case 0xfd: {
|
||||
/* Omega small */
|
||||
if (tonos) {
|
||||
c = '\u03ce';
|
||||
} else {
|
||||
c = '\u03c9';
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0xdd: {
|
||||
/* Omega capital */
|
||||
if (tonos) {
|
||||
c = '\u038f';
|
||||
} else {
|
||||
c = '\u03a9';
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
c = (char) b;
|
||||
}
|
||||
}
|
||||
out.put(c);
|
||||
}
|
||||
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
}
|
||||
}
|
89
src/main/java/org/xbib/charset/MabDisketteCharset.java
Normal file
89
src/main/java/org/xbib/charset/MabDisketteCharset.java
Normal file
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
/**
|
||||
* Implementierung des Zeichensatzes MAB-Diskette. Dieser ist bis auf wenige
|
||||
* Ausnahmen mit Cp850 identisch.
|
||||
*/
|
||||
public class MabDisketteCharset extends ByteCharset {
|
||||
|
||||
/* Dekodierung:
|
||||
* Abweichungen zu CP850: Nichtsortierzeichen und Teilfeldz. müssen
|
||||
* erhalten bleiben. Nichtsortierz.: 00aa -> 00aa Teilfeldtrennz.: 00ce ->
|
||||
* 2021
|
||||
*/
|
||||
/* Kodierung:
|
||||
* Abweichungen zu CP850: Nichtsortierzeichen und Teilfeldz. müssen
|
||||
* erhalten bleiben. Nichtsortierz.: 00aa -> 00aa Teilfeldtrennz.: 2021 ->
|
||||
* 00ce, 00b6 -> 00ce
|
||||
*/
|
||||
private static final char[] lookup = {
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
|
||||
0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
|
||||
0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
|
||||
0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
|
||||
0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
|
||||
0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
|
||||
0x00BF, 0x00AE, 0x00AA, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
|
||||
0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
|
||||
0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
|
||||
0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
|
||||
0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x2021, 0x00A4,
|
||||
0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
|
||||
0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
|
||||
0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
|
||||
0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
|
||||
0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
|
||||
0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
|
||||
};
|
||||
|
||||
public MabDisketteCharset() {
|
||||
super("x-MAB-Diskette", BibliographicCharsetProvider.aliasesFor("x-MAB-Diskette"));
|
||||
lookupTable = lookup;
|
||||
}
|
||||
}
|
228
src/main/java/org/xbib/charset/Pica.java
Normal file
228
src/main/java/org/xbib/charset/Pica.java
Normal file
|
@ -0,0 +1,228 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* PICA character set implementation.
|
||||
*
|
||||
* This character set is a modified version of the 'InterMARC' character set
|
||||
* and contains 256 tokens.
|
||||
*
|
||||
* A description can be found at
|
||||
* <a href="http://www.pica.nl/ne/docu/dn010/html/t07.shtml">the Pica website</a>.
|
||||
*/
|
||||
public class Pica extends Charset {
|
||||
|
||||
private static final Map<Character, Character> encodeMap = new HashMap<>();
|
||||
private static final Map<Character, Character> decodeMap = new HashMap<>();
|
||||
|
||||
/*
|
||||
* Pica character mapping for index subset \u00a0..\u00ff.
|
||||
* Pica is equal to US-ASCII but not ISO-8859-1.
|
||||
* These are the definitions for Pica characters
|
||||
* which are different from ISO-8859-1.
|
||||
*/
|
||||
static {
|
||||
Pica.charTable(encodeMap, decodeMap, '\u00a0', '\u00ff',
|
||||
new char[]{
|
||||
'\u00a0', '\u0141', '\u00d8', '\u0110', '\u00de', '\u00c6',
|
||||
'\u0152', '\u02b9', '\u00b7', '\u266d', '\u00ae', '\u00b1',
|
||||
'\u01a0', '\u01af', '\u02be', '\u00c5', '\u02bf', '\u0142',
|
||||
'\u00f8', '\u0111', '\u00fe', '\u00e6', '\u0153', '\u02ba',
|
||||
'\u0131', '\u00a3', '\u00f0', '\u03b1', '\u01a1', '\u01b0',
|
||||
'\u00df', '\u00e5', '\u0132', '\u00c4', '\u00d6', '\u00dc',
|
||||
'\u0186', '\u018e', '\u2260', '\u2192', '\u2264', '\u221e',
|
||||
'\u222b', '\u00d7', '\u00a7', '\u22a1', '\u21d4', '\u2265',
|
||||
'\u0133', '\u00e4', '\u00f6', '\u00fc', '\u0254', '\u0258',
|
||||
'\u00bf', '\u00a1', '\u03b2', '\u003f', '\u03b3', '\u03c0',
|
||||
'\u003f', '\u003f', '\u003f', '\u003f', '\u0341', '\u0300',
|
||||
'\u0301', '\u0302', '\u0303', '\u0304', '\u0306', '\u0307',
|
||||
'\u0308', '\u030c', '\u030a', '\ufe20', '\ufe21', '\u0315',
|
||||
'\u030b', '\u0310', '\u0327', '\u0000', '\u0323', '\u0324',
|
||||
'\u0325', '\u0333', '\u0332', '\u003f', '\u031c', '\u032e',
|
||||
'\ufe23', '\ufe22', '\u003f', '\u0000', '\u0313', '\u003f'
|
||||
});
|
||||
}
|
||||
|
||||
// Handle to the real charset we'll use for transcoding between
|
||||
// characters and bytes. Doing this allows applying the Pica
|
||||
// charset to multi-byte charset encodings like UTF-8.
|
||||
private Charset encodeCharset;
|
||||
|
||||
/**
|
||||
* Constructor for the Pica charset. Call the superclass
|
||||
* constructor to pass along the name(s) we'll be known by.
|
||||
* Then save a reference to the delegate Charset.
|
||||
*/
|
||||
public Pica() {
|
||||
super("PICA", BibliographicCharsetProvider.aliasesFor("PICA"));
|
||||
encodeCharset = StandardCharsets.ISO_8859_1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill the conversion tables.
|
||||
*/
|
||||
private static void charTable(Map<Character, Character> encodeMap, Map<Character, Character> decodeMap, char from, char to,
|
||||
char[] code) {
|
||||
int i = 0;
|
||||
|
||||
for (char c = from; c <= to; c++) {
|
||||
if (code[i] != '\u0000') {
|
||||
encodeMap.put(code[i], c);
|
||||
decodeMap.put(c, code[i]);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method must be implemented by concrete Charsets. We allow
|
||||
* subclasses of the Pica charset.
|
||||
*/
|
||||
public boolean contains(Charset charset) {
|
||||
return charset instanceof Pica;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called by users of this Charset to obtain an encoder.
|
||||
* This implementation instantiates an instance of a private class
|
||||
* (defined below) and passes it an encoder from the base Charset.
|
||||
*/
|
||||
public CharsetEncoder newEncoder() {
|
||||
return new PicaEncoder(this, encodeCharset.newEncoder());
|
||||
}
|
||||
|
||||
/**
|
||||
* Called by users of this Charset to obtain a decoder.
|
||||
* This implementation instantiates an instance of a private class
|
||||
* (defined below) and passes it a decoder from the base Charset.
|
||||
*/
|
||||
public CharsetDecoder newDecoder() {
|
||||
return new PicaDecoder(this, encodeCharset.newDecoder());
|
||||
}
|
||||
|
||||
private static class PicaEncoder extends CharsetEncoder {
|
||||
|
||||
private CharsetEncoder baseEncoder;
|
||||
|
||||
/**
|
||||
* Constructor, call the superclass constructor with the
|
||||
* Charset object and the encodings sizes from the
|
||||
* delegate encoder.
|
||||
*/
|
||||
PicaEncoder(Charset cs, CharsetEncoder baseEncoder) {
|
||||
super(cs, baseEncoder.averageBytesPerChar(),
|
||||
baseEncoder.maxBytesPerChar());
|
||||
this.baseEncoder = baseEncoder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of the encoding loop. First, we apply
|
||||
* the Pica charset mapping to the CharBuffer, then
|
||||
* reset the encoder for the base Charset and call it's
|
||||
* encode() method to do the actual encoding. The CharBuffer
|
||||
* passed in may be read-only or re-used by the caller for
|
||||
* other purposes so we duplicate it and apply the Pica
|
||||
* encoding to the copy. We do want to advance the position
|
||||
* of the input buffer to reflect the chars consumed.
|
||||
*/
|
||||
protected CoderResult encodeLoop(CharBuffer cb, ByteBuffer bb) {
|
||||
CharBuffer tmpcb = CharBuffer.allocate(cb.remaining());
|
||||
while (cb.hasRemaining()) {
|
||||
tmpcb.put(cb.get());
|
||||
}
|
||||
tmpcb.rewind();
|
||||
for (int pos = tmpcb.position(); pos < tmpcb.limit(); pos++) {
|
||||
char c = tmpcb.get(pos);
|
||||
Character mapChar = encodeMap.get(c);
|
||||
if (mapChar != null) {
|
||||
tmpcb.put(pos, mapChar);
|
||||
}
|
||||
}
|
||||
baseEncoder.reset();
|
||||
CoderResult cr = baseEncoder.encode(tmpcb, bb, true);
|
||||
// If error or output overflow, we need to adjust
|
||||
// the position of the input buffer to match what
|
||||
// was really consumed from the temp buffer. If
|
||||
// underflow (all input consumed) this is a no-op.
|
||||
cb.position(cb.position() - tmpcb.remaining());
|
||||
return cr;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The decoder implementation for the Pica Charset.
|
||||
*/
|
||||
private static class PicaDecoder extends CharsetDecoder {
|
||||
|
||||
/**
|
||||
* Constructor, call the superclass constructor with the
|
||||
* Charset object and pass alon the chars/byte values
|
||||
* from the delegate decoder.
|
||||
*/
|
||||
PicaDecoder(Charset cs, CharsetDecoder baseDecoder) {
|
||||
// base decoder only needed for size hints
|
||||
super(cs, baseDecoder.averageCharsPerByte(),
|
||||
baseDecoder.maxCharsPerByte());
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of the decoding loop.
|
||||
*/
|
||||
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
|
||||
while (in.hasRemaining()) {
|
||||
byte b = in.get();
|
||||
|
||||
if (!out.hasRemaining()) {
|
||||
in.position(in.position() - 1);
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
char oldChar = (char) (b & 0xFF);
|
||||
Character mapChar = decodeMap.get(oldChar);
|
||||
out.put(mapChar != null ? mapChar : oldChar);
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
315
src/main/java/org/xbib/charset/PicaCharset.java
Normal file
315
src/main/java/org/xbib/charset/PicaCharset.java
Normal file
|
@ -0,0 +1,315 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A Charset for the OCLC|PICA-character-encoding (x-PICA). It decodes
|
||||
* x-PICA to Unicode and encodes Unicode to x-PICA.
|
||||
*/
|
||||
public class PicaCharset extends Charset {
|
||||
|
||||
private static final char[] BYTE_TO_CHAR_MAP = newPicaToUnicodeMap();
|
||||
|
||||
private static final Map<Character, Byte> CHAR_TO_BYTE_MAP = newCharToByteMap();
|
||||
|
||||
private boolean isNFCOutput;
|
||||
|
||||
public PicaCharset() {
|
||||
this(true);
|
||||
}
|
||||
|
||||
private PicaCharset(boolean isNFCOuput) {
|
||||
super("x-PICA", null);
|
||||
this.isNFCOutput = isNFCOuput;
|
||||
}
|
||||
|
||||
private static char[] newPicaToUnicodeMap() {
|
||||
char[] map = new char[256];
|
||||
for (int i = 0; i < 128; i++) {
|
||||
map[i] = (char) i;
|
||||
}
|
||||
/*
|
||||
* DNB-internal definitions, needed for conversion from pica+ to mab2
|
||||
*/
|
||||
map[0x80] = ISO5426.TEILFELDTRENNZEICHEN;
|
||||
map[0x81] = ISO5426.NICHTSORTIERBEGINNZEICHEN;
|
||||
map[0x82] = ISO5426.NICHTSORTIERENDEZEICHEN;
|
||||
map[0x83] = '|'; // Füllzeichen
|
||||
map[0x84] = 'u'; // Zeichencode
|
||||
map[0x85] = 'z'; // Zeichenvorrat
|
||||
|
||||
/* L with stroke */
|
||||
map[0xA1] = '\u0141';
|
||||
/* O with stroke */
|
||||
map[0xA2] = '\u00D8';
|
||||
/* D with stroke */
|
||||
map[0xA3] = '\u0110';
|
||||
/* Capital thorn */
|
||||
map[0xA4] = '\u00DE';
|
||||
/* Capital ligature AE */
|
||||
map[0xA5] = '\u00C6';
|
||||
/* Capital ligature OE */
|
||||
map[0xA6] = '\u0152';
|
||||
/* Modifier letter prime */
|
||||
map[0xA7] = '\u02B9';
|
||||
/* Middle dot */
|
||||
map[0xA8] = '\u00B7';
|
||||
/* MUSIC FLAT SIGN */
|
||||
map[0xA9] = '\u266D';
|
||||
/* Registered sign */
|
||||
map[0xAA] = '\u00AE';
|
||||
/* Plus-minus sign */
|
||||
map[0xAB] = '\u00B1';
|
||||
/* Capital letter O with horn */
|
||||
map[0xAC] = '\u01A0';
|
||||
/* Capital letter U with horn */
|
||||
map[0xAD] = '\u01AF';
|
||||
/* Modifier letter apostrophe */
|
||||
map[0xAE] = '\u02BC';
|
||||
/* LATIN CAPITAL LETTER A WITH RING ABOVE */
|
||||
map[0xAF] = '\u00C5';
|
||||
/* Modifier letter turned comma */
|
||||
map[0xB0] = '\u02BB';
|
||||
/* Latin small letter l with stroke */
|
||||
map[0xB1] = '\u0142';
|
||||
/* Latin small letter o with stroke */
|
||||
map[0xB2] = '\u00F8';
|
||||
/* Latin small letter d with stroke */
|
||||
map[0xB3] = '\u0111';
|
||||
/* Latin small letter thorn */
|
||||
map[0xB4] = '\u00FE';
|
||||
/* Latin small ligature ae */
|
||||
map[0xB5] = '\u00E6';
|
||||
/* Latin small ligature oe */
|
||||
map[0xB6] = '\u0153';
|
||||
/* modifier letter double prime */
|
||||
map[0xB7] = '\u02BA';
|
||||
/* latin small letter dotless i */
|
||||
map[0xB8] = '\u0131';
|
||||
/* pound sign */
|
||||
map[0xB9] = '\u00A3';
|
||||
/* latin small letter eth */
|
||||
map[0xBA] = '\u00F0';
|
||||
/* greek small letter alpha */
|
||||
map[0xBB] = '\u03B1';
|
||||
/* latin small letter o with horn */
|
||||
map[0xBC] = '\u01A1';
|
||||
/* latin small letter u with horn */
|
||||
map[0xBD] = '\u01B0';
|
||||
/* latin small letter sharp s */
|
||||
map[0xBE] = '\u00DF';
|
||||
/* LATIN SMALL LETTER A WITH RING ABOVE */
|
||||
map[0xBF] = '\u00E5';
|
||||
/* Latin capital ligature IJ */
|
||||
map[0xC0] = '\u0132';
|
||||
/* Ä */
|
||||
map[0xC1] = '\u00C4';
|
||||
/* Ö */
|
||||
map[0xC2] = '\u00D6';
|
||||
/* Ü */
|
||||
map[0xC3] = '\u00DC';
|
||||
/* LATIN CAPITAL LETTER OPEN O */
|
||||
map[0xC4] = '\u0186';
|
||||
/* latin capital letter reversed E */
|
||||
map[0xC5] = '\u018E';
|
||||
/* NOT EQUAL TO */
|
||||
map[0xC6] = '\u2260';
|
||||
/* RIGHTWARDS ARROW */
|
||||
map[0xC7] = '\u2192';
|
||||
/* LESS-THAN OR EQUAL TO */
|
||||
map[0xC8] = '\u2264';
|
||||
/* INFINITY */
|
||||
map[0xC9] = '\u221E';
|
||||
/* INTEGRAL */
|
||||
map[0xCA] = '\u222B';
|
||||
/* Multiplication sign */
|
||||
map[0xCB] = '\u00D7';
|
||||
/* Section sign */
|
||||
map[0xCC] = '\u00A7';
|
||||
/* SQUARE ROOT */
|
||||
map[0xCD] = '\u221A';
|
||||
/* GREATER-THAN OR LESS-THAN */
|
||||
map[0xCE] = '\u2277';
|
||||
/* GREATER-THAN OR EQUAL TO */
|
||||
map[0xCF] = '\u2265';
|
||||
/* Latin small ligature ij */
|
||||
map[0xD0] = '\u0133';
|
||||
/* ä */
|
||||
map[0xD1] = '\u00E4';
|
||||
/* ö */
|
||||
map[0xD2] = '\u00F6';
|
||||
/* ü */
|
||||
map[0xD3] = '\u00FC';
|
||||
/* LATIN SMALL LETTER OPEN O */
|
||||
map[0xD4] = '\u0254';
|
||||
/* Latin small letter reversed e */
|
||||
map[0xD5] = '\u01DD';
|
||||
/* inverted question mark */
|
||||
map[0xD6] = '\u00BF';
|
||||
/* inverted exclamation mark */
|
||||
map[0xD7] = '\u00A1';
|
||||
/* Greek small letter beta */
|
||||
map[0xD8] = '\u03B2';
|
||||
/* Greek small letter gamma */
|
||||
map[0xDA] = '\u03B3';
|
||||
/* Greek capital letter pi */
|
||||
map[0xDB] = '\u03C0';
|
||||
/* Combining hook above */
|
||||
map[0xE0] = '\u0309';
|
||||
/* COMBINING GRAVE ACCENT */
|
||||
map[0xE1] = '\u0300';
|
||||
/* COMBINING ACUTE ACCENT */
|
||||
map[0xE2] = '\u0301';
|
||||
/* COMBINING CIRCUMFLEX ACCENT */
|
||||
map[0xE3] = '\u0302';
|
||||
/* COMBINING TILDE */
|
||||
map[0xE4] = '\u0303';
|
||||
/* Macron */
|
||||
map[0xE5] = '\u0304';
|
||||
/* Combining breve */
|
||||
map[0xE6] = '\u0306';
|
||||
/* Combining dot above */
|
||||
map[0xE7] = '\u0307';
|
||||
/* COMBINING DIAERESIS */
|
||||
map[0xE8] = '\u0308';
|
||||
/* Combining caron */
|
||||
map[0xE9] = '\u030C';
|
||||
/* Combining ring above */
|
||||
map[0xEA] = '\u030A';
|
||||
/* COMBINING LIGATURE LEFT HALF */
|
||||
map[0xEB] = '\uFE20';
|
||||
/* COMBINING LIGATURE RIGHT HALF */
|
||||
map[0xEC] = '\uFE21';
|
||||
/* combining comma above */
|
||||
map[0xED] = '\u0313';
|
||||
/* combining double acute accent */
|
||||
map[0xEE] = '\u030B';
|
||||
/* combining candrabindu */
|
||||
map[0xEF] = '\u0310';
|
||||
/* Combining cedilla */
|
||||
map[0xF0] = '\u0327';
|
||||
/* Combining dot below */
|
||||
map[0xF2] = '\u0323';
|
||||
/* Combining diaeresis below */
|
||||
map[0xF3] = '\u0324';
|
||||
/* Combining ring below */
|
||||
map[0xF4] = '\u0325';
|
||||
/* Combining double low line */
|
||||
map[0xF5] = '\u0333';
|
||||
/* Combining macron below */
|
||||
map[0xF6] = '\u0331';
|
||||
/* Combining ogonek */
|
||||
map[0xF8] = '\u0328';
|
||||
/* Combining breve below */
|
||||
map[0xF9] = '\u032E';
|
||||
/* Combining DOUBLE TILDE RIGHT HALF */
|
||||
map[0xFA] = '\uFE23';
|
||||
/* Combining DOUBLE TILDE LEFT HALF */
|
||||
map[0xFB] = '\uFE22';
|
||||
/* Combining comma above right */
|
||||
map[0xFE] = '\u0315';
|
||||
return map;
|
||||
}
|
||||
|
||||
private static Map<Character, Byte> newCharToByteMap() {
|
||||
char[] byteToCharMap = BYTE_TO_CHAR_MAP;
|
||||
byteToCharMap[0x80] = 0;
|
||||
byteToCharMap[0x81] = 0;
|
||||
byteToCharMap[0x82] = 0;
|
||||
byteToCharMap[0x83] = 0;
|
||||
byteToCharMap[0x84] = 0;
|
||||
byteToCharMap[0x85] = 0;
|
||||
Map<Character, Byte> ret = new HashMap<>(byteToCharMap.length);
|
||||
for (int i = 0; i < byteToCharMap.length; i++) {
|
||||
if (byteToCharMap[i] != 0) {
|
||||
ret.put(byteToCharMap[i], (byte) i);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Charset cs) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharsetDecoder newDecoder() {
|
||||
PicaDecoder ret = new PicaDecoder(this);
|
||||
ret.setComposeCharactersAfterConversion(isNFCOutput);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharsetEncoder newEncoder() {
|
||||
return new PicaEncoder(this);
|
||||
}
|
||||
|
||||
private static class PicaDecoder extends SingleByteDecoder {
|
||||
|
||||
PicaDecoder(Charset cs) {
|
||||
super(cs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public char byteToChar(byte b) {
|
||||
return BYTE_TO_CHAR_MAP[b & 0xFF];
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCombiningCharacter(byte b) {
|
||||
return (b & 0xFF) >= 0xE0 && (b & 0xFF) <= 0xFE;
|
||||
}
|
||||
}
|
||||
|
||||
private static class PicaEncoder extends SingleByteEncoder {
|
||||
|
||||
PicaEncoder(Charset cs) {
|
||||
super(cs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte charToByte(char c) {
|
||||
Byte b = CHAR_TO_BYTE_MAP.get(c);
|
||||
if (b == null) {
|
||||
return 0;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
}
|
||||
}
|
264
src/main/java/org/xbib/charset/SimpleAnselCharset.java
Normal file
264
src/main/java/org/xbib/charset/SimpleAnselCharset.java
Normal file
|
@ -0,0 +1,264 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
/**
|
||||
* This is a simplified version of "ANSEL charset" at http://anselcharset.sourceforge.net/
|
||||
* by Piotr Andzel.
|
||||
* Original code licensed under LGPL http://www.gnu.org/licenses/lgpl.html
|
||||
*/
|
||||
public class SimpleAnselCharset extends Charset {
|
||||
|
||||
private final Map<Character, byte[]> mapping;
|
||||
private final Map<Byte, ReverseMappingEntity> reverseMapping;
|
||||
|
||||
public SimpleAnselCharset() {
|
||||
super("SIMPLE_ANSEL", BibliographicCharsetProvider.aliasesFor("SIMPLE_ANSEL"));
|
||||
mapping = createMapping(getClass().getResourceAsStream("ansel-mapping.txt"));
|
||||
reverseMapping = createReverseMapping(mapping);
|
||||
}
|
||||
|
||||
private static Map<Character, byte[]> createMapping(InputStream mappingStream) {
|
||||
Map<Character, byte[]> mapping = new HashMap<>();
|
||||
try (BufferedReader reader = new BufferedReader(new InputStreamReader(mappingStream, StandardCharsets.UTF_8))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
int i = line.indexOf(";");
|
||||
if (i < 0) {
|
||||
i = line.indexOf("#");
|
||||
}
|
||||
if (i >= 0) {
|
||||
line = line.substring(0, i);
|
||||
}
|
||||
String[] kvp = line.split("=");
|
||||
if (kvp.length == 2) {
|
||||
String uni = kvp[0];
|
||||
String ans = kvp[1];
|
||||
Character uniCode = (char) Integer.parseInt(uni.replaceFirst("^[uU]", ""), 16);
|
||||
String[] ansSeq = ans.split(" ");
|
||||
byte[] ansCodes = new byte[ansSeq.length];
|
||||
for (int j = 0; j < ansSeq.length; j++) {
|
||||
ansCodes[j] = (byte) (Integer.parseInt(ansSeq[j].replaceFirst("^0[xX]", ""), 16) & 0xFF);
|
||||
}
|
||||
mapping.put(uniCode, ansCodes);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
Logger.getLogger(SimpleAnselCharset.class.getName()).log(Level.WARNING, e.getMessage(), e);
|
||||
}
|
||||
return mapping;
|
||||
}
|
||||
|
||||
private static Map<Byte, ReverseMappingEntity> createReverseMapping(Map<Character, byte[]> mapping) {
|
||||
Map<Byte, ReverseMappingEntity> rev = new TreeMap<>();
|
||||
for (Map.Entry<Character, byte[]> e : mapping.entrySet()) {
|
||||
Map<Byte, ReverseMappingEntity> ptr = rev;
|
||||
Character ch = e.getKey();
|
||||
for (int i = 0; i < e.getValue().length; i++) {
|
||||
Byte b = e.getValue()[i];
|
||||
ReverseMappingEntity ent = ptr.get(b);
|
||||
if (ent == null) {
|
||||
ent = new ReverseMappingEntity();
|
||||
ptr.put(b, ent);
|
||||
}
|
||||
if (i + 1 == e.getValue().length) {
|
||||
ent.setCharacter(ch);
|
||||
} else {
|
||||
ptr = ent.getMapping();
|
||||
}
|
||||
}
|
||||
}
|
||||
return rev;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canEncode() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharsetDecoder newDecoder() {
|
||||
return new Decoder(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharsetEncoder newEncoder() {
|
||||
return new Encoder(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Charset cs) {
|
||||
return displayName().equals(cs.displayName());
|
||||
}
|
||||
|
||||
private static class ReverseMappingEntity {
|
||||
private TreeMap<Byte, ReverseMappingEntity> mapping = new TreeMap<>();
|
||||
private Character character;
|
||||
|
||||
public Character getCharacter() {
|
||||
return character;
|
||||
}
|
||||
|
||||
public void setCharacter(Character ch) {
|
||||
this.character = ch;
|
||||
}
|
||||
|
||||
public Map<Byte, ReverseMappingEntity> getMapping() {
|
||||
return mapping;
|
||||
}
|
||||
}
|
||||
|
||||
private class Decoder extends CharsetDecoder {
|
||||
private LinkedList<Byte> buffer = new LinkedList<>();
|
||||
|
||||
Decoder(Charset charset) {
|
||||
super(charset, 2.2f, 3.0f);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected CoderResult decodeLoop(final ByteBuffer in, CharBuffer out) {
|
||||
ReverseMappingBuffer rmb = new ReverseMappingBuffer(reverseMapping, buffer) {
|
||||
@Override
|
||||
protected Byte onNextByte() {
|
||||
return in.hasRemaining() ? in.get() : null;
|
||||
}
|
||||
};
|
||||
while (in.hasRemaining() || rmb.hasRemaining()) {
|
||||
if (out.hasRemaining()) {
|
||||
Character ch = rmb.nextCharacter();
|
||||
out.append(ch);
|
||||
} else {
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
}
|
||||
|
||||
private class Encoder extends CharsetEncoder {
|
||||
|
||||
Encoder(Charset charset) {
|
||||
super(charset, 2.2f, 3.0f);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
|
||||
while (in.hasRemaining()) {
|
||||
if (out.hasRemaining()) {
|
||||
char unicode = in.get();
|
||||
byte[] ansel;
|
||||
if (unicode <= 0x7f) {
|
||||
ansel = new byte[2];
|
||||
ansel[0] = (byte) ((unicode >> 8) & 0xff);
|
||||
ansel[1] = (byte) ((unicode) & 0xff);
|
||||
} else {
|
||||
ansel = mapping.get(unicode);
|
||||
if (ansel == null) {
|
||||
return CoderResult.unmappableForLength(2);
|
||||
}
|
||||
}
|
||||
boolean started = false;
|
||||
for (int i = 0; i < ansel.length; i++) {
|
||||
if (started || ansel[i] != 0 || i == ansel.length - 1) {
|
||||
out.put(ansel[i]);
|
||||
started = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
}
|
||||
|
||||
abstract class ReverseMappingBuffer {
|
||||
private Map<Byte, ReverseMappingEntity> rm;
|
||||
private LinkedList<Byte> buffer;
|
||||
|
||||
ReverseMappingBuffer(Map<Byte, ReverseMappingEntity> rm, LinkedList<Byte> buffer) {
|
||||
this.rm = rm;
|
||||
this.buffer = buffer;
|
||||
}
|
||||
|
||||
boolean hasRemaining() {
|
||||
return !buffer.isEmpty();
|
||||
}
|
||||
|
||||
Character nextCharacter() {
|
||||
LinkedList<Byte> queue = new LinkedList<>();
|
||||
ReverseMappingEntity rme = null;
|
||||
Character ch = null;
|
||||
for (Byte b = nextByte(); b != null; b = nextByte()) {
|
||||
queue.addLast(b);
|
||||
rme = rme != null ? rme.getMapping().get(b) : rm.get(b);
|
||||
if (rme == null) {
|
||||
buffer.addAll(queue);
|
||||
return ch != null ? ch : Character.valueOf((char) nextByte().byteValue());
|
||||
}
|
||||
if (rme.getCharacter() != null) {
|
||||
ch = rme.getCharacter();
|
||||
queue.clear();
|
||||
}
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
protected abstract Byte onNextByte();
|
||||
|
||||
private Byte nextByte() {
|
||||
if (!buffer.isEmpty()) {
|
||||
return buffer.pollFirst();
|
||||
} else {
|
||||
return onNextByte();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
98
src/main/java/org/xbib/charset/SingleByteDecoder.java
Normal file
98
src/main/java/org/xbib/charset/SingleByteDecoder.java
Normal file
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.text.Normalizer;
|
||||
|
||||
/**
 * Base class for decoders of single-byte character sets in which combining characters
 * are stored <em>before</em> the character they modify.
 *
 * <p>Bytes are collected into groups that end with the first non-combining byte. Each
 * group is converted byte by byte, written in reverse order (so the base character comes
 * first) and, unless switched off, normalized to NFC.
 */
abstract class SingleByteDecoder extends CharsetDecoder {

    private boolean composeCharactersAfterConversion = true;

    /**
     * @param cs the charset this decoder is created for
     */
    SingleByteDecoder(Charset cs) {
        super(cs, 1.0f, 1.0f);
    }

    /**
     * @param composeCharactersAfterConversion {@code true} to normalize each converted
     *        group to NFC, {@code false} to emit the converted characters as they are
     */
    void setComposeCharactersAfterConversion(boolean composeCharactersAfterConversion) {
        this.composeCharactersAfterConversion = composeCharactersAfterConversion;
    }

    /**
     * Decodes complete groups from {@code in} into {@code out}.
     *
     * <p>Only whole groups are consumed. Combining bytes still waiting for their base
     * byte when {@code in} runs out are left unconsumed, so the caller presents them
     * again with the following input instead of losing them. On overflow the input
     * position is likewise reset to the start of the group that did not fit.
     *
     * @param in  the bytes to decode
     * @param out the buffer receiving the decoded characters
     * @return {@link CoderResult#OVERFLOW} if {@code out} cannot hold the next group,
     *         otherwise {@link CoderResult#UNDERFLOW}
     */
    @Override
    protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
        // Input position of the first byte of the group currently being collected.
        int groupStart = in.position();
        while (in.hasRemaining()) {
            byte c = in.get();
            if (isCombiningCharacter(c)) {
                // Keep collecting until the base character arrives.
                continue;
            }
            int groupEnd = in.position();
            StringBuilder converted = new StringBuilder(groupEnd - groupStart);
            // Reverse order: the base byte (last in the group) becomes the first character.
            for (int i = groupEnd - 1; i >= groupStart; i--) {
                char convertedCharacter = byteToChar(in.get(i));
                if (convertedCharacter == 0) {
                    converted.append(replacement());
                } else {
                    converted.append(convertedCharacter);
                }
            }
            String result = composeCharactersAfterConversion
                    ? Normalizer.normalize(converted, Normalizer.Form.NFC)
                    : converted.toString();
            if (out.remaining() < result.length()) {
                in.position(groupStart);
                return CoderResult.OVERFLOW;
            }
            out.put(result);
            groupStart = groupEnd;
        }
        // Hand back an unfinished group of combining bytes.
        in.position(groupStart);
        return CoderResult.UNDERFLOW;
    }

    /**
     * @param c a byte of the encoded input
     * @return {@code true} if the byte is a combining character that belongs to the
     *         following base character
     */
    public abstract boolean isCombiningCharacter(byte c);

    /**
     * @param b a byte of the encoded input
     * @return the corresponding character; {@code 0} makes the decoder emit its
     *         replacement string instead
     */
    public abstract char byteToChar(byte b);

}
|
103
src/main/java/org/xbib/charset/SingleByteEncoder.java
Normal file
103
src/main/java/org/xbib/charset/SingleByteEncoder.java
Normal file
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.text.Normalizer;
|
||||
|
||||
/**
 * Base class for encoders of single-byte character sets in which combining characters
 * are stored <em>before</em> the character they modify.
 *
 * <p>Each input character is optionally decomposed (NFD). A base character and the
 * combining characters that follow it form a group; the group is written in reverse
 * order, one byte per character.
 */
abstract class SingleByteEncoder extends CharsetEncoder {

    private boolean decomposeCharactersBeforeConversion = true;

    /**
     * @param cs the charset this encoder is created for
     */
    SingleByteEncoder(Charset cs) {
        super(cs, 1.0f, 1.0f);
    }

    /**
     * @param decomposeCharactersBeforeConversion {@code true} to normalize every input
     *        character to NFD before it is converted, {@code false} to convert it as is
     */
    public void setDecomposeCharactersBeforeConversion(boolean decomposeCharactersBeforeConversion) {
        this.decomposeCharactersBeforeConversion = decomposeCharactersBeforeConversion;
    }

    /**
     * Encodes the characters of {@code in} into {@code out}.
     *
     * <p>A group is written when the next non-combining character arrives or when
     * {@code in} is exhausted. On overflow nothing of the pending group has been written
     * and the input position is reset to the first input character of that group.
     *
     * @param in  the characters to encode
     * @param out the buffer receiving the encoded bytes
     * @return {@link CoderResult#OVERFLOW} if {@code out} cannot hold the pending group
     *         plus the current character, otherwise {@link CoderResult#UNDERFLOW}
     */
    @Override
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        // Decomposed characters of the group that has not been written yet.
        StringBuilder pending = new StringBuilder();
        // Input position of the first character that contributed to 'pending'. Tracked
        // separately because decomposition can make 'pending' longer than the number of
        // input characters consumed for it.
        int groupStart = in.position();
        while (in.hasRemaining()) {
            int charStart = in.position();
            char c = in.get();
            String charAsString;
            if (decomposeCharactersBeforeConversion) {
                charAsString = Normalizer.normalize(String.valueOf(c), Normalizer.Form.NFD);
            } else {
                charAsString = String.valueOf(c);
            }
            if (out.remaining() < pending.length() + charAsString.length()) {
                in.position(groupStart);
                return CoderResult.OVERFLOW;
            }
            if (pending.length() > 0 && !isCombiningCharacter(c)) {
                // A new base character starts: the previous group is complete.
                flush(pending, out);
                groupStart = charStart;
            }
            pending.append(charAsString);
        }
        // Room for this last group was verified when its final character was appended.
        flush(pending, out);
        return CoderResult.UNDERFLOW;
    }

    /**
     * @param c the character to convert
     * @return the byte for {@code c}; {@code 0} makes the encoder emit its replacement
     *         bytes instead
     */
    public abstract byte charToByte(char c);

    /**
     * @param c the character to test
     * @return {@code true} if {@code c} lies in the range U+0300 to U+036F
     */
    public boolean isCombiningCharacter(char c) {
        return c >= '\u0300' && c <= '\u036F';
    }

    /** Writes the pending group in reverse order (combining characters first) and empties it. */
    private void flush(StringBuilder pending, ByteBuffer out) {
        for (int i = pending.length() - 1; i >= 0; i--) {
            convert(pending.charAt(i), out);
        }
        pending.setLength(0);
    }

    /** Writes the byte for {@code c}, or the replacement bytes if there is none. */
    private void convert(char c, ByteBuffer out) {
        byte b = charToByte(c);
        if (b != 0) {
            out.put(b);
        } else {
            out.put(replacement());
        }
    }
}
|
4
src/main/java/org/xbib/charset/package-info.java
Normal file
4
src/main/java/org/xbib/charset/package-info.java
Normal file
|
@ -0,0 +1,4 @@
|
|||
/**
|
||||
* Bibliographic character set implementations.
|
||||
*/
|
||||
package org.xbib.charset;
|
|
@ -0,0 +1 @@
|
|||
org.xbib.charset.BibliographicCharsetProvider
|
598
src/main/resources/org/xbib/charset/ansel-mapping.txt
Normal file
598
src/main/resources/org/xbib/charset/ansel-mapping.txt
Normal file
|
@ -0,0 +1,598 @@
|
|||
U001b=0x1b
|
||||
U001d=0x1d
|
||||
U001e=0x1e
|
||||
U001f=0x1f
|
||||
U0020=0x20
|
||||
U0021=0x21
|
||||
U0022=0x22
|
||||
U0023=0x23
|
||||
U0024=0x24
|
||||
U0025=0x25
|
||||
U0026=0x26
|
||||
U0027=0x27
|
||||
U0028=0x28
|
||||
U0029=0x29
|
||||
U002a=0x2a
|
||||
U002b=0x2b
|
||||
U002c=0x2c
|
||||
U002d=0x2d
|
||||
U002e=0x2e
|
||||
U002f=0x2f
|
||||
U0030=0x30
|
||||
U0031=0x31
|
||||
U0032=0x32
|
||||
U0033=0x33
|
||||
U0034=0x34
|
||||
U0035=0x35
|
||||
U0036=0x36
|
||||
U0037=0x37
|
||||
U0038=0x38
|
||||
U0039=0x39
|
||||
U003a=0x3a
|
||||
U003b=0x3b
|
||||
U003c=0x3c
|
||||
U003d=0x3d
|
||||
U003e=0x3e
|
||||
U003f=0x3f
|
||||
U0040=0x40
|
||||
U0041=0x41
|
||||
U0042=0x42
|
||||
U0043=0x43
|
||||
U0044=0x44
|
||||
U0045=0x45
|
||||
U0046=0x46
|
||||
U0047=0x47
|
||||
U0048=0x48
|
||||
U0049=0x49
|
||||
U004a=0x4a
|
||||
U004b=0x4b
|
||||
U004c=0x4c
|
||||
U004d=0x4d
|
||||
U004e=0x4e
|
||||
U004f=0x4f
|
||||
U0050=0x50
|
||||
U0051=0x51
|
||||
U0052=0x52
|
||||
U0053=0x53
|
||||
U0054=0x54
|
||||
U0055=0x55
|
||||
U0056=0x56
|
||||
U0057=0x57
|
||||
U0058=0x58
|
||||
U0059=0x59
|
||||
U005a=0x5a
|
||||
U005b=0x5b
|
||||
U005c=0x5c
|
||||
U005d=0x5d
|
||||
U005e=0x5e
|
||||
U005f=0x5f
|
||||
U0060=0x60
|
||||
U0061=0x61
|
||||
U0062=0x62
|
||||
U0063=0x63
|
||||
U0064=0x64
|
||||
U0065=0x65
|
||||
U0066=0x66
|
||||
U0067=0x67
|
||||
U0068=0x68
|
||||
U0069=0x69
|
||||
U006a=0x6a
|
||||
U006b=0x6b
|
||||
U006c=0x6c
|
||||
U006d=0x6d
|
||||
U006e=0x6e
|
||||
U006f=0x6f
|
||||
U0070=0x70
|
||||
U0071=0x71
|
||||
U0072=0x72
|
||||
U0073=0x73
|
||||
U0074=0x74
|
||||
U0075=0x75
|
||||
U0076=0x76
|
||||
U0077=0x77
|
||||
U0078=0x78
|
||||
U0079=0x79
|
||||
U007a=0x7a
|
||||
U007b=0x7b
|
||||
U007c=0x7c
|
||||
U007d=0x7d
|
||||
U007e=0x7e
|
||||
U0098=0x88
|
||||
U009c=0x89
|
||||
U200d=0x8d
|
||||
U200c=0x8e
|
||||
U0141=0xa1
|
||||
U00d8=0xa2
|
||||
U0110=0xa3
|
||||
U00de=0xa4
|
||||
U00c6=0xa5
|
||||
U0152=0xa6
|
||||
U02b9=0xa7
|
||||
U00b7=0xa8
|
||||
U266d=0xa9
|
||||
U00ae=0xaa
|
||||
U00b1=0xab
|
||||
U01a0=0xac
|
||||
U01af=0xad
|
||||
U02bc=0xae
|
||||
U02bb=0xb0
|
||||
U0142=0xb1
|
||||
U00f8=0xb2
|
||||
U0111=0xb3
|
||||
U00fe=0xb4
|
||||
U00e6=0xb5
|
||||
U0153=0xb6
|
||||
U02ba=0xb7
|
||||
U0131=0xb8
|
||||
U00a3=0xb9
|
||||
U00f0=0xba
|
||||
U01a1=0xbc
|
||||
U01b0=0xbd
|
||||
U00b0=0xc0
|
||||
U2113=0xc1
|
||||
U2117=0xc2
|
||||
U00a9=0xc3
|
||||
U266f=0xc4
|
||||
U00bf=0xc5
|
||||
U00a1=0xc6
|
||||
U00df=0xc7
|
||||
U20ac=0xc8
|
||||
U0309=0xe0
|
||||
U1ea2=0xe0 0x41
|
||||
U1eba=0xe0 0x45
|
||||
U1ec8=0xe0 0x49
|
||||
U1ece=0xe0 0x4f
|
||||
U1ee6=0xe0 0x55
|
||||
U1ef6=0xe0 0x59
|
||||
U1ea3=0xe0 0x61
|
||||
U1ebb=0xe0 0x65
|
||||
U1ec9=0xe0 0x69
|
||||
U1ecf=0xe0 0x6f
|
||||
U1ee7=0xe0 0x75
|
||||
U1ef7=0xe0 0x79
|
||||
U1ede=0xe0 0xac
|
||||
U1eec=0xe0 0xad
|
||||
U1edf=0xe0 0xbc
|
||||
U1eed=0xe0 0xbd
|
||||
U1ea8=0xe0 0xe3 0x41
|
||||
U1ec2=0xe0 0xe3 0x45
|
||||
U1ed4=0xe0 0xe3 0x4f
|
||||
U1ea9=0xe0 0xe3 0x61
|
||||
U1ec3=0xe0 0xe3 0x65
|
||||
U1ed5=0xe0 0xe3 0x6f
|
||||
U1eb2=0xe0 0xe6 0x41
|
||||
U1eb3=0xe0 0xe6 0x61
|
||||
U0300=0xe1
|
||||
U00c0=0xe1 0x41
|
||||
U00c8=0xe1 0x45
|
||||
U00cc=0xe1 0x49
|
||||
U01f8=0xe1 0x4e
|
||||
U00d2=0xe1 0x4f
|
||||
U00d9=0xe1 0x55
|
||||
U1e80=0xe1 0x57
|
||||
U1ef2=0xe1 0x59
|
||||
U00e0=0xe1 0x61
|
||||
U00e8=0xe1 0x65
|
||||
U00ec=0xe1 0x69
|
||||
U01f9=0xe1 0x6e
|
||||
U00f2=0xe1 0x6f
|
||||
U00f9=0xe1 0x75
|
||||
U1e81=0xe1 0x77
|
||||
U1ef3=0xe1 0x79
|
||||
U1edc=0xe1 0xac
|
||||
U1eea=0xe1 0xad
|
||||
U1edd=0xe1 0xbc
|
||||
U1eeb=0xe1 0xbd
|
||||
U1ea6=0xe1 0xe3 0x41
|
||||
U1ec0=0xe1 0xe3 0x45
|
||||
U1ed2=0xe1 0xe3 0x4f
|
||||
U1ea7=0xe1 0xe3 0x61
|
||||
U1ec1=0xe1 0xe3 0x65
|
||||
U1ed3=0xe1 0xe3 0x6f
|
||||
U1e14=0xe1 0xe5 0x45
|
||||
U1e50=0xe1 0xe5 0x4f
|
||||
U1e15=0xe1 0xe5 0x65
|
||||
U1e51=0xe1 0xe5 0x6f
|
||||
U1eb0=0xe1 0xe6 0x41
|
||||
U1eb1=0xe1 0xe6 0x61
|
||||
U01db=0xe1 0xe8 0x55
|
||||
U01dc=0xe1 0xe8 0x75
|
||||
U0301=0xe2
|
||||
U00c1=0xe2 0x41
|
||||
U0106=0xe2 0x43
|
||||
U00c9=0xe2 0x45
|
||||
U01f4=0xe2 0x47
|
||||
U00cd=0xe2 0x49
|
||||
U1e30=0xe2 0x4b
|
||||
U0139=0xe2 0x4c
|
||||
U1e3e=0xe2 0x4d
|
||||
U0143=0xe2 0x4e
|
||||
U00d3=0xe2 0x4f
|
||||
U1e54=0xe2 0x50
|
||||
U0154=0xe2 0x52
|
||||
U015a=0xe2 0x53
|
||||
U00da=0xe2 0x55
|
||||
U1e82=0xe2 0x57
|
||||
U00dd=0xe2 0x59
|
||||
U0179=0xe2 0x5a
|
||||
U00e1=0xe2 0x61
|
||||
U0107=0xe2 0x63
|
||||
U00e9=0xe2 0x65
|
||||
U01f5=0xe2 0x67
|
||||
U00ed=0xe2 0x69
|
||||
U1e31=0xe2 0x6b
|
||||
U013a=0xe2 0x6c
|
||||
U1e3f=0xe2 0x6d
|
||||
U0144=0xe2 0x6e
|
||||
U00f3=0xe2 0x6f
|
||||
U1e55=0xe2 0x70
|
||||
U0155=0xe2 0x72
|
||||
U015b=0xe2 0x73
|
||||
U00fa=0xe2 0x75
|
||||
U1e83=0xe2 0x77
|
||||
U00fd=0xe2 0x79
|
||||
U017a=0xe2 0x7a
|
||||
U01fe=0xe2 0xa2
|
||||
U01fc=0xe2 0xa5
|
||||
U1eda=0xe2 0xac
|
||||
U1ee8=0xe2 0xad
|
||||
U01ff=0xe2 0xb2
|
||||
U01fd=0xe2 0xb5
|
||||
U1edb=0xe2 0xbc
|
||||
U1ee9=0xe2 0xbd
|
||||
U1ea4=0xe2 0xe3 0x41
|
||||
U1ebe=0xe2 0xe3 0x45
|
||||
U1ed0=0xe2 0xe3 0x4f
|
||||
U1ea5=0xe2 0xe3 0x61
|
||||
U1ebf=0xe2 0xe3 0x65
|
||||
U1ed1=0xe2 0xe3 0x6f
|
||||
U1e4c=0xe2 0xe4 0x4f
|
||||
U1e78=0xe2 0xe4 0x55
|
||||
U1e4d=0xe2 0xe4 0x6f
|
||||
U1e79=0xe2 0xe4 0x75
|
||||
U1e16=0xe2 0xe5 0x45
|
||||
U1e52=0xe2 0xe5 0x4f
|
||||
U1e17=0xe2 0xe5 0x65
|
||||
U1e53=0xe2 0xe5 0x6f
|
||||
U1eae=0xe2 0xe6 0x41
|
||||
U1eaf=0xe2 0xe6 0x61
|
||||
U0344=0xe2 0xe8
|
||||
U1e2e=0xe2 0xe8 0x49
|
||||
U01d7=0xe2 0xe8 0x55
|
||||
U1e2f=0xe2 0xe8 0x69
|
||||
U01d8=0xe2 0xe8 0x75
|
||||
U01fa=0xe2 0xea 0x41
|
||||
U01fb=0xe2 0xea 0x61
|
||||
U1e08=0xe2 0xf0 0x43
|
||||
U1e09=0xe2 0xf0 0x63
|
||||
U0302=0xe3
|
||||
U00c2=0xe3 0x41
|
||||
U0108=0xe3 0x43
|
||||
U00ca=0xe3 0x45
|
||||
U011c=0xe3 0x47
|
||||
U0124=0xe3 0x48
|
||||
U00ce=0xe3 0x49
|
||||
U0134=0xe3 0x4a
|
||||
U00d4=0xe3 0x4f
|
||||
U015c=0xe3 0x53
|
||||
U00db=0xe3 0x55
|
||||
U0174=0xe3 0x57
|
||||
U0176=0xe3 0x59
|
||||
U1e90=0xe3 0x5a
|
||||
U00e2=0xe3 0x61
|
||||
U0109=0xe3 0x63
|
||||
U00ea=0xe3 0x65
|
||||
U011d=0xe3 0x67
|
||||
U0125=0xe3 0x68
|
||||
U00ee=0xe3 0x69
|
||||
U0135=0xe3 0x6a
|
||||
U00f4=0xe3 0x6f
|
||||
U015d=0xe3 0x73
|
||||
U00fb=0xe3 0x75
|
||||
U0175=0xe3 0x77
|
||||
U0177=0xe3 0x79
|
||||
U1e91=0xe3 0x7a
|
||||
U1eac=0xe3 0xf2 0x41
|
||||
U1ec6=0xe3 0xf2 0x45
|
||||
U1ed8=0xe3 0xf2 0x4f
|
||||
U1ead=0xe3 0xf2 0x61
|
||||
U1ec7=0xe3 0xf2 0x65
|
||||
U1ed9=0xe3 0xf2 0x6f
|
||||
U0303=0xe4
|
||||
U00c3=0xe4 0x41
|
||||
U1ebc=0xe4 0x45
|
||||
U0128=0xe4 0x49
|
||||
U00d1=0xe4 0x4e
|
||||
U00d5=0xe4 0x4f
|
||||
U0168=0xe4 0x55
|
||||
U1e7c=0xe4 0x56
|
||||
U1ef8=0xe4 0x59
|
||||
U00e3=0xe4 0x61
|
||||
U1ebd=0xe4 0x65
|
||||
U0129=0xe4 0x69
|
||||
U00f1=0xe4 0x6e
|
||||
U00f5=0xe4 0x6f
|
||||
U0169=0xe4 0x75
|
||||
U1e7d=0xe4 0x76
|
||||
U1ef9=0xe4 0x79
|
||||
U1ee0=0xe4 0xac
|
||||
U1eee=0xe4 0xad
|
||||
U1ee1=0xe4 0xbc
|
||||
U1eef=0xe4 0xbd
|
||||
U1eaa=0xe4 0xe3 0x41
|
||||
U1ec4=0xe4 0xe3 0x45
|
||||
U1ed6=0xe4 0xe3 0x4f
|
||||
U1eab=0xe4 0xe3 0x61
|
||||
U1ec5=0xe4 0xe3 0x65
|
||||
U1ed7=0xe4 0xe3 0x6f
|
||||
U1eb4=0xe4 0xe6 0x41
|
||||
U1eb5=0xe4 0xe6 0x61
|
||||
U0304=0xe5
|
||||
U0100=0xe5 0x41
|
||||
U0112=0xe5 0x45
|
||||
U1e20=0xe5 0x47
|
||||
U012a=0xe5 0x49
|
||||
U014c=0xe5 0x4f
|
||||
U016a=0xe5 0x55
|
||||
U0232=0xe5 0x59
|
||||
U0101=0xe5 0x61
|
||||
U0113=0xe5 0x65
|
||||
U1e21=0xe5 0x67
|
||||
U012b=0xe5 0x69
|
||||
U014d=0xe5 0x6f
|
||||
U016b=0xe5 0x75
|
||||
U0233=0xe5 0x79
|
||||
U01e2=0xe5 0xa5
|
||||
U01e3=0xe5 0xb5
|
||||
U022c=0xe5 0xe4 0x4f
|
||||
U022d=0xe5 0xe4 0x6f
|
||||
U01e0=0xe5 0xe7 0x41
|
||||
U0230=0xe5 0xe7 0x4f
|
||||
U01e1=0xe5 0xe7 0x61
|
||||
U0231=0xe5 0xe7 0x6f
|
||||
U01de=0xe5 0xe8 0x41
|
||||
U022a=0xe5 0xe8 0x4f
|
||||
U01d5=0xe5 0xe8 0x55
|
||||
U01df=0xe5 0xe8 0x61
|
||||
U022b=0xe5 0xe8 0x6f
|
||||
U01d6=0xe5 0xe8 0x75
|
||||
U01ec=0xe5 0xf1 0x4f
|
||||
U01ed=0xe5 0xf1 0x6f
|
||||
U1e38=0xe5 0xf2 0x4c
|
||||
U1e5c=0xe5 0xf2 0x52
|
||||
U1e39=0xe5 0xf2 0x6c
|
||||
U1e5d=0xe5 0xf2 0x72
|
||||
U0306=0xe6
|
||||
U0102=0xe6 0x41
|
||||
U0114=0xe6 0x45
|
||||
U011e=0xe6 0x47
|
||||
U012c=0xe6 0x49
|
||||
U014e=0xe6 0x4f
|
||||
U016c=0xe6 0x55
|
||||
U0103=0xe6 0x61
|
||||
U0115=0xe6 0x65
|
||||
U011f=0xe6 0x67
|
||||
U012d=0xe6 0x69
|
||||
U014f=0xe6 0x6f
|
||||
U016d=0xe6 0x75
|
||||
U1e1c=0xe6 0xf0 0x45
|
||||
U1e1d=0xe6 0xf0 0x65
|
||||
U1eb6=0xe6 0xf2 0x41
|
||||
U1eb7=0xe6 0xf2 0x61
|
||||
U0307=0xe7
|
||||
U0226=0xe7 0x41
|
||||
U1e02=0xe7 0x42
|
||||
U010a=0xe7 0x43
|
||||
U1e0a=0xe7 0x44
|
||||
U0116=0xe7 0x45
|
||||
U1e1e=0xe7 0x46
|
||||
U0120=0xe7 0x47
|
||||
U1e22=0xe7 0x48
|
||||
U0130=0xe7 0x49
|
||||
U1e40=0xe7 0x4d
|
||||
U1e44=0xe7 0x4e
|
||||
U022e=0xe7 0x4f
|
||||
U1e56=0xe7 0x50
|
||||
U1e58=0xe7 0x52
|
||||
U1e60=0xe7 0x53
|
||||
U1e6a=0xe7 0x54
|
||||
U1e86=0xe7 0x57
|
||||
U1e8a=0xe7 0x58
|
||||
U1e8e=0xe7 0x59
|
||||
U017b=0xe7 0x5a
|
||||
U0227=0xe7 0x61
|
||||
U1e03=0xe7 0x62
|
||||
U010b=0xe7 0x63
|
||||
U1e0b=0xe7 0x64
|
||||
U0117=0xe7 0x65
|
||||
U1e1f=0xe7 0x66
|
||||
U0121=0xe7 0x67
|
||||
U1e23=0xe7 0x68
|
||||
U1e41=0xe7 0x6d
|
||||
U1e45=0xe7 0x6e
|
||||
U022f=0xe7 0x6f
|
||||
U1e57=0xe7 0x70
|
||||
U1e59=0xe7 0x72
|
||||
U1e61=0xe7 0x73
|
||||
U1e6b=0xe7 0x74
|
||||
U1e87=0xe7 0x77
|
||||
U1e8b=0xe7 0x78
|
||||
U1e8f=0xe7 0x79
|
||||
U017c=0xe7 0x7a
|
||||
U1e64=0xe7 0xe2 0x53
|
||||
U1e65=0xe7 0xe2 0x73
|
||||
U1e66=0xe7 0xe9 0x53
|
||||
U1e67=0xe7 0xe9 0x73
|
||||
U1e68=0xe7 0xf2 0x53
|
||||
U1e69=0xe7 0xf2 0x73
|
||||
U0308=0xe8
|
||||
U00c4=0xe8 0x41
|
||||
U00cb=0xe8 0x45
|
||||
U1e26=0xe8 0x48
|
||||
U00cf=0xe8 0x49
|
||||
U00d6=0xe8 0x4f
|
||||
U00dc=0xe8 0x55
|
||||
U1e84=0xe8 0x57
|
||||
U1e8c=0xe8 0x58
|
||||
U0178=0xe8 0x59
|
||||
U00e4=0xe8 0x61
|
||||
U00eb=0xe8 0x65
|
||||
U1e27=0xe8 0x68
|
||||
U00ef=0xe8 0x69
|
||||
U00f6=0xe8 0x6f
|
||||
U1e97=0xe8 0x74
|
||||
U00fc=0xe8 0x75
|
||||
U1e85=0xe8 0x77
|
||||
U1e8d=0xe8 0x78
|
||||
U00ff=0xe8 0x79
|
||||
U1e4e=0xe8 0xe4 0x4f
|
||||
U1e4f=0xe8 0xe4 0x6f
|
||||
U1e7a=0xe8 0xe5 0x55
|
||||
U1e7b=0xe8 0xe5 0x75
|
||||
U030c=0xe9
|
||||
U01cd=0xe9 0x41
|
||||
U010c=0xe9 0x43
|
||||
U010e=0xe9 0x44
|
||||
U011a=0xe9 0x45
|
||||
U01e6=0xe9 0x47
|
||||
U021e=0xe9 0x48
|
||||
U01cf=0xe9 0x49
|
||||
U01e8=0xe9 0x4b
|
||||
U013d=0xe9 0x4c
|
||||
U0147=0xe9 0x4e
|
||||
U01d1=0xe9 0x4f
|
||||
U0158=0xe9 0x52
|
||||
U0160=0xe9 0x53
|
||||
U0164=0xe9 0x54
|
||||
U01d3=0xe9 0x55
|
||||
U017d=0xe9 0x5a
|
||||
U01ce=0xe9 0x61
|
||||
U010d=0xe9 0x63
|
||||
U010f=0xe9 0x64
|
||||
U011b=0xe9 0x65
|
||||
U01e7=0xe9 0x67
|
||||
U021f=0xe9 0x68
|
||||
U01d0=0xe9 0x69
|
||||
U01f0=0xe9 0x6a
|
||||
U01e9=0xe9 0x6b
|
||||
U013e=0xe9 0x6c
|
||||
U0148=0xe9 0x6e
|
||||
U01d2=0xe9 0x6f
|
||||
U0159=0xe9 0x72
|
||||
U0161=0xe9 0x73
|
||||
U0165=0xe9 0x74
|
||||
U01d4=0xe9 0x75
|
||||
U017e=0xe9 0x7a
|
||||
U01d9=0xe9 0xe8 0x55
|
||||
U01da=0xe9 0xe8 0x75
|
||||
U030a=0xea
|
||||
U00c5=0xea 0x41
|
||||
U016e=0xea 0x55
|
||||
U00e5=0xea 0x61
|
||||
U016f=0xea 0x75
|
||||
U1e98=0xea 0x77
|
||||
U1e99=0xea 0x79
|
||||
U0361=0xeb
|
||||
U0315=0xed
|
||||
U030b=0xee
|
||||
U0150=0xee 0x4f
|
||||
U0170=0xee 0x55
|
||||
U0151=0xee 0x6f
|
||||
U0171=0xee 0x75
|
||||
U0310=0xef
|
||||
U0327=0xf0
|
||||
U00c7=0xf0 0x43
|
||||
U1e10=0xf0 0x44
|
||||
U0228=0xf0 0x45
|
||||
U0122=0xf0 0x47
|
||||
U1e28=0xf0 0x48
|
||||
U0136=0xf0 0x4b
|
||||
U013b=0xf0 0x4c
|
||||
U0145=0xf0 0x4e
|
||||
U0156=0xf0 0x52
|
||||
U015e=0xf0 0x53
|
||||
U0162=0xf0 0x54
|
||||
U00e7=0xf0 0x63
|
||||
U1e11=0xf0 0x64
|
||||
U0229=0xf0 0x65
|
||||
U0123=0xf0 0x67
|
||||
U1e29=0xf0 0x68
|
||||
U0137=0xf0 0x6b
|
||||
U013c=0xf0 0x6c
|
||||
U0146=0xf0 0x6e
|
||||
U0157=0xf0 0x72
|
||||
U015f=0xf0 0x73
|
||||
U0163=0xf0 0x74
|
||||
U0328=0xf1
|
||||
U0104=0xf1 0x41
|
||||
U0118=0xf1 0x45
|
||||
U012e=0xf1 0x49
|
||||
U01ea=0xf1 0x4f
|
||||
U0172=0xf1 0x55
|
||||
U0105=0xf1 0x61
|
||||
U0119=0xf1 0x65
|
||||
U012f=0xf1 0x69
|
||||
U01eb=0xf1 0x6f
|
||||
U0173=0xf1 0x75
|
||||
U0323=0xf2
|
||||
U1ea0=0xf2 0x41
|
||||
U1e04=0xf2 0x42
|
||||
U1e0c=0xf2 0x44
|
||||
U1eb8=0xf2 0x45
|
||||
U1e24=0xf2 0x48
|
||||
U1eca=0xf2 0x49
|
||||
U1e32=0xf2 0x4b
|
||||
U1e36=0xf2 0x4c
|
||||
U1e42=0xf2 0x4d
|
||||
U1e46=0xf2 0x4e
|
||||
U1ecc=0xf2 0x4f
|
||||
U1e5a=0xf2 0x52
|
||||
U1e62=0xf2 0x53
|
||||
U1e6c=0xf2 0x54
|
||||
U1ee4=0xf2 0x55
|
||||
U1e7e=0xf2 0x56
|
||||
U1e88=0xf2 0x57
|
||||
U1ef4=0xf2 0x59
|
||||
U1e92=0xf2 0x5a
|
||||
U1ea1=0xf2 0x61
|
||||
U1e05=0xf2 0x62
|
||||
U1e0d=0xf2 0x64
|
||||
U1eb9=0xf2 0x65
|
||||
U1e25=0xf2 0x68
|
||||
U1ecb=0xf2 0x69
|
||||
U1e33=0xf2 0x6b
|
||||
U1e37=0xf2 0x6c
|
||||
U1e43=0xf2 0x6d
|
||||
U1e47=0xf2 0x6e
|
||||
U1ecd=0xf2 0x6f
|
||||
U1e5b=0xf2 0x72
|
||||
U1e63=0xf2 0x73
|
||||
U1e6d=0xf2 0x74
|
||||
U1ee5=0xf2 0x75
|
||||
U1e7f=0xf2 0x76
|
||||
U1e89=0xf2 0x77
|
||||
U1ef5=0xf2 0x79
|
||||
U1e93=0xf2 0x7a
|
||||
U1ee2=0xf2 0xac
|
||||
U1ef0=0xf2 0xad
|
||||
U1ee3=0xf2 0xbc
|
||||
U1ef1=0xf2 0xbd
|
||||
U0324=0xf3
|
||||
U1e72=0xf3 0x55
|
||||
U1e73=0xf3 0x75
|
||||
U0325=0xf4
|
||||
U1e00=0xf4 0x41
|
||||
U1e01=0xf4 0x61
|
||||
U0333=0xf5
|
||||
U0332=0xf6
|
||||
U0326=0xf7
|
||||
U0218=0xf7 0x53
|
||||
U021a=0xf7 0x54
|
||||
U0219=0xf7 0x73
|
||||
U021b=0xf7 0x74
|
||||
U031c=0xf8
|
||||
U032e=0xf9
|
||||
U1e2a=0xf9 0x48
|
||||
U1e2b=0xf9 0x68
|
||||
U0360=0xfa
|
||||
U0313=0xfe
|
98943
src/main/resources/org/xbib/charset/codetables.xml
Normal file
98943
src/main/resources/org/xbib/charset/codetables.xml
Normal file
File diff suppressed because it is too large
Load diff
212
src/main/resources/org/xbib/charset/pica.txt
Normal file
212
src/main/resources/org/xbib/charset/pica.txt
Normal file
|
@ -0,0 +1,212 @@
|
|||
/*
|
||||
|
||||
De PICA characterset is een enigszins gemodificeerde versie van de INTERMARC characterset.
|
||||
|
||||
Deze characterset omvat in totaal 256 tekens.
|
||||
|
||||
Kolommen:
|
||||
(1) = Octaal
|
||||
(2) = Teken
|
||||
(3) = Omschrijving
|
||||
|
||||
(1) (2) (3)
|
||||
|
||||
-----------------------------------------------------------------
|
||||
|
||||
000-177 Standaards ASCII (eerste groep van 128 tekens)
|
||||
|
||||
200-237 niet gebruikt
|
||||
|
||||
240 diacritische spatie
|
||||
|
||||
241 Poolse L
|
||||
|
||||
242 Deense O
|
||||
|
||||
243 Ð Servische D
|
||||
|
||||
244 Þ Thorn (groot)
|
||||
|
||||
245 Æ Ligatuur AE
|
||||
|
||||
246 Œ Ligatuur OE
|
||||
|
||||
247 ¢ Cyrillische zachtteken (translitt.)
|
||||
|
||||
250 × Griekse half-hoge punt
|
||||
|
||||
251 Mol
|
||||
|
||||
252 ® Registratie-teken
|
||||
|
||||
253 ± Plusminus
|
||||
|
||||
254 O Vietnamese O-haak
|
||||
|
||||
255 U Vietnamese U-haak
|
||||
|
||||
256 ? Alif
|
||||
|
||||
257 Å Angstrom A
|
||||
|
||||
260 ` Ayn
|
||||
|
||||
261 Poolse l
|
||||
|
||||
262 Deense o
|
||||
|
||||
263 Servische d
|
||||
|
||||
264 þ Thorn (klein)
|
||||
|
||||
265 æ Ligatuur ae
|
||||
|
||||
266 œ Ligatuur oe
|
||||
|
||||
267 Cyrillische hardteken (translitt.)
|
||||
|
||||
270 Turkse i (zonder punt)
|
||||
|
||||
271 £ Brits pond-teken
|
||||
|
||||
272 Eth
|
||||
|
||||
273 α Alfa
|
||||
|
||||
274 Vietnamese o-haak
|
||||
|
||||
275 Vietnamese u-haak
|
||||
|
||||
276 ß Duitse dubbele S
|
||||
|
||||
277 å Angstrom a
|
||||
|
||||
300 Nederlandse IJ
|
||||
|
||||
301 Ä Umlaut A
|
||||
|
||||
302 Ö Umlaut O
|
||||
|
||||
303 Ü Umlaut U
|
||||
|
||||
304 Omgekeerde C
|
||||
|
||||
305 Omgekeerde E
|
||||
|
||||
306 ≠ Ongelijk-teken
|
||||
|
||||
307 → Fleche
|
||||
|
||||
310 ≤ Kleiner dan/is-gelijk-teken
|
||||
|
||||
311 ∞ Oneindig-teken
|
||||
|
||||
312 ∫ Integraal-teken
|
||||
|
||||
313 Vermenigvuldiging-teken
|
||||
|
||||
314 § Paragraaf
|
||||
|
||||
315 √ Vierkantswortel-teken
|
||||
|
||||
316 Reaction
|
||||
|
||||
317 ≥ Groter dan/is-gelijk-teken
|
||||
|
||||
320 Nederlandse ij
|
||||
|
||||
321 ä Umlaut a
|
||||
|
||||
322 ö Umlaut o
|
||||
|
||||
323 ü Umlaut u
|
||||
|
||||
324 Omgekeerde c
|
||||
|
||||
325 Omgekeerde e
|
||||
|
||||
326 ¿ Spaans omgekeerd vraagteken
|
||||
|
||||
327 ¡ Spaans omgekeerd uitroepteken
|
||||
|
||||
330 β Beta
|
||||
|
||||
331
|
||||
|
||||
332 γ Gamma
|
||||
|
||||
333 π Pi
|
||||
|
||||
334
|
||||
|
||||
335
|
||||
|
||||
336
|
||||
|
||||
337
|
||||
|
||||
340 ` Vietnamese rijzende toon
|
||||
|
||||
341 ` Accent grave (zie ook octaal 140)
|
||||
|
||||
342 ? Accent aigu
|
||||
|
||||
343 ? Accent circonflexe (zie ook 140)
|
||||
|
||||
344 ~ Tilde
|
||||
|
||||
345 ¯ Bovenstreepje (lang)
|
||||
|
||||
346 Bovenstreepje (kort)
|
||||
|
||||
347 × Punt boven
|
||||
|
||||
350 ? Trema (geen umlaut)
|
||||
|
||||
351 Hacek
|
||||
|
||||
352 ? Angstrom
|
||||
|
||||
353 Ligatuur links
|
||||
|
||||
354 Ligatuur rechts
|
||||
|
||||
355 ' Komma als accent (bovenaan)
|
||||
|
||||
356 ² Dubbele aigu
|
||||
|
||||
357 Candrabindu
|
||||
|
||||
360 ? Cedille
|
||||
|
||||
361 Hoek boven links
|
||||
|
||||
362 ¢ Punt als accent (onderaan)
|
||||
|
||||
363 ² Twee punten als accent (onderaan)
|
||||
|
||||
364 Cirkeltje onderaan
|
||||
|
||||
365 Dubbele onderstreping als accent
|
||||
|
||||
366 _ Onderstreping als accent
|
||||
|
||||
367 Hoek boven rechts
|
||||
|
||||
370 Omgekeerde cedille
|
||||
|
||||
371 Upadhmaniya (geen accent)
|
||||
|
||||
372 Halve tilde rechts
|
||||
|
||||
373 Halve tilde links
|
||||
|
||||
374
|
||||
|
||||
375
|
||||
|
||||
376 ? Komma rechts (op middelhoogte)
|
||||
|
||||
377
|
||||
|
||||
*/
|
547
src/main/resources/org/xbib/charset/z3947.txt
Normal file
547
src/main/resources/org/xbib/charset/z3947.txt
Normal file
|
@ -0,0 +1,547 @@
|
|||
#step 1
|
||||
#created: 2001-03-19
|
||||
|
||||
A1=0141#latin capital letter L with stroke
|
||||
A2=00D8#latin capital letter O with stroke
|
||||
A3=0110#latin capital letter D with stroke
|
||||
A4=00DE#latin capital letter thorn
|
||||
A5=00C6#latin capital letter AE
|
||||
A6=0152#latin capital ligature OE
|
||||
A7=02B9#modified letter prime
|
||||
A8=00B7#middle dot
|
||||
A9=266D#music flat sign
|
||||
AA=00AE#registered sign
|
||||
AB=00B1#plus-minus sign
|
||||
AC=01A0#latin capital letter O with horn
|
||||
AD=01AF#latin capital letter U with horn
|
||||
AE=02BC#modifier letter apostrophe
|
||||
B0=02BB#modifier letter turned comma
|
||||
B1=0142#latin small letter L with stroke
|
||||
B2=00F8#latin small letter O with stroke
|
||||
B3=0111#latin small letter D with stroke
|
||||
B4=00FE#latin small letter thorn
|
||||
B5=00E6#latin small letter AE
|
||||
B6=0153#latin small ligature OE
|
||||
B7=02BA#modified letter double prime
|
||||
B8=0131#latin small letter dotless i
|
||||
B9=00A3#pound sign
|
||||
BA=00F0#latin small letter eth
|
||||
BC=01A1#latin small letter O with horn
|
||||
BD=01B0#latin small letter U with horn
|
||||
C0=00B0#degree sign
|
||||
C1=2113#script small L
|
||||
C2=2117#sound recording copyright
|
||||
C3=00A9#copyright sign
|
||||
C4=266F#music sharp sign
|
||||
C5=00BF#inverted question mark
|
||||
C6=00A1#inverted exclamation mark
|
||||
CF=00DF#latin small letter sharp S
|
||||
E0=0309#combining hook above
|
||||
E1=0300#combining grave accent
|
||||
E2=0301#combining acute accent
|
||||
E3=0302#combining circumflex accent
|
||||
E4=0303#combining tilde
|
||||
E5=0304#combining macron
|
||||
E6=0306#combining breve
|
||||
E7=0307#combining dot above
|
||||
E8=0308#combining diaeresis
|
||||
E9=030C#combining caron
|
||||
EA=030A#combining ring above
|
||||
EB=FE20#combining ligature left half
|
||||
EC=FE21#combining ligature right half
|
||||
ED=0315#combining comma above right
|
||||
EE=030B#combining double acute accent
|
||||
EF=0310#combining candrabindu
|
||||
F0=0327#combining cedilla
|
||||
F1=0328#combining ogonek
|
||||
F2=0323#combining dot below
|
||||
F3=0324#combining diaeresis below
|
||||
F4=0325#combining ring below
|
||||
F5=0333#combining double low line
|
||||
F6=0332#combining low line
|
||||
F7=0326#combining comma below
|
||||
F8=0321#combining ogonek
|
||||
F9=032E#combining breve below
|
||||
FA=FE22#combining double tilde left half
|
||||
FB=FE23#combining double tilde right half
|
||||
FE=0313#combining comma above
|
||||
|
||||
#step 2
|
||||
#created: 20 january 1998
|
||||
|
||||
0041+0300=00C0# LATIN CAPITAL LETTER A WITH GRAVE = LATIN CAPITAL LETTER A + COMBINING GRAVE ACCENT
|
||||
0041+0301=00C1# LATIN CAPITAL LETTER A WITH ACUTE = LATIN CAPITAL LETTER A + COMBINING ACUTE ACCENT
|
||||
0041+0302=00C2# LATIN CAPITAL LETTER A WITH CIRCUMFLEX = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT
|
||||
0041+0303=00C3# LATIN CAPITAL LETTER A WITH TILDE = LATIN CAPITAL LETTER A + COMBINING TILDE
|
||||
0041+0308=00C4# LATIN CAPITAL LETTER A WITH DIAERESIS = LATIN CAPITAL LETTER A + COMBINING DIAERESIS
|
||||
0041+030A=00C5# LATIN CAPITAL LETTER A WITH RING ABOVE = LATIN CAPITAL LETTER A + COMBINING RING ABOVE
|
||||
0043+0327=00C7# LATIN CAPITAL LETTER C WITH CEDILLA = LATIN CAPITAL LETTER C + COMBINING CEDILLA
|
||||
0045+0300=00C8# LATIN CAPITAL LETTER E WITH GRAVE = LATIN CAPITAL LETTER E + COMBINING GRAVE ACCENT
|
||||
0045+0301=00C9# LATIN CAPITAL LETTER E WITH ACUTE = LATIN CAPITAL LETTER E + COMBINING ACUTE ACCENT
|
||||
0045+0302=00CA# LATIN CAPITAL LETTER E WITH CIRCUMFLEX = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT
|
||||
0045+0308=00CB# LATIN CAPITAL LETTER E WITH DIAERESIS = LATIN CAPITAL LETTER E + COMBINING DIAERESIS
|
||||
0049+0300=00CC# LATIN CAPITAL LETTER I WITH GRAVE = LATIN CAPITAL LETTER I + COMBINING GRAVE ACCENT
|
||||
0049+0301=00CD# LATIN CAPITAL LETTER I WITH ACUTE = LATIN CAPITAL LETTER I + COMBINING ACUTE ACCENT
|
||||
0049+0302=00CE# LATIN CAPITAL LETTER I WITH CIRCUMFLEX = LATIN CAPITAL LETTER I + COMBINING CIRCUMFLEX ACCENT
|
||||
0049+0308=00CF# LATIN CAPITAL LETTER I WITH DIAERESIS = LATIN CAPITAL LETTER I + COMBINING DIAERESIS
|
||||
004E+0303=00D1# LATIN CAPITAL LETTER N WITH TILDE = LATIN CAPITAL LETTER N + COMBINING TILDE
|
||||
004F+0300=00D2# LATIN CAPITAL LETTER O WITH GRAVE = LATIN CAPITAL LETTER O + COMBINING GRAVE ACCENT
|
||||
004F+0301=00D3# LATIN CAPITAL LETTER O WITH ACUTE = LATIN CAPITAL LETTER O + COMBINING ACUTE ACCENT
|
||||
004F+0302=00D4# LATIN CAPITAL LETTER O WITH CIRCUMFLEX = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT
|
||||
004F+0303=00D5# LATIN CAPITAL LETTER O WITH TILDE = LATIN CAPITAL LETTER O + COMBINING TILDE
|
||||
004F+0308=00D6# LATIN CAPITAL LETTER O WITH DIAERESIS = LATIN CAPITAL LETTER O + COMBINING DIAERESIS
|
||||
0055+0300=00D9# LATIN CAPITAL LETTER U WITH GRAVE = LATIN CAPITAL LETTER U + COMBINING GRAVE ACCENT
|
||||
0055+0301=00DA# LATIN CAPITAL LETTER U WITH ACUTE = LATIN CAPITAL LETTER U + COMBINING ACUTE ACCENT
|
||||
0055+0302=00DB# LATIN CAPITAL LETTER U WITH CIRCUMFLEX = LATIN CAPITAL LETTER U + COMBINING CIRCUMFLEX ACCENT
|
||||
0055+0308=00DC# LATIN CAPITAL LETTER U WITH DIAERESIS = LATIN CAPITAL LETTER U + COMBINING DIAERESIS
|
||||
0059+0301=00DD# LATIN CAPITAL LETTER Y WITH ACUTE = LATIN CAPITAL LETTER Y + COMBINING ACUTE ACCENT
|
||||
0061+0300=00E0# LATIN SMALL LETTER A WITH GRAVE = LATIN SMALL LETTER A + COMBINING GRAVE ACCENT
|
||||
0061+0301=00E1# LATIN SMALL LETTER A WITH ACUTE = LATIN SMALL LETTER A + COMBINING ACUTE ACCENT
|
||||
0061+0302=00E2# LATIN SMALL LETTER A WITH CIRCUMFLEX = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT
|
||||
0061+0303=00E3# LATIN SMALL LETTER A WITH TILDE = LATIN SMALL LETTER A + COMBINING TILDE
|
||||
0061+0308=00E4# LATIN SMALL LETTER A WITH DIAERESIS = LATIN SMALL LETTER A + COMBINING DIAERESIS
|
||||
0061+030A=00E5# LATIN SMALL LETTER A WITH RING ABOVE = LATIN SMALL LETTER A + COMBINING RING ABOVE
|
||||
0063+0327=00E7# LATIN SMALL LETTER C WITH CEDILLA = LATIN SMALL LETTER C + COMBINING CEDILLA
|
||||
0065+0300=00E8# LATIN SMALL LETTER E WITH GRAVE = LATIN SMALL LETTER E + COMBINING GRAVE ACCENT
|
||||
0065+0301=00E9# LATIN SMALL LETTER E WITH ACUTE = LATIN SMALL LETTER E + COMBINING ACUTE ACCENT
|
||||
0065+0302=00EA# LATIN SMALL LETTER E WITH CIRCUMFLEX = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT
|
||||
0065+0308=00EB# LATIN SMALL LETTER E WITH DIAERESIS = LATIN SMALL LETTER E + COMBINING DIAERESIS
|
||||
0069+0300=00EC# LATIN SMALL LETTER I WITH GRAVE = LATIN SMALL LETTER I + COMBINING GRAVE ACCENT
|
||||
0069+0301=00ED# LATIN SMALL LETTER I WITH ACUTE = LATIN SMALL LETTER I + COMBINING ACUTE ACCENT
|
||||
0069+0302=00EE# LATIN SMALL LETTER I WITH CIRCUMFLEX = LATIN SMALL LETTER I + COMBINING CIRCUMFLEX ACCENT
|
||||
0069+0308=00EF# LATIN SMALL LETTER I WITH DIAERESIS = LATIN SMALL LETTER I + COMBINING DIAERESIS
|
||||
006E+0303=00F1# LATIN SMALL LETTER N WITH TILDE = LATIN SMALL LETTER N + COMBINING TILDE
|
||||
006F+0300=00F2# LATIN SMALL LETTER O WITH GRAVE = LATIN SMALL LETTER O + COMBINING GRAVE ACCENT
|
||||
006F+0301=00F3# LATIN SMALL LETTER O WITH ACUTE = LATIN SMALL LETTER O + COMBINING ACUTE ACCENT
|
||||
006F+0302=00F4# LATIN SMALL LETTER O WITH CIRCUMFLEX = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT
|
||||
006F+0303=00F5# LATIN SMALL LETTER O WITH TILDE = LATIN SMALL LETTER O + COMBINING TILDE
|
||||
006F+0308=00F6# LATIN SMALL LETTER O WITH DIAERESIS = LATIN SMALL LETTER O + COMBINING DIAERESIS
|
||||
0075+0300=00F9# LATIN SMALL LETTER U WITH GRAVE = LATIN SMALL LETTER U + COMBINING GRAVE ACCENT
|
||||
0075+0301=00FA# LATIN SMALL LETTER U WITH ACUTE = LATIN SMALL LETTER U + COMBINING ACUTE ACCENT
|
||||
0075+0302=00FB# LATIN SMALL LETTER U WITH CIRCUMFLEX = LATIN SMALL LETTER U + COMBINING CIRCUMFLEX ACCENT
|
||||
0075+0308=00FC# LATIN SMALL LETTER U WITH DIAERESIS = LATIN SMALL LETTER U + COMBINING DIAERESIS
|
||||
0079+0301=00FD# LATIN SMALL LETTER Y WITH ACUTE = LATIN SMALL LETTER Y + COMBINING ACUTE ACCENT
|
||||
0079+0308=00FF# LATIN SMALL LETTER Y WITH DIAERESIS = LATIN SMALL LETTER Y + COMBINING DIAERESIS
|
||||
0041+0304=0100# LATIN CAPITAL LETTER A WITH MACRON = LATIN CAPITAL LETTER A + COMBINING MACRON
|
||||
0061+0304=0101# LATIN SMALL LETTER A WITH MACRON = LATIN SMALL LETTER A + COMBINING MACRON
|
||||
0041+0306=0102# LATIN CAPITAL LETTER A WITH BREVE = LATIN CAPITAL LETTER A + COMBINING BREVE
|
||||
0061+0306=0103# LATIN SMALL LETTER A WITH BREVE = LATIN SMALL LETTER A + COMBINING BREVE
|
||||
0041+0328=0104# LATIN CAPITAL LETTER A WITH OGONEK = LATIN CAPITAL LETTER A + COMBINING OGONEK
|
||||
0061+0328=0105# LATIN SMALL LETTER A WITH OGONEK = LATIN SMALL LETTER A + COMBINING OGONEK
|
||||
0043+0301=0106# LATIN CAPITAL LETTER C WITH ACUTE = LATIN CAPITAL LETTER C + COMBINING ACUTE ACCENT
|
||||
0063+0301=0107# LATIN SMALL LETTER C WITH ACUTE = LATIN SMALL LETTER C + COMBINING ACUTE ACCENT
|
||||
0043+0302=0108# LATIN CAPITAL LETTER C WITH CIRCUMFLEX = LATIN CAPITAL LETTER C + COMBINING CIRCUMFLEX ACCENT
|
||||
0063+0302=0109# LATIN SMALL LETTER C WITH CIRCUMFLEX = LATIN SMALL LETTER C + COMBINING CIRCUMFLEX ACCENT
|
||||
0043+0307=010A# LATIN CAPITAL LETTER C WITH DOT ABOVE = LATIN CAPITAL LETTER C + COMBINING DOT ABOVE
|
||||
0063+0307=010B# LATIN SMALL LETTER C WITH DOT ABOVE = LATIN SMALL LETTER C + COMBINING DOT ABOVE
|
||||
0043+030C=010C# LATIN CAPITAL LETTER C WITH CARON = LATIN CAPITAL LETTER C + COMBINING CARON
|
||||
0063+030C=010D# LATIN SMALL LETTER C WITH CARON = LATIN SMALL LETTER C + COMBINING CARON
|
||||
0044+030C=010E# LATIN CAPITAL LETTER D WITH CARON = LATIN CAPITAL LETTER D + COMBINING CARON
|
||||
0064+030C=010F# LATIN SMALL LETTER D WITH CARON = LATIN SMALL LETTER D + COMBINING CARON
|
||||
0045+0304=0112# LATIN CAPITAL LETTER E WITH MACRON = LATIN CAPITAL LETTER E + COMBINING MACRON
|
||||
0065+0304=0113# LATIN SMALL LETTER E WITH MACRON = LATIN SMALL LETTER E + COMBINING MACRON
|
||||
0045+0306=0114# LATIN CAPITAL LETTER E WITH BREVE = LATIN CAPITAL LETTER E + COMBINING BREVE
|
||||
0065+0306=0115# LATIN SMALL LETTER E WITH BREVE = LATIN SMALL LETTER E + COMBINING BREVE
|
||||
0045+0307=0116# LATIN CAPITAL LETTER E WITH DOT ABOVE = LATIN CAPITAL LETTER E + COMBINING DOT ABOVE
|
||||
0065+0307=0117# LATIN SMALL LETTER E WITH DOT ABOVE = LATIN SMALL LETTER E + COMBINING DOT ABOVE
|
||||
0045+0328=0118# LATIN CAPITAL LETTER E WITH OGONEK = LATIN CAPITAL LETTER E + COMBINING OGONEK
|
||||
0065+0328=0119# LATIN SMALL LETTER E WITH OGONEK = LATIN SMALL LETTER E + COMBINING OGONEK
|
||||
0045+030C=011A# LATIN CAPITAL LETTER E WITH CARON = LATIN CAPITAL LETTER E + COMBINING CARON
|
||||
0065+030C=011B# LATIN SMALL LETTER E WITH CARON = LATIN SMALL LETTER E + COMBINING CARON
|
||||
0047+0302=011C# LATIN CAPITAL LETTER G WITH CIRCUMFLEX = LATIN CAPITAL LETTER G + COMBINING CIRCUMFLEX ACCENT
|
||||
0067+0302=011D# LATIN SMALL LETTER G WITH CIRCUMFLEX = LATIN SMALL LETTER G + COMBINING CIRCUMFLEX ACCENT
|
||||
0047+0306=011E# LATIN CAPITAL LETTER G WITH BREVE = LATIN CAPITAL LETTER G + COMBINING BREVE
|
||||
0067+0306=011F# LATIN SMALL LETTER G WITH BREVE = LATIN SMALL LETTER G + COMBINING BREVE
|
||||
0047+0307=0120# LATIN CAPITAL LETTER G WITH DOT ABOVE = LATIN CAPITAL LETTER G + COMBINING DOT ABOVE
|
||||
0067+0307=0121# LATIN SMALL LETTER G WITH DOT ABOVE = LATIN SMALL LETTER G + COMBINING DOT ABOVE
|
||||
0047+0327=0122# LATIN CAPITAL LETTER G WITH CEDILLA = LATIN CAPITAL LETTER G + COMBINING CEDILLA
|
||||
0067+0327=0123# LATIN SMALL LETTER G WITH CEDILLA = LATIN SMALL LETTER G + COMBINING CEDILLA
|
||||
0048+0302=0124# LATIN CAPITAL LETTER H WITH CIRCUMFLEX = LATIN CAPITAL LETTER H + COMBINING CIRCUMFLEX ACCENT
|
||||
0068+0302=0125# LATIN SMALL LETTER H WITH CIRCUMFLEX = LATIN SMALL LETTER H + COMBINING CIRCUMFLEX ACCENT
|
||||
0049+0303=0128# LATIN CAPITAL LETTER I WITH TILDE = LATIN CAPITAL LETTER I + COMBINING TILDE
|
||||
0069+0303=0129# LATIN SMALL LETTER I WITH TILDE = LATIN SMALL LETTER I + COMBINING TILDE
|
||||
0049+0304=012A# LATIN CAPITAL LETTER I WITH MACRON = LATIN CAPITAL LETTER I + COMBINING MACRON
|
||||
0069+0304=012B# LATIN SMALL LETTER I WITH MACRON = LATIN SMALL LETTER I + COMBINING MACRON
|
||||
0049+0306=012C# LATIN CAPITAL LETTER I WITH BREVE = LATIN CAPITAL LETTER I + COMBINING BREVE
|
||||
0069+0306=012D# LATIN SMALL LETTER I WITH BREVE = LATIN SMALL LETTER I + COMBINING BREVE
|
||||
0049+0328=012E# LATIN CAPITAL LETTER I WITH OGONEK = LATIN CAPITAL LETTER I + COMBINING OGONEK
|
||||
0069+0328=012F# LATIN SMALL LETTER I WITH OGONEK = LATIN SMALL LETTER I + COMBINING OGONEK
|
||||
0049+0307=0130# LATIN CAPITAL LETTER I WITH DOT ABOVE = LATIN CAPITAL LETTER I + COMBINING DOT ABOVE
|
||||
004A+0302=0134# LATIN CAPITAL LETTER J WITH CIRCUMFLEX = LATIN CAPITAL LETTER J + COMBINING CIRCUMFLEX ACCENT
|
||||
006A+0302=0135# LATIN SMALL LETTER J WITH CIRCUMFLEX = LATIN SMALL LETTER J + COMBINING CIRCUMFLEX ACCENT
|
||||
004B+0327=0136# LATIN CAPITAL LETTER K WITH CEDILLA = LATIN CAPITAL LETTER K + COMBINING CEDILLA
|
||||
006B+0327=0137# LATIN SMALL LETTER K WITH CEDILLA = LATIN SMALL LETTER K + COMBINING CEDILLA
|
||||
004C+0301=0139# LATIN CAPITAL LETTER L WITH ACUTE = LATIN CAPITAL LETTER L + COMBINING ACUTE ACCENT
|
||||
006C+0301=013A# LATIN SMALL LETTER L WITH ACUTE = LATIN SMALL LETTER L + COMBINING ACUTE ACCENT
|
||||
004C+0327=013B# LATIN CAPITAL LETTER L WITH CEDILLA = LATIN CAPITAL LETTER L + COMBINING CEDILLA
|
||||
006C+0327=013C# LATIN SMALL LETTER L WITH CEDILLA = LATIN SMALL LETTER L + COMBINING CEDILLA
|
||||
004C+030C=013D# LATIN CAPITAL LETTER L WITH CARON = LATIN CAPITAL LETTER L + COMBINING CARON
|
||||
006C+030C=013E# LATIN SMALL LETTER L WITH CARON = LATIN SMALL LETTER L + COMBINING CARON
|
||||
004E+0301=0143# LATIN CAPITAL LETTER N WITH ACUTE = LATIN CAPITAL LETTER N + COMBINING ACUTE ACCENT
|
||||
006E+0301=0144# LATIN SMALL LETTER N WITH ACUTE = LATIN SMALL LETTER N + COMBINING ACUTE ACCENT
|
||||
004E+0327=0145# LATIN CAPITAL LETTER N WITH CEDILLA = LATIN CAPITAL LETTER N + COMBINING CEDILLA
|
||||
006E+0327=0146# LATIN SMALL LETTER N WITH CEDILLA = LATIN SMALL LETTER N + COMBINING CEDILLA
|
||||
004E+030C=0147# LATIN CAPITAL LETTER N WITH CARON = LATIN CAPITAL LETTER N + COMBINING CARON
|
||||
006E+030C=0148# LATIN SMALL LETTER N WITH CARON = LATIN SMALL LETTER N + COMBINING CARON
|
||||
004F+0304=014C# LATIN CAPITAL LETTER O WITH MACRON = LATIN CAPITAL LETTER O + COMBINING MACRON
|
||||
006F+0304=014D# LATIN SMALL LETTER O WITH MACRON = LATIN SMALL LETTER O + COMBINING MACRON
|
||||
004F+0306=014E# LATIN CAPITAL LETTER O WITH BREVE = LATIN CAPITAL LETTER O + COMBINING BREVE
|
||||
006F+0306=014F# LATIN SMALL LETTER O WITH BREVE = LATIN SMALL LETTER O + COMBINING BREVE
|
||||
004F+030B=0150# LATIN CAPITAL LETTER O WITH DOUBLE ACUTE = LATIN CAPITAL LETTER O + COMBINING DOUBLE ACUTE ACCENT
|
||||
006F+030B=0151# LATIN SMALL LETTER O WITH DOUBLE ACUTE = LATIN SMALL LETTER O + COMBINING DOUBLE ACUTE ACCENT
|
||||
0052+0301=0154# LATIN CAPITAL LETTER R WITH ACUTE = LATIN CAPITAL LETTER R + COMBINING ACUTE ACCENT
|
||||
0072+0301=0155# LATIN SMALL LETTER R WITH ACUTE = LATIN SMALL LETTER R + COMBINING ACUTE ACCENT
|
||||
0052+0327=0156# LATIN CAPITAL LETTER R WITH CEDILLA = LATIN CAPITAL LETTER R + COMBINING CEDILLA
|
||||
0072+0327=0157# LATIN SMALL LETTER R WITH CEDILLA = LATIN SMALL LETTER R + COMBINING CEDILLA
|
||||
0052+030C=0158# LATIN CAPITAL LETTER R WITH CARON = LATIN CAPITAL LETTER R + COMBINING CARON
|
||||
0072+030C=0159# LATIN SMALL LETTER R WITH CARON = LATIN SMALL LETTER R + COMBINING CARON
|
||||
0053+0301=015A# LATIN CAPITAL LETTER S WITH ACUTE = LATIN CAPITAL LETTER S + COMBINING ACUTE ACCENT
|
||||
0073+0301=015B# LATIN SMALL LETTER S WITH ACUTE = LATIN SMALL LETTER S + COMBINING ACUTE ACCENT
|
||||
0053+0302=015C# LATIN CAPITAL LETTER S WITH CIRCUMFLEX = LATIN CAPITAL LETTER S + COMBINING CIRCUMFLEX ACCENT
|
||||
0073+0302=015D# LATIN SMALL LETTER S WITH CIRCUMFLEX = LATIN SMALL LETTER S + COMBINING CIRCUMFLEX ACCENT
|
||||
0053+0327=015E# LATIN CAPITAL LETTER S WITH CEDILLA = LATIN CAPITAL LETTER S + COMBINING CEDILLA
|
||||
0073+0327=015F# LATIN SMALL LETTER S WITH CEDILLA = LATIN SMALL LETTER S + COMBINING CEDILLA
|
||||
0053+030C=0160# LATIN CAPITAL LETTER S WITH CARON = LATIN CAPITAL LETTER S + COMBINING CARON
|
||||
0073+030C=0161# LATIN SMALL LETTER S WITH CARON = LATIN SMALL LETTER S + COMBINING CARON
|
||||
0054+0327=0162# LATIN CAPITAL LETTER T WITH CEDILLA = LATIN CAPITAL LETTER T + COMBINING CEDILLA
|
||||
0074+0327=0163# LATIN SMALL LETTER T WITH CEDILLA = LATIN SMALL LETTER T + COMBINING CEDILLA
|
||||
0054+030C=0164# LATIN CAPITAL LETTER T WITH CARON = LATIN CAPITAL LETTER T + COMBINING CARON
|
||||
0074+030C=0165# LATIN SMALL LETTER T WITH CARON = LATIN SMALL LETTER T + COMBINING CARON
|
||||
0055+0303=0168# LATIN CAPITAL LETTER U WITH TILDE = LATIN CAPITAL LETTER U + COMBINING TILDE
|
||||
0075+0303=0169# LATIN SMALL LETTER U WITH TILDE = LATIN SMALL LETTER U + COMBINING TILDE
|
||||
0055+0304=016A# LATIN CAPITAL LETTER U WITH MACRON = LATIN CAPITAL LETTER U + COMBINING MACRON
|
||||
0075+0304=016B# LATIN SMALL LETTER U WITH MACRON = LATIN SMALL LETTER U + COMBINING MACRON
|
||||
0055+0306=016C# LATIN CAPITAL LETTER U WITH BREVE = LATIN CAPITAL LETTER U + COMBINING BREVE
|
||||
0075+0306=016D# LATIN SMALL LETTER U WITH BREVE = LATIN SMALL LETTER U + COMBINING BREVE
|
||||
0055+030A=016E# LATIN CAPITAL LETTER U WITH RING ABOVE = LATIN CAPITAL LETTER U + COMBINING RING ABOVE
|
||||
0075+030A=016F# LATIN SMALL LETTER U WITH RING ABOVE = LATIN SMALL LETTER U + COMBINING RING ABOVE
|
||||
0055+030B=0170# LATIN CAPITAL LETTER U WITH DOUBLE ACUTE = LATIN CAPITAL LETTER U + COMBINING DOUBLE ACUTE ACCENT
|
||||
0075+030B=0171# LATIN SMALL LETTER U WITH DOUBLE ACUTE = LATIN SMALL LETTER U + COMBINING DOUBLE ACUTE ACCENT
|
||||
0055+0328=0172# LATIN CAPITAL LETTER U WITH OGONEK = LATIN CAPITAL LETTER U + COMBINING OGONEK
|
||||
0075+0328=0173# LATIN SMALL LETTER U WITH OGONEK = LATIN SMALL LETTER U + COMBINING OGONEK
|
||||
0057+0302=0174# LATIN CAPITAL LETTER W WITH CIRCUMFLEX = LATIN CAPITAL LETTER W + COMBINING CIRCUMFLEX ACCENT
|
||||
0077+0302=0175# LATIN SMALL LETTER W WITH CIRCUMFLEX = LATIN SMALL LETTER W + COMBINING CIRCUMFLEX ACCENT
|
||||
0059+0302=0176# LATIN CAPITAL LETTER Y WITH CIRCUMFLEX = LATIN CAPITAL LETTER Y + COMBINING CIRCUMFLEX ACCENT
|
||||
0079+0302=0177# LATIN SMALL LETTER Y WITH CIRCUMFLEX = LATIN SMALL LETTER Y + COMBINING CIRCUMFLEX ACCENT
|
||||
0059+0308=0178# LATIN CAPITAL LETTER Y WITH DIAERESIS = LATIN CAPITAL LETTER Y + COMBINING DIAERESIS
|
||||
005A+0301=0179# LATIN CAPITAL LETTER Z WITH ACUTE = LATIN CAPITAL LETTER Z + COMBINING ACUTE ACCENT
|
||||
007A+0301=017A# LATIN SMALL LETTER Z WITH ACUTE = LATIN SMALL LETTER Z + COMBINING ACUTE ACCENT
|
||||
005A+0307=017B# LATIN CAPITAL LETTER Z WITH DOT ABOVE = LATIN CAPITAL LETTER Z + COMBINING DOT ABOVE
|
||||
007A+0307=017C# LATIN SMALL LETTER Z WITH DOT ABOVE = LATIN SMALL LETTER Z + COMBINING DOT ABOVE
|
||||
005A+030C=017D# LATIN CAPITAL LETTER Z WITH CARON = LATIN CAPITAL LETTER Z + COMBINING CARON
|
||||
007A+030C=017E# LATIN SMALL LETTER Z WITH CARON = LATIN SMALL LETTER Z + COMBINING CARON
|
||||
004F+031B=01A0# LATIN CAPITAL LETTER O WITH HORN = LATIN CAPITAL LETTER O + COMBINING HORN
|
||||
006F+031B=01A1# LATIN SMALL LETTER O WITH HORN = LATIN SMALL LETTER O + COMBINING HORN
|
||||
0055+031B=01AF# LATIN CAPITAL LETTER U WITH HORN = LATIN CAPITAL LETTER U + COMBINING HORN
|
||||
0075+031B=01B0# LATIN SMALL LETTER U WITH HORN = LATIN SMALL LETTER U + COMBINING HORN
|
||||
01F1+030C=01C4# LATIN CAPITAL LETTER DZ WITH CARON = LATIN CAPITAL LETTER DZ + COMBINING CARON
|
||||
01F3+030C=01C6# LATIN SMALL LETTER DZ WITH CARON = LATIN SMALL LETTER DZ + COMBINING CARON
|
||||
0041+030C=01CD# LATIN CAPITAL LETTER A WITH CARON = LATIN CAPITAL LETTER A + COMBINING CARON
|
||||
0061+030C=01CE# LATIN SMALL LETTER A WITH CARON = LATIN SMALL LETTER A + COMBINING CARON
|
||||
0049+030C=01CF# LATIN CAPITAL LETTER I WITH CARON = LATIN CAPITAL LETTER I + COMBINING CARON
|
||||
0069+030C=01D0# LATIN SMALL LETTER I WITH CARON = LATIN SMALL LETTER I + COMBINING CARON
|
||||
004F+030C=01D1# LATIN CAPITAL LETTER O WITH CARON = LATIN CAPITAL LETTER O + COMBINING CARON
|
||||
006F+030C=01D2# LATIN SMALL LETTER O WITH CARON = LATIN SMALL LETTER O + COMBINING CARON
|
||||
0055+030C=01D3# LATIN CAPITAL LETTER U WITH CARON = LATIN CAPITAL LETTER U + COMBINING CARON
|
||||
0075+030C=01D4# LATIN SMALL LETTER U WITH CARON = LATIN SMALL LETTER U + COMBINING CARON
|
||||
0055+0308+0304=01D5# LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING MACRON
|
||||
0075+0308+0304=01D6# LATIN SMALL LETTER U WITH DIAERESIS AND MACRON = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING MACRON
|
||||
0055+0308+0301=01D7# LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
|
||||
0075+0308+0301=01D8# LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
|
||||
0055+0308+030C=01D9# LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING CARON
|
||||
0075+0308+030C=01DA# LATIN SMALL LETTER U WITH DIAERESIS AND CARON = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING CARON
|
||||
0055+0308+0300=01DB# LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE = LATIN CAPITAL LETTER U + COMBINING DIAERESIS + COMBINING GRAVE ACCENT
|
||||
0075+0308+0300=01DC# LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE = LATIN SMALL LETTER U + COMBINING DIAERESIS + COMBINING GRAVE ACCENT
|
||||
0041+0308+0304=01DE# LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON = LATIN CAPITAL LETTER A + COMBINING DIAERESIS + COMBINING MACRON
|
||||
0061+0308+0304=01DF# LATIN SMALL LETTER A WITH DIAERESIS AND MACRON = LATIN SMALL LETTER A + COMBINING DIAERESIS + COMBINING MACRON
|
||||
0041+0307+0304=01E0# LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON = LATIN CAPITAL LETTER A + COMBINING DOT ABOVE + COMBINING MACRON
|
||||
0061+0307+0304=01E1# LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON = LATIN SMALL LETTER A + COMBINING DOT ABOVE + COMBINING MACRON
|
||||
00C6+0304=01E2# LATIN CAPITAL LETTER AE WITH MACRON = LATIN CAPITAL LETTER AE + COMBINING MACRON
|
||||
00E6+0304=01E3# LATIN SMALL LETTER AE WITH MACRON = LATIN SMALL LETTER AE + COMBINING MACRON
|
||||
0047+030C=01E6# LATIN CAPITAL LETTER G WITH CARON = LATIN CAPITAL LETTER G + COMBINING CARON
|
||||
0067+030C=01E7# LATIN SMALL LETTER G WITH CARON = LATIN SMALL LETTER G + COMBINING CARON
|
||||
004B+030C=01E8# LATIN CAPITAL LETTER K WITH CARON = LATIN CAPITAL LETTER K + COMBINING CARON
|
||||
006B+030C=01E9# LATIN SMALL LETTER K WITH CARON = LATIN SMALL LETTER K + COMBINING CARON
|
||||
004F+0328=01EA# LATIN CAPITAL LETTER O WITH OGONEK = LATIN CAPITAL LETTER O + COMBINING OGONEK
|
||||
006F+0328=01EB# LATIN SMALL LETTER O WITH OGONEK = LATIN SMALL LETTER O + COMBINING OGONEK
|
||||
004F+0328+0304=01EC# LATIN CAPITAL LETTER O WITH OGONEK AND MACRON = LATIN CAPITAL LETTER O + COMBINING OGONEK + COMBINING MACRON
|
||||
006F+0328+0304=01ED# LATIN SMALL LETTER O WITH OGONEK AND MACRON = LATIN SMALL LETTER O + COMBINING OGONEK + COMBINING MACRON
|
||||
01B7+030C=01EE# LATIN CAPITAL LETTER EZH WITH CARON = LATIN CAPITAL LETTER EZH + COMBINING CARON
|
||||
0292+030C=01EF# LATIN SMALL LETTER EZH WITH CARON = LATIN SMALL LETTER EZH + COMBINING CARON
|
||||
006A+030C=01F0# LATIN SMALL LETTER J WITH CARON = LATIN SMALL LETTER J + COMBINING CARON
|
||||
0047+0301=01F4# LATIN CAPITAL LETTER G WITH ACUTE = LATIN CAPITAL LETTER G + COMBINING ACUTE ACCENT
|
||||
0067+0301=01F5# LATIN SMALL LETTER G WITH ACUTE = LATIN SMALL LETTER G + COMBINING ACUTE ACCENT
|
||||
0041+030A+0301=01FA# LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE = LATIN CAPITAL LETTER A + COMBINING RING ABOVE + COMBINING ACUTE ACCENT
|
||||
0061+030A+0301=01FB# LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE = LATIN SMALL LETTER A + COMBINING RING ABOVE + COMBINING ACUTE ACCENT
|
||||
00C6+0301=01FC# LATIN CAPITAL LETTER AE WITH ACUTE = LATIN CAPITAL LETTER AE + COMBINING ACUTE ACCENT
|
||||
00E6+0301=01FD# LATIN SMALL LETTER AE WITH ACUTE = LATIN SMALL LETTER AE + COMBINING ACUTE ACCENT
|
||||
0041+030F=0200# LATIN CAPITAL LETTER A WITH DOUBLE GRAVE = LATIN CAPITAL LETTER A + COMBINING DOUBLE GRAVE ACCENT
|
||||
0061+030F=0201# LATIN SMALL LETTER A WITH DOUBLE GRAVE = LATIN SMALL LETTER A + COMBINING DOUBLE GRAVE ACCENT
|
||||
0041+0311=0202# LATIN CAPITAL LETTER A WITH INVERTED BREVE = LATIN CAPITAL LETTER A + COMBINING INVERTED BREVE
|
||||
0061+0311=0203# LATIN SMALL LETTER A WITH INVERTED BREVE = LATIN SMALL LETTER A + COMBINING INVERTED BREVE
|
||||
0045+030F=0204# LATIN CAPITAL LETTER E WITH DOUBLE GRAVE = LATIN CAPITAL LETTER E + COMBINING DOUBLE GRAVE ACCENT
|
||||
0065+030F=0205# LATIN SMALL LETTER E WITH DOUBLE GRAVE = LATIN SMALL LETTER E + COMBINING DOUBLE GRAVE ACCENT
|
||||
0045+0311=0206# LATIN CAPITAL LETTER E WITH INVERTED BREVE = LATIN CAPITAL LETTER E + COMBINING INVERTED BREVE
|
||||
0065+0311=0207# LATIN SMALL LETTER E WITH INVERTED BREVE = LATIN SMALL LETTER E + COMBINING INVERTED BREVE
|
||||
0049+030F=0208# LATIN CAPITAL LETTER I WITH DOUBLE GRAVE = LATIN CAPITAL LETTER I + COMBINING DOUBLE GRAVE ACCENT
|
||||
0069+030F=0209# LATIN SMALL LETTER I WITH DOUBLE GRAVE = LATIN SMALL LETTER I + COMBINING DOUBLE GRAVE ACCENT
|
||||
0049+0311=020A# LATIN CAPITAL LETTER I WITH INVERTED BREVE = LATIN CAPITAL LETTER I + COMBINING INVERTED BREVE
|
||||
0069+0311=020B# LATIN SMALL LETTER I WITH INVERTED BREVE = LATIN SMALL LETTER I + COMBINING INVERTED BREVE
|
||||
004F+030F=020C# LATIN CAPITAL LETTER O WITH DOUBLE GRAVE = LATIN CAPITAL LETTER O + COMBINING DOUBLE GRAVE ACCENT
|
||||
006F+030F=020D# LATIN SMALL LETTER O WITH DOUBLE GRAVE = LATIN SMALL LETTER O + COMBINING DOUBLE GRAVE ACCENT
|
||||
004F+0311=020E# LATIN CAPITAL LETTER O WITH INVERTED BREVE = LATIN CAPITAL LETTER O + COMBINING INVERTED BREVE
|
||||
006F+0311=020F# LATIN SMALL LETTER O WITH INVERTED BREVE = LATIN SMALL LETTER O + COMBINING INVERTED BREVE
|
||||
0052+030F=0210# LATIN CAPITAL LETTER R WITH DOUBLE GRAVE = LATIN CAPITAL LETTER R + COMBINING DOUBLE GRAVE ACCENT
|
||||
0072+030F=0211# LATIN SMALL LETTER R WITH DOUBLE GRAVE = LATIN SMALL LETTER R + COMBINING DOUBLE GRAVE ACCENT
|
||||
0052+0311=0212# LATIN CAPITAL LETTER R WITH INVERTED BREVE = LATIN CAPITAL LETTER R + COMBINING INVERTED BREVE
|
||||
0072+0311=0213# LATIN SMALL LETTER R WITH INVERTED BREVE = LATIN SMALL LETTER R + COMBINING INVERTED BREVE
|
||||
0055+030F=0214# LATIN CAPITAL LETTER U WITH DOUBLE GRAVE = LATIN CAPITAL LETTER U + COMBINING DOUBLE GRAVE ACCENT
|
||||
0075+030F=0215# LATIN SMALL LETTER U WITH DOUBLE GRAVE = LATIN SMALL LETTER U + COMBINING DOUBLE GRAVE ACCENT
|
||||
0055+0311=0216# LATIN CAPITAL LETTER U WITH INVERTED BREVE = LATIN CAPITAL LETTER U + COMBINING INVERTED BREVE
|
||||
0075+0311=0217# LATIN SMALL LETTER U WITH INVERTED BREVE = LATIN SMALL LETTER U + COMBINING INVERTED BREVE
|
||||
0041+0325=1E00# LATIN CAPITAL LETTER A WITH RING BELOW = LATIN CAPITAL LETTER A + COMBINING RING BELOW
|
||||
0061+0325=1E01# LATIN SMALL LETTER A WITH RING BELOW = LATIN SMALL LETTER A + COMBINING RING BELOW
|
||||
0042+0307=1E02# LATIN CAPITAL LETTER B WITH DOT ABOVE = LATIN CAPITAL LETTER B + COMBINING DOT ABOVE
|
||||
0062+0307=1E03# LATIN SMALL LETTER B WITH DOT ABOVE = LATIN SMALL LETTER B + COMBINING DOT ABOVE
|
||||
0042+0323=1E04# LATIN CAPITAL LETTER B WITH DOT BELOW = LATIN CAPITAL LETTER B + COMBINING DOT BELOW
|
||||
0062+0323=1E05# LATIN SMALL LETTER B WITH DOT BELOW = LATIN SMALL LETTER B + COMBINING DOT BELOW
|
||||
0042+0332=1E06# LATIN CAPITAL LETTER B WITH LINE BELOW = LATIN CAPITAL LETTER B + COMBINING LOW LINE
|
||||
0062+0332=1E07# LATIN SMALL LETTER B WITH LINE BELOW = LATIN SMALL LETTER B + COMBINING LOW LINE
|
||||
0043+0327+0301=1E08# LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE = LATIN CAPITAL LETTER C + COMBINING CEDILLA + COMBINING ACUTE ACCENT
|
||||
0063+0327+0301=1E09# LATIN SMALL LETTER C WITH CEDILLA AND ACUTE = LATIN SMALL LETTER C + COMBINING CEDILLA + COMBINING ACUTE ACCENT
|
||||
0044+0307=1E0A# LATIN CAPITAL LETTER D WITH DOT ABOVE = LATIN CAPITAL LETTER D + COMBINING DOT ABOVE
|
||||
0064+0307=1E0B# LATIN SMALL LETTER D WITH DOT ABOVE = LATIN SMALL LETTER D + COMBINING DOT ABOVE
|
||||
0044+0323=1E0C# LATIN CAPITAL LETTER D WITH DOT BELOW = LATIN CAPITAL LETTER D + COMBINING DOT BELOW
|
||||
0064+0323=1E0D# LATIN SMALL LETTER D WITH DOT BELOW = LATIN SMALL LETTER D + COMBINING DOT BELOW
|
||||
0044+0332=1E0E# LATIN CAPITAL LETTER D WITH LINE BELOW = LATIN CAPITAL LETTER D + COMBINING LOW LINE
|
||||
0064+0332=1E0F# LATIN SMALL LETTER D WITH LINE BELOW = LATIN SMALL LETTER D + COMBINING LOW LINE
|
||||
0044+0327=1E10# LATIN CAPITAL LETTER D WITH CEDILLA = LATIN CAPITAL LETTER D + COMBINING CEDILLA
|
||||
0064+0327=1E11# LATIN SMALL LETTER D WITH CEDILLA = LATIN SMALL LETTER D + COMBINING CEDILLA
|
||||
0044+032D=1E12# LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER D + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
0064+032D=1E13# LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER D + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
0045+0304+0300=1E14# LATIN CAPITAL LETTER E WITH MACRON AND GRAVE = LATIN CAPITAL LETTER E + COMBINING MACRON + COMBINING GRAVE ACCENT
|
||||
0065+0304+0300=1E15# LATIN SMALL LETTER E WITH MACRON AND GRAVE = LATIN SMALL LETTER E + COMBINING MACRON + COMBINING GRAVE ACCENT
|
||||
0045+0304+0301=1E16# LATIN CAPITAL LETTER E WITH MACRON AND ACUTE = LATIN CAPITAL LETTER E + COMBINING MACRON + COMBINING ACUTE ACCENT
|
||||
0065+0304+0301=1E17# LATIN SMALL LETTER E WITH MACRON AND ACUTE = LATIN SMALL LETTER E + COMBINING MACRON + COMBINING ACUTE ACCENT
|
||||
0045+032D=1E18# LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
0065+032D=1E19# LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
0045+0330=1E1A# LATIN CAPITAL LETTER E WITH TILDE BELOW = LATIN CAPITAL LETTER E + COMBINING TILDE BELOW
|
||||
0065+0330=1E1B# LATIN SMALL LETTER E WITH TILDE BELOW = LATIN SMALL LETTER E + COMBINING TILDE BELOW
|
||||
0045+0327+0306=1E1C# LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE = LATIN CAPITAL LETTER E + COMBINING CEDILLA + COMBINING BREVE
|
||||
0065+0327+0306=1E1D# LATIN SMALL LETTER E WITH CEDILLA AND BREVE = LATIN SMALL LETTER E + COMBINING CEDILLA + COMBINING BREVE
|
||||
0046+0307=1E1E# LATIN CAPITAL LETTER F WITH DOT ABOVE = LATIN CAPITAL LETTER F + COMBINING DOT ABOVE
|
||||
0066+0307=1E1F# LATIN SMALL LETTER F WITH DOT ABOVE = LATIN SMALL LETTER F + COMBINING DOT ABOVE
|
||||
0047+0304=1E20# LATIN CAPITAL LETTER G WITH MACRON = LATIN CAPITAL LETTER G + COMBINING MACRON
|
||||
0067+0304=1E21# LATIN SMALL LETTER G WITH MACRON = LATIN SMALL LETTER G + COMBINING MACRON
|
||||
0048+0307=1E22# LATIN CAPITAL LETTER H WITH DOT ABOVE = LATIN CAPITAL LETTER H + COMBINING DOT ABOVE
|
||||
0068+0307=1E23# LATIN SMALL LETTER H WITH DOT ABOVE = LATIN SMALL LETTER H + COMBINING DOT ABOVE
|
||||
0048+0323=1E24# LATIN CAPITAL LETTER H WITH DOT BELOW = LATIN CAPITAL LETTER H + COMBINING DOT BELOW
|
||||
0068+0323=1E25# LATIN SMALL LETTER H WITH DOT BELOW = LATIN SMALL LETTER H + COMBINING DOT BELOW
|
||||
0048+0308=1E26# LATIN CAPITAL LETTER H WITH DIAERESIS = LATIN CAPITAL LETTER H + COMBINING DIAERESIS
|
||||
0068+0308=1E27# LATIN SMALL LETTER H WITH DIAERESIS = LATIN SMALL LETTER H + COMBINING DIAERESIS
|
||||
0048+0327=1E28# LATIN CAPITAL LETTER H WITH CEDILLA = LATIN CAPITAL LETTER H + COMBINING CEDILLA
|
||||
0068+0327=1E29# LATIN SMALL LETTER H WITH CEDILLA = LATIN SMALL LETTER H + COMBINING CEDILLA
|
||||
0048+032E=1E2A# LATIN CAPITAL LETTER H WITH BREVE BELOW = LATIN CAPITAL LETTER H + COMBINING BREVE BELOW
|
||||
0068+032E=1E2B# LATIN SMALL LETTER H WITH BREVE BELOW = LATIN SMALL LETTER H + COMBINING BREVE BELOW
|
||||
0049+0330=1E2C# LATIN CAPITAL LETTER I WITH TILDE BELOW = LATIN CAPITAL LETTER I + COMBINING TILDE BELOW
|
||||
0069+0330=1E2D# LATIN SMALL LETTER I WITH TILDE BELOW = LATIN SMALL LETTER I + COMBINING TILDE BELOW
|
||||
0049+0308+0301=1E2E# LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE = LATIN CAPITAL LETTER I + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
|
||||
0069+0308+0301=1E2F# LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE = LATIN SMALL LETTER I + COMBINING DIAERESIS + COMBINING ACUTE ACCENT
|
||||
004B+0301=1E30# LATIN CAPITAL LETTER K WITH ACUTE = LATIN CAPITAL LETTER K + COMBINING ACUTE ACCENT
|
||||
006B+0301=1E31# LATIN SMALL LETTER K WITH ACUTE = LATIN SMALL LETTER K + COMBINING ACUTE ACCENT
|
||||
004B+0323=1E32# LATIN CAPITAL LETTER K WITH DOT BELOW = LATIN CAPITAL LETTER K + COMBINING DOT BELOW
|
||||
006B+0323=1E33# LATIN SMALL LETTER K WITH DOT BELOW = LATIN SMALL LETTER K + COMBINING DOT BELOW
|
||||
004B+0332=1E34# LATIN CAPITAL LETTER K WITH LINE BELOW = LATIN CAPITAL LETTER K + COMBINING LOW LINE
|
||||
006B+0332=1E35# LATIN SMALL LETTER K WITH LINE BELOW = LATIN SMALL LETTER K + COMBINING LOW LINE
|
||||
004C+0323=1E36# LATIN CAPITAL LETTER L WITH DOT BELOW = LATIN CAPITAL LETTER L + COMBINING DOT BELOW
|
||||
006C+0323=1E37# LATIN SMALL LETTER L WITH DOT BELOW = LATIN SMALL LETTER L + COMBINING DOT BELOW
|
||||
004C+0323+0304=1E38# LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON = LATIN CAPITAL LETTER L + COMBINING DOT BELOW + COMBINING MACRON
|
||||
006C+0323+0304=1E39# LATIN SMALL LETTER L WITH DOT BELOW AND MACRON = LATIN SMALL LETTER L + COMBINING DOT BELOW + COMBINING MACRON
|
||||
004C+0332=1E3A# LATIN CAPITAL LETTER L WITH LINE BELOW = LATIN CAPITAL LETTER L + COMBINING LOW LINE
|
||||
006C+0332=1E3B# LATIN SMALL LETTER L WITH LINE BELOW = LATIN SMALL LETTER L + COMBINING LOW LINE
|
||||
004C+032D=1E3C# LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER L + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
006C+032D=1E3D# LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER L + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
004D+0301=1E3E# LATIN CAPITAL LETTER M WITH ACUTE = LATIN CAPITAL LETTER M + COMBINING ACUTE ACCENT
|
||||
006D+0301=1E3F# LATIN SMALL LETTER M WITH ACUTE = LATIN SMALL LETTER M + COMBINING ACUTE ACCENT
|
||||
004D+0307=1E40# LATIN CAPITAL LETTER M WITH DOT ABOVE = LATIN CAPITAL LETTER M + COMBINING DOT ABOVE
|
||||
006D+0307=1E41# LATIN SMALL LETTER M WITH DOT ABOVE = LATIN SMALL LETTER M + COMBINING DOT ABOVE
|
||||
004D+0323=1E42# LATIN CAPITAL LETTER M WITH DOT BELOW = LATIN CAPITAL LETTER M + COMBINING DOT BELOW
|
||||
006D+0323=1E43# LATIN SMALL LETTER M WITH DOT BELOW = LATIN SMALL LETTER M + COMBINING DOT BELOW
|
||||
004E+0307=1E44# LATIN CAPITAL LETTER N WITH DOT ABOVE = LATIN CAPITAL LETTER N + COMBINING DOT ABOVE
|
||||
006E+0307=1E45# LATIN SMALL LETTER N WITH DOT ABOVE = LATIN SMALL LETTER N + COMBINING DOT ABOVE
|
||||
004E+0323=1E46# LATIN CAPITAL LETTER N WITH DOT BELOW = LATIN CAPITAL LETTER N + COMBINING DOT BELOW
|
||||
006E+0323=1E47# LATIN SMALL LETTER N WITH DOT BELOW = LATIN SMALL LETTER N + COMBINING DOT BELOW
|
||||
004E+0332=1E48# LATIN CAPITAL LETTER N WITH LINE BELOW = LATIN CAPITAL LETTER N + COMBINING LOW LINE
|
||||
006E+0332=1E49# LATIN SMALL LETTER N WITH LINE BELOW = LATIN SMALL LETTER N + COMBINING LOW LINE
|
||||
004E+032D=1E4A# LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER N + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
006E+032D=1E4B# LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER N + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
004F+0303+0301=1E4C# LATIN CAPITAL LETTER O WITH TILDE AND ACUTE = LATIN CAPITAL LETTER O + COMBINING TILDE + COMBINING ACUTE ACCENT
|
||||
006F+0303+0301=1E4D# LATIN SMALL LETTER O WITH TILDE AND ACUTE = LATIN SMALL LETTER O + COMBINING TILDE + COMBINING ACUTE ACCENT
|
||||
004F+0303+0308=1E4E# LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS = LATIN CAPITAL LETTER O + COMBINING TILDE + COMBINING DIAERESIS
|
||||
006F+0303+0308=1E4F# LATIN SMALL LETTER O WITH TILDE AND DIAERESIS = LATIN SMALL LETTER O + COMBINING TILDE + COMBINING DIAERESIS
|
||||
004F+0304+0300=1E50# LATIN CAPITAL LETTER O WITH MACRON AND GRAVE = LATIN CAPITAL LETTER O + COMBINING MACRON + COMBINING GRAVE ACCENT
|
||||
006F+0304+0300=1E51# LATIN SMALL LETTER O WITH MACRON AND GRAVE = LATIN SMALL LETTER O + COMBINING MACRON + COMBINING GRAVE ACCENT
|
||||
004F+0304+0301=1E52# LATIN CAPITAL LETTER O WITH MACRON AND ACUTE = LATIN CAPITAL LETTER O + COMBINING MACRON + COMBINING ACUTE ACCENT
|
||||
006F+0304+0301=1E53# LATIN SMALL LETTER O WITH MACRON AND ACUTE = LATIN SMALL LETTER O + COMBINING MACRON + COMBINING ACUTE ACCENT
|
||||
0050+0301=1E54# LATIN CAPITAL LETTER P WITH ACUTE = LATIN CAPITAL LETTER P + COMBINING ACUTE ACCENT
|
||||
0070+0301=1E55# LATIN SMALL LETTER P WITH ACUTE = LATIN SMALL LETTER P + COMBINING ACUTE ACCENT
|
||||
0050+0307=1E56# LATIN CAPITAL LETTER P WITH DOT ABOVE = LATIN CAPITAL LETTER P + COMBINING DOT ABOVE
|
||||
0070+0307=1E57# LATIN SMALL LETTER P WITH DOT ABOVE = LATIN SMALL LETTER P + COMBINING DOT ABOVE
|
||||
0052+0307=1E58# LATIN CAPITAL LETTER R WITH DOT ABOVE = LATIN CAPITAL LETTER R + COMBINING DOT ABOVE
|
||||
0072+0307=1E59# LATIN SMALL LETTER R WITH DOT ABOVE = LATIN SMALL LETTER R + COMBINING DOT ABOVE
|
||||
0052+0323=1E5A# LATIN CAPITAL LETTER R WITH DOT BELOW = LATIN CAPITAL LETTER R + COMBINING DOT BELOW
|
||||
0072+0323=1E5B# LATIN SMALL LETTER R WITH DOT BELOW = LATIN SMALL LETTER R + COMBINING DOT BELOW
|
||||
0052+0323+0304=1E5C# LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON = LATIN CAPITAL LETTER R + COMBINING DOT BELOW + COMBINING MACRON
|
||||
0072+0323+0304=1E5D# LATIN SMALL LETTER R WITH DOT BELOW AND MACRON = LATIN SMALL LETTER R + COMBINING DOT BELOW + COMBINING MACRON
|
||||
0052+0332=1E5E# LATIN CAPITAL LETTER R WITH LINE BELOW = LATIN CAPITAL LETTER R + COMBINING LOW LINE
|
||||
0072+0332=1E5F# LATIN SMALL LETTER R WITH LINE BELOW = LATIN SMALL LETTER R + COMBINING LOW LINE
|
||||
0053+0307=1E60# LATIN CAPITAL LETTER S WITH DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING DOT ABOVE
|
||||
0073+0307=1E61# LATIN SMALL LETTER S WITH DOT ABOVE = LATIN SMALL LETTER S + COMBINING DOT ABOVE
|
||||
0053+0323=1E62# LATIN CAPITAL LETTER S WITH DOT BELOW = LATIN CAPITAL LETTER S + COMBINING DOT BELOW
|
||||
0073+0323=1E63# LATIN SMALL LETTER S WITH DOT BELOW = LATIN SMALL LETTER S + COMBINING DOT BELOW
|
||||
0053+0301+0307=1E64# LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING ACUTE ACCENT + COMBINING DOT ABOVE
|
||||
0073+0301+0307=1E65# LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE = LATIN SMALL LETTER S + COMBINING ACUTE ACCENT + COMBINING DOT ABOVE
|
||||
0053+030C+0307=1E66# LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING CARON + COMBINING DOT ABOVE
|
||||
0073+030C+0307=1E67# LATIN SMALL LETTER S WITH CARON AND DOT ABOVE = LATIN SMALL LETTER S + COMBINING CARON + COMBINING DOT ABOVE
|
||||
0053+0323+0307=1E68# LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE = LATIN CAPITAL LETTER S + COMBINING DOT BELOW + COMBINING DOT ABOVE
|
||||
0073+0323+0307=1E69# LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE = LATIN SMALL LETTER S + COMBINING DOT BELOW + COMBINING DOT ABOVE
|
||||
0054+0307=1E6A# LATIN CAPITAL LETTER T WITH DOT ABOVE = LATIN CAPITAL LETTER T + COMBINING DOT ABOVE
|
||||
0074+0307=1E6B# LATIN SMALL LETTER T WITH DOT ABOVE = LATIN SMALL LETTER T + COMBINING DOT ABOVE
|
||||
0054+0323=1E6C# LATIN CAPITAL LETTER T WITH DOT BELOW = LATIN CAPITAL LETTER T + COMBINING DOT BELOW
|
||||
0074+0323=1E6D# LATIN SMALL LETTER T WITH DOT BELOW = LATIN SMALL LETTER T + COMBINING DOT BELOW
|
||||
0054+0332=1E6E# LATIN CAPITAL LETTER T WITH LINE BELOW = LATIN CAPITAL LETTER T + COMBINING LOW LINE
|
||||
0074+0332=1E6F# LATIN SMALL LETTER T WITH LINE BELOW = LATIN SMALL LETTER T + COMBINING LOW LINE
|
||||
0054+032D=1E70# LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER T + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
0074+032D=1E71# LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER T + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
0055+0324=1E72# LATIN CAPITAL LETTER U WITH DIAERESIS BELOW = LATIN CAPITAL LETTER U + COMBINING DIAERESIS BELOW
|
||||
0075+0324=1E73# LATIN SMALL LETTER U WITH DIAERESIS BELOW = LATIN SMALL LETTER U + COMBINING DIAERESIS BELOW
|
||||
0055+0330=1E74# LATIN CAPITAL LETTER U WITH TILDE BELOW = LATIN CAPITAL LETTER U + COMBINING TILDE BELOW
|
||||
0075+0330=1E75# LATIN SMALL LETTER U WITH TILDE BELOW = LATIN SMALL LETTER U + COMBINING TILDE BELOW
|
||||
0055+032D=1E76# LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW = LATIN CAPITAL LETTER U + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
0075+032D=1E77# LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW = LATIN SMALL LETTER U + COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
0055+0303+0301=1E78# LATIN CAPITAL LETTER U WITH TILDE AND ACUTE = LATIN CAPITAL LETTER U + COMBINING TILDE + COMBINING ACUTE ACCENT
|
||||
0075+0303+0301=1E79# LATIN SMALL LETTER U WITH TILDE AND ACUTE = LATIN SMALL LETTER U + COMBINING TILDE + COMBINING ACUTE ACCENT
|
||||
0055+0304+0308=1E7A# LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS = LATIN CAPITAL LETTER U + COMBINING MACRON + COMBINING DIAERESIS
|
||||
0075+0304+0308=1E7B# LATIN SMALL LETTER U WITH MACRON AND DIAERESIS = LATIN SMALL LETTER U + COMBINING MACRON + COMBINING DIAERESIS
|
||||
0056+0303=1E7C# LATIN CAPITAL LETTER V WITH TILDE = LATIN CAPITAL LETTER V + COMBINING TILDE
|
||||
0076+0303=1E7D# LATIN SMALL LETTER V WITH TILDE = LATIN SMALL LETTER V + COMBINING TILDE
|
||||
0056+0323=1E7E# LATIN CAPITAL LETTER V WITH DOT BELOW = LATIN CAPITAL LETTER V + COMBINING DOT BELOW
|
||||
0076+0323=1E7F# LATIN SMALL LETTER V WITH DOT BELOW = LATIN SMALL LETTER V + COMBINING DOT BELOW
|
||||
0057+0300=1E80# LATIN CAPITAL LETTER W WITH GRAVE = LATIN CAPITAL LETTER W + COMBINING GRAVE ACCENT
|
||||
0077+0300=1E81# LATIN SMALL LETTER W WITH GRAVE = LATIN SMALL LETTER W + COMBINING GRAVE ACCENT
|
||||
0057+0301=1E82# LATIN CAPITAL LETTER W WITH ACUTE = LATIN CAPITAL LETTER W + COMBINING ACUTE ACCENT
|
||||
0077+0301=1E83# LATIN SMALL LETTER W WITH ACUTE = LATIN SMALL LETTER W + COMBINING ACUTE ACCENT
|
||||
0057+0308=1E84# LATIN CAPITAL LETTER W WITH DIAERESIS = LATIN CAPITAL LETTER W + COMBINING DIAERESIS
|
||||
0077+0308=1E85# LATIN SMALL LETTER W WITH DIAERESIS = LATIN SMALL LETTER W + COMBINING DIAERESIS
|
||||
0057+0307=1E86# LATIN CAPITAL LETTER W WITH DOT ABOVE = LATIN CAPITAL LETTER W + COMBINING DOT ABOVE
|
||||
0077+0307=1E87# LATIN SMALL LETTER W WITH DOT ABOVE = LATIN SMALL LETTER W + COMBINING DOT ABOVE
|
||||
0057+0323=1E88# LATIN CAPITAL LETTER W WITH DOT BELOW = LATIN CAPITAL LETTER W + COMBINING DOT BELOW
|
||||
0077+0323=1E89# LATIN SMALL LETTER W WITH DOT BELOW = LATIN SMALL LETTER W + COMBINING DOT BELOW
|
||||
0058+0307=1E8A# LATIN CAPITAL LETTER X WITH DOT ABOVE = LATIN CAPITAL LETTER X + COMBINING DOT ABOVE
|
||||
0078+0307=1E8B# LATIN SMALL LETTER X WITH DOT ABOVE = LATIN SMALL LETTER X + COMBINING DOT ABOVE
|
||||
0058+0308=1E8C# LATIN CAPITAL LETTER X WITH DIAERESIS = LATIN CAPITAL LETTER X + COMBINING DIAERESIS
|
||||
0078+0308=1E8D# LATIN SMALL LETTER X WITH DIAERESIS = LATIN SMALL LETTER X + COMBINING DIAERESIS
|
||||
0059+0307=1E8E# LATIN CAPITAL LETTER Y WITH DOT ABOVE = LATIN CAPITAL LETTER Y + COMBINING DOT ABOVE
|
||||
0079+0307=1E8F# LATIN SMALL LETTER Y WITH DOT ABOVE = LATIN SMALL LETTER Y + COMBINING DOT ABOVE
|
||||
005A+0302=1E90# LATIN CAPITAL LETTER Z WITH CIRCUMFLEX = LATIN CAPITAL LETTER Z + COMBINING CIRCUMFLEX ACCENT
|
||||
007A+0302=1E91# LATIN SMALL LETTER Z WITH CIRCUMFLEX = LATIN SMALL LETTER Z + COMBINING CIRCUMFLEX ACCENT
|
||||
005A+0323=1E92# LATIN CAPITAL LETTER Z WITH DOT BELOW = LATIN CAPITAL LETTER Z + COMBINING DOT BELOW
|
||||
007A+0323=1E93# LATIN SMALL LETTER Z WITH DOT BELOW = LATIN SMALL LETTER Z + COMBINING DOT BELOW
|
||||
005A+0332=1E94# LATIN CAPITAL LETTER Z WITH LINE BELOW = LATIN CAPITAL LETTER Z + COMBINING LOW LINE
|
||||
007A+0332=1E95# LATIN SMALL LETTER Z WITH LINE BELOW = LATIN SMALL LETTER Z + COMBINING LOW LINE
|
||||
0068+0332=1E96# LATIN SMALL LETTER H WITH LINE BELOW = LATIN SMALL LETTER H + COMBINING LOW LINE
|
||||
0074+0308=1E97# LATIN SMALL LETTER T WITH DIAERESIS = LATIN SMALL LETTER T + COMBINING DIAERESIS
|
||||
0077+030A=1E98# LATIN SMALL LETTER W WITH RING ABOVE = LATIN SMALL LETTER W + COMBINING RING ABOVE
|
||||
0079+030A=1E99# LATIN SMALL LETTER Y WITH RING ABOVE = LATIN SMALL LETTER Y + COMBINING RING ABOVE
|
||||
017F+0307=1E9B# LATIN SMALL LETTER LONG S WITH DOT ABOVE = LATIN SMALL LETTER LONG S + COMBINING DOT ABOVE
|
||||
0041+0323=1EA0# LATIN CAPITAL LETTER A WITH DOT BELOW = LATIN CAPITAL LETTER A + COMBINING DOT BELOW
|
||||
0061+0323=1EA1# LATIN SMALL LETTER A WITH DOT BELOW = LATIN SMALL LETTER A + COMBINING DOT BELOW
|
||||
0041+0309=1EA2# LATIN CAPITAL LETTER A WITH HOOK ABOVE = LATIN CAPITAL LETTER A + COMBINING HOOK ABOVE
|
||||
0061+0309=1EA3# LATIN SMALL LETTER A WITH HOOK ABOVE = LATIN SMALL LETTER A + COMBINING HOOK ABOVE
|
||||
0041+0302+0301=1EA4# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
|
||||
0061+0302+0301=1EA5# LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
|
||||
0041+0302+0300=1EA6# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
|
||||
0061+0302+0300=1EA7# LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
|
||||
0041+0302+0309=1EA8# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
|
||||
0061+0302+0309=1EA9# LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
|
||||
0041+0302+0303=1EAA# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
|
||||
0061+0302+0303=1EAB# LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
|
||||
0041+0302+0323=1EAC# LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW = LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
|
||||
0061+0302+0323=1EAD# LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW = LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
|
||||
0041+0306+0301=1EAE# LATIN CAPITAL LETTER A WITH BREVE AND ACUTE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING ACUTE ACCENT
|
||||
0061+0306+0301=1EAF# LATIN SMALL LETTER A WITH BREVE AND ACUTE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING ACUTE ACCENT
|
||||
0041+0306+0300=1EB0# LATIN CAPITAL LETTER A WITH BREVE AND GRAVE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING GRAVE ACCENT
|
||||
0061+0306+0300=1EB1# LATIN SMALL LETTER A WITH BREVE AND GRAVE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING GRAVE ACCENT
|
||||
0041+0306+0309=1EB2# LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING HOOK ABOVE
|
||||
0061+0306+0309=1EB3# LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING HOOK ABOVE
|
||||
0041+0306+0303=1EB4# LATIN CAPITAL LETTER A WITH BREVE AND TILDE = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING TILDE
|
||||
0061+0306+0303=1EB5# LATIN SMALL LETTER A WITH BREVE AND TILDE = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING TILDE
|
||||
0041+0306+0323=1EB6# LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW = LATIN CAPITAL LETTER A + COMBINING BREVE + COMBINING DOT BELOW
|
||||
0061+0306+0323=1EB7# LATIN SMALL LETTER A WITH BREVE AND DOT BELOW = LATIN SMALL LETTER A + COMBINING BREVE + COMBINING DOT BELOW
|
||||
0045+0323=1EB8# LATIN CAPITAL LETTER E WITH DOT BELOW = LATIN CAPITAL LETTER E + COMBINING DOT BELOW
|
||||
0065+0323=1EB9# LATIN SMALL LETTER E WITH DOT BELOW = LATIN SMALL LETTER E + COMBINING DOT BELOW
|
||||
0045+0309=1EBA# LATIN CAPITAL LETTER E WITH HOOK ABOVE = LATIN CAPITAL LETTER E + COMBINING HOOK ABOVE
|
||||
0065+0309=1EBB# LATIN SMALL LETTER E WITH HOOK ABOVE = LATIN SMALL LETTER E + COMBINING HOOK ABOVE
|
||||
0045+0303=1EBC# LATIN CAPITAL LETTER E WITH TILDE = LATIN CAPITAL LETTER E + COMBINING TILDE
|
||||
0065+0303=1EBD# LATIN SMALL LETTER E WITH TILDE = LATIN SMALL LETTER E + COMBINING TILDE
|
||||
0045+0302+0301=1EBE# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
|
||||
0065+0302+0301=1EBF# LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
|
||||
0045+0302+0300=1EC0# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
|
||||
0065+0302+0300=1EC1# LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
|
||||
0045+0302+0309=1EC2# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
|
||||
0065+0302+0309=1EC3# LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
|
||||
0045+0302+0303=1EC4# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
|
||||
0065+0302+0303=1EC5# LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
|
||||
0045+0302+0323=1EC6# LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW = LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
|
||||
0065+0302+0323=1EC7# LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW = LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
|
||||
0049+0309=1EC8# LATIN CAPITAL LETTER I WITH HOOK ABOVE = LATIN CAPITAL LETTER I + COMBINING HOOK ABOVE
|
||||
0069+0309=1EC9# LATIN SMALL LETTER I WITH HOOK ABOVE = LATIN SMALL LETTER I + COMBINING HOOK ABOVE
|
||||
0049+0323=1ECA# LATIN CAPITAL LETTER I WITH DOT BELOW = LATIN CAPITAL LETTER I + COMBINING DOT BELOW
|
||||
0069+0323=1ECB# LATIN SMALL LETTER I WITH DOT BELOW = LATIN SMALL LETTER I + COMBINING DOT BELOW
|
||||
004F+0323=1ECC# LATIN CAPITAL LETTER O WITH DOT BELOW = LATIN CAPITAL LETTER O + COMBINING DOT BELOW
|
||||
006F+0323=1ECD# LATIN SMALL LETTER O WITH DOT BELOW = LATIN SMALL LETTER O + COMBINING DOT BELOW
|
||||
004F+0309=1ECE# LATIN CAPITAL LETTER O WITH HOOK ABOVE = LATIN CAPITAL LETTER O + COMBINING HOOK ABOVE
|
||||
006F+0309=1ECF# LATIN SMALL LETTER O WITH HOOK ABOVE = LATIN SMALL LETTER O + COMBINING HOOK ABOVE
|
||||
004F+0302+0301=1ED0# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
|
||||
006F+0302+0301=1ED1# LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING ACUTE ACCENT
|
||||
004F+0302+0300=1ED2# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
|
||||
006F+0302+0300=1ED3# LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING GRAVE ACCENT
|
||||
004F+0302+0309=1ED4# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
|
||||
006F+0302+0309=1ED5# LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING HOOK ABOVE
|
||||
004F+0302+0303=1ED6# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
|
||||
006F+0302+0303=1ED7# LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING TILDE
|
||||
004F+0302+0323=1ED8# LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW = LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
|
||||
006F+0302+0323=1ED9# LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW = LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT + COMBINING DOT BELOW
|
||||
004F+031B+0301=1EDA# LATIN CAPITAL LETTER O WITH HORN AND ACUTE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING ACUTE ACCENT
|
||||
006F+031B+0301=1EDB# LATIN SMALL LETTER O WITH HORN AND ACUTE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING ACUTE ACCENT
|
||||
004F+031B+0300=1EDC# LATIN CAPITAL LETTER O WITH HORN AND GRAVE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING GRAVE ACCENT
|
||||
006F+031B+0300=1EDD# LATIN SMALL LETTER O WITH HORN AND GRAVE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING GRAVE ACCENT
|
||||
004F+031B+0309=1EDE# LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING HOOK ABOVE
|
||||
006F+031B+0309=1EDF# LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING HOOK ABOVE
|
||||
004F+031B+0303=1EE0# LATIN CAPITAL LETTER O WITH HORN AND TILDE = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING TILDE
|
||||
006F+031B+0303=1EE1# LATIN SMALL LETTER O WITH HORN AND TILDE = LATIN SMALL LETTER O + COMBINING HORN + COMBINING TILDE
|
||||
004F+031B+0323=1EE2# LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW = LATIN CAPITAL LETTER O + COMBINING HORN + COMBINING DOT BELOW
|
||||
006F+031B+0323=1EE3# LATIN SMALL LETTER O WITH HORN AND DOT BELOW = LATIN SMALL LETTER O + COMBINING HORN + COMBINING DOT BELOW
|
||||
0055+0323=1EE4# LATIN CAPITAL LETTER U WITH DOT BELOW = LATIN CAPITAL LETTER U + COMBINING DOT BELOW
|
||||
0075+0323=1EE5# LATIN SMALL LETTER U WITH DOT BELOW = LATIN SMALL LETTER U + COMBINING DOT BELOW
|
||||
0055+0309=1EE6# LATIN CAPITAL LETTER U WITH HOOK ABOVE = LATIN CAPITAL LETTER U + COMBINING HOOK ABOVE
|
||||
0075+0309=1EE7# LATIN SMALL LETTER U WITH HOOK ABOVE = LATIN SMALL LETTER U + COMBINING HOOK ABOVE
|
||||
0055+031B+0301=1EE8# LATIN CAPITAL LETTER U WITH HORN AND ACUTE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING ACUTE ACCENT
|
||||
0075+031B+0301=1EE9# LATIN SMALL LETTER U WITH HORN AND ACUTE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING ACUTE ACCENT
|
||||
0055+031B+0300=1EEA# LATIN CAPITAL LETTER U WITH HORN AND GRAVE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING GRAVE ACCENT
|
||||
0075+031B+0300=1EEB# LATIN SMALL LETTER U WITH HORN AND GRAVE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING GRAVE ACCENT
|
||||
0055+031B+0309=1EEC# LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING HOOK ABOVE
|
||||
0075+031B+0309=1EED# LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING HOOK ABOVE
|
||||
0055+031B+0303=1EEE# LATIN CAPITAL LETTER U WITH HORN AND TILDE = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING TILDE
|
||||
0075+031B+0303=1EEF# LATIN SMALL LETTER U WITH HORN AND TILDE = LATIN SMALL LETTER U + COMBINING HORN + COMBINING TILDE
|
||||
0055+031B+0323=1EF0# LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW = LATIN CAPITAL LETTER U + COMBINING HORN + COMBINING DOT BELOW
|
||||
0075+031B+0323=1EF1# LATIN SMALL LETTER U WITH HORN AND DOT BELOW = LATIN SMALL LETTER U + COMBINING HORN + COMBINING DOT BELOW
|
||||
0059+0300=1EF2# LATIN CAPITAL LETTER Y WITH GRAVE = LATIN CAPITAL LETTER Y + COMBINING GRAVE ACCENT
|
||||
0079+0300=1EF3# LATIN SMALL LETTER Y WITH GRAVE = LATIN SMALL LETTER Y + COMBINING GRAVE ACCENT
|
||||
0059+0323=1EF4# LATIN CAPITAL LETTER Y WITH DOT BELOW = LATIN CAPITAL LETTER Y + COMBINING DOT BELOW
|
||||
0079+0323=1EF5# LATIN SMALL LETTER Y WITH DOT BELOW = LATIN SMALL LETTER Y + COMBINING DOT BELOW
|
||||
0059+0309=1EF6# LATIN CAPITAL LETTER Y WITH HOOK ABOVE = LATIN CAPITAL LETTER Y + COMBINING HOOK ABOVE
|
||||
0079+0309=1EF7# LATIN SMALL LETTER Y WITH HOOK ABOVE = LATIN SMALL LETTER Y + COMBINING HOOK ABOVE
|
||||
0059+0303=1EF8# LATIN CAPITAL LETTER Y WITH TILDE = LATIN CAPITAL LETTER Y + COMBINING TILDE
|
||||
0079+0303=1EF9# LATIN SMALL LETTER Y WITH TILDE = LATIN SMALL LETTER Y + COMBINING TILDE
|
||||
|
73
src/test/java/org/xbib/charset/AnselCharsetTest.java
Normal file
73
src/test/java/org/xbib/charset/AnselCharsetTest.java
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.text.Normalizer;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class AnselCharsetTest extends Assert {
|
||||
|
||||
@Test
|
||||
public void testAnsel() throws Exception {
|
||||
ByteBuffer buf = ByteBuffer.wrap("\u00e8\u0075".getBytes("ISO-8859-1"));
|
||||
Charset charset = Charset.forName("ANSEL");
|
||||
CharsetDecoder decoder = charset.newDecoder();
|
||||
CharBuffer cbuf = decoder.decode(buf);
|
||||
String s = cbuf.toString();
|
||||
assertEquals("\u0075\u0308", s);
|
||||
s = Normalizer.normalize(s, Normalizer.Form.NFC);
|
||||
assertEquals("ü", s);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAnsel2() throws Exception {
|
||||
ByteBuffer buf = ByteBuffer.wrap("\u00AC\u00E2\u0041\u00ED\u0042\u00E2\u0043\u00E2\u0044".getBytes("ISO-8859-1"));
|
||||
Charset charset = Charset.forName("ANSEL");
|
||||
CharsetDecoder decoder = charset.newDecoder();
|
||||
CharBuffer cbuf = decoder.decode(buf);
|
||||
String s = cbuf.toString();
|
||||
assertEquals(9, s.length());
|
||||
s = Normalizer.normalize(s, Normalizer.Form.NFC);
|
||||
assertEquals("ƠÁB̕ĆD́", s);
|
||||
assertEquals(7, s.length());
|
||||
}
|
||||
|
||||
}
|
56
src/test/java/org/xbib/charset/AnselCodeTableParserTest.java
Normal file
56
src/test/java/org/xbib/charset/AnselCodeTableParserTest.java
Normal file
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.junit.Test;
|
||||
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class AnselCodeTableParserTest {
|
||||
|
||||
private Logger logger = LogManager.getLogger(AnselCodeTableParserTest.class);
|
||||
|
||||
@Test
|
||||
public void test() throws XMLStreamException {
|
||||
AnselCodeTableParser anselCodeTableParser = new AnselCodeTableParser(getClass().getResourceAsStream("codetables.xml"));
|
||||
for (AnselCodeTableParser.CodeTable codeTable : anselCodeTableParser.getCodeTables()) {
|
||||
for (AnselCodeTableParser.CharacterSet characterSet : codeTable.getCharacterSets()) {
|
||||
logger.info("{} {}", characterSet.getName(), characterSet.getLength());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class BibliographicCharsetsTest {
|
||||
|
||||
@Test
|
||||
public void testAvailability() {
|
||||
Charset charset = BibliographicCharsets.ANSEL;
|
||||
charset = BibliographicCharsets.ISO5426;
|
||||
charset = BibliographicCharsets.ISO5428;
|
||||
charset = BibliographicCharsets.MAB;
|
||||
charset = BibliographicCharsets.MAB_DISKETTE;
|
||||
charset = BibliographicCharsets.PICA;
|
||||
}
|
||||
}
|
85
src/test/java/org/xbib/charset/ISO5426Test.java
Normal file
85
src/test/java/org/xbib/charset/ISO5426Test.java
Normal file
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.SortedMap;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class ISO5426Test extends Assert {
|
||||
|
||||
@Test
|
||||
public void listCharsets() throws Exception {
|
||||
SortedMap<String, Charset> map = Charset.availableCharsets();
|
||||
assertTrue(map.keySet().contains("ISO-5426"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMAB2() throws Exception {
|
||||
ByteBuffer buf = ByteBuffer.wrap("Éa".getBytes(StandardCharsets.ISO_8859_1));
|
||||
Charset charset = Charset.forName("MAB2");
|
||||
CharsetDecoder decoder = charset.newDecoder();
|
||||
CharBuffer cbuf = decoder.decode(buf);
|
||||
String output = cbuf.toString();
|
||||
assertEquals(output, "ä");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testXMAB() throws Exception {
|
||||
ByteBuffer buf = ByteBuffer.wrap("Éa".getBytes(StandardCharsets.ISO_8859_1));
|
||||
Charset charset = Charset.forName("x-MAB");
|
||||
CharsetDecoder decoder = charset.newDecoder();
|
||||
CharBuffer cbuf = decoder.decode(buf);
|
||||
String output = cbuf.toString();
|
||||
assertEquals(output, "ä");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPound() throws Exception {
|
||||
ByteBuffer buf = ByteBuffer.wrap("\u00A3".getBytes(StandardCharsets.ISO_8859_1));
|
||||
Charset charset = Charset.forName("x-MAB");
|
||||
CharsetDecoder decoder = charset.newDecoder();
|
||||
CharBuffer cbuf = decoder.decode(buf);
|
||||
String output = cbuf.toString();
|
||||
assertEquals(output, "£");
|
||||
}
|
||||
|
||||
}
|
60
src/test/java/org/xbib/charset/NormalizerTest.java
Normal file
60
src/test/java/org/xbib/charset/NormalizerTest.java
Normal file
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.text.Normalizer;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class NormalizerTest extends Assert {
|
||||
|
||||
@Test
|
||||
public void testNormalizer() throws Exception {
|
||||
byte[] b = new byte[]{(byte) 103, (byte) 101, (byte) 109, (byte) 97, (byte) 204, (byte) 136, (byte) 195, (byte) 159};
|
||||
String input = new String(b, "UTF-8");
|
||||
String norm = Normalizer.normalize(input, Normalizer.Form.NFC);
|
||||
assertEquals("gemäß", norm);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void tesNFC() {
|
||||
String s = "Für Bandanzeige bitte zugehörige Publikationen anklicken";
|
||||
assertEquals(56, s.length());
|
||||
String norm = Normalizer.normalize(s, Normalizer.Form.NFC);
|
||||
assertEquals(56, norm.length());
|
||||
}
|
||||
|
||||
}
|
64
src/test/java/org/xbib/charset/SimpleAnselCharsetTest.java
Normal file
64
src/test/java/org/xbib/charset/SimpleAnselCharsetTest.java
Normal file
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Licensed to Jörg Prante and xbib under one or more contributor
|
||||
* license agreements. See the NOTICE.txt file distributed with this work
|
||||
* for additional information regarding copyright ownership.
|
||||
*
|
||||
* Copyright (C) 2016 Jörg Prante and xbib
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses
|
||||
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* The interactive user interfaces in modified source and object code
|
||||
* versions of this program must display Appropriate Legal Notices,
|
||||
* as required under Section 5 of the GNU Affero General Public License.
|
||||
*
|
||||
* In accordance with Section 7(b) of the GNU Affero General Public
|
||||
* License, these Appropriate Legal Notices must retain the display of the
|
||||
* "Powered by xbib" logo. If the display of the logo is not reasonably
|
||||
* feasible for technical reasons, the Appropriate Legal Notices must display
|
||||
* the words "Powered by xbib".
|
||||
*/
|
||||
package org.xbib.charset;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.util.SortedMap;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SimpleAnselCharsetTest extends Assert {
|
||||
|
||||
@Test
|
||||
public void listCharsets() throws Exception {
|
||||
SortedMap<String, Charset> map = Charset.availableCharsets();
|
||||
assertTrue(map.keySet().contains("SIMPLE_ANSEL"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleAnsel() throws Exception {
|
||||
ByteBuffer buf = ByteBuffer.wrap("\u00e8\u0075".getBytes("ISO-8859-1"));
|
||||
Charset charset = Charset.forName("SIMPLE_ANSEL");
|
||||
CharsetDecoder decoder = charset.newDecoder();
|
||||
CharBuffer cbuf = decoder.decode(buf);
|
||||
String output = cbuf.toString();
|
||||
assertEquals("\u00fc", output);
|
||||
}
|
||||
|
||||
}
|
4
src/test/java/org/xbib/charset/package-info.java
Normal file
4
src/test/java/org/xbib/charset/package-info.java
Normal file
|
@ -0,0 +1,4 @@
|
|||
/**
|
||||
* Bibliographic character set implementations.
|
||||
*/
|
||||
package org.xbib.charset;
|
13
src/test/resources/log4j2.xml
Normal file
13
src/test/resources/log4j2.xml
Normal file
|
@ -0,0 +1,13 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Log4j 2 configuration for the test run: one console appender writing to
  SYSTEM_OUT, attached to the root logger at level "info".
  Element names use the capitalized spelling throughout.
-->
<Configuration status="OFF">
    <Appenders>
        <Console name="Console" target="SYSTEM_OUT">
            <PatternLayout pattern="[%d{ABSOLUTE}][%-5p][%-25c][%t] %m%n"/>
        </Console>
    </Appenders>
    <Loggers>
        <Root level="info">
            <AppenderRef ref="Console" />
        </Root>
    </Loggers>
</Configuration>
|
Loading…
Reference in a new issue