bibliographic-character-sets/src/main/java/org/xbib/charset/SimpleAnselCharset.java

274 lines
9.5 KiB
Java
Raw Normal View History

2016-08-18 20:39:56 +02:00
/*
* Licensed to Jörg Prante and xbib under one or more contributor
* license agreements. See the NOTICE.txt file distributed with this work
* for additional information regarding copyright ownership.
*
* Copyright (C) 2016 Jörg Prante and xbib
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public
* License, these Appropriate Legal Notices must retain the display of the
* "Powered by xbib" logo. If the display of the logo is not reasonably
* feasible for technical reasons, the Appropriate Legal Notices must display
* the words "Powered by xbib".
*/
package org.xbib.charset;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Objects;
2016-08-18 20:39:56 +02:00
import java.util.TreeMap;
/**
* This is a simplified version of "ANSEL charset" at http://anselcharset.sourceforge.net/
* by Piotr Andzel.
* Original code licensed under LGPL http://www.gnu.org/licenses/lgpl.html
*/
public class SimpleAnselCharset extends Charset {
private final Map<Character, byte[]> mapping;
2016-08-18 20:39:56 +02:00
private final Map<Byte, ReverseMappingEntity> reverseMapping;
public SimpleAnselCharset() {
super("SIMPLE_ANSEL", BibliographicCharsetProvider.aliasesFor("SIMPLE_ANSEL"));
this.mapping = createMapping(getClass().getResourceAsStream("ansel-mapping.txt"));
Objects.requireNonNull(this.mapping);
2016-08-18 20:39:56 +02:00
reverseMapping = createReverseMapping(mapping);
}
private static Map<Character, byte[]> createMapping(InputStream mappingStream) {
Map<Character, byte[]> mapping = new HashMap<>();
try (BufferedReader reader = new BufferedReader(new InputStreamReader(mappingStream, StandardCharsets.UTF_8))) {
String line;
while ((line = reader.readLine()) != null) {
int i = line.indexOf(";");
if (i < 0) {
i = line.indexOf("#");
}
if (i >= 0) {
line = line.substring(0, i);
}
String[] kvp = line.split("=");
if (kvp.length == 2) {
String uni = kvp[0];
String ans = kvp[1];
Character uniCode = (char) Integer.parseInt(uni.replaceFirst("^[uU]", ""), 16);
String[] ansSeq = ans.split(" ");
byte[] ansCodes = new byte[ansSeq.length];
for (int j = 0; j < ansSeq.length; j++) {
ansCodes[j] = (byte) (Integer.parseInt(ansSeq[j].replaceFirst("^0[xX]", ""), 16) & 0xFF);
}
mapping.put(uniCode, ansCodes);
}
}
return mapping;
2016-08-18 20:39:56 +02:00
} catch (IOException e) {
return null;
2016-08-18 20:39:56 +02:00
}
}
@Override
public boolean canEncode() {
return true;
}
@Override
public CharsetDecoder newDecoder() {
return new Decoder(this, reverseMapping);
}
@Override
public CharsetEncoder newEncoder() {
return new Encoder(this);
}
@Override
public boolean contains(Charset cs) {
return displayName().equals(cs.displayName());
2016-08-18 20:39:56 +02:00
}
private static Map<Byte, ReverseMappingEntity> createReverseMapping(Map<Character, byte[]> mapping) {
Map<Byte, ReverseMappingEntity> rev = new TreeMap<>();
for (Map.Entry<Character, byte[]> e : mapping.entrySet()) {
Map<Byte, ReverseMappingEntity> ptr = rev;
Character ch = e.getKey();
for (int i = 0; i < e.getValue().length; i++) {
Byte b = e.getValue()[i];
ReverseMappingEntity ent = ptr.get(b);
if (ent == null) {
ent = new ReverseMappingEntity();
ptr.put(b, ent);
}
if (i + 1 == e.getValue().length) {
ent.setCharacter(ch);
} else {
ptr = ent.getMapping();
}
}
}
return rev;
}
private static class ReverseMappingEntity {
2016-08-18 20:39:56 +02:00
private final TreeMap<Byte, ReverseMappingEntity> mapping = new TreeMap<>();
2016-08-18 20:39:56 +02:00
private Character character;
public Character getCharacter() {
return character;
}
public void setCharacter(Character ch) {
this.character = ch;
}
public Map<Byte, ReverseMappingEntity> getMapping() {
return mapping;
}
}
private static class Decoder extends CharsetDecoder {
private final Map<Byte, ReverseMappingEntity> reverseMapping;
private final LinkedList<Byte> buffer = new LinkedList<>();
2016-08-18 20:39:56 +02:00
Decoder(Charset charset, Map<Byte, ReverseMappingEntity> reverseMapping) {
2016-08-18 20:39:56 +02:00
super(charset, 2.2f, 3.0f);
this.reverseMapping = reverseMapping;
2016-08-18 20:39:56 +02:00
}
@Override
protected CoderResult decodeLoop(final ByteBuffer in, CharBuffer out) {
ReverseMappingBuffer rmb = new ReverseMappingBuffer(reverseMapping, buffer) {
@Override
protected Byte onNextByte() {
return in.hasRemaining() ? in.get() : null;
}
};
while (in.hasRemaining() || rmb.hasRemaining()) {
if (out.hasRemaining()) {
Character ch = rmb.nextCharacter();
out.append(ch);
} else {
return CoderResult.OVERFLOW;
}
}
return CoderResult.UNDERFLOW;
}
}
private class Encoder extends CharsetEncoder {
Encoder(Charset charset) {
super(charset, 2.2f, 3.0f);
}
@Override
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
while (in.hasRemaining()) {
if (out.hasRemaining()) {
char unicode = in.get();
byte[] ansel;
if (unicode <= 0x7f) {
ansel = new byte[2];
ansel[0] = (byte) ((unicode >> 8) & 0xff);
ansel[1] = (byte) ((unicode) & 0xff);
} else {
ansel = mapping.get(unicode);
if (ansel == null) {
return CoderResult.unmappableForLength(2);
}
}
boolean started = false;
for (int i = 0; i < ansel.length; i++) {
if (started || ansel[i] != 0 || i == ansel.length - 1) {
out.put(ansel[i]);
started = true;
}
}
} else {
return CoderResult.OVERFLOW;
}
}
return CoderResult.UNDERFLOW;
}
}
abstract static class ReverseMappingBuffer {
private final Map<Byte, ReverseMappingEntity> rm;
private final LinkedList<Byte> buffer;
2016-08-18 20:39:56 +02:00
ReverseMappingBuffer(Map<Byte, ReverseMappingEntity> rm, LinkedList<Byte> buffer) {
this.rm = rm;
this.buffer = buffer;
}
boolean hasRemaining() {
return !buffer.isEmpty();
}
Character nextCharacter() {
LinkedList<Byte> queue = new LinkedList<>();
ReverseMappingEntity rme = null;
Character ch = null;
for (Byte b = nextByte(); b != null; b = nextByte()) {
queue.addLast(b);
rme = rme != null ? rme.getMapping().get(b) : rm.get(b);
if (rme == null) {
buffer.addAll(queue);
return ch != null ? ch : Character.valueOf((char) nextByte().byteValue());
}
if (rme.getCharacter() != null) {
ch = rme.getCharacter();
queue.clear();
}
}
return ch;
}
protected abstract Byte onNextByte();
private Byte nextByte() {
if (!buffer.isEmpty()) {
return buffer.pollFirst();
} else {
return onNextByte();
}
}
}
}