/*
 * Licensed to Jörg Prante and xbib under one or more contributor
 * license agreements. See the NOTICE.txt file distributed with this work
 * for additional information regarding copyright ownership.
 *
 * Copyright (C) 2016 Jörg Prante and xbib
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program; if not, see http://www.gnu.org/licenses
 * or write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The interactive user interfaces in modified source and object code
 * versions of this program must display Appropriate Legal Notices,
 * as required under Section 5 of the GNU Affero General Public License.
 *
 * In accordance with Section 7(b) of the GNU Affero General Public
 * License, these Appropriate Legal Notices must retain the display of the
 * "Powered by xbib" logo. If the display of the logo is not reasonably
 * feasible for technical reasons, the Appropriate Legal Notices must display
 * the words "Powered by xbib".
 */
package org.xbib.charset;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;

/**
 * This is a simplified version of "ANSEL charset" at http://anselcharset.sourceforge.net/
 * by Piotr Andzel.
 * Original code licensed under LGPL http://www.gnu.org/licenses/lgpl.html
 *
 * <p>The charset is table driven: the class path resource {@code ansel-mapping.txt} is read
 * once per instance into a map from character to byte sequence (used for encoding) and into
 * a trie keyed by byte (used for decoding).
 */
public class SimpleAnselCharset extends Charset {

    /** Character to byte sequence table, as read from the mapping resource. */
    private final Map<Character, byte[]> mapping;

    /** Root level of the byte trie derived from {@link #mapping}. */
    private final Map<Byte, ReverseMappingEntity> reverseMapping;

    /**
     * Creates the charset under the name {@code SIMPLE_ANSEL} and loads the mapping tables
     * from the resource {@code ansel-mapping.txt}, resolved relative to the runtime class.
     *
     * @throws NullPointerException  if the mapping resource cannot be found
     * @throws IllegalStateException if the mapping resource cannot be read
     */
    public SimpleAnselCharset() {
        super("SIMPLE_ANSEL", BibliographicCharsetProvider.aliasesFor("SIMPLE_ANSEL"));
        this.mapping = createMapping(getClass().getResourceAsStream("ansel-mapping.txt"));
        this.reverseMapping = createReverseMapping(mapping);
    }

    /**
     * Parses the mapping definition.
     *
     * <p>Everything from the first {@code ;} on a line is dropped; if the line has no
     * {@code ;}, everything from the first {@code #} is dropped. A remaining line that splits
     * into exactly two parts at {@code =} is an entry: the left part is a hexadecimal code
     * unit with an optional {@code u}/{@code U} prefix, the right part is a space separated
     * list of hexadecimal byte values, each with an optional {@code 0x}/{@code 0X} prefix.
     * All other lines are ignored.
     *
     * @param mappingStream the UTF-8 encoded mapping definition, closed by this method
     * @return the character to byte sequence table, never {@code null}
     * @throws NullPointerException  if {@code mappingStream} is {@code null}
     * @throws IllegalStateException if reading the stream fails
     * @throws NumberFormatException if an entry contains a value that is not hexadecimal
     */
    private static Map<Character, byte[]> createMapping(InputStream mappingStream) {
        Objects.requireNonNull(mappingStream, "ANSEL mapping resource not found");
        Map<Character, byte[]> mapping = new HashMap<>();
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(mappingStream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                int i = line.indexOf(";");
                if (i < 0) {
                    i = line.indexOf("#");
                }
                if (i >= 0) {
                    line = line.substring(0, i);
                }
                String[] kvp = line.split("=");
                if (kvp.length == 2) {
                    String uni = kvp[0];
                    String ans = kvp[1];
                    Character uniCode = (char) Integer.parseInt(uni.replaceFirst("^[uU]", ""), 16);
                    String[] ansSeq = ans.split(" ");
                    byte[] ansCodes = new byte[ansSeq.length];
                    for (int j = 0; j < ansSeq.length; j++) {
                        ansCodes[j] = (byte) (Integer.parseInt(ansSeq[j].replaceFirst("^0[xX]", ""), 16) & 0xFF);
                    }
                    mapping.put(uniCode, ansCodes);
                }
            }
            return mapping;
        } catch (IOException e) {
            // Do not swallow the failure: without the table the charset is unusable.
            throw new IllegalStateException("unable to read ANSEL mapping", e);
        }
    }

    @Override
    public boolean canEncode() {
        return true;
    }

    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this, reverseMapping);
    }

    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this, mapping);
    }

    /**
     * Reports containment only for a charset that has the same display name as this one.
     */
    @Override
    public boolean contains(Charset cs) {
        return displayName().equals(cs.displayName());
    }

    /**
     * Builds the decoding trie: every byte sequence of the mapping becomes a path of trie
     * nodes, and the node reached by the last byte carries the mapped character.
     *
     * @param mapping the character to byte sequence table
     * @return the root level of the trie
     */
    private static Map<Byte, ReverseMappingEntity> createReverseMapping(Map<Character, byte[]> mapping) {
        Map<Byte, ReverseMappingEntity> rev = new TreeMap<>();
        for (Map.Entry<Character, byte[]> e : mapping.entrySet()) {
            Map<Byte, ReverseMappingEntity> level = rev;
            byte[] sequence = e.getValue();
            for (int i = 0; i < sequence.length; i++) {
                Byte b = sequence[i];
                ReverseMappingEntity ent = level.get(b);
                if (ent == null) {
                    ent = new ReverseMappingEntity();
                    level.put(b, ent);
                }
                if (i + 1 == sequence.length) {
                    ent.setCharacter(e.getKey());
                } else {
                    level = ent.getMapping();
                }
            }
        }
        return rev;
    }

    /**
     * A trie node: an optional character for the byte sequence ending here, plus the child
     * nodes for longer sequences that start with the same bytes.
     */
    private static class ReverseMappingEntity {

        private final Map<Byte, ReverseMappingEntity> mapping = new TreeMap<>();

        private Character character;

        /** Returns the character mapped to the sequence ending at this node, or {@code null}. */
        public Character getCharacter() {
            return character;
        }

        public void setCharacter(Character ch) {
            this.character = ch;
        }

        /** Returns the child nodes of this node, keyed by the next byte. */
        public Map<Byte, ReverseMappingEntity> getMapping() {
            return mapping;
        }
    }

    /**
     * Decoder walking the byte trie. Bytes that were read from the input but not yet turned
     * into characters are kept in {@link #buffer} between invocations of the decode loop.
     */
    private static class Decoder extends CharsetDecoder {

        private final Map<Byte, ReverseMappingEntity> reverseMapping;

        /** Bytes already taken from the input that still have to be decoded. */
        private final LinkedList<Byte> buffer = new LinkedList<>();

        Decoder(Charset charset, Map<Byte, ReverseMappingEntity> reverseMapping) {
            super(charset, 2.2f, 3.0f);
            this.reverseMapping = reverseMapping;
        }

        @Override
        protected CoderResult decodeLoop(final ByteBuffer in, CharBuffer out) {
            ReverseMappingBuffer rmb = new ReverseMappingBuffer(reverseMapping, buffer) {
                @Override
                protected Byte onNextByte() {
                    return in.hasRemaining() ? in.get() : null;
                }
            };
            while (in.hasRemaining() || rmb.hasRemaining()) {
                if (!out.hasRemaining()) {
                    return CoderResult.OVERFLOW;
                }
                Character ch = rmb.nextCharacter();
                if (ch == null) {
                    // Nothing left to decode; never unbox null into the output buffer.
                    break;
                }
                out.put(ch.charValue());
            }
            return CoderResult.UNDERFLOW;
        }

        /** Discards pending bytes so that a reset decoder starts without stale input. */
        @Override
        protected void implReset() {
            buffer.clear();
        }
    }

    /**
     * Encoder: characters up to U+007F are written as a single byte of the same value, all
     * other characters are looked up in the mapping table.
     */
    private static class Encoder extends CharsetEncoder {

        private final Map<Character, byte[]> mapping;

        Encoder(Charset charset, Map<Character, byte[]> mapping) {
            super(charset, 2.2f, 3.0f);
            this.mapping = mapping;
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            while (in.hasRemaining()) {
                if (!out.hasRemaining()) {
                    return CoderResult.OVERFLOW;
                }
                // Peek only: the character is consumed once its bytes are known to fit, so
                // the input position always points at the first character not yet encoded.
                char unicode = in.get(in.position());
                if (unicode <= 0x7f) {
                    out.put((byte) unicode);
                } else {
                    byte[] ansel = mapping.get(unicode);
                    if (ansel == null) {
                        // Exactly one character, the one at the current position, is unmappable.
                        return CoderResult.unmappableForLength(1);
                    }
                    // Leading zero bytes are not written, but the last byte always is.
                    int start = 0;
                    while (start < ansel.length - 1 && ansel[start] == 0) {
                        start++;
                    }
                    int count = ansel.length - start;
                    if (out.remaining() < count) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put(ansel, start, count);
                }
                in.position(in.position() + 1);
            }
            return CoderResult.UNDERFLOW;
        }
    }

    /**
     * Pulls bytes from a byte source and turns them into characters by longest match against
     * the byte trie. Bytes that were read ahead but did not become part of a match are pushed
     * back into the shared buffer and are served again, in their original order, before any
     * new byte is requested from {@link #onNextByte()}.
     */
    abstract static class ReverseMappingBuffer {

        private final Map<Byte, ReverseMappingEntity> rm;

        private final LinkedList<Byte> buffer;

        ReverseMappingBuffer(Map<Byte, ReverseMappingEntity> rm, LinkedList<Byte> buffer) {
            this.rm = rm;
            this.buffer = buffer;
        }

        /** Returns {@code true} if pushed-back bytes are waiting to be decoded. */
        boolean hasRemaining() {
            return !buffer.isEmpty();
        }

        /**
         * Decodes the next character.
         *
         * <p>The longest byte sequence found in the trie wins. If not even the first byte
         * starts a match, that byte is returned as the character with the same unsigned
         * value. Running out of bytes is treated as the end of the input.
         *
         * @return the next character, or {@code null} if no byte is available at all
         */
        Character nextCharacter() {
            // Bytes read since the last complete match; they must not be lost.
            LinkedList<Byte> unmatched = new LinkedList<>();
            ReverseMappingEntity rme = null;
            Character ch = null;
            for (Byte b = nextByte(); b != null; b = nextByte()) {
                unmatched.addLast(b);
                rme = rme != null ? rme.getMapping().get(b) : rm.get(b);
                if (rme == null) {
                    break;
                }
                if (rme.getCharacter() != null) {
                    ch = rme.getCharacter();
                    unmatched.clear();
                }
            }
            // Push back at the front so the bytes keep their order relative to bytes that
            // are still waiting in the buffer.
            buffer.addAll(0, unmatched);
            if (ch != null) {
                return ch;
            }
            Byte raw = nextByte();
            if (raw == null) {
                return null;
            }
            // Mask to avoid sign extension: byte 0x80..0xFF must become U+0080..U+00FF.
            return Character.valueOf((char) (raw & 0xFF));
        }

        /**
         * Supplies the next byte of the underlying source.
         *
         * @return the next byte, or {@code null} if the source is exhausted
         */
        protected abstract Byte onNextByte();

        /** Serves pushed-back bytes first, then falls back to the underlying source. */
        private Byte nextByte() {
            if (!buffer.isEmpty()) {
                return buffer.pollFirst();
            } else {
                return onNextByte();
            }
        }
    }
}