allow the use of charsets other than UTF-8 when parsing URLs

This commit is contained in:
Jörg Prante 2019-08-29 16:04:46 +02:00
parent 98470dc638
commit 64ae0f60bf
3 changed files with 84 additions and 21 deletions

View file

@ -1,6 +1,6 @@
group = org.xbib group = org.xbib
name = net name = net
version = 2.0.0 version = 2.0.1
# test # test
junit.version = 5.5.1 junit.version = 5.5.1

View file

@ -11,6 +11,7 @@ import java.net.InetAddress;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.nio.charset.CharacterCodingException; import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException; import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.charset.UnmappableCharacterException; import java.nio.charset.UnmappableCharacterException;
@ -239,7 +240,11 @@ public class URL implements Comparable<URL> {
} }
public static Parser parser() { public static Parser parser() {
return new Parser(); return new Parser(StandardCharsets.UTF_8, CodingErrorAction.REPORT);
}
public static Parser parser(Charset charset, CodingErrorAction codingErrorAction) {
return new Parser(charset, codingErrorAction);
} }
public static Resolver base(String base) { public static Resolver base(String base) {
@ -262,42 +267,71 @@ public class URL implements Comparable<URL> {
} }
public static URL from(String input) { public static URL from(String input) {
return from(input, true); return from(input, StandardCharsets.UTF_8, CodingErrorAction.REPORT, true, false);
} }
public static URL create(String input) { public static URL create(String input) {
return from(input, false); return from(input, StandardCharsets.UTF_8, CodingErrorAction.REPORT, false, false);
} }
public static URL from(String input, boolean resolve) { public static URL create(String input, boolean disableException) {
return from(input, StandardCharsets.UTF_8, CodingErrorAction.REPORT, false, disableException);
}
public static URL from(String input,
Charset charset, CodingErrorAction codingErrorAction,
boolean resolve, boolean disableException) {
try { try {
return parser().parse(input, resolve); return parser(charset, codingErrorAction).parse(input, resolve);
} catch (URLSyntaxException | MalformedInputException | UnmappableCharacterException e) { } catch (URLSyntaxException | MalformedInputException | UnmappableCharacterException e) {
throw new IllegalArgumentException(e); if (disableException) {
logger.log(Level.WARNING, e.getMessage(), e);
return null;
} else {
throw new IllegalArgumentException(e);
}
} }
} }
public URL resolve(String spec) { public URL resolve(String spec) {
return from(this, spec); return from(this, spec, false);
} }
public static URL from(URL base, String spec) { public URL resolve(String spec, boolean disableException) {
return from(this, spec, disableException);
}
public static URL from(URL base, String spec, boolean disableException) {
try { try {
return new Resolver(base).resolve(spec); return new Resolver(base).resolve(spec);
} catch (URLSyntaxException | MalformedInputException | UnmappableCharacterException e) { } catch (URLSyntaxException | MalformedInputException | UnmappableCharacterException e) {
throw new IllegalArgumentException(e); if (disableException) {
logger.log(Level.WARNING, e.getMessage(), e);
return null;
} else {
throw new IllegalArgumentException(e);
}
} }
} }
public URL resolve(URL spec) { public URL resolve(URL spec) {
return from(this, spec); return from(this, spec, false);
} }
public static URL from(URL base, URL spec) { public URL resolve(URL spec, boolean disableException) {
return from(this, spec, disableException);
}
public static URL from(URL base, URL spec, boolean disableException) {
try { try {
return new Resolver(base).resolve(spec); return new Resolver(base).resolve(spec);
} catch (URLSyntaxException e) { } catch (URLSyntaxException e) {
throw new IllegalArgumentException(e); if (disableException) {
logger.log(Level.WARNING, e.getMessage(), e);
return null;
} else {
throw new IllegalArgumentException(e);
}
} }
} }
@ -319,11 +353,20 @@ public class URL implements Comparable<URL> {
} }
public static QueryParameters parseQueryString(String query) { public static QueryParameters parseQueryString(String query) {
return parseQueryString(query, false);
}
public static QueryParameters parseQueryString(String query, boolean disableException) {
Objects.requireNonNull(query); Objects.requireNonNull(query);
try { try {
return URL.parser().parse(query.charAt(0) == QUESTION_CHAR ? query : QUESTION_CHAR + query).getQueryParams(); return URL.parser().parse(query.charAt(0) == QUESTION_CHAR ? query : QUESTION_CHAR + query).getQueryParams();
} catch (URLSyntaxException | MalformedInputException | UnmappableCharacterException e) { } catch (URLSyntaxException | MalformedInputException | UnmappableCharacterException e) {
throw new IllegalArgumentException(e); if (disableException) {
logger.log(Level.WARNING, e.getMessage(), e);
return null;
} else {
throw new IllegalArgumentException(e);
}
} }
} }
@ -774,13 +817,13 @@ public class URL implements Comparable<URL> {
/** /**
* The URL builder class is required for building an URL. It uses fluent API methods * The URL builder class is required for building an URL. It uses fluent API methods
* and pre-processes paralameter accordingly. * and pre-processes parameter accordingly.
*/ */
public static class Builder { public static class Builder {
private PercentEncoder regNameEncoder; private PercentEncoder regNameEncoder;
private final PercentDecoder percentDecoder; private PercentDecoder percentDecoder;
private final QueryParameters queryParams; private final QueryParameters queryParams;
@ -788,6 +831,8 @@ public class URL implements Comparable<URL> {
private Charset charset; private Charset charset;
private CodingErrorAction codingErrorAction;
private String scheme; private String scheme;
private String schemeSpecificPart; private String schemeSpecificPart;
@ -809,10 +854,9 @@ public class URL implements Comparable<URL> {
private boolean fatalResolveErrorsEnabled; private boolean fatalResolveErrorsEnabled;
private Builder() { private Builder() {
charset(StandardCharsets.UTF_8);
this.percentDecoder = new PercentDecoder();
this.queryParams = new QueryParameters(); this.queryParams = new QueryParameters();
this.pathSegments = new ArrayList<>(); this.pathSegments = new ArrayList<>();
charset(StandardCharsets.UTF_8, CodingErrorAction.REPLACE);
} }
/** /**
@ -820,9 +864,12 @@ public class URL implements Comparable<URL> {
* @param charset the character set * @param charset the character set
* @return this builder * @return this builder
*/ */
public Builder charset(Charset charset) { public Builder charset(Charset charset, CodingErrorAction codingErrorAction) {
this.charset = charset; this.charset = charset;
this.codingErrorAction = codingErrorAction;
this.regNameEncoder = PercentEncoders.getRegNameEncoder(charset); this.regNameEncoder = PercentEncoders.getRegNameEncoder(charset);
this.percentDecoder = new PercentDecoder(charset.newDecoder()
.onUnmappableCharacter(codingErrorAction));
return this; return this;
} }
@ -916,7 +963,7 @@ public class URL implements Comparable<URL> {
public Builder path(String path) { public Builder path(String path) {
try { try {
parser().parsePathWithQueryAndFragment(this, path); parser(charset, codingErrorAction).parsePathWithQueryAndFragment(this, path);
} catch (CharacterCodingException e) { } catch (CharacterCodingException e) {
throw new IllegalArgumentException(e); throw new IllegalArgumentException(e);
} }
@ -1044,8 +1091,9 @@ public class URL implements Comparable<URL> {
private final Builder builder; private final Builder builder;
private Parser() { private Parser(Charset charset, CodingErrorAction codingErrorAction) {
builder = new Builder(); builder = new Builder();
builder.charset(charset, codingErrorAction);
} }
public URL parse(String input) public URL parse(String input)
@ -1267,6 +1315,7 @@ public class URL implements Comparable<URL> {
if (relative.isEmpty()) { if (relative.isEmpty()) {
return base; return base;
} }
// TODO(jprante) parser(charset, codingErrorAction)
URL url = parser().parse(relative); URL url = parser().parse(relative);
return resolve(url); return resolve(url);
} }

View file

@ -3,6 +3,8 @@ package org.xbib.net;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Iterator; import java.util.Iterator;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
@ -420,6 +422,18 @@ class URLParserTest {
assertEquals("foo:bar", url.getPassword()); assertEquals("foo:bar", url.getPassword());
} }
@Test
void testCharset() throws Exception {
// default parser uses UTF-8
Assertions.assertThrows(URLSyntaxException.class, () -> {
String string = "http%3A%2F%2Flibrary.fes.de%2Flibrary%2Fjournals%2Fde-part%2Fdas-rote-bl%E4ttla%2Findex.html";
URL url = URL.parser().parse(string);
});
String string = "http%3A%2F%2Flibrary.fes.de%2Flibrary%2Fjournals%2Fde-part%2Fdas-rote-bl%E4ttla%2Findex.html";
URL url = URL.parser(StandardCharsets.ISO_8859_1, CodingErrorAction.REPLACE).parse(string);
assertEquals("http://library.fes.de/library/journals/de-part/das-rote-blättla/index.html", url.toString());
}
private void assertUrlCompatibility(String url) throws Exception { private void assertUrlCompatibility(String url) throws Exception {
String s = URL.from(url).toExternalForm(); String s = URL.from(url).toExternalForm();
assertEquals(s, URL.from(s).toExternalForm()); assertEquals(s, URL.from(s).toExternalForm());