allow the use of other charsets than UTF-8 when parsing URLs

This commit is contained in:
Jörg Prante 2019-08-29 16:04:46 +02:00
parent 98470dc638
commit 64ae0f60bf
3 changed files with 84 additions and 21 deletions

View file

@ -1,6 +1,6 @@
group = org.xbib
name = net
version = 2.0.0
version = 2.0.1
# test
junit.version = 5.5.1

View file

@ -11,6 +11,7 @@ import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnmappableCharacterException;
@ -239,7 +240,11 @@ public class URL implements Comparable<URL> {
}
/**
 * Creates a parser with the default decoding settings: UTF-8 combined with
 * {@link CodingErrorAction#REPORT}.
 *
 * @return a new parser instance
 */
public static Parser parser() {
return new Parser();
return new Parser(StandardCharsets.UTF_8, CodingErrorAction.REPORT);
}
/**
 * Creates a parser whose internal builder is configured with the given
 * charset and coding error action.
 *
 * @param charset the charset handed to the parser
 * @param codingErrorAction the coding error action handed to the parser
 * @return a new parser instance
 */
public static Parser parser(Charset charset, CodingErrorAction codingErrorAction) {
    final Parser configured = new Parser(charset, codingErrorAction);
    return configured;
}
public static Resolver base(String base) {
@ -262,42 +267,71 @@ public class URL implements Comparable<URL> {
}
/**
 * Parses the input using UTF-8 and {@link CodingErrorAction#REPORT}, with the
 * {@code resolve} flag set to {@code true} and exceptions enabled.
 *
 * @param input the string to parse
 * @return the parsed URL
 * @throws IllegalArgumentException if the input cannot be parsed or decoded
 */
public static URL from(String input) {
return from(input, true);
return from(input, StandardCharsets.UTF_8, CodingErrorAction.REPORT, true, false);
}
/**
 * Parses the input using UTF-8 and {@link CodingErrorAction#REPORT}, with the
 * {@code resolve} flag set to {@code false} and exceptions enabled.
 *
 * @param input the string to parse
 * @return the parsed URL
 * @throws IllegalArgumentException if the input cannot be parsed or decoded
 */
public static URL create(String input) {
return from(input, false);
return from(input, StandardCharsets.UTF_8, CodingErrorAction.REPORT, false, false);
}
public static URL from(String input, boolean resolve) {
/**
 * Parses the input using UTF-8 and {@link CodingErrorAction#REPORT} without
 * setting the {@code resolve} flag.
 *
 * @param input the string to parse
 * @param disableException if {@code true}, a parse failure is logged and
 *                         {@code null} is returned instead of throwing
 * @return the parsed URL, or {@code null} on failure when exceptions are disabled
 */
public static URL create(String input, boolean disableException) {
    final boolean resolve = false;
    return from(input, StandardCharsets.UTF_8, CodingErrorAction.REPORT, resolve, disableException);
}
/**
 * Parses the input with a parser configured for the given charset and coding
 * error action.
 *
 * @param input the string to parse
 * @param charset the charset handed to the parser
 * @param codingErrorAction the coding error action handed to the parser
 * @param resolve passed through unchanged to {@code Parser.parse(input, resolve)}
 * @param disableException if {@code true}, a failure is logged at WARNING level
 *                         and {@code null} is returned instead of throwing
 * @return the parsed URL, or {@code null} on failure when exceptions are disabled
 * @throws IllegalArgumentException wrapping the syntax or decoding failure when
 *                                  exceptions are not disabled
 */
public static URL from(String input,
Charset charset, CodingErrorAction codingErrorAction,
boolean resolve, boolean disableException) {
try {
return parser().parse(input, resolve);
return parser(charset, codingErrorAction).parse(input, resolve);
} catch (URLSyntaxException | MalformedInputException | UnmappableCharacterException e) {
throw new IllegalArgumentException(e);
// Lenient mode: report the problem through the logger and signal failure with null.
if (disableException) {
logger.log(Level.WARNING, e.getMessage(), e);
return null;
} else {
// Strict mode: surface the failure unchecked, keeping the original cause.
throw new IllegalArgumentException(e);
}
}
}
/**
 * Resolves the given spec against this URL with exceptions enabled.
 *
 * @param spec the string to resolve against this URL
 * @return the resolved URL
 * @throws IllegalArgumentException if the spec cannot be parsed or decoded
 */
public URL resolve(String spec) {
return from(this, spec);
return from(this, spec, false);
}
public static URL from(URL base, String spec) {
/**
 * Resolves the given spec against this URL.
 *
 * @param spec the string to resolve against this URL
 * @param disableException if {@code true}, a failure is logged and {@code null}
 *                         is returned instead of throwing
 * @return the resolved URL, or {@code null} on failure when exceptions are disabled
 */
public URL resolve(String spec, boolean disableException) {
    final URL resolved = from(this, spec, disableException);
    return resolved;
}
/**
 * Resolves a string spec against a base URL using a {@code Resolver}.
 *
 * @param base the base URL
 * @param spec the string to resolve against the base
 * @param disableException if {@code true}, a failure is logged at WARNING level
 *                         and {@code null} is returned instead of throwing
 * @return the resolved URL, or {@code null} on failure when exceptions are disabled
 * @throws IllegalArgumentException wrapping the syntax or decoding failure when
 *                                  exceptions are not disabled
 */
public static URL from(URL base, String spec, boolean disableException) {
try {
return new Resolver(base).resolve(spec);
} catch (URLSyntaxException | MalformedInputException | UnmappableCharacterException e) {
throw new IllegalArgumentException(e);
// Lenient mode: report the problem through the logger and signal failure with null.
if (disableException) {
logger.log(Level.WARNING, e.getMessage(), e);
return null;
} else {
// Strict mode: surface the failure unchecked, keeping the original cause.
throw new IllegalArgumentException(e);
}
}
}
/**
 * Resolves the given URL against this URL with exceptions enabled.
 *
 * @param spec the URL to resolve against this URL
 * @return the resolved URL
 * @throws IllegalArgumentException if resolving fails with a syntax error
 */
public URL resolve(URL spec) {
return from(this, spec);
return from(this, spec, false);
}
public static URL from(URL base, URL spec) {
/**
 * Resolves the given URL against this URL.
 *
 * @param spec the URL to resolve against this URL
 * @param disableException if {@code true}, a failure is logged and {@code null}
 *                         is returned instead of throwing
 * @return the resolved URL, or {@code null} on failure when exceptions are disabled
 */
public URL resolve(URL spec, boolean disableException) {
    final URL resolved = from(this, spec, disableException);
    return resolved;
}
/**
 * Resolves a URL spec against a base URL using a {@code Resolver}.
 *
 * @param base the base URL
 * @param spec the URL to resolve against the base
 * @param disableException if {@code true}, a failure is logged at WARNING level
 *                         and {@code null} is returned instead of throwing
 * @return the resolved URL, or {@code null} on failure when exceptions are disabled
 * @throws IllegalArgumentException wrapping the syntax failure when exceptions
 *                                  are not disabled
 */
public static URL from(URL base, URL spec, boolean disableException) {
try {
return new Resolver(base).resolve(spec);
} catch (URLSyntaxException e) {
throw new IllegalArgumentException(e);
// Lenient mode: report the problem through the logger and signal failure with null.
if (disableException) {
logger.log(Level.WARNING, e.getMessage(), e);
return null;
} else {
// Strict mode: surface the failure unchecked, keeping the original cause.
throw new IllegalArgumentException(e);
}
}
}
@ -319,11 +353,20 @@ public class URL implements Comparable<URL> {
}
/**
 * Parses a query string into query parameters with exceptions enabled.
 *
 * @param query the query string, with or without a leading question mark
 * @return the parsed query parameters
 * @throws NullPointerException if {@code query} is {@code null}
 * @throws IllegalArgumentException if the query cannot be parsed or decoded
 */
public static QueryParameters parseQueryString(String query) {
    final boolean disableException = false;
    return parseQueryString(query, disableException);
}
/**
 * Parses a query string into query parameters using the default parser.
 * A question mark is prepended when the query does not already start with one.
 *
 * @param query the query string, must not be {@code null}
 * @param disableException if {@code true}, a failure is logged at WARNING level
 *                         and {@code null} is returned instead of throwing
 * @return the parsed query parameters, or {@code null} on failure when
 *         exceptions are disabled
 * @throws NullPointerException if {@code query} is {@code null}
 * @throws IllegalArgumentException wrapping the syntax or decoding failure when
 *                                  exceptions are not disabled
 */
public static QueryParameters parseQueryString(String query, boolean disableException) {
Objects.requireNonNull(query);
try {
// NOTE(review): query.charAt(0) throws StringIndexOutOfBoundsException for an empty
// query; that exception is not caught below, so it escapes even when
// disableException is true. Consider guarding with query.isEmpty().
return URL.parser().parse(query.charAt(0) == QUESTION_CHAR ? query : QUESTION_CHAR + query).getQueryParams();
} catch (URLSyntaxException | MalformedInputException | UnmappableCharacterException e) {
throw new IllegalArgumentException(e);
// Lenient mode: report the problem through the logger and signal failure with null.
if (disableException) {
logger.log(Level.WARNING, e.getMessage(), e);
return null;
} else {
// Strict mode: surface the failure unchecked, keeping the original cause.
throw new IllegalArgumentException(e);
}
}
}
@ -774,13 +817,13 @@ public class URL implements Comparable<URL> {
/**
* The URL builder class is required for building an URL. It uses fluent API methods
* and pre-processes paralameter accordingly.
* and pre-processes parameters accordingly.
*/
public static class Builder {
private PercentEncoder regNameEncoder;
private final PercentDecoder percentDecoder;
private PercentDecoder percentDecoder;
private final QueryParameters queryParams;
@ -788,6 +831,8 @@ public class URL implements Comparable<URL> {
private Charset charset;
private CodingErrorAction codingErrorAction;
private String scheme;
private String schemeSpecificPart;
@ -809,10 +854,9 @@ public class URL implements Comparable<URL> {
private boolean fatalResolveErrorsEnabled;
/**
 * Creates an empty builder: no query parameters, no path segments, and
 * UTF-8 as the charset.
 */
private Builder() {
charset(StandardCharsets.UTF_8);
this.percentDecoder = new PercentDecoder();
this.queryParams = new QueryParameters();
this.pathSegments = new ArrayList<>();
// NOTE(review): the builder defaults to CodingErrorAction.REPLACE while URL.parser()
// passes CodingErrorAction.REPORT; confirm the different defaults are intended.
charset(StandardCharsets.UTF_8, CodingErrorAction.REPLACE);
}
/**
@ -820,9 +864,12 @@ public class URL implements Comparable<URL> {
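* @param charset the character set
* @param codingErrorAction the coding error action for the charset decoder used in percent decoding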
* @return this builder
*/
public Builder charset(Charset charset) {
public Builder charset(Charset charset, CodingErrorAction codingErrorAction) {
    // Remember both settings; path(String) reuses them when it creates a parser.
    this.charset = charset;
    this.codingErrorAction = codingErrorAction;
    this.regNameEncoder = PercentEncoders.getRegNameEncoder(charset);
    // Apply the requested action to BOTH decoder error classes. A new CharsetDecoder
    // defaults to REPORT for malformed input and for unmappable characters, so setting
    // only the unmappable-character action would still make invalid byte sequences
    // (e.g. a lone %E4 decoded as UTF-8) throw even when REPLACE or IGNORE was requested.
    this.percentDecoder = new PercentDecoder(charset.newDecoder()
            .onMalformedInput(codingErrorAction)
            .onUnmappableCharacter(codingErrorAction));
    return this;
}
@ -916,7 +963,7 @@ public class URL implements Comparable<URL> {
public Builder path(String path) {
try {
parser().parsePathWithQueryAndFragment(this, path);
parser(charset, codingErrorAction).parsePathWithQueryAndFragment(this, path);
} catch (CharacterCodingException e) {
throw new IllegalArgumentException(e);
}
@ -1044,8 +1091,9 @@ public class URL implements Comparable<URL> {
private final Builder builder;
private Parser() {
/**
 * Creates a parser backed by a fresh builder that is configured with the
 * given charset and coding error action.
 *
 * @param charset the charset handed to the builder
 * @param codingErrorAction the coding error action handed to the builder
 */
private Parser(Charset charset, CodingErrorAction codingErrorAction) {
    // Builder.charset(...) returns the builder itself, so the calls can be chained.
    this.builder = new Builder().charset(charset, codingErrorAction);
}
public URL parse(String input)
@ -1267,6 +1315,7 @@ public class URL implements Comparable<URL> {
if (relative.isEmpty()) {
return base;
}
// TODO(jprante) parser(charset, codingErrorAction)
URL url = parser().parse(relative);
return resolve(url);
}

View file

@ -3,6 +3,8 @@ package org.xbib.net;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ -420,6 +422,18 @@ class URLParserTest {
assertEquals("foo:bar", url.getPassword());
}
/**
 * Checks that a percent-encoded ISO-8859-1 byte ({@code %E4}) is rejected by the
 * default parser but decoded when ISO-8859-1 is requested explicitly.
 */
@Test
void testCharset() throws Exception {
    final String encoded = "http%3A%2F%2Flibrary.fes.de%2Flibrary%2Fjournals%2Fde-part%2Fdas-rote-bl%E4ttla%2Findex.html";
    // default parser uses UTF-8
    Assertions.assertThrows(URLSyntaxException.class, () -> {
        URL.parser().parse(encoded);
    });
    final URL decoded = URL.parser(StandardCharsets.ISO_8859_1, CodingErrorAction.REPLACE).parse(encoded);
    assertEquals("http://library.fes.de/library/journals/de-part/das-rote-blättla/index.html", decoded.toString());
}
private void assertUrlCompatibility(String url) throws Exception {
String s = URL.from(url).toExternalForm();
assertEquals(s, URL.from(s).toExternalForm());