Skip to content

Commit 95f3d49

Browse files
committed
[feature] Add a 'bom-action' attribute to the http request to allow control over how the BOM (if present) in the http response should be processed.
Closes #58
1 parent 9fea292 commit 95f3d49

13 files changed

Lines changed: 258 additions & 66 deletions

File tree

http-client-java/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@
8989
<groupId>com.google.code.findbugs</groupId>
9090
<artifactId>jsr305</artifactId>
9191
</dependency>
92+
<dependency>
93+
<groupId>com.evolvedbinary.j8xu</groupId>
94+
<artifactId>j8xu</artifactId>
95+
<version>1.2.0</version>
96+
</dependency>
9297
<dependency>
9398
<groupId>junit</groupId>
9499
<artifactId>junit</artifactId>
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/****************************************************************************/
2+
/* File: org.expath.httpclient.BomAction.java */
3+
/* Author: Adam Retter - Evolved Binary Ltd */
4+
/* Date: 2026-06-16 */
5+
/* Tags: */
6+
/* Copyright (c) 2026 Evolved Binary Ltd (see end of file.) */
7+
/* ------------------------------------------------------------------------ */
8+
9+
10+
package org.expath.httpclient;
11+
12+
/**
 * Provides options for BOM (Byte Order Mark) handling.
 *
 * Each constant has an "argument string" form, which is the constant's name
 * in lower-case with hyphens in place of underscores (e.g. {@code drop-ignore}
 * for {@link #DROP_IGNORE}). Use {@link #fromArgumentString(String)} and
 * {@link #toArgumentString()} to convert between the two forms.
 */
public enum BomAction {

    /**
     * Raise an error if a BOM is present.
     */
    ERROR,

    /**
     * Preserve the BOM in the output, and don't use it to inform the charset.
     */
    PRESERVE_IGNORE,

    /**
     * Preserve the BOM in the output, and use it to override the charset.
     */
    PRESERVE_OVERRIDE_CHARSET,

    /**
     * Drop the BOM in the output, and don't use it to inform the charset.
     */
    DROP_IGNORE,

    /**
     * Drop the BOM in the output, and use it to override the charset.
     */
    DROP_OVERRIDE_CHARSET;

    /**
     * Get the BOM Action from an Argument string.
     *
     * The match is case-insensitive, and hyphens are treated as underscores,
     * so {@code "drop-ignore"}, {@code "Drop-Ignore"} and {@code "DROP_IGNORE"}
     * all yield {@link #DROP_IGNORE}.
     *
     * @param str the argument string.
     *
     * @return the BOM action.
     *
     * @throws IllegalArgumentException if the argument string is null, or if
     *     there is no BOM action for the supplied argument string.
     */
    public static BomAction fromArgumentString(final String str) throws IllegalArgumentException {
        if (str == null) {
            // report a missing value in the same way as an unknown value, rather than as an NPE
            throw new IllegalArgumentException("BOM action argument string must not be null");
        }

        // Locale.ROOT keeps the case mapping independent of the JVM's default locale;
        // e.g. with a Turkish default locale 'i' would otherwise upper-case to a dotted capital I
        // and no constant would match.
        final String constantName = str.toUpperCase(java.util.Locale.ROOT).replace('-', '_');
        return valueOf(constantName);
    }

    /**
     * Get the argument version of the name.
     *
     * This is the constant's name in lower-case, with each underscore
     * replaced by a hyphen.
     *
     * @return the argument name.
     */
    public String toArgumentString() {
        // Locale.ROOT for the same reason as in fromArgumentString: the result must not vary with the default locale
        return name().toLowerCase(java.util.Locale.ROOT).replace('_', '-');
    }

}
65+
66+
67+
/* ------------------------------------------------------------------------ */
68+
/* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS COMMENT. */
69+
/* */
70+
/* The contents of this file are subject to the Mozilla Public License */
71+
/* Version 1.0 (the "License"); you may not use this file except in */
72+
/* compliance with the License. You may obtain a copy of the License at */
73+
/* http://www.mozilla.org/MPL/. */
74+
/* */
75+
/* Software distributed under the License is distributed on an "AS IS" */
76+
/* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See */
77+
/* the License for the specific language governing rights and limitations */
78+
/* under the License. */
79+
/* */
80+
/* The Original Code is: all this file. */
81+
/* */
82+
/* The Initial Developer of the Original Code is Evolved Binary Ltd. */
83+
/* */
84+
/* Contributor(s): none. */
85+
/* ------------------------------------------------------------------------ */

http-client-java/src/main/java/org/expath/httpclient/HttpRequest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ public HttpResponse send(Result result, HttpConnection conn, HttpCredentials cre
2828
public void setHttpVersion(String ver)
2929
throws HttpClientException;
3030
public void setDefaultCharset(String charset);
31+
public void setBomAction(BomAction bomAction);
3132
public void setOverrideType(String type);
3233
public void setHeaders(HeaderSet headers);
3334
public void setBody(HttpRequestBody body)
@@ -60,5 +61,5 @@ public void setBody(HttpRequestBody body)
6061
/* */
6162
/* The Initial Developer of the Original Code is Florent Georges. */
6263
/* */
63-
/* Contributor(s): none. */
64+
/* Contributor(s): Evolved Binary Ltd. */
6465
/* ------------------------------------------------------------------------ */

http-client-java/src/main/java/org/expath/httpclient/impl/BodyFactory.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.apache.hc.core5.http.HeaderElement;
1818
import org.apache.hc.core5.http.message.BasicHeader;
1919
import org.apache.hc.core5.http.message.MessageSupport;
20+
import org.expath.httpclient.BomAction;
2021
import org.expath.httpclient.ContentType;
2122
import org.expath.httpclient.HeaderSet;
2223
import org.expath.httpclient.HttpClientError;
@@ -64,7 +65,7 @@ public static HttpRequestBody makeRequestBody(final Element elem, final Sequence
6465
}
6566
}
6667

67-
public static HttpResponseBody makeResponseBody(final Result result, final ContentType type, final HttpConnection conn)
68+
public static HttpResponseBody makeResponseBody(final Result result, final ContentType type, final BomAction bomAction, final HttpConnection conn)
6869
throws HttpClientException {
6970
if (type == null) {
7071
// it is legitimate to not have a body in a response; for instance
@@ -80,26 +81,26 @@ public static HttpResponseBody makeResponseBody(final Result result, final Conte
8081
return null;
8182
}
8283
if (t.startsWith("multipart/")) {
83-
return new MultipartResponseBody(result, in, type);
84+
return new MultipartResponseBody(result, in, type, bomAction);
8485
} else {
85-
return makeResponsePart(result, null, in, type);
86+
return makeResponsePart(result, null, in, type, bomAction);
8687
}
8788
}
8889

8990
// package-level to be used within MultipartResponseBody ctor
9091
// TODO: Take new methods into account (XHTML, BASE64 and HEX).
91-
static HttpResponseBody makeResponsePart(final Result result, final HeaderSet headers, final InputStream in, final ContentType ctype)
92+
static HttpResponseBody makeResponsePart(final Result result, final HeaderSet headers, final InputStream in, final ContentType ctype, final BomAction bomAction)
9293
throws HttpClientException {
9394
switch (parseType(ctype)) {
9495
case XML:
9596
// TODO: 'content_type' is the header Content-Type without any param
9697
// (i.e. "text/xml".) Should we keep this, or put the whole header
9798
// (i.e. "text/xml; charset=utf-8")? (and for other types as well...)
98-
return new XmlResponseBody(result, in, ctype, headers, false);
99+
return new XmlResponseBody(result, in, ctype, bomAction, headers, false);
99100
case HTML:
100-
return new XmlResponseBody(result, in, ctype, headers, true);
101+
return new XmlResponseBody(result, in, ctype, bomAction, headers, true);
101102
case TEXT:
102-
return new TextResponseBody(result, in, ctype, headers);
103+
return new TextResponseBody(result, in, ctype, bomAction, headers);
103104
case BINARY:
104105
return new BinaryResponseBody(result, in, ctype, headers);
105106
default:
@@ -278,5 +279,5 @@ else if ("base64".equals(m)) {
278279
/* */
279280
/* The Initial Developer of the Original Code is Florent Georges. */
280281
/* */
281-
/* Contributor(s): none. */
282+
/* Contributor(s): Evolved Binary Ltd. */
282283
/* ------------------------------------------------------------------------ */

http-client-java/src/main/java/org/expath/httpclient/impl/HttpRequestImpl.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.expath.httpclient.impl;
1111

12+
import org.expath.httpclient.BomAction;
1213
import org.slf4j.Logger;
1314
import org.slf4j.LoggerFactory;
1415
import org.apache.hc.core5.http.Header;
@@ -68,7 +69,7 @@ public HttpResponse send(Result result, HttpConnection conn, HttpCredentials cre
6869
LOG.debug("There is no Content-Type, we assume there is no content");
6970
}
7071
else {
71-
body = BodyFactory.makeResponseBody(result, type, conn);
72+
body = BodyFactory.makeResponseBody(result, type, myBomAction, conn);
7273
}
7374
}
7475
long stop = System.currentTimeMillis();
@@ -135,6 +136,11 @@ public void setDefaultCharset(final String charset) {
135136
myDefaultCharset = charset;
136137
}
137138

139+
@Override
140+
public void setBomAction(final BomAction bomAction) {
141+
myBomAction = bomAction;
142+
}
143+
138144
@Override
139145
public void setOverrideType(String type)
140146
{
@@ -210,6 +216,7 @@ public void setPreemptiveAuthentication(final boolean preemptiveAuthentication)
210216
private String myHref;
211217
private String myHttpVer;
212218
private String myDefaultCharset;
219+
private BomAction myBomAction = BomAction.PRESERVE_IGNORE;
213220
private String myOverrideType;
214221
private boolean myStatusOnly;
215222
private boolean myFollowRedirect = true;
@@ -240,5 +247,5 @@ public void setPreemptiveAuthentication(final boolean preemptiveAuthentication)
240247
/* */
241248
/* The Initial Developer of the Original Code is Florent Georges. */
242249
/* */
243-
/* Contributor(s): none. */
250+
/* Contributor(s): Evolved Binary Ltd. */
244251
/* ------------------------------------------------------------------------ */

http-client-java/src/main/java/org/expath/httpclient/impl/MultipartResponseBody.java

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,14 @@
3535
*/
3636
public class MultipartResponseBody implements HttpResponseBody {
3737

38-
public MultipartResponseBody(final Result result, final InputStream in, final ContentType type)
38+
public MultipartResponseBody(final Result result, final InputStream in, final ContentType type, final BomAction bomAction)
3939
throws HttpClientException {
4040
if (type == null || type.getType() == null) {
4141
throw new HttpClientException(HttpClientError.HC002, "No content type");
4242
}
4343

4444
myContentType = type;
45+
myBomAction = bomAction;
4546
myParts = new ArrayList<>();
4647

4748
myBoundary = type.getBoundary();
@@ -167,34 +168,31 @@ private HttpResponseBody makeResponsePart(final Result result, final HeaderSet h
167168
throw new HttpClientException(HttpClientError.HC002, "impossible to find the content type");
168169
}
169170
final ContentType type = ContentType.parse(h, null, null);
170-
try {
171-
switch (BodyFactory.parseType(type)) {
172-
case XML: {
173-
final Reader in = parser.getReader();
174-
return new XmlResponseBody(result, in, type, headers, false);
175-
}
176-
case HTML: {
177-
final Reader in = parser.getReader();
178-
return new XmlResponseBody(result, in, type, headers, true);
179-
}
180-
case TEXT: {
181-
final Reader in = parser.getReader();
182-
return new TextResponseBody(result, in, type, headers);
183-
}
184-
case BINARY: {
185-
final InputStream in = parser.getInputStream();
186-
return new BinaryResponseBody(result, in, type, headers);
187-
}
188-
default:
189-
throw new HttpClientException(HttpClientError.HC002, "INTERNAL ERROR: cannot happen");
171+
switch (BodyFactory.parseType(type)) {
172+
case XML: {
173+
final InputStream in = parser.getInputStream();
174+
return new XmlResponseBody(result, in, type, myBomAction, headers, false);
190175
}
191-
} catch (final UnsupportedEncodingException ex) {
192-
throw new HttpClientException(HttpClientError.HC002, "Unable to parse response part", ex);
176+
case HTML: {
177+
final InputStream in = parser.getInputStream();
178+
return new XmlResponseBody(result, in, type, myBomAction, headers, true);
179+
}
180+
case TEXT: {
181+
final InputStream in = parser.getInputStream();
182+
return new TextResponseBody(result, in, type, myBomAction, headers);
183+
}
184+
case BINARY: {
185+
final InputStream in = parser.getInputStream();
186+
return new BinaryResponseBody(result, in, type, headers);
187+
}
188+
default:
189+
throw new HttpClientException(HttpClientError.HC002, "INTERNAL ERROR: cannot happen");
193190
}
194191
}
195192

196193
private List<HttpResponseBody> myParts;
197194
private ContentType myContentType;
195+
private BomAction myBomAction;
198196
private String myBoundary;
199197
private static final Logger LOG = LoggerFactory.getLogger(MultipartResponseBody.class);
200198
}
@@ -217,5 +215,5 @@ private HttpResponseBody makeResponsePart(final Result result, final HeaderSet h
217215
/* */
218216
/* The Initial Developer of the Original Code is Florent Georges. */
219217
/* */
220-
/* Contributor(s): none. */
218+
/* Contributor(s): Evolved Binary Ltd. */
221219
/* ------------------------------------------------------------------------ */

http-client-java/src/main/java/org/expath/httpclient/impl/RequestParser.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,13 @@ else if ( "send-authorization".equals(local) ) {
113113
else if ( "default-charset".equals(local) ) {
114114
req.setDefaultCharset(a.getValue());
115115
}
116+
else if ( "bom-action".equals(local)) {
117+
try {
118+
req.setBomAction(BomAction.fromArgumentString(a.getValue()));
119+
} catch (final IllegalArgumentException e) {
120+
throw new HttpClientException(HttpClientError.HC005, "Unknown http:request/@bom-action" + a.getValue());
121+
}
122+
}
116123
else if ( "override-media-type".equals(local) ) {
117124
req.setOverrideType(a.getValue());
118125
}
@@ -298,5 +305,5 @@ private int toInteger(Attribute a)
298305
/* */
299306
/* The Initial Developer of the Original Code is Florent Georges. */
300307
/* */
301-
/* Contributor(s): none. */
308+
/* Contributor(s): Evolved Binary Ltd. */
302309
/* ------------------------------------------------------------------------ */

http-client-java/src/main/java/org/expath/httpclient/impl/TextResponseBody.java

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,15 @@
1313
import java.nio.charset.Charset;
1414
import java.nio.charset.StandardCharsets;
1515

16+
import com.evolvedbinary.j8xu.BOM;
17+
import com.evolvedbinary.j8xu.io.BomFilterInputStream;
1618
import org.expath.httpclient.*;
1719
import org.expath.httpclient.model.Result;
1820
import org.expath.httpclient.model.TreeBuilder;
1921
import org.expath.tools.ToolsException;
2022

23+
import javax.annotation.Nullable;
24+
2125
/**
2226
* A text body in the response.
2327
*
@@ -27,35 +31,46 @@ public class TextResponseBody implements HttpResponseBody {
2731

2832
public static final Charset DEFAULT_HTTP_TEXT_CHARSET = StandardCharsets.ISO_8859_1;
2933

30-
public TextResponseBody(final Result result, final InputStream in, final ContentType type, final HeaderSet headers)
34+
public TextResponseBody(final Result result, InputStream in, final ContentType type, final BomAction bomAction, final HeaderSet headers)
3135
throws HttpClientException {
3236
myContentType = type;
3337
myHeaders = headers;
3438

35-
final Charset contentCharset;
39+
Charset contentCharset;
3640
if (type.getCharset() != null) {
3741
contentCharset = Charset.forName(type.getCharset());
3842
} else {
3943
contentCharset = DEFAULT_HTTP_TEXT_CHARSET;
4044
}
4145

42-
final Reader reader = new InputStreamReader(in, contentCharset);
43-
result.add(reader, contentCharset);
44-
}
45-
46-
public TextResponseBody(final Result result, final Reader in, final ContentType type, final HeaderSet headers)
47-
throws HttpClientException {
48-
myContentType = type;
49-
myHeaders = headers;
50-
51-
final Charset contentCharset;
52-
if (type.getCharset() != null) {
53-
contentCharset = Charset.forName(type.getCharset());
54-
} else {
55-
contentCharset = DEFAULT_HTTP_TEXT_CHARSET;
46+
if (bomAction != BomAction.PRESERVE_IGNORE) {
47+
final BomFilterInputStream bomIn = new BomFilterInputStream(in);
48+
try {
49+
@Nullable final BOM bom = bomIn.parseBom();
50+
51+
if (bom != null) {
52+
if (bomAction == BomAction.ERROR) {
53+
throw new HttpClientException(HttpClientError.HC002, "bom-action='error' but found BOM: " + bom.name());
54+
}
55+
56+
if (bomAction == BomAction.DROP_IGNORE) {
57+
bomIn.skip(bom.getBomBytes().length);
58+
} else if (bomAction == BomAction.DROP_OVERRIDE_CHARSET) {
59+
contentCharset = bom.getCharset();
60+
bomIn.skip(bom.getBomBytes().length);
61+
} else if (bomAction == BomAction.PRESERVE_OVERRIDE_CHARSET) {
62+
contentCharset = bom.getCharset();
63+
}
64+
}
65+
} catch (final IOException e) {
66+
throw new HttpClientException(HttpClientError.HC002, "Unable to parser BOM: " + e.getMessage(), e);
67+
}
68+
69+
in = bomIn;
5670
}
5771

58-
result.add(in, contentCharset);
72+
final Reader reader = new InputStreamReader(in, contentCharset);
73+
result.add(reader, contentCharset);
5974
}
6075

6176
@Override
@@ -96,5 +111,5 @@ public void outputBody(final TreeBuilder b) throws HttpClientException {
96111
/* */
97112
/* The Initial Developer of the Original Code is Florent Georges. */
98113
/* */
99-
/* Contributor(s): none. */
114+
/* Contributor(s): Evolved Binary Ltd. */
100115
/* ------------------------------------------------------------------------ */

0 commit comments

Comments
 (0)