Skip to content

Commit a3c736d

Browse files
committed
feat: extend proper URL checking to more places (like CSS)
This commit: - extracts the URL checking logic from `BaseURLHandler` (SAX handler) to its own independent class `URLChecker` - applies the same URL-checking logic to CSS as was used in XML, using the new `URLChecker` - applies the URL-checking logic to the few places where it wasn't already applied (in XML handlers)
1 parent 5b5391f commit a3c736d

File tree

7 files changed

+171
-198
lines changed

7 files changed

+171
-198
lines changed

src/main/java/com/adobe/epubcheck/css/CSSHandler.java

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package com.adobe.epubcheck.css;
22

33
import java.util.EnumSet;
4+
import java.util.HashMap;
45
import java.util.List;
6+
import java.util.Map;
57
import java.util.Set;
68
import java.util.regex.Matcher;
79
import java.util.regex.Pattern;
@@ -17,6 +19,7 @@
1719
import org.idpf.epubcheck.util.css.CssGrammar.CssSelector;
1820
import org.idpf.epubcheck.util.css.CssGrammar.CssURI;
1921
import org.idpf.epubcheck.util.css.CssLocation;
22+
import org.w3c.epubcheck.url.URLChecker;
2023

2124
import com.adobe.epubcheck.api.EPUBLocation;
2225
import com.adobe.epubcheck.api.Report;
@@ -34,7 +37,6 @@
3437
import com.google.common.base.CharMatcher;
3538
import com.google.common.collect.Sets;
3639

37-
import io.mola.galimatias.GalimatiasParseException;
3840
import io.mola.galimatias.URL;
3941

4042
public class CSSHandler implements CssContentHandler, CssErrorHandler
@@ -47,6 +49,10 @@ public class CSSHandler implements CssContentHandler, CssErrorHandler
4749
int startingColumnNumber = 0;
4850
static final CharMatcher SPACE_AND_QUOTES = CharMatcher.anyOf(" \t\n\r\f\"'").precomputed();
4951

52+
// map to store parsed URLs
53+
Map<String, URL> parsedURLs = new HashMap<>();
54+
final URLChecker urlChecker;
55+
5056
// vars for font-face info
5157
String fontFamily;
5258
String fontStyle;
@@ -66,6 +72,7 @@ public CSSHandler(ValidationContext context)
6672
this.xrefChecker = context.xrefChecker.orNull();
6773
this.report = context.report;
6874
this.version = context.version;
75+
this.urlChecker = new URLChecker(context);
6976
}
7077

7178
private EPUBLocation getCorrectedEPUBLocation(int lineNumber, int columnNumber, String details)
@@ -314,20 +321,7 @@ else if (propertyName.equals("src"))
314321
{
315322
if (construct.getType() == CssConstruct.Type.URI)
316323
{
317-
fontURI = ((CssURI) construct).toUriString();
318-
319-
// TODO implement more URL checks (like in BaseURLHandler)
320-
URL fontURL = null;
321-
try
322-
{
323-
fontURL = context.url.resolve(fontURI);
324-
} catch (GalimatiasParseException e)
325-
{
326-
report.message(MessageId.RSC_020,
327-
getCorrectedEPUBLocation(declaration.getLocation().getLine(),
328-
declaration.getLocation().getColumn(), declaration.toCssString()),
329-
fontURI, e.getLocalizedMessage());
330-
}
324+
URL fontURL = parsedURLs.get(((CssURI) construct).toUriString());
331325
if (fontURL != null)
332326
{
333327
// check font mimetypes
@@ -348,7 +342,7 @@ else if (version == EPUBVersion.VERSION_3)
348342
report.message(MessageId.CSS_007,
349343
getCorrectedEPUBLocation(declaration.getLocation().getLine(),
350344
declaration.getLocation().getColumn(), declaration.toCssString()),
351-
fontURI, fontMimeType);
345+
fontURL, fontMimeType);
352346
}
353347
}
354348

@@ -388,17 +382,10 @@ private void resolveAndRegister(String uriString, int line, int col, String cssC
388382
// we ignore this case
389383
if (!uriString.startsWith("#"))
390384
{
385+
// Check the URL once and store the parsed URL for later reference
386+
URL url = urlChecker.checkURL(uriString, getCorrectedEPUBLocation(line, col, cssContext));
387+
parsedURLs.put(uriString, url);
391388

392-
// TODO implement more URL checks (like in BaseURLHandler)
393-
URL url = null;
394-
try
395-
{
396-
url = context.url.resolve(uriString);
397-
} catch (GalimatiasParseException e)
398-
{
399-
report.message(MessageId.RSC_020, getCorrectedEPUBLocation(line, col, cssContext),
400-
uriString, e.getLocalizedMessage());
401-
}
402389
if (url != null)
403390
{
404391
xrefChecker.registerReference(url, type, getCorrectedEPUBLocation(line, col, cssContext));

src/main/java/com/adobe/epubcheck/ocf/OCFContainerFileHandler.java

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -82,19 +82,12 @@ else if (fullPath.trim().isEmpty())
8282
return;
8383
}
8484

85-
try
85+
// Parse the rootfile URL
86+
URL rootfileURL = checkURL(fullPath);
87+
if (rootfileURL != null)
8688
{
87-
// Parse the rootfile URL
88-
URL rootfileURL = URL.parse(baseURL(), fullPath);
89-
9089
// Register the parsed rootfile entry to the data model
9190
state.addRootfile(mediaType, rootfileURL);
92-
93-
} catch (GalimatiasParseException e)
94-
{
95-
// FIXME 2022 - test this is reported
96-
report.message(MessageId.RSC_020, location(), fullPath);
97-
return;
9891
}
9992
}
10093

@@ -107,19 +100,14 @@ private void processMappingDoc()
107100
&& !Strings.nullToEmpty(href).trim().isEmpty())
108101
{
109102

110-
try
103+
// Parse the href attribute against the container root URL
104+
URL mappingDocURL = checkURL(href);
105+
if (mappingDocURL != null)
111106
{
112-
// Parse the href attribute against the container root URL
113-
URL mappingDocURL = URL.parse(baseURL(), href);
114-
115107
// Register the parsed mapping document entry to the data model
116108
state.addMappingDocument(mappingDocURL);
117-
} catch (GalimatiasParseException e)
118-
{
119-
// FIXME 2022 - test this is reported
120-
report.message(MessageId.RSC_020, location(), href);
121-
return;
122109
}
110+
123111
}
124112
}
125113

src/main/java/com/adobe/epubcheck/opf/OPFHandler.java

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
import com.google.common.collect.ImmutableList;
4747
import com.google.common.collect.Maps;
4848

49-
import io.mola.galimatias.GalimatiasParseException;
5049
import io.mola.galimatias.URL;
5150

5251
public class OPFHandler extends XMLHandler
@@ -304,22 +303,11 @@ else if (name.equals("reference"))
304303
String href = e.getAttribute("href");
305304
if (href != null && context.xrefChecker.isPresent())
306305
{
307-
308-
// FIXME next test URL string is conforming, better test remote URLs
309-
if (href.matches("^[^:/?#]+://.*"))
310-
{
306+
URL url = checkURL(href);
307+
if (context.isRemote(url)) {
311308
report.info(path, FeatureEnum.REFERENCE, href);
312309
}
313310

314-
URL url;
315-
try
316-
{
317-
url = baseURL().resolve(href);
318-
} catch (GalimatiasParseException e1)
319-
{
320-
report.message(MessageId.RSC_020, location(), href);
321-
return;
322-
}
323311
try
324312
{
325313
context.xrefChecker.get().registerReference(url, XRefChecker.Type.GENERIC, location());

src/main/java/com/adobe/epubcheck/opf/OPFHandler30.java

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,15 @@
4747
import static com.adobe.epubcheck.vocab.PackageVocabs.META_VOCAB;
4848
import static com.adobe.epubcheck.vocab.PackageVocabs.META_VOCAB_URI;
4949

50-
import java.net.URI;
51-
import java.net.URISyntaxException;
5250
import java.util.Deque;
5351
import java.util.IllformedLocaleException;
5452
import java.util.List;
5553
import java.util.Locale;
5654
import java.util.Map;
5755
import java.util.Set;
5856

57+
import org.w3c.epubcheck.url.URLUtils;
58+
5959
import com.adobe.epubcheck.api.EPUBLocation;
6060
import com.adobe.epubcheck.api.QuietReport;
6161
import com.adobe.epubcheck.messages.MessageId;
@@ -403,22 +403,22 @@ private List<String> processCollectionRole(String roleAtt)
403403
ImmutableList.Builder<String> rolesBuilder = ImmutableList.builder();
404404
for (String role : TOKENIZER.split(Strings.nullToEmpty(roleAtt)))
405405
{
406-
if (role.matches("^[^:/?#]+://.*"))
406+
if (URLUtils.isAbsoluteURLString(role))
407407
{
408408
// Role is an absolute IRI
409409
// check that the host component doesn't contain 'idpf.org'
410410
try
411411
{
412-
URI uri = new URI(role);
413-
if (uri.getHost() != null && uri.getHost().contains("idpf.org"))
412+
URL url = URL.parse(role);
413+
if (url.authority() != null && url.authority().contains("idpf.org"))
414414
{
415415
report.message(MessageId.OPF_069, location(), role);
416416
}
417417
else
418418
{
419419
rolesBuilder.add(role);
420420
}
421-
} catch (URISyntaxException e)
421+
} catch (GalimatiasParseException e)
422422
{
423423
report.message(MessageId.OPF_070, location(), role);
424424
}
@@ -448,22 +448,11 @@ private void processLink()
448448
if (href != null)
449449
{ // check by schema
450450

451-
// FIXME next test URL string is conforming, better test remote URLs
452-
if (href.matches("^[^:/?#]+://.*"))
453-
{
451+
URL url = checkURL(href);
452+
if (context.isRemote(url)) {
454453
report.info(path, FeatureEnum.REFERENCE, href);
455454
}
456455

457-
URL url;
458-
try
459-
{
460-
url = baseURL().resolve(href);
461-
} catch (GalimatiasParseException e1)
462-
{
463-
report.message(MessageId.RSC_020, location(), href);
464-
return;
465-
}
466-
467456
if (context.xrefChecker.isPresent())
468457
{
469458
context.xrefChecker.get().registerReference(url, Type.LINK, location());

0 commit comments

Comments
 (0)