|
1 | 1 | package com.adobe.epubcheck.ocf;
|
2 | 2 |
|
| 3 | +import java.util.LinkedHashSet; |
3 | 4 | import java.util.Set;
|
| 5 | +import java.util.stream.Collectors; |
| 6 | + |
| 7 | +import org.w3c.epubcheck.core.Checker; |
4 | 8 |
|
5 | 9 | import com.adobe.epubcheck.api.EPUBLocation;
|
6 | 10 | import com.adobe.epubcheck.api.Report;
|
7 | 11 | import com.adobe.epubcheck.messages.MessageId;
|
8 | 12 | import com.adobe.epubcheck.opf.ValidationContext;
|
9 | 13 | import com.adobe.epubcheck.util.EPUBVersion;
|
10 |
| -import com.google.common.collect.ImmutableSet; |
| 14 | +import com.google.common.base.Preconditions; |
| 15 | +import com.google.common.collect.ImmutableMap; |
| 16 | +import com.ibm.icu.lang.UCharacter; |
| 17 | +import com.ibm.icu.text.UCharacterIterator; |
| 18 | +import com.ibm.icu.text.UForwardCharacterIterator; |
| 19 | +import com.ibm.icu.text.UnicodeSet; |
11 | 20 |
|
12 |
| -//FIXME 2022 update related PKG-* messages to contain the file name string |
13 |
| -public final class OCFFilenameChecker |
| 21 | +public final class OCFFilenameChecker implements Checker |
14 | 22 | {
|
15 |
| - private static final Set<String> RESTRICTED_30_CHARACTER_SET = ImmutableSet.of("PRIVATE_USE_AREA", |
16 |
| - "ARABIC_PRESENTATION_FORMS_A", "SPECIALS", "SUPPLEMENTARY_PRIVATE_USE_AREA_A", |
17 |
| - "SUPPLEMENTARY_PRIVATE_USE_AREA_B", "VARIATION_SELECTORS_SUPPLEMENT", "TAGS"); |
| 23 | + |
| 24 | + private static final UnicodeSet ASCII = new UnicodeSet("[:ascii:]").freeze(); |
| 25 | + |
| 26 | + private static final UnicodeSet DISALLOWED_EPUB2 = new UnicodeSet() |
| 27 | + // .add(0x002F)// SOLIDUS '/' -- allowed as path separator |
| 28 | + .add(0x0022)// QUOTATION MARK '"' |
| 29 | + .add(0x002A)// ASTERISK '*' |
| 30 | + // .add(0x002E)// FULL STOP '.' -- only disallowed as the last character |
| 31 | + .add(0x003A)// COLON ':' |
| 32 | + .add(0x003C)// LESS-THAN SIGN '<' |
| 33 | + .add(0x003E)// GREATER-THAN SIGN '>' |
| 34 | + .add(0x003F)// QUESTION MARK '?' |
| 35 | + .add(0x005C)// REVERSE SOLIDUS '\' |
| 36 | + .freeze(); |
| 37 | + |
| 38 | + private static final ImmutableMap<String, UnicodeSet> DISALLOWED_EPUB3 = new ImmutableMap.Builder<String, UnicodeSet>() |
| 39 | + .put("ASCII", new UnicodeSet() // |
| 40 | + .addAll(DISALLOWED_EPUB2)// all disallowed in EPUB 2.0.1 |
| 41 | + .add(0x007C) // VERTICAL LINE '|' |
| 42 | + .freeze()) |
| 43 | + .put("NON CHARACTER", new UnicodeSet("[:Noncharacter_Code_Point=Yes:]")// |
| 44 | + .freeze()) |
| 45 | + .put("CONTROL", new UnicodeSet().add(0x007F) // DEL |
| 46 | + .addAll(0x0000, 0x001F) // C0 range |
| 47 | + .addAll(0x0080, 0x009F) // C1 range |
| 48 | + .freeze()) |
| 49 | + .put("PRIVATE USE", new UnicodeSet() // |
| 50 | + .addAll(0xE000, 0xF8FF) // Private Use Area |
| 51 | + .addAll(0xF0000, 0xFFFFF) // Supplementary Private Use Area-A |
| 52 | + .addAll(0x100000, 0x10FFFF) // Supplementary Private Use Area-B |
| 53 | + .freeze()) |
| 54 | + .put("SPECIALS", new UnicodeSet() // |
| 55 | + .addAll(0xFFF0, 0xFFFF) // Specials Blocks |
| 56 | + .freeze()) |
| 57 | + .put("DEPRECATED", new UnicodeSet() // |
| 58 | + .add(0xE0001)// LANGUAGE TAG |
| 59 | + // .add(0xE007F)// CANCEL TAG -- reinstated in Emoji tag sequences |
| 60 | + .freeze()) |
| 61 | + .build(); |
| 62 | + |
| 63 | + private static String toString(int codepoint, String setName) |
| 64 | + { |
| 65 | + assert setName != null; |
| 66 | + StringBuilder result = new StringBuilder().append(String.format("U+%04X ", codepoint)); |
| 67 | + if ("ASCII".equals(setName)) |
| 68 | + { |
| 69 | + result.append('(').append(UCharacter.toString(codepoint)).append(')'); |
| 70 | + } |
| 71 | + else |
| 72 | + { |
| 73 | + String characterName = UCharacter.getName(codepoint); |
| 74 | + if (characterName != null) |
| 75 | + { |
| 76 | + result.append(characterName).append(' '); |
| 77 | + } |
| 78 | + result.append('(').append(setName).append(')'); |
| 79 | + } |
| 80 | + return result.toString(); |
| 81 | + } |
18 | 82 |
|
19 | 83 | private final Report report;
|
20 | 84 | private final EPUBVersion version;
|
21 | 85 | private final EPUBLocation location;
|
| 86 | + private final String filename; |
| 87 | + |
| 88 | + public OCFFilenameChecker(String filename, ValidationContext context) |
| 89 | + { |
| 90 | + this(filename, context, null); |
| 91 | + } |
22 | 92 |
|
23 |
| - public OCFFilenameChecker(ValidationContext context) |
| 93 | + public OCFFilenameChecker(String filename, ValidationContext context, EPUBLocation location) |
24 | 94 | {
|
| 95 | + Preconditions.checkArgument(filename != null); |
| 96 | + Preconditions.checkArgument(context != null); |
| 97 | + this.filename = filename; |
25 | 98 | this.report = context.report;
|
26 | 99 | this.version = context.version;
|
27 |
| - this.location = EPUBLocation.of(context); |
| 100 | + this.location = (location != null) ? location : EPUBLocation.of(context); |
28 | 101 | }
|
29 | 102 |
|
30 |
| - public String checkCompatiblyEscaped(final String str) |
| 103 | + @Override |
| 104 | + public void check() |
31 | 105 | {
|
32 |
| - // don't check remote resources |
33 |
| - if (str.matches("^[^:/?#]+://.*")) |
34 |
| - { |
35 |
| - return ""; |
36 |
| - } |
37 |
| - |
38 |
| - // the test string will be used to compare test result |
39 |
| - String test = checkNonAsciiFilename(str); |
40 |
| - |
41 |
| - if (str.endsWith(".")) |
42 |
| - { |
43 |
| - report.message(MessageId.PKG_011, location, str); |
44 |
| - test += "."; |
45 |
| - } |
46 |
| - |
47 |
| - boolean spaces = false; |
48 |
| - final char[] ascciGraphic = new char[] { '<', '>', '"', '{', '}', '|', '^', '`', '*', |
49 |
| - '?' /* , ':','/', '\\' */ }; |
50 |
| - String result = ""; |
51 |
| - char[] chars = str.toCharArray(); |
52 |
| - for (char c : chars) |
| 106 | + // Iterate through the code points to search disallowed characters |
| 107 | + UCharacterIterator chars = UCharacterIterator.getInstance(filename); |
| 108 | + final Set<String> disallowed = new LinkedHashSet<>(); |
| 109 | + boolean hasSpaces = false; |
| 110 | + boolean isASCIIOnly = true; |
| 111 | + int codepoint; |
| 112 | + while ((codepoint = chars.nextCodePoint()) != UForwardCharacterIterator.DONE) |
53 | 113 | {
|
54 |
| - for (char a : ascciGraphic) |
| 114 | + // Check if the string has non-ASCII characters |
| 115 | + isASCIIOnly = isASCIIOnly && ASCII.contains(codepoint); |
| 116 | + // Check if the string has space characters |
| 117 | + hasSpaces = hasSpaces || UCharacter.isUWhiteSpace(codepoint); |
| 118 | + // Check for disallowed characters |
| 119 | + switch (version) |
55 | 120 | {
|
56 |
| - if (c == a) |
| 121 | + case VERSION_2: |
| 122 | + if (DISALLOWED_EPUB2.contains(codepoint)) |
57 | 123 | {
|
58 |
| - result += "\"" + Character.toString(c) + "\","; |
59 |
| - test += Character.toString(c); |
| 124 | + disallowed.add(toString(codepoint, "ASCII")); |
60 | 125 | }
|
61 |
| - } |
62 |
| - if (Character.isSpaceChar(c)) |
63 |
| - { |
64 |
| - spaces = true; |
65 |
| - test += Character.toString(c); |
| 126 | + break; |
| 127 | + default: |
| 128 | + for (String name : DISALLOWED_EPUB3.keySet()) |
| 129 | + { |
| 130 | + if (DISALLOWED_EPUB3.get(name).contains(codepoint)) |
| 131 | + { |
| 132 | + disallowed.add(toString(codepoint, name)); |
| 133 | + break; |
| 134 | + } |
| 135 | + } |
| 136 | + break; |
66 | 137 | }
|
67 | 138 | }
|
68 |
| - if (result.length() > 1) |
| 139 | + // Check that FULL STOP is not used as the last character |
| 140 | + if (chars.previousCodePoint() == 0x002E) |
69 | 141 | {
|
70 |
| - result = result.substring(0, result.length() - 1); |
71 |
| - report.message(MessageId.PKG_009, location, str, result); |
| 142 | + report.message(MessageId.PKG_011, location, filename); |
72 | 143 | }
|
73 |
| - if (spaces) |
| 144 | + // Report if disallowed characters were found |
| 145 | + if (!disallowed.isEmpty()) |
74 | 146 | {
|
75 |
| - report.message(MessageId.PKG_010, location, str); |
| 147 | + report.message(MessageId.PKG_009, location, filename, |
| 148 | + disallowed.stream().collect(Collectors.joining(", "))); |
76 | 149 | }
|
77 |
| - |
78 |
| - if (version == EPUBVersion.VERSION_3) |
| 150 | + // Report whitespace characters |
| 151 | + if (hasSpaces) |
79 | 152 | {
|
80 |
| - checkCompatiblyEscaped30(str, test); |
| 153 | + report.message(MessageId.PKG_010, location, filename); |
81 | 154 | }
|
82 |
| - return test; |
83 |
| - } |
84 |
| - |
85 |
| - private String checkNonAsciiFilename(final String str) |
86 |
| - { |
87 |
| - String nonAscii = str.replaceAll("[\\p{ASCII}]", ""); |
88 |
| - if (nonAscii.length() > 0) |
| 155 | + // Report non-ASCII characters as usage |
| 156 | + if (!isASCIIOnly) |
89 | 157 | {
|
90 |
| - report.message(MessageId.PKG_012, location, str, nonAscii); |
| 158 | + report.message(MessageId.PKG_012, location, filename); |
91 | 159 | }
|
92 |
| - return nonAscii; |
93 | 160 | }
|
94 | 161 |
|
95 |
| - private String checkCompatiblyEscaped30(String str, String test) |
96 |
| - { |
97 |
| - String result = ""; |
98 |
| - |
99 |
| - char[] chars = str.toCharArray(); |
100 |
| - for (char c : chars) |
101 |
| - { |
102 |
| - if (Character.isISOControl(c)) |
103 |
| - { |
104 |
| - result += "\"" + Character.toString(c) + "\","; |
105 |
| - test += Character.toString(c); |
106 |
| - } |
107 |
| - |
108 |
| - // DEL (U+007F) |
109 |
| - if (c == '\u007F') |
110 |
| - { |
111 |
| - result += "\"" + Character.toString(c) + "\","; |
112 |
| - test += Character.toString(c); |
113 |
| - } |
114 |
| - String unicodeType = Character.UnicodeBlock.of(c).toString(); |
115 |
| - if (RESTRICTED_30_CHARACTER_SET.contains(unicodeType)) |
116 |
| - { |
117 |
| - result += "\"" + Character.toString(c) + "\","; |
118 |
| - } |
119 |
| - } |
120 |
| - if (result.length() > 1) |
121 |
| - { |
122 |
| - result = result.substring(0, result.length() - 1); |
123 |
| - report.message(MessageId.PKG_009, location, str, result); |
124 |
| - } |
125 |
| - return test; |
126 |
| - } |
127 | 162 | }
|
0 commit comments