Skip to content

Commit a483f79

Browse files
committed
Add ability to inject scriptlets according to origin of ancestor contexts
New syntax for scriptlet-based filtering: it is now possible to inject a scriptlet in a context according to whether an ancestor origin matches a specific hostname. Example: `example.com>>##+js(set, iAmEmbeddedInExampleDotCom, true)` The new `>>` specifier means that the scriptlet will not be injected in `example.com`, but will be injected in all direct and indirect embedded contexts in pages loaded from `example.com`. The new ancestor domain specifier also works for entity directives: `example.*>>##+js(set, iAmEmbeddedInExampleDotEffectiveTLD, true)` This is an experimental feature, to be further evaluated and discussed by filter list maintainers. This is not a complete implementation, by choice. Do not open issues regarding this new syntax; the current shortcomings are known. The new syntax is being discussed internally at: uBlockOrigin/uBlock-discussions#244
1 parent e636c32 commit a483f79

10 files changed

+102
-43
lines changed

src/js/benchmarks.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,6 @@ export async function benchmarkCosmeticFiltering() {
321321
frameId: undefined,
322322
hostname: '',
323323
domain: '',
324-
entity: '',
325324
};
326325
const options = {
327326
noSpecificCosmeticFiltering: false,

src/js/cosmetic-filtering.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
import { MRUCache } from './mrucache.js';
2525
import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js';
26+
import { entityFromHostname } from './uri-utils.js';
2627
import logger from './logger.js';
2728
import µb from './background.js';
2829

@@ -818,9 +819,10 @@ CosmeticFilteringEngine.prototype.retrieveSpecificSelectors = function(
818819
3
819820
);
820821
// Retrieve filters with an entity-based hostname value
821-
if ( request.entity !== '' ) {
822+
const entity = entityFromHostname(hostname, request.domain);
823+
if ( entity !== '' ) {
822824
this.specificFilters.retrieve(
823-
`${hostname.slice(0, -request.domain.length)}${request.entity}`,
825+
entity,
824826
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
825827
1
826828
);

src/js/messaging.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import * as sfp from './static-filtering-parser.js';
2525
import {
2626
domainFromHostname,
2727
domainFromURI,
28-
entityFromDomain,
2928
hostnameFromURI,
3029
isNetworkURI,
3130
} from './uri-utils.js';
@@ -684,7 +683,7 @@ const retrieveContentScriptParameters = async function(sender, request) {
684683
request.frameId = frameId;
685684
request.hostname = hostnameFromURI(request.url);
686685
request.domain = domainFromHostname(request.hostname);
687-
request.entity = entityFromDomain(request.domain);
686+
request.ancestors = pageStore.getFrameAncestorDetails(frameId);
688687

689688
const scf = response.specificCosmeticFilters =
690689
cosmeticFilteringEngine.retrieveSpecificSelectors(request, response);

src/js/pagestore.js

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,21 @@ const PageStore = class {
535535
return sender.frameURL;
536536
}
537537

538+
getFrameAncestorDetails(frameId) {
539+
if ( frameId === 0 ) { return []; }
540+
const out = [];
541+
for (;;) {
542+
const frameStore = this.getFrameStore(frameId);
543+
if ( frameStore === null ) { break; }
544+
const { domain, hostname } = frameStore;
545+
if ( hostname !== undefined ) {
546+
out.push({ domain, hostname });
547+
}
548+
frameId = frameStore.parentId;
549+
}
550+
return out.slice(1);
551+
}
552+
538553
// There is no event to tell us a specific subframe has been removed from
539554
// the main document. The code below will remove subframes which are no
540555
// longer present in the root document. Removing obsolete subframes is

src/js/scriptlet-filtering-core.js

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121

2222
import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js';
23+
import { entityFromHostname } from './uri-utils.js';
2324
import { redirectEngine as reng } from './redirect-engine.js';
2425

2526
/******************************************************************************/
@@ -208,13 +209,23 @@ export class ScriptletFilteringEngine {
208209
$scriptlets.clear();
209210
$exceptions.clear();
210211

211-
const { hostname } = request;
212+
const { ancestors = [], domain, hostname } = request;
212213

213214
this.scriptletDB.retrieve(hostname, [ $scriptlets, $exceptions ]);
214-
const entity = request.entity !== ''
215-
? `${hostname.slice(0, -request.domain.length)}${request.entity}`
216-
: '*';
217-
this.scriptletDB.retrieve(entity, [ $scriptlets, $exceptions ], 1);
215+
const entity = entityFromHostname(hostname, domain);
216+
if ( entity !== '' ) {
217+
this.scriptletDB.retrieve(entity, [ $scriptlets, $exceptions ], 1);
218+
} else {
219+
this.scriptletDB.retrieve('*', [ $scriptlets, $exceptions ], 1);
220+
}
221+
for ( const ancestor of ancestors ) {
222+
const { domain, hostname } = ancestor;
223+
this.scriptletDB.retrieve(`${hostname}>>`, [ $scriptlets, $exceptions ], 1);
224+
const entity = entityFromHostname(hostname, domain);
225+
if ( entity !== '' ) {
226+
this.scriptletDB.retrieve(`${entity}>>`, [ $scriptlets, $exceptions ], 1);
227+
}
228+
}
218229
if ( $scriptlets.size === 0 ) { return; }
219230

220231
// Wholly disable scriptlet injection?

src/js/scriptlet-filtering.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
import {
2525
domainFromHostname,
26-
entityFromDomain,
2726
hostnameFromURI,
2827
} from './uri-utils.js';
2928

@@ -335,7 +334,7 @@ export class ScriptletFilteringEngineEx extends ScriptletFilteringEngine {
335334
url: details.url,
336335
hostname,
337336
domain,
338-
entity: entityFromDomain(domain),
337+
ancestors: details.ancestors,
339338
});
340339
if ( scriptletDetails === undefined ) {
341340
contentScriptRegisterer.unregister(hostname);

src/js/static-filtering-parser.js

Lines changed: 53 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,27 @@ export const nodeNameFromNodeType = new Map([
335335

336336
/******************************************************************************/
337337

338+
// Local constants
339+
340+
const DOMAIN_CAN_USE_WILDCARD = 0b000001;
341+
const DOMAIN_CAN_USE_ENTITY = 0b000010;
342+
const DOMAIN_CAN_USE_SINGLE_WILDCARD = 0b000100;
343+
const DOMAIN_CAN_BE_NEGATED = 0b001000;
344+
const DOMAIN_CAN_BE_REGEX = 0b010000;
345+
const DOMAIN_CAN_BE_ANCESTOR = 0b100000;
346+
347+
const DOMAIN_FROM_FROMTO_LIST = DOMAIN_CAN_USE_ENTITY |
348+
DOMAIN_CAN_BE_NEGATED |
349+
DOMAIN_CAN_BE_REGEX;
350+
const DOMAIN_FROM_DENYALLOW_LIST = 0;
351+
const DOMAIN_FROM_EXT_LIST = DOMAIN_CAN_USE_ENTITY |
352+
DOMAIN_CAN_USE_SINGLE_WILDCARD |
353+
DOMAIN_CAN_BE_NEGATED |
354+
DOMAIN_CAN_BE_REGEX |
355+
DOMAIN_CAN_BE_ANCESTOR;
356+
357+
/******************************************************************************/
358+
338359
// Precomputed AST layouts for most common filters.
339360

340361
const astTemplates = {
@@ -1839,7 +1860,7 @@ export class AstFilterParser {
18391860
const hn = match[0].replace(this.reHostnameLabel, s => {
18401861
if ( this.reHasUnicodeChar.test(s) === false ) { return s; }
18411862
if ( s.charCodeAt(0) === 0x2D /* - */ ) { s = '*' + s; }
1842-
return this.normalizeHostnameValue(s, 0b0001) || s;
1863+
return this.normalizeHostnameValue(s, DOMAIN_CAN_USE_WILDCARD) || s;
18431864
});
18441865
normal = hn + normal.slice(match.index + match[0].length);
18451866
}
@@ -2018,11 +2039,11 @@ export class AstFilterParser {
20182039
}
20192040
switch ( nodeOptionType ) {
20202041
case NODE_TYPE_NET_OPTION_NAME_DENYALLOW:
2021-
this.linkDown(next, this.parseDomainList(next, '|'), 0b00000);
2042+
this.linkDown(next, this.parseDomainList(next, '|'), DOMAIN_FROM_DENYALLOW_LIST);
20222043
break;
20232044
case NODE_TYPE_NET_OPTION_NAME_FROM:
20242045
case NODE_TYPE_NET_OPTION_NAME_TO:
2025-
this.linkDown(next, this.parseDomainList(next, '|', 0b11010));
2046+
this.linkDown(next, this.parseDomainList(next, '|', DOMAIN_FROM_FROMTO_LIST));
20262047
break;
20272048
default:
20282049
break;
@@ -2054,7 +2075,7 @@ export class AstFilterParser {
20542075
return this.getNodeTransform(valueNode);
20552076
}
20562077

2057-
parseDomainList(parent, separator, mode = 0b00000) {
2078+
parseDomainList(parent, separator, mode = 0) {
20582079
const parentBeg = this.nodes[parent+NODE_BEG_INDEX];
20592080
const parentEnd = this.nodes[parent+NODE_END_INDEX];
20602081
const containerNode = this.allocTypedNode(
@@ -2128,7 +2149,7 @@ export class AstFilterParser {
21282149
if ( not ) {
21292150
this.addNodeFlags(parent, NODE_FLAG_IS_NEGATED);
21302151
head = this.allocTypedNode(NODE_TYPE_OPTION_VALUE_NOT, beg, beg + 1);
2131-
if ( (parseDetails.mode & 0b1000) === 0 ) {
2152+
if ( (parseDetails.mode & DOMAIN_CAN_BE_NEGATED) === 0 ) {
21322153
this.addNodeFlags(parent, NODE_FLAG_ERROR);
21332154
}
21342155
beg += 1;
@@ -2173,23 +2194,29 @@ export class AstFilterParser {
21732194
parseDetails.len = end - parentBeg;
21742195
}
21752196

2176-
// mode bits:
2177-
// 0b00001: can use wildcard at any position
2178-
// 0b00010: can use entity-based hostnames
2179-
// 0b00100: can use single wildcard
2180-
// 0b01000: can be negated
2181-
// 0b10000: can be a regex
21822197
normalizeDomainValue(node, type, modeBits) {
2183-
const s = this.getNodeString(node);
2198+
const raw = this.getNodeString(node);
2199+
const isAncestor = raw.endsWith('>>');
2200+
if ( isAncestor ) {
2201+
if ( (modeBits & DOMAIN_CAN_BE_ANCESTOR) === 0 ) { return ''; }
2202+
}
2203+
const before = isAncestor ? raw.slice(0, -2) : raw;
2204+
let after;
21842205
if ( type === 0 ) {
2185-
return this.normalizeHostnameValue(s, modeBits);
2206+
after = this.normalizeHostnameValue(before, modeBits) ?? before;
2207+
if ( after === '' ) { return ''; }
2208+
} else {
2209+
if ( (modeBits & DOMAIN_CAN_BE_REGEX) === 0 ) { return ''; }
2210+
const regex = type === 1 ? before : `/${before.slice(10, -2)}/`;
2211+
const source = this.normalizeRegexPattern(regex);
2212+
if ( source === '' ) { return ''; }
2213+
after = type === 2 || source !== regex ? `/${source}/` : before;
2214+
}
2215+
if ( isAncestor ) {
2216+
after = `${after}>>`;
21862217
}
2187-
if ( (modeBits & 0b10000) === 0 ) { return ''; }
2188-
const regex = type === 1 ? s : `/${s.slice(10, -2)}/`;
2189-
const source = this.normalizeRegexPattern(regex);
2190-
if ( source === '' ) { return ''; }
2191-
if ( type === 1 && source === regex ) { return; }
2192-
return `/${source}/`;
2218+
if ( after === raw ) { return; }
2219+
return after;
21932220
}
21942221

21952222
parseExt(parent, anchorBeg, anchorLen) {
@@ -2207,7 +2234,8 @@ export class AstFilterParser {
22072234
);
22082235
this.addFlags(AST_FLAG_HAS_OPTIONS);
22092236
this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next);
2210-
this.linkDown(next, this.parseDomainList(next, ',', 0b11110));
2237+
const down = this.parseDomainList(next, ',', DOMAIN_FROM_EXT_LIST);
2238+
this.linkDown(next, down);
22112239
prev = this.linkRight(prev, next);
22122240
}
22132241
next = this.allocTypedNode(
@@ -2800,31 +2828,25 @@ export class AstFilterParser {
28002828
// Ultimately, let the browser API do the hostname normalization, after
28012829
// making some other trivial checks.
28022830
//
2803-
// mode bits:
2804-
// 0b00001: can use wildcard at any position
2805-
// 0b00010: can use entity-based hostnames
2806-
// 0b00100: can use single wildcard
2807-
// 0b01000: can be negated
2808-
//
28092831
// returns:
28102832
// undefined: no normalization needed, use original hostname
28112833
// empty string: hostname is invalid
28122834
// non-empty string: normalized hostname
2813-
normalizeHostnameValue(s, modeBits = 0b00000) {
2835+
normalizeHostnameValue(s, modeBits = 0) {
28142836
if ( this.reHostnameAscii.test(s) ) { return; }
28152837
if ( this.reBadHostnameChars.test(s) ) { return ''; }
28162838
let hn = s;
28172839
const hasWildcard = hn.includes('*');
28182840
if ( hasWildcard ) {
28192841
if ( modeBits === 0 ) { return ''; }
28202842
if ( hn.length === 1 ) {
2821-
if ( (modeBits & 0b0100) === 0 ) { return ''; }
2843+
if ( (modeBits & DOMAIN_CAN_USE_SINGLE_WILDCARD) === 0 ) { return ''; }
28222844
return;
28232845
}
2824-
if ( (modeBits & 0b0010) !== 0 ) {
2846+
if ( (modeBits & DOMAIN_CAN_USE_ENTITY) !== 0 ) {
28252847
if ( this.rePlainEntity.test(hn) ) { return; }
28262848
if ( this.reIsEntity.test(hn) === false ) { return ''; }
2827-
} else if ( (modeBits & 0b0001) === 0 ) {
2849+
} else if ( (modeBits & DOMAIN_CAN_USE_WILDCARD) === 0 ) {
28282850
return '';
28292851
}
28302852
hn = hn.replace(/\*/g, '__asterisk__');
@@ -2841,7 +2863,7 @@ export class AstFilterParser {
28412863
hn = this.punycoder.hostname.replace(/__asterisk__/g, '*');
28422864
}
28432865
if (
2844-
(modeBits & 0b0001) === 0 && (
2866+
(modeBits & DOMAIN_CAN_USE_WILDCARD) === 0 && (
28452867
hn.charCodeAt(0) === 0x2E /* . */ ||
28462868
exCharCodeAt(hn, -1) === 0x2E /* . */
28472869
)

src/js/tab.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,7 @@ vAPI.Tabs = class extends vAPI.Tabs {
928928
if ( pageStore === null ) { return; }
929929
pageStore.setFrameURL(details);
930930
if ( pageStore.getNetFilteringSwitch() ) {
931+
details.ancestors = pageStore.getFrameAncestorDetails(frameId);
931932
scriptletFilteringEngine.injectNow(details);
932933
}
933934
}

src/js/traffic.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1256,6 +1256,7 @@ const webRequest = {
12561256
const pageStore = µb.pageStoreFromTabId(details.tabId);
12571257
if ( pageStore === null ) { return; }
12581258
if ( pageStore.getNetFilteringSwitch() === false ) { return; }
1259+
details.ancestors = pageStore.getFrameAncestorDetails(details.frameId);
12591260
scriptletFilteringEngine.injectNow(details);
12601261
},
12611262
{

src/js/uri-utils.js

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,15 @@ function entityFromDomain(domain) {
7070
return pos !== -1 ? domain.slice(0, pos) + '.*' : '';
7171
}
7272

73+
function entityFromHostname(hostname, domain) {
74+
if ( domain === undefined ) {
75+
domain = domainFromHostname(hostname);
76+
}
77+
const entity = entityFromDomain(domain);
78+
if ( entity === '' ) { return ''; }
79+
return `${hostname.slice(0, -domain.length)}${entity}`
80+
}
81+
7382
function hostnameFromURI(uri) {
7483
let match = reHostnameFromCommonURL.exec(uri);
7584
if ( match !== null ) { return match[0].slice(8, -1); }
@@ -164,6 +173,7 @@ export {
164173
domainFromHostname,
165174
domainFromURI,
166175
entityFromDomain,
176+
entityFromHostname,
167177
hostnameFromNetworkURL,
168178
hostnameFromURI,
169179
isNetworkURI,

0 commit comments

Comments
 (0)