Skip to content

Commit 9bf8d53

Browse files
committed
Improve href-sanitizer scriptlet
Related discussion: uBlockOrigin/uBlock-issues#3487 Add support for `urlskip=` syntax by internally reusing `urlskip=` code.
1 parent 791a2b0 commit 9bf8d53

File tree

3 files changed

+180
-161
lines changed

3 files changed

+180
-161
lines changed

src/js/resources/href-sanitizer.js

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/*******************************************************************************
2+
3+
uBlock Origin - a comprehensive, efficient content blocker
4+
Copyright (C) 2019-present Raymond Hill
5+
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU General Public License as published by
8+
the Free Software Foundation, either version 3 of the License, or
9+
(at your option) any later version.
10+
11+
This program is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
GNU General Public License for more details.
15+
16+
You should have received a copy of the GNU General Public License
17+
along with this program. If not, see {http://www.gnu.org/licenses/}.
18+
19+
Home: https://github.com/gorhill/uBlock
20+
21+
*/
22+
23+
import { registerScriptlet } from './base.js';
24+
import { runAt } from './run-at.js';
25+
import { safeSelf } from './safe-self.js';
26+
import { urlSkip } from '../urlskip.js';
27+
28+
/******************************************************************************/
29+
30+
// Register the imported `urlSkip` helper under the name `urlskip.fn`.
registerScriptlet(urlSkip, { name: 'urlskip.fn' });
33+
34+
/**
35+
* @scriptlet href-sanitizer
36+
*
37+
* @description
38+
* Set the `href` attribute to a value found in the DOM at, or below the
39+
* targeted `a` element, and optionally with transformation steps.
40+
*
41+
* @param selector
42+
* A plain CSS selector for elements whose `href` property must be sanitized.
43+
*
44+
* @param source
45+
* One or more tokens to look up the source of the `href` property, and
46+
* optionally the transformation steps to perform:
47+
* - `text`: Use the text content of the element as the URL
48+
* - `[name]`: Use the value of the attribute `name` as the URL
49+
* - Transformation steps: see `urlskip` documentation
50+
*
51+
* If neither `text` nor `[name]` is present, the URL will be the value of the
52+
* `href` attribute, except that an omitted `source` defaults to `text`.
53+
*
54+
* @example
55+
* `example.org##+js(href-sanitizer, a)`
56+
* `example.org##+js(href-sanitizer, a[title], [title])`
57+
* `example.org##+js(href-sanitizer, a[href*="/away.php?to="], ?to)`
58+
* `example.org##+js(href-sanitizer, a[href*="/redirect"], ?url ?url -base64)`
59+
*
60+
* */
61+
62+
function hrefSanitizer(
    selector = '',
    source = ''
) {
    // Rewrite the `href` attribute of every `a` element matching `selector`
    // with a URL obtained from `source`:
    //   - `text`   : the text content of the element (default when omitted)
    //   - `[name]` : the value of attribute `name`
    //   - `?...`   : the element's `href`, transformed through `urlSkip` steps
    // Nothing is returned; the work is scheduled through `runAt`.
    if ( typeof selector !== 'string' ) { return; }
    if ( selector === '' ) { return; }
    const safe = safeSelf();
    const logPrefix = safe.makeLogPrefix('href-sanitizer', selector, source);
    if ( source === '' ) { source = 'text'; }

    // Give every other anchor still carrying the original `href` the same
    // sanitized value. Returns the number of such anchors.
    const sanitizeCopycats = (href, text) => {
        let elems = [];
        try {
            // The attribute value is arbitrary page content: escape it so that
            // quotes or backslashes cannot break the selector.
            elems = document.querySelectorAll(`a[href="${CSS.escape(href)}"]`);
        }
        catch(ex) {
            // Best effort: an unusable selector means no copycats are updated.
        }
        for ( const elem of elems ) {
            elem.setAttribute('href', text);
        }
        return elems.length;
    };

    // Returns the absolute form of `text` resolved against the document
    // location, or an empty string when `text` cannot be used as a URL.
    const validateURL = text => {
        if ( typeof text !== 'string' ) { return ''; }
        if ( text === '' ) { return ''; }
        // Reject anything containing whitespace or control characters.
        if ( /[\x00-\x20\x7f]/.test(text) ) { return ''; }
        try {
            const url = new URL(text, document.location);
            return url.href;
        } catch(ex) {
            // Not parseable as a URL: fall through to the empty result.
        }
        return '';
    };

    // Returns the candidate URL for `elem` according to `source`, or an empty
    // string when none can be extracted.
    const extractURL = (elem, source) => {
        if ( /^\[.*\]$/.test(source) ) {
            return elem.getAttribute(source.slice(1,-1).trim()) || '';
        }
        if ( source === 'text' ) {
            return elem.textContent
                .replace(/^[^\x21-\x7e]+/, '') // remove leading invalid characters
                .replace(/[^\x21-\x7e]+$/, '') // remove trailing invalid characters
            ;
        }
        if ( source.startsWith('?') ) {
            // Steps are whitespace-separated. The legacy chained form
            // `?a?b` is expanded to the two steps `?a` and `?b`; only tokens
            // starting with `?` are split, other steps are left untouched.
            const steps = source.split(/\s+/).flatMap(step =>
                step.startsWith('?') ? step.split(/(?=\?)/) : [ step ]
            );
            const url = urlSkip(elem.href, false, steps);
            if ( typeof url !== 'string' ) { return ''; }
            return url.replace(/ /g, '%20');
        }
        return '';
    };

    // Sanitize all currently matching anchors. Returns false only when
    // `selector` is rejected by `querySelectorAll`.
    const sanitize = ( ) => {
        let elems = [];
        try {
            elems = document.querySelectorAll(selector);
        }
        catch(ex) {
            return false;
        }
        for ( const elem of elems ) {
            if ( elem.localName !== 'a' ) { continue; }
            if ( elem.hasAttribute('href') === false ) { continue; }
            const href = elem.getAttribute('href');
            const text = extractURL(elem, source);
            const hrefAfter = validateURL(text);
            if ( hrefAfter === '' ) { continue; }
            if ( hrefAfter === href ) { continue; }
            elem.setAttribute('href', hrefAfter);
            const count = sanitizeCopycats(href, hrefAfter);
            safe.uboLog(logPrefix, `Sanitized ${count+1} links to\n${hrefAfter}`);
        }
        return true;
    };

    let timer;

    // Mutation callback: schedule a single sanitizing pass when at least one
    // element node was added and no pass is already pending.
    const onDomChanged = mutations => {
        if ( timer !== undefined ) { return; }
        let shouldSanitize = false;
        for ( const mutation of mutations ) {
            if ( mutation.addedNodes.length === 0 ) { continue; }
            for ( const node of mutation.addedNodes ) {
                if ( node.nodeType !== 1 ) { continue; }
                shouldSanitize = true;
                break;
            }
            if ( shouldSanitize ) { break; }
        }
        if ( shouldSanitize === false ) { return; }
        timer = safe.onIdle(( ) => {
            timer = undefined;
            sanitize();
        });
    };

    // Run a first pass, then keep watching the document for added nodes.
    // No observer is installed when the selector is invalid.
    const start = ( ) => {
        if ( sanitize() === false ) { return; }
        const observer = new MutationObserver(onDomChanged);
        observer.observe(document.body, {
            subtree: true,
            childList: true,
        });
    };

    runAt(( ) => { start(); }, 'interactive');
}
163+
// Register the scriptlet as `href-sanitizer.js`, with `urlskip.js` as an
// alias, in the `ISOLATED` world. The listed dependencies are the helpers
// called from the body of hrefSanitizer.
registerScriptlet(hrefSanitizer, {
    name: 'href-sanitizer.js',
    world: 'ISOLATED',
    aliases: [ 'urlskip.js' ],
    dependencies: [ runAt, safeSelf, urlSkip ],
});

src/js/resources/scriptlets.js

Lines changed: 1 addition & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
*/
2222

2323
import './attribute.js';
24+
import './href-sanitizer.js';
2425
import './replace-argument.js';
2526
import './spoof-css.js';
2627
import './prevent-settimeout.js';
@@ -2576,163 +2577,6 @@ function m3uPrune(
25762577
});
25772578
}
25782579

2579-
/*******************************************************************************
2580-
*
2581-
* @scriptlet href-sanitizer
2582-
*
2583-
* @description
2584-
* Set the `href` attribute to a value found in the DOM at, or below the
2585-
* targeted `a` element.
2586-
*
2587-
* ### Syntax
2588-
*
2589-
* ```text
2590-
* example.org##+js(href-sanitizer, selector [, source])
2591-
* ```
2592-
*
2593-
* - `selector`: required, CSS selector, specifies `a` elements for which the
2594-
* `href` attribute must be overridden.
2595-
* - `source`: optional, default to `text`, specifies from where to get the
2596-
* value which will override the `href` attribute.
2597-
* - `text`: the value will be the first valid URL found in the text
2598-
* content of the targeted `a` element.
2599-
* - `[attr]`: the value will be the attribute _attr_ of the targeted `a`
2600-
* element.
2601-
* - `?param`: the value will be the query parameter _param_ of the URL
2602-
* found in the `href` attribute of the targeted `a` element.
2603-
*
2604-
* ### Examples
2605-
*
2606-
* example.org##+js(href-sanitizer, a)
2607-
* example.org##+js(href-sanitizer, a[title], [title])
2608-
* example.org##+js(href-sanitizer, a[href*="/away.php?to="], ?to)
2609-
*
2610-
* */
2611-
2612-
builtinScriptlets.push({
2613-
name: 'href-sanitizer.js',
2614-
fn: hrefSanitizer,
2615-
world: 'ISOLATED',
2616-
dependencies: [
2617-
'run-at.fn',
2618-
'safe-self.fn',
2619-
],
2620-
});
2621-
function hrefSanitizer(
2622-
selector = '',
2623-
source = ''
2624-
) {
2625-
if ( typeof selector !== 'string' ) { return; }
2626-
if ( selector === '' ) { return; }
2627-
const safe = safeSelf();
2628-
const logPrefix = safe.makeLogPrefix('href-sanitizer', selector, source);
2629-
if ( source === '' ) { source = 'text'; }
2630-
const sanitizeCopycats = (href, text) => {
2631-
let elems = [];
2632-
try {
2633-
elems = document.querySelectorAll(`a[href="${href}"`);
2634-
}
2635-
catch(ex) {
2636-
}
2637-
for ( const elem of elems ) {
2638-
elem.setAttribute('href', text);
2639-
}
2640-
return elems.length;
2641-
};
2642-
const validateURL = text => {
2643-
if ( text === '' ) { return ''; }
2644-
if ( /[\x00-\x20\x7f]/.test(text) ) { return ''; }
2645-
try {
2646-
const url = new URL(text, document.location);
2647-
return url.href;
2648-
} catch(ex) {
2649-
}
2650-
return '';
2651-
};
2652-
const extractParam = (href, source) => {
2653-
if ( Boolean(source) === false ) { return href; }
2654-
const recursive = source.includes('?', 1);
2655-
const end = recursive ? source.indexOf('?', 1) : source.length;
2656-
try {
2657-
const url = new URL(href, document.location);
2658-
let value = url.searchParams.get(source.slice(1, end));
2659-
if ( value === null ) { return href }
2660-
if ( recursive ) { return extractParam(value, source.slice(end)); }
2661-
if ( value.includes(' ') ) {
2662-
value = value.replace(/ /g, '%20');
2663-
}
2664-
return value;
2665-
} catch(x) {
2666-
}
2667-
return href;
2668-
};
2669-
const extractText = (elem, source) => {
2670-
if ( /^\[.*\]$/.test(source) ) {
2671-
return elem.getAttribute(source.slice(1,-1).trim()) || '';
2672-
}
2673-
if ( source.startsWith('?') ) {
2674-
return extractParam(elem.href, source);
2675-
}
2676-
if ( source === 'text' ) {
2677-
return elem.textContent
2678-
.replace(/^[^\x21-\x7e]+/, '') // remove leading invalid characters
2679-
.replace(/[^\x21-\x7e]+$/, '') // remove trailing invalid characters
2680-
;
2681-
}
2682-
return '';
2683-
};
2684-
const sanitize = ( ) => {
2685-
let elems = [];
2686-
try {
2687-
elems = document.querySelectorAll(selector);
2688-
}
2689-
catch(ex) {
2690-
return false;
2691-
}
2692-
for ( const elem of elems ) {
2693-
if ( elem.localName !== 'a' ) { continue; }
2694-
if ( elem.hasAttribute('href') === false ) { continue; }
2695-
const href = elem.getAttribute('href');
2696-
const text = extractText(elem, source);
2697-
const hrefAfter = validateURL(text);
2698-
if ( hrefAfter === '' ) { continue; }
2699-
if ( hrefAfter === href ) { continue; }
2700-
elem.setAttribute('href', hrefAfter);
2701-
const count = sanitizeCopycats(href, hrefAfter);
2702-
safe.uboLog(logPrefix, `Sanitized ${count+1} links to\n${hrefAfter}`);
2703-
}
2704-
return true;
2705-
};
2706-
let observer, timer;
2707-
const onDomChanged = mutations => {
2708-
if ( timer !== undefined ) { return; }
2709-
let shouldSanitize = false;
2710-
for ( const mutation of mutations ) {
2711-
if ( mutation.addedNodes.length === 0 ) { continue; }
2712-
for ( const node of mutation.addedNodes ) {
2713-
if ( node.nodeType !== 1 ) { continue; }
2714-
shouldSanitize = true;
2715-
break;
2716-
}
2717-
if ( shouldSanitize ) { break; }
2718-
}
2719-
if ( shouldSanitize === false ) { return; }
2720-
timer = safe.onIdle(( ) => {
2721-
timer = undefined;
2722-
sanitize();
2723-
});
2724-
};
2725-
const start = ( ) => {
2726-
if ( sanitize() === false ) { return; }
2727-
observer = new MutationObserver(onDomChanged);
2728-
observer.observe(document.body, {
2729-
subtree: true,
2730-
childList: true,
2731-
});
2732-
};
2733-
runAt(( ) => { start(); }, 'interactive');
2734-
}
2735-
27362580
/*******************************************************************************
27372581
*
27382582
* @scriptlet call-nothrow

src/js/urlskip.js

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@
1919
Home: https://github.com/gorhill/uBlock
2020
*/
2121

22-
const safeBase64Map = { '-': '+', '_': '/' };
23-
const safeBase64Replacer = s => safeBase64Map[s];
24-
2522
/**
2623
* @trustedOption urlskip
2724
*
@@ -106,7 +103,11 @@ export function urlSkip(url, blocked, steps, directive = {}) {
106103
}
107104
// Safe Base64
108105
if ( step === '-safebase64' ) {
109-
urlout = urlin.replace(/[-_]/g, safeBase64Replacer);
106+
if ( urlSkip.safeBase64Replacer === undefined ) {
107+
urlSkip.safeBase64Map = { '-': '+', '_': '/' };
108+
urlSkip.safeBase64Replacer = s => urlSkip.safeBase64Map[s];
109+
}
110+
urlout = urlin.replace(/[-_]/g, urlSkip.safeBase64Replacer);
110111
urlout = self.atob(urlout);
111112
continue;
112113
}

0 commit comments

Comments
 (0)