#38 Wildcard support + list management testing
Merged 4 years ago by quidam. Opened 5 years ago by gioma1.
gioma1/librejs feature/wildcards  into  master

file modified
+29 -6
@@ -36,13 +36,13 @@ 

    }

  

    async whitelist(...keys) {

-     ListManager.move(this.lists.blacklist, this.lists.whitelist, ...keys);

+     await ListManager.move(this.lists.blacklist, this.lists.whitelist, ...keys);

    }

    async blacklist(...keys) {

-     ListManager.move(this.lists.whitelist, this.lists.blacklist, ...keys);

+     await ListManager.move(this.lists.whitelist, this.lists.blacklist, ...keys);

    }

    async forget(...keys) {

-     await Promise.all(Object.values(this.lists).map(l => l.remove(...keys)));

+     await Promise.all(Object.values(this.lists).map(async l => await l.remove(...keys)));

    }

    /* key is a string representing either a URL or an optional path

      with a trailing (hash).
@@ -62,10 +62,11 @@ 

      if (!match) {

        let url = ListStore.urlItem(key);

        let site = ListStore.siteItem(key);

-       return (blacklist.contains(url) || blacklist.contains(site))

+       return (blacklist.contains(url) || ListManager.siteMatch(site, blacklist)

          ? "blacklisted"

-         : whitelist.contains(url) || whitelist.contains(site)

-         ? "whitelisted" : defValue;

+         : whitelist.contains(url) || ListManager.siteMatch(site, whitelist)

+         ? "whitelisted" : defValue

+       );

      }

  

    	let [hashItem, srcHash] = match; // (hash), hash
@@ -74,6 +75,28 @@ 

          ? "whitelisted"

    			: defValue;

    	}

+ 

+     /*

+       Matches by whole site ("http://some.domain.com/*") supporting also

+       wildcarded subdomains ("https://*.domain.com/*").

+     */

+     static siteMatch(url, list) {

+       let site = ListStore.siteItem(url);

+       if (list.contains(site)) {

+         return site;

+       }

+       site = site.replace(/^([\w-]+:\/\/)?(\w)/, "$1*.$2");

+       for (;;) {

+         if (list.contains(site)) {

+           return site;

+         }

+         let oldKey = site;

+         site = site.replace(/(?:\*\.)*\w+(?=\.)/, "*");

+         if (site === oldKey) {

+           return null;

+         }

+       }

+     }

  }

  

  module.exports = { ListManager };

file modified
+1 -1
@@ -78,7 +78,7 @@ 

        let res = await handler.pre(response);

        if (res) return res;

        if (handler.post) handler = handler.post;

-       if (typeof handler !== "function") ResponseProcessor.ACCEPT;

+       if (typeof handler !== "function") return ResponseProcessor.ACCEPT;

      }

  

      let {requestId, responseHeaders} = request;

file modified
+1 -1
@@ -133,7 +133,6 @@ 

    }

  }

  

- var jssha = require('jssha');

  function hash(source){

  	var shaObj = new jssha("SHA-256","TEXT")

  	shaObj.update(source);
@@ -142,4 +141,5 @@ 

  

  if (typeof module === "object") {

    module.exports = { ListStore, Storage, hash };

+   var jssha = require('jssha');

  }

@@ -60,6 +60,7 @@ 

      <div id="info">

          <div id="site">

            <h2 class="site">This whole site <span></span></h2>

+           <div class="status"></div>

            <div class="buttons">

              <button class="whitelist" name="*">Whitelist</button>

              <button class="blacklist" name="*">Blacklist</button>

@@ -59,6 +59,7 @@ 

      setTimeout(close, 100);

      return;

    }

+   if (button.tagName !== "BUTTON") button = button.closest("button");

    if (button.matches(".toggle-source")) {

      let parent = button.parentNode;

      if (!parent.querySelector(".source").textContent) {
@@ -69,10 +70,13 @@ 

      return;

    }

  	if (!button.matches(".buttons > button")) return;

+   let domain = button.querySelector(".domain");

+ 

  	let li = button.closest("li");

  	let entry = li && li._scriptEntry || [currentReport.url, "Page's site"];

  	let action = button.className;

-   let site = button.name === "*";

+   let site = domain ? domain.textContent : button.name === "*" ? currentReport.site : "";

+ 

    if (site) {

      ([action] = action.split("-"));

    }
@@ -169,8 +173,8 @@ 

  */

  function refreshUI(report) {

    currentReport = report;

- 

-   document.querySelector("#site").className = report.siteStatus || "";

+   let {siteStatus, listedSite} = report;

+   document.querySelector("#site").className = siteStatus || "";

    document.querySelector("#site h2").textContent =

      `This site ${report.site}`;

  
@@ -194,6 +198,20 @@ 

      b.disabled = true;

    }

  

+   if (siteStatus && siteStatus !== "unknown") {

+     let siteContainer = document.querySelector("#site");

+     let statusLabel = siteStatus;

+     if (listedSite && listedSite !== report.site) {

+       statusLabel += ` via ${listedSite}`;

+       siteContainer.querySelector(".forget").disabled = true;

+     }

+     let status = siteContainer.querySelector(".status");

+     status.classList.add(siteStatus);

+     status.textContent = statusLabel;

+   } else {

+     document.querySelector("#site .status").textContent = "";

+   }

+ 

    let noscript = scriptsCount === 0;

    document.body.classList.toggle("empty", noscript);

  }

@@ -94,11 +94,14 @@ 

    display: initial;

  }

  

+ .status {

+   margin: .2em;

+ }

  

- button.whitelist {

+ button.whitelist, .status.whitelisted {

    color: #080;

  }

- button.blacklist {

+ button.blacklist, .status.blacklisted {

    color: #800;

  }

  button.forget {

@@ -40,6 +40,9 @@ 

  					error = "Only one single trailing path wildcard (/*) allowed";

  				}

  			} catch (e) {

+ 				if (/^https?:\/\/\*\./.test(url)) {

+ 					return this.malformedUrl(url.replace("*.", ""));

+ 				}

  				error = "Invalid URL";

  				if (url && !url.includes("://")) error += ": missing protocol, either http:// or https://";

  				else if (url.endsWith("://")) error += ": missing domain name";

@@ -45,10 +45,10 @@ 

          <h3>Settings</h3>

      </div>

      <div id="widgets">

-       <fieldset id="section-lists"><legend>Allow or block scripts matching the following URLs ("*" matches any path)</legend>

+       <fieldset id="section-lists"><legend>Allow or block scripts matching the following URLs ("*."" matches any subdomain, "/*" matches any path)</legend>

          <label>Type a new whitelist / blacklist entry:</label>

          <div id="new-site">

-           <input type="text" id="site" value="" placeholder="https://www.gnu.org/*">

+           <input type="text" id="site" value="" placeholder="https://*.gnu.org/*">

            <button id="cmd-whitelist-site" class="white" title="Whitelist this site" default>Whitelist</button>

            <button id="cmd-blacklist-site" class="red" title="Blacklist this site">Blacklist</button>

          </div>

file modified
+53 -45
@@ -134,6 +134,10 @@ 

  	template.url = url;

  	template.site = ListStore.siteItem(url);

  	template.siteStatus = listManager.getStatus(template.site);

+ 	let list = {"whitelisted": whitelist, "blacklisted": blacklist}[template.siteStatus];

+ 	if (list) {

+ 		template.listedSite = ListManager.siteMatch(template.site, list);

+ 	}

  	return template;

  }

  
@@ -326,7 +330,7 @@ 

  			if (m[action]) {

  				let [key] = m[action];

  				if (m.site) {

- 					key = ListStore.siteItem(key);

+ 					key = ListStore.siteItem(m.site);

  				} else {

  					key = ListStore.inlineItem(key) || key;

  				}
@@ -745,12 +749,12 @@ 

  	let scriptName = url.split("/").pop();

  	if (whitelisted) {

  		if (tabId !== -1) {

- 			let site = ListStore.siteItem(url);

+ 			let site = ListManager.siteMatch(url, whitelist);

  			// Accept without reading script, it was explicitly whitelisted

- 			let reason = whitelist.contains(site)

+ 			let reason = site

  				? `All ${site} whitelisted by user`

  				: "Address whitelisted by user";

- 			addReportEntry(tabId, url, {"whitelisted": [url, reason], url});

+ 			addReportEntry(tabId, url, {"whitelisted": [site || url, reason], url});

  		}

  		if (response.startsWith("javascript:"))

  			return result(response);
@@ -801,42 +805,28 @@ 

  	}

  }

  

- /**

- * 	Tests if a request is google analytics or not

- */

- function test_GA(a){ // TODO: DRY me

- 	// This is just an HTML page

- 	if(a.url == 'https://www.google.com/analytics/#?modal_active=none'){

- 		return false;

- 	}

- 	else if(a.url.match(/https:\/\/www\.google\.com\/analytics\//g)){

- 		dbg_print("%c Google analytics (1)","color:red");

- 		return {cancel: true};

+ function blockGoogleAnalytics(request) {

+ 	let {url} = request;

+ 	let res = {};

+ 	if (url === 'https://www.google-analytics.com/analytics.js' ||

+ 		/^https:\/\/www\.google\.com\/analytics\/[^#]/.test(url)

+ 	) {

+ 		res.cancel = true;

  	}

- 	else if(a.url == 'https://www.google-analytics.com/analytics.js'){

- 		dbg_print("%c Google analytics (2)","color:red");

- 		return {cancel: true};

- 	}

- 	else if(a.url == 'https://www.google.com/analytics/js/analytics.min.js'){

- 		dbg_print("%c Google analytics (3)","color:red");

- 		return {cancel: true};

- 	}

- 	else return false;

+ 	return res;

  }

  

- /**

- *	A callback that every type of request invokes.

- */

- function block_ga(a){

- 	var GA = test_GA(a);

- 	if(GA != false){

- 		return GA;

- 	}

- 	else return {};

+ async function blockBlacklistedScripts(request)  {

+ 	let {url, tabId, documentUrl} = request;

+ 	url = ListStore.urlItem(url);

+ 	let status = listManager.getStatus(url);

+ 	if (status !== "blacklisted") return {};

+ 	let blacklistedSite = ListManager.siteMatch(url, blacklist);

+ 	await addReportEntry(tabId, url, {url: documentUrl,

+ 		"blacklisted": [url, /\*/.test(blacklistedSite) ? `User blacklisted ${blacklistedSite}` : "Blacklisted by user"]});

+ 	return {cancel: true};

  }

  

- 

- 

  /**

  *	This listener gets called as soon as we've got all the HTTP headers, can guess

  * content type and encoding, and therefore correctly parse HTML documents
@@ -855,30 +845,37 @@ 

  		url = ListStore.urlItem(url);

  		let site = ListStore.siteItem(url);

  

- 		let blacklistedSite = blacklist.contains(site);

+ 		let blacklistedSite = ListManager.siteMatch(site, blacklist);

  		let blacklisted = blacklistedSite || blacklist.contains(url);

- 		let topUrl = request.frameAncestors && request.frameAncestors.pop() || documentUrl;

+ 		let topUrl = type === "sub_frame" && request.frameAncestors && request.frameAncestors.pop() || documentUrl;

  

  		if (blacklisted) {

  			if (type === "script") {

- 				// abort the request before the response gets fetched

- 				addReportEntry(tabId, url, {url: topUrl,

- 					"blacklisted": [url, blacklistedSite ? `User blacklisted ${site}` : "Blacklisted by user"]});

+ 				// this shouldn't happen, because we intercept earlier in blockBlacklistedScripts()

  				return ResponseProcessor.REJECT;

  			}

+ 			if (type === "main_frame") { // we handle the page change here too, since we won't call edit_html()

+ 				activityReports[tabId] = await createReport({url: fullUrl, tabId});

+ 				// Go on without parsing the page: it was explicitly blacklisted

+ 				let reason = blacklistedSite

+ 					? `All ${blacklistedSite} blacklisted by user`

+ 					: "Address blacklisted by user";

+ 				await addReportEntry(tabId, url, {"blacklisted": [blacklistedSite || url, reason], url: fullUrl});

+ 			}

  			// use CSP to restrict JavaScript execution in the page

  			request.responseHeaders.unshift({

  				name: `Content-security-policy`,

- 				value: `script-src '${blacklistedSite ? 'self' : 'none'}';`

+ 				value: `script-src 'none';`

  			});

+ 			return {responseHeaders: request.responseHeaders}; // let's skip the inline script parsing, since we block by CSP

  		} else {

- 			let whitelistedSite = whitelist.contains(site);

+ 			let whitelistedSite = ListManager.siteMatch(site, whitelist);

  			let whitelisted = response.whitelisted = whitelistedSite || whitelist.contains(url);

  			if (type === "script") {

  				if (whitelisted) {

  					// accept the script and stop processing

  					addReportEntry(tabId, url, {url: topUrl,

- 						"whitelisted": [url, whitelistedSite ? `User whitelisted ${site}` : "Whitelisted by user"]});

+ 						"whitelisted": [url, whitelistedSite ? `User whitelisted ${whitelistedSite}` : "Whitelisted by user"]});

  					return ResponseProcessor.ACCEPT;

  				} else {

  					let scriptInfo = await ExternalLicenses.check({url: fullUrl, tabId, frameId, documentUrl});
@@ -1189,11 +1186,21 @@ 

  		"web_manifest", "websocket", "xbl", "xml_dtd", "xmlhttprequest", "xslt",

  		"other"

  	];

- 	browser.webRequest.onBeforeRequest.addListener(

- 		block_ga,

+ 	browser.webRequest.onBeforeRequest.addListener(blockGoogleAnalytics,

  		{urls: ["<all_urls>"], types: all_types},

  		["blocking"]

  	);

+ 	browser.webRequest.onBeforeRequest.addListener(blockBlacklistedScripts,

+ 		{urls: ["<all_urls>"], types: ["script"]},

+ 		["blocking"]

+ 	);

+ 	browser.webRequest.onResponseStarted.addListener(request => {

+ 		let {tabId} = request;

+ 		let report = activityReports[tabId];

+ 		if (report) {

+ 			updateBadge(tabId, activityReports[tabId]);

+ 		}

+ 	}, {urls: ["<all_urls>"], types: ["main_frame"]});

  

  	// Analyzes all the html documents and external scripts as they're loaded

  	ResponseProcessor.install(ResponseHandler);
@@ -1208,6 +1215,7 @@ 

  			editHtml,

  			handle_script,

  			ExternalLicenses,

+ 			ListManager, ListStore, Storage,

  		};

  		// create or focus the autotest tab if it's a debugging session

  		if ((await browser.management.getSelf()).installType === "development") {

file modified
+39 -1
@@ -38,15 +38,53 @@ 

    let licensed = `// @license ${license.magnet} ${license.id}\n${nontrivial}\n// @license-end`;

    let unknownLicensed = `// @license ${unknownLicense.magnet} ${unknownLicense.id}\n${nontrivial}\n// @license-end`;

    let malformedLicensed = `// @license\n${nontrivial}`;

- 

    let tab, documentUrl;

  

    beforeAll(async () => {

      let url = browser.extension.getURL("/test/resources/index.html");

      tab = (await browser.tabs.query({url}))[0] || (await browser.tabs.create({url}));

      documentUrl = url;

+ 

    });

  

+   describe("The whitelist/blacklist manager", () => {

+     let {ListManager, ListStore, Storage} = LibreJS;

+     let lm = new ListManager(new ListStore("_test.whitelist", Storage.CSV), new ListStore("_test.blacklist", Storage.CSV), new Set());

+     let forgot = ["http://formerly.whitelist.ed/", "http://formerly.blacklist.ed/"];

+ 

+     beforeAll(async () => {

+       await lm.whitelist("https://fsf.org/*", "https://*.gnu.org/*", forgot[0]);

+       await lm.blacklist("https://*.evil.gnu.org/*", "https://verybad.com/*", forgot[1]);

+     });

+ 

+     it("Should handle basic CRUD operations", async () => {

+        expect(lm.getStatus(forgot[0])).toBe("whitelisted");

+        expect(lm.getStatus(forgot[1])).toBe("blacklisted");

+ 

+        await lm.forget(...forgot);

+ 

+        for (let url of forgot) {

+          expect(lm.getStatus(url)).toBe("unknown");

+        }

+     });

+ 

+     it("Should support full path wildcards", () => {

+       expect(lm.getStatus("https://unknown.org")).toBe("unknown");

+       expect(lm.getStatus("https://fsf.org/some/path")).toBe("whitelisted");

+       expect(lm.getStatus("https://fsf.org/")).toBe("whitelisted");

+       expect(lm.getStatus("https://fsf.org")).toBe("whitelisted");

+       expect(lm.getStatus("https://subdomain.fsf.org")).toBe("unknown");

+       expect(lm.getStatus("https://verybad.com/some/other/path?with=querystring")).toBe("blacklisted");

+     });

+     it("Should support subdomain wildcards", () => {

+       expect(lm.getStatus("https://gnu.org")).toBe("whitelisted");

+       expect(lm.getStatus("https://www.gnu.org")).toBe("whitelisted");

+       expect(lm.getStatus("https://evil.gnu.org")).toBe("blacklisted");

+       expect(lm.getStatus("https://more.evil.gnu.org")).toBe("blacklisted");

+       expect(lm.getStatus("https://more.evil.gnu.org/some/evil/path?too")).toBe("blacklisted");

+     });

+   })

+ 

    describe("The external script source processor", () => {

      let url = "https://www.gnu.org/mock-script.js";

  

Adds support for wildcarding subdomains and/or paths (e.g. https://*.gnu.org/* would match any .gnu.org domain, no matter the path), both in the back-end and in the front-end (main panel and preference panel).
Includes testing for whitelisting and blacklisting with support for wildcards.
Fixed some asynchronous call bugs along the way, discovered running the newly added automated tests :)

2 new commits added

  • Fixed UI inconsistencies when whitelisting/blacklisting through wide wildcard matching.
  • Fix Storage.js throwing exception unless loaded as a module.
5 years ago

Just added a couple bug fixes.
The described UI inconsistencies are about showing the site as whitelisted or blacklisted (correctly) when this happens through a match with a list entry wider than the site itself (e.g. https:/.gnu.org/ vs the narrower automatic site entry https://gnu.org/*), but not providing any information about the actual match and instead showing an enabled "Forget" button, which actually does nothing. This is confusing as best (as noted by Ruben), therefore the fix: showing information about the matching entry, if it's different than the site itself, and disabling the "Forget" button in this case (you can always forget by removing the wider entry, if that's what you really want).

1 new commit added

  • More consistent and efficient blacklisting.
5 years ago

In the just added commit, the two blacklisting-related changes discussed in the last meeting:

  1. Blocking blacklisted scripts before we hit the network, in an onBeforeRequest listener

  2. Prevent parsing inline scripts when the page is blacklisted (relying on CSP to block scripting).

Pull-Request has been merged by quidam

4 years ago