#34 Inline widget and de-duplication for inline scripts.
Merged 5 years ago by quidam. Opened 5 years ago by gioma1.

file modified
+8 -1
@@ -50,6 +50,14 @@ 

    */

    getStatus(key, defValue = "unknown") {

      let {blacklist, whitelist} = this.lists;

+     let inline = ListStore.inlineItem(key);

+     if (inline) {

+       return blacklist.contains(inline)

+         ? "blacklisted"

+         : whitelist.contains(inline) ? "whitelisted"

+         : defValue;

+     }

+ 

      let match = key.match(/\(([^)]+)\)(?=[^()]*$)/);

      if (!match) {

        let url = ListStore.urlItem(key);
@@ -61,7 +69,6 @@ 

      }

  

    	let [hashItem, srcHash] = match; // (hash), hash

- 

    	return blacklist.contains(hashItem) ? "blacklisted"

    			: this.builtInHashes.has(srcHash) || whitelist.contains(hashItem)

          ? "whitelisted"

file modified
+16 -1
@@ -66,6 +66,13 @@ 

      });

    }

  

+   static inlineItem(url) {

+     // here we simplify and hash inline script references

+     return url.startsWith("inline:") ? url

+       : url.startsWith("view-source:")

+         && url.replace(/^view-source:[\w-+]+:\/+([^/]+).*#line\d+/,"inline://$1#")

+               .replace(/\n[^]*/, s => s.replace(/\s+/g, ' ').substring(0, 16) + "…" + hash(s.trim()));

+   }

    static hashItem(hash) {

      return hash.startsWith("(") ? hash : `(${hash})`;

    }
@@ -125,6 +132,14 @@ 

      return this.items.has(item);

    }

  }

+ 

+ var jssha = require('jssha');

+ function hash(source){

+ 	var shaObj = new jssha("SHA-256","TEXT")

+ 	shaObj.update(source);

+ 	return shaObj.getHash("HEX");

+ }

+ 

  if (typeof module === "object") {

-   module.exports = { ListStore, Storage };

+   module.exports = { ListStore, Storage, hash };

  }

@@ -73,7 +73,10 @@ 

            </p>

            <ul>

              <li id="li-template">

-                 <a class="script-url" href="#"></a>:

+                 <button class="toggle-source show" title="Show code inline">Show</button>

+                 <button class="toggle-source hide" title="Hide code inline">Hide</button>

+                 <a class="script-url" href="#" target="librejs_viewsource"></a>:

+                 <pre class="source"></pre>

                  <p class="reason"></p>

                  <div class="buttons">

                    <button class="whitelist">Whitelist</button>
@@ -81,6 +84,7 @@ 

                    <button class="forget">Forget</button>

                    <button class="forget" name="*">Forget <span class="domain"></span></button>

                  </div>

+ 

                </li>

            </ul>

          </div>

@@ -55,6 +55,19 @@ 

  

  document.querySelector("#info").addEventListener("click", e => {

  	let button = e.target;

+   if (button.tagName === "A") {

+     setTimeout(close, 100);

+     return;

+   }

+   if (button.matches(".toggle-source")) {

+     let parent = button.parentNode;

+     if (!parent.querySelector(".source").textContent) {

+       parent.querySelector("a").click();

+     } else {

+       parent.classList.toggle("visible");

+     }

+     return;

+   }

  	if (!button.matches(".buttons > button")) return;

  	let li = button.closest("li");

  	let entry = li && li._scriptEntry || [currentReport.url, "Page's site"];
@@ -111,7 +124,7 @@ 

     container.classList.add("empty");

   }

   // generate list

-  let viewSourceToHuman = /^view-source:(.*)#line(\d+)\(([^)]*)\).*/;

+  let viewSourceToHuman = /^view-source:(.*)#line(\d+)\(([^)]*)\)[^]*/;

   for (let entry of entries) {

     let [scriptId, reason] = entry;

  	 let li = liTemplate.cloneNode(true);
@@ -119,6 +132,11 @@ 

  	 a.href = scriptId.split("(")[0];

     if (scriptId.startsWith("view-source:")) {

       a.target ="LibreJS-ViewSource";

+      let source = scriptId.match(/\n([^]*)/);

+      if (source)  {

+        li.querySelector(".source").textContent = source[1];

+        li.querySelector(".toggle-source").style.display = "inline";

+      }

       scriptId = scriptId.replace(viewSourceToHuman, "$3 at line $2 of $1");

     }

     a.textContent = scriptId;

@@ -109,6 +109,28 @@ 

    color: #888 !important;

  }

  

+ button.toggle-source {

+   color: #004;

+   margin-right: .5em;

+   width: 4em;

+   overflow: hide;

+ }

+ 

+ pre.source {

+   display: none;

+   background: white;

+   border: 1px solid #444;

+   padding: .5em;

+   overflow: auto;

+   max-height: 8em;

+   white-space: pre-wrap;

+ }

+ 

+ button.hide { display: none  }

+ .visible > button.show { display: none !important}

+ .visible > pre.source { display: block }

+ .visible > button.hide { display: initial }

+ 

  span.accepted, span.blocked {

      color:#008e00;

      font-size:145%;

file modified
+50 -46
@@ -22,10 +22,9 @@ 

  

  var acorn = require('acorn');

  var acornLoose = require('acorn-loose');

- var jssha = require('jssha');

  var legacy_license_lib = require("./legacy_license_check.js");

  var {ResponseProcessor} = require("./bg/ResponseProcessor");

- var {Storage, ListStore} = require("./common/Storage");

+ var {Storage, ListStore, hash} = require("./common/Storage");

  var {ListManager} = require("./bg/ListManager");

  var {ExternalLicenses} = require("./bg/ExternalLicenses");

  
@@ -51,16 +50,6 @@ 

  	}

  }

  

- /**

- *	Wrapper around crypto lib

- *

- */

- function hash(source){

- 	var shaObj = new jssha("SHA-256","TEXT")

- 	shaObj.update(source);

- 	return shaObj.getHash("HEX");

- }

- 

  /*

  	NONTRIVIAL THINGS:

  	- Fetch
@@ -336,7 +325,11 @@ 

  		for (let action of ["whitelist", "blacklist", "forget"]) {

  			if (m[action]) {

  				let [key] = m[action];

- 				if (m.site) key = ListStore.siteItem(key);

+ 				if (m.site) {

+ 					key = ListStore.siteItem(key);

+ 				} else {

+ 					key = ListStore.inlineItem(key) || key;

+ 				}

  				await listManager[action](key);

  				update = true;

  			}
@@ -776,23 +769,20 @@ 

  	let report = activityReports[tabId] || (activityReports[tabId] = await createReport({tabId}));

  	updateBadge(tabId, report, !verdict);

  	let category = await addReportEntry(tabId, sourceHash, {"url": domain, [verdict ? "accepted" : "blocked"]: [url, reason]});

- 	let scriptSource = verdict ? response : editedSource;

  	switch(category) {

  		case "blacklisted":

- 			if (response.startsWith("javascript:"))

- 				return result(`# LibreJS: script ${category} by user.`);

- 			else

- 				return result(`/* LibreJS: script ${category} by user. */`);

+ 			editedSource = `/* LibreJS: script ${category} by user. */`;

+ 			return result(response.startsWith("javascript:")

+ 				? `javascript:void(${encodeURIComponent(editedSource)})` : editedSource);

  		case "whitelisted":

- 			if (response.startsWith("javascript:"))

- 				return result(scriptSource);

- 			else

- 				return result(`/* LibreJS: script ${category} by user. */\n${scriptSource}`);

+ 			return result(response.startsWith("javascript:")

+ 				? response : `/* LibreJS: script ${category} by user. */\n${response}`);

  		default:

-                         if (response.startsWith("javascript:"))

- 				return result(scriptSource);

- 			else

- 				return result(`/* LibreJS: script ${category}. */\n${scriptSource}`);

+ 			let scriptSource = verdict ? response : editedSource;

+       return result(response.startsWith("javascript:")

+ 				? (verdict ? scriptSource : `javascript:void(/* ${scriptSource} */)`)

+ 				: `/* LibreJS: script ${category}. */\n${scriptSource}`

+ 			);

  	}

  }

  
@@ -1053,17 +1043,20 @@ 

  	let findLine = finder => finder.test(html) && html.substring(0, finder.lastIndex).split(/\n/).length || 0;

  	if (read_metadata(meta_element) || license) {

  		console.log("Valid license for intrinsic events found");

- 		let line = 0;

+ 		let line, extras;

  		if (meta_element) {

  		  line = findLine(/id\s*=\s*['"]?LibreJS-info\b/gi);

+ 			extras = "(0)";

  		} else if (license) {

  			line = html.substring(0, html.indexOf(first_script_src)).split(/\n/).length;

+ 			extras = "\n" + first_script_src;

  		}

- 		let viewUrl = line ? `view-source:${documentUrl}#line${line}(<${meta_element ? meta_element.tagName : "SCRIPT"}>)(0)` : url;

+ 		let viewUrl = line ? `view-source:${documentUrl}#line${line}(<${meta_element ? meta_element.tagName : "SCRIPT"}>)${extras}` : url;

  		addReportEntry(tabId, url, {url, "accepted":[viewUrl, `Global license for the page: ${license}`]});

  		// Do not process inline scripts

  		scripts = [];

  	} else {

+ 		let dejaVu = new Map(); // deduplication map & edited script cache

  		let modified = false;

  		// Deal with intrinsic events

  		let intrinsecindex = 0;
@@ -1071,21 +1064,27 @@ 

  		for (let element of html_doc.all) {

  			let line = -1;

  			for (let attr of element.attributes) {

- 				if (attr.name.startsWith("on") || (attr.name === "href" && attr.value.toLowerCase().startsWith("javascript:"))){

+ 				let {name, value} = attr;

+ 				value = value.trim();

+ 				if (name.startsWith("on") || (name === "href" && value.toLowerCase().startsWith("javascript:"))){

  					intrinsecindex++;

  					if (line === -1) {

  						line = findLine(intrinsicFinder);

  					}

  					try {

- 						let url = `view-source:${documentUrl}#line${line}(<${element.tagName} ${attr.name}>)(${intrinsicIndex})`;

- 						let edited = await get_script(attr.value, url, tabId, whitelist.contains(url));

- 							if (edited) {

- 								let value = edited;

- 								if (value !== attr.value) {

- 									modified = true;

- 									attr.value = value;

- 								}

- 							}

+ 						let key = `<${element.tagName} ${name}="${value}">`;

+ 						let edited;

+ 						if (dejaVu.has(key)) {

+ 							edited = dejaVu.get(key);

+ 						} else {

+ 							let url = `view-source:${documentUrl}#line${line}(<${element.tagName} ${name}>)\n${value.trim()}`;

+ 							if (name === "href") value = decodeURIComponent(value);

+ 							edited = await get_script(value, url, tabId, whitelist.contains(url));						dejaVu.set(key, edited);

+ 						}

+ 						if (edited && edited !== value) {

+ 							modified = true;

+ 							attr.value = edited;

+ 						}

  					} catch (e) {

  						console.error(e);

  					}
@@ -1099,14 +1098,19 @@ 

  			let script = scripts[i];

  			let line = findLine(scriptFinder);

  			if (!script.src && !(script.type && script.type !== "text/javascript")) {

- 				let source = script.textContent;

- 				let url = `view-source:${documentUrl}#line${line}(<SCRIPT>)(${i})`;

- 				let edited = await get_script(source, url, tabId, whitelisted, i);

- 				if (edited) {

- 					let edited_source = edited[0];

- 					let unedited_source = source.trim();

- 					if (edited_source.trim() !== unedited_source) {

- 						script.textContent = edited_source;

+ 				let source = script.textContent.trim();

+ 				let editedSource;

+ 				if (dejaVu.has(source)) {

+ 					editedSource = dejaVu.get(source);

+ 				} else {

+ 					let url = `view-source:${documentUrl}#line${line}(<SCRIPT>)\n${source}`;

+ 					let edited = await get_script(source, url, tabId, whitelisted, i);

+ 					editedSource = edited && edited[0].trim();

+ 					dejaVu.set(url, editedSource);

+ 				}

+ 				if (editedSource) {

+ 					if (source !== editedSource) {

+ 						script.textContent = editedSource;

  						modified = modifiedInline = true;

  					}

  				}

file modified
+1 -1
@@ -2,7 +2,7 @@ 

    "manifest_version": 2,

    "name": "GNU LibreJS [webExtensions]",

    "short_name": "LibreJS [experimental]",

-   "version": "7.19rc3",

+   "version": "7.19rc4",

    "author": "various",

    "description": "Only allows free and/or trivial Javascript to run.",

    "applications": {

file modified
+19
@@ -94,6 +94,10 @@ 

      let addScript = (html, script, before = "</head>") =>

        html.replace(before, `<script>${script}</script>${before}`);

  

+     let addToBody = (html, fragment) => html.replace("</body>", `${fragment}</body>`);

+ 

+     let jsUrl = js => `javascript:${encodeURIComponent(js)}`;

+ 

      function extractScripts(html, def = "") {

        let matches = html && html.match(/<script>[^]*?<\/script>/g);

        return matches && matches.join("") || def;
@@ -165,6 +169,21 @@ 

        expect(scripts).toContain(licensed);

        expect(scripts.replace(licensed, "")).not.toContain(nontrivial);

      });

+ 

+     it("should correctly process (de)duplicated inline scripts", async () => {

+       let trivialAsUrl = jsUrl(trivial);

+       let nontrivialAsUrl = jsUrl(nontrivial);

+       let a = (url, label) => `<a href="${url}">${label}</a>`;

+       let mixedPage = `<body></body>`;

+       for (let dup = 0; dup < 3; dup++) {

+         mixedPage = addToBody(mixedPage, a(trivialAsUrl, `Trivial #${dup}`));

+         mixedPage = addToBody(mixedPage, a(nontrivialAsUrl, `Nontrivial #${dup}`));

+       }

+       let processed = await processHtml(mixedPage);

+       expect(processed).not.toBeNull();

+       expect(processed).toContain(trivialAsUrl);

+       expect(processed).not.toContain(nontrivialAsUrl);

+     });

    });

  

    describe("The external (Web Labels) license checker", () => {

As discussed in our last meeting, this provides a collapsible widget (collapsed by default) for inspecting the intercepted inline scripts (both attribute values and script blocks) in the main popup UI.
The inline scripts are also de-duplicated while being scanned: a UI entry is shown for the first instance only (to avoid unnecessary clutter), and the processing results are cached (to avoid unnecessary CPU work).
Tests for (de)duplicated inline scripts in attributes have been provided as well.

1 new commit added

  • Generate code view widget for first inline script with global license too.
5 years ago

1 new commit added

  • Cosmetic changes to the inline code viewer toggle button.
5 years ago

Both issues just discussed in our meeting are addressed by the latest two commits, respectively. Happy testing :)

1 new commit added

  • Fixed inline whitelisting regressions (issue #35).
5 years ago

1 new commit added

  • Fix for large scripts thrashing the black/white lists when added.
5 years ago

Pull-Request has been merged by quidam

5 years ago