#29 Fixes for license matching, multiple inline scripts, intrinsic event handling
Merged 5 years ago by quidam. Opened 5 years ago by quidam.
quidam/librejs many-fixes  into  master

file modified
+2 -2
@@ -335,7 +335,7 @@ 

  		"onwebkitanimationiteration": true,

  		"onwebkitanimationstart": true,

  		"onwebkittransitionend": true,

- 		"onerror": true,

+ 		"onerror": false,

  		"onafterprint": true,

  		"onbeforeprint": true,

  		"onbeforeunload": true,
@@ -808,7 +808,7 @@ 

  		"window": false,

  		"document": true,

  		"location": false,

- 		"top": true,

+ 		"top": false,

  		"netscape": true,

  		"Node": true,

  		"Document": true,

file modified
+56 -58
@@ -61,29 +61,6 @@ 

  	return shaObj.getHash("HEX");

  }

  

- 

- // the list of all available event attributes

- var intrinsic_events = [

-     "onload",

-     "onunload",

-     "onclick",

-     "ondblclick",

-     "onmousedown",

-     "onmouseup",

-     "onmouseovr",

-     "onmousemove",

-     "onmouseout",

-     "onfocus",

-     "onblur",

-     "onkeypress",

-     "onkeydown",

-     "onkeyup",

-     "onsubmit",

-     "onreset",

-     "onselect",

-     "onchange"

- ];

- 

  /*

  	NONTRIVIAL THINGS:

  	- Fetch
@@ -516,6 +493,7 @@ 

  			return script.charAt(end+i) == "[";

  		}

  		var error_count = 0;

+ 		var defines_functions = false;

  		while(toke !== undefined && toke.type != acorn.tokTypes.eof){

  			if(toke.type.keyword !== undefined){

  				//dbg_print("Keyword:");
@@ -524,10 +502,8 @@ 

  				// This type of loop detection ignores functional loop alternatives and ternary operators

  

  				if(toke.type.keyword == "function"){

- 					dbg_print("%c NONTRIVIAL: Function declaration.","color:red");

- 					if(DEBUG == false){

- 						return [false,"NONTRIVIAL: Function declaration."];

- 					}

+ 					dbg_print("%c NOTICE: Function declaration.","color:green");

+ 					defines_functions = true;

  				}

  

  				if(loopkeys[toke.type.keyword] !== undefined){
@@ -558,13 +534,6 @@ 

  						}

  					}

  				}else if(status === undefined){// is the identifier user defined?

- 					// Are arguments being passed to a user defined variable?

- 					if(being_called(toke.end)){

- 						dbg_print("%c NONTRIVIAL: User defined variable '"+toke.value+"' called as function","color:red");

- 						if(DEBUG == false){

- 							return [false,"NONTRIVIAL: User defined variable '"+toke.value+"' called as function"];

- 						}

- 					}

  					// Is there bracket suffix notation?

  					if(is_bsn(toke.end)){

  						dbg_print("%c NONTRIVIAL: Bracket suffix notation on variable '"+toke.value+"'","color:red");
@@ -586,7 +555,10 @@ 

  		}

  

  		dbg_print("%cAppears to be trivial.","color:green;");

- 		return [true,"Script appears to be trivial."];

+ 		if (defines_functions === true)

+ 			return [true,"Script appears to be trivial but defines functions."];

+ 		else

+ 			return [true,"Script appears to be trivial."];

  }

  

  
@@ -644,15 +616,23 @@ 

  

  

  function validateLicense(matches) {

- 	if (!(Array.isArray(matches) && matches.length === 4)){

+ 	if (!(Array.isArray(matches) && matches.length >= 4)){

  		return [false, "Malformed or unrecognized license tag."];

  	}

  	let [all, tag, link, id] = matches;

- 	let license = licenses[id];

+ 	let license = null;

+ 	if (licenses[id])

+ 		license = licenses[id];

+ 	for (let key in licenses){

+ 		if (licenses[key]["Magnet link"] === link)

+ 			license = licenses[key];

+ 		if (licenses[key]["URL"] === link)

+ 			license = licenses[key];

+ 	}

  	if(!license){

  		return [false, `Unrecognized license "${id}"`];

  	}

- 	if(license["Magnet link"] != link){

+ 	if (!(license["Magnet link"] === link || license["URL"] === link)){

  		return [false, `License magnet link does not match for "${id}".`];

  	}

  	return [true, `Recognized license: "${id}".`];
@@ -696,7 +676,10 @@ 

  			editedSrc += s;

  		} else {

  			partsDenied = true;

- 			editedSrc += `\n/*\nLIBREJS BLOCKED: ${message}\n*/\n`;

+ 			if (s.startsWith("javascript:"))

+ 				editedSrc += `# LIBREJS BLOCKED: ${message}`;

+ 			else

+ 				editedSrc += `/*\nLIBREJS BLOCKED: ${message}\n*/`;

  		}

  		reason += `\n${message}`;

  		return trivial;
@@ -776,7 +759,10 @@ 

  				: "Address whitelisted by user";

  			addReportEntry(tabId, url, {"whitelisted": [url, reason], url});

  		}

- 		return result(`/* LibreJS: script whitelisted by user preference. */\n${response}`);

+ 		if (response.startsWith("javascript:"))

+ 			return result(response);

+ 		else

+ 			return result(`/* LibreJS: script whitelisted by user preference. */\n${response}`);

  	}

  

  	let [verdict, editedSource, reason] = license_read(response, scriptName, index === -2);
@@ -793,10 +779,20 @@ 

  	let scriptSource = verdict ? response : editedSource;

  	switch(category) {

  		case "blacklisted":

+ 			if (response.startsWith("javascript:"))

+ 				return result(`# LibreJS: script ${category} by user.`);

+ 			else

+ 				return result(`/* LibreJS: script ${category} by user. */`);

  		case "whitelisted":

- 			return result(`/* LibreJS: script ${category} by user. */\n${scriptSource}`);

+ 			if (response.startsWith("javascript:"))

+ 				return result(scriptSource);

+ 			else

+ 				return result(`/* LibreJS: script ${category} by user. */\n${scriptSource}`);

  		default:

- 			return result(`/* LibreJS: script ${category}. */\n${scriptSource}`);

+                         if (response.startsWith("javascript:"))

+ 				return result(scriptSource);

+ 			else

+ 				return result(`/* LibreJS: script ${category}. */\n${scriptSource}`);

  	}

  }

  
@@ -1056,20 +1052,21 @@ 

  		let modified = false;

  

  		// Deal with intrinsic events

+ 		let intrinsecindex = 0;

  		for (let element of html_doc.all) {

- 			let attributes = element.attributes;

- 			for (let event of intrinsic_events) {

- 				if (event in attributes) {

- 					let attr = attributes[event];

+ 			for (let attr of element.attributes){

+ 				if (attr.name.startsWith("on") || (attr.name === "href" && attr.value.startsWith("javascript:"))){

+ 					intrinsecindex++;

  					try {

- 						let edited = await get_script(attr.value, `Intrinsic event [${event}]`);

- 					  if (edited) {

- 							let value = edited[0];

- 							if (value !== attr.value) {

- 								modified = true;

- 								attr.value = value;

+ 						let url = `${documentUrl}# Intrinsic event ${intrinsecindex} [${attr.name}]`;

+ 						let edited = await get_script(attr.value, url, tabId, whitelist.contains(url));

+ 							if (edited) {

+ 								let value = edited;

+ 								if (value !== attr.value) {

+ 									modified = true;

+ 									attr.value = value;

+ 								}

  							}

- 						}

  					} catch (e) {

  						console.error(e);

  					}
@@ -1080,6 +1077,7 @@ 

  		let modifiedInline = false;

  		for(let i = 0, len = scripts.length; i < len; i++) {

  			let script = scripts[i];

+ 			let url = `${documentUrl}# script ${i}`;

  			if (!script.src && !(script.type && script.type !== "text/javascript")) {

  				let edited = await get_script(script.textContent, url, tabId, whitelisted, i);

  				if (edited) {
@@ -1091,11 +1089,11 @@ 

  					}

  				}

  			}

- 			if (modified) {

- 				return modifiedInline

- 					? await remove_noscripts(html_doc)

- 					: doc2HTML(html_doc);

- 			}

+ 		}

+ 		if (modified) {

+ 			return modifiedInline

+ 				? await remove_noscripts(html_doc)

+ 				: doc2HTML(html_doc);

  		}

  	}

  	return null;

The changes are described in each commit head, but here is a list:

  • Re-implement intrinsic event iteration
  • Defining or calling functions does not qualify as nontrivial
  • More generalized license matching
  • Correctly handle multiple inline scripts, multiple intrinsic events, whitelisting/blacklisting and listing in the panel.
  • Tokens onerror and top should not be considered nontrivial

Giorgio said:
I'm not sure of why Pagure doesn't seem to allow me to comment on the PR
(even if it does show an enabled green "Merge" button, instead), but the
changes look good to me, thanks!

Pull-Request has been merged by quidam

5 years ago
Metadata