#23 Fixes issue #22 and other @license tag parsing problems
Merged 5 years ago by quidam. Opened 5 years ago by gioma1.

file modified
+26 -9
@@ -33,7 +33,7 @@ 

  

    static install(handler, types = ["main_frame", "sub_frame", "script"]) {

      if (listeners.has(handler)) return false;

-     let listener = 

+     let listener =

        async request =>  await new ResponseTextFilter(request).process(handler);

      listeners.set(handler, listener);

      webRequestEvent.addListener(
@@ -80,7 +80,7 @@ 

        if (handler.post) handler = handler.post;

        if (typeof handler !== "function") ResponseProcessor.ACCEPT;

      }

-     

+ 

      let {requestId, responseHeaders} = request;

      let filter = browser.webRequest.filterResponseData(requestId);

      let buffer = [];
@@ -90,11 +90,29 @@ 

      };

  

      filter.onstop = async event => {

-       let decoder = metaData.createDecoder();

+ 

        let params = {stream: true};

-       response.text = buffer.map(

-         chunk => decoder.decode(chunk, params))

-         .join('');

+       // concatenate chunks

+       let size = buffer.reduce((sum, chunk, n) => sum + chunk.byteLength, 0)

+       let allBytes = new Uint8Array(size);

+       let pos = 0;

+       for (let chunk of buffer) {

+         allBytes.set(new Uint8Array(chunk), pos);

+         pos += chunk.byteLength;

+       }

+       buffer = null; // allow garbage collection

+       if (allBytes.indexOf(0) !== -1) {

+         console.debug("Warning: zeroes in bytestream, probable cached encoding mismatch.", request);

+         if (request.type === "script") {

+           console.debug("It's a script, trying to refetch it.");

+           response.text = await (await fetch(request.url, {cache: "reload", credentials: "include"})).text();

+         } else {

+           console.debug("It's a %s, trying to decode it as UTF-16.", request.type);

+           response.text = new TextDecoder("utf-16be").decode(allBytes);

+         }

+       } else {

+         response.text = metaData.createDecoder().decode(allBytes, {stream: true});

+       }

        let editedText = null;

        try {

          editedText = await handler(response);
@@ -108,10 +126,9 @@ 

          filter.write(new TextEncoder().encode(editedText));

        } else {

          // ... otherwise pass all the raw bytes through

-         for (let chunk of buffer) filter.write(chunk);

+         filter.write(allBytes);

        }

- 

-       filter.disconnect();

+       filter.close();

      }

  

      return metaData.forceUTF8() ? {responseHeaders} : ResponseProcessor.ACCEPT;;

file modified
+85 -88
@@ -599,6 +599,8 @@ 

  *	This can only determine if a script is bad, not if it's good

  *

  *	If it passes the intitial pass, it runs the full pass and returns the result

+ 

+ *	It returns an array of [flag (boolean, false if "bad"), reason (string, human readable report)]

  *

  */

  function evaluate(script,name){
@@ -636,27 +638,24 @@ 

  		return [flag,reason];

  	}

  

- 	var final = full_evaluate(script);

- //	final[1] = final[1] + "<br>";

- 	return final;

+ 	return full_evaluate(script);

  }

  

  

  

- function license_valid(matches){

- 	if(matches.length != 4){

- 		return [false, "malformed or unrecognized license tag"];

- 	}

- 	if(matches[1] != "@license"){

- 		return [false, "malformed or unrecognized license tag"];

+ function validateLicense(matches) {

+ 	if (!(Array.isArray(matches) && matches.length === 4)){

+ 		return [false, "Malformed or unrecognized license tag."];

  	}

- 	if(licenses[matches[3]] === undefined){

- 		return [false, "malformed or unrecognized license tag"];

+ 	let [all, tag, link, id] = matches;

+ 	let license = licenses[id];

+ 	if(!license){

+ 		return [false, `Unrecognized license "${id}"`];

  	}

- 	if(licenses[matches[3]]["Magnet link"] != matches[2]){

- 		return [false, "malformed or unrecognized license tag"];

+ 	if(license["Magnet link"] != link){

+ 		return [false, `License magnet link does not match for "${id}".`];

  	}

- 	return [true,"Recognized license as '"+matches[3]+"'<br>"];

+ 	return [true, `Recognized license: "${id}".`];

  }

  /**

  *
@@ -669,91 +668,89 @@ 

  *		reason text

  *	]

  */

- function license_read(script_src, name, external = false){

- 

- 	var reason_text = "";

- 

- 	var edited_src = "";

- 	var unedited_src = script_src;

- 	var nontrivial_status;

- 	var parts_denied = false;

- 	var parts_accepted = false;

- 	var license = legacy_license_lib.check(script_src);

- 	if(license != false){

- 		return [true,script_src,"Licensed under: "+license];

+ function license_read(scriptSrc, name, external = false){

+ 

+ 	let license = legacy_license_lib.check(scriptSrc);

+ 	if (license){

+ 		return [true, scriptSrc, `Licensed under: ${license}`];

  	}

- 	if (listManager.builtInHashes.has(hash(script_src))){

- 		return [true,script_src,"Common script known to be free software."];

+ 	if (listManager.builtInHashes.has(hash(scriptSrc))){

+ 		return [true, scriptSrc, "Common script known to be free software."];

  	}

- 	while(true){ // TODO: refactor me

- 		// TODO: support multiline comments

- 		var matches = /\/[\/\*]\s*?(@license)\s([\S]+)\s([\S]+$)/gm.exec(unedited_src);

- 		var empty = /[^\s]/gm.exec(unedited_src);

- 		if(empty == null){

- 			return [true,edited_src,reason_text];

+ 

+ 	let editedSrc = "";

+ 	let uneditedSrc = scriptSrc.trim();

+ 	let reason = uneditedSrc ? "" : "Empty source.";

+ 	let partsDenied = false;

+ 	let partsAccepted = false;

+ 

+ 	function checkTriviality(s) {

+ 		if (!s.trim()) {

+ 			return true; // empty, ignore it

  		}

- 		if(matches == null){

- 			if (external)

- 				return [false,edited_src,"External script with no known license."];

- 			else

- 				nontrivial_status = evaluate(unedited_src,name);

- 			if(nontrivial_status[0] == true){

- 				parts_accepted = true;

- 				edited_src += unedited_src;

- 			} else{

- 				parts_denied = true;

- 				edited_src += "\n/*\nLIBREJS BLOCKED:"+nontrivial_status[1]+"\n*/\n";

- 			}

- 			reason_text += "\n" + nontrivial_status[1];

+ 		let [trivial, message] = external ?

+ 			[false, "External script with no known license"]

+ 			: evaluate(s, name);

+ 		if (trivial) {

+ 			partsAccepted = true;

+ 			editedSrc += s;

+ 		} else {

+ 			partsDenied = true;

+ 			editedSrc += `\n/*\nLIBREJS BLOCKED: ${message}\n*/\n`;

+ 		}

+ 		reason += `\n${message}`;

+ 		return trivial;

+ 	}

  

- 			if(parts_denied == true && parts_accepted == true){

- 				reason_text = "Script was determined partly non-trivial after editing. (check source for details)\n"+reason_text;

- 			}

- 			if(parts_denied == true && parts_accepted == false){

- 				return [false,edited_src,reason_text];

- 			}

- 			else return [true,edited_src,reason_text];

+ 	while (uneditedSrc) {

+ 		let openingMatch = /\/[\/\*]\s*?(@license)\s+(\S+)\s+(\S+)\s*$/mi.exec(uneditedSrc);

+ 		if (!openingMatch) { // no license found, check for triviality

+ 			checkTriviality(uneditedSrc);

+ 			break;

+ 		}

  

+ 		let openingIndex = openingMatch.index;

+ 		if (openingIndex) {

+ 			// let's check the triviality of the code before the license tag, if any

+ 			checkTriviality(uneditedSrc.substring(0, openingIndex));

  		}

- 		// sponge

- 		dbg_print("undedited_src:");

- 		dbg_print(unedited_src);

- 		dbg_print(matches);

- 		dbg_print("chopping at " + matches["index"] + ".");

- 		var before = unedited_src.substring(0,matches["index"]);

- 		// sponge

- 		dbg_print("before:");

- 		dbg_print(before);

- 		if (external)

- 			nontrivial_status = [true, "External script with no known license"]

- 		else

- 			nontrivial_status = evaluate(before,name);

- 		if(nontrivial_status[0] == true){

- 			parts_accepted = true;

- 			edited_src += before;

- 		} else{

- 			parts_denied = true;

- 			edited_src += "\n/*\nLIBREJS BLOCKED:"+nontrivial_status[1]+"\n*/\n";

+ 		// let's check the actual license

+ 		uneditedSrc = uneditedSrc.substring(openingIndex);

+ 

+ 		let closureMatch = /\/([*/])\s*@license-end\b[^*/\n]*/i.exec(uneditedSrc);

+ 		if (!closureMatch) {

+ 			let msg = "ERROR: @license with no @license-end";

+ 			return [false, `\n/*\n ${msg} \n*/\n`, msg];

  		}

- 		unedited_src = unedited_src.substr(matches["index"],unedited_src.length);

- 		// TODO: support multiline comments

- 		var matches_end = /\/\/\s*?(@license-end)/gm.exec(unedited_src);

- 		if(matches_end == null){

- 			dbg_print("ERROR: @license with no @license-end");

- 			return [false,"\n/*\n ERROR: @license with no @license-end \n*/\n","ERROR: @license with no @license-end"];

+ 

+ 		let closureEndIndex = closureMatch.index + closureMatch[0].length;

+ 		let commentEndOffset = uneditedSrc.substring(closureEndIndex).indexOf(closureMatch[1] === "*" ? "*/" : "\n");

+ 		if (commentEndOffset !== -1) {

+ 			closureEndIndex += commentEndOffset;

  		}

- 		var endtag_end_index = matches_end["index"]+matches_end[0].length;

- 		var license_res = license_valid(matches);

- 		if(license_res[0] == true){

- 			edited_src =  edited_src + unedited_src.substr(0,endtag_end_index);

- 			reason_text += "\n" + license_res[1];

- 		} else{

- 			edited_src = edited_src + "\n/*\n"+license_res[1]+"\n*/\n";

- 			reason_text += "\n" + license_res[1];

+ 

+ 		let [licenseOK, message] = validateLicense(openingMatch);

+ 		if(licenseOK) {

+ 			editedSrc += uneditedSrc.substr(0, closureEndIndex);

+ 			partsAccepted = true;

+ 		} else {

+ 			editedSrc += `\n/*\n${message}\n*/\n`;

+ 			partsDenied = true;

  		}

+ 		reason += `\n${message}`;

+ 

  		// trim off everything we just evaluated

- 		unedited_src = unedited_src.substr(endtag_end_index,unedited_src.length);

+ 		uneditedSrc = uneditedSrc.substring(closureEndIndex).trim();

+ 	}

+ 

+ 	if(partsDenied) {

+ 		if (partsAccepted) {

+ 			reason = `Some parts of the script have been disabled (check the source for details).\n^--- ${reason}`;

+ 		}

+ 		return [false, editedSrc, reason];

  	}

+ 

+ 	return [true, scriptSrc, reason];

  }

  

  /* *********************************************************************************************** */

This PR refactors the @license tag parsing machinery, making it more readable and maintainable, providing more meaningful "reason" messages, and fixing a couple of bugs that caused unlicensed / non-trivial scripts to be accepted anyway, as described in issue #22.

1 new commit added

  • Work-around for a StreamFilter bug that stores cached scripts in a way that later messes up encodings (exacerbated by the license tag fixes).
5 years ago

Pull-Request has been merged by quidam

5 years ago