#24 Lenient WebLabels matching (by URL, by id, by name).
Merged 5 years ago by quidam. Opened 5 years ago by gioma1.
gioma1/librejs weblabels/lenient-match  into  master

file modified
+33 -12
@@ -26,10 +26,28 @@ 

  "use strict";

  

  let licensesByLabel = new Map();

+ let licensesByUrl = new Map();

  {

    let {licenses} = require("../license_definitions");

-   for (let l of Object.values(licenses).filter(l => l.identifier)) {

-     licensesByLabel.set(l.identifier, l);

+   let mapByLabel = (label, license) => licensesByLabel.set(label.toUpperCase(), license);

+   for (let [id, l] of Object.entries(licenses)) {

+     let {identifier, canonicalUrl, licenseName} = l;

+     if (identifier) {

+       mapByLabel(identifier, l);

+     } else {

+       l.identifier = id;

+     }

+     if (id !== identifier) {

+       mapByLabel(id, l);

+     }

+     if (licenseName) {

+       mapByLabel(licenseName, l);

+     }

+     if (Array.isArray(canonicalUrl)) {

+       for (let url of canonicalUrl) {

+         licensesByUrl.set(url, l);

+       }

+     }

    }

  }

  
@@ -55,23 +73,26 @@ 

        return null;

      }

      scriptInfo.licenses = new Set();

-     scriptInfo.allFree = true;

      scriptInfo.toString = function() {

        let licenseIds = [...this.licenses].map(l => l.identifier).sort().join(", ");

        return licenseIds

-          ? (this.allFree ? `Free license${this.licenses.length > 1 ? "s" : ""} (${licenseIds})`

-                          : `Mixed free (${licenseIds}) and unknown licenses`)

+          ? `Free license${this.licenses.size > 1 ? "s" : ""} (${licenseIds})`

           : "Unknown license(s)";

      }

- 

-     for (let {label} of scriptInfo.licenseLinks) {

-       if (licensesByLabel.has(label)) {

-         scriptInfo.licenses.add(licensesByLabel.get(label));

-       } else {

-         scriptInfo.allFree = false;

-         break;

+     let match = (map, key) => {

+       if (map.has(key)) {

+         scriptInfo.licenses.add(map.get(key));

+         return true;

        }

+       return false;

+     };

+ 

+     for (let {label, url} of scriptInfo.licenseLinks) {

+       match(licensesByLabel, label = label.trim().toUpperCase()) ||

+         match(licensesByUrl, url) ||

+         match(licensesByLabel, label.replace(/^GNU-|-(?:OR-LATER|ONLY)$/, ''));

      }

+     scriptInfo.free = scriptInfo.licenses.size > 0;

      return scriptInfo;

    },

  

@@ -79,7 +79,7 @@ 

      async checkLicensedScript(m) {

        let {url, cache} = m;

        if (!licensedScripts) licensedScripts = await fetchLicenseInfo(cache);

-       return licensedScripts.get(url);

+       return licensedScripts.get(url) || licensedScripts.get(url.replace(/\?.*/, ''));

      }

    }

  

file modified
+3 -2
@@ -862,6 +862,7 @@ 

  		let {request} = response;

  		let {url, type, tabId, frameId, documentUrl} = request;

  

+ 		let fullUrl = url;

  		url = ListStore.urlItem(url);

  		let site = ListStore.siteItem(url);

  
@@ -891,11 +892,11 @@ 

  						"whitelisted": [url, whitelistedSite ? `User whitelisted ${site}` : "Whitelisted by user"]});

  					return ResponseProcessor.ACCEPT;

  				} else {

- 					let scriptInfo = await ExternalLicenses.check({url, tabId, frameId, documentUrl});

+ 					let scriptInfo = await ExternalLicenses.check({url: fullUrl, tabId, frameId, documentUrl});

  					if (scriptInfo) {

  						let verdict, ret;

  						let msg = scriptInfo.toString();

- 						if (scriptInfo.allFree) {

+ 						if (scriptInfo.free) {

  							verdict = "accepted";

  							ret = ResponseProcessor.ACCEPT;

  						} else {

It seems people have different expectations about WebLabels matching, based on quite confusing docs & specs.
Either way, some sites rely on (sometimes broken) labels, some on (sometimes arbitrary, rather than canonical) URLs.
This PR tries to accommodate as much of this fuzziness as possible, making sense of the author's likely intent.

Some sites positively impacted by this change:

https://melpa.org/
https://try.gitea.io/
https://www.eff.org/
https://ssd.eff.org/
https://www.defectivebydesign.org/
https://openwireless.org/
http://boilr.mobi/
https://necessaryandproportionate.org/
https://posteo.de/en
https://defendinnovation.org/
https://ring.cx/en

1 new commit added

  • WebLabels matching by id made case-insensitive.
5 years ago

1 new commit added

  • Match scripts to weblabels also without query strings.
5 years ago

Pull-Request has been merged by quidam

5 years ago