Merge pull request #1090 from adomasven/feature/iframe-translator-rules

Changes for #1021 to support targetAll translator property.
This commit is contained in:
Dan Stillman 2016-09-28 11:42:58 -04:00 committed by GitHub
commit 753252be77
8 changed files with 210 additions and 84 deletions

View File

@ -550,7 +550,15 @@ function haveTranslators(translators, type) {
return a.label.localeCompare(b.label); return a.label.localeCompare(b.label);
}); });
var promises = [];
for(var i in translators) { for(var i in translators) {
promises.push(translators[i].getCode());
}
return Promise.all(promises).then(function(codes) {
for(var i in translators) {
// Make sure translator code is cached on the object
translators[i].code = codes[i];
var translatorTestView = new TranslatorTestView(); var translatorTestView = new TranslatorTestView();
translatorTestView.initWithTranslatorAndType(translators[i], type); translatorTestView.initWithTranslatorAndType(translators[i], type);
if(translatorTestView.canRun) { if(translatorTestView.canRun) {
@ -562,6 +570,7 @@ function haveTranslators(translators, type) {
var ev = document.createEvent('HTMLEvents'); var ev = document.createEvent('HTMLEvents');
ev.initEvent('ZoteroHaveTranslators-'+type, true, true); ev.initEvent('ZoteroHaveTranslators-'+type, true, true);
document.dispatchEvent(ev); document.dispatchEvent(ev);
});
} }
/** /**

View File

@ -362,7 +362,7 @@ Zotero.Translators.CodeGetter.prototype.getAll = function () {
var TRANSLATOR_REQUIRED_PROPERTIES = ["translatorID", "translatorType", "label", "creator", "target", var TRANSLATOR_REQUIRED_PROPERTIES = ["translatorID", "translatorType", "label", "creator", "target",
"priority", "lastUpdated"]; "priority", "lastUpdated"];
var TRANSLATOR_PASSING_PROPERTIES = TRANSLATOR_REQUIRED_PROPERTIES var TRANSLATOR_PASSING_PROPERTIES = TRANSLATOR_REQUIRED_PROPERTIES
.concat(["browserSupport", "code", "runMode", "itemType"]); .concat(["targetAll", "browserSupport", "code", "runMode", "itemType"]);
var TRANSLATOR_SAVE_PROPERTIES = TRANSLATOR_REQUIRED_PROPERTIES.concat(["browserSupport"]); var TRANSLATOR_SAVE_PROPERTIES = TRANSLATOR_REQUIRED_PROPERTIES.concat(["browserSupport"]);
/** /**
* @class Represents an individual translator * @class Represents an individual translator
@ -425,7 +425,10 @@ Zotero.Translator.prototype.init = function(info) {
if(this.translatorType & TRANSLATOR_TYPES["web"]) { if(this.translatorType & TRANSLATOR_TYPES["web"]) {
// compile web regexp // compile web regexp
this.webRegexp = this.target ? new RegExp(this.target, "i") : null; this.webRegexp = {
root: this.target ? new RegExp(this.target, "i") : null,
all: this.targetAll ? new RegExp(this.targetAll, "i") : null
};
} else if(this.hasOwnProperty("webRegexp")) { } else if(this.hasOwnProperty("webRegexp")) {
delete this.webRegexp; delete this.webRegexp;
} }

View File

@ -1883,7 +1883,8 @@ Zotero.Translate.Web.prototype.Sandbox = Zotero.Translate.Sandbox._inheritFromBa
*/ */
Zotero.Translate.Web.prototype.setDocument = function(doc) { Zotero.Translate.Web.prototype.setDocument = function(doc) {
this.document = doc; this.document = doc;
this.setLocation(doc.location.href); this.rootDocument = doc.defaultView.top.document || doc;
this.setLocation(doc.location.href, this.rootDocument.location.href);
} }
/** /**
@ -1900,9 +1901,11 @@ Zotero.Translate.Web.prototype.setCookieSandbox = function(cookieSandbox) {
* Sets the location to operate upon * Sets the location to operate upon
* *
* @param {String} location The URL of the page to translate * @param {String} location The URL of the page to translate
* @param {String} rootLocation The URL of the root page, within which `location` is embedded
*/ */
Zotero.Translate.Web.prototype.setLocation = function(location) { Zotero.Translate.Web.prototype.setLocation = function(location, rootLocation) {
this.location = location; this.location = location;
this.rootLocation = rootLocation || location;
this.path = this.location; this.path = this.location;
} }
@ -1910,7 +1913,7 @@ Zotero.Translate.Web.prototype.setLocation = function(location) {
* Get potential web translators * Get potential web translators
*/ */
Zotero.Translate.Web.prototype._getTranslatorsGetPotentialTranslators = function() { Zotero.Translate.Web.prototype._getTranslatorsGetPotentialTranslators = function() {
return Zotero.Translators.getWebTranslatorsForLocation(this.location); return Zotero.Translators.getWebTranslatorsForLocation(this.location, this.rootLocation);
} }
/** /**

View File

@ -27,12 +27,12 @@
var TRANSLATOR_REQUIRED_PROPERTIES = ["translatorID", "translatorType", "label", "creator", var TRANSLATOR_REQUIRED_PROPERTIES = ["translatorID", "translatorType", "label", "creator",
"target", "priority", "lastUpdated"]; "target", "priority", "lastUpdated"];
// Properties that are preserved if present // Properties that are preserved if present
var TRANSLATOR_OPTIONAL_PROPERTIES = ["browserSupport", "minVersion", "maxVersion", var TRANSLATOR_OPTIONAL_PROPERTIES = ["targetAll", "browserSupport", "minVersion", "maxVersion",
"inRepository", "configOptions", "displayOptions", "inRepository", "configOptions", "displayOptions",
"hiddenPrefs", "itemType"]; "hiddenPrefs", "itemType"];
// Properties that are passed from background to inject page in connector // Properties that are passed from background to inject page in connector
var TRANSLATOR_PASSING_PROPERTIES = TRANSLATOR_REQUIRED_PROPERTIES. var TRANSLATOR_PASSING_PROPERTIES = TRANSLATOR_REQUIRED_PROPERTIES.
concat(["browserSupport", "code", "runMode", "itemType"]); concat(["targetAll", "browserSupport", "code", "runMode", "itemType"]);
// Properties that are saved in connector if set but not required // Properties that are saved in connector if set but not required
var TRANSLATOR_SAVE_PROPERTIES = TRANSLATOR_REQUIRED_PROPERTIES.concat(["browserSupport"]); var TRANSLATOR_SAVE_PROPERTIES = TRANSLATOR_REQUIRED_PROPERTIES.concat(["browserSupport"]);
@ -115,7 +115,10 @@ Zotero.Translator.prototype.init = function(info) {
if(this.translatorType & TRANSLATOR_TYPES["web"]) { if(this.translatorType & TRANSLATOR_TYPES["web"]) {
// compile web regexp // compile web regexp
this.cacheCode |= !this.target; this.cacheCode |= !this.target;
this.webRegexp = this.target ? new RegExp(this.target, "i") : null; this.webRegexp = {
root: this.target ? new RegExp(this.target, "i") : null,
all: this.targetAll ? new RegExp(this.targetAll, "i") : null
};
} else if(this.hasOwnProperty("webRegexp")) { } else if(this.hasOwnProperty("webRegexp")) {
delete this.webRegexp; delete this.webRegexp;
} }

View File

@ -46,7 +46,7 @@ Zotero.Translators = new function() {
Zotero.debug("Initializing translators"); Zotero.debug("Initializing translators");
var start = new Date; var start = new Date;
_cache = {"import":[], "export":[], "web":[], "search":[]}; _cache = {"import":[], "export":[], "web":[], "webWithTargetAll":[], "search":[]};
_translators = {}; _translators = {};
var sql = "SELECT fileName, metadataJSON, lastModifiedTime FROM translatorCache"; var sql = "SELECT fileName, metadataJSON, lastModifiedTime FROM translatorCache";
@ -152,6 +152,9 @@ Zotero.Translators = new function() {
for (let type in TRANSLATOR_TYPES) { for (let type in TRANSLATOR_TYPES) {
if (translator.translatorType & TRANSLATOR_TYPES[type]) { if (translator.translatorType & TRANSLATOR_TYPES[type]) {
_cache[type].push(translator); _cache[type].push(translator);
if ((translator.translatorType & TRANSLATOR_TYPES.web) && translator.targetAll) {
_cache.webWithTargetAll.push(translator);
}
} }
} }
@ -267,77 +270,93 @@ Zotero.Translators = new function() {
/** /**
* Gets web translators for a specific location * Gets web translators for a specific location
* @param {String} uri The URI for which to look for translators * @param {String} uri The URI for which to look for translators
* @param {String} rootUri The root URI of the page, different from `uri` if running in an iframe
*/ */
this.getWebTranslatorsForLocation = function(uri) { this.getWebTranslatorsForLocation = function(URI, rootURI) {
return this.getAllForType("web").then(function(allTranslators) { var isFrame = URI !== rootURI;
var type = isFrame ? "webWithTargetAll" : "web";
return this.getAllForType(type).then(function(allTranslators) {
var potentialTranslators = []; var potentialTranslators = [];
var translatorConverterFunctions = [];
var properHosts = []; var rootSearchURIs = this.getSearchURIs(rootURI);
var proxyHosts = []; var frameSearchURIs = isFrame ? this.getSearchURIs(URI) : rootSearchURIs;
var properURI = Zotero.Proxies.proxyToProper(uri); Zotero.debug("Translators: Looking for translators for "+Object.keys(frameSearchURIs).join(', '));
var knownProxy = properURI !== uri;
if(knownProxy) {
// if we know this proxy, just use the proper URI for detection
var searchURIs = [properURI];
} else {
var searchURIs = [uri];
// if there is a subdomain that is also a TLD, also test against URI with the domain for (let translator of allTranslators) {
// dropped after the TLD translatorLoop:
// (i.e., www.nature.com.mutex.gmu.edu => www.nature.com) for (let rootSearchURI in rootSearchURIs) {
var m = /^(https?:\/\/)([^\/]+)/i.exec(uri); let isGeneric = (!translator.webRegexp.root && translator.runMode === Zotero.Translator.RUN_MODE_IN_BROWSER);
if(m) { if (!isGeneric && !translator.webRegexp.root) {
// First, drop the 0- if it exists (this is an III invention) continue;
var host = m[2]; }
if(host.substr(0, 2) === "0-") host = host.substr(2); let rootURIMatches = isGeneric || rootSearchURI.length < 8192 && translator.webRegexp.root.test(rootSearchURI);
var hostnames = host.split("."); if (translator.webRegexp.all && rootURIMatches) {
for(var i=1; i<hostnames.length-2; i++) { for (let frameSearchURI in frameSearchURIs) {
if(TLDS[hostnames[i].toLowerCase()]) { let frameURIMatches = frameSearchURI.length < 8192 && translator.webRegexp.all.test(frameSearchURI);
var properHost = hostnames.slice(0, i+1).join(".");
searchURIs.push(m[1]+properHost+uri.substr(m[0].length)); if (frameURIMatches) {
properHosts.push(properHost); potentialTranslators.push(translator);
proxyHosts.push(hostnames.slice(i+1).join(".")); translatorConverterFunctions.push(frameSearchURIs[frameSearchURI]);
// prevent adding the translator multiple times
break translatorLoop;
} }
} }
} }
} else if(!isFrame && (isGeneric || rootURIMatches)) {
potentialTranslators.push(translator);
Zotero.debug("Translators: Looking for translators for "+searchURIs.join(", ")); translatorConverterFunctions.push(rootSearchURIs[rootSearchURI]);
var converterFunctions = [];
for(var i=0; i<allTranslators.length; i++) {
for(var j=0; j<searchURIs.length; j++) {
if((!allTranslators[i].webRegexp
&& allTranslators[i].runMode === Zotero.Translator.RUN_MODE_IN_BROWSER)
|| (uri.length < 8192 && allTranslators[i].webRegexp.test(searchURIs[j]))) {
// add translator to list
potentialTranslators.push(allTranslators[i]);
if(j === 0) {
if(knownProxy) {
converterFunctions.push(Zotero.Proxies.properToProxy);
} else {
converterFunctions.push(null);
}
} else {
converterFunctions.push(new function() {
var re = new RegExp('^https?://(?:[^/]+\\.)?'+Zotero.Utilities.quotemeta(properHosts[j-1])+'(?=/)', "gi");
var proxyHost = proxyHosts[j-1].replace(/\$/g, "$$$$");
return function(uri) { return uri.replace(re, "$&."+proxyHost) };
});
}
// don't add translator more than once
break; break;
} }
} }
} }
return [potentialTranslators, converterFunctions]; return [potentialTranslators, translatorConverterFunctions];
}); }.bind(this));
},
/**
* Get the array of searchURIs and related proxy converter functions
*
* @param {String} URI to get searchURIs and converterFunctions for
*/
this.getSearchURIs = function(URI) {
var properURI = Zotero.Proxies.proxyToProper(URI);
if (properURI !== URI) {
// if we know this proxy, just use the proper URI for detection
let obj = {};
obj[properURI] = Zotero.Proxies.properToProxy;
return obj;
} }
var searchURIs = {};
searchURIs[URI] = null;
// if there is a subdomain that is also a TLD, also test against URI with the domain
// dropped after the TLD
// (i.e., www.nature.com.mutex.gmu.edu => www.nature.com)
var m = /^(https?:\/\/)([^\/]+)/i.exec(URI);
if (m) {
// First, drop the 0- if it exists (this is an III invention)
var host = m[2];
if(host.substr(0, 2) === "0-") host = host.substr(2);
var hostnames = host.split(".");
for (var i=1; i<hostnames.length-2; i++) {
if (TLDS[hostnames[i].toLowerCase()]) {
var properHost = hostnames.slice(0, i+1).join(".");
searchURIs[m[1]+properHost+URI.substr(m[0].length)] = new function() {
var re = new RegExp('^https?://(?:[^/]+\\.)?'+Zotero.Utilities.quotemeta(properHost)+'(?=/)', "gi");
var proxyHost = hostnames.slice(i+1).join(".").replace(/\$/g, "$$$$");
return function(uri) { return uri.replace(re, "$&."+proxyHost) };
};
}
}
}
return searchURIs;
},
/** /**
* Gets import translators for a specific location * Gets import translators for a specific location
* @param {String} location The location for which to look for translators * @param {String} location The location for which to look for translators

View File

@ -757,10 +757,11 @@ var generateTranslatorExportData = Zotero.Promise.coroutine(function* generateTr
/** /**
* Build a dummy translator that can be passed to Zotero.Translate * Build a dummy translator that can be passed to Zotero.Translate
*/ */
function buildDummyTranslator(translatorType, code, translatorID="dummy-translator") { function buildDummyTranslator(translatorType, code, info={}) {
let info = { const TRANSLATOR_TYPES = {"import":1, "export":2, "web":4, "search":8};
"translatorID":translatorID, info = Object.assign({
"translatorType":translatorType, "translatorID":"dummy-translator",
"translatorType":TRANSLATOR_TYPES[translatorType],
"label":"Dummy Translator", "label":"Dummy Translator",
"creator":"Simon Kornblith", "creator":"Simon Kornblith",
"target":"", "target":"",
@ -768,10 +769,9 @@ function buildDummyTranslator(translatorType, code, translatorID="dummy-translat
"browserSupport":"g", "browserSupport":"g",
"inRepository":false, "inRepository":false,
"lastUpdated":"0000-00-00 00:00:00", "lastUpdated":"0000-00-00 00:00:00",
}; }, info);
let translator = new Zotero.Translator(info); let translator = new Zotero.Translator(info);
translator.code = code; translator.code = code;
translator.getCode = function() {return Promise.resolve(code)};
return translator; return translator;
} }

View File

@ -561,7 +561,7 @@ describe("Zotero.Translate", function() {
item.title = "The Definitive Guide of Owls"; item.title = "The Definitive Guide of Owls";
item.tags = ['owl', 'tag']; item.tags = ['owl', 'tag'];
item.complete(); item.complete();
}`, 'child-dummy-translator' }`, {translatorID: 'child-dummy-translator'}
); );
sinon.stub(Zotero.Translators, 'get').withArgs('child-dummy-translator').returns(childTranslator); sinon.stub(Zotero.Translators, 'get').withArgs('child-dummy-translator').returns(childTranslator);

View File

@ -0,0 +1,89 @@
"use strict";
describe("Zotero.Translators", function () {
	describe("#getWebTranslatorsForLocation()", function () {
		var genericTranslator, topLevelTranslator, frameTranslator;

		var noMatchURL = 'http://notowls.com/citation/penguin-migration-patterns';
		var topMatchURL = 'http://www.owl.com/owl_page/snowy_owl';
		var frameMatchURL = 'http://iframe.owl.com/citation/owl-migration-patterns';

		// Placeholder translator source shared by all fixtures; these tests only exercise
		// target matching, so the functions are intentionally empty
		var dummyCode = `function detectWeb() {}; function doWeb() {}`;

		/**
		 * Assert which of the three fixture URLs a target pattern matches
		 *
		 * @param {String} target Regexp source, compiled case-insensitively
		 * @param {Boolean} matchesTop Whether topMatchURL is expected to match
		 * @param {Boolean} matchesFrame Whether frameMatchURL is expected to match
		 */
		function assertTargetMatches(target, matchesTop, matchesFrame) {
			let regexp = new RegExp(target, 'i');
			assert.isFalse(regexp.test(noMatchURL));
			assert.equal(regexp.test(topMatchURL), matchesTop);
			assert.equal(regexp.test(frameMatchURL), matchesFrame);
		}

		before(function* () {
			// No explicit `target`
			genericTranslator = buildDummyTranslator('web', dummyCode, {
				translatorID: 'generic-translator'
			});
			// `target` only
			topLevelTranslator = buildDummyTranslator('web', dummyCode, {
				translatorID: 'top-level-translator',
				target: "https?://www\\.owl\\.com/(citation|owl_page)/.+"
			});
			// `target` and `targetAll`
			frameTranslator = buildDummyTranslator('web', dummyCode, {
				translatorID: 'frame-translator',
				target: "https?://([^.]+\\.)?owl\\.com/(citation|owl_page)/.+",
				targetAll: "https?://iframe\\.owl\\.com/(citation|owl_page)/.+"
			});

			// Serve the fixtures instead of the real translator cache
			let getAllForType = sinon.stub(Zotero.Translators, 'getAllForType');
			getAllForType.withArgs('web').resolves([genericTranslator, topLevelTranslator, frameTranslator]);
			getAllForType.withArgs('webWithTargetAll').resolves([frameTranslator]);

			// Sanity-check the fixture patterns against the fixture URLs
			assertTargetMatches(topLevelTranslator.target, true, false);
			assertTargetMatches(frameTranslator.target, true, true);
			assertTargetMatches(frameTranslator.targetAll, false, true);
		});

		after(function* () {
			Zotero.Translators.getAllForType.restore();
		});

		describe("when called from a root document", function () {
			it("should return generic translators when not matching any translator `target`", function* () {
				var translators = yield Zotero.Translators.getWebTranslatorsForLocation(noMatchURL, noMatchURL);
				assert.equal(translators[0].length, 1);
				assert.equal(translators[0][0].translatorID, 'generic-translator');
			});

			it("should return all matching translators without `targetAll` property", function* () {
				var translators = yield Zotero.Translators.getWebTranslatorsForLocation(topMatchURL, topMatchURL);
				assert.equal(translators[0].length, 2);
				assert.equal(translators[0][0].translatorID, 'generic-translator');
				assert.equal(translators[0][1].translatorID, 'top-level-translator');
			});

			it("should return translators that match both `target` and `targetAll` when both properties present", function* () {
				var translators = yield Zotero.Translators.getWebTranslatorsForLocation(frameMatchURL, frameMatchURL);
				assert.equal(translators[0].length, 2);
				assert.equal(translators[0][0].translatorID, 'generic-translator');
				assert.equal(translators[0][1].translatorID, 'frame-translator');
			});
		});

		describe("when called from an iframe", function () {
			it("should not return generic translators or translators without `targetAll` property", function* () {
				var translators = yield Zotero.Translators.getWebTranslatorsForLocation(frameMatchURL, noMatchURL);
				assert.equal(translators[0].length, 0);
			});

			it("should not return translators that match `target` but not `targetAll`", function* () {
				var translators = yield Zotero.Translators.getWebTranslatorsForLocation(noMatchURL, topMatchURL);
				assert.equal(translators[0].length, 0);
			});

			it("should return translators that match both `target` and `targetAll`", function* () {
				var translators = yield Zotero.Translators.getWebTranslatorsForLocation(frameMatchURL, topMatchURL);
				assert.equal(translators[0].length, 1);
				assert.equal(translators[0][0].translatorID, 'frame-translator');
			});
		});
	});
});