Quick update of recognizePDF from Q to Bluebird

This should be refactored further, but this is a start at swapping in
Bluebird. Unfortunately, the process doesn't work correctly because of
issues in the translation framework, which will need to be addressed
separately. Right now a parent item is successfully created, but the UI
still shows an error and the PDF isn't placed under the parent.
This commit is contained in:
Dan Stillman 2015-12-07 17:23:10 -05:00
parent fb46d6d216
commit bad92a1d22

View File

@ -69,62 +69,70 @@ var Zotero_RecognizePDF = new function() {
* process is to be interrupted * process is to be interrupted
* @return {Promise} A promise resolved when PDF metadata has been retrieved * @return {Promise} A promise resolved when PDF metadata has been retrieved
*/ */
this.recognize = function(file, libraryID, stopCheckCallback) { this.recognize = Zotero.Promise.coroutine(function* (file, libraryID, stopCheckCallback) {
const MAX_PAGES = 15; const MAX_PAGES = 15;
var me = this; var me = this;
return _extractText(file, MAX_PAGES).then(function(lines) { var lines = yield _extractText(file, MAX_PAGES);
// Look for DOI - Use only first 80 lines to avoid catching article references // Look for DOI - Use only first 80 lines to avoid catching article references
var allText = lines.join("\n"), var allText = lines.join("\n"),
firstChunk = lines.slice(0,80).join('\n'), firstChunk = lines.slice(0,80).join('\n'),
doi = Zotero.Utilities.cleanDOI(firstChunk), doi = Zotero.Utilities.cleanDOI(firstChunk),
promise; promise;
Zotero.debug(allText); Zotero.debug(allText);
if(!doi) {
// Look for a JSTOR stable URL, which can be converted to a DOI by prepending 10.2307
doi = firstChunk.match(/www.\jstor\.org\/stable\/(\S+)/i);
if(doi) {
doi = Zotero.Utilities.cleanDOI(
doi[1].indexOf('10.') == 0 ? doi[1] : '10.2307/' + doi[1]
);
}
}
if(!doi) {
// Look for a JSTOR stable URL, which can be converted to a DOI by prepending 10.2307
doi = firstChunk.match(/www.\jstor\.org\/stable\/(\S+)/i);
if(doi) { if(doi) {
// Look up DOI doi = Zotero.Utilities.cleanDOI(
Zotero.debug("RecognizePDF: Found DOI: "+doi); doi[1].indexOf('10.') == 0 ? doi[1] : '10.2307/' + doi[1]
);
var translateDOI = new Zotero.Translate.Search();
translateDOI.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
translateDOI.setSearch({"itemType":"journalArticle", "DOI":doi});
promise = _promiseTranslate(translateDOI, libraryID);
} else {
promise = Zotero.Promise.reject("No DOI found in text");
} }
}
return promise var newItem;
// Look for ISBNs if no DOI if (doi) {
.catch(function(error) { // Look up DOI
Zotero.debug("RecognizePDF: " + error); Zotero.debug("RecognizePDF: Found DOI: "+doi);
var isbns = _findISBNs(allText);
if (isbns.length) {
Zotero.debug("RecognizePDF: Found ISBNs: " + isbns);
var translate = new Zotero.Translate.Search(); var translateDOI = new Zotero.Translate.Search();
translate.setSearch({"itemType":"book", "ISBN":isbns[0]}); translateDOI.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
return _promiseTranslate(translate, libraryID); translateDOI.setSearch({"itemType":"journalArticle", "DOI":doi});
} else { try {
return Zotero.Promise.reject("No ISBN found in text."); newItem = yield _promiseTranslate(translateDOI, libraryID);
} return newItem;
}) }
catch (e) {
Zotero.debug("RecognizePDF: " + e);
}
}
else {
Zotero.debug("RecognizePDF: No DOI found in text");
}
// Look for ISBNs if no DOI
var isbns = _findISBNs(allText);
if (isbns.length) {
Zotero.debug("RecognizePDF: Found ISBNs: " + isbns);
var translate = new Zotero.Translate.Search();
translate.setSearch({"itemType":"book", "ISBN":isbns[0]});
try {
newItem = yield _promiseTranslate(translate, libraryID);
return newItem;
}
catch (e) {
// If no DOI or ISBN, query Google Scholar // If no DOI or ISBN, query Google Scholar
.catch(function(error) { Zotero.debug("RecognizePDF: " + e);
Zotero.debug("RecognizePDF: " + error); }
return me.GSFullTextSearch.findItem(lines, libraryID, stopCheckCallback); }
}); else {
}); Zotero.debug("RecognizePDF: No ISBN found in text");
} }
return this.GSFullTextSearch.findItem(lines, libraryID, stopCheckCallback);
});
/** /**
* Get text from a PDF * Get text from a PDF
@ -183,8 +191,7 @@ var Zotero_RecognizePDF = new function() {
* Attach appropriate handlers to a Zotero.Translate instance and begin translation * Attach appropriate handlers to a Zotero.Translate instance and begin translation
* @return {Promise} * @return {Promise}
*/ */
function _promiseTranslate(translate, libraryID) { var _promiseTranslate = Zotero.Promise.coroutine(function* (translate, libraryID) {
var deferred = Zotero.Promise.defer();
translate.setHandler("select", function(translate, items, callback) { translate.setHandler("select", function(translate, items, callback) {
for(var i in items) { for(var i in items) {
var obj = {}; var obj = {};
@ -193,7 +200,7 @@ var Zotero_RecognizePDF = new function() {
return; return;
} }
}); });
translate.setHandler("done", function(translate, success) { /*translate.setHandler("done", function(translate, success) {
if(success && translate.newItems.length) { if(success && translate.newItems.length) {
deferred.resolve(translate.newItems[0]); deferred.resolve(translate.newItems[0]);
} else { } else {
@ -202,10 +209,15 @@ var Zotero_RecognizePDF = new function() {
: "Could not find a translator for given search item" : "Could not find a translator for given search item"
); );
} }
});*/
var newItems = yield translate.translate({
libraryID
}); });
translate.translate(libraryID, false); if (newItems.length) {
return deferred.promise; return newItems[0];
} }
throw new Error("No items found");
});
/** /**
* Search ISBNs in text * Search ISBNs in text
@ -333,14 +345,14 @@ var Zotero_RecognizePDF = new function() {
this._progressWindow.addEventListener("keypress", this._keypressCancelHandler); this._progressWindow.addEventListener("keypress", this._keypressCancelHandler);
this._progressWindow.addEventListener("close", this._cancelHandler, false); this._progressWindow.addEventListener("close", this._cancelHandler, false);
Zotero_RecognizePDF.GSFullTextSearch.resetQueryLimit(); Zotero_RecognizePDF.GSFullTextSearch.resetQueryLimit();
this._recognizeItem(); return this._recognizeItem();
}, },
/** /**
* Shifts an item off of this._items and recognizes it, then calls itself again if there are more * Shifts an item off of this._items and recognizes it, then calls itself again if there are more
* @private * @private
*/ */
"_recognizeItem": function() { "_recognizeItem": Zotero.Promise.coroutine(function* () {
const SUCCESS_IMAGE = "chrome://zotero/skin/tick.png"; const SUCCESS_IMAGE = "chrome://zotero/skin/tick.png";
const FAILURE_IMAGE = "chrome://zotero/skin/cross.png"; const FAILURE_IMAGE = "chrome://zotero/skin/cross.png";
const LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png"; const LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png";
@ -367,51 +379,66 @@ var Zotero_RecognizePDF = new function() {
var file = item.getFile(), me = this; var file = item.getFile(), me = this;
(file try {
? Zotero_RecognizePDF.recognize(file, item.libraryID, function() { return me._stopped; }) if (file) {
: Q.reject(new Zotero.Exception.Alert("recognizePDF.fileNotFound"))) let newItem = yield Zotero_RecognizePDF.recognize(
.then(function(newItem) { file,
// If already stopped, delete item.libraryID,
if(me._stopped) { () => this._stopped
Zotero.Items.erase(newItem.id); );
throw new Zotero.Exception.Alert('recognizePDF.stopped');
// If already stopped, delete
if (this._stopped) {
yield Zotero.Items.eraseTx(newItem.id);
throw new Zotero.Exception.Alert('recognizePDF.stopped');
}
// put new item in same collections as the old one
let itemCollections = item.getCollections();
for (let i = 0; i < itemCollections.length; i++) {
let collection = yield Zotero.Collections.getAsync(itemCollections[i]);
yield collection.addItem(newItem.id);
}
// put old item as a child of the new item
item.parentID = newItem.id;
yield item.saveTx();
itemTitle.setAttribute("label", newItem.getField("title"));
itemIcon.setAttribute("src", SUCCESS_IMAGE);
this._rowIDs[rowNumber] = newItem.id;
return this._recognizeItem();
} }
else {
// put new item in same collections as the old one throw new Zotero.Exception.Alert("recognizePDF.fileNotFound");
var itemCollections = item.getCollections();
for(var j=0; j<itemCollections.length; j++) {
var collection = Zotero.Collections.get(itemCollections[j]);
collection.addItem(newItem.id);
} }
}
catch (e) {
Zotero.logError(e);
// put old item as a child of the new item itemTitle.setAttribute(
item.parentID = newItem.id; "label",
item.save(); e instanceof Zotero.Exception.Alert
? e.message
itemTitle.setAttribute("label", newItem.getField("title")); : Zotero.getString("recognizePDF.error")
itemIcon.setAttribute("src", SUCCESS_IMAGE); );
me._rowIDs[rowNumber] = newItem.id;
me._recognizeItem();
})
.catch(function(error) {
Zotero.debug(error);
Zotero.logError(error);
itemTitle.setAttribute("label", error instanceof Zotero.Exception.Alert ? error.message : Zotero.getString("recognizePDF.error"));
itemIcon.setAttribute("src", FAILURE_IMAGE); itemIcon.setAttribute("src", FAILURE_IMAGE);
// Don't show "completed" label if stopped on last item // Don't show "completed" label if stopped on last item
if(me._stopped && !me._items.length) { if (this._stopped && !this._items.length) {
me._done(true); this._done(true);
} else { } else {
me._recognizeItem(); return this._recognizeItem();
} }
}).finally(function() { }
finally {
// scroll to this item // scroll to this item
me._progressWindow.document.getElementById("tree").treeBoxObject.scrollToRow(Math.max(0, me._itemTotal-me._items.length-4)); this._progressWindow.document.getElementById("tree").treeBoxObject.scrollToRow(
}).done(); Math.max(0, this._itemTotal - this._items.length - 4)
}, );
}
}),
/** /**
* Cleans up after items are recognized, disabling the cancel button and * Cleans up after items are recognized, disabling the cancel button and
@ -523,10 +550,10 @@ var Zotero_RecognizePDF = new function() {
this.findItem = function(lines, libraryID, stopCheckCallback) { this.findItem = function(lines, libraryID, stopCheckCallback) {
if(!inProgress && queryLimitReached) { if(!inProgress && queryLimitReached) {
// There's no queue, so we can reject immediately // There's no queue, so we can reject immediately
return Q.reject(new Zotero.Exception.Alert("recognizePDF.limit")); return Zotero.Promise.reject(new Zotero.Exception.Alert("recognizePDF.limit"));
} }
var deferred = Q.defer(); var deferred = Zotero.Promise.defer();
queue.push({ queue.push({
deferred: deferred, deferred: deferred,
lines: lines, lines: lines,
@ -571,7 +598,7 @@ var Zotero_RecognizePDF = new function() {
} }
item.deferred.resolve( item.deferred.resolve(
Q.try(getGoodLines, item.lines) Zotero.Promise.try(getGoodLines, item.lines)
.then(function(lines) { .then(function(lines) {
return queryGoogle(lines, item.libraryID, 3); // Try querying 3 times return queryGoogle(lines, item.libraryID, 3); // Try querying 3 times
}) })
@ -634,7 +661,7 @@ var Zotero_RecognizePDF = new function() {
* @param {Integer} tries Number of queries to attempt before giving up * @param {Integer} tries Number of queries to attempt before giving up
* @return {Promise} A promise resolved when PDF metadata has been retrieved * @return {Promise} A promise resolved when PDF metadata has been retrieved
*/ */
function queryGoogle(goodLines, libraryID, tries) { var queryGoogle = Zotero.Promise.coroutine(function* (goodLines, libraryID, tries) {
if(tries <= 0) throw new Zotero.Exception.Alert("recognizePDF.noMatches"); if(tries <= 0) throw new Zotero.Exception.Alert("recognizePDF.noMatches");
// Take the relevant parts of some lines (exclude hyphenated word) // Take the relevant parts of some lines (exclude hyphenated word)
@ -671,20 +698,21 @@ var Zotero_RecognizePDF = new function() {
delay = GOOGLE_SCHOLAR_QUERY_DELAY - (Date.now() - Zotero.HTTP.lastGoogleScholarQueryTime); delay = GOOGLE_SCHOLAR_QUERY_DELAY - (Date.now() - Zotero.HTTP.lastGoogleScholarQueryTime);
// Delay // Delay
return (delay > 0 ? Q.delay(delay) : Q()) if (delay > 0) {
.then(function() { yield Zotero.Promise.delay(delay);
Zotero.HTTP.lastGoogleScholarQueryTime = Date.now(); }
return Zotero.HTTP.promise("GET", url, {"responseType":"document"}) Zotero.HTTP.lastGoogleScholarQueryTime = Date.now();
}) try {
.then(function(xmlhttp) { try {
return _checkCaptchaOK(xmlhttp, 3); let xmlhttp = yield Zotero.HTTP.promise("GET", url, {"responseType":"document"});
}, }
function(e) { catch (e) {
return _checkCaptchaError(e, 3); yield _checkCaptchaError(e, 3);
}) }
.then(function(xmlhttp) { xmlhttp = yield _checkCaptchaOK(xmlhttp, 3);
var doc = xmlhttp.response,
deferred = Q.defer(), let doc = xmlhttp.response,
deferred = Zotero.Promise.defer(),
translate = new Zotero.Translate.Web(); translate = new Zotero.Translate.Web();
translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289"); translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
@ -693,7 +721,7 @@ var Zotero_RecognizePDF = new function() {
if(detected.length) { if(detected.length) {
deferred.resolve(_promiseTranslate(translate, libraryID)); deferred.resolve(_promiseTranslate(translate, libraryID));
} else { } else {
deferred.resolve(Q.try(function() { deferred.resolve(Zotero.Promise.try(function() {
return queryGoogle(goodLines, libraryID, tries-1); return queryGoogle(goodLines, libraryID, tries-1);
})); }));
} }
@ -701,14 +729,14 @@ var Zotero_RecognizePDF = new function() {
translate.getTranslators(); translate.getTranslators();
return deferred.promise; return deferred.promise;
}) }
.catch(function(e) { catch (e) {
if(e.name == "recognizePDF.limit") { if(e.name == "recognizePDF.limit") {
queryLimitReached = true; queryLimitReached = true;
} }
throw e; throw e;
}); }
} });
/** /**
* Check for CAPTCHA on a page with HTTP 200 status * Check for CAPTCHA on a page with HTTP 200 status