From bad92a1d2263598aa6ac9c07f84fc34708e1bcd9 Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Mon, 7 Dec 2015 17:23:10 -0500 Subject: [PATCH] Quick update of recognizePDF from Q to Bluebird This should be refactored further, but this is a start at swapping in Bluebird. Unfortunately the process doesn't work correctly because of issues in the translation framework, which will need to be addressed separately -- right now a parent item is successfully created but the UI still shows an error and the PDF isn't placed under the parent. --- chrome/content/zotero/recognizePDF.js | 260 ++++++++++++++------------ 1 file changed, 144 insertions(+), 116 deletions(-) diff --git a/chrome/content/zotero/recognizePDF.js b/chrome/content/zotero/recognizePDF.js index ef8265f53..a8fa2c48a 100644 --- a/chrome/content/zotero/recognizePDF.js +++ b/chrome/content/zotero/recognizePDF.js @@ -69,62 +69,70 @@ var Zotero_RecognizePDF = new function() { * process is to be interrupted * @return {Promise} A promise resolved when PDF metadata has been retrieved */ - this.recognize = function(file, libraryID, stopCheckCallback) { + this.recognize = Zotero.Promise.coroutine(function* (file, libraryID, stopCheckCallback) { const MAX_PAGES = 15; var me = this; - return _extractText(file, MAX_PAGES).then(function(lines) { - // Look for DOI - Use only first 80 lines to avoid catching article references - var allText = lines.join("\n"), - firstChunk = lines.slice(0,80).join('\n'), - doi = Zotero.Utilities.cleanDOI(firstChunk), - promise; - Zotero.debug(allText); - - if(!doi) { - // Look for a JSTOR stable URL, which can be converted to a DOI by prepending 10.2307 - doi = firstChunk.match(/www.\jstor\.org\/stable\/(\S+)/i); - if(doi) { - doi = Zotero.Utilities.cleanDOI( - doi[1].indexOf('10.') == 0 ? doi[1] : '10.2307/' + doi[1] - ); - } - } - + var lines = yield _extractText(file, MAX_PAGES); + // Look for DOI - Use only first 80 lines to avoid catching article references + var allText = lines.join("\n"), + firstChunk = lines.slice(0,80).join('\n'), + doi = Zotero.Utilities.cleanDOI(firstChunk), + promise; + Zotero.debug(allText); + + if(!doi) { + // Look for a JSTOR stable URL, which can be converted to a DOI by prepending 10.2307 + doi = firstChunk.match(/www.\jstor\.org\/stable\/(\S+)/i); if(doi) { - // Look up DOI - Zotero.debug("RecognizePDF: Found DOI: "+doi); - - var translateDOI = new Zotero.Translate.Search(); - translateDOI.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753"); - translateDOI.setSearch({"itemType":"journalArticle", "DOI":doi}); - promise = _promiseTranslate(translateDOI, libraryID); - } else { - promise = Zotero.Promise.reject("No DOI found in text"); + doi = Zotero.Utilities.cleanDOI( + doi[1].indexOf('10.') == 0 ? doi[1] : '10.2307/' + doi[1] + ); } + } + + var newItem; + if (doi) { + // Look up DOI + Zotero.debug("RecognizePDF: Found DOI: "+doi); - return promise - // Look for ISBNs if no DOI - .catch(function(error) { - Zotero.debug("RecognizePDF: " + error); - var isbns = _findISBNs(allText); - if (isbns.length) { - Zotero.debug("RecognizePDF: Found ISBNs: " + isbns); - - var translate = new Zotero.Translate.Search(); - translate.setSearch({"itemType":"book", "ISBN":isbns[0]}); - return _promiseTranslate(translate, libraryID); - } else { - return Zotero.Promise.reject("No ISBN found in text."); - } - }) + var translateDOI = new Zotero.Translate.Search(); + translateDOI.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753"); + translateDOI.setSearch({"itemType":"journalArticle", "DOI":doi}); + try { + newItem = yield _promiseTranslate(translateDOI, libraryID); + return newItem; + } + catch (e) { + Zotero.debug("RecognizePDF: " + e); + } + } + else { + Zotero.debug("RecognizePDF: No DOI found in text"); + } + + // Look for ISBNs if no DOI + var isbns = _findISBNs(allText); + if (isbns.length) { + Zotero.debug("RecognizePDF: Found ISBNs: " + isbns); + + var translate = new Zotero.Translate.Search(); + translate.setSearch({"itemType":"book", "ISBN":isbns[0]}); + try { + newItem = yield _promiseTranslate(translate, libraryID); + return newItem; + } + catch (e) { // If no DOI or ISBN, query Google Scholar - .catch(function(error) { - Zotero.debug("RecognizePDF: " + error); - return me.GSFullTextSearch.findItem(lines, libraryID, stopCheckCallback); - }); - }); - } + Zotero.debug("RecognizePDF: " + e); + } + } + else { + Zotero.debug("RecognizePDF: No ISBN found in text"); + } + + return this.GSFullTextSearch.findItem(lines, libraryID, stopCheckCallback); + }); /** * Get text from a PDF @@ -183,8 +191,7 @@ var Zotero_RecognizePDF = new function() { * Attach appropriate handlers to a Zotero.Translate instance and begin translation * @return {Promise} */ - function _promiseTranslate(translate, libraryID) { - var deferred = Zotero.Promise.defer(); + var _promiseTranslate = Zotero.Promise.coroutine(function* (translate, libraryID) { translate.setHandler("select", function(translate, items, callback) { for(var i in items) { var obj = {}; @@ -193,7 +200,7 @@ var Zotero_RecognizePDF = new function() { return; } }); - translate.setHandler("done", function(translate, success) { + /*translate.setHandler("done", function(translate, success) { if(success && translate.newItems.length) { deferred.resolve(translate.newItems[0]); } else { @@ -202,10 +209,15 @@ var Zotero_RecognizePDF = new function() { : "Could not find a translator for given search item" ); } + });*/ + var newItems = yield translate.translate({ + libraryID }); - translate.translate(libraryID, false); - return deferred.promise; - } + if (newItems.length) { + return newItems[0]; + } + throw new Error("No items found"); + }); /** * Search ISBNs in text @@ -333,14 +345,14 @@ var Zotero_RecognizePDF = new function() { this._progressWindow.addEventListener("keypress", this._keypressCancelHandler); this._progressWindow.addEventListener("close", this._cancelHandler, false); Zotero_RecognizePDF.GSFullTextSearch.resetQueryLimit(); - this._recognizeItem(); + return this._recognizeItem(); }, /** * Shifts an item off of this._items and recognizes it, then calls itself again if there are more * @private */ - "_recognizeItem": function() { + "_recognizeItem": Zotero.Promise.coroutine(function* () { const SUCCESS_IMAGE = "chrome://zotero/skin/tick.png"; const FAILURE_IMAGE = "chrome://zotero/skin/cross.png"; const LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png"; @@ -367,51 +379,66 @@ var Zotero_RecognizePDF = new function() { var file = item.getFile(), me = this; - (file - ? Zotero_RecognizePDF.recognize(file, item.libraryID, function() { return me._stopped; }) - : Q.reject(new Zotero.Exception.Alert("recognizePDF.fileNotFound"))) - .then(function(newItem) { - // If already stopped, delete - if(me._stopped) { - Zotero.Items.erase(newItem.id); - throw new Zotero.Exception.Alert('recognizePDF.stopped'); + try { + if (file) { + let newItem = yield Zotero_RecognizePDF.recognize( + file, + item.libraryID, + () => this._stopped + ); + + // If already stopped, delete + if (this._stopped) { + yield Zotero.Items.eraseTx(newItem.id); + throw new Zotero.Exception.Alert('recognizePDF.stopped'); + } + + // put new item in same collections as the old one + let itemCollections = item.getCollections(); + for (let i = 0; i < itemCollections.length; i++) { + let collection = yield Zotero.Collections.getAsync(itemCollections[i]); + yield collection.addItem(newItem.id); + } + + // put old item as a child of the new item + item.parentID = newItem.id; + yield item.saveTx(); + + itemTitle.setAttribute("label", newItem.getField("title")); + itemIcon.setAttribute("src", SUCCESS_IMAGE); + this._rowIDs[rowNumber] = newItem.id; + + return this._recognizeItem(); } - - // put new item in same collections as the old one - var itemCollections = item.getCollections(); - for(var j=0; j 0 ? Q.delay(delay) : Q()) - .then(function() { - Zotero.HTTP.lastGoogleScholarQueryTime = Date.now(); - return Zotero.HTTP.promise("GET", url, {"responseType":"document"}) - }) - .then(function(xmlhttp) { - return _checkCaptchaOK(xmlhttp, 3); - }, - function(e) { - return _checkCaptchaError(e, 3); - }) - .then(function(xmlhttp) { - var doc = xmlhttp.response, - deferred = Q.defer(), + if (delay > 0) { + yield Zotero.Promise.delay(delay); + } + Zotero.HTTP.lastGoogleScholarQueryTime = Date.now(); + try { + try { + let xmlhttp = yield Zotero.HTTP.promise("GET", url, {"responseType":"document"}); + } + catch (e) { + yield _checkCaptchaError(e, 3); + } + xmlhttp = yield _checkCaptchaOK(xmlhttp, 3); + + let doc = xmlhttp.response, + deferred = Zotero.Promise.defer(), translate = new Zotero.Translate.Web(); translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289"); @@ -693,7 +721,7 @@ var Zotero_RecognizePDF = new function() { if(detected.length) { deferred.resolve(_promiseTranslate(translate, libraryID)); } else { - deferred.resolve(Q.try(function() { + deferred.resolve(Zotero.Promise.try(function() { return queryGoogle(goodLines, libraryID, tries-1); })); } @@ -701,14 +729,14 @@ var Zotero_RecognizePDF = new function() { translate.getTranslators(); return deferred.promise; - }) - .catch(function(e) { + } + catch (e) { if(e.name == "recognizePDF.limit") { queryLimitReached = true; } throw e; - }); - } + } + }); /** * Check for CAPTCHA on a page with HTTP 200 status