Quick update of recognizePDF from Q to Bluebird
This should be refactored further, but it is a start at swapping in Bluebird. Unfortunately, the process doesn't work correctly yet because of issues in the translation framework, which will need to be addressed separately. Right now, a parent item is created successfully, but the UI still shows an error and the PDF isn't placed under the parent.
This commit is contained in:
parent
fb46d6d216
commit
bad92a1d22
|
@ -69,11 +69,11 @@ var Zotero_RecognizePDF = new function() {
|
||||||
* process is to be interrupted
|
* process is to be interrupted
|
||||||
* @return {Promise} A promise resolved when PDF metadata has been retrieved
|
* @return {Promise} A promise resolved when PDF metadata has been retrieved
|
||||||
*/
|
*/
|
||||||
this.recognize = function(file, libraryID, stopCheckCallback) {
|
this.recognize = Zotero.Promise.coroutine(function* (file, libraryID, stopCheckCallback) {
|
||||||
const MAX_PAGES = 15;
|
const MAX_PAGES = 15;
|
||||||
var me = this;
|
var me = this;
|
||||||
|
|
||||||
return _extractText(file, MAX_PAGES).then(function(lines) {
|
var lines = yield _extractText(file, MAX_PAGES);
|
||||||
// Look for DOI - Use only first 80 lines to avoid catching article references
|
// Look for DOI - Use only first 80 lines to avoid catching article references
|
||||||
var allText = lines.join("\n"),
|
var allText = lines.join("\n"),
|
||||||
firstChunk = lines.slice(0,80).join('\n'),
|
firstChunk = lines.slice(0,80).join('\n'),
|
||||||
|
@ -91,6 +91,7 @@ var Zotero_RecognizePDF = new function() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var newItem;
|
||||||
if (doi) {
|
if (doi) {
|
||||||
// Look up DOI
|
// Look up DOI
|
||||||
Zotero.debug("RecognizePDF: Found DOI: "+doi);
|
Zotero.debug("RecognizePDF: Found DOI: "+doi);
|
||||||
|
@ -98,33 +99,40 @@ var Zotero_RecognizePDF = new function() {
|
||||||
var translateDOI = new Zotero.Translate.Search();
|
var translateDOI = new Zotero.Translate.Search();
|
||||||
translateDOI.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
|
translateDOI.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
|
||||||
translateDOI.setSearch({"itemType":"journalArticle", "DOI":doi});
|
translateDOI.setSearch({"itemType":"journalArticle", "DOI":doi});
|
||||||
promise = _promiseTranslate(translateDOI, libraryID);
|
try {
|
||||||
} else {
|
newItem = yield _promiseTranslate(translateDOI, libraryID);
|
||||||
promise = Zotero.Promise.reject("No DOI found in text");
|
return newItem;
|
||||||
|
}
|
||||||
|
catch (e) {
|
||||||
|
Zotero.debug("RecognizePDF: " + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Zotero.debug("RecognizePDF: No DOI found in text");
|
||||||
}
|
}
|
||||||
|
|
||||||
return promise
|
|
||||||
// Look for ISBNs if no DOI
|
// Look for ISBNs if no DOI
|
||||||
.catch(function(error) {
|
|
||||||
Zotero.debug("RecognizePDF: " + error);
|
|
||||||
var isbns = _findISBNs(allText);
|
var isbns = _findISBNs(allText);
|
||||||
if (isbns.length) {
|
if (isbns.length) {
|
||||||
Zotero.debug("RecognizePDF: Found ISBNs: " + isbns);
|
Zotero.debug("RecognizePDF: Found ISBNs: " + isbns);
|
||||||
|
|
||||||
var translate = new Zotero.Translate.Search();
|
var translate = new Zotero.Translate.Search();
|
||||||
translate.setSearch({"itemType":"book", "ISBN":isbns[0]});
|
translate.setSearch({"itemType":"book", "ISBN":isbns[0]});
|
||||||
return _promiseTranslate(translate, libraryID);
|
try {
|
||||||
} else {
|
newItem = yield _promiseTranslate(translate, libraryID);
|
||||||
return Zotero.Promise.reject("No ISBN found in text.");
|
return newItem;
|
||||||
}
|
}
|
||||||
})
|
catch (e) {
|
||||||
// If no DOI or ISBN, query Google Scholar
|
// If no DOI or ISBN, query Google Scholar
|
||||||
.catch(function(error) {
|
Zotero.debug("RecognizePDF: " + e);
|
||||||
Zotero.debug("RecognizePDF: " + error);
|
|
||||||
return me.GSFullTextSearch.findItem(lines, libraryID, stopCheckCallback);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Zotero.debug("RecognizePDF: No ISBN found in text");
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.GSFullTextSearch.findItem(lines, libraryID, stopCheckCallback);
|
||||||
|
});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get text from a PDF
|
* Get text from a PDF
|
||||||
|
@ -183,8 +191,7 @@ var Zotero_RecognizePDF = new function() {
|
||||||
* Attach appropriate handlers to a Zotero.Translate instance and begin translation
|
* Attach appropriate handlers to a Zotero.Translate instance and begin translation
|
||||||
* @return {Promise}
|
* @return {Promise}
|
||||||
*/
|
*/
|
||||||
function _promiseTranslate(translate, libraryID) {
|
var _promiseTranslate = Zotero.Promise.coroutine(function* (translate, libraryID) {
|
||||||
var deferred = Zotero.Promise.defer();
|
|
||||||
translate.setHandler("select", function(translate, items, callback) {
|
translate.setHandler("select", function(translate, items, callback) {
|
||||||
for(var i in items) {
|
for(var i in items) {
|
||||||
var obj = {};
|
var obj = {};
|
||||||
|
@ -193,7 +200,7 @@ var Zotero_RecognizePDF = new function() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
translate.setHandler("done", function(translate, success) {
|
/*translate.setHandler("done", function(translate, success) {
|
||||||
if(success && translate.newItems.length) {
|
if(success && translate.newItems.length) {
|
||||||
deferred.resolve(translate.newItems[0]);
|
deferred.resolve(translate.newItems[0]);
|
||||||
} else {
|
} else {
|
||||||
|
@ -202,10 +209,15 @@ var Zotero_RecognizePDF = new function() {
|
||||||
: "Could not find a translator for given search item"
|
: "Could not find a translator for given search item"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
});*/
|
||||||
|
var newItems = yield translate.translate({
|
||||||
|
libraryID
|
||||||
});
|
});
|
||||||
translate.translate(libraryID, false);
|
if (newItems.length) {
|
||||||
return deferred.promise;
|
return newItems[0];
|
||||||
}
|
}
|
||||||
|
throw new Error("No items found");
|
||||||
|
});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search ISBNs in text
|
* Search ISBNs in text
|
||||||
|
@ -333,14 +345,14 @@ var Zotero_RecognizePDF = new function() {
|
||||||
this._progressWindow.addEventListener("keypress", this._keypressCancelHandler);
|
this._progressWindow.addEventListener("keypress", this._keypressCancelHandler);
|
||||||
this._progressWindow.addEventListener("close", this._cancelHandler, false);
|
this._progressWindow.addEventListener("close", this._cancelHandler, false);
|
||||||
Zotero_RecognizePDF.GSFullTextSearch.resetQueryLimit();
|
Zotero_RecognizePDF.GSFullTextSearch.resetQueryLimit();
|
||||||
this._recognizeItem();
|
return this._recognizeItem();
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shifts an item off of this._items and recognizes it, then calls itself again if there are more
|
* Shifts an item off of this._items and recognizes it, then calls itself again if there are more
|
||||||
* @private
|
* @private
|
||||||
*/
|
*/
|
||||||
"_recognizeItem": function() {
|
"_recognizeItem": Zotero.Promise.coroutine(function* () {
|
||||||
const SUCCESS_IMAGE = "chrome://zotero/skin/tick.png";
|
const SUCCESS_IMAGE = "chrome://zotero/skin/tick.png";
|
||||||
const FAILURE_IMAGE = "chrome://zotero/skin/cross.png";
|
const FAILURE_IMAGE = "chrome://zotero/skin/cross.png";
|
||||||
const LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png";
|
const LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png";
|
||||||
|
@ -367,51 +379,66 @@ var Zotero_RecognizePDF = new function() {
|
||||||
|
|
||||||
var file = item.getFile(), me = this;
|
var file = item.getFile(), me = this;
|
||||||
|
|
||||||
(file
|
try {
|
||||||
? Zotero_RecognizePDF.recognize(file, item.libraryID, function() { return me._stopped; })
|
if (file) {
|
||||||
: Q.reject(new Zotero.Exception.Alert("recognizePDF.fileNotFound")))
|
let newItem = yield Zotero_RecognizePDF.recognize(
|
||||||
.then(function(newItem) {
|
file,
|
||||||
|
item.libraryID,
|
||||||
|
() => this._stopped
|
||||||
|
);
|
||||||
|
|
||||||
// If already stopped, delete
|
// If already stopped, delete
|
||||||
if(me._stopped) {
|
if (this._stopped) {
|
||||||
Zotero.Items.erase(newItem.id);
|
yield Zotero.Items.eraseTx(newItem.id);
|
||||||
throw new Zotero.Exception.Alert('recognizePDF.stopped');
|
throw new Zotero.Exception.Alert('recognizePDF.stopped');
|
||||||
}
|
}
|
||||||
|
|
||||||
// put new item in same collections as the old one
|
// put new item in same collections as the old one
|
||||||
var itemCollections = item.getCollections();
|
let itemCollections = item.getCollections();
|
||||||
for(var j=0; j<itemCollections.length; j++) {
|
for (let i = 0; i < itemCollections.length; i++) {
|
||||||
var collection = Zotero.Collections.get(itemCollections[j]);
|
let collection = yield Zotero.Collections.getAsync(itemCollections[i]);
|
||||||
collection.addItem(newItem.id);
|
yield collection.addItem(newItem.id);
|
||||||
}
|
}
|
||||||
|
|
||||||
// put old item as a child of the new item
|
// put old item as a child of the new item
|
||||||
item.parentID = newItem.id;
|
item.parentID = newItem.id;
|
||||||
item.save();
|
yield item.saveTx();
|
||||||
|
|
||||||
itemTitle.setAttribute("label", newItem.getField("title"));
|
itemTitle.setAttribute("label", newItem.getField("title"));
|
||||||
itemIcon.setAttribute("src", SUCCESS_IMAGE);
|
itemIcon.setAttribute("src", SUCCESS_IMAGE);
|
||||||
me._rowIDs[rowNumber] = newItem.id;
|
this._rowIDs[rowNumber] = newItem.id;
|
||||||
|
|
||||||
me._recognizeItem();
|
return this._recognizeItem();
|
||||||
})
|
}
|
||||||
.catch(function(error) {
|
else {
|
||||||
Zotero.debug(error);
|
throw new Zotero.Exception.Alert("recognizePDF.fileNotFound");
|
||||||
Zotero.logError(error);
|
}
|
||||||
|
}
|
||||||
|
catch (e) {
|
||||||
|
Zotero.logError(e);
|
||||||
|
|
||||||
itemTitle.setAttribute("label", error instanceof Zotero.Exception.Alert ? error.message : Zotero.getString("recognizePDF.error"));
|
itemTitle.setAttribute(
|
||||||
|
"label",
|
||||||
|
e instanceof Zotero.Exception.Alert
|
||||||
|
? e.message
|
||||||
|
: Zotero.getString("recognizePDF.error")
|
||||||
|
);
|
||||||
itemIcon.setAttribute("src", FAILURE_IMAGE);
|
itemIcon.setAttribute("src", FAILURE_IMAGE);
|
||||||
|
|
||||||
// Don't show "completed" label if stopped on last item
|
// Don't show "completed" label if stopped on last item
|
||||||
if(me._stopped && !me._items.length) {
|
if (this._stopped && !this._items.length) {
|
||||||
me._done(true);
|
this._done(true);
|
||||||
} else {
|
} else {
|
||||||
me._recognizeItem();
|
return this._recognizeItem();
|
||||||
}
|
}
|
||||||
}).finally(function() {
|
}
|
||||||
|
finally {
|
||||||
// scroll to this item
|
// scroll to this item
|
||||||
me._progressWindow.document.getElementById("tree").treeBoxObject.scrollToRow(Math.max(0, me._itemTotal-me._items.length-4));
|
this._progressWindow.document.getElementById("tree").treeBoxObject.scrollToRow(
|
||||||
}).done();
|
Math.max(0, this._itemTotal - this._items.length - 4)
|
||||||
},
|
);
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleans up after items are recognized, disabling the cancel button and
|
* Cleans up after items are recognized, disabling the cancel button and
|
||||||
|
@ -523,10 +550,10 @@ var Zotero_RecognizePDF = new function() {
|
||||||
this.findItem = function(lines, libraryID, stopCheckCallback) {
|
this.findItem = function(lines, libraryID, stopCheckCallback) {
|
||||||
if(!inProgress && queryLimitReached) {
|
if(!inProgress && queryLimitReached) {
|
||||||
// There's no queue, so we can reject immediately
|
// There's no queue, so we can reject immediately
|
||||||
return Q.reject(new Zotero.Exception.Alert("recognizePDF.limit"));
|
return Zotero.Promise.reject(new Zotero.Exception.Alert("recognizePDF.limit"));
|
||||||
}
|
}
|
||||||
|
|
||||||
var deferred = Q.defer();
|
var deferred = Zotero.Promise.defer();
|
||||||
queue.push({
|
queue.push({
|
||||||
deferred: deferred,
|
deferred: deferred,
|
||||||
lines: lines,
|
lines: lines,
|
||||||
|
@ -571,7 +598,7 @@ var Zotero_RecognizePDF = new function() {
|
||||||
}
|
}
|
||||||
|
|
||||||
item.deferred.resolve(
|
item.deferred.resolve(
|
||||||
Q.try(getGoodLines, item.lines)
|
Zotero.Promise.try(getGoodLines, item.lines)
|
||||||
.then(function(lines) {
|
.then(function(lines) {
|
||||||
return queryGoogle(lines, item.libraryID, 3); // Try querying 3 times
|
return queryGoogle(lines, item.libraryID, 3); // Try querying 3 times
|
||||||
})
|
})
|
||||||
|
@ -634,7 +661,7 @@ var Zotero_RecognizePDF = new function() {
|
||||||
* @param {Integer} tries Number of queries to attempt before giving up
|
* @param {Integer} tries Number of queries to attempt before giving up
|
||||||
* @return {Promise} A promise resolved when PDF metadata has been retrieved
|
* @return {Promise} A promise resolved when PDF metadata has been retrieved
|
||||||
*/
|
*/
|
||||||
function queryGoogle(goodLines, libraryID, tries) {
|
var queryGoogle = Zotero.Promise.coroutine(function* (goodLines, libraryID, tries) {
|
||||||
if(tries <= 0) throw new Zotero.Exception.Alert("recognizePDF.noMatches");
|
if(tries <= 0) throw new Zotero.Exception.Alert("recognizePDF.noMatches");
|
||||||
|
|
||||||
// Take the relevant parts of some lines (exclude hyphenated word)
|
// Take the relevant parts of some lines (exclude hyphenated word)
|
||||||
|
@ -671,20 +698,21 @@ var Zotero_RecognizePDF = new function() {
|
||||||
delay = GOOGLE_SCHOLAR_QUERY_DELAY - (Date.now() - Zotero.HTTP.lastGoogleScholarQueryTime);
|
delay = GOOGLE_SCHOLAR_QUERY_DELAY - (Date.now() - Zotero.HTTP.lastGoogleScholarQueryTime);
|
||||||
|
|
||||||
// Delay
|
// Delay
|
||||||
return (delay > 0 ? Q.delay(delay) : Q())
|
if (delay > 0) {
|
||||||
.then(function() {
|
yield Zotero.Promise.delay(delay);
|
||||||
|
}
|
||||||
Zotero.HTTP.lastGoogleScholarQueryTime = Date.now();
|
Zotero.HTTP.lastGoogleScholarQueryTime = Date.now();
|
||||||
return Zotero.HTTP.promise("GET", url, {"responseType":"document"})
|
try {
|
||||||
})
|
try {
|
||||||
.then(function(xmlhttp) {
|
let xmlhttp = yield Zotero.HTTP.promise("GET", url, {"responseType":"document"});
|
||||||
return _checkCaptchaOK(xmlhttp, 3);
|
}
|
||||||
},
|
catch (e) {
|
||||||
function(e) {
|
yield _checkCaptchaError(e, 3);
|
||||||
return _checkCaptchaError(e, 3);
|
}
|
||||||
})
|
xmlhttp = yield _checkCaptchaOK(xmlhttp, 3);
|
||||||
.then(function(xmlhttp) {
|
|
||||||
var doc = xmlhttp.response,
|
let doc = xmlhttp.response,
|
||||||
deferred = Q.defer(),
|
deferred = Zotero.Promise.defer(),
|
||||||
translate = new Zotero.Translate.Web();
|
translate = new Zotero.Translate.Web();
|
||||||
|
|
||||||
translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
|
translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
|
||||||
|
@ -693,7 +721,7 @@ var Zotero_RecognizePDF = new function() {
|
||||||
if(detected.length) {
|
if(detected.length) {
|
||||||
deferred.resolve(_promiseTranslate(translate, libraryID));
|
deferred.resolve(_promiseTranslate(translate, libraryID));
|
||||||
} else {
|
} else {
|
||||||
deferred.resolve(Q.try(function() {
|
deferred.resolve(Zotero.Promise.try(function() {
|
||||||
return queryGoogle(goodLines, libraryID, tries-1);
|
return queryGoogle(goodLines, libraryID, tries-1);
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
@ -701,14 +729,14 @@ var Zotero_RecognizePDF = new function() {
|
||||||
translate.getTranslators();
|
translate.getTranslators();
|
||||||
|
|
||||||
return deferred.promise;
|
return deferred.promise;
|
||||||
})
|
}
|
||||||
.catch(function(e) {
|
catch (e) {
|
||||||
if(e.name == "recognizePDF.limit") {
|
if(e.name == "recognizePDF.limit") {
|
||||||
queryLimitReached = true;
|
queryLimitReached = true;
|
||||||
}
|
}
|
||||||
throw e;
|
throw e;
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check for CAPTCHA on a page with HTTP 200 status
|
* Check for CAPTCHA on a page with HTTP 200 status
|
||||||
|
|
Loading…
Reference in New Issue
Block a user