diff --git a/chrome/content/zotero/captcha.js b/chrome/content/zotero/captcha.js
new file mode 100644
index 000000000..72432cd64
--- /dev/null
+++ b/chrome/content/zotero/captcha.js
@@ -0,0 +1,52 @@
+/*
+ ***** BEGIN LICENSE BLOCK *****
+
+ Copyright © 2009 Center for History and New Media
+ George Mason University, Fairfax, Virginia, USA
+ http://zotero.org
+
+ This file is part of Zotero.
+
+ Zotero is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Zotero is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with Zotero. If not, see <http://www.gnu.org/licenses/>.
+
+ ***** END LICENSE BLOCK *****
+*/
+
+var Zotero_Captcha = new function() { // object built once via `new function()`; holds the CAPTCHA window's handlers
+ this._io; // bare expression with no effect; _io is actually assigned in onLoad
+ // onLoad: keep window.arguments[0] as _io, point the image at _io.dataIn.imgUrl, focus the text input
+ this.onLoad = function() {
+ this._io = window.arguments[0];
+ document.getElementById('zotero-captcha-image').src = this._io.dataIn.imgUrl;
+ document.getElementById('zotero-captcha-input').focus();
+ }
+ // imageOnLoad: resize the window to its content (presumably wired to the image's load event; confirm in captcha.xul)
+ this.imageOnLoad = function() {
+ window.sizeToContent();
+ }
+ // resolve: publish the typed text as _io.dataOut.captcha, then close the window
+ this.resolve = function() {
+ var result = document.getElementById('zotero-captcha-input');
+ if(!result.value) return; // empty input: window stays open and dataOut is left unset
+
+ this._io.dataOut = {
+ captcha: result.value
+ };
+ window.close();
+ }
+ // cancel: close the window without writing _io.dataOut
+ this.cancel = function() {
+ window.close();
+ }
+}
\ No newline at end of file
diff --git a/chrome/content/zotero/captcha.xul b/chrome/content/zotero/captcha.xul
new file mode 100644
index 000000000..3893b7b7f
--- /dev/null
+++ b/chrome/content/zotero/captcha.xul
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/chrome/content/zotero/recognizePDF.js b/chrome/content/zotero/recognizePDF.js
index f399fb90f..92e7297b8 100644
--- a/chrome/content/zotero/recognizePDF.js
+++ b/chrome/content/zotero/recognizePDF.js
@@ -33,7 +33,7 @@
*/
var Zotero_RecognizePDF = new function() {
Components.utils.import("resource://zotero/q.js");
- var _progressWindow, _progressIndicator;
+ var _progressWindow, _progressIndicator, itemRecognizer;
/**
* Checks whether a given PDF could theoretically be recognized
@@ -56,7 +56,7 @@ var Zotero_RecognizePDF = new function() {
var items = ZoteroPane_Local.getSelectedItems();
if (!items) return;
- var itemRecognizer = new Zotero_RecognizePDF.ItemRecognizer();
+ itemRecognizer = new Zotero_RecognizePDF.ItemRecognizer();
itemRecognizer.recognizeItems(items);
}
@@ -70,6 +70,7 @@ var Zotero_RecognizePDF = new function() {
this.recognize = function(file, libraryID) {
const MAX_PAGES = 7;
const GOOGLE_SCHOLAR_QUERY_DELAY = 2000; // in ms
+ var me = this;
return _extractText(file, MAX_PAGES).then(function(lines) {
// Look for DOI - Use only first 80 lines to avoid catching article references
@@ -105,6 +106,9 @@ var Zotero_RecognizePDF = new function() {
return promise.fail(function(error) {
Zotero.debug("RecognizePDF: "+error);
+ // Don't try Google Scholar if we already reached query limit
+ if(itemRecognizer._gsQueryLimitReached) throw new Zotero.Exception.Alert("recognizePDF.limit");
+
// Use only first column from multi-column lines
const lineRe = /^[\s_]*([^\s]+(?: [^\s_]+)+)/;
var cleanedLines = [], cleanedLineLengths = [];
@@ -141,7 +145,7 @@ var Zotero_RecognizePDF = new function() {
var nextLine = 0,
limited = false,
queryGoogle = function() {
- // Once we hit the CAPTCHA once, don't keep trying
+ // If the user fails (or chooses not) to solve the CAPTCHA, don't keep trying
if(limited) throw new Zotero.Exception.Alert("recognizePDF.limit");
// Take the relevant parts of some lines (exclude hyphenated word)
@@ -189,9 +193,7 @@ var Zotero_RecognizePDF = new function() {
translate = new Zotero.Translate.Web();
if(Zotero.Utilities.xpath(doc, "//form[@action='Captcha']").length) {
- // Hit CAPTCHA
- limited = true;
- throw new Zotero.Exception.Alert("recognizePDF.limit");
+ return _solveCaptcha(xmlhttp, 3);
}
translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
@@ -207,15 +209,25 @@ var Zotero_RecognizePDF = new function() {
return deferred.promise;
}, function(e) {
- if(e instanceof Zotero.HTTP.UnexpectedStatusException && e.status == 403) {
- // Hit hard block
- throw new Zotero.Exception.Alert("recognizePDF.limit");
+ if(e instanceof Zotero.HTTP.UnexpectedStatusException
+ && (e.status == 403 || e.status == 503)) {
+ return _solveCaptcha(e.xmlhttp, 3); // Give the user 3 chances to get it right
}
throw e;
});
};
-
- return queryGoogle().fail(queryGoogle).fail(queryGoogle);
+
+ var retryCount = 2;
+ var retryGS = function(e) { // rejection handler for queryGoogle(); e is the rejection reason
+ if(!retryCount--) throw e; // counter already 0: rethrow; the post-decrement runs on every call, so each failure uses up one retry
+ // Only retry if we can't find matches
+ if(e instanceof Zotero.Exception.Alert && e.name == "recognizePDF.noMatches") {
+ return queryGoogle().catch(retryGS); // re-attach this handler so the next failure is checked the same way
+ }
+ throw e; // any other error propagates unchanged
+ }
+
+ return queryGoogle().catch(retryGS);
});
});
}
@@ -331,39 +343,87 @@ var Zotero_RecognizePDF = new function() {
}
// Validate ISBNs
- var validIsbns = [];
+ var validIsbns = [], cleanISBN;
for (var i =0; i < isbns.length; i++) {
- if(_isValidISBN(isbns[i])) validIsbns.push(isbns[i]);
+ cleanISBN = Zotero.Utilities.cleanISBN(isbns[i]);
+ if(cleanISBN) validIsbns.push(cleanISBN);
}
return validIsbns;
}
- /**
- * Check whether an ISBNs is valid
- * @private
- * @return {Boolean}
- */
- function _isValidISBN(isbn) {
- if(isbn.length == 13) {
- // ISBN-13 should start with 978 or 979 i.e. GS1 for book publishing industry
- var prefix = isbn.slice(0,3);
- if (prefix != "978" && prefix != "979") return false;
- // Verify check digit
- var check = 0;
- for (var i = 0; i < 13; i+=2) check += isbn[i]*1;
- for (i = 1; i < 12; i+=2) check += 3 * isbn[i]*1;
- return (check % 10 == 0);
- } else if(isbn.length == 10) {
- // Verify ISBN-10 check digit
- var check = 0;
- for (var i = 0; i < 9; i++) check += isbn[i]*1 * (10-i);
- // last number might be 'X'
- if (isbn[9] == 'X' || isbn[9] == 'x') check += 10;
- else check += isbn[i]*1;
- return (check % 11 == 0);
+ function _extractCaptchaFormData(doc) {
+ var formData = {};
+
+ var img = doc.getElementsByTagName('img')[0];
+ if(!img) return;
+ formData.img = img.src;
+
+ var form = doc.forms[0];
+ if(!form) return;
+
+ formData.action = form.action;
+ formData.input = {};
+ var inputs = form.getElementsByTagName('input');
+ for(var i=0, n=inputs.length; i
+
+
@@ -283,3 +285,5 @@
+
+
\ No newline at end of file
diff --git a/chrome/locale/en-US/zotero/zotero.properties b/chrome/locale/en-US/zotero/zotero.properties
index 31e578b44..cc296ad63 100644
--- a/chrome/locale/en-US/zotero/zotero.properties
+++ b/chrome/locale/en-US/zotero/zotero.properties
@@ -897,7 +897,7 @@ recognizePDF.noOCR = PDF does not contain OCRed text.
recognizePDF.couldNotRead = Could not read text from PDF.
recognizePDF.noMatches = No matching references found.
recognizePDF.fileNotFound = File not found.
-recognizePDF.limit = Query limit reached. Try again later.
+recognizePDF.limit = Google Scholar query limit reached. Try again later.
recognizePDF.error = An unexpected error occurred.
recognizePDF.complete.label = Metadata Retrieval Complete.
recognizePDF.close.label = Close