From 45967da4ce65500c33448a155be481a3bb0fccdf Mon Sep 17 00:00:00 2001
From: Avram Lyon <ajlyon@gmail.com>
Date: Sun, 3 Apr 2011 19:50:55 +0000
Subject: [PATCH] Trans: Rename and update Wiley

---
 translators/Wiley InterScience.js   | 125 -----------
 translators/Wiley Online Library.js | 320 ++++++++++++++++++++++++++++
 2 files changed, 320 insertions(+), 125 deletions(-)
 delete mode 100644 translators/Wiley InterScience.js
 create mode 100644 translators/Wiley Online Library.js

diff --git a/translators/Wiley InterScience.js b/translators/Wiley InterScience.js
deleted file mode 100644
index e38925d03..000000000
--- a/translators/Wiley InterScience.js	
+++ /dev/null
@@ -1,125 +0,0 @@
-{
-	"translatorID":"fe728bc9-595a-4f03-98fc-766f1d8d0936",
-	"translatorType":4,
-	"label":"Wiley InterScience",
-	"creator":"Sean Takats and Michael Berkowitz",
-	"target":"https?:\\/\\/(?:www3\\.|www\\.)?interscience\\.wiley\\.com[^\\/]*\\/(?:search\\/|(cgi-bin|journal)\\/[0-9]+\\/abstract|journal)",
-	"minVersion":"1.0.0b4.r5",
-	"maxVersion":"",
-	"priority":100,
-	"inRepository":true,
-	"lastUpdated":"2009-08-03 01:25:00"
-}
-
-function detectWeb(doc, url){
-	var namespace = doc.documentElement.namespaceURI;
-	var nsResolver = namespace ? function(prefix) {
-		if (prefix == 'x') return namespace; else return null;
-	} : null;
-		
-	var xpath = '//input[@name="ID"][@type="checkbox"]';
-	if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
-		return "multiple";
-	}
-	if (url.match(/journal\/\d+\/(issue|home)$/)) {
-		return "multiple";
-	}
-	var m = url.match(/https?:\/\/[^\/]*\/(cgi-bin|journal)(\/(abstract|summary))?\/[0-9]+\/abstract/);
-	if (m){
-		return "journalArticle";
-	}
-}
-
-function doWeb(doc, url){
-	var namespace = doc.documentElement.namespaceURI;
-	var nsResolver = namespace ? function(prefix) {
-		if (prefix == 'x') return namespace; else return null;
-	} : null;
-	var host = 'http://' + doc.location.host + "/";
-	Zotero.debug(host);
-	var m = url.match(/https?:\/\/[^\/]*\/(journal|cgi-bin\/summary)\/([0-9]+)\/(abstract)?/);
-	var ids = new Array();
-	if(detectWeb(doc, url) == "multiple") {  //search
-		var id;
-		var title;
-		var availableItems = new Array();
-		var xpath = '//tr[td/input[@name="ID"][@type="checkbox"]]';
-		if (doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
-			elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
-			var elmt = elmts.iterateNext();
-			do {
-				title = doc.evaluate('./td/strong', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
-				id = doc.evaluate('./td/input[@name="ID"][@type="checkbox"]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().value;
-				availableItems[id] = title;
-			} while (elmt = elmts.iterateNext())
-		} else {
-			var xpath = '//div[@id="contentCell"]/div[*/a]';
-			var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
-			var elmt = elmts.iterateNext();
-			do {
-				title = Zotero.Utilities.trimInternal(doc.evaluate('.//strong', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent);
-				id = doc.evaluate('.//a[1]', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().href.match(/\/([\d]+)\/abstract/)[1];
-				availableItems[id] = title;
-			} while (elmt = elmts.iterateNext())
-		}
-		var items = Zotero.selectItems(availableItems);
-		if(!items) {
-			return true;
-		}
-		for(var id in items) {
-			ids.push(id);
-		}
-		
-	} else if (m){ //single article
-		ids.push(m[2]);
-	}
-	
-	
-	var sets = [];
-	for each (id in ids) {
-		var uri = host + 'tools/citex';
-		var poststring = "clienttype=1&subtype=1&mode=1&version=1&id=" + id;
-		sets.push({ id: id, uri: uri, poststring: poststring });
-	}
-	
-	var setupCallback = function (set, next) {
-		Zotero.Utilities.HTTP.doPost(set.uri, set.poststring, function () {
-			next();
-		});
-	}
-	
-	var processCallback = function (set, next) {
-		var id = set.id;
-		var uri = host+"tools/CitEx";
-		var poststring = "mode=2&format=3&type=2&file=3&exportCitation.x=16&exportCitation.y=10&exportCitation=submit";
-		Zotero.Utilities.HTTP.doPost(uri, poststring, function(text) {
-			var m = text.match(/%A\s(.*)/);  //following lines fix Wiley's incorrect %A tag (should be separate tags for each author)
-			if (m){
-				var newauthors ="";
-				var authors = m[1].split(",")
-				for each (var author in authors){
-					if (author != ""){
-						newauthors = newauthors + "%A "+Zotero.Utilities.unescapeHTML(Zotero.Utilities.trimInternal(author))+"\n";
-					}
-				}
-				text = text.replace(/%A\s.*\n/, newauthors);
-			}
-			var translator = Zotero.loadTranslator("import");
-			translator.setTranslator("881f60f2-0802-411a-9228-ce5f47b64c7d"); //EndNote/Refer/BibIX
-			translator.setString(text);
-			translator.setHandler("itemDone", function(obj, item) {
-				var pdfurl = host + "cgi-bin/fulltext?ID=" + id + "&PLACEBO=IE.pdf&mode=pdf";
-				item.attachments.push({url:pdfurl, title:"Wiley Interscience PDF", mimeType:"application/pdf"});
-				item.DOI = item.url.match(/\.org\/(.*)$/)[1];
-				item.complete();
-			});
-			translator.translate();
-			
-			next();
-		}, null, 'iso-8859-1');
-	}
-	
-	var callbacks = [setupCallback, processCallback];
-	Zotero.Utilities.processAsync(sets, callbacks, function () { Zotero.done(); });
-	Zotero.wait();
-}
\ No newline at end of file
diff --git a/translators/Wiley Online Library.js b/translators/Wiley Online Library.js
new file mode 100644
index 000000000..0451f035b
--- /dev/null
+++ b/translators/Wiley Online Library.js	
@@ -0,0 +1,320 @@
+{
+        "translatorID": "fe728bc9-595a-4f03-98fc-766f1d8d0936",
+        "label": "Wiley Online Library",
+        "creator": "Sean Takats, Michael Berkowitz and Avram Lyon",
+        "target": "https?://onlinelibrary\\.wiley\\.com[^\\/]*/(?:doi|advanced/search)",
+        "minVersion": "1.0.0b4.r5",
+        "maxVersion": "",
+        "priority": 100,
+        "inRepository": "1",
+        "translatorType": 4,
+        "lastUpdated": "2011-03-27 01:29:16"
+}
+
+function detectWeb(doc, url){
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == 'x') return namespace; else return null;
+	} : null;
+	
+	if (url.match(/\/issuetoc|\/results/)) {
+		return "multiple";
+	} else return "journalArticle";
+}
+
+function doWeb(doc, url){
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == 'x') return namespace; else return null;
+	} : null;
+	var host = 'http://' + doc.location.host + "/";
+	
+	var urls = new Array();
+	if(detectWeb(doc, url) == "multiple") {  //search
+		var title;
+		var availableItems = new Array();
+		var articles = doc.evaluate('//li//div[@class="citation article"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		var article = false;
+		while (article = articles.iterateNext()) {
+			availableItems[article.href] = article.textContent;
+		}
+		var items = Zotero.selectItems(availableItems);
+		if(!items) {
+			return true;
+		}
+		for (var i in items) {
+			urls.push(i);
+		}
+		Zotero.Utilities.processDocuments(urls, scrape, function () { Zotero.done(); });
+	} else { //single article
+		scrape(doc, url);
+	}
+	
+	Zotero.wait();
+}
+
+function parseIdentifier(identifier) {
+	var idPieces = identifier.split(':');
+	if (idPieces.length > 1) {
+		 var prefix = idPieces.shift();
+		 switch (prefix.toLowerCase()) {
+			case "doi": return ["doi", idPieces.join(':')];
+			case "isbn": return ["isbn", idPieces.join(':')];
+			case "issn": return ["issn", idPieces.join(':')];
+			case "pmid": return ["pmid", idPieces.join(':')];
+			default: // do nothing
+		}
+		Zotero.debug("Unknown identifier prefix '"+prefix+"'");
+		return [prefix, idPieces.join(':')];
+	}
+	if (identifer.substr(0,3) == '10.') return ["doi", identifier];
+
+	// If we're here, we have a funny number, and we don't know what to do with it.
+	var ids = idCheck(identifier);
+	if (ids.isbn13) return ["isbn13", isbn13];
+	if (ids.isbn10) return ["isbn10", isbn10];
+	if (ids.issn) return ["issn", isbn10];
+	
+	return ["unknown", identifier];
+}
+
+function addIdentifier(identifier, item) {
+	var parsed = parseIdentifier(identifier);
+	switch (parsed[0]) {
+		case "doi": item.DOI = parsed[1]; break;
+		case "isbn": item.ISBN = parsed[1]; break;
+		case "isbn13": item.ISBN = parsed[1]; break;
+		case "isbn10": item.ISBN = parsed[1]; break;
+		case "issn": item.ISSN = parsed[1]; break;
+		default:
+	}
+}
+
+function scrape(doc,url)
+{
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == 'x') return namespace; else return null;
+	} : null;
+       
+	var newItem=new Zotero.Item("journalArticle");
+       var temp;
+       var xpath;
+       var row;
+       var rows;
+
+       newItem.url = doc.location.href;
+       var metaTags = doc.getElementsByTagName("meta");
+
+       var pages = [false, false];
+       var doi = false;
+       var pdf = false;
+       var html = false;
+	for (var i = 0; i< metaTags.length; i++) {
+		var tag = metaTags[i].getAttribute("name");
+		var value = metaTags[i].getAttribute("content");
+		//Zotero.debug(pages + pdf + html);
+       		//Zotero.debug("Have meta tag: " + tag + " => " + value);
+		switch (tag) {
+			// PRISM
+			case "prism.publicationName": newItem.publicationTitle = value; break;
+			case "prism.issn": if (!newItem.ISSN && value != "NaN" && value != "") newItem.ISSN = value; break;
+			case "prism.eIssn": if (!newItem.ISSN && value != "NaN" && value != "") newItem.ISSN = value; break;
+			// This is often NaN for some reason
+			case "prism.publicationDate": if (!newItem.date && value != "NaN" && value !== "") newItem.date = value; break;
+			case "prism.volume": if (!newItem.volume && value != "NaN" && value != "") newItem.volume = value; break;
+			case "prism.number": if (!newItem.issue && value != "NaN" && value != "") newItem.issue = value; break;
+			// These also seem bad
+			case "prism.startingPage": if(!pages[0] && value != "null" && value != "") pages[0] = value; break;
+			case "prism.endingPage": if(!pages[1] && value != "null" && value != "") pages[1] = value; break;
+			case "prism.number": newItem.issue = value; break;
+			// Google.
+			case "citation_journal_title": if (!newItem.publicationTitle) newItem.publicationTitle = value; break;
+			case "citation_authors":
+				if (newItem.creators.length == 0) {
+					for each(var author in value.split(';')) newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author", true));
+				}
+				break;
+			case "citation_title": if (!newItem.title) newItem.title = value; break;
+			case "citation_publisher": if (!newItem.publisher) newItem.publisher = value; break;
+			case "citation_date": if (!newItem.date && value != "NaN" && value != "") newItem.date = value; break;
+			case "citation_year": if (!newItem.date && value != "NaN" && value != "") newItem.date = value; break;
+			case "citation_volume": if (!newItem.volume && value != "NaN" && value != "") newItem.volume = value; break;
+			case "citation_issue": if (!newItem.issue && value != "NaN" && value != "") newItem.issue = value; break;
+			case "citation_firstpage": if (!pages[0] && value != "NaN" && value != "") pages[0] = value; break;
+			case "citation_lastpage": if (!pages[1] && value != "NaN" && value != "") pages[1] = value; break;
+			case "citation_issn": if (!newItem.ISSN && value != "NaN" && value != "") newItem.ISSN = value; break;
+			case "citation_isbn": if (!newItem.ISBN && value != "NaN" && value != "") newItem.ISBN = value; break;
+			// Prefer long language names
+			case "citation_language": if ((!newItem.language || newItem.language.length < 4)
+								&& value != "null" && value != "") newItem.language = value; break;
+			case "citation_doi": if (!newItem.DOI) newItem.DOI = value; break;
+			case "citation_abstract": newItem.abstractNote = value; break;
+			case "citation_abstract_html_url": newItem.url = value; break;
+			case "citation_pdf_url": if(!pdf) pdf = value; break;
+			case "citation_keywords": newItem.tags.push(value); break;
+			case "citation_fulltext_html_url": if(!pdf) pdf = value; break;
+			case "fulltext_pdf": if(!pdf) pdf = value; break;
+			// Dublin Core
+			case "dc.publisher": if(!newItem.publisher) newItem.publisher = value; break;
+			case "dc.language": if(!newItem.language) newItem.language = value; break;
+			case "dc.rights": if(!newItem.rights) newItem.rights = value; break;
+			case "dc.title": if(!newItem.title) newItem.title = value; break;
+			case "dc.creator": if(!newItem.creators.length == 0) newItem.creators.push(Zotero.Utilities.cleanAuthor(value)); break;
+			// This is often NaN for some reason
+			case "dc.date": if (!newItem.date && value != "NaN" && value !== "") newItem.date = value; break;
+			case "dc.identifier": addIdentifier(value, newItem); break; 
+			default:
+				Zotero.debug("Ignoring meta tag: " + tag + " => " + value);
+		}
+	}
+	
+	if (pdf) newItem.attachments = [{url:pdf, title:"Wiley Full Text PDF", mimeType:"application/pdf"}];
+	if (html) newItem.attachments = [{url:html, title:"Wiley Full Text HTML"}];
+	
+	if (pages[0] && pages[1]) newItem.pages = pages.join('-')
+	else newItem.pages = pages[0] ? pages[1] : (pages[1] ? pages[1] : "");
+
+	// Abstracts don't seem to come with
+	if (!newItem.abstractNote) {
+		var abstractNode = doc.evaluate('//div[@id="abstract"]/div[@class="para"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+		if (abstractNode) newItem.abstractNote = abstractNode.textContent;
+	}
+       newItem.complete();
+}
+
+// Implementation of ISBN and ISSN check-digit verification
+// Based on ISBN Users' Manual (http://www.isbn.org/standards/home/isbn/international/html/usm4.htm)
+// and the Wikipedia treatment of ISBN (http://en.wikipedia.org/wiki/International_Standard_Book_Number)
+// and the Wikipedia treatment of ISSN (http://en.wikipedia.org/wiki/International_Standard_Serial_Number)
+
+// This will also check ISMN validity, although it does not distinguish from their
+// neighbors in namespace, ISBN-13. It does not handle pre-2008 M-prefixed ISMNs; see
+// http://en.wikipedia.org/wiki/International_Standard_Music_Number
+
+// This does not validate multiple identifiers in one field,
+// but it will gracefully ignore all non-number detritus,
+// such as extraneous hyphens, spaces, and comments.
+
+// It currently maintains hyphens in non-initial and non-final position,
+// discarding consecutive ones beyond the first as well.
+
+// It also adds the customary hyphen to valid ISSNs.
+
+// Takes the first 8 valid digits and tries to read an ISSN,
+// takes the first 10 valid digits and tries to read an ISBN 10,
+// and takes the first 13 valid digits to try to read an ISBN 13
+// Returns an object with four attributes:
+// 	"issn" 
+// 	"isbn10"
+// 	"isbn13"
+// Each will be set to a valid identifier if found, and otherwise be a
+// boolean false.
+
+// There could conceivably be a valid ISBN-13 with an ISBN-10
+// substring; this should probably be interpreted as the latter, but it is a
+idCheck = function(isbn) {
+	// For ISBN 10, multiple by these coefficients, take the sum mod 11
+	// and subtract from 11
+	var isbn10 = [10, 9, 8, 7, 6, 5, 4, 3, 2];
+
+	// For ISBN 13, multiple by these coefficients, take the sum mod 10
+	// and subtract from 10
+	var isbn13 = [1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3];
+
+	// For ISSN, multiply by these coefficients, take the sum mod 11
+	// and subtract from 11
+	var issn = [8, 7, 6, 5, 4, 3, 2];
+
+	// We make a single pass through the provided string, interpreting the
+	// first 10 valid characters as an ISBN-10, and the first 13 as an
+	// ISBN-13. We then return an array of booleans and valid detected
+	// ISBNs.
+
+	var j = 0;
+	var sum8 = 0;
+	var num8 = "";
+	var sum10 = 0;
+	var num10 = "";
+	var sum13 = 0;
+	var num13 = "";
+	var chars = [];
+
+	for (var i=0; i < isbn.length; i++) {
+		if (isbn.charAt(i) == " ") {
+			// Since the space character evaluates as a number,
+			// it is a special case.
+		} else if (j > 0 && isbn.charAt(i) == "-" && isbn.charAt(i-1) != "-") {
+			// Preserve hyphens, except in initial and final position
+			// Also discard consecutive hyphens
+			if(j < 7) num8 += "-";
+			if(j < 10) num10 += "-";
+			if(j < 13) num13 += "-";
+		} else if (j < 7 && ((isbn.charAt(i) - 0) == isbn.charAt(i))) {
+			sum8 += isbn.charAt(i) * issn[j];
+			sum10 += isbn.charAt(i) * isbn10[j];
+			sum13 += isbn.charAt(i) * isbn13[j];
+			num8 += isbn.charAt(i);
+			num10 += isbn.charAt(i);
+			num13 += isbn.charAt(i);
+			j++;
+		} else if (j == 7 &&
+			(isbn.charAt(i) == "X" || isbn.charAt(i) == "x" ||
+				((isbn.charAt(i) - 0) == isbn.charAt(i)))) {
+			// In ISSN, an X represents the check digit "10".
+			if(isbn.charAt(i) == "X" || isbn.charAt(i) == "x") {
+				var check8 = 10;
+				num8 += "X";
+			} else {
+				var check8 = isbn.charAt(i);
+				sum10 += isbn.charAt(i) * isbn10[j];
+				sum13 += isbn.charAt(i) * isbn13[j];
+				num8 += isbn.charAt(i);
+				num10 += isbn.charAt(i);
+				num13 += isbn.charAt(i);
+				j++;
+			}
+		} else if (j < 9 && ((isbn.charAt(i) - 0) == isbn.charAt(i))) {
+			sum10 += isbn.charAt(i) * isbn10[j];
+			sum13 += isbn.charAt(i) * isbn13[j];
+			num10 += isbn.charAt(i);
+			num13 += isbn.charAt(i);
+			j++;
+		} else if (j == 9 &&
+			(isbn.charAt(i) == "X" || isbn.charAt(i) == "x" ||
+				((isbn.charAt(i) - 0) == isbn.charAt(i)))) {
+			// In ISBN-10, an X represents the check digit "10".
+			if(isbn.charAt(i) == "X" || isbn.charAt(i) == "x") {
+				var check10 = 10;
+				num10 += "X";
+			} else {
+				var check10 = isbn.charAt(i);
+				sum13 += isbn.charAt(i) * isbn13[j];
+				num10 += isbn.charAt(i);
+				num13 += isbn.charAt(i);
+				j++;
+			}
+		} else if(j < 12 && ((isbn.charAt(i) - 0) == isbn.charAt(i))) {
+			sum13 += isbn.charAt(i) * isbn13[j];
+			num13 += isbn.charAt(i);
+			j++;
+		} else if (j == 12 && ((isbn.charAt(i) - 0) == isbn.charAt(i))) {
+			var check13 = isbn.charAt(i);
+			num13 += isbn.charAt(i);
+		}
+	}
+	var valid8  = ((11 - sum8 % 11) % 11) == check8;
+	var valid10 = ((11 - sum10 % 11) % 11) == check10;
+	var valid13 = (10 - sum13 % 10 == check13);
+	var matches = false;
+	
+	// Since ISSNs have a standard hyphen placement, we can add a hyphen
+	if (valid8 && (matches = num8.match(/([0-9]{4})([0-9]{3}[0-9Xx])/))) {
+		num8 = matches[1] + '-' + matches[2];
+	} 
+
+	if(!valid8) {num8 = false};
+	if(!valid10) {num10 = false};
+	if(!valid13) {num13 = false};
+	return {"isbn10" : num10, "isbn13" : num13, "issn" : num8};
+}