diff --git a/scrapers.sql b/scrapers.sql
index 9defc5261..864a038ab 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -1,4 +1,4 @@
--- 146
+-- 147
 
 -- ***** BEGIN LICENSE BLOCK *****
 -- 
@@ -22,7 +22,7 @@
 
 
 -- Set the following timestamp to the most recent scraper update date
-REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-12-18 06:00:45'));
+REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-12-18 15:24:03'));
 
 REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b3.r1', '', '2006-12-15 03:40:00', 1, 100, 4, 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
 'function detectWeb(doc, url) {
@@ -5722,6 +5722,177 @@ function doWeb(doc, url) {
 	Zotero.wait();
 }');
 
+-- Gale Literature Resource Center, MLA bibliography views only (srchtp=mla or srchtp=advmla).
+-- The "?" after LitRC is escaped so that srchtp may also be the first query parameter.
+REPLACE INTO translators VALUES ('84564450-d633-4de2-bbcc-451ea580f0d6', '1.0.0b3.r1', '', '2006-12-18 15:24:03', '1', '100', '4', 'Gale Literature Resource Center', 'Simon Kornblith', '^https?://[^/]+/servlet/LitRC\?(?:|.*&)srchtp=(?:adv)?mla(?:&|$)',
+'function detectWeb(doc, url) {
+	// Returns "journalArticle" when the URL contains docNum=, "multiple" when the page has
+	// a result row (a tr holding a span of class stndxtralead), and false otherwise.
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	// the title must start with this 33-character prefix and continue past it
+	if(doc.title.length <= 33 || doc.title.substr(0, 33) != "Literature Resource Center -- MLA") return false;
+
+	if(url.indexOf("docNum=") != -1) {	// article;
+		return "journalArticle";
+	} else if(doc.evaluate(''//tr[td/span[@class="stndxtralead"]]'', doc, nsResolver,
+		XPathResult.ANY_TYPE, null).iterateNext()) {
+		return "multiple";
+	}
+
+	return false;
+}',
+'function extractCitation(type, citation) {
+	// Parses one MLA citation string into a new Zotero.Item.
+	//   type     - document type label; "book article" maps to bookSection, "book" and
+	//              "book collection" to book, "dissertation abstract" to thesis, and
+	//              anything else to journalArticle
+	//   citation - citation text; both arguments go through cleanString first
+	// Returns the item, or false when the citation does not fit the pattern for its type.
+	type = Zotero.Utilities.cleanString(type).toLowerCase();
+	citation = Zotero.Utilities.cleanString(citation);
+
+	if(type == "book article") {
+		var item = new Zotero.Item("bookSection");
+	} else if(type == "book" || type == "book collection") {
+		var item = new Zotero.Item("book");
+	} else if(type == "dissertation abstract") {
+		var item = new Zotero.Item("thesis");
+	} else {
+		var item = new Zotero.Item("journalArticle");
+	}
+
+	var m;
+	if(item.itemType == "journalArticle" || item.itemType == "thesis") {
+		m = citation.match(/^(.+)\. "([^"]+)" (.+), ([0-9\:]*) ?\(([^\)]+)\)(?:, (?:pp\. ([\-0-9]+)|([\-0-9A-Z]+)))?/);
+		if(!m) return false;
+
+		item.publicationTitle = m[3];
+		var parts = m[4].split(":");
+		if(parts.length == 2) {
+			item.volume = parts[0];
+			item.issue = parts[1];
+		} else {
+			item.issue = m[4];
+		}
+		item.date = m[5];
+		item.pages = m[6] ? m[6] : m[7];
+	} else if(item.itemType == "book") {
+		m = citation.match(/^(.+)\. "([^"]+)" ([^:]+): ([^,]+), ([0-9]{4})\..*?(?:([0-9]+) pp\.)/);
+		if(!m) return false;
+
+		item.place = m[3];
+		item.publisher = m[4];
+		item.date = m[5];
+		item.pages = m[6];
+	} else if(item.itemType == "bookSection") {
+		m = citation.match(/^(.+)\. "([^"]+)" pp\. ([\-0-9]+)\. (?:((?:[^\.]*|\([^\)]+\)| [A-Z]\.)*)\.)? ([^\(\)]+). ([^:]+): ([^,]+), ([0-9]{4})/);
+		if(!m) return false;
+
+		item.pages = m[3];
+		// the book author/editor group is optional in the pattern, so m[4] can be undefined
+		var bookAuthors = m[4] ? m[4].split(" and ") : [];
+		for each(var bookAuthor in bookAuthors) {
+			var n = bookAuthor.match(/^([^,]+), ([^\(]+)(?: \(([^\)]+)\)?)?$/);
+			if(n) {
+				var creatorType = (n[3] && n[3].toLowerCase().indexOf("ed.") != -1) ? "editor" : "author";
+				item.creators.push({lastName:n[1], firstName:n[2], creatorType:creatorType});
+			}
+		}
+		item.publicationTitle = m[5];
+		item.place = m[6];
+		item.publisher = m[7];
+		item.date = m[8];
+	}
+
+	// add creators
+	var creators = m[1].split("; ");
+	for each(var creator in creators) {
+		item.creators.push(Zotero.Utilities.cleanAuthor(creator, "author", true));
+	}
+	if(m[2][m[2].length-1] == ".") {
+		item.title = m[2].substr(0, m[2].length-1);
+	} else {
+		item.title = m[2];
+	}
+
+	return item;
+}
+
+function doWeb(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	var uri = doc.location.href;
+	if(url.indexOf("docNum=") != -1) {	// article;
+		var citation = doc.evaluate(''//td[b/text() = "Source Database:"] | //td[*/b/text() = "Source Database:"]'', doc, nsResolver,
+			XPathResult.ANY_TYPE, null).iterateNext().innerHTML;
+
+		// ugh
+		var parts = citation.split(/<\/CENTER>/i);
+		var citation = parts[parts.length-1];
+		citation = citation.replace(/<script[^>]*>(?:.|[\r\n])*<\/script>/gi, "");
+		citation = citation.replace(/<a[^>]*>(?:.|[\r\n])*<\/a>/gi, "");
+
+		// big enormous hack, but it works
+		var span = doc.createElement("span");
+		span.innerHTML = citation;
+		citation = span.textContent;
+
+		var citeM = citation.match(/^\s*([^\n]+)/);
+		var subjectM = citation.match(/Subject Terms:\s+([^\n]+)/);
+		var typeM = citation.match(/Document Type:\s+([^\n]+)/);
+		var issnM = citation.match(/ISSN:\s+([^\n]+)/);
+
+		var item = extractCitation(typeM[1], citeM[1]);
+		item.tags = subjectM[1].split("; ");
+
+		if(issnM) item.ISSN = issnM[1];
+
+		item.complete();
+	} else {	// search results
+		var items = new Array();
+
+		var tableRows = doc.evaluate(''//tr[td/span[@class="stndxtralead"]]'', doc, nsResolver,
+			XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
+		// Go through table rows
+		for(var i=0; i