diff --git a/scrapers.sql b/scrapers.sql
index 04bd63dff..d4caed2b4 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -1091,6 +1091,297 @@ REPLACE INTO translators VALUES ('88915634-1af6-c134-0171-56fd198235ed', '1.0.0b
 	Zotero.wait();
 }');
 
+REPLACE INTO translators VALUES ('f39dbd1c-229e-4abb-8414-a09fdbda37b7', '1.0.0b4.r5', '', '2008-06-23 14:40:53', '0', '100', '4', 'Archives Network of Alberta', 'Adam Crymble', 'http://208.38.46.62:8080/',
+'function detectWeb(doc, url) {
+	// The first <h3> names the page type; a page without one is not handled.
+	var heading = doc.evaluate(''//h3'', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
+	if (!heading) {
+		return;
+	}
+
+	var headingText = heading.textContent;
+	if (headingText.match("Search Results")) {
+		return "multiple";
+	} else if (headingText.match("Display")) {
+		return "book";
+	}
+}',
+'//Archives Network of Alberta translator. Code by Adam Crymble
+
+// Copies dataTags[field] onto newItem[zoteroField] when the page supplied that field.
+function associateData (newItem, dataTags, field, zoteroField) {
+	if (dataTags[field]) {
+		newItem[zoteroField] = dataTags[field];
+	}
+}
+
+// Strips leading and trailing whitespace.
+function trimText(text) {
+	return text.replace(/^\s*|\s*$/g, "");
+}
+
+// Builds one "book" item from a record page by walking the label cells and
+// value cells in step and mapping the known labels onto Zotero fields.
+function scrape(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	var newItem = new Zotero.Item("book");
+
+	var dataTags = new Object();
+	var manyAuthors = new Array();
+	var nameTags = new Array();
+	var topicTags = new Array();
+
+	var xPathHeaders = doc.evaluate(''//td[1][@class="datalabel"]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+	var xPathContent = doc.evaluate(''//td[2][@class="datatext"]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+
+	// Stop as soon as either column runs out, so a label without a value cell
+	// cannot cause a null dereference.
+	var headerCell, contentCell;
+	while ((headerCell = xPathHeaders.iterateNext()) && (contentCell = xPathContent.iterateNext())) {
+		// All whitespace is removed from the label, so the keys below contain no spaces.
+		var fieldTitle = headerCell.textContent.replace(/\s+/g, "");
+		var fieldText = trimText(contentCell.textContent);
+
+		if (fieldTitle == "Provenance:") {
+			dataTags[fieldTitle] = fieldText;
+
+			// split() returns a one-element array when there is no "; " separator.
+			manyAuthors = fieldText.split("; ");
+			Zotero.debug(manyAuthors);
+
+			for (var j = 0; j < manyAuthors.length; j++) {
+				if (manyAuthors[j].match(", ")) {
+					// "Last, First" is flipped to "First Last" for cleanAuthor.
+					var authorName = manyAuthors[j].split(",");
+					authorName[0] = trimText(authorName[0]);
+					authorName[1] = trimText(authorName[1]);
+					newItem.creators.push(Zotero.Utilities.cleanAuthor((authorName[1] + " " + authorName[0]), "author"));
+				} else {
+					// No comma: keep the whole string as the last name. The type is
+					// "author" to agree with the branch above; "creator" is not among
+					// the documented Zotero creator types.
+					newItem.creators.push({lastName: manyAuthors[j], creatorType: "author"});
+				}
+			}
+		} else if (fieldTitle == "Partof:") {
+			dataTags[fieldTitle] = "Part of " + Zotero.Utilities.cleanTags(fieldText);
+		} else if (fieldTitle == "OnlineFindingAid:") {
+			dataTags[fieldTitle] = "Online Finding Aid: " + fieldText;
+			Zotero.debug(dataTags[fieldTitle]);
+		} else if (fieldTitle == "Names:") {
+			dataTags[fieldTitle] = fieldText;
+			nameTags = fieldText.split(";");
+		} else if (fieldTitle == "Topic:") {
+			dataTags[fieldTitle] = fieldText;
+			topicTags = fieldText.split(", ");
+		} else {
+			dataTags[fieldTitle] = Zotero.Utilities.cleanTags(fieldText);
+		}
+	}
+
+	// Names and topics both become tags; entries that are blank after trimming
+	// (for example from a trailing separator) are dropped.
+	var allTags = nameTags.concat(topicTags);
+	for (var i = 0; i < allTags.length; i++) {
+		var tag = trimText(allTags[i]);
+		if (tag) {
+			newItem.tags.push(tag);
+		}
+	}
+
+	associateData (newItem, dataTags, "Title:", "title");
+	associateData (newItem, dataTags, "Dates:", "date");
+	associateData (newItem, dataTags, "Physicaldesc.:", "pages");
+	associateData (newItem, dataTags, "Repository:", "repository");
+	associateData (newItem, dataTags, "Scope/Content:", "abstractNote");
+	associateData (newItem, dataTags, "Partof:", "series");
+	associateData (newItem, dataTags, "OnlineFindingAid:", "extra");
+	associateData (newItem, dataTags, "Language:", "language");
+	associateData (newItem, dataTags, "RecordNo.:", "callNumber");
+
+	newItem.url = doc.location.href;
+	newItem.complete();
+}
+
+// Entry point: on a result list the user picks records to import, otherwise
+// the current page is scraped directly.
+function doWeb(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	var articles = new Array();
+
+	if (detectWeb(doc, url) == "multiple") {
+		var items = new Object();
+
+		var xPathTitles = doc.evaluate(''//tr[1]/td[2][@class="datatext"]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		var xPathLinks = doc.evaluate(''//td[1][@class="dataleft"]/a'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+
+		// Titles and links are separate node lists walked in step; stop when
+		// either one runs out instead of dereferencing null.
+		var nextTitle, nextLink;
+		while ((nextTitle = xPathTitles.iterateNext()) && (nextLink = xPathLinks.iterateNext())) {
+			items[nextLink.href] = nextTitle.textContent;
+		}
+
+		items = Zotero.selectItems(items);
+		// Nothing was selected (dialog cancelled): finish without scraping.
+		if (!items) {
+			return true;
+		}
+		for (var i in items) {
+			articles.push(i);
+		}
+	} else {
+		articles = [url];
+	}
+	Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();});
+	Zotero.wait();
+}');
+
+REPLACE INTO translators VALUES ('a29d22b3-c2e4-4cc0-ace4-6c2326144332', '1.0.0b4.r5', '', '2008-06-23 14:42:02', '0', '100', '4', 'CABI - CAB Abstracts', 'Adam Crymble', 'http://www.cabi.org/',
+'function detectWeb(doc, url) {
+	// A link in the first table row is treated as a result list; a PageSubTitle span as a single record.
+	if (doc.evaluate(''//div/table/tbody/tr[1]/td/a'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
+		return "multiple";
+	} else if (doc.evaluate(''//span[@class="PageSubTitle"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
+		return "book";
+	}
+}',
+'//CAB Abstracts translator. Code by Adam Crymble
+//only designed for "book" entries. People, projects, sites, etc are ignored by Zotero.
+
+// Strips leading and trailing whitespace.
+function trimText(text) {
+	return text.replace(/^\s*|\s*$/g, "");
+}
+
+// Builds one "book" item from a record page: authors from the byline spans, then pages, date and ISBN from the imprint cell.
+function scrape(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	var newItem = new Zotero.Item("book");
+
+	//authors
+	var xPathAuthors = doc.evaluate(''//td[@class="smallwebtext"]/table/tbody/tr/td[1]/span[@class="MenuBar"]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+	var authorNode;
+	while (authorNode = xPathAuthors.iterateNext()) {
+		var byline = authorNode.textContent;
+		if (!byline.match("by ")) {
+			continue;
+		}
+
+		// Everything after "by " is a "; "-separated list of authors.
+		var authors = byline.substr(byline.indexOf("by ") + 3).split("; ");
+		for (var i = 0; i < authors.length; i++) {
+			// Keep only the part before the first comma. Without the -1 check a
+			// comma-less entry would be cut to an empty string by substr(0, -1).
+			var commaAt = authors[i].indexOf(",");
+			var namePart = (commaAt == -1) ? authors[i] : authors[i].substr(0, commaAt);
+
+			// Single-letter tokens are treated as initials and get a period; other
+			// tokens are joined with a space.
+			var givenName = namePart.split(/\s/);
+			var fullName = "";
+			for (var k = 0; k < givenName.length; k++) {
+				if (givenName[k].length == 1) {
+					fullName = fullName + givenName[k] + ".";
+				} else {
+					fullName = fullName + " " + givenName[k];
+				}
+			}
+
+			// Skip entries that contain nothing but whitespace.
+			if (fullName.match(/\S/)) {
+				newItem.creators.push(Zotero.Utilities.cleanAuthor(fullName, "author"));
+			}
+		}
+	}
+
+	//imprint info
+	var imprintNode = doc.evaluate(''//td[3]/table/tbody/tr/td[@class="MenuBar"]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+	if (imprintNode) {
+		var imprint = imprintNode.textContent.split(/\n/);
+		for (var m = 0; m < imprint.length; m++) {
+			var line = trimText(imprint[m]);
+			// Lines without any word character carry no data.
+			if (!line.match(/\w/)) {
+				continue;
+			}
+
+			// Values are located relative to their label rather than at a fixed
+			// character offset, then trimmed.
+			if (line.match("pages")) {
+				newItem.pages = trimText(line.substr(0, line.indexOf("pages")));
+			} else if (line.match("Date:")) {
+				newItem.date = trimText(line.substr(line.indexOf("Date:") + 5));
+			} else if (line.match("ISBN: ")) {
+				newItem.ISBN = trimText(line.substr(line.indexOf("ISBN: ") + 6));
+			}
+		}
+	}
+
+	newItem.title = doc.title;
+	newItem.url = doc.location.href;
+
+	newItem.complete();
+}
+
+// Entry point: on a result list the user picks records to import, otherwise
+// the current page is scraped directly.
+function doWeb(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	var articles = new Array();
+
+	if (detectWeb(doc, url) == "multiple") {
+		var items = new Object();
+
+		var titles = doc.evaluate(''//tr[1]/td/a'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		var mediaType = doc.evaluate(''//strong'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+
+		// The first <strong> is discarded before pairing labels with links
+		// (presumably a page heading; confirm against the live markup).
+		mediaType.iterateNext();
+
+		var next_title, next_type;
+		while ((next_title = titles.iterateNext()) && (next_type = mediaType.iterateNext())) {
+			// Book hits are listed by title; any other hit is listed under its
+			// media-type label.
+			if (next_type.textContent.match("Book")) {
+				items[next_title.href] = next_title.textContent;
+			} else {
+				items[next_title.href] = next_type.textContent;
+			}
+		}
+
+		items = Zotero.selectItems(items);
+		// Nothing was selected (dialog cancelled): finish without scraping.
+		if (!items) {
+			return true;
+		}
+		for (var i in items) {
+			articles.push(i);
+		}
+	} else {
+		articles = [url];
+	}
+	Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();});
+	Zotero.wait();
+}');
+
 REPLACE INTO translators VALUES ('a7c8b759-6f8a-4875-9d6e-cc0a99fe8f43', '1.0.0b4.r5', '', '2008-06-20 20:45:00', '0', '100', '4', 'Canadian Letters and Images', 'Adam Crymble', 'http://(www.)?canadianletters.ca/',
 'function detectWeb(doc, url) {
 	if (doc.location.href.match("results")) {