diff --git a/scrapers.sql b/scrapers.sql
index e7e364692..c634aac74 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -22,7 +22,7 @@
 
 
 -- Set the following timestamp to the most recent scraper update date
-REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-31 17:30:00'));
+REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-31 18:30:00'));
 
 REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon',
 'function detectWeb(doc, url) {
@@ -1579,6 +1579,92 @@ function doWeb(doc, url) {
 	}
 }');
 
+-- Berkeley Electronic Press translator. Article metadata is read from the
+-- bepress_citation_* <meta> tags found on each article page.
+REPLACE INTO translators VALUES ('2a5dc3ed-ee5e-4bfb-baad-36ae007e40ce', '1.0.0b4.r5', '', '2008-03-31 18:30:00', '0', '100', '4', 'Berkeley Electronic Press', 'Michael Berkowitz', 'http://www.bepress.com/',
+'function detectWeb(doc, url) {
+	// Search result listings can yield several articles at once.
+	if (url.match("cgi/query.cgi")) {
+		return "multiple";
+	// Article URLs contain volN/issN; both numbers may have several digits.
+	} else if (url.match(/vol\d+\/iss\d+/)) {
+		return "journalArticle";
+	}
+}',
+'// Maps bepress_citation_* meta tag suffixes to Zotero item fields.
+var tagMap = {
+	journal_title:"publicationTitle",
+	title:"title",
+	date:"date",
+	volume:"volume",
+	issue:"issue",
+	abstract_html_url:"url",
+	doi:"DOI"
+}
+
+function doWeb(doc, url) {
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	// Collect the article URLs to scrape: the user selection on a search
+	// result page, otherwise just the current page.
+	var articles = new Array();
+	if (detectWeb(doc, url) == "multiple") {
+		var items = new Object();
+		var titles = doc.evaluate(''//table[@id="query"]/tbody/tr/td[4]/a'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		var next_title;
+		while (next_title = titles.iterateNext()) {
+			items[next_title.href] = next_title.textContent;
+		}
+		items = Zotero.selectItems(items);
+		for (var i in items) {
+			articles.push(i);
+		}
+	} else {
+		articles = [url];
+	}
+	Zotero.debug(articles);
+	Zotero.Utilities.processDocuments(articles, function(newDoc) {
+		// Index every bepress_citation_* meta tag by its suffix.
+		var metatags = new Object();
+		var metas = newDoc.evaluate(''//meta[contains(@name, "bepress_citation")]'', newDoc, null, XPathResult.ANY_TYPE, null);
+		var next_meta;
+		while (next_meta = metas.iterateNext()) {
+			metatags[next_meta.name.replace("bepress_citation_", "")] = next_meta.content;
+		}
+		var item = new Zotero.Item("journalArticle");
+
+		//regularly mapped tags
+		for (var tag in tagMap) {
+			if (metatags[tag]) {
+				item[tagMap[tag]] = metatags[tag];
+			}
+		}
+
+		//authors (semicolon-separated; skipped when the tag is absent so a
+		//missing tag cannot abort the whole item)
+		if (metatags[''authors'']) {
+			var authors = metatags[''authors''].split(";");
+			for each (var author in authors) {
+				item.creators.push(Zotero.Utilities.cleanAuthor(author, "author"));
+			}
+		}
+
+		//attachments: the article page always, the PDF only when a URL is given
+		item.attachments = [
+			{url:item.url, title:item.title, mimeType:"text/html"}
+		];
+		if (metatags[''pdf_url'']) {
+			item.attachments.push({url:metatags[''pdf_url''], title:"Berkeley Electronic Press Full Text PDF", mimeType:"application/pdf"});
+		}
+		item.complete();
+	// Zotero.done must be called, not just referenced, to pair with Zotero.wait().
+	}, function() {Zotero.done();});
+	Zotero.wait();
+}');
+
 REPLACE INTO translators VALUES ('7cb0089b-9551-44b2-abca-eb03cbf586d9', '1.0.0b4.r5', '', '2008-03-30 08:00:00', '0', '100', '4', 'BioOne', 'Michael Berkowitz', 'http://[^/]*www.bioone.org[^/]*/',
 'function detectWeb(doc, url) {
 	if (url.indexOf("searchtype") != -1) {