diff --git a/scrapers.sql b/scrapers.sql index 03b2240b8..bf75fa101 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -22,7 +22,7 @@ -- Set the following timestamp to the most recent scraper update date -REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-05-08 18:30:00')); +REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-05-08 20:30:00')); REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon', 'function detectWeb(doc, url) { @@ -1089,6 +1089,63 @@ REPLACE INTO translators VALUES ('88915634-1af6-c134-0171-56fd198235ed', '1.0.0b Zotero.wait(); }'); +REPLACE INTO translators VALUES ('e8d40f4b-c4c9-41ca-a59f-cf4deb3d3dc5', '1.0.0b4.r5', '', '2008-05-08 20:30:00', '0', '100', '4', 'Business Standard', 'Prashant Iyengar and Michael Berkowitz', 'http://www.business-standard.com', +'function detectWeb(doc, url) { + if (url.match(/googlesearch/)) { + return "multiple"; + } else if (url.match(/common/)) { + return "newspaperArticle"; + } +}', +'function doWeb(doc, url) { + var arts = new Array(); + if (detectWeb(doc, url) == "multiple") { + var links = doc.evaluate(''//a[@class="NewsHead"]'', doc, null, XPathResult.ANY_TYPE, null); + var link; + var items = new Object(); + while (link = links.iterateNext()) { + items[link.href] = Zotero.Utilities.cleanTags(link.textContent); + } + items = Zotero.selectItems(items); + for (var i in items) { + arts.push(i); + } + } else { + arts = [url]; + } + Zotero.debug(arts); + Zotero.Utilities.processDocuments(arts, function(doc) { + var newItem = new Zotero.Item("newspaperArticle"); + newItem.publicationTitle = "The Business Standard"; + newItem.url = doc.location.href; + newItem.websiteTitle="The Business Standard"; + newItem.edition="Online"; + newItem.title = Zotero.Utilities.cleanTags(doc.title); + + + if (doc.evaluate(''//td[@class="author"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { + var bits = doc.evaluate(''//td[@class="author"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(/\s+\/\s+/); + newItem.creators.push(Zotero.Utilities.cleanAuthor(bits[0], "author")); + extras = Zotero.Utilities.trimInternal(bits[1]).match(/^(.*)(\s\w+\s+\d+,\s*\d+)$/); + newItem.place = extras[1]; + newItem.date = Zotero.Utilities.trimInternal(extras[2]); + newItem.complete(); + } else if (doc.evaluate(''//td[@class="NewsSummary"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { + var author = Zotero.Utilities.trimInternal(doc.evaluate(''//td[@class="NewsSummary"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent); + newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author")); + var printurl = ''http://www.business-standard.com/general/printpage.php?autono='' + newItem.url.match(/autono=(\d+)/)[1]; + Zotero.debug(printurl); + Zotero.Utilities.HTTP.doGet(printurl, function(text) { + var date = text.match(/