diff --git a/scrapers.sql b/scrapers.sql
index 759f73e5c..8ba5aaeeb 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -703,17 +703,16 @@ function scrape(doc) {
}
}
- newItem.attachments.push({document:doc, title:"History Cooperative Full Text",
- downloadable:true});
-
- newItem.complete();
-
- // don''t actually need date info for a journal article
var month = metaTags.namedItem("PublicationMonth");
var year = metaTags.namedItem("PublicationYear");
if(month && year) {
newItem.date = month.getAttribute("content")+" "+year.getAttribute("content");
}
+
+ newItem.attachments.push({document:doc, title:"History Cooperative Full Text",
+ downloadable:true});
+
+ newItem.complete();
}
function doWeb(doc, url) {
@@ -3344,6 +3343,161 @@ function doWeb(doc, url) {
Scholar.wait();
}');
+REPLACE INTO "translators" VALUES ('ce7a3727-d184-407f-ac12-52837f3361ff', '2006-08-26 14:21:00', 4, 'New York Times', 'Simon Kornblith', '^(?:http://query.nytimes.com/search/query|http://www.nytimes.com/.+)',
+'function getList(urls, each, done) {
+ var url = urls.shift(); // next URL to fetch; shift() consumes the caller array
+ Scholar.Utilities.HTTP.doGet(url, function(text) { // callback gets text, presumably the response body - TODO confirm
+ if(each) {
+ each(text, url); // optional per-URL hook, given the fetched text and the URL it came from
+ }
+ // The next URL is requested from inside this callback, so fetches are chained in list order.
+ if(urls.length) {
+ getList(urls, each, done);
+ } else if(done) {
+ done(text); // optional completion hook; it only receives the text of the last fetch
+ }
+ });
+}
+
+function detectWeb(doc, url) { // returns "multiple", "newspaperArticle", or nothing (undefined) when neither check matches
+ if(doc.title.substr(0, 30) == "The New York Times: Search for") { // the compared prefix is exactly 30 characters long
+ var namespace = doc.documentElement.namespaceURI;
+ var nsResolver = namespace ? function(prefix) { // maps prefix x to the document namespace; null resolver if there is none
+ if (prefix == ''x'') return namespace; else return null;
+ } : null;
+ // A search page only counts when the srchContent div is present; the XPath itself uses no namespace prefix.
+ var result = doc.evaluate(''//div[@id="srchContent"]'', doc, nsResolver,
+ XPathResult.ANY_TYPE, null).iterateNext();
+ if(result) {
+ return "multiple";
+ }
+ } else {
+ var metaTags = doc.getElementsByTagName("meta");
+ if(metaTags.namedItem("hdl") && metaTags.namedItem("byl")) { // both tags required; presumably headline and byline - TODO confirm
+ return "newspaperArticle";
+ }
+ }
+}',
+'function associateMeta(newItem, metaTags, field, scholarField) {
+ if(metaTags[field]) { // missing or otherwise falsy values (e.g. empty string) are skipped
+ newItem[scholarField] = metaTags[field]; // copy the value onto the item under the target field name
+ }
+}
+
+function scrape(doc, url) {
+ var newItem = new Scholar.Item("newspaperArticle");
+ newItem.publicationTitle = "The New York Times";
+ newItem.ISSN = "0362-4331";
+
+ var metaTags = new Object();
+ if(url != undefined) {
+ newItem.url = url;
+ var metaTagRe = /]*>/gi;
+ var nameRe = /name="([^"]+)"/i;
+ var contentRe = /content="([^"]+)"/i;
+ var m = doc.match(metaTagRe);
+
+ if(!m) {
+ return;
+ }
+
+ for(var i=0; i