zotero/translators/The Globe and Mail.js

221 lines
7.5 KiB
JavaScript

{
"translatorID":"e0234bcf-bc56-4577-aa94-fe86a27f6fd6",
"translatorType":4,
"label":"The Globe and Mail",
"creator":"Adam Crymble",
"target":"http://www.theglobeandmail.com",
"minVersion":"1.0.0b4.r5",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2008-08-04 07:10:00"
}
/**
 * Decides which kind of Zotero item, if any, the given page represents.
 *
 * The checks run in this order and the first hit wins:
 *   1. header image whose src contains "printedition"      -> "newspaperArticle"
 *   2. "purchase this article" pay-per-view prompt          -> "newspaperArticle"
 *   3. a non-empty "article_id" meta tag                    -> "newspaperArticle"
 *   4. page title matching 'globeandmail.com: Search'       -> "multiple"
 *   5. e-mail utility link mentioning "blog" on a page
 *      whose address contains "story"                       -> "blogPost"
 *
 * @param {Document} doc - page to inspect
 * @param {String} url - address of the page (unused; every check reads doc)
 * @return {String|undefined} the item type, or undefined when nothing matched
 */
function detectWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;

	// First node matching the XPath expression, or null when nothing matches.
	function firstNode(xpath) {
		return doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	}

	var metaTags = {};
	var metaTagHTML = doc.getElementsByTagName("meta");
	for (var i = 0; i < metaTagHTML.length; i++) {
		var metaName = metaTagHTML[i].getAttribute("name");
		var metaContent = metaTagHTML[i].getAttribute("content");
		// getAttribute() yields null for a missing attribute (e.g. <meta charset="...">).
		// Such elements carry nothing we look up, and only strings are handed to
		// cleanTags, so they are skipped instead of being stored under the key "null".
		if (typeof metaName === "string" && typeof metaContent === "string") {
			metaTags[metaName] = Zotero.Utilities.cleanTags(metaContent);
		}
	}

	var headerImage = firstNode('//div[@id="header"]/h2/a/img');
	if (headerImage && headerImage.src.match("printedition")) {
		return "newspaperArticle";
	}

	var purchasePrompt = firstNode('//p[@id="continueReading"]/strong');
	if (purchasePrompt && purchasePrompt.textContent === "purchase this article") {
		return "newspaperArticle";
	}

	if (metaTags["article_id"]) {
		return "newspaperArticle";
	}
	if (doc.title.match('globeandmail.com: Search')) {
		return "multiple";
	}

	var emailLink = firstNode('//ul[@id="utility"]/li[@class="email"]/a');
	if (emailLink && emailLink.textContent.match("blog") && doc.location.href.match("story")) {
		return "blogPost";
	}
}
//Translator for the Globe and Mail newspaper: code by Adam Crymble
/**
 * Copies one meta-tag value onto an item, provided the value is present.
 *
 * @param {Object} newItem - item receiving the value
 * @param {Object} metaTags - map of meta-tag name to content
 * @param {String} field - key to read from metaTags
 * @param {String} zoteroField - property to set on newItem
 */
function associateMeta (newItem, metaTags, field, zoteroField) {
	var value = metaTags[field];
	// Missing or empty values leave the item untouched.
	if (!value) {
		return;
	}
	newItem[zoteroField] = value;
}
/**
 * Returns the first node matching an XPath expression in doc, or null when
 * the expression matches nothing.
 */
function xpathFirstNode(doc, nsResolver, xpath) {
	return doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
}

/**
 * Lower-cases a name, then upper-cases the first letter of every
 * space-separated word ("JANE DOE" becomes "Jane Doe").
 */
function capitalizeWords(name) {
	var words = name.toLowerCase().split(" ");
	for (var i = 0; i < words.length; i++) {
		if (words[i] != "") {
			words[i] = words[i].charAt(0).toUpperCase() + words[i].substr(1);
		}
	}
	return words.join(" ");
}

/**
 * Builds one Zotero item from a blog-post or article page and completes it.
 *
 * The page type comes from detectWeb(). Blog posts take their title and
 * author from the page markup. Newspaper articles prefer the page's meta tags
 * and fall back to the page markup for title and byline when the matching meta
 * tag is absent (print-edition and pay-per-view pages carry no meta data).
 * Pages detectWeb() reports as anything else produce no item.
 *
 * @param {Document} doc - page to scrape
 * @param {String} url - address of the page; only forwarded to detectWeb(),
 *     the stored URL is read from doc.location.href
 */
function scrape(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;

	// Detection walks the DOM, so run it once and branch on the result.
	var itemType = detectWeb(doc, url);
	var newItem;

	if (itemType == "blogPost") {
		newItem = new Zotero.Item("blogPost");

		var blogTitle = xpathFirstNode(doc, nsResolver, '//div[@id="headline"]/h2');
		if (blogTitle) {
			newItem.title = blogTitle.textContent;
		}

		var blogger = xpathFirstNode(doc, nsResolver, '//div[@id="author"]/p');
		if (blogger) {
			// Only the text before the first comma is used as the author's name.
			var bloggerName = blogger.textContent.replace(/^\s*|\s*$/g, '').split(",")[0];
			newItem.creators.push(Zotero.Utilities.cleanAuthor(bloggerName, "author"));
		}
	} else if (itemType == "newspaperArticle") {
		newItem = new Zotero.Item("newspaperArticle");

		var metaTags = {};
		var metaTagHTML = doc.getElementsByTagName("meta");
		for (var i = 0; i < metaTagHTML.length; i++) {
			var metaName = metaTagHTML[i].getAttribute("name");
			var metaContent = metaTagHTML[i].getAttribute("content");
			// getAttribute() yields null for a missing attribute; only strings are
			// handed to cleanTags and nameless tags are never looked up.
			if (typeof metaName === "string" && typeof metaContent === "string") {
				metaTags[metaName] = Zotero.Utilities.cleanTags(metaContent);
			}
		}

		//date: "pubdate" is read as YYYYMMDD; print-edition pages show the date in the page instead
		if (metaTags["pubdate"]) {
			var pubdate = metaTags["pubdate"];
			newItem.date = pubdate.substr(0, 4) + "-" + pubdate.substr(4, 2) + "-" + pubdate.substr(6, 2);
		} else {
			var headerImage = xpathFirstNode(doc, nsResolver, '//div[@id="header"]/h2/a/img');
			if (headerImage && headerImage.src.match("printedition")) {
				var dateNode = xpathFirstNode(doc, nsResolver, '//div[@id="author"]/p[@class="article-date"]');
				if (dateNode) {
					newItem.date = dateNode.textContent;
				}
			}
		}

		//title: page headline is the fallback, the "headline" meta tag wins when present
		if (!metaTags["headline"]) {
			var headlineNode = xpathFirstNode(doc, nsResolver, '//div[@id="headline"]/h2');
			if (headlineNode) {
				newItem.title = headlineNode.textContent;
			}
		}
		associateMeta (newItem, metaTags, "headline", "title");
		associateMeta (newItem, metaTags, "summary", "abstractNote");
		associateMeta (newItem, metaTags, "desk", "section");
		associateMeta (newItem, metaTags, "article_id", "callNumber");
		associateMeta (newItem, metaTags, "credit", "rights");

		//tags: push() keeps the array dense when blank keywords are skipped
		if (metaTags["article_keywords"]) {
			var keywords = metaTags["article_keywords"].split("; ");
			for (var k = 0; k < keywords.length; k++) {
				if (keywords[k] != " " && keywords[k] != "") {
					newItem.tags.push(keywords[k]);
				}
			}
		}

		//author: "byline" meta tag first, page byline as the fallback
		var author = metaTags["byline"];
		if (!author) {
			var bylineNode = xpathFirstNode(doc, nsResolver, '//div[@id="author"]/p[@class="byline"]');
			if (bylineNode) {
				author = bylineNode.textContent;
			}
		}
		if (author) {
			// Page text may be padded with whitespace, which would hide the "by " prefix.
			author = author.replace(/^\s+|\s+$/g, "");
			if (author.substr(0, 3).toLowerCase() == "by ") {
				author = author.substr(3);
			}
			var names = author.toLowerCase().split(" and ");
			for (var n = 0; n < names.length; n++) {
				var fullName = capitalizeWords(names[n]);
				if (fullName != "") {
					newItem.creators.push(Zotero.Utilities.cleanAuthor(fullName, "author"));
				}
			}
		}
	} else {
		// Neither a blog post nor an article: there is no item to save.
		return;
	}

	newItem.url = doc.location.href;
	newItem.complete();
}
/**
 * Translator entry point.
 *
 * On a page detected as "multiple", gathers the story links found under
 * h3.storyLink (only those whose address contains "story"), lets the user
 * choose among them through Zotero.selectItems, and scrapes every chosen
 * page. On any other page, the given url itself is scraped.
 *
 * @param {Document} doc - page the translator was invoked on
 * @param {String} url - address of that page
 */
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return prefix == 'x' ? namespace : null;
	} : null;

	var targets = [];
	if (detectWeb(doc, url) == "multiple") {
		// address -> link text, the shape handed to Zotero.selectItems
		var candidates = {};
		var links = doc.evaluate('//h3[@class="storyLink"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
		var link = links.iterateNext();
		while (link) {
			if (link.href.match("story")) {
				candidates[link.href] = link.textContent;
			}
			link = links.iterateNext();
		}

		var chosen = Zotero.selectItems(candidates);
		for (var address in chosen) {
			targets.push(address);
		}
	} else {
		targets.push(url);
	}

	Zotero.Utilities.processDocuments(targets, scrape, function() {
		Zotero.done();
	});
	Zotero.wait();
}