221 lines
7.5 KiB
JavaScript
221 lines
7.5 KiB
JavaScript
{
|
|
"translatorID":"e0234bcf-bc56-4577-aa94-fe86a27f6fd6",
|
|
"translatorType":4,
|
|
"label":"The Globe and Mail",
|
|
"creator":"Adam Crymble",
|
|
"target":"http://www.theglobeandmail.com",
|
|
"minVersion":"1.0.0b4.r5",
|
|
"maxVersion":"",
|
|
"priority":100,
|
|
"inRepository":true,
|
|
"lastUpdated":"2008-08-04 07:10:00"
|
|
}
|
|
|
|
function detectWeb(doc, url) {
|
|
var namespace = doc.documentElement.namespaceURI;
|
|
var nsResolver = namespace ? function(prefix) {
|
|
if (prefix == 'x') return namespace; else return null;
|
|
} : null;
|
|
|
|
var metaTags = new Object();
|
|
var metaTagHTML = doc.getElementsByTagName("meta");
|
|
for (var i = 0 ; i < metaTagHTML.length ; i++) {
|
|
metaTags[metaTagHTML[i].getAttribute("name")] = Zotero.Utilities.cleanTags(metaTagHTML[i].getAttribute("content"));
|
|
|
|
}
|
|
|
|
if (doc.evaluate('//div[@id="header"]/h2/a/img', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
|
var printEdition1 = doc.evaluate('//div[@id="header"]/h2/a/img', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().src;
|
|
if (printEdition1.match("printedition")) {
|
|
return "newspaperArticle";
|
|
}
|
|
}
|
|
|
|
if (doc.evaluate('//p[@id="continueReading"]/strong', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
|
var payPerView = doc.evaluate('//p[@id="continueReading"]/strong', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
|
if (payPerView == "purchase this article") {
|
|
return "newspaperArticle";
|
|
}
|
|
}
|
|
|
|
if (metaTags["article_id"]) {
|
|
return "newspaperArticle";
|
|
|
|
} else if (doc.title.match('globeandmail.com: Search')) {
|
|
return "multiple";
|
|
}
|
|
|
|
if (doc.evaluate('//ul[@id="utility"]/li[@class="email"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
|
var blogCheck = doc.evaluate('//ul[@id="utility"]/li[@class="email"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
var blogCheck1 = blogCheck.iterateNext().textContent;
|
|
if (blogCheck1.match("blog")) {
|
|
if (doc.location.href.match("story")) {
|
|
return "blogPost";
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//Translator for the Globe and Mail newspaper: code by Adam Crymble
|
|
|
|
function associateMeta (newItem, metaTags, field, zoteroField) {
|
|
if (metaTags[field]) {
|
|
newItem[zoteroField] = metaTags[field];
|
|
}
|
|
}
|
|
|
|
function scrape(doc, url) {
|
|
var namespace = doc.documentElement.namespaceURI;
|
|
var nsResolver = namespace ? function(prefix) {
|
|
if (prefix == 'x') return namespace; else return null;
|
|
} : null;
|
|
|
|
if (detectWeb(doc, url) == "blogPost") {
|
|
var newItem = new Zotero.Item("blogPost");
|
|
|
|
var title = doc.evaluate('//div[@id="headline"]/h2', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
newItem.title = title.iterateNext().textContent;
|
|
|
|
var blogger = doc.evaluate('//div[@id="author"]/p', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
var bloggerName = blogger.iterateNext().textContent.replace(/^\s*|\s*$/g, '');
|
|
var bloggerName1 = bloggerName.split(",");
|
|
newItem.creators.push(Zotero.Utilities.cleanAuthor(bloggerName1[0], "author"));
|
|
}
|
|
var noMetaType = 0;
|
|
|
|
if (detectWeb(doc, url) == "newspaperArticle") {
|
|
var newItem = new Zotero.Item("newspaperArticle");
|
|
|
|
//checks if the article is from the "Print Edition" which doesn't contain meta data.
|
|
if (doc.evaluate('//div[@id="header"]/h2/a/img', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
|
var printEdition1 = doc.evaluate('//div[@id="header"]/h2/a/img', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().src;
|
|
if (printEdition1.match("printedition")) {
|
|
noMetaType = 1;
|
|
if (doc.evaluate('//div[@id="author"]/p[@class="article-date"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
|
newItem.date= doc.evaluate('//div[@id="author"]/p[@class="article-date"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
|
}
|
|
}
|
|
}
|
|
|
|
//checks if the article is a Pay per view article.
|
|
if (doc.evaluate('//p[@id="continueReading"]/strong', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
|
var payPerView = doc.evaluate('//p[@id="continueReading"]/strong', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
|
if (payPerView == "purchase this article") {
|
|
noMetaType = 1;
|
|
}
|
|
}
|
|
|
|
//format for the "Print Edition" and "Pay per view" articles
|
|
if (noMetaType = 1) {
|
|
noMetaType = 1;
|
|
if (doc.evaluate('//div[@id="headline"]/h2', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
|
newItem.title = doc.evaluate('//div[@id="headline"]/h2', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
|
}
|
|
|
|
if (doc.evaluate('//div[@id="author"]/p[@class="byline"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
|
var author = doc.evaluate('//div[@id="author"]/p[@class="byline"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
|
}
|
|
noMetaType = 0;
|
|
}
|
|
|
|
var metaTags = new Object();
|
|
var tagsContent = new Array();
|
|
|
|
//get data
|
|
var metaTagHTML = doc.getElementsByTagName("meta");
|
|
for (var i = 0 ; i < metaTagHTML.length ; i++) {
|
|
metaTags[metaTagHTML[i].getAttribute("name")] = Zotero.Utilities.cleanTags(metaTagHTML[i].getAttribute("content"));
|
|
}
|
|
|
|
//author
|
|
if (metaTags["byline"]) {
|
|
var author = metaTags["byline"];
|
|
}
|
|
|
|
//date
|
|
if (metaTags["pubdate"]) {
|
|
var month = metaTags["pubdate"].substr(4, 2);
|
|
var day = metaTags["pubdate"].substr(6, 2);
|
|
var year = metaTags["pubdate"].substr(0, 4);
|
|
|
|
newItem.date = (year + "-" + month + "-"+ day);
|
|
}
|
|
|
|
//tags
|
|
if (metaTags["article_keywords"]) {
|
|
tagsContent = metaTags["article_keywords"].split("; ");
|
|
}
|
|
|
|
for (var i = 0; i < tagsContent.length; i++) {
|
|
if (tagsContent[i] != (" ") && tagsContent[i] != ("")) {
|
|
newItem.tags[i] = tagsContent[i];
|
|
}
|
|
}
|
|
|
|
associateMeta (newItem, metaTags, "headline", "title");
|
|
associateMeta (newItem, metaTags, "summary", "abstractNote");
|
|
associateMeta (newItem, metaTags, "desk", "section");
|
|
associateMeta (newItem, metaTags, "article_id", "callNumber");
|
|
associateMeta (newItem, metaTags, "credit", "rights");
|
|
|
|
//rest of author (shared between both newspaperArticle types)
|
|
if (author) {
|
|
|
|
|
|
if (author.substr(0,3).toLowerCase() == "by ") {
|
|
author= author.substr(3);
|
|
}
|
|
|
|
var authors = author.toLowerCase().split(" and ");
|
|
for each(var author in authors) {
|
|
var words = author.split(" ");
|
|
|
|
for (var i in words) {
|
|
if (words[i] != "") {
|
|
words[i] = words[i][0].toUpperCase() + words[i].substr(1).toLowerCase();
|
|
}
|
|
}
|
|
author = words.join(" ");
|
|
newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author"));
|
|
}
|
|
}
|
|
}
|
|
|
|
newItem.url = doc.location.href;
|
|
newItem.complete();
|
|
|
|
}
|
|
|
|
function doWeb(doc, url) {
|
|
var namespace = doc.documentElement.namespaceURI;
|
|
var nsResolver = namespace ? function(prefix) {
|
|
if (prefix == 'x') return namespace; else return null;
|
|
} : null;
|
|
|
|
var articles = new Array();
|
|
|
|
if (detectWeb(doc, url) == "multiple") {
|
|
var items = new Object();
|
|
var next_title = new Array();
|
|
|
|
var titlesCount = doc.evaluate('count (//h3[@class="storyLink"]/a)', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
var titles = doc.evaluate('//h3[@class="storyLink"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
|
|
for (i=0; i < titlesCount.numberValue; i++) {
|
|
next_title = titles.iterateNext();
|
|
|
|
if (next_title.href.match("story")) {
|
|
items[next_title.href] = next_title.textContent;
|
|
}
|
|
}
|
|
|
|
items = Zotero.selectItems(items);
|
|
for (var i in items) {
|
|
articles.push(i);
|
|
}
|
|
} else {
|
|
articles = [url];
|
|
}
|
|
Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();});
|
|
Zotero.wait();
|
|
} |