zotero/translators/Mainichi Daily News.js

122 lines
3.3 KiB
JavaScript

{
"translatorID":"b56f856e-934e-4b46-bc58-d61dccc9f32f",
"translatorType":4,
"label":"Mainichi Daily News",
"creator":"Frank Bennett",
"target":"^http://(?:search\\.)*mdn\\.mainichi\\.jp/(?:$|result\\?|mdnnews/|perspectives/|features/|arts/|travel/)",
"minVersion":"2.0b7",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2010-06-05 20:35:00"
}
// #################################
// #### Local utility functions ####
// #################################
// Matches the URL of a single article page. Capture group 1 is the run of
// eight digits that follows the last matching "/"; scrapeAndParse slices it
// as YYYYMMDD to build the item date.
// Written as a regex literal so that "\." really is an escaped dot: inside a
// string literal the backslash is dropped and the dot would match any character.
var itemRe = /.*\/([0-9]{8})[a-z][0-9][a-z][0-9]{2}[a-z][0-9][a-z]{2}[0-9]{6}c\.html/;
/**
 * Build the namespace resolver argument used for XPath evaluation on a document.
 *
 * @param {Object} doc - document whose documentElement.namespaceURI is inspected
 * @returns {?Function} null when the root element has no (or an empty)
 *     namespace URI; otherwise a function that maps the prefix 'x' to that
 *     URI and every other prefix to null
 */
var getResolver = function (doc) {
    var rootNamespace = doc.documentElement.namespaceURI;
    if (!rootNamespace) {
        return null;
    }
    return function (prefix) {
        return prefix == 'x' ? rootNamespace : null;
    };
};
/**
 * Normalise a headline string taken from the page.
 *
 * Steps, in order: collapse every whitespace run to one space, drop the
 * first "|" and everything after it, remove anything that looks like a
 * markup tag, tidy whitespace again, then pass the result through
 * Zotero.Utilities.unescapeHTML.
 *
 * @param {string} str - raw headline text
 * @returns {string} whatever Zotero.Utilities.unescapeHTML returns for the
 *     cleaned text
 */
var cleanUp = function (str) {
    var ret;
    // \s covers no-break spaces and line breaks as well as ordinary blanks.
    // A global regex is used because the three-argument
    // replace(str, str, "g") form is non-standard and only replaces the
    // first occurrence in standard engines. Line breaks must be gone before
    // the "|" strip below, since "." does not match them.
    ret = str.replace(/\s+/g, " ");
    ret = ret.replace(/\|.*/, "").replace(/<[^>]+>/g, "");
    // Stripping the "| suffix" and tags can leave a trailing blank or a
    // doubled space behind, so whitespace is normalised after those steps.
    ret = ret.replace(/\s+/g, " ").replace(/^\s+/, "").replace(/\s+$/, "");
    ret = Zotero.Utilities.unescapeHTML(ret);
    return ret;
};
// #########################
// ##### API functions #####
// #########################
/**
 * Classify the current page for Zotero.
 *
 * The decision is made from doc.location.href; the url argument is not read.
 *
 * @param {Object} doc - document being inspected
 * @param {string} url - unused
 * @returns {string} "newspaperArticle" when the document address matches
 *     itemRe, otherwise "multiple"
 */
var detectWeb = function (doc, url) {
    var isArticlePage = itemRe.test(doc.location.href);
    return isArticlePage ? "newspaperArticle" : "multiple";
};
/**
 * Translator entry point: scrape the current page.
 *
 * For a "multiple" page, collects every link selected by the XPath for that
 * page kind whose href matches itemRe, hands the href -> headline map to
 * Zotero.selectItems, and calls scrapeAndParse for each own property of the
 * object it returns. For a "newspaperArticle" page, takes the first
 * h2.NewsTitle node as the title and scrapes the page itself.
 *
 * @param {Object} doc - document to scrape
 * @param {string} url - address of the document; also decides which XPath
 *     is used on a "multiple" page
 */
var doWeb = function (doc, url) {
    var type, nsResolver, availableItems, haveItems, xpath, found, nodes, myurl, items, title;
    nsResolver = getResolver(doc);
    type = detectWeb(doc, url);
    if (type === "multiple") {
        availableItems = {};
        // Tracked explicitly: the non-standard __count__ property that used
        // to answer "is this object non-empty?" no longer exists in current
        // engines, so testing it always skipped the selection step.
        haveItems = false;
        if (url.match(/^http:\/\/search\.mdn\.mainichi\.jp\/result\?/)) {
            xpath = '//div[@class="ResultTitle"]/a[contains(@href, "mdn.mainichi.jp")]';
        } else {
            xpath = '//h2[@class="NewsTitle"]/a[@href]|//ul[@class="Mark"]/li/a[@href]';
        }
        nodes = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
        found = nodes.iterateNext();
        while (found) {
            // Only links that look like article pages are offered.
            if (itemRe.test(found.href)) {
                availableItems[found.href] = cleanUp(found.textContent);
                haveItems = true;
            }
            found = nodes.iterateNext();
        }
        if (haveItems) {
            items = Zotero.selectItems(availableItems);
            for (myurl in items) {
                if (Object.prototype.hasOwnProperty.call(items, myurl)) {
                    scrapeAndParse(myurl, availableItems[myurl]);
                }
            }
        }
    } else if (type === "newspaperArticle") {
        xpath = '//h2[@class="NewsTitle"]';
        nodes = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
        title = nodes.iterateNext();
        // Without a headline node nothing is saved.
        if (title) {
            title = cleanUp(title.textContent);
            scrapeAndParse(url, title);
        }
    }
};
// ############################
// ##### Scraper function #####
// ############################
/**
 * Create and complete one newspaperArticle item.
 *
 * Title and URL come from the arguments; publication title and edition are
 * fixed strings. When the URL matches itemRe, the eight captured digits are
 * split 4/2/2 and joined with "-" to form the date; otherwise no date is
 * set. One text/html attachment pointing at the same URL is added before
 * item.complete() is called.
 *
 * @param {string} url - address of the article page
 * @param {string} title - headline to store as the item title
 */
var scrapeAndParse = function (url, title) {
    var article = new Zotero.Item("newspaperArticle");
    var snapshot = {
        title: "Mainichi Daily News snapshot",
        mimeType: "text/html",
        url: url
    };
    var match, stamp;

    article.title = title;
    article.publicationTitle = "Mainichi Daily News";
    article.edition = "online edition";
    article.url = url;

    match = itemRe.exec(url);
    if (match) {
        stamp = match[1];
        article.date = stamp.slice(0, 4) + "-" + stamp.slice(4, 6) + "-" + stamp.slice(6, 8);
    }

    article.attachments.push(snapshot);
    article.complete();
};