{ "translatorID":"b56d756e-934e-4b46-bc58-d61dccc9f32f", "translatorType":4, "label":"Japan Times Online", "creator":"Frank Bennett", "target":"^http://(?:www|search)\\.japantimes\\.co\\.jp/(?:cgi-bin|gsearch|features|entertainment|sports|life|news|rss)", "minVersion":"2.0b7", "maxVersion":"", "priority":100, "inRepository":true, "lastUpdated":"2010-09-28 07:00:00" } /* Japan Times Online Translator Copyright (C) 2009-2010 Frank Bennett, biercenator@gmail.com This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/

// #################################
// #### Local utility functions ####
// #################################

/**
 * Matches the URL of a single article page on search.japantimes.co.jp:
 * one of the known section directories followed by a file name made of
 * two letters, eight digits, two alphanumerics and ".html".
 *
 * Written as a regex literal so that the dots are matched literally; inside
 * a quoted string the backslash escapes would be dropped and every dot
 * would match any character.
 */
var itemRe = /^http:\/\/search\.japantimes\.co\.jp\/(?:cgi-bin|gsearch|features|entertainment|sports|life|news|rss)\/[a-z]{2}[0-9]{8}[a-z0-9]{2}\.html/;

/**
 * Collapse every run of whitespace (including no-break spaces and newlines)
 * to a single space and strip leading/trailing whitespace.
 *
 * @param {string} str - Text to normalize.
 * @returns {string} The normalized text.
 */
var cleanText = function (str) {
	return str.replace(/\s+/g, " ").replace(/^ | $/g, "");
};

/**
 * Report whether an object has at least one own enumerable property.
 * Implemented with a plain loop so it does not rely on engine-specific
 * extensions such as the removed `__count__` property.
 *
 * @param {Object} obj - Object to inspect.
 * @returns {boolean} True when at least one own property exists.
 */
var hasOwnEntries = function (obj) {
	var key;
	for (key in obj) {
		if (Object.prototype.hasOwnProperty.call(obj, key)) {
			return true;
		}
	}
	return false;
};

/**
 * Build an XPath namespace resolver for a document.
 *
 * @param {Document} doc - Document whose root element namespace is read.
 * @returns {?function(string): ?string} A resolver that maps the prefix
 *     "x" to the namespace of the document element (and anything else to
 *     null), or null when the document element has no namespace.
 */
var getResolver = function (doc) {
	var namespace = doc.documentElement.namespaceURI;
	if (!namespace) {
		return null;
	}
	return function (prefix) {
		return prefix === "x" ? namespace : null;
	};
};

/**
 * Extract the text that directly follows the first tag carrying a given
 * attribute/value pair, up to the next "<".
 *
 * `attribute` and `value` are inserted into a regular expression verbatim,
 * so callers must pass plain identifiers (as this file does).
 *
 * @param {string} txt - Raw HTML source.
 * @param {string} attribute - Attribute name, e.g. "id".
 * @param {string} value - Attribute value, e.g. "headline".
 * @returns {string|boolean} The captured text, or false when nothing matched.
 */
var getTagContent = function (txt, attribute, value) {
	var rex, m;
	rex = new RegExp("<[^>]*" + attribute + "=\"" + value + "\"[^>]*>([^<]*)<");
	m = rex.exec(txt);
	return m ? m[1] : false;
};

/**
 * Collect article links from raw HTML without a DOM.
 *
 * The text is split on opening and closing occurrences of `tag`; because the
 * split pattern has a capture group, the resulting list alternates between
 * plain text (even indexes) and the matched tags (odd indexes). Every opening
 * tag that is immediately followed by content and a closing tag is examined:
 * if its `attribute` value matches `itemRe`, the content becomes the title.
 *
 * @param {string} txt - Raw HTML source.
 * @param {string} tag - Tag name to look for, e.g. "a".
 * @param {string} attribute - Attribute holding the URL, e.g. "href".
 * @returns {Object.<string, string>} Map of article URL to cleaned title.
 */
var getTagsWithAttributeAndContent = function (txt, tag, attribute) {
	var ret, pos, len, lst, m, title, closeTag, tagsrex, attribrex;
	ret = {};
	closeTag = "</" + tag + ">";
	tagsrex = new RegExp("(<" + tag + "(?: [^>]*>|>)|" + closeTag + ")");
	attribrex = new RegExp(' ' + attribute + '="([^"]+)"');
	lst = txt.split(tagsrex);
	len = lst.length;
	// Odd indexes hold the captured tags. Walk all of them (rather than
	// assuming strict open/close alternation) so that one stray tag does not
	// misalign every later link.
	for (pos = 1; pos < len - 2; pos += 2) {
		if (lst[pos] === closeTag || lst[pos + 2] !== closeTag) {
			continue;
		}
		m = lst[pos].match(attribrex);
		if (!m || !itemRe.test(m[1])) {
			continue;
		}
		// Drop everything after a "|" on the first line, then any inline markup.
		title = lst[pos + 1].replace(/\|.*/, "").replace(/<[^>]+>/g, "");
		ret[m[1]] = cleanText(Zotero.Utilities.unescapeHTML(title));
	}
	return ret;
};

// #########################
// ##### API functions #####
// #########################

/**
 * Classify the current page.
 *
 * @param {Document} doc - The loaded page; its location is what gets tested.
 * @param {string} url - Page URL, used only when `doc.location` is missing.
 * @returns {string} "newspaperArticle" for an article URL, else "multiple".
 */
var detectWeb = function (doc, url) {
	var href = (doc && doc.location && doc.location.href) || url;
	return itemRe.test(href) ? "newspaperArticle" : "multiple";
};

/**
 * Translator entry point: save the current article, or offer a selection of
 * the article links found on a listing/search page and save the chosen ones.
 *
 * @param {Document} doc - The loaded page.
 * @param {string} url - URL of the loaded page.
 */
var doWeb = function (doc, url) {
	var type, nsResolver, availableItems, xpath, iframe, page, nodes, found,
		headline, myurl, items;
	nsResolver = getResolver(doc);
	type = detectWeb(doc, url);

	if (type === "newspaperArticle") {
		scrapeAndParse(url);
		return;
	}
	if (type !== "multiple") {
		return;
	}

	availableItems = {};
	if (url.match(/\/gsearch\//)) {
		//
		// For Google SafeSearch: the results live in an iframe, so fetch the
		// frame source and pull the links out of the raw text.
		//
		xpath = '//iframe[@name="googleSearchFrame"]';
		iframe = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		// The frame may be absent; in that case there is nothing to offer.
		if (iframe && iframe.src) {
			page = Zotero.Utilities.retrieveSource(iframe.src);
			if (page) {
				availableItems = getTagsWithAttributeAndContent(page, "a", "href");
			}
		}
	} else {
		xpath = '//a[contains(@href, "cgi-bin")]';
		nodes = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
		for (found = nodes.iterateNext(); found; found = nodes.iterateNext()) {
			if (!itemRe.test(found.href)) {
				continue;
			}
			//
			// Some headlines have a weird structure that yields two
			// entries, the second of which is blank. Skipping blank
			// entries loses nothing.
			//
			headline = cleanText(found.text || "");
			if (!headline) {
				continue;
			}
			availableItems[found.href] = headline;
		}
	}

	if (!hasOwnEntries(availableItems)) {
		return;
	}
	items = Zotero.selectItems(availableItems);
	for (myurl in items) {
		if (Object.prototype.hasOwnProperty.call(items, myurl)) {
			scrapeAndParse(myurl);
		}
	}
};

// ############################
// ##### Scraper function #####
// ############################

/**
 * Fetch one article page, read its date and headline from the elements
 * with id "date" and id "headline", and save it as a newspaper article
 * with a snapshot attachment.
 *
 * @param {string} url - URL of the article page.
 */
var scrapeAndParse = function (url) {
	var item, mytxt, val;
	item = new Zotero.Item("newspaperArticle");
	mytxt = Zotero.Utilities.retrieveSource(url);
	item.publicationTitle = "Japan Times Online";
	item.ISSN = "0289-1956";
	item.url = url;
	val = getTagContent(mytxt, "id", "date");
	if (val) {
		item.date = cleanText(val);
	}
	val = getTagContent(mytxt, "id", "headline");
	if (val) {
		// The text comes from raw HTML, so decode entities such as "&amp;"
		// the same way search-result titles are decoded.
		item.title = cleanText(Zotero.Utilities.unescapeHTML(val));
	}
	item.attachments.push({title:"Japan Times Online snapshot", mimeType:"text/html", url:url});
	item.complete();
};