From 8d173666c64662a36dea1ff2d16e561b28631b09 Mon Sep 17 00:00:00 2001
From: Avram Lyon
Date: Sat, 26 Mar 2011 18:18:27 +0000
Subject: [PATCH] Trans: New translator for Readability-formatted webpages, discussed at http://forums.zotero.org/discussion/17112/

---
 translators/Readability.js | 104 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 translators/Readability.js

diff --git a/translators/Readability.js b/translators/Readability.js
new file mode 100644
index 000000000..fca4ddff7
--- /dev/null
+++ b/translators/Readability.js
@@ -0,0 +1,104 @@
+{
+	"translatorID": "cd77f1e5-507f-4c41-a6d2-bda5fa6f8694",
+	"label": "Readability",
+	"creator": "Avram Lyon",
+	"target": "^https?://www\\.readability\\.com/articles",
+	"minVersion": "1.0",
+	"maxVersion": "",
+	"priority": 100,
+	"inRepository": "0",
+	"translatorType": 4,
+	"lastUpdated": "2011-03-26 17:16:55"
+}
+
+/*
+    Readability Translator
+    Copyright (C) 2011 Avram Lyon, ajlyon@gmail.com
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Reports whether the page looks like a Readability article.
+ * @param {Document} doc Page DOM to inspect
+ * @param {String} url Page address (not used here)
+ * @return {String|Boolean} "webpage" if the article title heading is found, otherwise false
+ */
+function detectWeb(doc, url){
+	var n = doc.documentElement.namespaceURI;
+	var ns = n ? function(prefix) {
+		if (prefix == 'x') return n; else return null;
+	} : null;
+	// doc.evaluate() always returns an XPathResult object, which is
+	// truthy even when nothing matched, so fetch the first node to find
+	// out whether the heading really exists.
+	var title = doc.evaluate('//h1[@id="article-entry-title"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext();
+	if (title) return "webpage";
+	else return false;
+}
+
+/**
+ * Builds a "webpage" item from a Readability article page and completes
+ * it with a snapshot of the page attached.
+ * @param {Document} doc Page DOM to scrape
+ * @param {String} url Page address; used as the item URL only when the
+ *   page has no link back to the original article
+ */
+function doWeb(doc, url){
+	var n = doc.documentElement.namespaceURI;
+	var ns = n ? function(prefix) {
+		if (prefix == 'x') return n; else return null;
+	} : null;
+
+	// Since we don't know much about the site, we have to assume that
+	// it is a webpage
+	var item = new Zotero.Item("webpage");
+	var title = doc.evaluate('//h1[@id="article-entry-title"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext();
+	// Fall back to the document's own title instead of throwing when the
+	// heading is missing
+	item.title = title ? title.textContent : doc.title;
+
+	var rurl = doc.evaluate('//a[@id="article-url"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext();
+	if (rurl) {
+		item.url = rurl.href;
+		// This is just the domain name, but it'll serve as the site title,
+		// since we don't know anything else.
+		item.websiteTitle = rurl.textContent;
+	} else {
+		// No link back to the original article, so keep the address we were given
+		item.url = url;
+	}
+
+	// It is possible that Readability sometimes has multiple authors,
+	// in which case this will have to be slightly amended
+	var author = doc.evaluate('//span[@id="article-author"]/span[@class="fn"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext();
+	if (author) {
+		var auts = author.textContent.split(" and ");
+		// Plain indexed loop: for...in on an array would also visit any
+		// enumerable properties added to Array.prototype
+		for (var i = 0; i < auts.length; i++) {
+			item.creators.push(Zotero.Utilities.cleanAuthor(auts[i],"author"));
+		}
+	}
+
+	// There is also a standardized timestamp, but we're ignoring that
+	// in favor of the nice-looking time.
+	var time = doc.evaluate('//time[@id="article-timestamp"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext();
+	if(time) item.date = time.textContent;
+
+	// We snapshot the page, using the existing document
+	// TODO Eliminate the itemType:"attachment" when Z 2.1.2 is released
+	item.attachments = [{itemType:"attachment", document:doc, title:"Readability Snapshot"}];
+	item.complete();
+}