diff --git a/translators/TV by the Numbers.js b/translators/TV by the Numbers.js new file mode 100644 index 000000000..43f91d6fc --- /dev/null +++ b/translators/TV by the Numbers.js @@ -0,0 +1,138 @@ +{ + "translatorID":"180a62bf-efdd-4d38-8d85-8971af04dd85", + "label":"TV by the Numbers", + "creator":"odie5533", + "target":"^http://tvbythenumbers\\.com", + "minVersion":"1.0", + "maxVersion":"", + "priority":100, + "inRepository":"0", + "translatorType":4, + "lastUpdated":"2010-08-04 03:31:19" +} + +/* + TV by the Numbers - translator for Zotero + Copyright (C) 2010 odie5533 + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* + This translator supports saving a snapshot of a single post and saving + the citation of many posts at once without visiting each post. Thus, it does + not save a snapshot when multiple citations are to be saved. 
/*
 * Summary: on a single-post page the item is saved together with a snapshot
 * of the page; on a listing page the chosen posts are saved as citations
 * only, without snapshots.
 */

/*
 * Site configuration read by detectWeb() and scrape().
 *
 * Each XPATH_* value is an XPath expression evaluated as a string; the paired
 * RE_* pattern is applied to that string and its first capture group is kept.
 * Setting an XPATH_* value to null disables the corresponding field.
 *
 * NOTE(review): XPATH_DATE and XPATH_AUTHORS begin with "//", which XPath
 * resolves from the document root rather than from the context node handed
 * to evaluate(). On a listing page every post would therefore presumably read
 * the first "posted_on" paragraph in the document. Left unchanged here;
 * confirm against the live markup before switching to ".//".
 */
var PUB_TITLE = "TV by the Numbers";
var XPATH_TITLE = "//title";
var XPATH_PAGES = null;
var XPATH_DATE = "substring-after(substring-before(string(//p[@class='posted_on']),' by '), 'on ')";
var RE_DATE = /(.*)/;
var XPATH_AUTHORS = "substring-after(string(//p[@class='posted_on']),' by ')";
var RE_AUTHORS = /(.*)/;

/**
 * Decides whether the page holds one post or several.
 *
 * @param {Document} doc - document to inspect
 * @param {string} url - address of the document (unused here)
 * @returns {string|undefined} "webpage" for exactly one post, "multiple" for
 *     more than one, undefined when the page has no title or no posts
 */
function detectWeb(doc, url) {
    /* site has lots of garbage, check we're on the right doc */
    if (!xpath_string(doc, doc, XPATH_TITLE)) {
        return;
    }
    var posts = doc.evaluate("count(//div[@class='post-alt blog'])", doc, null,
        XPathResult.NUMBER_TYPE, null).numberValue;
    if (posts === 1) {
        return "webpage";
    }
    if (posts > 1) {
        return "multiple";
    }
}

/**
 * Evaluates an XPath expression as a string.
 *
 * @param {Document} doc - document whose evaluate() is used
 * @param {Node} node - context node for the expression
 * @param {string} xpath - XPath expression
 * @returns {string|null} the result passed through Zotero.Utilities.trim, or
 *     null when there is no result or the result is empty
 */
function xpath_string(doc, node, xpath) {
    var res = doc.evaluate(xpath, node, null, XPathResult.STRING_TYPE, null);
    if (!res || !res.stringValue) {
        return null;
    }
    return Zotero.Utilities.trim(res.stringValue);
}

/**
 * Evaluates an XPath expression as a string and extracts the first capture
 * group of a regular expression from it.
 *
 * @param {Document} doc - document whose evaluate() is used
 * @param {Node} node - context node for the expression
 * @param {string} xpath - XPath expression
 * @param {RegExp} reg - pattern with at least one capture group
 * @returns {string|null} the first capture group, or null when the XPath
 *     produced no text or the pattern did not match
 */
function xpre(doc, node, xpath, reg) {
    var text = xpath_string(doc, node, xpath);
    // Without this guard exec() would stringify null and match "null".
    if (text === null) {
        return null;
    }
    var match = reg.exec(text);
    return match ? match[1] : null;
}

/**
 * Builds one "webpage" item per post found in the document and saves them.
 *
 * With a single post the item is completed directly. With several posts the
 * titles are offered through Zotero.selectItems() and only the chosen items
 * are completed. A snapshot attachment is added only to an item whose URL
 * equals the url argument.
 *
 * @param {Document} doc - document to scrape
 * @param {string} url - address of the document
 */
function scrape(doc, url) {
    // Keyed by item URL; a plain object keeps insertion order for these keys.
    var items = {};
    var postCount = 0;
    var posts = doc.evaluate("//div[@class='post-alt blog']", doc, null,
        XPathResult.ANY_TYPE, null);
    var post;
    var key;

    while ((post = posts.iterateNext())) {
        var link = post.getElementsByTagName("a")[0];
        // A post without any link has neither URL nor title; skip it.
        if (!link) {
            continue;
        }

        var newItem = new Zotero.Item("webpage");
        newItem.publicationTitle = PUB_TITLE;
        newItem.url = link.href;

        var title = Zotero.Utilities.unescapeHTML(
            Zotero.Utilities.cleanTags(link.textContent));
        // Curly quotes next to whitespace become '' and the rest become '.
        title = title.replace(/(\s+)(?:‘|’)|(?:‘|’)(\s+)/g, "$1''$2").replace(/‘|’/g, "'");
        newItem.title = title;

        if (XPATH_DATE) {
            var date = xpre(doc, post, XPATH_DATE, RE_DATE);
            if (date) {
                newItem.date = date;
            }
        }
        if (XPATH_PAGES) {
            var pages = xpath_string(doc, post, XPATH_PAGES);
            if (pages) {
                newItem.pages = pages;
            }
        }

        // authors: "A and B", "A;B" or a single name
        var authorText = xpre(doc, post, XPATH_AUTHORS, RE_AUTHORS);
        var authors = [];
        if (authorText) {
            if (authorText.indexOf(" and ") !== -1) {
                authors = authorText.split(" and ");
            } else if (authorText.indexOf(";") !== -1) {
                authors = authorText.split(";");
            } else {
                authors.push(authorText);
            }
        }
        for (var n = 0; n < authors.length; n++) {
            // Splitting can leave blank fragments (e.g. a trailing ";").
            if (Zotero.Utilities.trim(authors[n])) {
                newItem.creators.push(
                    Zotero.Utilities.cleanAuthor(authors[n], "author"));
            }
        }

        // attach html only when we are on the post's own page
        if (url === newItem.url) {
            newItem.attachments.push({title: PUB_TITLE + " Snapshot",
                mimeType: "text/html", url: doc.location.href, snapshot: true});
        }

        newItem.toString = function() { return this.title; };
        items[newItem.url] = newItem;
        postCount++;
    }

    if (postCount > 1) {
        var choices = {};
        for (key in items) {
            if (Object.prototype.hasOwnProperty.call(items, key)) {
                choices[key] = items[key].title;
            }
        }
        // NOTE(review): called synchronously, as the original did; confirm
        // this matches the selectItems() contract of the targeted Zotero.
        var selected = Zotero.selectItems(choices);
        if (!selected) {
            return;
        }
        for (key in selected) {
            if (Object.prototype.hasOwnProperty.call(selected, key) &&
                    Object.prototype.hasOwnProperty.call(items, key)) {
                items[key].complete();
            }
        }
    } else if (postCount === 1) {
        for (key in items) {
            if (Object.prototype.hasOwnProperty.call(items, key)) {
                items[key].complete();
            }
        }
    }
}

/**
 * Translator entry point; delegates to scrape().
 *
 * @param {Document} doc - document to translate
 * @param {string} url - address of the document
 */
function doWeb(doc, url) {
    scrape(doc, url);
}