zotero/translators/TV by the Numbers.js

139 lines
4.6 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"translatorID":"180a62bf-efdd-4d38-8d85-8971af04dd85",
"label":"TV by the Numbers",
"creator":"odie5533",
"target":"^http://tvbythenumbers\\.com",
"minVersion":"1.0",
"maxVersion":"",
"priority":100,
"inRepository":"0",
"translatorType":4,
"lastUpdated":"2010-08-04 03:31:19"
}
/*
TV by the Numbers - translator for Zotero
Copyright (C) 2010 odie5533
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
This translator supports saving a snapshot of a single post and saving
the citation of many posts at once without visiting each post. Thus, it does
not save a snapshot when multiple citations are to be saved.
*/
/*
Site-specific configuration used by detectWeb() and scrape().
Declared with `var` so they are real declarations rather than implicit
globals (assigning to an undeclared name throws in strict mode).
*/

/* Value stored in each item's publicationTitle and used to label the snapshot. */
var PUB_TITLE = "TV by the Numbers";

/* detectWeb() only proceeds when this expression yields a non-empty string. */
var XPATH_TITLE = "//title";

/* null disables the "pages" field: scrape() only fills it when this is set. */
var XPATH_PAGES = null;

/*
Date: the text of the "posted_on" paragraph after "on " and before " by ".
NOTE(review): the expression starts with `//`, which in XPath 1.0 selects
from the document root regardless of the context node passed to
evaluate(); on pages with several posts each post may therefore receive the
first post's value. Confirm against the site markup before changing.
*/
var XPATH_DATE = "substring-after(substring-before(string(//p[@class='posted_on']),' by '), 'on ')";

/* Capture group 1 is the date; the default pattern keeps the whole string. */
var RE_DATE = /(.*)/;

/*
Authors: the text of the "posted_on" paragraph after " by ".
NOTE(review): same `//` caveat as XPATH_DATE.
*/
var XPATH_AUTHORS = "substring-after(string(//p[@class='posted_on']),' by ')";

/* Capture group 1 is the author text; the default pattern keeps the whole string. */
var RE_AUTHORS = /(.*)/;
/**
 * Report what kind of page `doc` is.
 *
 * @param doc  DOM document to inspect.
 * @param url  Address of the document (not used here).
 * @returns "webpage" when the document holds exactly one post,
 *          "multiple" when it holds more than one, and undefined when it
 *          holds none or has no title text.
 */
function detectWeb(doc, url) {
    /* The site serves many documents we do not care about; one without
       title text is not a page we handle. */
    var page_title = xpath_string(doc, doc, XPATH_TITLE);
    if (!page_title) {
        return;
    }

    var counted = doc.evaluate("count(//div[@class='post-alt blog'])", doc, null,
        XPathResult.NUMBER_TYPE, null);
    var post_total = counted.numberValue;

    if (post_total > 1) {
        return "multiple";
    }
    if (post_total == 1) {
        return "webpage";
    }
}
/**
 * Evaluate an XPath expression as a string and trim the result.
 *
 * @param doc    Document whose evaluate() method is used.
 * @param node   Context node for the expression.
 * @param xpath  XPath expression to evaluate.
 * @returns The string value passed through Zotero.Utilities.trim, or null
 *          when evaluation yields no result object or an empty string.
 */
function xpath_string(doc, node, xpath) {
    var outcome = doc.evaluate(xpath, node, null, XPathResult.STRING_TYPE, null);
    var raw = outcome ? outcome.stringValue : null;
    return raw ? Zotero.Utilities.trim(raw) : null;
}
/**
 * Evaluate an XPath expression as a string, then extract capture group 1
 * of `reg` from it.
 *
 * @param doc    Document used for evaluation.
 * @param node   Context node for the expression.
 * @param xpath  XPath expression to evaluate.
 * @param reg    Regular expression whose first capture group is returned.
 * @returns Capture group 1, or null when the XPath yields no text or the
 *          regular expression does not match.
 */
function xpre(doc, node, xpath, reg) {
    var text = xpath_string(doc, node, xpath);
    /* RegExp.exec() coerces its argument to a string, so a missing value
       would be matched as the literal text "null"; stop before that. */
    if (text === null || text === undefined) {
        return null;
    }
    var match = reg.exec(text);
    /* exec() returns null on no match; indexing it would throw. */
    return match ? match[1] : null;
}
/**
 * Rewrite the curly single quotes in a post title as ASCII apostrophes.
 * A curly quote next to whitespace becomes two apostrophes; any remaining
 * curly quote becomes one.
 *
 * NOTE(review): the literal characters inside the original patterns were
 * lost (the file was flagged for ambiguous Unicode), leaving empty
 * alternations that match at every position. They are reconstructed here
 * as U+2018 / U+2019, written as escapes so they cannot be lost again.
 * Confirm against the upstream translator.
 */
function normalize_title_quotes(title) {
    return title
        .replace(/(\s+)(?:\u2018|\u2019)|(?:\u2018|\u2019)(\s+)/g, "$1''$2")
        .replace(/\u2018|\u2019/g, "'");
}

/**
 * Split a byline into individual author names: on " and " if present,
 * otherwise on ";", otherwise the whole text is one author. Returns an
 * empty array for missing text.
 */
function split_authors(author_text) {
    if (!author_text) {
        return [];
    }
    if (author_text.indexOf(" and ") != -1) {
        return author_text.split(" and ");
    }
    if (author_text.indexOf(";") != -1) {
        return author_text.split(";");
    }
    return [author_text];
}

/**
 * Build one "webpage" item per post found in `doc` and save them.
 *
 * With exactly one post the item is completed directly. With more than
 * one, the titles are offered through Zotero.selectItems and only the
 * chosen items are completed. A snapshot attachment is added only to an
 * item whose link equals `url`.
 *
 * @param doc  DOM document containing the posts.
 * @param url  Address of the document being translated.
 */
function scrape(doc, url) {
    /* Items keyed by their URL. */
    var items = {};
    var posts = doc.evaluate("//div[@class='post-alt blog']", doc, null,
        XPathResult.ANY_TYPE, null);
    var post_count = 0;
    var post;

    while ((post = posts.iterateNext())) {
        /* The first link inside a post supplies both URL and title. */
        var link = post.getElementsByTagName("a")[0];
        if (!link) {
            /* Nothing to cite without a link; skip instead of throwing. */
            continue;
        }

        var newItem = new Zotero.Item("webpage");
        newItem.publicationTitle = PUB_TITLE;
        newItem.url = link.href;
        newItem.title = normalize_title_quotes(
            Zotero.Utilities.unescapeHTML(
                Zotero.Utilities.cleanTags(link.textContent)));

        if (XPATH_DATE) {
            newItem.date = xpre(doc, post, XPATH_DATE, RE_DATE);
        }
        if (XPATH_PAGES) {
            newItem.pages = xpath_string(doc, post, XPATH_PAGES);
        }

        var authors = split_authors(xpre(doc, post, XPATH_AUTHORS, RE_AUTHORS));
        for (var n = 0; n < authors.length; n++) {
            /* Skip the literal text "null", which is what a regular
               expression produces when run against a missing value. */
            if (authors[n] != 'null') {
                newItem.creators.push(
                    Zotero.Utilities.cleanAuthor(authors[n], "author"));
            }
        }

        /* Attach a snapshot only for the post this document is the page of. */
        if (url == newItem.url) {
            newItem.attachments.push({title:PUB_TITLE+" Snapshot",
                mimeType:"text/html", url:doc.location.href, snapshot:true});
        }

        newItem.toString = function() { return this.title; };
        items[newItem.url] = newItem;
        post_count++;
    }

    var key;
    if (post_count > 1) {
        /* Offer url -> title pairs; complete whichever come back. */
        var sel_items = {};
        for (key in items) {
            if (Object.prototype.hasOwnProperty.call(items, key)) {
                sel_items[key] = items[key].title;
            }
        }
        sel_items = Zotero.selectItems(sel_items);
        for (key in sel_items) {
            if (Object.prototype.hasOwnProperty.call(items, key)) {
                items[key].complete();
            }
        }
    } else if (post_count == 1) {
        for (key in items) {
            if (Object.prototype.hasOwnProperty.call(items, key)) {
                items[key].complete();
            }
        }
    }
}
/**
 * Web-translator entry point: hands the document and its URL straight to
 * scrape(), which does all the work. Returns nothing.
 *
 * @param doc  DOM document to translate.
 * @param url  Address of that document.
 */
function doWeb(doc, url) {
    scrape(doc, url);
}