zotero/translators/World History Connected.js

125 lines
3.8 KiB
JavaScript

{
"translatorID":"0507797c-9bc4-4374-92ca-9e3763b6922b",
"translatorType":4,
"label":"World History Connected",
"creator":"Frederick Gibbs",
"target":"worldhistoryconnected\\.press|historycooperative.*/whc/",
"minVersion":"1.0.0b4.r5",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2008-05-06 08:15:00"
}
/**
 * Copies the "content" attribute of a named <meta> tag onto an item field.
 *
 * @param {Object} newItem     Item object that receives the value.
 * @param {Object} metaTags    Collection exposing namedItem(name), e.g. the
 *                             result of doc.getElementsByTagName("meta").
 * @param {string} field       Name of the meta tag to look up.
 * @param {string} zoteroField Property of newItem to assign.
 *
 * When no tag with that name exists, newItem is left untouched.
 */
function associateMeta(newItem, metaTags, field, zoteroField) {
	var metaTag = metaTags.namedItem(field);
	if (!metaTag) {
		return;
	}
	newItem[zoteroField] = metaTag.getAttribute("content");
}
/**
 * Builds a "journalArticle" item from a single article or book-review page
 * and hands it to Zotero via newItem.complete().
 *
 * Journal, volume, issue, author and date are read from the page's <meta>
 * tags; the title is read from the page body with a fixed XPath because the
 * title meta tag is unreliable (see comments below).
 *
 * Missing data is tolerated: if the FileType meta tag is absent the page is
 * treated as a regular article, and if the title node cannot be found the
 * item is completed without a title instead of throwing.
 *
 * @param {Document} doc  The loaded page to scrape.
 */
function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix === 'x') return namespace; else return null;
	} : null;

	var newItem = new Zotero.Item("journalArticle");
	newItem.url = doc.location.href;

	var metaTags = doc.getElementsByTagName("meta");
	associateMeta(newItem, metaTags, "Journal", "publicationTitle");
	associateMeta(newItem, metaTags, "Volume", "volume");
	associateMeta(newItem, metaTags, "Issue", "issue");

	// For book reviews the title meta tag is blank. For articles, quotes in
	// the title are not escaped properly, so the meta tag is malformed and
	// looks blank whenever the title begins with a quotation mark. The title
	// is therefore taken from the page body instead.
	// The site uses the meta tag 'FileType' to mark a page as Article or
	// Book Review, which tells us where in the body to look.
	var fileType = metaTags.namedItem('FileType');
	var isBookReview = !!fileType && fileType.getAttribute("content") === 'Book Review';

	// Book review: the italicised title of the reviewed book.
	// Article: the bold heading.
	// NOTE(review): both paths depend on the site's table layout -- confirm
	// against the live pages if titles start coming back empty.
	var titlePath = isBookReview
		? '/html/body/table[4]/tbody/tr[3]/td[1]/i'
		: '/html/body/table[4]/tbody/tr[2]/td[1]/h2/font/b';
	var titleNode = doc.evaluate(titlePath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	if (titleNode) {
		newItem.title = isBookReview
			? "Review of " + titleNode.textContent
			: Zotero.Utilities.superCleanString(titleNode.textContent);
	}

	var author = metaTags.namedItem("Author");
	var authorContent = author ? author.getAttribute("content") : null;
	if (authorContent) {
		// Multiple authors are joined with " and "; reviews prefix the
		// reviewer's name with "Reviewed by ".
		var authors = authorContent.split(" and ");
		for (var j = 0; j < authors.length; j++) {
			var authorName = authors[j].replace("Reviewed by ", "");
			newItem.creators.push(Zotero.Utilities.cleanAuthor(authorName, "author"));
		}
	}

	var month = metaTags.namedItem("PublicationMonth");
	var year = metaTags.namedItem("PublicationYear");
	if (month && year) {
		newItem.date = month.getAttribute("content") + " " + year.getAttribute("content");
	}

	newItem.attachments.push({document:doc, title:"World History Connected Snapshot"});
	newItem.complete();
}
/**
 * Translator entry point for a page that matched the translator's target.
 *
 * Pages whose title contains "Contents" or "Search results" are treated as
 * listings: every link with both a title and an href is offered to the user
 * through Zotero.selectItems, and each chosen URL is loaded and passed to
 * scrape(). Any other page is scraped directly as a single item.
 *
 * @param {Document} doc  The loaded page.
 * @param {string}   url  URL of the page (not used by this function).
 * @returns {boolean|undefined} true when the user selects nothing in the
 *          picker; otherwise undefined.
 */
function doWeb(doc, url) {
	var isContents = doc.title.indexOf("Contents") != -1;
	var isSearchResults = doc.title.indexOf("Search results") != -1;

	// Not a listing page: scrape it as a single article/review.
	if (!isContents && !isSearchResults) {
		scrape(doc);
		return;
	}

	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix === 'x') return namespace; else return null;
	} : null;

	// A title matching both patterns is handled as a contents page.
	var linkPath = isContents
		? '//tbody/tr[2]/td[1]/table/tbody/tr/td/a'
		: '/html/body/dl/dt/strong/a';
	var searchLinks = doc.evaluate(linkPath, doc, nsResolver, XPathResult.ANY_TYPE, null);

	// Map of link URL -> cleaned link text, in the shape selectItems expects.
	var items = {};
	var elmt; // declared locally so the loop does not create a global
	while ((elmt = searchLinks.iterateNext())) {
		Zotero.debug(elmt.href);
		var title = Zotero.Utilities.superCleanString(elmt.textContent);
		var link = elmt.href;
		if (title && link) {
			items[link] = title;
		}
	}

	items = Zotero.selectItems(items);
	if (!items) {
		return true;
	}

	var uris = [];
	for (var uri in items) {
		uris.push(uri);
	}

	Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
		function() { Zotero.done(); }, null);
	Zotero.wait();
}