125 lines
3.8 KiB
JavaScript
125 lines
3.8 KiB
JavaScript
{
|
|
"translatorID":"0507797c-9bc4-4374-92ca-9e3763b6922b",
|
|
"translatorType":4,
|
|
"label":"World History Connected",
|
|
"creator":"Frederick Gibbs",
|
|
"target":"worldhistoryconnected\\.press|historycooperative.*/whc/",
|
|
"minVersion":"1.0.0b4.r5",
|
|
"maxVersion":"",
|
|
"priority":100,
|
|
"inRepository":true,
|
|
"lastUpdated":"2008-05-06 08:15:00"
|
|
}
|
|
|
|
/**
 * Copy the "content" attribute of a named tag onto an item property.
 *
 * Looks the tag up with metaTags.namedItem(field); when no such tag exists
 * the item is left untouched.
 *
 * @param {Object} newItem - Item object that receives the value.
 * @param {Object} metaTags - Collection exposing namedItem(name).
 * @param {string} field - Name of the tag to look up.
 * @param {string} zoteroField - Property name to set on newItem.
 */
function associateMeta(newItem, metaTags, field, zoteroField) {
	var metaTag = metaTags.namedItem(field);
	if (!metaTag) {
		return;
	}
	newItem[zoteroField] = metaTag.getAttribute("content");
}
|
|
|
|
/**
 * Build a "journalArticle" item from a single page and save it.
 *
 * Journal, volume, issue, authors and date are read from the page's <meta>
 * tags; the title is read from the page body via XPath. A snapshot of the
 * page is attached and the item is completed.
 *
 * Missing pieces (no FileType tag, no title node, no Author content) are
 * skipped instead of throwing, so a partially tagged page still yields an item.
 *
 * @param {Document} doc - The article or book-review page to scrape.
 */
function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return prefix == 'x' ? namespace : null;
	} : null;

	// Text content of the first node matching `path`, or null when nothing matches.
	function firstNodeText(path) {
		var node = doc.evaluate(path, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		return node ? node.textContent : null;
	}

	var newItem = new Zotero.Item("journalArticle");
	newItem.url = doc.location.href;

	var metaTags = doc.getElementsByTagName("meta");

	associateMeta(newItem, metaTags, "Journal", "publicationTitle");
	associateMeta(newItem, metaTags, "Volume", "volume");
	associateMeta(newItem, metaTags, "Issue", "issue");

	// In the case of book reviews, the title meta tag is blank. Quotes are also
	// not escaped properly, so if an article title begins with a quote the
	// title tag looks blank even though (semantically) it is not.
	// The site uses the meta tag 'FileType' to indicate Article or Book Review;
	// silly, but we can use it. A page without that tag is treated as an article.
	var fileType = metaTags.namedItem('FileType');
	var isBookReview = !!fileType && fileType.getAttribute("content") == 'Book Review';

	var titleText;
	if (isBookReview) {
		// For a book review, use the title of the reviewed book from the page body.
		titleText = firstNodeText('/html/body/table[4]/tbody/tr[3]/td[1]/i');
		if (titleText !== null) {
			newItem.title = "Review of " + titleText;
		}
	} else {
		// It would be nice to grab the title from the meta tags, but quotation
		// marks are not properly escaped and the tags are therefore malformed.
		titleText = firstNodeText('/html/body/table[4]/tbody/tr[2]/td[1]/h2/font/b');
		if (titleText !== null) {
			newItem.title = Zotero.Utilities.superCleanString(titleText);
		}
	}
	// Layout did not match: fall back to the document title rather than failing.
	if (titleText === null && doc.title) {
		newItem.title = doc.title;
	}

	var author = metaTags.namedItem("Author");
	var authorContent = author ? author.getAttribute("content") : null;
	if (authorContent) {
		var authors = authorContent.split(" and ");
		// Index loop with a declared counter: no implicit global, no for...in on an array.
		for (var i = 0; i < authors.length; i++) {
			var authorName = authors[i].replace("Reviewed by ", "");
			newItem.creators.push(Zotero.Utilities.cleanAuthor(authorName, "author"));
		}
	}

	var month = metaTags.namedItem("PublicationMonth");
	var year = metaTags.namedItem("PublicationYear");
	if (month && year) {
		newItem.date = month.getAttribute("content") + " " + year.getAttribute("content");
	}

	newItem.attachments.push({document:doc, title:"World History Connected Snapshot"});

	newItem.complete();
}
|
|
|
|
/**
 * Translator entry point for a page.
 *
 * When the document title contains "Contents" or "Search results", the links
 * on the page are collected, offered to the user through Zotero.selectItems,
 * and every chosen URL is loaded and passed to scrape(). Any other page is
 * scraped directly.
 *
 * @param {Document} doc - The page the translator was invoked on.
 * @param {string} url - Address of the page (not used here).
 * @returns {boolean|undefined} true when the user selects nothing; otherwise
 *     nothing is returned.
 */
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return prefix == 'x' ? namespace : null;
	} : null;

	var isContents = doc.title.indexOf("Contents") != -1;
	var isSearchResults = doc.title.indexOf("Search results") != -1;

	// Single article or review page: scrape it as-is.
	if (!isContents && !isSearchResults) {
		scrape(doc);
		return;
	}

	// A "Contents" title takes precedence when both markers appear.
	var linkPath = isContents
		? '//tbody/tr[2]/td[1]/table/tbody/tr/td/a'
		: '/html/body/dl/dt/strong/a';
	var searchLinks = doc.evaluate(linkPath, doc, nsResolver, XPathResult.ANY_TYPE, null);

	// Map of link URL -> cleaned link text, in the shape passed to selectItems.
	var items = {};
	var elmt; // declared locally so the loop does not create a global
	while ((elmt = searchLinks.iterateNext())) {
		Zotero.debug(elmt.href);
		var title = Zotero.Utilities.superCleanString(elmt.textContent);
		var link = elmt.href;
		if (title && link) {
			items[link] = title;
		}
	}

	items = Zotero.selectItems(items);
	if (!items) {
		return true;
	}

	var uris = [];
	for (var uri in items) {
		uris.push(uri);
	}

	Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
		function() { Zotero.done(); }, null);

	Zotero.wait();
}