zotero/translators/Library Catalog (InnoPAC).js

228 lines
7.6 KiB
JavaScript

{
"translatorID":"4fd6b89b-2316-2dc4-fd87-61a97dd941e8",
"translatorType":4,
"label":"Library Catalog (InnoPAC)",
"creator":"Simon Kornblith and Michael Berkowitz",
"target":"(search~|\\/search\\?|(a|X|t|Y|w)\\?|\\?(searchtype|searchscope)|frameset&FF|record=b[0-9]+(~S[0-9])?|/search/q\\?)",
"minVersion":"1.0.0b3.r1",
"maxVersion":"",
"priority":200,
"inRepository":true,
"lastUpdated":"2010-09-10 13:40:50"
}
function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;
//***********
// URL MATCHING - translator should detect the following urls...
// First page results
// http://bearcat.baylor.edu/search~S7/?searchtype=t&searcharg=test&searchscope=7&sortdropdown=-&SORT=D&extended=0&SUBMIT=Search&searchlimits=&searchorigarg=tone+hundred+years+of+solitude
// http://innopac.cooley.edu/search~S0/?searchtype=X&searcharg=test&SORT=DZ&extended=0&SUBMIT=Search&searchlimits=&searchorigarg=Xtest
// TODO: get it working for this: http://opac.library.usyd.edu.au/search
// n page results
// http://bearcat.baylor.edu/search~S7?/ttest/ttest/1837%2C1838%2C2040%2CB/browse/indexsort=-
// http://innopac.cooley.edu/search~S0?/Xtest&SORT=DZ/Xtest&SORT=DZ&SUBKEY=test/1%2C960%2C960%2CB/browse
// Individual item from search
// http://bearcat.baylor.edu/search~S7?/ttest/ttest/1837%2C1838%2C2040%2CB/frameset&FF=ttestteori+english&1%2C1%2C/indexsort=-
// http://innopac.cooley.edu/search~S0?/Xtest&SORT=DZ/Xtest&SORT=DZ&SUBKEY=test/1%2C960%2C960%2CB/frameset&FF=Xtest&SORT=DZ&1%2C1%2C
// Persistent URL for item
// http://bearcat.baylor.edu/record=b1540169~S7
// http://innopac.cooley.edu/record=b507916~S0
// Specific search parameters
// http://library.cooley.edu/search/q?author=shakespeare&title=hamlet
//***********
// Central Michigan University fix
var xpath = '//div[@class="bibRecordLink"]';
var elmt = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if(elmt) {
return "book";
}
// possibly disastrous edit to regular expression below
if (!url.match(/SEARCH=/) && !url.match(/searchargs?=/) && !url.match(/&FF/) && !url.match(/search~S[0-9]/) && !url.match(/\/search\/q\?/)) return false;
// First, check to see if the URL alone reveals InnoPAC, since some sites don't reveal the MARC button
var matchRegexp = new RegExp('^https?://[^/]+/search[^/]*\\??/[^/]+/[^/]+/[^/]+\%2C[^/]+/frameset(.+)$');
if(matchRegexp.test(doc.location.href)) {
if (!url.match("SEARCH") && !url.match("searchtype")) {
return "book";
}
}
// Next, look for the MARC button
xpath = '//a[img[@src="/screens/marcdisp.gif" or starts-with(@alt, "MARC ") or @src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]';
elmt = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if(elmt) {
return "book";
}
// Also, check for links to an item display page
var tags = doc.getElementsByTagName("a");
for(var i=0; i<tags.length; i++) {
if(matchRegexp.test(tags[i].href) || tags[i].href.match(/^https?:\/\/([^/]+\/(?:search\??\/|record=?|search%7e\/)|frameset&FF=)/)) {
return "multiple";
}
}
return false;
}
function scrape(marc, newDoc) {
var namespace = newDoc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;
var xpath = '//pre/text()';
if (newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
var elmts = newDoc.evaluate(xpath, newDoc, null, XPathResult.ANY_TYPE, null);
var useNodeValue = true;
} else {
var elmts = newDoc.evaluate('//pre', newDoc, nsResolver, XPathResult.ANY_TYPE, null);
var useNodeValue = false;
}
var elmt;
while(elmt = elmts.iterateNext()) {
if (useNodeValue) {
var text = elmt.nodeValue;
} else {
var text = elmt.textContent;
}
var newItem = new Zotero.Item();
var record = new marc.record();
var linee = text.split("\n");
for (var i=0; i<linee.length; i++) {
if(!linee[i]) {
continue;
}
linee[i] = linee[i].replace(/[\xA0_\t]/g, " ");
var value = linee[i].substr(7);
if(linee[i].substr(0, 6) == " ") {
// add this onto previous value
tagValue += value;
} else {
if(linee[i].substr(0, 6) == "LEADER") {
// trap leader
record.leader = value;
} else {
if(tagValue) { // finish last tag
tagValue = tagValue.replace(/\|(.)/g, marc.subfieldDelimiter+"$1");
if(tagValue[0] != marc.subfieldDelimiter) {
tagValue = marc.subfieldDelimiter+"a"+tagValue;
}
// add previous tag
record.addField(tag, ind, tagValue);
}
var tag = linee[i].substr(0, 3);
var ind = linee[i].substr(4, 2);
var tagValue = value;
}
}
}
if(tagValue) {
tagValue = tagValue.replace(/\|(.)/g, marc.subfieldDelimiter+"$1");
if(tagValue[0] != marc.subfieldDelimiter) {
tagValue = marc.subfieldDelimiter+"a"+tagValue;
}
// add previous tag
record.addField(tag, ind, tagValue);
}
record.translate(newItem);
var domain = newDoc.location.href.match(/https?:\/\/([^/]+)/);
newItem.repository = domain[1]+" Library Catalog";
newItem.complete();
}
}
function pageByPage(marc, urls) {
Zotero.Utilities.processDocuments(urls, function(newDoc) {
scrape(marc, newDoc);
}, function() { Zotero.done() });
}
function doWeb(doc, url) {
var uri = doc.location.href;
var newUri;
// load translator for MARC
var translator = Zotero.loadTranslator("import");
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
var marc = translator.getTranslatorObject();
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;
if (detectWeb(doc, url) == "book") {
var matchRegexp = new RegExp('^(.*)frameset(.+)$');
var m = matchRegexp.exec(uri);
if (m) {
newUri = uri.replace(/frameset/, "marc");
} else {
newUri = doc.evaluate('//a[contains(@href, "frameset")]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().href.replace(/frameset/, 'marc');
}
pageByPage(marc, [newUri]);
} else { // Search results page
// Require link to match this
var tagRegexp = new RegExp();
tagRegexp.compile('^https?://[^/]+/search\\??/[^/]+/[^/]+/[0-9]+\%2C[^/]+/frameset');
var urls = new Array();
var availableItems = new Array();
var firstURL = false;
var tableRows = doc.evaluate('//table[@class="browseScreen"]//tr[@class="browseEntry" or @class="briefCitRow" or td/input[@type="checkbox"] or td[contains(@class,"briefCitRow")]]',
doc, nsResolver, XPathResult.ANY_TYPE, null);
// Go through table rows
var i = 0;
while(tableRow = tableRows.iterateNext()) {
// get link
var links = doc.evaluate('.//span[@class="briefcitTitle"]/a', tableRow, nsResolver, XPathResult.ANY_TYPE, null);
var link = links.iterateNext();
if(!link) {
var links = doc.evaluate(".//a", tableRow, nsResolver, XPathResult.ANY_TYPE, null);
link = links.iterateNext();
}
if(link) {
if(availableItems[link.href]) {
continue;
}
// Go through links
while(link) {
if (link.textContent.match(/\w+/)) availableItems[link.href] = link.textContent;
link = links.iterateNext();
}
i++;
}
};
var items = Zotero.selectItems(availableItems);
if(!items) {
return true;
}
var newUrls = new Array();
for(var url in items) {
newUrls.push(url.replace("frameset", "marc"));
}
pageByPage(marc, newUrls);
}
Zotero.wait();
}