zotero/translators/Library Catalog (InnoPAC).js
2011-08-11 07:01:19 +00:00

278 lines
9.2 KiB
JavaScript

{
"translatorID": "4fd6b89b-2316-2dc4-fd87-61a97dd941e8",
"label": "Library Catalog (InnoPAC)",
"creator": "Simon Kornblith and Michael Berkowitz",
"target": "(search~|\\/search\\?|(a|X|t|Y|w)\\?|\\?(searchtype|searchscope)|frameset&FF|record=b[0-9]+(~S[0-9])?|/search/q\\?)",
"minVersion": "1.0.0b3.r1",
"maxVersion": "",
"priority": 200,
"inRepository": true,
"translatorType": 4,
"lastUpdated": "2011-07-22 17:52:39"
}
/**
 * Classifies the current page for the InnoPAC translator.
 *
 * URL shapes this is meant to recognise:
 *   First page of results
 *     http://bearcat.baylor.edu/search~S7/?searchtype=t&searcharg=test&searchscope=7&sortdropdown=-&SORT=D&extended=0&SUBMIT=Search&searchlimits=&searchorigarg=tone+hundred+years+of+solitude
 *     http://innopac.cooley.edu/search~S0/?searchtype=X&searcharg=test&SORT=DZ&extended=0&SUBMIT=Search&searchlimits=&searchorigarg=Xtest
 *     TODO: get it working for this: http://opac.library.usyd.edu.au/search
 *   Later result pages
 *     http://bearcat.baylor.edu/search~S7?/ttest/ttest/1837%2C1838%2C2040%2CB/browse/indexsort=-
 *     http://innopac.cooley.edu/search~S0?/Xtest&SORT=DZ/Xtest&SORT=DZ&SUBKEY=test/1%2C960%2C960%2CB/browse
 *   Single item reached from a search
 *     http://bearcat.baylor.edu/search~S7?/ttest/ttest/1837%2C1838%2C2040%2CB/frameset&FF=ttestteori+english&1%2C1%2C/indexsort=-
 *     http://innopac.cooley.edu/search~S0?/Xtest&SORT=DZ/Xtest&SORT=DZ&SUBKEY=test/1%2C960%2C960%2CB/frameset&FF=Xtest&SORT=DZ&1%2C1%2C
 *   Persistent item URL (with and without scope suffix)
 *     http://bearcat.baylor.edu/record=b1540169~S7
 *     http://innopac.cooley.edu/record=b507916~S0
 *     http://luna.wellesley.edu/record=b2398784
 *   Specific search parameters
 *     http://library.cooley.edu/search/q?author=shakespeare&title=hamlet
 *
 * @param {Document} doc - page being inspected
 * @param {String} url - address of that page
 * @return {String|Boolean} "book" for a single record, "multiple" for a
 *     page linking to records, false when the page is not handled
 */
function detectWeb(doc, url) {
	var ns = doc.documentElement.namespaceURI;
	var resolver = null;
	if (ns) {
		resolver = function(prefix) {
			return prefix == 'x' ? ns : null;
		};
	}

	// Returns the first node selected by an XPath expression, or null.
	var firstNode = function(expression) {
		return doc.evaluate(expression, doc, resolver, XPathResult.ANY_TYPE, null).iterateNext();
	};

	// Central Michigan University fix: this marker wins before any URL test.
	if (firstNode('//div[@class="bibRecordLink"]')) {
		return "book";
	}

	// Cut down on false positives: the URL must carry at least one of these hints.
	var urlHints = [/SEARCH=/, /searchargs?=/, /&FF/, /search~S[0-9]/, /\/search\/q\?/, /record=/];
	var hinted = false;
	for (var h = 0; h < urlHints.length && !hinted; h++) {
		if (url.match(urlHints[h])) {
			hinted = true;
		}
	}
	if (!hinted) {
		return false;
	}

	// Some sites never show the MARC button, so let the URL alone identify an item page.
	var itemUrlPattern = new RegExp('^https?://[^/]+/search[^/]*\\??/[^/]+/[^/]+/[^/]+\%2C[^/]+/frameset(.+)$');
	var urlAloneIsItem = itemUrlPattern.test(doc.location.href)
		&& !url.match("SEARCH")
		&& !url.match("searchtype");
	if (urlAloneIsItem) {
		return "book";
	}

	// Otherwise the MARC / regular-display toggle button marks a record page.
	var marcButton = firstNode('//a[img[@src="/screens/marc_display.gif" or @src="/screens/ico_marc.gif" or @src="/screens/marcdisp.gif" or starts-with(@alt, "MARC ") or @src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]] | //a[span/img[@src="/screens/marc_display.gif" or @src="/screens/ico_marc.gif" or @src="/screens/marcdisp.gif" or starts-with(@alt, "MARC ") or @src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]');
	if (marcButton) {
		return "book";
	}

	// Finally, any anchor pointing at an item display page makes this a result list.
	var anchors = doc.getElementsByTagName("a");
	var idx = 0;
	while (idx < anchors.length) {
		var target = anchors[idx].href;
		if (itemUrlPattern.test(target) || target.match(/^https?:\/\/([^/]+\/(?:search\??\/|record=?|search%7e\/)|frameset&FF=)/)) {
			return "multiple";
		}
		idx++;
	}
	return false;
}
/**
 * Adds one finished MARC field to a record.
 *
 * InnoPAC prints subfield markers as "|x"; these are rewritten to the MARC
 * translator's subfield delimiter, and a value that does not start with a
 * subfield marker is treated as subfield "a". Empty values are skipped.
 *
 * @param {Object} marc - MARC translator object (provides subfieldDelimiter)
 * @param {Object} record - record receiving the field through addField()
 * @param {String} tag - three-character field tag
 * @param {String} ind - two-character indicator string
 * @param {String} rawValue - field text as shown on the MARC display page
 */
function addMarcField(marc, record, tag, ind, rawValue) {
	if (!rawValue) {
		return;
	}
	var fieldValue = rawValue.replace(/\|(.)/g, marc.subfieldDelimiter + "$1");
	if (fieldValue.charAt(0) != marc.subfieldDelimiter) {
		fieldValue = marc.subfieldDelimiter + "a" + fieldValue;
	}
	record.addField(tag, ind, fieldValue);
}

/**
 * Reads every <pre> block of an InnoPAC MARC display page, builds a MARC
 * record from its text and saves one Zotero item per block.
 *
 * Line layout expected in the text: columns 0-2 hold the tag (or the word
 * "LEADER"), columns 4-5 the indicators, and the value starts at column 7.
 * A line starting with six blanks continues the value of the previous field.
 *
 * @param {Object} marc - MARC translator object (record constructor,
 *     subfieldDelimiter)
 * @param {Document} newDoc - loaded MARC display page
 */
function scrape(marc, newDoc) {
	var namespace = newDoc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;

	// Prefer the text nodes inside <pre>; fall back to the <pre> elements themselves.
	var textXPath = '//pre/text()';
	var useNodeValue = !!newDoc.evaluate(textXPath, newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	var elmts = newDoc.evaluate(useNodeValue ? textXPath : '//pre', newDoc, nsResolver, XPathResult.ANY_TYPE, null);

	var hostMatch = newDoc.location.href.match(/https?:\/\/([^/]+)/);

	var elmt;
	while ((elmt = elmts.iterateNext())) {
		var text = useNodeValue ? elmt.nodeValue : elmt.textContent;
		var newItem = new Zotero.Item();
		var record = new marc.record();

		// Field state is reset for every record so that the last field of one
		// <pre> block is not added a second time to the next block's record.
		var tag = null;
		var ind = null;
		var tagValue = null;

		var lines = text.split("\n");
		for (var i = 0; i < lines.length; i++) {
			if (!lines[i]) {
				continue;
			}
			// non-breaking spaces, underscores and tabs all stand for blanks
			var line = lines[i].replace(/[\xA0_\t]/g, " ");
			var value = line.substring(7);

			if (/^ {6}/.test(line)) {
				// continuation line: add this onto the previous value
				// (ignored when no field has been started yet)
				if (tagValue !== null) {
					tagValue += value;
				}
			} else if (line.substring(0, 6) == "LEADER") {
				// trap leader
				record.leader = value;
			} else {
				// a new tag starts: finish the previous one first
				addMarcField(marc, record, tag, ind, tagValue);
				tag = line.substring(0, 3);
				ind = line.substring(4, 6);
				tagValue = value;
			}
		}
		// finish the last tag of this record
		addMarcField(marc, record, tag, ind, tagValue);

		record.translate(newItem);
		// Name the catalog after the host; leave it unset if the address has no host.
		if (hostMatch) {
			newItem.repository = hostMatch[1] + " Library Catalog";
		}
		newItem.complete();
	}
}
/**
 * Loads each MARC display page in turn and hands it to scrape(); tells
 * Zotero the translation is done once all pages have been processed.
 *
 * @param {Object} marc - MARC translator object passed through to scrape()
 * @param {String[]} urls - addresses of the MARC display pages
 */
function pageByPage(marc, urls) {
	var handlePage = function(loadedDoc) {
		scrape(marc, loadedDoc);
	};
	var finish = function() {
		Zotero.done();
	};
	Zotero.Utilities.processDocuments(urls, handlePage, finish);
}
/**
 * Entry point for saving items from an InnoPAC page.
 *
 * On a single-record page the address of the MARC display is derived either
 * from the page address (".../frameset..." becomes ".../marc...") or from the
 * MARC button link. On a result list the user picks entries through
 * Zotero.selectItems() and the MARC display of each chosen entry is fetched.
 * The pages are then processed by pageByPage().
 *
 * @param {Document} doc - page being translated
 * @param {String} url - address of that page
 * @return {Boolean|undefined} true when the user cancels the selection
 *     dialog; otherwise nothing
 * @throws {Error} when a record page has neither a frameset address nor a
 *     MARC display link
 */
function doWeb(doc, url) {
	var uri = doc.location.href;

	// load translator for MARC
	var translator = Zotero.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();

	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;

	if (detectWeb(doc, url) == "book") {
		var newUri;
		if (/^(.*)frameset(.+)$/.test(uri)) {
			newUri = uri.replace(/frameset/, "marc");
		} else {
			var xpath = '//a[img[@src="/screens/marc_display.gif" or @src="/screens/ico_marc.gif" or @src="/screens/marcdisp.gif" or starts-with(@alt, "MARC ") or @src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]] | //a[span/img[@src="/screens/marc_display.gif" or @src="/screens/ico_marc.gif" or @src="/screens/marcdisp.gif" or starts-with(@alt, "MARC ") or @src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]';
			var marcLink = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if (!marcLink) {
				// detectWeb() can report "book" without a MARC button being present;
				// fail with a clear message instead of a null dereference.
				throw new Error("InnoPAC: no link to the MARC display found on " + uri);
			}
			newUri = marcLink.href.replace(/frameset/, "marc");
		}
		pageByPage(marc, [newUri]);
	} else { // Search results page
		// href -> link text; a plain object, since the keys are strings
		var availableItems = {};
		var tableRows = doc.evaluate('//table[@class="browseScreen"]//tr[@class="browseEntry" or @class="briefCitRow" or td/input[@type="checkbox"] or td[contains(@class,"briefCitRow")]]',
			doc, nsResolver, XPathResult.ANY_TYPE, null);

		// Go through table rows
		var tableRow;
		while ((tableRow = tableRows.iterateNext())) {
			// prefer the title link; otherwise take every link in the row
			var links = doc.evaluate('.//span[@class="briefcitTitle"]/a', tableRow, nsResolver, XPathResult.ANY_TYPE, null);
			var link = links.iterateNext();
			if (!link) {
				links = doc.evaluate(".//a", tableRow, nsResolver, XPathResult.ANY_TYPE, null);
				link = links.iterateNext();
			}
			if (!link || availableItems[link.href]) {
				// nothing to offer, or this row was already seen
				continue;
			}
			// Go through links, keeping those with some visible text
			while (link) {
				if (link.textContent.match(/\w+/)) {
					availableItems[link.href] = link.textContent;
				}
				link = links.iterateNext();
			}
		}

		var items = Zotero.selectItems(availableItems);
		if (!items) {
			return true;
		}

		var newUrls = [];
		for (var itemUrl in items) {
			newUrls.push(itemUrl.replace("frameset", "marc"));
		}
		pageByPage(marc, newUrls);
	}
	Zotero.wait();
}
// Expected-output fixture: one "web" test case that loads a persistent
// record URL (books.luther.edu) and lists the single book item, with its
// creators, tags and bibliographic fields, that the page should yield.
// NOTE(review): presumably consumed by Zotero's translator test tooling,
// which may rely on the BEGIN/END marker comments — confirm before editing
// the data by hand.
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://books.luther.edu/record=b2115431~S9",
"items": [
{
"itemType": "book",
"creators": [
{
"firstName": "G. W",
"lastName": "Kimura",
"creatorType": "contributor"
},
{
"lastName": "ebrary, Inc",
"fieldMode": true
}
],
"notes": [],
"tags": [
"Alaska",
"History",
"Alaska",
"Anniversaries, etc",
"Alaska",
"Social conditions",
"Alaska",
"Economic conditions",
"Electronic books"
],
"seeAlso": [],
"attachments": [],
"title": "Alaska at 50 the past, present, and next fifty years of statehood",
"place": "Fairbanks",
"publisher": "University of Alaska Press",
"date": "2009",
"numPages": "285",
"callNumber": "F904 .A477 2009eb",
"libraryCatalog": "books.luther.edu Library Catalog"
}
]
}
]
/** END TEST CASES **/