Addresses #755 by adding two new translators to support NC State and the Florida state university system. Once more Endeca catalogs show up we can work to generalize further.

This commit is contained in:
Sean Takats 2007-09-13 05:27:23 +00:00
parent 97d810fc22
commit f0b25656fd

View File

@ -23,7 +23,7 @@
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-09-09 22:00:00'));
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-09-13 12:00:00'));
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
'function detectWeb(doc, url) {
@ -773,6 +773,164 @@ function doWeb(doc, url) {
Zotero.wait();
}');
REPLACE INTO translators VALUES ('a2363670-7040-4cb9-8c48-6b96584e92ee', '1.0.0b4r1', '', '2007-09-13 12:00:00', '0', '100', '4', 'Florida University Libraries (Endeca 1)', 'Sean Takats', '^http://[^/]+/[^\.]+.jsp\?Nt.=',
'function detectWeb(doc, url){
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
var xpath = ''//div[starts-with(@id, "briefTitle")]'';
if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
return "multiple";
}
if (url.indexOf("&V=D")){
return "book";
} else if (url.indexOf("&V=M")){
return "book";
} else if (url.indexOf("&V=U")){
return "book";
}
}',
'function doWeb(doc, url){
var newUris = new Array();
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
var xpath = ''//div[starts-with(@id, "briefTitle")]/a'';
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
var elmt;
if(elmt = elmts.iterateNext()) {
// search page
var items = new Array();
do {
items[elmt.href] = Zotero.Utilities.cleanString(elmt.textContent);
Zotero.debug(elmt.textContent);
} while (elmt = elmts.iterateNext());
items = Zotero.selectItems(items);
if(!items) {
return true;
}
for(var i in items) {
var newUri = i.replace(/&V=./, "&V=M");
newUris.push(newUri);
}
} else {
// single page
var newURL = url.replace(/&V=./, "&V=M");
newUris.push(newURL);
}
var translator = Zotero.loadTranslator("import");
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
var marc = translator.getTranslatorObject();
Zotero.Utilities.processDocuments(newUris, function(newDoc) {
var uri = newDoc.location.href;
var xpath = ''//tr[@class="trGenContent"][td[3]]'';
var elmts = newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null);
var elmt;
var record = new marc.record();
while(elmt = elmts.iterateNext()) {
var field = Zotero.Utilities.superCleanString(doc.evaluate(''./TD[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent);
var value = doc.evaluate(''./TD[3]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
if(field == "LDR") {
record.leader = value;
} else if(field != "FMT") {
Zotero.debug("field=" + field);
value = value.replace(/\|([a-z]) /g, marc.subfieldDelimiter+"$1");
var code = field.substring(0, 3);
var ind = "";
if(field.length > 3) {
ind = field[3];
if(field.length > 4) {
ind += field[4];
}
}
record.addField(code, ind, value);
}
}
var newItem = new Zotero.Item();
record.translate(newItem);
var domain = url.match(/https?:\/\/([^/]+)/);
newItem.repository = domain[1]+" Library Catalog";
newItem.complete();
}, function() { Zotero.done(); }, null);
Zotero.wait();
}');
REPLACE INTO translators VALUES ('da440efe-646c-4a18-9958-abe1f7d55cde', '1.0.0b4r1', '', '2007-09-13 12:00:00', '0', '100', '4', 'NCSU Library (Endeca 2)', 'Sean Takats', '^https?://[^\.]+.lib.ncsu.edu/(?:web2/tramp2\.exe|catalog/\?)',
'function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
var xpath = ''//a[contains(text(), "MARC record")]'';
if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
return "book";
}
xpath = ''//span[@class="resultTitle"]/a'';
if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
return "multiple";
}
}',
'function scrape(text){
var tempidRe = new RegExp("/web2/tramp2\.exe/goto/([^?]+)\?");
var tempidMatch = tempidRe.exec(text);
var tempid = tempidMatch[1];
marcUri = "http://catalog.lib.ncsu.edu/web2/tramp2.exe/download_hitlist/" + tempid;
marcUri = marcUri + "/NCSUCatResults.mrc?server=1home&format=MARC&server=1home&item=1&item_source=1home";
Zotero.Utilities.HTTP.doGet(marcUri, function(text) {
// load translator for MARC
var marc = Zotero.loadTranslator("import");
marc.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
marc.setString(text);
marc.translate();
}, function() {Zotero.done()}, null);
}
function doWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
var xpath = ''//span[@class="resultTitle"]/a'';
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
var elmt;
if(elmt = elmts.iterateNext()) {
// search results page
var newUris = new Array();
var items = new Array();
do {
items[elmt.href] = Zotero.Utilities.cleanString(elmt.textContent);
} while (elmt = elmts.iterateNext());
items = Zotero.selectItems(items);
if(!items) {
return true;
}
for(var i in items) {
newUris.push(i);
}
Zotero.Utilities.HTTP.doGet(newUris, function(text) { scrape(text) },
function() {}, null);
Zotero.wait();
} else if (elmt = doc.evaluate(''//a[contains(text(), "MARC record")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){
// single book
scrape(elmt.href);
Zotero.wait();
}
}');
REPLACE INTO translators VALUES ('88915634-1af6-c134-0171-56fd198235ed', '1.0.0b3.r1', '', '2007-07-31 16:45:00', '1', '100', '4', 'Library Catalog (Voyager)', 'Simon Kornblith', 'Pwebrecon\.cgi',
'function detectWeb(doc, url) {
var export_options = doc.forms.namedItem(''frm'').elements.namedItem(''RD'').options;