Addresses #1018. New target regex for the InnoPAC translator. It needs a bit more testing before I close the ticket, but it has worked fine so far.

This commit is contained in:
Michael Berkowitz 2008-05-21 16:31:00 +00:00
parent 412269efd3
commit a0d3fde115

View File

@ -22,7 +22,7 @@
-- Set the following timestamp to the most recent scraper update date -- Set the following timestamp to the most recent scraper update date
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-05-21 16:00:00')); REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-05-21 17:30:00'));
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon', REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon',
'function detectWeb(doc, url) { 'function detectWeb(doc, url) {
@ -10771,10 +10771,10 @@ function doWeb(doc, url) {
Zotero.wait(); Zotero.wait();
}'); }');
REPLACE INTO translators VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '1.0.0b3.r1', '', '2008-05-15 00:30:00', '1', '200', '4', 'Library Catalog (InnoPAC)', 'Simon Kornblith and Michael Berkowitz', 'https?://[^/]+/(search(\*spi)?(\?|~(S[\d]+)?)?)\??/(a|X|t)?\??', REPLACE INTO translators VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '1.0.0b3.r1', '', '2008-05-21 17:30:00', '1', '200', '4', 'Library Catalog (InnoPAC)', 'Simon Kornblith and Michael Berkowitz', '(search~|search\?|(a|X|t|Y|w)\?|\?(searchtype|searchscope)|frameset&FF)',
'function detectWeb(doc, url) { 'function detectWeb(doc, url) {
// First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button // First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button
var matchRegexp = new RegExp(''^https?://[^/]+/search[^/]*\\??/[^/]+(/[^/]+/[0-9]+\%2C[^/]+/frameset(.+)$)?''); var matchRegexp = new RegExp(''^https?://[^/]+/search[^/]*\\??/[^/]+/[^/]+/[0-9]+\%2C[^/]+/frameset(.+)$'');
if(matchRegexp.test(doc.location.href)) { if(matchRegexp.test(doc.location.href)) {
if (!url.match("SEARCH") && !url.match("searchtype")) { if (!url.match("SEARCH") && !url.match("searchtype")) {
return "book"; return "book";
@ -10919,12 +10919,10 @@ function doWeb(doc, url) {
var i = 0; var i = 0;
while(tableRow = tableRows.iterateNext()) { while(tableRow = tableRows.iterateNext()) {
// get link // get link
var links = doc.evaluate(''.//span[@class="briefcitTitle"]/a'', tableRow, var links = doc.evaluate(''.//span[@class="briefcitTitle"]/a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null);
nsResolver, XPathResult.ANY_TYPE, null);
var link = links.iterateNext(); var link = links.iterateNext();
if(!link) { if(!link) {
var links = doc.evaluate(".//a", tableRow, nsResolver, var links = doc.evaluate(".//a", tableRow, nsResolver, XPathResult.ANY_TYPE, null);
XPathResult.ANY_TYPE, null);
link = links.iterateNext(); link = links.iterateNext();
} }
@ -10936,7 +10934,7 @@ function doWeb(doc, url) {
// Go through links // Go through links
while(link) { while(link) {
availableItems[link.href] = link.textContent; if (link.textContent.match(/\w+/)) availableItems[link.href] = link.textContent;
link = links.iterateNext(); link = links.iterateNext();
} }
i++; i++;