From 6f19b215f51e6cc52d1b099cf66c02284b90d59c Mon Sep 17 00:00:00 2001 From: Simon Kornblith Date: Fri, 23 Jun 2006 21:27:32 +0000 Subject: [PATCH] Search result scraping for GEAC catalogs --- scrapers.sql | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/scrapers.sql b/scrapers.sql index e15077038..7d77b537c 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -1382,18 +1382,39 @@ for(i in uris) { wait();'); -REPLACE INTO "scrapers" VALUES('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-18 11:19:00', 'GEAC Scraper', 'Simon Kornblith', '/(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)', NULL, +REPLACE INTO "scrapers" VALUES('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-18 11:19:00', 'GEAC Scraper', 'Simon Kornblith', '/(?:GeacQUERY|(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#''; var prefixDC = ''http://purl.org/dc/elements/1.1/''; var prefixDCMI = ''http://purl.org/dc/dcmitype/''; var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/''; var uri = doc.location.href; -var newUri = uri.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html"); -newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html"); -utilities.loadDocument(newUri, browser, function(newBrowser) { - newDoc = newBrowser.contentDocument; +var uris = new Array(); + +if(uri.indexOf("/GeacQUERY") > 0) { + var items = utilities.getItemArray(doc, doc, "(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)"); + items = utilities.selectItems(items); + + if(!items) { + return true; + } + + var uris = new Array(); + for(i in items) { + var newUri = i.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html"); + newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html"); + uris.push(newUri); + } +} else { + var newUri = uri.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html"); + newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html"); + uris.push(newUri); +} + +utilities.processDocuments(browser, null, uris, function(newBrowser) { + var newDoc = newBrowser.contentDocument; + var uri = newDoc.location.href; var namespace = newDoc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { @@ -1436,8 +1457,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) { } utilities.importMARCRecord(record, uri, model); - done(); -}, function() {}); +}, function() { done(); }, function() {}); wait();');