From 303c6ee68d8c2e4536359192565175779a1c0863 Mon Sep 17 00:00:00 2001 From: Simon Kornblith Date: Mon, 26 Jun 2006 01:08:59 +0000 Subject: [PATCH] closes #41, get library call number --- .../content/scholar/xpcom/ingester.js | 67 +++++++++++++------ scrapers.sql | 15 +++-- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/chrome/chromeFiles/content/scholar/xpcom/ingester.js b/chrome/chromeFiles/content/scholar/xpcom/ingester.js index 370f1b774..197cf9881 100644 --- a/chrome/chromeFiles/content/scholar/xpcom/ingester.js +++ b/chrome/chromeFiles/content/scholar/xpcom/ingester.js @@ -412,8 +412,8 @@ Scholar.Ingester.Utilities.prototype.cleanAuthor = function(author) { * Cleans whitespace off a string and replaces multiple spaces with one */ Scholar.Ingester.Utilities.prototype.cleanString = function(s) { - s = this.trimString(s); - return s.replace(/[ \xA0]+/g, " "); + s = s.replace(/[ \xA0]+/g, " "); + return this.trimString(s); } /* @@ -523,14 +523,18 @@ Scholar.Ingester.Utilities.prototype._MARCAssociateField = function(record, uri, Scholar.debug('Found '+field.length+' matches for '+fieldNo+part); if(field) { for(i in field) { - if(field[i][part]) { - var value = field[i][part]; - Scholar.debug(value); - if(fieldNo == '245') { // special case - title + subtitle - if(field[i]['b']) { - value += ' '+field[i]['b']; + var value; + for(var j=0; j oldIndex) { + oldIndex = newIndex; + var callNumber = this.model.data[uri][prefixDC + 'identifier'][i].substring(prefix.length+1); + } + } + if(callNumber) { + newItem.setField("callNumber", callNumber); } } diff --git a/scrapers.sql b/scrapers.sql index 3aad546ed..be4df2287 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -1,7 +1,7 @@ --- 23 +-- 24 -- Set the following timestamp to the most recent scraper update date -REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-25 18:00:00')); +REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-25 21:06:00')); REPLACE INTO "scrapers" VALUES('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-22 22:58:00', 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/)', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#''; var prefixDC = ''http://purl.org/dc/elements/1.1/''; @@ -809,7 +809,7 @@ if(newUri) { wait();'); -REPLACE INTO "scrapers" VALUES('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-25 15:32:00', 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi', +REPLACE INTO "scrapers" VALUES('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-25 21:06:00', 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi', 'var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { if (prefix == ''x'') return namespace; else return null; @@ -911,6 +911,11 @@ function scrape(doc) { } catch (e) {} } + var callNumber = utilities.getNode(doc, doc, ''//tr/td[1][@class="holdingslist"]/text()'', nsResolver); + if(callNumber && callNumber.nodeValue) { + model.addStatement(uri, prefixDC + "identifier", "CN "+callNumber.nodeValue, true); + } + model.addStatement(uri, prefixRDF + "type", prefixDummy + "book", false); return true; } @@ -1343,7 +1348,7 @@ if(detailRe.test(doc.location.href)) { wait(); }'); -REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-23 13:34:00', 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)', +REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-25 20:51:00', 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)', 'var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}"); if(singleRe.test(doc.location.href)) { @@ -1425,8 +1430,6 @@ utilities.processDocuments(browser, null, newUris, function(newBrowser) { record.add_field(code, ind1, ind2, value); } } - - model.addStatement(uri, prefixRDF + "type", prefixDummy + "book", false); utilities.importMARCRecord(record, uri, model); }, function() { done(); }, function() {});