Search results scraping for LexisNexis
This commit is contained in:
parent
a48ea7dabf
commit
6a627fad0a
26
scrapers.sql
26
scrapers.sql
|
@ -1200,12 +1200,13 @@ for (var i = 0; i < elmts.length; i++) {
|
|||
}
|
||||
model.addStatement(uri, prefixRDF + "type", prefixDummy + "journalArticle", false);');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-18 10:13:00', 'LexisNexis Scraper', 'Simon Kornblith', '^http://web\.lexis-nexis\.com/universe/document', NULL,
|
||||
REPLACE INTO "scrapers" VALUES('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-18 10:13:00', 'LexisNexis Scraper', 'Simon Kornblith', '^http://web\.lexis-nexis\.com/universe/(?:document|doclist)', NULL,
|
||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/'';
|
||||
|
||||
function scrape(doc) {
|
||||
var uri = doc.location.href;
|
||||
|
||||
var citationDataDiv;
|
||||
|
@ -1268,6 +1269,29 @@ if(m) {
|
|||
for(i in authors) {
|
||||
model.addStatement(uri, prefixDC + "creator", authors[i].replace(" *", ""), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var detailRe = new RegExp("^http://[^/]+/universe/document");
|
||||
if(detailRe.test(doc.location.href)) {
|
||||
scrape(doc);
|
||||
} else {
|
||||
var items = utilities.getItemArray(doc, doc, "^http://[^/]+/universe/document");
|
||||
items = utilities.selectItems(items);
|
||||
|
||||
if(!items) {
|
||||
return true;
|
||||
}
|
||||
|
||||
var uris = new Array();
|
||||
for(i in items) {
|
||||
uris.push(i);
|
||||
}
|
||||
|
||||
utilities.processDocuments(browser, null, uris, function(browser) { scrape(browser.contentDocument) },
|
||||
function() { done(); }, function() {});
|
||||
|
||||
wait();
|
||||
}');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-23 13:34:00', 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
|
||||
|
|
Loading…
Reference in New Issue
Block a user