LexisNexis translator now supports search results pages.

This commit is contained in:
Sean Takats 2008-03-17 05:31:18 +00:00
parent cfadc215ed
commit 33e953aa02

View File

@ -22,7 +22,7 @@
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-14 20:15:00'));
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-17 08:00:00'));
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
'function detectWeb(doc, url) {
@ -8300,13 +8300,13 @@ function doWeb(doc, url) {
Zotero.wait();
}');
REPLACE INTO translators VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '1.0.0b3.r1', '', '2008-01-29 23:00:00', '1', '100', '4', 'LexisNexis', 'Sean Takats', 'https?://[^/]*lexis-?nexis\.com[^/]*/us/lnacademic',
REPLACE INTO translators VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '1.0.0b3.r1', '', '2008-03-17 08:00:00', '1', '100', '4', 'LexisNexis', 'Sean Takats', 'https?://[^/]*lexis-?nexis\.com[^/]*/us/lnacademic',
'function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
Zotero.debug(doc.title);
if (doc.title.substr(doc.title.length-8, 8)=="Document"){
var xpath = ''//input[@name="cisb"]'';
var elmt = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
@ -8314,31 +8314,73 @@ REPLACE INTO translators VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '1.0.0b
return "newspaperArticle";
}
}
var xpath = ''//input[@name="frm_tagged_documents" and @type="checkbox"]'';
var elmt = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
if (elmt.iterateNext()){
return "multiple";
}
}',
'function doWeb(doc, url) {
var hostRe = new RegExp("^http(?:s)?://[^/]+");
var m = hostRe.exec(doc.location.href);
var host = m[0];
'function doWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
var risb = doc.evaluate(''//input[@name="risb"]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().value;
var cisb = doc.evaluate(''//input[@name="cisb"]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().value;
var uri = host+"/us/lnacademic/results/listview/delPrep.do?cisb="+cisb+"&risb="+risb+"&mode=delivery_refworks";
var hiddenInputs = doc.evaluate(''//form[@name="results_docview_DocumentForm"]//input[@type="hidden"]'', doc, nsResolver,
// define results navigation frame doc for export buttons and hidden fields
var rfDoc = doc.defaultView.window.top.frames[1].document;
var xpath = ''//img[@title="Export Bibliographic References"]'';
var elmt = doc.evaluate(xpath, rfDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
var hostRe = new RegExp("^http(?:s)?://[^/]+");
var m = hostRe.exec(doc.location.href);
var host = m[0];
var risb = doc.evaluate(''//input[@name="risb"]'', rfDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().value;
var cisb = doc.evaluate(''//input[@name="cisb"]'', rfDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().value;
var uri = host+"/us/lnacademic/results/listview/delPrep.do?cisb="+cisb+"&risb="+risb+"&mode=delivery_refworks";
var hiddenInputs = doc.evaluate(''//form[@name="results_docview_DocumentForm"]//input[@type="hidden" and not(@name="tagData")]'', rfDoc, nsResolver,
XPathResult.ANY_TYPE, null);
var hiddenInput;
var poststring="";
while(hiddenInput = hiddenInputs.iterateNext()) {
poststring = poststring+"&"+hiddenInput.name+"="+encodeURIComponent(hiddenInput.value);
}
poststring = poststring + "&hiddensearchfield=Narrow+Search&reloadClassif=&format=GNBFI&focusTerms=&nextSteps=0";
var xpath = ''//input[@name="frm_tagged_documents" and @type="checkbox"]'';
var elmt = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
if (doc.title.substr(doc.title.length-8, 8)=="Document"){
// single page
var delRange = "cur";
poststring = poststring + "&hiddensearchfield=Narrow+Search&reloadClassif=&format=GNBFI&focusTerms=&nextSteps=0";
} else {
// get multiple item titles and tags
var xpath = ''//tr[td/input[@name="frm_tagged_documents"]]'';
var rows = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
var title;
var tagNumber;
var items = new Object();
while (row = rows.iterateNext()){
title = doc.evaluate(''.//a'', row, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
tagNumber = doc.evaluate(''./td/input[@name="frm_tagged_documents"]'', row, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().value;
items[tagNumber] = title;
}
var tagData = "";
items = Zotero.selectItems(items);
if (!items) {
return true;
}
for (var i in items) {
tagData += "-"+i;
}
tagData = tagData.substr(1);
var delRange = "tag";
poststring = poststring + "&tagData=" + tagData + "&hiddensearchfield=Narrow+Search&reloadClassif=&selDomainID=4&format=GNBLIST&focusTerms=&sort=RELEVANCE&nextSteps=0";
}
Zotero.Utilities.HTTP.doPost(uri, poststring, function(text) {
uri = host+"/us/lnacademic/delivery/refExport.do";
var disb = text.match(/<input type="hidden" name="disb" value="([^"]+)">/);
var poststring = "delRange=cur&selDocs=&disb="+disb[1]+"&initializationPage=0";
poststring = "delRange="+delRange+"&selDocs=&disb="+disb[1]+"&initializationPage=0";
Zotero.Utilities.HTTP.doPost(uri, poststring, function(text) {
uri = text.match(/&amp;url=([^'']+)''/)
uri = decodeURIComponent(uri[1]);
@ -8346,41 +8388,44 @@ REPLACE INTO translators VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '1.0.0b
var uris = new Array();
uris.push(uri);
Zotero.Utilities.processDocuments(uris, function(newDoc){
var newItem = new Zotero.Item("newspaperArticle");
var title = newDoc.evaluate(''//div[@class="HEADLINE"]'', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (title){
newItem.title = title.textContent;
}else{
newItem.title = " ";
}
var date = newDoc.evaluate(''//meta[@name="_lndateissue"]/@content'', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (date){
date = date.nodeValue;
var m = date.match(/([^T]+)T/);
date = m[1];
Zotero.debug(date);
if (date.length == 8){
date = date.substr(0,4) + "-" + date.substr(4,2) + "-" + date.substr(6,2);
} else if (date.length == 6){
date = date.substr(0,4) + "-" + date.substr(4,2);
var elmts =newDoc.evaluate(''//html'', newDoc, nsResolver, XPathResult.ANY_TYPE, null);
var elmt;
while (elmt = elmts.iterateNext()){
var newItem = new Zotero.Item("newspaperArticle");
var title = newDoc.evaluate(''.//div[@class="HEADLINE"]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (title.textContent){
newItem.title = title.textContent;
}else{
newItem.title = " ";
}
newItem.date = date;
var date = newDoc.evaluate(''.//meta[@name="_lndateissue"]/@content'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (date){
date = date.nodeValue;
var m = date.match(/([^T]+)T/);
date = m[1];
if (date.length == 8){
date = date.substr(0,4) + "-" + date.substr(4,2) + "-" + date.substr(6,2);
} else if (date.length == 6){
date = date.substr(0,4) + "-" + date.substr(4,2);
}
newItem.date = date;
}
var publicationTitle = newDoc.evaluate(''.//div[@class="PUB"]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (publicationTitle){
newItem.publicationTitle = publicationTitle.textContent;
}
var section = newDoc.evaluate(''.//div[@class="SECTION-INFO"]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (section){
newItem.section = section.textContent;
}
var author = newDoc.evaluate(''.//div[@class="BYLINE"]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (author){
newItem.creators.push(Zotero.Utilities.cleanAuthor(author.textContent, "author"));
}
newItem.respository = "lexisnexis.com";
newItem.url = url;
newItem.complete()
}
var publicationTitle = newDoc.evaluate(''//div[@class="PUB"]'', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (publicationTitle){
newItem.publicationTitle = publicationTitle.textContent;
}
var section = newDoc.evaluate(''//div[@class="SECTION-INFO"]'', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (section){
newItem.section = section.textContent;
}
var author = newDoc.evaluate(''//div[@class="BYLINE"]'', newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if (author){
newItem.creators.push(Zotero.Utilities.cleanAuthor(author.textContent, "author"));
}
newItem.respository = "lexisnexis.com";
newItem.url = url;
newItem.complete();
Zotero.done();
});
});