-Adds support for alternate search results page in NCBI PubMed translator.
This commit is contained in:
parent
45ddc138f2
commit
6e7807c236
15
scrapers.sql
15
scrapers.sql
|
@ -22,7 +22,7 @@
|
|||
|
||||
|
||||
-- Set the following timestamp to the most recent scraper update date
|
||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-10 19:45:00'));
|
||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-10 20:15:00'));
|
||||
|
||||
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
||||
'function detectWeb(doc, url) {
|
||||
|
@ -9013,7 +9013,7 @@ REPLACE INTO translators VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '1.0.0b
|
|||
}
|
||||
}');
|
||||
|
||||
REPLACE INTO translators VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '1.0.0b3.r1', '', '2008-01-23 18:30:00', '1', '100', '4', 'NCBI PubMed', 'Simon Kornblith and Michael Berkowitz', 'http://[^/]*www\.ncbi\.nlm\.nih\.gov[^/]*/(pubmed|sites/entrez|entrez/query\.fcgi\?.*db=PubMed)',
|
||||
REPLACE INTO translators VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '1.0.0b3.r1', '', '2008-03-10 20:15:00', '1', '100', '4', 'NCBI PubMed', 'Simon Kornblith and Michael Berkowitz', 'http://[^/]*www\.ncbi\.nlm\.nih\.gov[^/]*/(pubmed|sites/entrez|entrez/query\.fcgi\?.*db=PubMed)',
|
||||
'function detectWeb(doc, url) {
|
||||
var namespace = doc.documentElement.namespaceURI;
|
||||
var nsResolver = namespace ? function(prefix) {
|
||||
|
@ -9162,12 +9162,19 @@ function doWeb(doc, url) {
|
|||
var items = new Array();
|
||||
var tableRows = doc.evaluate(''//div[@class="rprt"]'', doc, // edited for new PubMed
|
||||
nsResolver, XPathResult.ANY_TYPE, null);
|
||||
|
||||
if (!tableRows.iterateNext()) {
|
||||
tableRows = doc.evaluate(''//div[@class="ResultSet"]/dl'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||||
var other = true;
|
||||
}
|
||||
var tableRow;
|
||||
// Go through table rows
|
||||
while(tableRow = tableRows.iterateNext()) {
|
||||
uid = doc.evaluate(''.//input[@id="UidCheckBox"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
var article = doc.evaluate(''.//div[@class="title"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
if (other) {
|
||||
var article = doc.evaluate(''.//h2'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
} else {
|
||||
var article = doc.evaluate(''.//div[@class="title"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
}
|
||||
items[uid.value] = article.textContent;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user