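-Adds support for the alternate search results page in the NCBI PubMed translator: when no div.rprt rows are found, doWeb falls back to div.ResultSet/dl rows and reads each title from its h2 element.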

2008-03-10 19:53:26 +00:00 · 2008-03-10 19:53:26 +00:00 · 6e7807c236
commit 6e7807c236
parent 45ddc138f2
1 changed files with 11 additions and 4 deletions
--- a/scrapers.sql
+++ b/scrapers.sql
@ -22,7 +22,7 @@


 -- Set the following timestamp to the most recent scraper update date
-REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-10 19:45:00'));
+REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-03-10 20:15:00'));

 REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon', 
 'function detectWeb(doc, url) { 
@ -9013,7 +9013,7 @@ REPLACE INTO translators VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '1.0.0b
 	}
 }');

-REPLACE INTO translators VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '1.0.0b3.r1', '', '2008-01-23 18:30:00', '1', '100', '4', 'NCBI PubMed', 'Simon Kornblith and Michael Berkowitz', 'http://[^/]*www\.ncbi\.nlm\.nih\.gov[^/]*/(pubmed|sites/entrez|entrez/query\.fcgi\?.*db=PubMed)', 
+REPLACE INTO translators VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '1.0.0b3.r1', '', '2008-03-10 20:15:00', '1', '100', '4', 'NCBI PubMed', 'Simon Kornblith and Michael Berkowitz', 'http://[^/]*www\.ncbi\.nlm\.nih\.gov[^/]*/(pubmed|sites/entrez|entrez/query\.fcgi\?.*db=PubMed)', 
 'function detectWeb(doc, url) {
 	var namespace = doc.documentElement.namespaceURI;
 	var nsResolver = namespace ? function(prefix) {
@ -9162,12 +9162,19 @@ function doWeb(doc, url) {
 			var items = new Array();
 			var tableRows = doc.evaluate(''//div[@class="rprt"]'', doc, // edited for new PubMed
 					     nsResolver, XPathResult.ANY_TYPE, null);
-			
+			if (!tableRows.iterateNext()) {
+				tableRows = doc.evaluate(''//div[@class="ResultSet"]/dl'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+				var other = true;
+			}
 			var tableRow;
 			// Go through table rows
 			while(tableRow = tableRows.iterateNext()) {
 				uid = doc.evaluate(''.//input[@id="UidCheckBox"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+				if (other) {
+					var article = doc.evaluate(''.//h2'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+				} else {
 					var article = doc.evaluate(''.//div[@class="title"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+				}
 				items[uid.value] = article.textContent;
 			}