From 2b5b65f4ddbe8d253509ed716f103c22469c8b66 Mon Sep 17 00:00:00 2001 From: Simon Kornblith Date: Mon, 7 Aug 2006 00:30:36 +0000 Subject: [PATCH] addresses #83, figure out how to implement OpenURL adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN. --- scrapers.sql | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 178 insertions(+), 2 deletions(-) diff --git a/scrapers.sql b/scrapers.sql index 928023528..324f21403 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -1,7 +1,7 @@ --- 35 +-- 36 -- Set the following timestamp to the most recent scraper update date -REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-07-07 12:44:00')); +REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-06 19:14:00')); REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)', 'function detect(doc, url) { @@ -2333,6 +2333,182 @@ REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006 translator.doImport(); }'); +REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-06 19:14:00', 4, 'COinS Scraper', 'Simon Kornblith', NULL, +'function detect(doc, url) { + var spanTags = doc.getElementsByTagName("span"); + + var encounteredType = false; + + for(var i=0; i 1) { + var selectArray = new Array(); + + for(var i in newItems) { + selectArray[i] = newItems.title; + } + selectArray = Scholar.selectItems(selectArray); + for(var i in selectArray) { + newItems[i].complete(); + } + } else { + newItems[0].complete(); + } +}'); REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books Scraper', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)', 'function detect(doc, url) { var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');