Closes #162: sniff file contents to detect import formats
Import should now work regardless of file extension. This should make #86 (steal EndNote download links) fairly easy to implement.
This commit is contained in:
parent
d67d96c321
commit
504ebf8996
|
@ -261,8 +261,24 @@ Scholar.Translate.prototype.getTranslators = function() {
|
||||||
Scholar.debug("searching for translators for "+this.path);
|
Scholar.debug("searching for translators for "+this.path);
|
||||||
|
|
||||||
// see which translators can translate
|
// see which translators can translate
|
||||||
|
var possibleTranslators = this._findTranslators(translators);
|
||||||
|
|
||||||
|
return possibleTranslators;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* gets translator options to be displayed in a dialog
|
||||||
|
*
|
||||||
|
* NOT IMPLEMENTED
|
||||||
|
*/
|
||||||
|
Scholar.Translate.prototype.displayOptions = function() {
|
||||||
|
}
|
||||||
|
|
||||||
|
Scholar.Translate.prototype._findTranslators = function(translators, ignoreExtensions) {
|
||||||
|
var possibleTranslators = new Array();
|
||||||
for(var i in translators) {
|
for(var i in translators) {
|
||||||
if(this._canTranslate(translators[i])) {
|
if(this._canTranslate(translators[i], ignoreExtensions)) {
|
||||||
Scholar.debug("found translator "+translators[i].label);
|
Scholar.debug("found translator "+translators[i].label);
|
||||||
|
|
||||||
// for some reason, and i'm not quite sure what this reason is,
|
// for some reason, and i'm not quite sure what this reason is,
|
||||||
|
@ -276,17 +292,12 @@ Scholar.Translate.prototype.getTranslators = function() {
|
||||||
possibleTranslators.push(translator);
|
possibleTranslators.push(translator);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if(!possibleTranslators.length && this.type == "import" && !ignoreExtensions) {
|
||||||
return possibleTranslators;
|
Scholar.debug("looking a second time");
|
||||||
|
// try search again, ignoring file extensions
|
||||||
|
return this._findTranslators(translators, true);
|
||||||
}
|
}
|
||||||
}
|
return possibleTranslators;
|
||||||
|
|
||||||
/*
|
|
||||||
* gets translator options to be displayed in a dialog
|
|
||||||
*
|
|
||||||
* NOT IMPLEMENTED
|
|
||||||
*/
|
|
||||||
Scholar.Translate.prototype.displayOptions = function() {
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Scholar.Translate.prototype._loadTranslator = function() {
|
Scholar.Translate.prototype._loadTranslator = function() {
|
||||||
|
@ -467,7 +478,7 @@ Scholar.Translate.prototype._generateSandbox = function() {
|
||||||
/*
|
/*
|
||||||
* Check to see if _scraper_ can scrape this document
|
* Check to see if _scraper_ can scrape this document
|
||||||
*/
|
*/
|
||||||
Scholar.Translate.prototype._canTranslate = function(translator) {
|
Scholar.Translate.prototype._canTranslate = function(translator, ignoreExtensions) {
|
||||||
// Test location with regular expression
|
// Test location with regular expression
|
||||||
// If this is slow, we could preload all scrapers and compile regular
|
// If this is slow, we could preload all scrapers and compile regular
|
||||||
// expressions, so each check will be faster
|
// expressions, so each check will be faster
|
||||||
|
@ -482,6 +493,12 @@ Scholar.Translate.prototype._canTranslate = function(translator) {
|
||||||
if(regularExpression.test(this.path)) {
|
if(regularExpression.test(this.path)) {
|
||||||
canTranslate = true;
|
canTranslate = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(ignoreExtensions) {
|
||||||
|
// if we're ignoring extensions, that means we already tried
|
||||||
|
// everything without ignoring extensions and it didn't work
|
||||||
|
canTranslate = !canTranslate;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
var canTranslate = true;
|
var canTranslate = true;
|
||||||
}
|
}
|
||||||
|
|
52
scrapers.sql
52
scrapers.sql
|
@ -1,7 +1,7 @@
|
||||||
-- 38
|
-- 39
|
||||||
|
|
||||||
-- Set the following timestamp to the most recent scraper update date
|
-- Set the following timestamp to the most recent scraper update date
|
||||||
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-07 01:09:00'));
|
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-07 21:55:00'));
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
|
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
|
||||||
'function detectWeb(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
|
@ -2163,7 +2163,7 @@ REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006
|
||||||
}
|
}
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-06-26 16:01:00', 12, 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
|
REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-08-07 21:55:00', 12, 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
|
||||||
'function detectWeb(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
if(doc.location.href.indexOf("list_uids=") >= 0) {
|
if(doc.location.href.indexOf("list_uids=") >= 0) {
|
||||||
return "journalArticle";
|
return "journalArticle";
|
||||||
|
@ -2738,7 +2738,15 @@ function doSearch(item) {
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml',
|
REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml',
|
||||||
'Scholar.addOption("exportNotes", true);
|
'Scholar.addOption("exportNotes", true);
|
||||||
Scholar.addOption("exportFileData", true);',
|
Scholar.addOption("exportFileData", true);
|
||||||
|
|
||||||
|
function detectImport() {
|
||||||
|
var read = Scholar.read(512);
|
||||||
|
var modsTagRegexp = /<mods[^>]+>/
|
||||||
|
if(modsTagRegexp.test(read)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}',
|
||||||
'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
|
'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
|
||||||
|
|
||||||
function doExport() {
|
function doExport() {
|
||||||
|
@ -3664,7 +3672,17 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf',
|
REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf',
|
||||||
'Scholar.configure("dataMode", "rdf");',
|
'Scholar.configure("dataMode", "rdf");
|
||||||
|
|
||||||
|
function detectImport() {
|
||||||
|
// unfortunately, Mozilla will let you create a data source from any type
|
||||||
|
// of XML, so we need to make sure there are actually nodes
|
||||||
|
|
||||||
|
var nodes = Scholar.RDF.getAllResources();
|
||||||
|
if(nodes) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}',
|
||||||
'// gets the first result set for a property that can be encoded in multiple
|
'// gets the first result set for a property that can be encoded in multiple
|
||||||
// ontologies
|
// ontologies
|
||||||
function getFirstResults(node, properties, onlyOneString) {
|
function getFirstResults(node, properties, onlyOneString) {
|
||||||
|
@ -4052,7 +4070,20 @@ function doImport() {
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-06-30 15:36:00', 3, 'RIS', 'Simon Kornblith', 'ris',
|
REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-06-30 15:36:00', 3, 'RIS', 'Simon Kornblith', 'ris',
|
||||||
'Scholar.configure("dataMode", "line");
|
'Scholar.configure("dataMode", "line");
|
||||||
Scholar.addOption("exportNotes", true);',
|
Scholar.addOption("exportNotes", true);
|
||||||
|
|
||||||
|
function detectImport() {
|
||||||
|
var line
|
||||||
|
while(line = Scholar.read()) {
|
||||||
|
if(line.replace(/\s/g, "") != "") {
|
||||||
|
if(line.substr(0, 6) == "TY - ") {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}',
|
||||||
'var itemsWithYears = ["book", "bookSection", "thesis", "film"];
|
'var itemsWithYears = ["book", "bookSection", "thesis", "film"];
|
||||||
|
|
||||||
var fieldMap = {
|
var fieldMap = {
|
||||||
|
@ -4325,7 +4356,13 @@ function doExport() {
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('a6ee60df-1ddc-4aae-bb25-45e0537be973', '2006-07-16 17:18:00', 1, 'MARC', 'Simon Kornblith', 'marc',
|
REPLACE INTO "translators" VALUES ('a6ee60df-1ddc-4aae-bb25-45e0537be973', '2006-07-16 17:18:00', 1, 'MARC', 'Simon Kornblith', 'marc',
|
||||||
NULL,
|
'function detectImport() {
|
||||||
|
var marcRecordRegexp = /^[0-9]{5}[a-z ]{3}$/
|
||||||
|
var read = Scholar.read(8);
|
||||||
|
if(marcRecordRegexp.test(read)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}',
|
||||||
'/*
|
'/*
|
||||||
* Original version of MARC record library copyright (C) 2005 Stefano Bargioni,
|
* Original version of MARC record library copyright (C) 2005 Stefano Bargioni,
|
||||||
* licensed under the LGPL
|
* licensed under the LGPL
|
||||||
|
@ -4801,7 +4838,6 @@ function doImport(url) { // the URL is actually here for other translators
|
||||||
|
|
||||||
while(text = Scholar.read(4096)) { // read in 4096 byte increments
|
while(text = Scholar.read(4096)) { // read in 4096 byte increments
|
||||||
var records = text.split("\x1D");
|
var records = text.split("\x1D");
|
||||||
Scholar.Utilities.debugPrint(records);
|
|
||||||
|
|
||||||
if(records.length > 1) {
|
if(records.length > 1) {
|
||||||
records[0] = holdOver + records[0];
|
records[0] = holdOver + records[0];
|
||||||
|
|
Loading…
Reference in New Issue
Block a user