- properly escape URLs for unAPI
- better handling of MODS webpage metadata
- support abstract in MODS
- improved URI restriction regexp
This commit is contained in:
parent
e67158c63c
commit
8cfe98cf26
|
@ -422,7 +422,7 @@ Zotero.Utilities.Ingester.prototype.loadDocument = function(url, succeeded, fail
|
||||||
}
|
}
|
||||||
|
|
||||||
Zotero.Utilities.Ingester._protocolRe = new RegExp();
|
Zotero.Utilities.Ingester._protocolRe = new RegExp();
|
||||||
Zotero.Utilities.Ingester._protocolRe.compile("^(?:(?:http|https|ftp):|[^:]*/)", "i");
|
Zotero.Utilities.Ingester._protocolRe.compile("^(?:(?:http|https|ftp):|[^:](?:/.*)?$)", "i");
|
||||||
Zotero.Utilities.Ingester.prototype.processDocuments = function(urls, processor, done, exception) {
|
Zotero.Utilities.Ingester.prototype.processDocuments = function(urls, processor, done, exception) {
|
||||||
if(this.translate.locationIsProxied) {
|
if(this.translate.locationIsProxied) {
|
||||||
for(var i in urls) {
|
for(var i in urls) {
|
||||||
|
|
58
scrapers.sql
58
scrapers.sql
|
@ -1,4 +1,4 @@
|
||||||
-- 184
|
-- 185
|
||||||
|
|
||||||
-- ***** BEGIN LICENSE BLOCK *****
|
-- ***** BEGIN LICENSE BLOCK *****
|
||||||
--
|
--
|
||||||
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
|
|
||||||
-- Set the following timestamp to the most recent scraper update date
|
-- Set the following timestamp to the most recent scraper update date
|
||||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-03-20 17:45:00'));
|
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-03-20 17:50:13'));
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b3.r1', '', '2006-12-15 03:40:00', 1, 100, 4, 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b3.r1', '', '2006-12-15 03:40:00', 1, 100, 4, 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
||||||
'function detectWeb(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
|
@ -608,7 +608,7 @@ function itemComplete(newItem, url) {
|
||||||
}
|
}
|
||||||
|
|
||||||
var useSnapshot = false;
|
var useSnapshot = false;
|
||||||
|
f
|
||||||
function doWeb(doc, url) {
|
function doWeb(doc, url) {
|
||||||
var namespace = doc.documentElement.namespaceURI;
|
var namespace = doc.documentElement.namespaceURI;
|
||||||
var nsResolver = namespace ? function(prefix) {
|
var nsResolver = namespace ? function(prefix) {
|
||||||
|
@ -3155,7 +3155,7 @@ function doWeb(doc, url) {
|
||||||
}
|
}
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('e7e01cac-1e37-4da6-b078-a0e8343b0e98', '1.0.0b4r1', '', '2007-03-19 22:51:00', '1', '90', '4', 'unAPI', 'Simon Kornblith', '',
|
REPLACE INTO translators VALUES ('e7e01cac-1e37-4da6-b078-a0e8343b0e98', '1.0.0b4r1', '', '2007-03-20 17:50:13', '1', '90', '4', 'unAPI', 'Simon Kornblith', '',
|
||||||
'var RECOGNIZABLE_FORMATS = ["mods", "marc", "endnote", "ris", "bibtex", "rdf"];
|
'var RECOGNIZABLE_FORMATS = ["mods", "marc", "endnote", "ris", "bibtex", "rdf"];
|
||||||
var FORMAT_GUIDS = {
|
var FORMAT_GUIDS = {
|
||||||
"mods":"0e2235e7-babf-413c-9acf-f27cce5f059c",
|
"mods":"0e2235e7-babf-413c-9acf-f27cce5f059c",
|
||||||
|
@ -3186,11 +3186,10 @@ function detectWeb(doc, url) {
|
||||||
// look for abbrs
|
// look for abbrs
|
||||||
var abbrs = doc.getElementsByTagName("abbr");
|
var abbrs = doc.getElementsByTagName("abbr");
|
||||||
for each(var abbr in abbrs) {
|
for each(var abbr in abbrs) {
|
||||||
if(abbr.getAttribute) {
|
if(abbr.getAttribute && abbr.getAttribute("class") &&
|
||||||
if(abbr.getAttribute("class").split(" ").indexOf("unapi-id") != -1 && abbr.getAttribute("title")) {
|
abbr.getAttribute("class").split(" ").indexOf("unapi-id") != -1 && abbr.getAttribute("title")) {
|
||||||
// found an abbr
|
// found an abbr
|
||||||
unsearchedIds.push(abbr.getAttribute("title"));
|
unsearchedIds.push(escape(abbr.getAttribute("title")));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3273,22 +3272,22 @@ function checkFormats(text) {
|
||||||
|
|
||||||
if(format.@namespace_uri == "http://www.loc.gov/mods/v3" || lowerName == "mods" || format.@docs == "http://www.loc.gov/standards/mods/") {
|
if(format.@namespace_uri == "http://www.loc.gov/mods/v3" || lowerName == "mods" || format.@docs == "http://www.loc.gov/standards/mods/") {
|
||||||
if(!foundFormat["mods"] || lowerName.indexOf("full") != -1) {
|
if(!foundFormat["mods"] || lowerName.indexOf("full") != -1) {
|
||||||
foundFormat["mods"] = name;
|
foundFormat["mods"] = escape(name);
|
||||||
}
|
}
|
||||||
} else if(lowerName.match(/^marc\b/)) {
|
} else if(lowerName.match(/^marc\b/)) {
|
||||||
if(!foundFormat["marc"] || lowerName.indexOf("utf8") != -1) {
|
if(!foundFormat["marc"] || lowerName.indexOf("utf8") != -1) {
|
||||||
foundFormat["marc"] = name;
|
foundFormat["marc"] = escape(name);
|
||||||
}
|
}
|
||||||
} else if(lowerName == "rdf_dc") {
|
} else if(lowerName == "rdf_dc") {
|
||||||
foundFormat["rdf"] = name;
|
foundFormat["rdf"] = escape(name);
|
||||||
} else if(format.@docs.text() == "http://www.refman.com/support/risformat_intro.asp" || lowerName.match(/^ris\b/)) {
|
} else if(format.@docs.text() == "http://www.refman.com/support/risformat_intro.asp" || lowerName.match(/^ris\b/)) {
|
||||||
if(!foundFormat["ris"] || lowerName.indexOf("utf8") != -1) {
|
if(!foundFormat["ris"] || lowerName.indexOf("utf8") != -1) {
|
||||||
foundFormat["ris"] = name;
|
foundFormat["ris"] = escape(name);
|
||||||
}
|
}
|
||||||
} else if(lowerName == "bibtex") {
|
} else if(lowerName == "bibtex") {
|
||||||
foundFormat["bibtex"] = name;
|
foundFormat["bibtex"] = escape(name);
|
||||||
} else if(lowerName == "endnote") {
|
} else if(lowerName == "endnote") {
|
||||||
foundFormat["endnote"] = name;
|
foundFormat["endnote"] = escape(name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7048,7 +7047,7 @@ REPLACE INTO translators VALUES ('af4cf622-eaca-450b-bd45-0f4ba345d081', '1.0.0b
|
||||||
Zotero.wait();
|
Zotero.wait();
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '1.0.0b3.r1', '', '2007-03-19 22:51:00', 1, 50, 3, 'MODS', 'Simon Kornblith', 'xml',
|
REPLACE INTO translators VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '1.0.0b3.r1', '', '2007-03-20 17:50:13', 1, 50, 3, 'MODS', 'Simon Kornblith', 'xml',
|
||||||
'Zotero.addOption("exportNotes", true);
|
'Zotero.addOption("exportNotes", true);
|
||||||
|
|
||||||
function detectImport() {
|
function detectImport() {
|
||||||
|
@ -7292,6 +7291,11 @@ function doExport() {
|
||||||
mods.relatedItem.titleInfo += <titleInfo type="abbreviated"><title>{item.journalAbbreviation}</title></titleInfo>;
|
mods.relatedItem.titleInfo += <titleInfo type="abbreviated"><title>{item.journalAbbreviation}</title></titleInfo>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// XML tag abstract; object field abstractNote
|
||||||
|
if(item.abstractNote) {
|
||||||
|
mods.abstract = item.abstractNote;
|
||||||
|
}
|
||||||
|
|
||||||
if(mods.relatedItem.length() == 1 && isPartialItem) {
|
if(mods.relatedItem.length() == 1 && isPartialItem) {
|
||||||
mods.relatedItem.@type = "host";
|
mods.relatedItem.@type = "host";
|
||||||
}
|
}
|
||||||
|
@ -7441,7 +7445,19 @@ function doImport() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!newItem.itemType) newItem.itemType = "book";
|
// check if this is an electronic resource
|
||||||
|
if(!newItem.itemType) {
|
||||||
|
for each(var form in mods.m::physicalDescription.m::form) {
|
||||||
|
if(form.@authority == "marcform" || form.@authority == "marc") {
|
||||||
|
if(form.text().toString() == "electronic") {
|
||||||
|
newItem.itemType = "webpage";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!newItem.itemType) newItem.itemType = "book";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var isPartialItem = Zotero.Utilities.inArray(newItem.itemType, partialItemTypes);
|
var isPartialItem = Zotero.Utilities.inArray(newItem.itemType, partialItemTypes);
|
||||||
|
@ -7577,7 +7593,11 @@ function doImport() {
|
||||||
}
|
}
|
||||||
// publisher/distributor
|
// publisher/distributor
|
||||||
if(originInfo.m::publisher.length()) {
|
if(originInfo.m::publisher.length()) {
|
||||||
newItem.publisher = newItem.distributor = originInfo.m::publisher[0].text().toString();
|
if(newItem.itemType == "webpage" || newItem.itemType == "website") {
|
||||||
|
newItem.publicationTitle = originInfo.m::publisher[0].text().toString();
|
||||||
|
} else {
|
||||||
|
newItem.publisher = originInfo.m::publisher[0].text().toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// date
|
// date
|
||||||
if(originInfo.m::copyrightDate.length()) {
|
if(originInfo.m::copyrightDate.length()) {
|
||||||
|
@ -7609,6 +7629,8 @@ function doImport() {
|
||||||
newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString();
|
newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString();
|
||||||
// url
|
// url
|
||||||
newItem.url = mods.m::location.m::url.text().toString();
|
newItem.url = mods.m::location.m::url.text().toString();
|
||||||
|
// abstract
|
||||||
|
newItem.abstractNote = mods.m::abstract.text().toString();
|
||||||
|
|
||||||
/** NOTES **/
|
/** NOTES **/
|
||||||
for each(var note in mods.m::note) {
|
for each(var note in mods.m::note) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user