From c97b216c572a6d2a404441923c63a8bbac70d2a7 Mon Sep 17 00:00:00 2001 From: Simon Kornblith Date: Thu, 3 Jan 2008 09:47:57 +0000 Subject: [PATCH] closes #821, Validate and correct URLs passed from translators closes #760, Files specified in file:// URLs should be imported --- chrome/content/zotero/xpcom/translate.js | 57 +++++++++++++++++++----- scrapers.sql | 34 ++++++-------- 2 files changed, 60 insertions(+), 31 deletions(-) diff --git a/chrome/content/zotero/xpcom/translate.js b/chrome/content/zotero/xpcom/translate.js index 20c641c14..5814b024b 100644 --- a/chrome/content/zotero/xpcom/translate.js +++ b/chrome/content/zotero/xpcom/translate.js @@ -1040,6 +1040,7 @@ Zotero.Translate.prototype._itemTagsAndSeeAlso = function(item, newItem) { /* * executed when an item is done and ready to be loaded into the database */ +Zotero.Translate._urlRe = /(([A-Za-z]+):\/\/[^\s]*)/i; Zotero.Translate.prototype._itemDone = function(item, attachedTo) { if(this.type == "web") { // store repository if this item was captured from a website, and @@ -1091,18 +1092,36 @@ Zotero.Translate.prototype._itemDone = function(item, attachedTo) { Zotero.debug("Translate: adding attachment"); + if(!item.url && !item.path) { + Zotero.debug("Translate: ignoring attachment: no path or URL specified"); + return; + } + + if(!item.path) { + // see if this is actually a file URL + var m = Zotero.Translate._urlRe.exec(item.url); + var protocol = m ? m[2].toLowerCase() : ""; + if(protocol == "file") { + item.path = item.url; + item.url = false; + } else if(protocol != "http" && protocol != "https") { + Zotero.debug("Translate: unrecognized protocol "+protocol); + return; + } + } + if(!item.path) { // create from URL - if(item.url) { + try { var myID = Zotero.Attachments.linkFromURL(item.url, attachedTo, (item.mimeType ? item.mimeType : undefined), (item.title ? item.title : undefined)); - Zotero.debug("Translate: created attachment; id is "+myID); - var newItem = Zotero.Items.get(myID); - } else { - Zotero.debug("Translate: not adding attachment: no path or url specified"); + } catch(e) { + Zotero.debug("Translate: error adding attachment "+item.url); return; } + Zotero.debug("Translate: created attachment; id is "+myID); + var newItem = Zotero.Items.get(myID); } else { // generate nsIFile var IOService = Components.classes["@mozilla.org/network/io-service;1"]. @@ -1111,10 +1130,14 @@ Zotero.Translate.prototype._itemDone = function(item, attachedTo) { var file = uri.QueryInterface(Components.interfaces.nsIFileURL).file; if (!file.exists()) { + // use item title if possible, or else file leaf name + var title = item.title; + if(!title) title = file.leafName; + var myID = Zotero.Attachments.createMissingAttachment( item.url ? Zotero.Attachments.LINK_MODE_IMPORTED_URL : Zotero.Attachments.LINK_MODE_IMPORTED_FILE, - file, item.url ? item.url : null, item.title, + file, item.url ? item.url : null, title, item.mimeType, item.charset, attachedTo); } else if (item.url) { @@ -1262,16 +1285,24 @@ Zotero.Translate.prototype._itemDone = function(item, attachedTo) { Zotero.debug("Translate: NOTICE: either mimeType or title is missing; attaching file will be slower"); } - Zotero.Attachments.linkFromURL(attachment.url, myID, - (attachment.mimeType ? attachment.mimeType : undefined), - (attachment.title ? attachment.title : undefined)); + try { + Zotero.Attachments.linkFromURL(attachment.url, myID, + (attachment.mimeType ? attachment.mimeType : undefined), + (attachment.title ? attachment.title : undefined)); + } catch(e) { + Zotero.debug("Translate: error adding attachment "+attachment.url); + } } } else if(attachment.document || (attachment.mimeType && attachment.mimeType == "text/html") || Zotero.Prefs.get("downloadAssociatedFiles")) { // if snapshot is not explicitly set to false, retrieve snapshot if(attachment.document) { - Zotero.Attachments.importFromDocument(attachment.document, myID, attachment.title); + try { + Zotero.Attachments.importFromDocument(attachment.document, myID, attachment.title); + } catch(e) { + Zotero.debug("Translate: error attaching document"); + } } else { var mimeType = null; var title = null; @@ -1296,7 +1327,11 @@ Zotero.Translate.prototype._itemDone = function(item, attachedTo) { } var fileBaseName = Zotero.Attachments.getFileBaseNameFromItem(myID); - Zotero.Attachments.importFromURL(attachment.url, myID, title, fileBaseName); + try { + Zotero.Attachments.importFromURL(attachment.url, myID, title, fileBaseName); + } catch(e) { + Zotero.debug("Zotero.Translate: error adding attachment "+attachment.url); + } } } } diff --git a/scrapers.sql b/scrapers.sql index 6ebca2e5e..c086d5a53 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -15001,7 +15001,7 @@ function doImport() { } }'); -REPLACE INTO translators VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '1.0.0b4.r1', '', '2007-11-29 21:00:00', '1', '100', '3', 'RIS', 'Simon Kornblith', 'ris', +REPLACE INTO translators VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '1.0.0b4.r1', '', '2008-01-03 09:44:52', '1', '100', '3', 'RIS', 'Simon Kornblith', 'ris', 'Zotero.configure("dataMode", "line"); Zotero.addOption("exportNotes", true); @@ -15234,26 +15234,20 @@ function processTag(item, tag, value) { } } else if(tag == "UR" || tag == "L1" || tag == "L2" || tag == "L4") { // URL - //using regex to test URL until #821 is fixed - urlRe = /(https?:\/\/[^\s]*)/; - var m = urlRe.exec(value); - if (m){ - value = m[1]; - if(!item.url) { + if(!item.url) { item.url = value; - } - if(tag == "UR") { - item.attachments.push({url:value}); - } else if(tag == "L1") { - item.attachments.push({url:value, mimeType:"application/pdf", - title:"Full Text (PDF)", downloadable:true}); - } else if(tag == "L2") { - item.attachments.push({url:value, mimeType:"text/html", - title:"Full Text (HTML)", downloadable:true}); - } else if(tag == "L4") { - item.attachments.push({url:value, - title:"Image", downloadable:true}); - } + } + if(tag == "UR") { + item.attachments.push({url:value}); + } else if(tag == "L1") { + item.attachments.push({url:value, mimeType:"application/pdf", + title:"Full Text (PDF)", downloadable:true}); + } else if(tag == "L2") { + item.attachments.push({url:value, mimeType:"text/html", + title:"Full Text (HTML)", downloadable:true}); + } else if(tag == "L4") { + item.attachments.push({url:value, + title:"Image", downloadable:true}); } } else if (tag == "IS") { // Issue Number (patent: patentNumber)