Fixes #766, Zotero saves text/html URLs with .pdf extensions as PDFs

Addresses #460, importFromURL fails when importing PDFs from servers that do not properly support HEAD requests

Now inspects supposed PDFs after download and deletes them if they are not actually in PDF format

Also:

- Fixed a bug where running importFromDocument() on a PDF on Windows would result in an incomplete or (since r1688) missing attachment item
- importFromDocument() no longer returns an itemID, since it can be partly asynchronous now
- Added rudimentary 'text/html' support to Zotero.MIME.sniffForMIMEType()
This commit is contained in:
Dan Stillman 2007-09-18 10:34:34 +00:00
parent 2167094e61
commit deeab05a64
3 changed files with 116 additions and 79 deletions

View File

@ -185,6 +185,8 @@ Zotero.Attachments = new function(){
// Override MIME type to application/pdf if extension is .pdf -- // Override MIME type to application/pdf if extension is .pdf --
// workaround for sites that respond to the HEAD request with an // workaround for sites that respond to the HEAD request with an
// invalid MIME type (https://www.zotero.org/trac/ticket/460) // invalid MIME type (https://www.zotero.org/trac/ticket/460)
//
// Downloaded file is inspected below and deleted if actually HTML
if (ext == 'pdf') { if (ext == 'pdf') {
mimeType = 'application/pdf'; mimeType = 'application/pdf';
} }
@ -246,12 +248,7 @@ Zotero.Attachments = new function(){
attachmentItem.setField('url', url); attachmentItem.setField('url', url);
attachmentItem.setField('accessDate', "CURRENT_TIMESTAMP"); attachmentItem.setField('accessDate', "CURRENT_TIMESTAMP");
// Don't send a Notifier event on the incomplete item // Don't send a Notifier event on the incomplete item
var disabled = Zotero.Notifier.disable(); var itemID = attachmentItem.save();
attachmentItem.save();
if (disabled) {
Zotero.Notifier.enable();
}
var itemID = attachmentItem.getID();
// Add to collections // Add to collections
if (parentCollectionIDs){ if (parentCollectionIDs){
@ -272,6 +269,15 @@ Zotero.Attachments = new function(){
wbp.progressListener = new Zotero.WebProgressFinishListener(function(){ wbp.progressListener = new Zotero.WebProgressFinishListener(function(){
try { try {
var str = Zotero.File.getSample(file);
if (Zotero.MIME.sniffForMIMEType(str) != 'application/pdf') {
Zotero.debug("Downloaded PDF did not have MIME type "
+ "'application/pdf' in Attachments.importFromURL()", 2);
var item = Zotero.Items.get(itemID);
item.erase();
return;
}
_addToDB(file, url, title, Zotero.Attachments.LINK_MODE_IMPORTED_URL, _addToDB(file, url, title, Zotero.Attachments.LINK_MODE_IMPORTED_URL,
mimeType, null, sourceItemID, itemID); mimeType, null, sourceItemID, itemID);
@ -281,6 +287,8 @@ Zotero.Attachments = new function(){
// is flushed to disk, so we just wait a second // is flushed to disk, so we just wait a second
// and hope for the best -- we'll index it later // and hope for the best -- we'll index it later
// if it fails // if it fails
//
// TODO: index later
var timer = Components.classes["@mozilla.org/timer;1"]. var timer = Components.classes["@mozilla.org/timer;1"].
createInstance(Components.interfaces.nsITimer); createInstance(Components.interfaces.nsITimer);
timer.initWithCallback({notify: function() { timer.initWithCallback({notify: function() {
@ -289,30 +297,24 @@ Zotero.Attachments = new function(){
} }
catch (e) { catch (e) {
// Clean up // Clean up
if (itemID) { var item = Zotero.Items.get(itemID);
var item = Zotero.Items.get(itemID); item.erase();
if (item) {
item.erase();
}
try {
var destDir = Zotero.getStorageDirectory();
destDir.append(itemID);
if (destDir.exists()) {
destDir.remove(true);
}
}
catch (e) {}
}
throw (e); throw (e);
} }
}); });
// Disable the Notifier during the commit
var disabled = Zotero.Notifier.disable();
// The attachment is still incomplete here, but we can't risk // The attachment is still incomplete here, but we can't risk
// leaving the transaction open if the callback never triggers // leaving the transaction open if the callback never triggers
Zotero.DB.commitTransaction(); Zotero.DB.commitTransaction();
if (disabled) {
Zotero.Notifier.enable();
}
wbp.saveURI(nsIURL, null, null, null, null, file); wbp.saveURI(nsIURL, null, null, null, null, file);
} }
catch (e){ catch (e){
@ -400,7 +402,8 @@ Zotero.Attachments = new function(){
// thread, but at least it lets the menu close) // thread, but at least it lets the menu close)
setTimeout(function() { setTimeout(function() {
if (Zotero.Fulltext.isCachedMIMEType(mimeType)) { if (Zotero.Fulltext.isCachedMIMEType(mimeType)) {
Zotero.Fulltext.indexItems([itemID]); // No file, so no point running the PDF indexer
//Zotero.Fulltext.indexItems([itemID]);
} }
else { else {
Zotero.Fulltext.indexDocument(document, itemID); Zotero.Fulltext.indexDocument(document, itemID);
@ -412,9 +415,7 @@ Zotero.Attachments = new function(){
/* /*
* Save a snapshot -- uses synchronous WebPageDump * Save a snapshot -- uses synchronous WebPageDump or asynchronous saveURI()
*
* Returns itemID of attachment
*/ */
function importFromDocument(document, sourceItemID, forceTitle, parentCollectionIDs) { function importFromDocument(document, sourceItemID, forceTitle, parentCollectionIDs) {
Zotero.debug('Importing attachment from document'); Zotero.debug('Importing attachment from document');
@ -456,50 +457,6 @@ Zotero.Attachments = new function(){
var fileName = _getFileNameFromURL(url, mimeType); var fileName = _getFileNameFromURL(url, mimeType);
file.append(fileName); file.append(fileName);
if (mimeType == 'text/html') {
// Load WebPageDump code
Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
.getService(Components.interfaces.mozIJSSubScriptLoader)
.loadSubScript("chrome://zotero/content/webpagedump/common.js");
Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
.getService(Components.interfaces.mozIJSSubScriptLoader)
.loadSubScript("chrome://zotero/content/webpagedump/domsaver.js");
wpdDOMSaver.init(file.path, document)
wpdDOMSaver.saveHTMLDocument()
}
else {
Zotero.debug('Saving with saveURI()');
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
var wbp = Components
.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
.createInstance(nsIWBP);
wbp.persistFlags = nsIWBP.PERSIST_FLAGS_AUTODETECT_APPLY_CONVERSION
| nsIWBP.PERSIST_FLAGS_FROM_CACHE;
var ioService = Components.classes["@mozilla.org/network/io-service;1"]
.getService(Components.interfaces.nsIIOService);
var nsIURL = ioService.newURI(url, null, null);
wbp.saveURI(nsIURL, null, null, null, null, file);
}
_addToDB(file, url, title, Zotero.Attachments.LINK_MODE_IMPORTED_URL,
mimeType, charsetID, sourceItemID, itemID);
Zotero.Notifier.trigger('add', 'item', itemID);
// Add to collections
if (parentCollectionIDs){
var ids = Zotero.flattenArguments(parentCollectionIDs);
for each(var id in ids){
var col = Zotero.Collections.get(id);
col.addItem(itemID);
}
}
Zotero.DB.commitTransaction();
if (mimeType == 'application/pdf') { if (mimeType == 'application/pdf') {
var f = function() { var f = function() {
Zotero.Fulltext.indexPDF(file, itemID); Zotero.Fulltext.indexPDF(file, itemID);
@ -513,14 +470,92 @@ Zotero.Attachments = new function(){
}; };
} }
// We don't have any way of knowing that the file is flushed to if (mimeType == 'text/html') {
// disk, so we just wait a second and hope for the best -- var sync = true;
// we'll index it later if it fails
var timer = Components.classes["@mozilla.org/timer;1"]. // Load WebPageDump code
createInstance(Components.interfaces.nsITimer); Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
timer.initWithCallback({notify: f}, 1000, .getService(Components.interfaces.mozIJSSubScriptLoader)
Components.interfaces.nsITimer.TYPE_ONE_SHOT); .loadSubScript("chrome://zotero/content/webpagedump/common.js");
Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
.getService(Components.interfaces.mozIJSSubScriptLoader)
.loadSubScript("chrome://zotero/content/webpagedump/domsaver.js");
wpdDOMSaver.init(file.path, document);
wpdDOMSaver.saveHTMLDocument();
_addToDB(file, url, title, Zotero.Attachments.LINK_MODE_IMPORTED_URL,
mimeType, charsetID, sourceItemID, itemID);
}
else {
Zotero.debug('Saving with saveURI()');
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
var wbp = Components
.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
.createInstance(nsIWBP);
wbp.persistFlags = nsIWBP.PERSIST_FLAGS_AUTODETECT_APPLY_CONVERSION
| nsIWBP.PERSIST_FLAGS_FROM_CACHE;
var ioService = Components.classes["@mozilla.org/network/io-service;1"]
.getService(Components.interfaces.nsIIOService);
var nsIURL = ioService.newURI(url, null, null);
wbp.progressListener = new Zotero.WebProgressFinishListener(function () {
try {
_addToDB(file, url, title, Zotero.Attachments.LINK_MODE_IMPORTED_URL,
mimeType, charsetID, sourceItemID, itemID);
Zotero.Notifier.trigger('add', 'item', itemID);
// We don't have any way of knowing that the file is flushed to
// disk, so we just wait a second and hope for the best --
// we'll index it later if it fails
//
// TODO: index later
var timer = Components.classes["@mozilla.org/timer;1"].
createInstance(Components.interfaces.nsITimer);
timer.initWithCallback({notify: f}, 1000,
Components.interfaces.nsITimer.TYPE_ONE_SHOT);
}
catch (e) {
// Clean up
var item = Zotero.Items.get(itemID);
item.erase();
throw (e);
}
});
wbp.saveURI(nsIURL, null, null, null, null, file);
}
// Add to collections
if (parentCollectionIDs){
var ids = Zotero.flattenArguments(parentCollectionIDs);
for each(var id in ids){
var col = Zotero.Collections.get(id);
col.addItem(itemID);
}
}
// Disable the Notifier during the commit if this is async
if (!sync) {
var disabled = Zotero.Notifier.disable();
}
Zotero.DB.commitTransaction();
if (disabled) {
Zotero.Notifier.enable();
}
if (sync) {
Zotero.Notifier.trigger('add', 'item', itemID);
// Wait a second before indexing (see note above)
var timer = Components.classes["@mozilla.org/timer;1"].
createInstance(Components.interfaces.nsITimer);
timer.initWithCallback({notify: f}, 1000,
Components.interfaces.nsITimer.TYPE_ONE_SHOT);
}
} }
catch (e) { catch (e) {
Zotero.DB.rollbackTransaction(); Zotero.DB.rollbackTransaction();
@ -539,8 +574,6 @@ Zotero.Attachments = new function(){
throw (e); throw (e);
} }
return itemID;
} }

View File

@ -461,6 +461,8 @@ Zotero.Fulltext = new function(){
var file = i.getFile(); var file = i.getFile();
if (!file){ if (!file){
Zotero.debug("No file to index for item " + i.getID()
+ " in Fulltext.indexItems()");
continue; continue;
} }

View File

@ -40,7 +40,9 @@ Zotero.MIME = new function(){
["From", 'text/plain'], ["From", 'text/plain'],
[">From", 'text/plain'], [">From", 'text/plain'],
["#!", 'text/plain'], ["#!", 'text/plain'],
["<?xml", 'text/xml'] ["<?xml", 'text/xml'],
["<!DOCTYPE html", 'text/html'],
["<html", 'text/html']
]; ];
var _textTypes = { var _textTypes = {