Fixes #766, Zotero saves text/html URLs with .pdf extensions as PDFs
Addresses #460, importFromURL fails when importing PDFs from servers that do not properly support HEAD requests Now inspects supposed PDFs after download and deletes if not actually PDF format Also: - Fixed bug when running importFromDocument() on a PDF on Windows that would result in an incomplete or missing (since r1688) attachment item - importFromDocument() no longer returns an itemID, since it can be partly asynchronous now - Added rudimentary 'text/html' support for Zotero.MIME.sniffForMIMEType()
This commit is contained in:
parent
2167094e61
commit
deeab05a64
|
@ -185,6 +185,8 @@ Zotero.Attachments = new function(){
|
|||
// Override MIME type to application/pdf if extension is .pdf --
|
||||
// workaround for sites that respond to the HEAD request with an
|
||||
// invalid MIME type (https://www.zotero.org/trac/ticket/460)
|
||||
//
|
||||
// Downloaded file is inspected below and deleted if actually HTML
|
||||
if (ext == 'pdf') {
|
||||
mimeType = 'application/pdf';
|
||||
}
|
||||
|
@ -246,12 +248,7 @@ Zotero.Attachments = new function(){
|
|||
attachmentItem.setField('url', url);
|
||||
attachmentItem.setField('accessDate', "CURRENT_TIMESTAMP");
|
||||
// Don't send a Notifier event on the incomplete item
|
||||
var disabled = Zotero.Notifier.disable();
|
||||
attachmentItem.save();
|
||||
if (disabled) {
|
||||
Zotero.Notifier.enable();
|
||||
}
|
||||
var itemID = attachmentItem.getID();
|
||||
var itemID = attachmentItem.save();
|
||||
|
||||
// Add to collections
|
||||
if (parentCollectionIDs){
|
||||
|
@ -272,6 +269,15 @@ Zotero.Attachments = new function(){
|
|||
|
||||
wbp.progressListener = new Zotero.WebProgressFinishListener(function(){
|
||||
try {
|
||||
var str = Zotero.File.getSample(file);
|
||||
if (Zotero.MIME.sniffForMIMEType(str) != 'application/pdf') {
|
||||
Zotero.debug("Downloaded PDF did not have MIME type "
|
||||
+ "'application/pdf' in Attachments.importFromURL()", 2);
|
||||
var item = Zotero.Items.get(itemID);
|
||||
item.erase();
|
||||
return;
|
||||
}
|
||||
|
||||
_addToDB(file, url, title, Zotero.Attachments.LINK_MODE_IMPORTED_URL,
|
||||
mimeType, null, sourceItemID, itemID);
|
||||
|
||||
|
@ -281,6 +287,8 @@ Zotero.Attachments = new function(){
|
|||
// is flushed to disk, so we just wait a second
|
||||
// and hope for the best -- we'll index it later
|
||||
// if it fails
|
||||
//
|
||||
// TODO: index later
|
||||
var timer = Components.classes["@mozilla.org/timer;1"].
|
||||
createInstance(Components.interfaces.nsITimer);
|
||||
timer.initWithCallback({notify: function() {
|
||||
|
@ -289,30 +297,24 @@ Zotero.Attachments = new function(){
|
|||
}
|
||||
catch (e) {
|
||||
// Clean up
|
||||
if (itemID) {
|
||||
var item = Zotero.Items.get(itemID);
|
||||
if (item) {
|
||||
item.erase();
|
||||
}
|
||||
|
||||
try {
|
||||
var destDir = Zotero.getStorageDirectory();
|
||||
destDir.append(itemID);
|
||||
if (destDir.exists()) {
|
||||
destDir.remove(true);
|
||||
}
|
||||
}
|
||||
catch (e) {}
|
||||
}
|
||||
var item = Zotero.Items.get(itemID);
|
||||
item.erase();
|
||||
|
||||
throw (e);
|
||||
}
|
||||
});
|
||||
|
||||
// Disable the Notifier during the commit
|
||||
var disabled = Zotero.Notifier.disable();
|
||||
|
||||
// The attachment is still incomplete here, but we can't risk
|
||||
// leaving the transaction open if the callback never triggers
|
||||
Zotero.DB.commitTransaction();
|
||||
|
||||
if (disabled) {
|
||||
Zotero.Notifier.enable();
|
||||
}
|
||||
|
||||
wbp.saveURI(nsIURL, null, null, null, null, file);
|
||||
}
|
||||
catch (e){
|
||||
|
@ -400,7 +402,8 @@ Zotero.Attachments = new function(){
|
|||
// thread, but at least it lets the menu close)
|
||||
setTimeout(function() {
|
||||
if (Zotero.Fulltext.isCachedMIMEType(mimeType)) {
|
||||
Zotero.Fulltext.indexItems([itemID]);
|
||||
// No file, so no point running the PDF indexer
|
||||
//Zotero.Fulltext.indexItems([itemID]);
|
||||
}
|
||||
else {
|
||||
Zotero.Fulltext.indexDocument(document, itemID);
|
||||
|
@ -412,9 +415,7 @@ Zotero.Attachments = new function(){
|
|||
|
||||
|
||||
/*
|
||||
* Save a snapshot -- uses synchronous WebPageDump
|
||||
*
|
||||
* Returns itemID of attachment
|
||||
* Save a snapshot -- uses synchronous WebPageDump or asynchronous saveURI()
|
||||
*/
|
||||
function importFromDocument(document, sourceItemID, forceTitle, parentCollectionIDs) {
|
||||
Zotero.debug('Importing attachment from document');
|
||||
|
@ -456,50 +457,6 @@ Zotero.Attachments = new function(){
|
|||
var fileName = _getFileNameFromURL(url, mimeType);
|
||||
file.append(fileName);
|
||||
|
||||
if (mimeType == 'text/html') {
|
||||
// Load WebPageDump code
|
||||
Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
|
||||
.getService(Components.interfaces.mozIJSSubScriptLoader)
|
||||
.loadSubScript("chrome://zotero/content/webpagedump/common.js");
|
||||
|
||||
Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
|
||||
.getService(Components.interfaces.mozIJSSubScriptLoader)
|
||||
.loadSubScript("chrome://zotero/content/webpagedump/domsaver.js");
|
||||
|
||||
wpdDOMSaver.init(file.path, document)
|
||||
wpdDOMSaver.saveHTMLDocument()
|
||||
}
|
||||
else {
|
||||
Zotero.debug('Saving with saveURI()');
|
||||
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
|
||||
var wbp = Components
|
||||
.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
|
||||
.createInstance(nsIWBP);
|
||||
wbp.persistFlags = nsIWBP.PERSIST_FLAGS_AUTODETECT_APPLY_CONVERSION
|
||||
| nsIWBP.PERSIST_FLAGS_FROM_CACHE;
|
||||
var ioService = Components.classes["@mozilla.org/network/io-service;1"]
|
||||
.getService(Components.interfaces.nsIIOService);
|
||||
var nsIURL = ioService.newURI(url, null, null);
|
||||
wbp.saveURI(nsIURL, null, null, null, null, file);
|
||||
}
|
||||
|
||||
|
||||
_addToDB(file, url, title, Zotero.Attachments.LINK_MODE_IMPORTED_URL,
|
||||
mimeType, charsetID, sourceItemID, itemID);
|
||||
|
||||
Zotero.Notifier.trigger('add', 'item', itemID);
|
||||
|
||||
// Add to collections
|
||||
if (parentCollectionIDs){
|
||||
var ids = Zotero.flattenArguments(parentCollectionIDs);
|
||||
for each(var id in ids){
|
||||
var col = Zotero.Collections.get(id);
|
||||
col.addItem(itemID);
|
||||
}
|
||||
}
|
||||
|
||||
Zotero.DB.commitTransaction();
|
||||
|
||||
if (mimeType == 'application/pdf') {
|
||||
var f = function() {
|
||||
Zotero.Fulltext.indexPDF(file, itemID);
|
||||
|
@ -513,14 +470,92 @@ Zotero.Attachments = new function(){
|
|||
};
|
||||
}
|
||||
|
||||
// We don't have any way of knowing that the file is flushed to
|
||||
// disk, so we just wait a second and hope for the best --
|
||||
// we'll index it later if it fails
|
||||
var timer = Components.classes["@mozilla.org/timer;1"].
|
||||
createInstance(Components.interfaces.nsITimer);
|
||||
timer.initWithCallback({notify: f}, 1000,
|
||||
Components.interfaces.nsITimer.TYPE_ONE_SHOT);
|
||||
if (mimeType == 'text/html') {
|
||||
var sync = true;
|
||||
|
||||
// Load WebPageDump code
|
||||
Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
|
||||
.getService(Components.interfaces.mozIJSSubScriptLoader)
|
||||
.loadSubScript("chrome://zotero/content/webpagedump/common.js");
|
||||
|
||||
Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
|
||||
.getService(Components.interfaces.mozIJSSubScriptLoader)
|
||||
.loadSubScript("chrome://zotero/content/webpagedump/domsaver.js");
|
||||
|
||||
wpdDOMSaver.init(file.path, document);
|
||||
wpdDOMSaver.saveHTMLDocument();
|
||||
|
||||
_addToDB(file, url, title, Zotero.Attachments.LINK_MODE_IMPORTED_URL,
|
||||
mimeType, charsetID, sourceItemID, itemID);
|
||||
}
|
||||
else {
|
||||
Zotero.debug('Saving with saveURI()');
|
||||
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
|
||||
var wbp = Components
|
||||
.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
|
||||
.createInstance(nsIWBP);
|
||||
wbp.persistFlags = nsIWBP.PERSIST_FLAGS_AUTODETECT_APPLY_CONVERSION
|
||||
| nsIWBP.PERSIST_FLAGS_FROM_CACHE;
|
||||
var ioService = Components.classes["@mozilla.org/network/io-service;1"]
|
||||
.getService(Components.interfaces.nsIIOService);
|
||||
var nsIURL = ioService.newURI(url, null, null);
|
||||
wbp.progressListener = new Zotero.WebProgressFinishListener(function () {
|
||||
try {
|
||||
_addToDB(file, url, title, Zotero.Attachments.LINK_MODE_IMPORTED_URL,
|
||||
mimeType, charsetID, sourceItemID, itemID);
|
||||
|
||||
Zotero.Notifier.trigger('add', 'item', itemID);
|
||||
|
||||
// We don't have any way of knowing that the file is flushed to
|
||||
// disk, so we just wait a second and hope for the best --
|
||||
// we'll index it later if it fails
|
||||
//
|
||||
// TODO: index later
|
||||
var timer = Components.classes["@mozilla.org/timer;1"].
|
||||
createInstance(Components.interfaces.nsITimer);
|
||||
timer.initWithCallback({notify: f}, 1000,
|
||||
Components.interfaces.nsITimer.TYPE_ONE_SHOT);
|
||||
}
|
||||
catch (e) {
|
||||
// Clean up
|
||||
var item = Zotero.Items.get(itemID);
|
||||
item.erase();
|
||||
|
||||
throw (e);
|
||||
}
|
||||
});
|
||||
wbp.saveURI(nsIURL, null, null, null, null, file);
|
||||
}
|
||||
|
||||
// Add to collections
|
||||
if (parentCollectionIDs){
|
||||
var ids = Zotero.flattenArguments(parentCollectionIDs);
|
||||
for each(var id in ids){
|
||||
var col = Zotero.Collections.get(id);
|
||||
col.addItem(itemID);
|
||||
}
|
||||
}
|
||||
|
||||
// Disable the Notifier during the commit if this is async
|
||||
if (!sync) {
|
||||
var disabled = Zotero.Notifier.disable();
|
||||
}
|
||||
|
||||
Zotero.DB.commitTransaction();
|
||||
|
||||
if (disabled) {
|
||||
Zotero.Notifier.enable();
|
||||
}
|
||||
|
||||
if (sync) {
|
||||
Zotero.Notifier.trigger('add', 'item', itemID);
|
||||
|
||||
// Wait a second before indexing (see note above)
|
||||
var timer = Components.classes["@mozilla.org/timer;1"].
|
||||
createInstance(Components.interfaces.nsITimer);
|
||||
timer.initWithCallback({notify: f}, 1000,
|
||||
Components.interfaces.nsITimer.TYPE_ONE_SHOT);
|
||||
}
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.DB.rollbackTransaction();
|
||||
|
@ -539,8 +574,6 @@ Zotero.Attachments = new function(){
|
|||
|
||||
throw (e);
|
||||
}
|
||||
|
||||
return itemID;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -461,6 +461,8 @@ Zotero.Fulltext = new function(){
|
|||
|
||||
var file = i.getFile();
|
||||
if (!file){
|
||||
Zotero.debug("No file to index for item " + i.getID()
|
||||
+ " in Fulltext.indexItems()");
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,9 @@ Zotero.MIME = new function(){
|
|||
["From", 'text/plain'],
|
||||
[">From", 'text/plain'],
|
||||
["#!", 'text/plain'],
|
||||
["<?xml", 'text/xml']
|
||||
["<?xml", 'text/xml'],
|
||||
["<!DOCTYPE html", 'text/html'],
|
||||
["<html", 'text/html']
|
||||
];
|
||||
|
||||
var _textTypes = {
|
||||
|
|
Loading…
Reference in New Issue
Block a user