Better MIME type detection of Office files

For at least one Windows user, a .docx file was being interpreted as
text/plain. Instead of relying entirely on the system, hard-code some
extensions we know. (More can be added.)

Also:

- Determine MIME type when opening files instead of using stored type,
  since we might have gotten smarter
This commit is contained in:
Dan Stillman 2013-01-28 22:44:02 -05:00
parent 6520a71788
commit 56bb5b17ad
2 changed files with 66 additions and 22 deletions

View File

@ -29,11 +29,8 @@ Zotero.MIME = new function(){
this.getPrimaryExtension = getPrimaryExtension; this.getPrimaryExtension = getPrimaryExtension;
this.sniffForMIMEType = sniffForMIMEType; this.sniffForMIMEType = sniffForMIMEType;
this.sniffForBinary = sniffForBinary; this.sniffForBinary = sniffForBinary;
this.getMIMETypeFromData = getMIMETypeFromData;
this.getMIMETypeFromFile = getMIMETypeFromFile;
this.hasNativeHandler = hasNativeHandler; this.hasNativeHandler = hasNativeHandler;
this.hasInternalHandler = hasInternalHandler; this.hasInternalHandler = hasInternalHandler;
this.fileHasInternalHandler = fileHasInternalHandler;
// Magic numbers // Magic numbers
var _snifferEntries = [ var _snifferEntries = [
@ -55,6 +52,41 @@ Zotero.MIME = new function(){
]; ];
// Hard-coded extension -> MIME type table, consulted before falling back to
// the operating system's own (sometimes wrong) extension mapping.
// Keys are lowercase extensions without the leading dot.
var _extensions = {
    // MS Office: Word
    doc: "application/msword",
    dot: "application/msword",
    docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    dotx: "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
    docm: "application/vnd.ms-word.document.macroEnabled.12",
    dotm: "application/vnd.ms-word.template.macroEnabled.12",

    // MS Office: Excel
    xls: "application/vnd.ms-excel",
    xlt: "application/vnd.ms-excel",
    xla: "application/vnd.ms-excel",
    xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    xltx: "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
    xlsm: "application/vnd.ms-excel.sheet.macroEnabled.12",
    xltm: "application/vnd.ms-excel.template.macroEnabled.12",
    xlam: "application/vnd.ms-excel.addin.macroEnabled.12",
    xlsb: "application/vnd.ms-excel.sheet.binary.macroEnabled.12",

    // MS Office: PowerPoint
    ppt: "application/vnd.ms-powerpoint",
    pot: "application/vnd.ms-powerpoint",
    pps: "application/vnd.ms-powerpoint",
    ppa: "application/vnd.ms-powerpoint",
    pptx: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    potx: "application/vnd.openxmlformats-officedocument.presentationml.template",
    ppsx: "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
    ppam: "application/vnd.ms-powerpoint.addin.macroEnabled.12",
    pptm: "application/vnd.ms-powerpoint.presentation.macroEnabled.12",
    potm: "application/vnd.ms-powerpoint.template.macroEnabled.12",
    ppsm: "application/vnd.ms-powerpoint.slideshow.macroEnabled.12",

    // OpenOffice/LibreOffice
    odt: "application/vnd.oasis.opendocument.text",

    // PDF
    pdf: "application/pdf"
};
var _textTypes = { var _textTypes = {
'application/xhtml+xml': true, 'application/xhtml+xml': true,
'application/xml': true, 'application/xml': true,
@ -245,22 +277,19 @@ Zotero.MIME = new function(){
* *
* ext is an optional file extension hint if data sniffing is unsuccessful * ext is an optional file extension hint if data sniffing is unsuccessful
*/ */
function getMIMETypeFromData(str, ext){ this.getMIMETypeFromData = function (str, ext){
var mimeType = sniffForMIMEType(str); var mimeType = sniffForMIMEType(str);
if (mimeType){ if (mimeType){
Zotero.debug('Detected MIME type ' + mimeType); Zotero.debug('Detected MIME type ' + mimeType);
return mimeType; return mimeType;
} }
try { if (ext) {
if (ext) { mimeType = this.getMIMETypeFromExtension(ext);
var mimeType = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"] if (mimeType) {
.getService(Components.interfaces.nsIMIMEService).getTypeFromExtension(ext);
Zotero.debug('Got MIME type ' + mimeType + ' from extension');
return mimeType; return mimeType;
} }
} }
catch (e) {}
var mimeType = sniffForBinary(str); var mimeType = sniffForBinary(str);
Zotero.debug('Cannot determine MIME type from magic number or extension -- settling for ' + mimeType); Zotero.debug('Cannot determine MIME type from magic number or extension -- settling for ' + mimeType);
@ -268,15 +297,34 @@ Zotero.MIME = new function(){
} }
/*
 * Determine a MIME type from a file extension
 *
 * The hard-coded _extensions table is checked first (case-insensitively);
 * only if it has no entry for the extension is the platform's
 * nsIMIMEService asked.
 *
 * ext is the extension as keyed in _extensions (e.g. 'docx', no leading dot)
 *
 * Returns the MIME type, or false if the table has no entry and the
 * service lookup throws
 */
this.getMIMETypeFromExtension = function (ext) {
    var type = false;

    // Table keys are lowercase, so normalize for the table lookup only;
    // the caller's original value is what is passed to the service and logged
    var key = (typeof ext === 'string') ? ext.toLowerCase() : ext;

    // Own-property check, so that names inherited from Object.prototype
    // ('constructor', 'toString', '__proto__', ...) are never mistaken
    // for table entries and returned as a "MIME type"
    if (Object.prototype.hasOwnProperty.call(_extensions, key)) {
        type = _extensions[key];
    }
    else {
        try {
            type = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"]
                .getService(Components.interfaces.nsIMIMEService).getTypeFromExtension(ext);
        }
        catch (e) {
            // Deliberately best-effort: a failed lookup leaves type as false,
            // which is reported by the debug line below.
            // NOTE(review): presumably the service throws when no type is
            // registered for the extension -- confirm
        }
    }

    Zotero.debug("Got MIME type " + type + " from extension '" + ext + "'");
    return type;
};
/* /*
* Try to determine the MIME type of the file, using a few different * Try to determine the MIME type of the file, using a few different
* techniques * techniques
*/ */
function getMIMETypeFromFile(file){ this.getMIMETypeFromFile = function (file) {
var str = Zotero.File.getSample(file); var str = Zotero.File.getSample(file);
var ext = Zotero.File.getExtension(file); var ext = Zotero.File.getExtension(file);
return getMIMETypeFromData(str, ext); return this.getMIMETypeFromData(str, ext);
} }
@ -378,8 +426,8 @@ Zotero.MIME = new function(){
} }
function fileHasInternalHandler(file){ this.fileHasInternalHandler = function (file){
var mimeType = getMIMETypeFromFile(file); var mimeType = this.getMIMETypeFromFile(file);
var ext = Zotero.File.getExtension(file); var ext = Zotero.File.getExtension(file);
return hasInternalHandler(mimeType, ext); return hasInternalHandler(mimeType, ext);
} }

View File

@ -3443,14 +3443,10 @@ var ZoteroPane = new function()
if(forceExternalViewer !== undefined) { if(forceExternalViewer !== undefined) {
var externalViewer = forceExternalViewer; var externalViewer = forceExternalViewer;
} else { } else {
var mimeType = attachment.attachmentMIMEType; var mimeType = Zotero.MIME.getMIMETypeFromFile(file);
// If no MIME type specified, try to detect again (I guess in case
// we've gotten smarter since the file was imported?) //var mimeType = attachment.attachmentMIMEType;
if (!mimeType) { // TODO: update DB with new info if changed?
mimeType = Zotero.MIME.getMIMETypeFromFile(file);
// TODO: update DB with new info
}
var ext = Zotero.File.getExtension(file); var ext = Zotero.File.getExtension(file);
var externalViewer = Zotero.isStandalone || (!Zotero.MIME.hasNativeHandler(mimeType, ext) && var externalViewer = Zotero.isStandalone || (!Zotero.MIME.hasNativeHandler(mimeType, ext) &&