From f3a66085f50f07a7dcc62e177909be0556bd2868 Mon Sep 17 00:00:00 2001
From: Dan Stillman
Date: Wed, 9 Aug 2006 18:37:34 +0000
Subject: [PATCH] Closes #173, Try to detect content type of linked pages without loading entire file

Closes #174, Don't load images and attached files when detecting content type in linkFromURL()

If mime type not provided, Scholar.Files.linkFromURL() now uses XMLHTTPRequest HEAD request to get the content type without loading file (thanks Simon for the idea)

If title not provided, try to figure it out from URL, though not particularly intelligently (last slash)

Note that order of title and mimeType parameters is now swapped

This code should be a bit smarter about unexpected conditions
---
 .../content/scholar/xpcom/data_access.js      | 30 +++++++++----------
 .../content/scholar/xpcom/utilities.js        | 23 ++++++++++++++
 2 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/chrome/chromeFiles/content/scholar/xpcom/data_access.js b/chrome/chromeFiles/content/scholar/xpcom/data_access.js
index e33e99e8e..4f40e0736 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/data_access.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/data_access.js
@@ -1969,26 +1969,24 @@ Scholar.Files = new function(){
 	}
 	
 	
-	function linkFromURL(url, sourceItemID, title, mimeType){
-		// If we're given the title and mime type, don't bother fetching the page
+	function linkFromURL(url, sourceItemID, mimeType, title){
+		// If no title provided, figure it out from the URL
+		if (!title){
+			title = url.substring(url.lastIndexOf('/')+1);
+		}
+		
+		// If we have the title and mime type, skip loading
 		if (title && mimeType){
-			_addToDB(null, url, title, this.LINK_MODE_LINKED_URL, mimeType, null, sourceItemID);
+			_addToDB(null, url, title, this.LINK_MODE_LINKED_URL, mimeType,
+				null, sourceItemID);
 			return;
 		}
 		
-		// TODO: try to get title and content type without fetching the whole page
-		// - https://chnm.gmu.edu/trac/scholar/ticket/173
-		// (or, failing that, at least check the file size and don't load huge files)
-		//
-		// DEBUG: don't load images and other attached files
-		// - https://chnm.gmu.edu/trac/scholar/ticket/174
-		var browser = Scholar.Browser.createHiddenBrowser();
-		browser.addEventListener("pageshow", function(){
-			Scholar.Files.linkFromDocument(browser.contentDocument, sourceItemID);
-			browser.removeEventListener("pageshow", arguments.callee, true);
-			Scholar.Browser.deleteHiddenBrowser(browser);
-		}, true);
-		browser.loadURI(url, null, null, null, null);
+		// Otherwise do a head request for the mime type
+		Scholar.Utilities.HTTP.doHead(url, function(obj){
+			_addToDB(null, url, title, Scholar.Files.LINK_MODE_LINKED_URL,
+				obj.channel.contentType, null, sourceItemID);
+		});
 	}
 	
 	
diff --git a/chrome/chromeFiles/content/scholar/xpcom/utilities.js b/chrome/chromeFiles/content/scholar/xpcom/utilities.js
index 4e92408a7..e01e170a5 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/utilities.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/utilities.js
@@ -375,9 +375,11 @@ Scholar.Utilities.Ingester.HTTPUtilities.prototype.doOptions = function(url, onS
 Scholar.Utilities.HTTP = new function() {
 	this.doGet = doGet;
 	this.doPost = doPost;
+	this.doHead = doHead;
 	this.doOptions = doOptions;
 	this.browserIsOffline = browserIsOffline;
 	
+	
 	/**
 	 * Send an HTTP GET request via XMLHTTPRequest
 	 *
@@ -444,6 +446,27 @@ Scholar.Utilities.HTTP = new function() {
 	}
 	
 	
+	function doHead(url, callback1, callback2) {
+		Scholar.debug("HTTP HEAD "+url);
+		if (this.browserIsOffline()){
+			return false;
+		}
+		
+		var xmlhttp = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"]
+						.createInstance();
+		
+		var test = xmlhttp.open('HEAD', url, true);
+		
+		xmlhttp.onreadystatechange = function(){
+			_stateChange(xmlhttp, callback1, callback2);
+		};
+		
+		xmlhttp.send(null);
+		
+		return true;
+	}
+	
+	
 	/**
 	 * Send an HTTP OPTIONS request via XMLHTTPRequest
 	 *