diff --git a/chrome/content/zotero/feedSettings.js b/chrome/content/zotero/feedSettings.js index 41734c115..000c81433 100644 --- a/chrome/content/zotero/feedSettings.js +++ b/chrome/content/zotero/feedSettings.js @@ -118,7 +118,7 @@ var Zotero_Feed_Settings = new function() { document.documentElement.getButton('accept').disabled = true; }; - this.validateUrl = function() { + this.validateUrl = Zotero.Promise.coroutine(function* () { if (feedReader) { feedReader.terminate(); feedReader = null; @@ -128,36 +128,37 @@ var Zotero_Feed_Settings = new function() { urlTainted = false; if (!url) return; - let fr = feedReader = new Zotero.FeedReader(url); - fr.feedProperties - .then( feed => { - if (feedReader !== fr || urlTainted) return; - - let title = document.getElementById('feed-title'); - if (!data.url && feed.title) { - title.value = feed.title; - } - - let ttl = document.getElementById('feed-ttl'); - if (!data.url && feed.ttl) { - ttl.value = Math.floor(feed.ttl / 60) || 1; - } - - document.getElementById('feed-url').value = url; - - urlIsValid = true; - title.disabled = false; - ttl.disabled = false; - document.getElementById('feed-cleanAfter').disabled = false; - document.documentElement.getButton('accept').disabled = false; - }) - .catch( e => { - Zotero.debug(e); - }) - .finally( () => { - if (feedReader === fr) feedReader = null; - }); - }; + try { + let fr = feedReader = new Zotero.FeedReader(url); + yield fr.process(); + let feed = fr.feedProperties; + if (feedReader !== fr || urlTainted) return; + + let title = document.getElementById('feed-title'); + if (!data.url && feed.title) { + title.value = feed.title; + } + + let ttl = document.getElementById('feed-ttl'); + if (!data.url && feed.ttl) { + ttl.value = Math.floor(feed.ttl / 60) || 1; + } + + document.getElementById('feed-url').value = url; + + urlIsValid = true; + title.disabled = false; + ttl.disabled = false; + document.getElementById('feed-cleanAfter').disabled = false; + document.documentElement.getButton('accept').disabled = false; + } + catch (e) { + Zotero.debug(e); + } + finally { + if (feedReader === fr) feedReader = null; + } + }); this.accept = function() { data.url = document.getElementById('feed-url').value; diff --git a/chrome/content/zotero/xpcom/data/feed.js b/chrome/content/zotero/xpcom/data/feed.js index b29274c39..b3dfa2055 100644 --- a/chrome/content/zotero/xpcom/data/feed.js +++ b/chrome/content/zotero/xpcom/data/feed.js @@ -325,6 +325,7 @@ Zotero.Feed.prototype._updateFeed = Zotero.Promise.coroutine(function* () { yield this.clearExpiredItems(); try { let fr = new Zotero.FeedReader(this.url); + yield fr.process(); let itemIterator = new fr.ItemIterator(); let item, toAdd = [], processedGUIDs = []; while (item = yield itemIterator.next().value) { diff --git a/chrome/content/zotero/xpcom/feedReader.js b/chrome/content/zotero/xpcom/feedReader.js index d0a1b0903..794d33cff 100644 --- a/chrome/content/zotero/xpcom/feedReader.js +++ b/chrome/content/zotero/xpcom/feedReader.js @@ -52,477 +52,481 @@ * @method {void} terminate Stops retrieving/parsing the feed. Data parsed up * to this point is still available. */ -Zotero.FeedReader = new function() { - let ios = Components.classes["@mozilla.org/network/io-service;1"] - .getService(Components.interfaces.nsIIOService); +Zotero.FeedReader = function(url) { + if (!url) throw new Error("Feed URL must be supplied"); + - /***************************** - * Item processing functions * - *****************************/ - - /** - * Determine item type based on item data - */ - function guessItemType(item) { - // Default to journalArticle - item.itemType = 'journalArticle'; + this._url = url; + this._feedItems = [Zotero.Promise.defer()]; + this._feedProcessed = Zotero.Promise.defer(); + + let feedFetched = Zotero.Promise.defer(); + feedFetched.promise.then(function(feed) { + let info = {}; - if (item.ISSN) { - return; // journalArticle + info.title = feed.title ? feed.title.plainText() : ''; + info.subtitle = feed.subtitle ? feed.subtitle.plainText() : ''; + + if (feed.updated) info.updated = new Date(feed.updated); + + // categories: MDN says "not yet implemented" + + info.creators = Zotero.FeedReader._processCreators(feed, 'authors', 'author'); + + // TODO: image as icon + + let publicationTitle = Zotero.FeedReader._getFeedField(feed, 'publicationName', 'prism') + || Zotero.FeedReader._getFeedField(feed, 'pubTitle'); + if (publicationTitle) info.publicationTitle = publicationTitle; + + let publisher = Zotero.FeedReader._getFeedField(feed, 'publisher', 'dc'); + if (publisher) info.publisher = publisher; + + let rights = (feed.rights && feed.rights.plainText()) + || Zotero.FeedReader._getFeedField(feed, 'copyright', 'prism') + || Zotero.FeedReader._getFeedField(feed, 'rights', 'dc') + || Zotero.FeedReader._getFeedField(feed, 'copyright'); + if (rights) info.rights = rights; + + let issn = Zotero.FeedReader._getFeedField(feed, 'issn', 'prism'); + if (issn) info.ISSN = issn; + + let isbn = Zotero.FeedReader._getFeedField(feed, 'isbn', 'prism') + || Zotero.FeedReader._getFeedField(feed, 'isbn') + if (isbn) info.ISBN = isbn; + + let language = Zotero.FeedReader._getFeedField(feed, 'language', 'dc') + || Zotero.FeedReader._getFeedField(feed, 'language'); + if (language) info.language = language; + + let ttl = Zotero.FeedReader._getFeedField(feed, 'ttl'); + if (ttl) info.ttl = ttl; + + this._feedProperties = info; + this._feed = feed; + return info; + }.bind(this)).then(function(){ + let items = this._feed.items; + if (items && items.length) { + for (let i=0; i { + if (!result.doc) { + this.terminate("No Feed"); return; } + + let newFeed = result.doc.QueryInterface(Components.interfaces.nsIFeed); + feedFetched.resolve(newFeed); } }; - /* - * Fetch creators from given field of a feed entry - */ - function processCreators(feedEntry, field, role) { - let names = [], - nameStr; - try { - let personArr = feedEntry[field]; // Seems like this part can throw if there is no author data in the feed - for (let i=0; i 1 - // If only one comma and first part has more than one space, - // it's probably not lastName, firstName - || (commas == 1 && name.split(/\s*,/)[0].indexOf(' ') != -1) - ) - ) { - // Probably multiple authors listed in a single field - nameStr = name; - break; // For clarity. personArr.length == 1 anyway - } else { - names.push(name); - } - } - } catch(e) { - if (e.result != Components.results.NS_ERROR_FAILURE) throw e - - if (field != 'authors') return []; - - // ieeexplore places these in "authors"... sigh - nameStr = getFeedField(feedEntry, null, 'authors'); - if (nameStr) nameStr = Zotero.Utilities.trimInternal(nameStr); - if (!nameStr) return []; - } - - if (nameStr) { - names = nameStr.split(/\s(?:and|&)\s|\s*[,;]\s*/); - } - - let creators = []; - for (let i=0; i= items.length + }; + }; + + return iterator; + } +}, {lazy: true}); + + +/***************************** + * Item processing functions * + *****************************/ + +/** + * Determine item type based on item data + */ +Zotero.FeedReader._guessItemType = function(item) { + // Default to journalArticle + item.itemType = 'journalArticle'; + + if (item.ISSN) { + return; // journalArticle + } + + if (item.ISBN) { + item.itemType = 'bookSection'; return; } - /* - * Parse feed entry into a Zotero item - */ - function getFeedItem(feedEntry, feedInfo) { - // ID is not required, but most feeds have these and we have to rely on them - // to handle updating properly - if (!feedEntry.id) { - Zotero.debug("FeedReader: Feed item missing an ID"); + if (item.publicationType) { + let type = item.publicationType.toLowerCase(); + if (type.indexOf('conference') != -1) { + item.itemType = 'conferencePaper'; return; } - - let item = { - guid: feedEntry.id - }; - - if (feedEntry.title) item.title = getRichText(feedEntry.title, 'title'); - - if (feedEntry.summary) { - item.abstractNote = getRichText(feedEntry.summary, 'abstractNote'); + if (type.indexOf('journal') != -1) { + item.itemType = 'journalArticle'; + return; + } + if (type.indexOf('book') != -1) { + item.itemType = 'bookSection'; + return; + } + } +}; + +/* + * Fetch creators from given field of a feed entry + */ +Zotero.FeedReader._processCreators = function(feedEntry, field, role) { + let names = [], + nameStr; + try { + let personArr = feedEntry[field]; // Seems like this part can throw if there is no author data in the feed + for (let i=0; i 1 + // If only one comma and first part has more than one space, + // it's probably not lastName, firstName + || (commas == 1 && name.split(/\s*,/)[0].indexOf(' ') != -1) + ) + ) { + // Probably multiple authors listed in a single field + nameStr = name; + break; // For clarity. personArr.length == 1 anyway + } else { + names.push(name); } } + } catch(e) { + if (e.result != Components.results.NS_ERROR_FAILURE) throw e; - if (feedEntry.link) item.url = feedEntry.link.spec; + if (field != 'authors') return []; - if (feedEntry.updated) item.dateModified = new Date(feedEntry.updated); + // ieeexplore places these in "authors"... sigh + nameStr = Zotero.FeedReader._getFeedField(feedEntry, 'authors'); + if (nameStr) nameStr = Zotero.Utilities.trimInternal(nameStr); + if (!nameStr) return []; + } + + if (nameStr) { + names = nameStr.split(/\s(?:and|&)\s|\s*[,;]\s*/); + } + + let creators = []; + for (let i=0; i { - let items = feed.items; - if (items && items.length) { - for (let i=0; i { - // Make sure the last promise gets resolved to null - let lastItem = this._feedItems[this._feedItems.length - 1]; - lastItem.resolve(null); - }); - - // Set up asynchronous feed processor - let feedProcessor = Components.classes["@mozilla.org/feed-processor;1"] - .createInstance(Components.interfaces.nsIFeedProcessor); - - let feedUrl = ios.newURI(url, null, null); - feedProcessor.parseAsync(null, feedUrl); - - feedProcessor.listener = { - /* - * MDN suggests that we could use nsIFeedProgressListener to handle the feed - * as it gets loaded, but this is actually not implemented (as of 32.0.3), - * so we have to load the whole feed and handle it in handleResult. - */ - handleResult: (result) => { - if (!result.doc) { - this.terminate("No Feed"); - return; - } - - let newFeed = result.doc.QueryInterface(Components.interfaces.nsIFeed); - this._feed.resolve(newFeed); - } - }; - - Zotero.debug("FeedReader: Fetching feed from " + feedUrl.spec); - - this._channel = ios.newChannelFromURI2(feedUrl, null, - Services.scriptSecurityManager.getSystemPrincipal(), null, - Ci.nsILoadInfo.SEC_NORMAL, Ci.nsIContentPolicy.TYPE_OTHER); - this._channel.asyncOpen(feedProcessor, null); // Sends an HTTP request + if (!item.dateModified) { + // When there's no reliable modification date, we can assume that item doesn't get updated + Zotero.debug("FeedReader: Feed item missing a modification date (" + item.guid + ")"); } - Zotero.defineProperty(FeedReader.prototype, 'feedProperties', { - get: function() this._feedProperties - }); + if (!item.date && item.dateModified) { + // Use lastModified date + item.date = Zotero.FeedReader._formatDate(item.dateModified); + } - /* - * Feed item iterator - * Each iteration returns a _promise_ for an item. The promise _MUST_ be - * resolved before requesting the next item. - * The last item will always be resolved to `null`, unless the feed processing - * is terminated ahead of time, in which case it will be rejected with the reason - * for termination. - */ - Zotero.defineProperty(FeedReader.prototype, 'ItemIterator', { - get: function() { - let items = this._feedItems; - - let iterator = function() { - this.index = 0; - }; - - iterator.prototype.next = function() { - let item = items[this.index++]; - return { - value: item ? item.promise : null, - done: this.index >= items.length - }; - }; - - return iterator; - } - }, {lazy: true}); + // Convert date modified to string, since those are directly comparable + if (item.dateModified) item.dateModified = Zotero.Date.dateToSQL(item.dateModified, true); - /* - * Terminate feed processing at any given time - * @param {String} status Reason for terminating processing - */ - FeedReader.prototype.terminate = function(status) { - Zotero.debug("FeedReader: Terminating feed reader (" + status + ")"); - - // Reject feed promise if not resolved yet - if (this._feed.promise.isPending()) { - this._feed.reject(status); - } - - // Reject feed item promise if not resolved yet - let lastItem = this._feedItems[this._feedItems.length - 1]; - if (lastItem.promise.isPending()) { - lastItem.reject(status); - } - - // Close feed connection - if (this._channel.isPending) { - this._channel.cancel(Components.results.NS_BINDING_ABORTED); - } - }; + if (feedEntry.rights) item.rights = Zotero.FeedReader._getRichText(feedEntry.rights, 'rights'); - return FeedReader; -}; \ No newline at end of file + item.creators = Zotero.FeedReader._processCreators(feedEntry, 'authors', 'author'); + if (!item.creators.length) { + // Use feed authors as item author. Maybe not the best idea. + for (let i=0; i + + + + Liftoff News + http://liftoff.msfc.nasa.gov/ + Liftoff to Space Exploration. + en-us + Tue, 10 Jun 2003 04:00:00 GMT + Tue, 10 Jun 2003 09:41:01 GMT + http://blogs.law.harvard.edu/tech/rss + Weblog Editor 2.0 + editor@example.com + webmaster@example.com + + Star City + http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp + How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>. + Tue, 03 Jun 2003 09:39:21 GMT + http://liftoff.msfc.nasa.gov/2003/06/03.html#item573 + + + Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st. + Fri, 30 May 2003 11:06:42 GMT + http://liftoff.msfc.nasa.gov/2003/05/30.html#item572 + + + The Engine That Does More + http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp + Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that. + Tue, 27 May 2003 08:37:32 GMT + http://liftoff.msfc.nasa.gov/2003/05/27.html#item571 + + + Astronauts' Dirty Laundry + http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp + Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options. + Tue, 20 May 2003 08:56:02 GMT + http://liftoff.msfc.nasa.gov/2003/05/20.html#item570 + + + \ No newline at end of file diff --git a/test/tests/data/feedDetailed.rss b/test/tests/data/feedDetailed.rss new file mode 100644 index 000000000..c298fcc1b --- /dev/null +++ b/test/tests/data/feedDetailed.rss @@ -0,0 +1,89 @@ + + + + Feed + Feed Description + http://www.example.com/feed.rss + Publisher + en + ©2016 Published by Publisher + Feed Author + Publication + 0000-0000 + 2016-01-07-08:00 + ©2016 Published by Publisher + rights@example.com + 60 + + + + + + + + + + Title 1 + http://www.example.com/item1 + Description 1 + en + 10 + 20 + Title 1 + Author1 A.T. Rohtua, Author2 A. Auth, Author3 Autho + Contributor1 A.T. Rotubirtnoc, Contributor2 C. Contrib, Contributor3 Contr + Publisher + Feed + 2016-01-07 + 10.1000/182 + 0000-0000 + Publication + 2016-01-07 + Article + + + Title 2 + http://www.example.com/item2 + Description 2 + en + 10 + 20 + Title 2 + Author1 A.T. Rohtua, Author2 A. Auth, Author3 Autho + Contributor1 A.T. Rotubirtnoc, Contributor2 C. Contrib, Contributor3 Contr + Publisher + Feed + 2016-01-07 + 10.1000/182 + 0000-0000 + Publication + 2016-01-07 + Article + + + Title 3 + http://www.example.com/item3 + Description 3 + en + Some Publication + 10 + 20 + Title 3 + Author1 A.T. Rohtua, Author2 A. Auth, Author3 Autho + Contributor1 A.T. Rotubirtnoc, Contributor2 C. Contrib, Contributor3 Contr + Publisher + Feed + 2016-01-07 + 10.1000/182 + 0000-0000 + Publication + 2016-01-07 + Article + + \ No newline at end of file diff --git a/test/tests/feedReaderTest.js b/test/tests/feedReaderTest.js new file mode 100644 index 000000000..441246808 --- /dev/null +++ b/test/tests/feedReaderTest.js @@ -0,0 +1,167 @@ +"use strict"; + +describe("Zotero.FeedReader", function () { + + var htmlUrl = getTestDataItemUrl("test.html"); + + var feedUrl = getTestDataItemUrl("feed.rss"); + var feedInfo = { + title: 'Liftoff News', + subtitle: 'Liftoff to Space Exploration.', + updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"), + creators: [{ + firstName: '', + lastName: 'editor@example.com', + creatorType: 'author', + fieldMode: 1 + }], + language: 'en-us' + }; + + var detailedFeedUrl = getTestDataItemUrl("feedDetailed.rss"); + var detailedFeedInfo = { + title: 'Feed', + subtitle: 'Feed Description', + creators: [{firstName: 'Feed', lastName: 'Author', creatorType: 'author'}], + publicationTitle: 'Publication', + publisher: 'Publisher', + rights: '©2016 Published by Publisher', + ISSN: '0000-0000', + language: 'en' + }; + + describe('FeedReader()', function () { + it('should throw if url not provided', function() { + assert.throw(() => new Zotero.FeedReader()) + }); + + it('should throw if url invalid', function() { + assert.throw(() => new Zotero.FeedReader('invalid url')) + }); + }); + + describe('#process()', function() { + it('should reject if the provided url is not a valid feed', function* () { + let fr = new Zotero.FeedReader(htmlUrl); + let e = yield getPromiseError(fr.process()); + assert.ok(e); + e = yield getPromiseError(fr._feedItems[fr._feedItems.length-1].promise); + assert.ok(e); + }); + + it('should set #feedProperties on FeedReader object', function* () { + let fr = new Zotero.FeedReader(feedUrl); + assert.throw(() => fr.feedProperties); + yield fr.process(); + assert.ok(fr.feedProperties); + }); + }); + + describe('#terminate()', function() { + it('should reject last feed item and feed processing promise if feed not processed yet', function* () { + let fr = new Zotero.FeedReader(feedUrl); + fr.terminate("test"); + let e = yield getPromiseError(fr.process()); + assert.ok(e); + e = yield getPromiseError(fr._feedItems[fr._feedItems.length-1].promise); + assert.ok(e); + }); + + it('should reject last feed item if feed processed', function* () { + let fr = new Zotero.FeedReader(feedUrl); + yield fr.process(); + fr.terminate("test"); + let e = yield getPromiseError(fr._feedItems[fr._feedItems.length-1].promise); + assert.ok(e); + }); + }); + + describe('#feedProperties', function() { + it('should throw if accessed before feed is processed', function () { + let fr = new Zotero.FeedReader(feedUrl); + assert.throw(() => fr.feedProperties); + }); + + it('should have correct values for a sparse feed', function* () { + let fr = new Zotero.FeedReader(feedUrl); + yield fr.process(); + assert.deepEqual(fr.feedProperties, feedInfo); + }); + + it('should have correct values for a detailed feed', function* () { + let fr = new Zotero.FeedReader(detailedFeedUrl); + yield fr.process(); + assert.deepEqual(fr.feedProperties, detailedFeedInfo); + }); + }); + + describe('#ItemIterator()', function() { + it('should throw if called before feed is resolved', function() { + let fr = new Zotero.FeedReader(feedUrl); + assert.throw(() => new fr.ItemIterator); + }); + + it('should parse items correctly for a sparse feed', function* () { + let expected = { + guid: 'http://liftoff.msfc.nasa.gov/2003/06/03.html#item573', + title: 'Star City', + abstractNote: 'How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia\'s Star City.', + url: 'http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp', + dateModified: '2003-06-03 09:39:21', + date: '2003-06-03 09:39:21', + creators: [{ + firstName: '', + lastName: 'editor@example.com', + creatorType: 'author', + fieldMode: 1 + }], + language: 'en-us', + itemType: 'journalArticle' + }; + + let fr = new Zotero.FeedReader(feedUrl); + yield fr.process(); + let itemIterator = new fr.ItemIterator(); + let item = yield itemIterator.next().value; + assert.deepEqual(item, expected); + }); + + it('should parse items correctly for a detailed feed', function* () { + let expected = { guid: 'http://www.example.com/item1', + title: 'Title 1', + abstractNote: 'Description 1', + url: 'http://www.example.com/item1', + dateModified: '2016-01-07 00:00:00', + date: '2016-01-07', + creators: [ + { firstName: 'Author1 A. T.', lastName: 'Rohtua', creatorType: 'author' }, + { firstName: 'Author2 A.', lastName: 'Auth', creatorType: 'author' }, + { firstName: 'Author3', lastName: 'Autho', creatorType: 'author' }, + { firstName: 'Contributor1 A. T.', lastName: 'Rotubirtnoc', creatorType: 'contributor' }, + { firstName: 'Contributor2 C.', lastName: 'Contrib', creatorType: 'contributor' }, + { firstName: 'Contributor3', lastName: 'Contr', creatorType: 'contributor' } + ], + publicationTitle: 'Publication', + ISSN: '0000-0000', + publisher: 'Publisher', + rights: '©2016 Published by Publisher', + language: 'en', + itemType: 'journalArticle' + }; + + let fr = new Zotero.FeedReader(detailedFeedUrl); + yield fr.process(); + let itemIterator = new fr.ItemIterator(); + let item = yield itemIterator.next().value; + assert.deepEqual(item, expected); + }); + it('should resolve last item with null', function* () { + let fr = new Zotero.FeedReader(feedUrl); + yield fr.process(); + let itemIterator = new fr.ItemIterator(); + let item; + while(item = yield itemIterator.next().value); + assert.isNull(item); + }); + }); +}) \ No newline at end of file