
WebPageDump hasn't been updated in years, it can result in multi-megabyte snapshots on pages with lots of JS and ads, and there are often rendering problems viewing snapshots of JS-heavy pages. ScrapBook X, a fork of ScrapBook (which WBP was based on), is actively maintained and works much better, but like ScrapBook it's not modular and it can produce massive snapshots (e.g., 12MB for a single page with lots of ads). Rather than produce ugly, broken-looking pages, we can run eligible pages through Firefox's Reader Mode (a modified Readability under the hood) and save those. While this won't always be perfect, most of the time it will save what people actually care about - the text of the page -- and it avoids filling up people's storage directories and storage accounts with junk. We should probably reduce the number of translators that save snapshots in general, but this lessens their impact. The current implementation will need to be updated for Standalone, either by including the Reader Mode files in Standalone or switching to other JS libraries. This strips the standard controls before saving, but it might be nice to provide some format options when viewing (or just run the page through Reader Mode again). We can also customize the default styling.
245 lines
6.8 KiB
JavaScript
245 lines
6.8 KiB
JavaScript
"use strict";
|
|
|
|
describe("Connector Server", function () {
|
|
Components.utils.import("resource://zotero-unit/httpd.js");
|
|
var win, connectorServerPath, testServerPath, httpd;
|
|
var testServerPort = 16213;
|
|
|
|
before(function* () {
|
|
Zotero.Prefs.set("httpServer.enabled", true);
|
|
yield resetDB({
|
|
thisArg: this,
|
|
skipBundledFiles: true
|
|
});
|
|
|
|
win = yield loadZoteroPane();
|
|
connectorServerPath = 'http://127.0.0.1:' + Zotero.Prefs.get('httpServer.port');
|
|
testServerPath = 'http://127.0.0.1:' + testServerPort;
|
|
});
|
|
|
|
beforeEach(function () {
|
|
httpd = new HttpServer();
|
|
httpd.start(testServerPort);
|
|
});
|
|
|
|
afterEach(function* () {
|
|
var defer = new Zotero.Promise.defer();
|
|
httpd.stop(() => defer.resolve());
|
|
yield defer.promise;
|
|
});
|
|
|
|
after(function () {
|
|
win.close();
|
|
});
|
|
|
|
describe("/connector/saveItems", function () {
|
|
// TODO: Test cookies
|
|
it("should save a translated item to the current selected collection", function* () {
|
|
var collection = yield createDataObject('collection');
|
|
yield waitForItemsLoad(win);
|
|
|
|
var body = {
|
|
items: [
|
|
{
|
|
itemType: "newspaperArticle",
|
|
title: "Title",
|
|
creators: [
|
|
{
|
|
firstName: "First",
|
|
lastName: "Last",
|
|
creatorType: "author"
|
|
}
|
|
],
|
|
attachments: [
|
|
{
|
|
title: "Attachment",
|
|
url: `${testServerPath}/attachment`,
|
|
mimeType: "text/html"
|
|
}
|
|
]
|
|
}
|
|
],
|
|
uri: "http://example.com"
|
|
};
|
|
|
|
httpd.registerPathHandler(
|
|
"/attachment",
|
|
{
|
|
handle: function (request, response) {
|
|
response.setStatusLine(null, 200, "OK");
|
|
response.write("<html><head><title>Title</title><body>Body</body></html>");
|
|
}
|
|
}
|
|
);
|
|
|
|
var promise = waitForItemEvent('add');
|
|
var req = yield Zotero.HTTP.request(
|
|
'POST',
|
|
connectorServerPath + "/connector/saveItems",
|
|
{
|
|
headers: {
|
|
"Content-Type": "application/json"
|
|
},
|
|
body: JSON.stringify(body)
|
|
}
|
|
);
|
|
|
|
// Check parent item
|
|
var ids = yield promise;
|
|
assert.lengthOf(ids, 1);
|
|
var item = Zotero.Items.get(ids[0]);
|
|
assert.equal(Zotero.ItemTypes.getName(item.itemTypeID), 'newspaperArticle');
|
|
assert.isTrue(collection.hasItem(item.id));
|
|
|
|
// Check attachment
|
|
promise = waitForItemEvent('add');
|
|
ids = yield promise;
|
|
assert.lengthOf(ids, 1);
|
|
item = Zotero.Items.get(ids[0]);
|
|
assert.isTrue(item.isImportedAttachment());
|
|
|
|
// Wait until indexing is done
|
|
yield waitForItemEvent('refresh');
|
|
});
|
|
});
|
|
|
|
describe("/connector/saveSnapshot", function () {
|
|
it("should save a webpage item and non-readerable snapshot to the current selected collection", function* () {
|
|
var collection = yield createDataObject('collection');
|
|
yield waitForItemsLoad(win);
|
|
|
|
// saveSnapshot saves parent and child before returning
|
|
var ids1, ids2;
|
|
var promise = waitForItemEvent('add').then(function (ids) {
|
|
ids1 = ids;
|
|
return waitForItemEvent('add').then(function (ids) {
|
|
ids2 = ids;
|
|
});
|
|
});
|
|
yield Zotero.HTTP.request(
|
|
'POST',
|
|
connectorServerPath + "/connector/saveSnapshot",
|
|
{
|
|
headers: {
|
|
"Content-Type": "application/json"
|
|
},
|
|
body: JSON.stringify({
|
|
url: "http://example.com",
|
|
html: "<html><head><title>Title</title><body>Body</body></html>"
|
|
})
|
|
}
|
|
);
|
|
|
|
assert.isTrue(promise.isFulfilled());
|
|
|
|
// Check parent item
|
|
assert.lengthOf(ids1, 1);
|
|
var item = Zotero.Items.get(ids1[0]);
|
|
assert.equal(Zotero.ItemTypes.getName(item.itemTypeID), 'webpage');
|
|
assert.isTrue(collection.hasItem(item.id));
|
|
assert.equal(item.getField('title'), 'Title');
|
|
|
|
// Check attachment
|
|
assert.lengthOf(ids2, 1);
|
|
item = Zotero.Items.get(ids2[0]);
|
|
assert.isTrue(item.isImportedAttachment());
|
|
assert.equal(item.getField('title'), 'Title');
|
|
var path = yield item.getFilePathAsync();
|
|
var fileContents = yield Zotero.File.getContentsAsync(path);
|
|
assert.notInclude(fileContents, 'moz-reader-content');
|
|
});
|
|
|
|
it("should save a webpage item and readerable snapshot to the current selected collection", function* () {
|
|
var collection = yield createDataObject('collection');
|
|
yield waitForItemsLoad(win);
|
|
|
|
var pageTitle = "Page Title";
|
|
var articleTitle = "Article Title";
|
|
var byline = "First Last";
|
|
var content = "<p>" + new Array(50).fill("").map(x => Zotero.Utilities.randomString()).join(" ") + "</p>"
|
|
+ "<p>" + new Array(50).fill("").map(x => Zotero.Utilities.randomString()).join(" ") + "</p>"
|
|
+ "<p>" + new Array(50).fill("").map(x => Zotero.Utilities.randomString()).join(" ") + "</p>"
|
|
|
|
// saveSnapshot saves parent and child before returning
|
|
var ids1, ids2;
|
|
var promise = waitForItemEvent('add').then(function (ids) {
|
|
ids1 = ids;
|
|
return waitForItemEvent('add').then(function (ids) {
|
|
ids2 = ids;
|
|
});
|
|
});
|
|
yield Zotero.HTTP.request(
|
|
'POST',
|
|
connectorServerPath + "/connector/saveSnapshot",
|
|
{
|
|
headers: {
|
|
"Content-Type": "application/json"
|
|
},
|
|
body: JSON.stringify({
|
|
url: "http://example.com/articles/1",
|
|
html: `<html><head><title>${pageTitle}</title><body>`
|
|
+ "<article>"
|
|
+ `<h1>${articleTitle}</h1>`
|
|
+ `<p class="author">${byline}</p>`
|
|
+ content
|
|
+ "</article>"
|
|
+ "</body></html>"
|
|
})
|
|
}
|
|
);
|
|
|
|
assert.isTrue(promise.isFulfilled());
|
|
|
|
// Check parent item
|
|
assert.lengthOf(ids1, 1);
|
|
var item = Zotero.Items.get(ids1[0]);
|
|
assert.equal(Zotero.ItemTypes.getName(item.itemTypeID), 'webpage');
|
|
assert.isTrue(collection.hasItem(item.id));
|
|
assert.equal(item.getField('title'), pageTitle);
|
|
|
|
// Check attachment
|
|
assert.lengthOf(ids2, 1);
|
|
item = Zotero.Items.get(ids2[0]);
|
|
assert.isTrue(item.isImportedAttachment());
|
|
assert.equal(item.getField('title'), pageTitle);
|
|
var path = yield item.getFilePathAsync();
|
|
var fileContents = yield Zotero.File.getContentsAsync(path);
|
|
assert.include(fileContents, 'moz-reader-content');
|
|
assert.include(fileContents, content.match(/[a-z]+/i)[0]);
|
|
});
|
|
|
|
it("should save a PDF to the current selected collection", function* () {
|
|
var collection = yield createDataObject('collection');
|
|
yield waitForItemsLoad(win);
|
|
|
|
var file = getTestDataDirectory();
|
|
file.append('test.pdf');
|
|
httpd.registerFile("/test.pdf", file);
|
|
|
|
var ids;
|
|
var promise = waitForItemEvent('add');
|
|
yield Zotero.HTTP.request(
|
|
'POST',
|
|
connectorServerPath + "/connector/saveSnapshot",
|
|
{
|
|
headers: {
|
|
"Content-Type": "application/json"
|
|
},
|
|
body: JSON.stringify({
|
|
url: testServerPath + "/test.pdf",
|
|
pdf: true
|
|
})
|
|
}
|
|
);
|
|
|
|
var ids = yield promise;
|
|
|
|
assert.lengthOf(ids, 1);
|
|
var item = Zotero.Items.get(ids[0]);
|
|
assert.isTrue(item.isImportedAttachment());
|
|
assert.equal(item.attachmentContentType, 'application/pdf');
|
|
assert.isTrue(collection.hasItem(item.id));
|
|
});
|
|
});
|
|
});
|