- add xml/e4x and xml/dom dataMode options
- parse XML encoding declarations in translate.js - fix errors importing MODS from clipboard
This commit is contained in:
parent
e3e8881282
commit
40443c6b91
|
@ -1820,55 +1820,23 @@ Zotero.Translate.prototype._importDoneSniffing = function(charset) {
|
||||||
/*
|
/*
|
||||||
* set up import for IO
|
* set up import for IO
|
||||||
*/
|
*/
|
||||||
Zotero.Translate.prototype._importConfigureIO = function(charset) {
|
Zotero.Translate.prototype._importConfigureIO = function(charset) {
|
||||||
if(this.configOptions.dataMode && (this.configOptions.dataMode == "rdf" || this.configOptions.dataMode == "rdf/n3")) {
|
|
||||||
if(!this._rdf) {
|
|
||||||
Zotero.debug("Translate: initializing data store");
|
|
||||||
// initialize data store
|
|
||||||
this._rdf = new Zotero.RDF.AJAW.RDFIndexedFormula();
|
|
||||||
|
|
||||||
Zotero.debug("Translate: loading data");
|
|
||||||
// load data into store
|
|
||||||
var IOService = Components.classes['@mozilla.org/network/io-service;1']
|
|
||||||
.getService(Components.interfaces.nsIIOService);
|
|
||||||
if(this._storage) {
|
|
||||||
// parse from string
|
|
||||||
var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
|
|
||||||
var nodeTree = (new DOMParser()).parseFromString(this._storage, 'text/xml');
|
|
||||||
} else {
|
|
||||||
// get URI
|
|
||||||
var fileHandler = IOService.getProtocolHandler("file")
|
|
||||||
.QueryInterface(Components.interfaces.nsIFileProtocolHandler);
|
|
||||||
var baseURI = fileHandler.getURLSpecFromFile(this.location);
|
|
||||||
|
|
||||||
// load XML from file using xmlhttp for charset detection
|
|
||||||
var xmlhttp = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"].
|
|
||||||
createInstance(Components.interfaces.nsIXMLHttpRequest);
|
|
||||||
xmlhttp.overrideMimeType("text/xml");
|
|
||||||
xmlhttp.open("GET", baseURI, false); // Synchronous
|
|
||||||
xmlhttp.send("");
|
|
||||||
|
|
||||||
var nodeTree = xmlhttp.responseXML;
|
|
||||||
if(nodeTree.getElementsByTagName("parsererror").length) {
|
|
||||||
this._rdf = false;
|
|
||||||
throw("RDF/XML parse error; loading data into data store failed");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var parser = new Zotero.RDF.AJAW.RDFParser(this._rdf);
|
|
||||||
parser.parse(nodeTree, baseURI);
|
|
||||||
}
|
|
||||||
|
|
||||||
Zotero.debug("adding apis");
|
|
||||||
// add RDF features to sandbox
|
|
||||||
this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(this._storage) {
|
if(this._storage) {
|
||||||
// import from string
|
// import from string
|
||||||
this._storageFunctions(true);
|
|
||||||
this._storagePointer = 0;
|
this._storagePointer = 0;
|
||||||
|
|
||||||
|
if(this.configOptions.dataMode && this.configOptions.dataMode == "xml/dom" || this.configOptions.dataMode == "rdf") {
|
||||||
|
// for DOM XML, handle with parseFromString
|
||||||
|
if(this.configOptions.dataMode == "xml/dom" || !this._rdf) {
|
||||||
|
var xmlNodes = Components.classes["@mozilla.org/xmlextras/domparser;1"]
|
||||||
|
.createInstance(Components.interfaces.nsIDOMParser)
|
||||||
|
.parseFromString(this._storage, "text/xml");
|
||||||
|
}
|
||||||
|
} else if(this.configOptions.dataMode && this.configOptions.dataMode == "xml/e4x") {
|
||||||
|
var xmlNodes = new XML(this._storage.replace(/<\?xml[^>]+\?>/, ""));
|
||||||
|
} else {
|
||||||
|
this._storageFunctions(true);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// import from file
|
// import from file
|
||||||
|
|
||||||
|
@ -1885,8 +1853,10 @@ Zotero.Translate.prototype._importConfigureIO = function(charset) {
|
||||||
this._streams.push(this._inputStream);
|
this._streams.push(this._inputStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var sStream = null;
|
||||||
var bomLength = 0;
|
var bomLength = 0;
|
||||||
if(charset === undefined || (charset && charset.length > 3 && charset.substr(0, 3) == "UTF")) {
|
if(!charset && this._charset) charset = this._charset;
|
||||||
|
if(!charset || (charset && charset.length > 3 && charset.substr(0, 3) == "UTF")) {
|
||||||
// seek past BOM
|
// seek past BOM
|
||||||
var bomCharset = this._importGetBOM();
|
var bomCharset = this._importGetBOM();
|
||||||
var bomLength = (bomCharset ? BOMs[bomCharset].length : 0);
|
var bomLength = (bomCharset ? BOMs[bomCharset].length : 0);
|
||||||
|
@ -1895,80 +1865,185 @@ Zotero.Translate.prototype._importConfigureIO = function(charset) {
|
||||||
if(bomCharset) charset = this._charset = bomCharset;
|
if(bomCharset) charset = this._charset = bomCharset;
|
||||||
}
|
}
|
||||||
|
|
||||||
var intlStream = null;
|
// look for/seek past XML charset declaration
|
||||||
if(charset) {
|
if(this.configOptions.dataMode && (this.configOptions.dataMode == "xml/e4x"
|
||||||
// if have detected charset
|
|| this.configOptions.dataMode == "xml/dom"
|
||||||
Zotero.debug("Translate: Using detected character set "+charset, 3);
|
|| this.configOptions.dataMode == "rdf")) {
|
||||||
// convert from detected charset
|
|
||||||
intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
|
sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
|
||||||
.createInstance(Components.interfaces.nsIConverterInputStream);
|
.createInstance(Components.interfaces.nsIScriptableInputStream);
|
||||||
intlStream.init(this._inputStream, charset, 65535,
|
sStream.init(this._inputStream);
|
||||||
Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
|
|
||||||
me._streams.push(intlStream);
|
|
||||||
}
|
|
||||||
|
|
||||||
// allow translator to set charset
|
|
||||||
this._sandbox.Zotero.setCharacterSet = function(charset) {
|
|
||||||
// seek back to the beginning
|
|
||||||
me._inputStream.QueryInterface(Components.interfaces.nsISeekableStream)
|
|
||||||
.seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, bomLength);
|
|
||||||
|
|
||||||
intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
|
|
||||||
.createInstance(Components.interfaces.nsIConverterInputStream);
|
|
||||||
try {
|
|
||||||
intlStream.init(me._inputStream, charset, 65535,
|
|
||||||
Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
|
|
||||||
} catch(e) {
|
|
||||||
throw "Text encoding not supported";
|
|
||||||
}
|
|
||||||
me._streams.push(intlStream);
|
|
||||||
}
|
|
||||||
|
|
||||||
var str = new Object();
|
|
||||||
if(this.configOptions.dataMode && this.configOptions.dataMode == "line") { // line by line reading
|
|
||||||
this._inputStream.QueryInterface(Components.interfaces.nsILineInputStream);
|
|
||||||
|
|
||||||
this._sandbox.Zotero.read = function() {
|
|
||||||
if(intlStream && intlStream instanceof Components.interfaces.nsIUnicharLineInputStream) {
|
|
||||||
var amountRead = intlStream.readLine(str);
|
|
||||||
} else {
|
|
||||||
var amountRead = me._inputStream.readLine(str);
|
|
||||||
}
|
|
||||||
if(amountRead) {
|
|
||||||
return str.value;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else { // block reading
|
|
||||||
var sStream;
|
|
||||||
|
|
||||||
this._sandbox.Zotero.read = function(amount) {
|
|
||||||
if(intlStream) {
|
|
||||||
// read from international stream, if one is available
|
|
||||||
var amountRead = intlStream.readString(amount, str);
|
|
||||||
|
|
||||||
if(amountRead) {
|
|
||||||
return str.value;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// allocate sStream on the fly
|
|
||||||
if(!sStream) {
|
|
||||||
sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
|
|
||||||
.createInstance(Components.interfaces.nsIScriptableInputStream);
|
|
||||||
sStream.init(me._inputStream);
|
|
||||||
}
|
|
||||||
|
|
||||||
// read from the scriptable input stream
|
|
||||||
var string = sStream.read(amount);
|
|
||||||
return string;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// attach sStream to stack of streams to close
|
|
||||||
this._streams.push(sStream);
|
this._streams.push(sStream);
|
||||||
|
|
||||||
|
// read until we see if the file begins with a parse instruction
|
||||||
|
const whitespaceRe = /\s/g;
|
||||||
|
var read;
|
||||||
|
do {
|
||||||
|
read = sStream.read(1);
|
||||||
|
} while(whitespaceRe.test(read))
|
||||||
|
|
||||||
|
if(read != "<") throw "XML load error: text does not start with <";
|
||||||
|
|
||||||
|
var firstPart = read + sStream.read(4);
|
||||||
|
if(firstPart == "<?xml") {
|
||||||
|
// got a parse instruction, read until it ends
|
||||||
|
read = true;
|
||||||
|
while((read !== false) && (read !== ">")) {
|
||||||
|
read = sStream.read(1);
|
||||||
|
firstPart += read;
|
||||||
|
}
|
||||||
|
|
||||||
|
var encodingRe = /encoding=['"]([^'"]+)['"]/;
|
||||||
|
var m = encodingRe.exec(firstPart);
|
||||||
|
if(m) {
|
||||||
|
try {
|
||||||
|
var charconv = Components.classes["@mozilla.org/charset-converter-manager;1"]
|
||||||
|
.getService(Components.interfaces.nsICharsetConverterManager)
|
||||||
|
.getCharsetTitle(m[1]);
|
||||||
|
if(charconv) charset = this._charset = m[1];
|
||||||
|
} catch(e) {}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
this._inputStream.QueryInterface(Components.interfaces.nsISeekableStream)
|
||||||
|
.seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, bomLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!charset) charset = "UTF-8";
|
||||||
|
}
|
||||||
|
|
||||||
|
if(this.configOptions.dataMode && this.configOptions.dataMode == "xml/dom" || this.configOptions.dataMode == "rdf") {
|
||||||
|
// for DOM XML, pass charset to parseFromStream
|
||||||
|
if(this.configOptions.dataMode == "xml/dom" || !this._rdf) {
|
||||||
|
var xmlNodes = Components.classes["@mozilla.org/xmlextras/domparser;1"]
|
||||||
|
.createInstance(Components.interfaces.nsIDOMParser)
|
||||||
|
.parseFromStream(this._inputStream, charset, this.location.fileSize, "text/xml");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
var intlStream = null;
|
||||||
|
if(charset) {
|
||||||
|
// if have detected charset
|
||||||
|
Zotero.debug("Translate: Using detected character set "+charset, 3);
|
||||||
|
// convert from detected charset
|
||||||
|
intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
|
||||||
|
.createInstance(Components.interfaces.nsIConverterInputStream);
|
||||||
|
intlStream.init(this._inputStream, charset, 65535,
|
||||||
|
Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
|
||||||
|
me._streams.push(intlStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(this.configOptions.dataMode && this.configOptions.dataMode == "xml/e4x") {
|
||||||
|
// read in 16384 byte increments
|
||||||
|
var xmlNodes = "";
|
||||||
|
var str = {};
|
||||||
|
while(intlStream.readString(16384, str)) {
|
||||||
|
xmlNodes += str.value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// create xml
|
||||||
|
xmlNodes = new XML(xmlNodes);
|
||||||
|
} else {
|
||||||
|
// standard text reading tools
|
||||||
|
|
||||||
|
// allow translator to set charset
|
||||||
|
this._sandbox.Zotero.setCharacterSet = function(charset) {
|
||||||
|
// seek back to the beginning
|
||||||
|
me._inputStream.QueryInterface(Components.interfaces.nsISeekableStream)
|
||||||
|
.seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, bomLength);
|
||||||
|
|
||||||
|
intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
|
||||||
|
.createInstance(Components.interfaces.nsIConverterInputStream);
|
||||||
|
try {
|
||||||
|
intlStream.init(me._inputStream, charset, 65535,
|
||||||
|
Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
|
||||||
|
} catch(e) {
|
||||||
|
throw "Text encoding not supported";
|
||||||
|
}
|
||||||
|
me._streams.push(intlStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
var str = new Object();
|
||||||
|
if(this.configOptions.dataMode && this.configOptions.dataMode == "line") { // line by line reading
|
||||||
|
this._inputStream.QueryInterface(Components.interfaces.nsILineInputStream);
|
||||||
|
|
||||||
|
this._sandbox.Zotero.read = function() {
|
||||||
|
if(intlStream && intlStream instanceof Components.interfaces.nsIUnicharLineInputStream) {
|
||||||
|
var amountRead = intlStream.readLine(str);
|
||||||
|
} else {
|
||||||
|
var amountRead = me._inputStream.readLine(str);
|
||||||
|
}
|
||||||
|
if(amountRead) {
|
||||||
|
return str.value;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else { // block reading
|
||||||
|
this._sandbox.Zotero.read = function(amount) {
|
||||||
|
if(intlStream) {
|
||||||
|
// read from international stream, if one is available
|
||||||
|
var amountRead = intlStream.readString(amount, str);
|
||||||
|
|
||||||
|
if(amountRead) {
|
||||||
|
return str.value;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// allocate sStream on the fly
|
||||||
|
if(!sStream) {
|
||||||
|
sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
|
||||||
|
.createInstance(Components.interfaces.nsIScriptableInputStream);
|
||||||
|
sStream.init(me._inputStream);
|
||||||
|
this._streams.push(sStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
// read from the scriptable input stream
|
||||||
|
var string = sStream.read(amount);
|
||||||
|
return string;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(this.configOptions.dataMode) {
|
||||||
|
// make sure DOM XML actually got parsed
|
||||||
|
if(xmlNodes && (this.configOptions.dataMode == "rdf" || this.configOptions.dataMode == "xml/dom")
|
||||||
|
&& xmlNodes.getElementsByTagName("parsererror").length) {
|
||||||
|
this._rdf = undefined;
|
||||||
|
throw("XML parser error: loading data into data store failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
if(this.configOptions.dataMode == "rdf") {
|
||||||
|
// set up RDF
|
||||||
|
if(!this._rdf) {
|
||||||
|
// get URI
|
||||||
|
var IOService = Components.classes['@mozilla.org/network/io-service;1']
|
||||||
|
.getService(Components.interfaces.nsIIOService);
|
||||||
|
if(this._storage) {
|
||||||
|
var baseURI = this.location ? this.location : null;
|
||||||
|
} else {
|
||||||
|
var fileHandler = IOService.getProtocolHandler("file")
|
||||||
|
.QueryInterface(Components.interfaces.nsIFileProtocolHandler);
|
||||||
|
var baseURI = fileHandler.getURLSpecFromFile(this.location);
|
||||||
|
}
|
||||||
|
|
||||||
|
Zotero.debug("Translate: initializing data store");
|
||||||
|
this._rdf = new Zotero.RDF.AJAW.RDFIndexedFormula();
|
||||||
|
|
||||||
|
Zotero.debug("Translate: loading data");
|
||||||
|
var parser = new Zotero.RDF.AJAW.RDFParser(this._rdf);
|
||||||
|
parser.parse(xmlNodes, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
// add RDF features to sandbox
|
||||||
|
this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf);
|
||||||
|
} else if(this.configOptions.dataMode == "xml/e4x" || this.configOptions.dataMode == "xml/dom") {
|
||||||
|
// add getXML function
|
||||||
|
this._sandbox.Zotero.getXML = function() {
|
||||||
|
return xmlNodes;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
"label":"MODS",
|
"label":"MODS",
|
||||||
"creator":"Simon Kornblith",
|
"creator":"Simon Kornblith",
|
||||||
"target":"xml",
|
"target":"xml",
|
||||||
"minVersion":"1.0.8",
|
"minVersion":"2.0b6.3",
|
||||||
"maxVersion":"",
|
"maxVersion":"",
|
||||||
"priority":50,
|
"priority":50,
|
||||||
"inRepository":true,
|
"inRepository":true,
|
||||||
|
@ -12,13 +12,11 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
Zotero.addOption("exportNotes", true);
|
Zotero.addOption("exportNotes", true);
|
||||||
|
Zotero.configure("dataMode", "xml/e4x");
|
||||||
|
|
||||||
function detectImport() {
|
function detectImport() {
|
||||||
var read = Zotero.read(512);
|
var name = Zotero.getXML().name();
|
||||||
var modsTagRegexp = /<mods[^>]+>/
|
return name.uri == "http://www.loc.gov/mods/v3" && (name.localName == "modsCollection" || name.localName == "mods");
|
||||||
if(modsTagRegexp.test(read)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
|
var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
|
||||||
|
@ -341,47 +339,11 @@ function doImport() {
|
||||||
"web site":"webpage"
|
"web site":"webpage"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
var read;
|
|
||||||
|
|
||||||
// read until we see if the file begins with a parse instruction
|
|
||||||
read = " ";
|
|
||||||
while(read == " " || read == "\n" || read == "\r") {
|
|
||||||
read = Zotero.read(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
var firstPart = read + Zotero.read(4);
|
|
||||||
if(firstPart == "<?xml") {
|
|
||||||
// got a parse instruction, read until it ends
|
|
||||||
read = true;
|
|
||||||
while((read !== false) && (read !== ">")) {
|
|
||||||
read = Zotero.read(1);
|
|
||||||
firstPart += read;
|
|
||||||
}
|
|
||||||
var encodingRe = /encoding=['"]([^'"]+)['"]/;
|
|
||||||
var m = encodingRe.exec(firstPart);
|
|
||||||
// set character set
|
|
||||||
try {
|
|
||||||
Zotero.setCharacterSet(m[1]);
|
|
||||||
} catch(e) {
|
|
||||||
Zotero.setCharacterSet("utf-8");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Zotero.setCharacterSet("utf-8");
|
|
||||||
}
|
|
||||||
|
|
||||||
// read in 16384 byte increments
|
|
||||||
var text = "";
|
|
||||||
while(read = Zotero.read(16384)) {
|
|
||||||
text += read;
|
|
||||||
}
|
|
||||||
text = text.replace(/<\?xml[^>]+\?>/, "");
|
|
||||||
|
|
||||||
// parse with E4X
|
// parse with E4X
|
||||||
var m = new Namespace("http://www.loc.gov/mods/v3");
|
var m = new Namespace("http://www.loc.gov/mods/v3");
|
||||||
// why does this default namespace declaration not work!?
|
// why does this default namespace declaration not work!?
|
||||||
default xml namespace = m;
|
default xml namespace = m;
|
||||||
var xml = new XML(text);
|
var xml = Zotero.getXML();
|
||||||
|
|
||||||
if(xml.m::mods.length()) {
|
if(xml.m::mods.length()) {
|
||||||
var modsElements = xml.m::mods;
|
var modsElements = xml.m::mods;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user