Fix streaming UTF8 decoding

This commit is contained in:
Daniel Huigens 2018-05-29 15:38:34 +02:00
parent f0633f00ea
commit f79a3f718d

View File

@ -56,10 +56,6 @@ Literal.prototype.setText = function(text, format='utf8') {
this.data = null; this.data = null;
}; };
/**
 * Decodes UTF8 text and then normalizes its end-of-line sequences.
 * @param text - value handed to util.decode_utf8
 * @returns the result of util.nativeEOL applied to the decoded text
 */
function normalize(text) {
  const decoded = util.decode_utf8(text);
  return util.nativeEOL(decoded);
}
/** /**
* Returns literal data packets as native JavaScript string * Returns literal data packets as native JavaScript string
* with normalized end of line to \n * with normalized end of line to \n
@ -68,19 +64,22 @@ function normalize(text) {
/**
 * Returns literal data packets as native JavaScript string
 * with normalized end of line to \n.
 *
 * The text is decoded lazily on first call and cached in this.text; every
 * call hands back stream.clone(this.text). Decoding is incremental, so
 * this.data may arrive in chunks that split a multi-byte UTF8 sequence or a
 * \r\n pair.
 * @returns the result of stream.clone on the cached, decoded text
 */
Literal.prototype.getText = function() {
  if (this.text === null) {
    // A streaming decoder buffers incomplete UTF8 sequences between chunks.
    const decoder = new TextDecoder('utf8');
    // Holds a trailing \r until the next chunk shows whether a \n follows.
    let pendingCR = '';

    const decodeChunk = (bytes, isLast) => {
      // decode UTF8; on the last chunk the decoder flushes whatever it buffered
      const text = pendingCR + decoder.decode(bytes, { stream: !isLast });
      pendingCR = '';
      // normalize EOL to \n
      const normalized = util.nativeEOL(text);
      // Only hold back a trailing \r while more data may follow; on the last
      // chunk it must be emitted, otherwise the final \r would be lost.
      if (!isLast && normalized.endsWith('\r')) {
        pendingCR = '\r';
        return normalized.slice(0, -1);
      }
      return normalized;
    };

    this.text = stream.transform(
      stream.clone(this.data),
      // Wrap explicitly so extra callback arguments are never read as isLast.
      chunk => decodeChunk(chunk, false),
      () => decodeChunk(new Uint8Array(), true)
    );
  }
  return stream.clone(this.text);
};