more async/await
This commit is contained in: parent f8768360ee, commit 036f12441b

 zimmer.js | 477
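The commit continues replacing Bluebird's Promise.coroutine generators with native async/await across zimmer.js (the Promise.map call that survives in rawLoader is still Bluebird). The recurring rewrite, shown in isolation as a minimal sketch with hypothetical stand-ins for getData and transform:

    const Promise = require( 'bluebird' ) // needed only by the old style

    const getData = () => Promise.resolve( 42 ) // hypothetical stand-in
    const transform = x => x + 1                // hypothetical stand-in

    // Before: a generator driven by Promise.coroutine, advanced with yield
    function oldStyle () {
        return Promise.coroutine( function* () {
            const data = yield getData()
            return transform( data )
        }) ()
    }

    // After: a native async function, same control flow, no wrapper call
    async function newStyle () {
        const data = await getData()
        return transform( data )
    }

In the hunks below, .then chains become sequential awaits and .catch handlers become try/catch blocks, method by method.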
@@ -317,10 +317,10 @@ class Writer {
 // Cluster
 //
 
-// ClusterSizeThreshold = 8 * 1024 * 1024
-var ClusterSizeThreshold = 4 * 1024 * 1024
-// ClusterSizeThreshold = 2 * 1024 * 1024
-// ClusterSizeThreshold = 2 * 1024
+// var ClusterSizeThreshold = 8 * 1024 * 1024
+//~ var ClusterSizeThreshold = 4 * 1024 * 1024
+var ClusterSizeThreshold = 1 * 1024 * 1024
+// var ClusterSizeThreshold = 2 * 1024 * 1024
 
 class Cluster {
 constructor ( compressible ) {
@@ -354,7 +354,7 @@ class Cluster {
 // <2nd Blob> data n/a n/a data of the <2nd Blob>
 // ... data ... n/a ...
 
-save () {
+async save () {
 //~ log( 'Cluster.prototype.save', this.compressible, this.blobs )
 
 var nBlobs = this.blobs.length
@@ -379,15 +379,14 @@ class Cluster {
 var compression = this.compressible ? 4 : 0
 var id = this.id
 
-return Promise.coroutine( function* () {
 if ( compression ) {
 // https://tukaani.org/lzma/benchmarks.html
-data = yield lzma.compress( data, 7 ) // 3 | lzma.PRESET_EXTREME )
+data = await lzma.compress( data, 7 ) // 3 | lzma.PRESET_EXTREME )
 log( 'Cluster lzma compressed' )
 }
 
 log( 'Cluster write', id, compression )
-const offset = yield out.write( Buffer.concat([ Buffer.from([ compression ]), data ]))
+const offset = await out.write( Buffer.concat([ Buffer.from([ compression ]), data ]))
 
 log( 'Cluster saved', id, offset )
 return indexerDb.run(
@@ -397,7 +396,6 @@ class Cluster {
 offset
 ]
 )
-}) ()
 }
 }
 
@@ -415,7 +413,7 @@ var ClusterWriter = {
 { max: 8, }
 ),
 
-append: function ( mimeType, data, id /* for debugging */ ) {
+append: async function ( mimeType, data, id /* for debugging */ ) {
 //~ log( 'ClusterWriter.append', arguments )
 
 var compressible = ClusterWriter.isCompressible( mimeType, data, id )
@@ -425,14 +423,12 @@ var ClusterWriter = {
 
 if ( blobNum === false ) { // store to a new cluster
 ClusterWriter[ compressible ] = new Cluster( compressible )
-const ready = ClusterWriter.pool.acquire()
+const token = await ClusterWriter.pool.acquire()
 
-ready.then( token => cluster.save()
+cluster.save()
 .then( () => ClusterWriter.pool.release( token ))
-)
 
-return ready
-.then( () => ClusterWriter.append( mimeType, data, id ))
+return ClusterWriter.append( mimeType, data, id )
 }
 
 log( 'ClusterWriter.append', compressible, clusterNum, blobNum, data.length, id )
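append still caps how many cluster saves run at once: it awaits a pool token, starts cluster.save() without awaiting it, releases the token when the save settles, and retries the append against the fresh cluster. A sketch of that gating idea, assuming the pool is a generic-pool instance as the { max: 8 } option above suggests:

    const genericPool = require( 'generic-pool' ) // assumption about the pool library

    // A pool of opaque tokens: acquire() waits while `max` saves are in flight.
    const pool = genericPool.createPool(
        { create: async () => ({}), destroy: async () => {} },
        { max: 8 }
    )

    async function gatedSave ( cluster ) {
        const token = await pool.acquire()      // blocks at 8 concurrent saves
        cluster.save()                          // deliberately not awaited
            .then( () => pool.release( token )) // free the slot when done
    }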
@@ -458,28 +454,29 @@ var ClusterWriter = {
 // <nth Cluster> integer (n-1)*8 8 pointer to the <nth Cluster>
 // ... integer ... 8 ...
 
-storeIndex: function () {
-return saveIndex (
-`
-SELECT
-offset
-FROM clusters
-ORDER BY id
-;
-`,
-8, 'offset', header.clusterCount, 'storeClusterIndex'
-)
+storeIndex: async function () {
+return saveIndex ({
+query:
+'SELECT ' +
+'offset ' +
+'FROM clusters ' +
+'ORDER BY id ' +
+';',
+byteLength: 8,
+rowField: 'offset',
+count: header.clusterCount,
+logInfo: 'storeClusterIndex',
+})
 .then( offset => header.clusterPtrPos = offset )
 },
 
-finish: function () {
+finish: async function () {
 //~ log( 'ClusterWriter.finish', ClusterWriter )
-return ClusterWriter[ true ].save() // save last compressible cluster
-.then( () => ClusterWriter[ false ].save()) // save last uncompressible cluster
-.then( () => ClusterWriter.pool.drain())
-.then( () => ClusterWriter.pool.clear())
-.then( () => ClusterWriter.storeIndex())
+await ClusterWriter[ true ].save() // save last compressible cluster
+await ClusterWriter[ false ].save() // save last uncompressible cluster
+await ClusterWriter.pool.drain()
+await ClusterWriter.pool.clear()
+return ClusterWriter.storeIndex()
 },
 }
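saveIndex's five positional arguments give way to one self-describing params object, and finish's .then chain becomes straight-line awaits. The call-shape change in isolation, stubbed so the snippet stands alone:

    const header = { clusterCount: 0 }               // stand-in for the module's header
    async function saveIndex ( params ) { return 0 } // stub of the refactored helper

    // Before: positional arguments, easy to misorder
    // saveIndex( query, 8, 'offset', header.clusterCount, 'storeClusterIndex' )

    // After: named fields document themselves at the call site
    saveIndex({
        query: 'SELECT offset FROM clusters ORDER BY id;',
        byteLength: 8,
        rowField: 'offset',
        count: header.clusterCount,
        logInfo: 'storeClusterIndex',
    })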
@@ -530,8 +527,7 @@ class Item {
 
 saveItemIndex () {
 if ( ! this.path ) {
-console.trace( 'Item no url', this )
-process.exit( 1 )
+fatal( 'Item no url', this )
 }
 
 const row = [
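From here on, console.error or console.trace followed by process.exit( 1 ) collapses into a single fatal(...) call. The helper's definition sits outside the displayed hunks; a plausible shape, as an assumption rather than code from the commit:

    // Hypothetical reconstruction of the fatal() helper this commit calls.
    function fatal ( ...args ) {
        console.error( ...args )
        process.exit( 1 )
    }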
@@ -564,11 +560,10 @@ class Item {
 // title string n/a zero terminated string with an title as refered in the Title pointer list or empty; in case it is empty, the URL is used as title
 // parameter data see parameter len (not used) extra parameters
 
-storeDirEntry ( clusterIdx, blobIdx, redirectTarget ) {
+async storeDirEntry ( clusterIdx, blobIdx, redirectTarget ) {
 if ( clusterIdx == null ) {
-console.error( 'storeDirEntry error: clusterIdx == null', this )
-pprocess.exit( 1 )
-return Promise.resolve()
+fatal( 'storeDirEntry error: clusterIdx == null', this )
+return
 }
 
 header.articleCount++
@@ -589,30 +584,25 @@ class Item {
 var urlBuf = Buffer.from( this.path + '\0' )
 var titleBuf = Buffer.from( this.title + '\0' )
 
-return out.write( Buffer.concat([ buf, urlBuf, titleBuf ]))
-.then( offset => {
-log( 'storeDirEntry done', offset, buf.length, this.path )
-return this.dirEntry = offset
-})
-.then( offset => this.saveDirEntryIndex( offset ))
+this.dirEntry = await out.write( Buffer.concat([ buf, urlBuf, titleBuf ]))
+log( 'storeDirEntry done', this.dirEntry, buf.length, this.path )
+return this.saveDirEntryIndex( this.dirEntry )
 }
 
-saveDirEntryIndex ( offset ) {
-return this.getId()
-.then( id => {
+async saveDirEntryIndex ( offset ) {
+const id = await this.getId()
+try {
 log( 'saveDirEntryIndex', id, offset, this.path )
-return indexerDb.run(
+return await indexerDb.run(
 'INSERT INTO dirEntries (id, offset) VALUES (?,?)',
 [
 id,
 offset,
 ]
 )
-.catch( err => {
-console.error( 'saveDirEntryIndex error', err, this )
-process.exit( 1 )
-})
-})
+} catch ( err ) {
+fatal( 'saveDirEntryIndex error', err, this )
+}
 }
 }
 
@@ -686,9 +676,8 @@ class Redirect extends Item {
 return this.saveRedirectIndex()
 }
 
-saveRedirectIndex () {
-return this.getId()
-.then( id => {
+async saveRedirectIndex () {
+const id = await this.getId()
 return indexerDb.run(
 'INSERT INTO redirects (id, targetKey, fragment) VALUES (?,?,?)',
 [
@@ -697,7 +686,6 @@ class Redirect extends Item {
 this.target.fragment,
 ]
 )
-})
 }
 }
 
@@ -743,31 +731,28 @@ class DataItem extends Item {
 super( params )
 }
 
-process () {
+async process () {
 //~ log( 'DataItem process', this.path )
-return this.store()
-.then( () => super.process())
-.catch( err => {
+try {
+await this.store()
+await super.process()
+} catch ( err ) {
 if ( err instanceof NoProcessingRequired )
 return
-console.error( 'Item process error', this.path, err )
-process.exit( 1 )
-})
+fatal( 'Item process error', this.path, err )
+}
 }
 
-store () {
-return this.getData()
-.then( data => {
+async store () {
+let data = await this.getData()
 if ( data == null ) {
-console.error( 'DataItem.store error: data == null', this )
-process.exit( 1 )
+fatal( 'DataItem.store error: data == null', this )
 }
 if ( !( data instanceof Buffer )) {
 data = Buffer.from( data )
 }
-return ClusterWriter.append( this.mimeType, data, this.path )
-.then( ([ clusterIdx, blobIdx ]) => Object.assign( this, { clusterIdx, blobIdx }))
-})
+const [ clusterIdx, blobIdx ] = await ClusterWriter.append( this.mimeType, data, this.path )
+Object.assign( this, { clusterIdx, blobIdx })
 }
 
 getData () {
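process() now filters errors in an ordinary try/catch: a NoProcessingRequired rejection is an expected signal and is swallowed, anything else is fatal. The filtering pattern in isolation (a sketch; the error class and step are stand-ins, and it rethrows where the commit calls fatal):

    class NoProcessingRequired extends Error {} // stand-in for the module's class

    async function processStep ( step ) {
        try {
            await step()
        } catch ( err ) {
            if ( err instanceof NoProcessingRequired )
                return  // expected: skip silently
            throw err   // the commit calls fatal( ... ) here instead
        }
    }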
@@ -812,39 +797,37 @@ class File extends DataItem {
 }
 }
 
-processJpeg ( data ) {
+async processJpeg ( data ) {
 if ( ! argv.optimg )
 return data
 this.mimeType = 'image/jpeg'
-return spawn(
+try {
+return await spawn(
 mozjpeg,
 [ '-quality', argv.jpegquality, data.length < 20000 ? '-baseline' : '-progressive' ],
 data
 )
-.catch( err => {
+} catch ( err ) {
 log( 'Error otimizing jpeg', err, this )
 return data
-})
+}
 }
 
-processImage ( data ) {
+async processImage ( data ) {
 if ( ! argv.optimg )
 return data
-return Promise.coroutine( function* () {
+try {
 
 const image = sharp( data )
-const metadata = yield image.metadata()
+const metadata = await image.metadata()
+if ( metadata.format == 'gif' && isAnimated( data )) {
 
-if ( metadata.format == 'gif' && isAnimated( data ))
 return data
+}
 if ( metadata.hasAlpha && metadata.channels == 1 ) {
 log( 'metadata.channels == 1', this.path )
 } else if ( metadata.hasAlpha && metadata.channels > 1 ) {
 if ( data.length > 20000 ) {
 // Is this rather opaque?
-const alpha = yield image
+const alpha = await image
 .clone()
 .extractChannel( metadata.channels - 1 )
 .raw()
@@ -856,7 +839,7 @@ class File extends DataItem {
 if ( isOpaque ) { // convert to JPEG
 log( 'isOpaque', this.path )
 if ( metadata.format == 'gif' )
-data = yield image.toBuffer()
+data = await image.toBuffer()
 return this.processJpeg ( data )
 }
 }
@@ -864,11 +847,10 @@ class File extends DataItem {
 if ( metadata.format == 'gif' )
 return data
 return image.toBuffer()
-}).call( this )
-.catch( err => {
+} catch ( err ) {
 log( 'Error otimizing image', err, this )
 return data
-})
+}
 }
 }
 
@@ -890,18 +872,16 @@ class RawFile extends File {
 return fullPath( this.path )
 }
 
-preProcess ( data ) {
-return Promise.coroutine( function* () {
+async preProcess ( data ) {
 if ( ! this.mimeType ) {
-this.mimeType = yield mimeFromData( data )
+this.mimeType = await mimeFromData( data )
 this.nameSpace = this.nameSpace || getNameSpace( this.mimeType )
 }
-if ( argv.inflateHtml && this.mimeType == 'text/html' )
-data = yield zlib.inflate( data ) // inflateData
-return data
-}).call( this )
-.then( data => this.preProcessHtml( data ))
-.then( data => super.preProcess( data ))
+if ( argv.inflateHtml && this.mimeType == 'text/html' ) {
+data = await zlib.inflate( data ) // inflateData
+}
+await this.preProcessHtml( data )
+return super.preProcess( data )
 }
 
 preProcessHtml ( data ) {
@@ -1018,7 +998,7 @@ class RawFile extends File {
 //
 // Favicon a favicon (48x48) is also mandatory and should be located at /-/favicon
 
-function loadMetadata () {
+function fillInMetadata () {
 const outParsed = osPath.parse( outPath )
 const metadata = [
 [ 'Name', outParsed.base ],
@@ -1066,11 +1046,8 @@ function loadMetadata () {
 return Promise.all( done )
 }
 
-function openMetadata( dbName ) {
-return Promise.resolve()
-.then( () => sqlite.open( dbName ))
-.then( db => {
-indexerDb = db
+async function openMetadata( dbName ) {
+indexerDb = await sqlite.open( dbName )
 return indexerDb.exec(`
 PRAGMA synchronous = OFF;
 PRAGMA journal_mode = WAL;
@@ -1093,20 +1070,18 @@ function openMetadata( dbName ) {
 `
 )
 }
-)
-}
 
-function newMetadata() {
+async function newMetadata() {
 var dbName = ''
 if ( argv.verbose ) {
 var parsed = osPath.parse( outPath )
 dbName = osPath.join( parsed.dir, parsed.base + '.db' )
 }
-return fs.unlink( dbName )
-.catch( () => null )
-.then( () => sqlite.open( dbName ))
-.then( db => {
-indexerDb = db
+try {
+await fs.unlink( dbName )
+} catch ( err ) {
+}
+indexerDb = await sqlite.open( dbName )
 return indexerDb.exec(
 'PRAGMA synchronous = OFF;' +
 'PRAGMA journal_mode = OFF;' +
@@ -1135,8 +1110,6 @@ function newMetadata() {
 ');'
 )
 }
-)
-}
 
 function sortArticles () {
 return indexerDb.exec(`
@@ -1156,14 +1129,14 @@ function sortArticles () {
 )
 }
 
-function loadRedirects () {
+async function loadRedirects () {
 var redirectsFile
 if ( preProcessed )
 redirectsFile = osPath.join( srcPath, 'redirects.csv' )
 else if ( argv.redirects )
 redirectsFile = expandHomeDir( argv.redirects )
 else
-return Promise.resolve()
+return
 
 const getRow = cvsReader( redirectsFile, {
 columns:[ 'nameSpace', 'path', 'title', 'to' ],
@@ -1171,24 +1144,15 @@ function loadRedirects () {
 relax_column_count: true
 })
 
-return Promise.coroutine( function* () {
-while ( true ) {
-const row = yield getRow()
+let row
+while ( row = await getRow() ) {
 log( 'loadRedirects', row )
-if ( row ) {
-yield new Redirect( row )
-.process()
-} else {
-return
-}
+await new Redirect( row ).process()
 }
 }
-}) ()
 
-}
 
-function resolveRedirects () {
-return Promise.coroutine( function* () {
-var stmt = yield indexerDb.prepare( `
+async function resolveRedirects () {
+var stmt = await indexerDb.prepare( `
 SELECT
 src.id AS id,
 src.urlKey AS urlKey,
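The generator's fetch/test/break dance around getRow() collapses into an assignment-in-condition loop: each await resolves with the next CSV row, and a falsy result ends the loop. The loop shape in isolation, with getRow stubbed to return three rows and then null:

    const rows = [ { to: 'A' }, { to: 'B' }, { to: 'C' } ] // stub data
    const getRow = async () => rows.shift() || null        // stand-in for the CSV reader

    async function drainRows () {
        let row
        while ( row = await getRow() ) {
            console.log( 'row', row ) // stands in for new Redirect( row ).process()
        }
    }
    drainRows()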
@@ -1217,52 +1181,41 @@
 USING (id)
 WHERE targetId IS NOT NULL
 ;`)
-while ( true ) {
-const row = yield stmt.get()
-if ( ! row ) {
-break
-}
+let row
+while ( row = await stmt.get() ) {
 var nameSpace = row.urlKey[ 0 ]
 var path = row.urlKey.substr( 1 )
 var title = ( row.titleKey == row.urlKey ) ? '' : row.titleKey.substr( 1 )
 var target = row.targetRow - 1
-if ( path == 'mainpage' ) {
-mainPage.target = target
-}
 
-yield new ResolvedRedirect ( row.id, nameSpace, path, title, target, row.revision )
+await new ResolvedRedirect ( row.id, nameSpace, path, title, target, row.revision )
 .process()
 }
-yield stmt.finalize()
-}) ()
+return stmt.finalize()
 }
 
-function saveIndex ( query, byteLength, rowField, count, logInfo ) {
-logInfo = logInfo || 'saveIndex'
-log( logInfo, 'start', count )
+async function saveIndex ( params ) {
+const logInfo = params.logInfo || 'saveIndex'
+log( logInfo, 'start', params.count )
 
-return Promise.coroutine( function* () {
 var startOffset
+var stmt = await indexerDb.prepare( params.query )
 var i = 0
-var stmt = yield indexerDb.prepare( query )
-while ( true ) {
-const row = yield stmt.get()
-if ( ! row )
-break
+for ( let row; row = await stmt.get(); i++ ) {
 log( logInfo, i, row )
-i++
-var buf = Buffer.allocUnsafe( byteLength )
-writeUIntLE( buf, row[ rowField ], 0, byteLength )
+if ( params.rowCb )
+params.rowCb( row, i )
+var buf = Buffer.allocUnsafe( params.byteLength )
+writeUIntLE( buf, row[ params.rowField ], 0, params.byteLength )
 
-var offset = yield out.write( buf )
+var offset = await out.write( buf )
 if ( ! startOffset )
 startOffset = offset
 }
-yield stmt.finalize()
+await stmt.finalize()
 
-log( logInfo, 'done', i, count, startOffset )
-return Promise.resolve( startOffset )
-}) ()
+log( logInfo, 'done', i, params.count, startOffset )
+return startOffset
 }
 
 // URL Pointer List (urlPtrPos)
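saveIndex packs one fixed-width little-endian pointer per row through the module's writeUIntLE( buf, value, offset, byteLength ) helper. Node's built-in Buffer#writeUIntLE only accepts widths up to 6 bytes, while these pointers are 8 bytes wide, which is presumably why a custom helper exists. A hypothetical reconstruction (the real definition is outside the displayed hunks):

    // Assumed shape: little-endian unsigned writer for widths up to 8 bytes.
    function writeUIntLE ( buf, value, offset, byteLength ) {
        let rest = BigInt( value )
        for ( let i = 0; i < byteLength; i++ ) {
            buf[ offset + i ] = Number( rest & 0xffn ) // lowest byte first
            rest >>= 8n
        }
        return offset + byteLength
    }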
@@ -1278,21 +1231,28 @@ function saveIndex ( query, byteLength, rowField, count, logInfo ) {
 // <nth URL> integer (n-1)*8 8 pointer to the directory entry of <nth URL>
 // ... integer ... 8 ...
 
-function storeUrlIndex () {
-return saveIndex (`
-SELECT
-urlSorted.rowid,
-id,
-urlKey,
-offset
-FROM urlSorted
-LEFT OUTER JOIN dirEntries
-USING (id)
-ORDER BY urlSorted.rowid
-;`,
-8, 'offset', header.articleCount, 'storeUrlIndex'
-)
-.then( offset => header.urlPtrPos = offset )
+async function storeUrlIndex () {
+header.urlPtrPos = await saveIndex ({
+query:
+'SELECT ' +
+'urlSorted.rowid, ' +
+'id, ' +
+'urlKey, ' +
+'offset ' +
+'FROM urlSorted ' +
+'LEFT OUTER JOIN dirEntries ' +
+'USING (id) ' +
+'ORDER BY urlSorted.rowid ' +
+';',
+byteLength: 8,
+rowField: 'offset',
+count: header.articleCount,
+logInfo: 'storeUrlIndex',
+rowCb: (row, index) => {
+if ( row.urlKey == mainPage.urlKey )
+mainPage.index = index
+}
+})
 }
 
 // Title Pointer List (titlePtrPos)
@@ -1304,8 +1264,9 @@ function storeUrlIndex () {
 // <nth Title> integer (n-1)*4 4 pointer to the URL pointer of <nth Title>
 // ... integer ... 4 ...
 
-function storeTitleIndex () {
-return saveIndex (
+async function storeTitleIndex () {
+header.titlePtrPos = await saveIndex ({
+query:
 'SELECT ' +
 'titleKey, ' +
 'urlSorted.rowid - 1 AS articleNumber ' +
@@ -1314,9 +1275,11 @@ function storeTitleIndex () {
 'USING (id) ' +
 'ORDER BY titleKey ' +
 ';',
-4, 'articleNumber', header.articleCount, 'storeTitleIndex'
-)
-.then( offset => header.titlePtrPos = offset )
+byteLength: 4,
+rowField: 'articleNumber',
+count: header.articleCount,
+logInfo: 'storeTitleIndex',
+})
 }
 
 // MIME Type List (mimeListPos)
@@ -1335,14 +1298,13 @@ function getMimeTypes () {
 log( 'MimeTypes', mimeTypeList.length, buf.length )
 
 if ( buf.length > maxMimeLength ) {
-console.error( 'Error: mime type list length >', maxMimeLength )
-process.exit( 1 )
+fatal( 'Error: mime type list length >', maxMimeLength )
 }
 return buf
 }
 
 function getHeader () {
-header.mainPage = mainPage.target || header.mainPage
+header.mainPage = mainPage.index || header.mainPage
 
 //~ log( 'Header', 'articleCount', header.articleCount, 'clusterCount', header.clusterCount, 'mainPage', mainPage )
 log( 'Header', header )
@@ -1370,68 +1332,56 @@ function getHeader () {
 return buf
 }
 
-function storeHeader() {
+async function storeHeader() {
 var buf = Buffer.concat([ getHeader(), getMimeTypes() ])
-var fd = fs.openSync( outPath, 'r+' )
-fs.writeSync( fd, buf, 0, buf.length, 0 )
-fs.closeSync( fd )
-return Promise.resolve()
+var fd = await fs.open( outPath, 'r+' )
+await fs.writeSync( fd, buf, 0, buf.length, 0 )
+return fs.close( fd )
 }
 
 function calculateFileHash () {
 var outHash
 var hash = crypto.createHash( 'md5' )
 var stream = fs.createReadStream( outPath )
-var resolve
 
 stream.on( 'data', data => hash.update( data ))
-stream.on( 'end', () => {
-outHash = hash.digest()
-log( 'outHash', outHash )
-fs.appendFileSync( outPath, outHash )
-resolve()
-})
 
-return new Promise( r => resolve = r )
+return new Promise( (resolve, reject ) => stream.on( 'end', async () => {
+outHash = hash.digest()
+await fs.appendFile( outPath, outHash )
+log( 'outHash', outHash )
+resolve()
+}))
 }
 
-function initialise () {
-var stat = fs.statSync( srcPath ) // check source
+async function initialise () {
+var stat = await fs.stat( srcPath )
 if ( ! stat.isDirectory() ) {
-return Promise.reject( new Error( srcPath + ' is not a directory' ))
+throw new Error( srcPath + ' is not a directory' )
 }
 
 out = new Writer( outPath ); // create output file
 log( 'reserving space for header and mime type list' )
-out.write( Buffer.alloc( headerLength + maxMimeLength ))
+await out.write( Buffer.alloc( headerLength + maxMimeLength ))
 
 var metadata = osPath.join( srcPath, 'metadata.db' )
-if ( fs.existsSync( metadata )) {
+if ( await fs.exists( metadata )) {
 preProcessed = true
-return openMetadata( metadata )
-.then( () => loadMimeTypes())
+try {
+mainPage.urlKey = await fs.readFile( osPath.join( srcPath, 'mainpage' ))
+} catch ( err ) {
+}
+await openMetadata( metadata )
+return loadMimeTypes()
+} else {
+await newMetadata()
+return fillInMetadata()
 }
-return newMetadata()
-.then( () => loadMetadata())
 }
 
-function rawLoader () {
+async function rawLoader () {
 const dirs = [ '' ]
 
-function scanDirectories ( path ) {
-return Promise.coroutine( function* () {
-for ( let path; ( path = dirs.shift()) != null; ) {
-log( 'scanDirectory', path )
-
-yield Promise.map(
-fs.readdir( fullPath( path )),
-fname => parseDirEntry( osPath.join( path, fname )),
-{ concurrency: 8 }
-)
-}
-}) ()
-}
-
 function parseDirEntry ( path ) {
 if ( path == 'metadata.csv' || path == 'redirects.csv' )
 return Promise.resolve()
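The awaited fs.open, fs.stat and fs.readFile calls in this hunk imply a promisified fs module (an assumption; Bluebird's promisifyAll would fit). calculateFileHash keeps its event-driven shape but now builds the Promise around the 'end' handler directly, instead of smuggling resolve out through an outer variable; the handler is async so it can await fs.appendFile. The same promisification pattern in isolation (a sketch that also wires 'error' to reject, which the commit declares but leaves unused):

    const crypto = require( 'crypto' )
    const fs = require( 'fs' )

    // Resolve with the MD5 digest of a file, hashing it as it streams.
    function md5Of ( path ) {
        const hash = crypto.createHash( 'md5' )
        const stream = fs.createReadStream( path )
        stream.on( 'data', data => hash.update( data ))
        return new Promise( ( resolve, reject ) => {
            stream.on( 'error', reject ) // the commit's version omits this wiring
            stream.on( 'end', () => resolve( hash.digest()))
        })
    }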
@@ -1448,13 +1398,22 @@ function rawLoader () {
 }
 
 log( 'rawLoader start' )
-return scanDirectories()
-.then( () => log( 'rawLoader finished !!!!!!!!!' ))
+// scan Directories
+for ( let path; ( path = dirs.shift()) != null; ) {
+log( 'scanDirectory', path )
+
+await Promise.map(
+fs.readdir( fullPath( path )),
+fname => parseDirEntry( osPath.join( path, fname )),
+{ concurrency: 8 }
+)
 }
 
-function loadPreProcessedArticles () {
-return Promise.coroutine( function* () {
-var stmt = yield indexerDb.prepare( `
+log( 'rawLoader finished !!!!!!!!!' )
+}
+
+async function loadPreProcessedArticles () {
+var stmt = await indexerDb.prepare( `
 SELECT
 id ,
 mimeId ,
@@ -1465,14 +1424,14 @@ function loadPreProcessedArticles () {
 WHERE mimeId IS NOT 0xffff
 ;`)
 while ( true ) {
-const row = yield stmt.get()
+const row = await stmt.get()
 if ( ! row ) {
 break
 }
 var nameSpace = row.urlKey[ 0 ]
 var path = row.urlKey.substr( 1 )
 var title = ( row.titleKey == row.urlKey ) ? '' : row.titleKey.substr( 1 )
-yield new File( {
+await new File( {
 nameSpace,
 path,
 title,
@@ -1482,13 +1441,11 @@ function loadPreProcessedArticles () {
 } )
 .process()
 }
-yield stmt.finalize()
-}) ()
+return stmt.finalize()
 }
 
-function loadMimeTypes () {
-return Promise.coroutine( function * () {
-var stmt = yield indexerDb.prepare( `
+async function loadMimeTypes () {
+var stmt = await indexerDb.prepare( `
 SELECT
 id ,
 value
@@ -1496,48 +1453,40 @@ function loadMimeTypes () {
 ORDER BY id
 ;`)
 while ( true ) {
-const row = yield stmt.get()
+const row = await stmt.get()
 if ( ! row ) {
 break
 }
 mimeTypeList.push( row.value )
 }
-yield stmt.finalize()
-}) ()
+return stmt.finalize()
 }
 
-function loadRawArticles () {
-return Promise.resolve()
-.then( () => rawLoader())
-.then( () => loadRedirects())
+async function loadRawArticles () {
+await rawLoader()
+return loadRedirects()
 }
 
-function postProcess () {
-return Promise.coroutine( function* () {
-yield ClusterWriter.finish()
-yield sortArticles()
-yield resolveRedirects()
-yield storeUrlIndex()
-yield storeTitleIndex()
-}) ()
+async function postProcess () {
+await ClusterWriter.finish()
+await sortArticles()
+await resolveRedirects()
+await storeUrlIndex()
+return storeTitleIndex()
 }
 
-function finalise () {
-return Promise.coroutine( function* () {
-header.checksumPos = yield out.close() // close the output stream
-yield indexerDb.close()
-yield storeHeader()
-yield calculateFileHash()
-}) ()
+async function finalise () {
+header.checksumPos = await out.close() // close the output stream
+await indexerDb.close()
+await storeHeader()
+return calculateFileHash()
 }
 
-function core () {
-return Promise.coroutine( function* () {
-yield initialise()
-yield preProcessed ? loadPreProcessedArticles() : loadRawArticles()
-yield postProcess()
-yield finalise()
-}) ()
+async function core () {
+await initialise()
+await ( preProcessed ? loadPreProcessedArticles() : loadRawArticles() )
+await postProcess()
+await finalise()
 }
 
 // Mandatory arguments:
@@ -1607,12 +1556,8 @@ function main () {
 outPath = parsed.base + '.zim'
 }
 
-if ( argv.minChunkSize ) {
-ClusterSizeThreshold = argv.minChunkSize * 1024
-}
+//~ if ( argv.minChunkSize ) {
+//~ ClusterSizeThreshold = argv.minChunkSize * 1024
 
-//~ mainPage = {
-//~ title: argv.welcome
 //~ }
 
 core ()