more async/await

Author: v
Date: 2018-11-28 18:48:28 +03:00
Parent: f8768360ee
Commit: 036f12441b

zimmer.js (477 changed lines)
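The change is mechanical throughout: Bluebird Promise.coroutine generators and .then()/.catch() chains are rewritten as native async functions, with yield becoming await and .catch() becoming try/catch. A minimal sketch of the pattern (generic names, not code from zimmer.js):

    // Before: Bluebird coroutine wrapping a generator, invoked immediately
    function save () {
        return Promise.coroutine( function* () {
            const data = yield compress( raw )   // yield suspends on the promise
            return write( data )
        }) ()
    }

    // After: native async/await — same control flow, no wrapper or generator
    async function save () {
        const data = await compress( raw )
        return write( data )
    }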

@@ -317,10 +317,10 @@ class Writer {
 // Cluster
 //
-// ClusterSizeThreshold = 8 * 1024 * 1024
-var ClusterSizeThreshold = 4 * 1024 * 1024
-// ClusterSizeThreshold = 2 * 1024 * 1024
-// ClusterSizeThreshold = 2 * 1024
+// var ClusterSizeThreshold = 8 * 1024 * 1024
+//~ var ClusterSizeThreshold = 4 * 1024 * 1024
+var ClusterSizeThreshold = 1 * 1024 * 1024
+// var ClusterSizeThreshold = 2 * 1024 * 1024
 class Cluster {
     constructor ( compressible ) {
@@ -354,7 +354,7 @@ class Cluster {
     // <2nd Blob> data n/a n/a data of the <2nd Blob>
     // ... data ... n/a ...
-    save () {
+    async save () {
         //~ log( 'Cluster.prototype.save', this.compressible, this.blobs )
         var nBlobs = this.blobs.length
@@ -379,15 +379,14 @@ class Cluster {
         var compression = this.compressible ? 4 : 0
         var id = this.id
-        return Promise.coroutine( function* () {
         if ( compression ) {
             // https://tukaani.org/lzma/benchmarks.html
-            data = yield lzma.compress( data, 7 ) // 3 | lzma.PRESET_EXTREME )
+            data = await lzma.compress( data, 7 ) // 3 | lzma.PRESET_EXTREME )
             log( 'Cluster lzma compressed' )
         }
         log( 'Cluster write', id, compression )
-        const offset = yield out.write( Buffer.concat([ Buffer.from([ compression ]), data ]))
+        const offset = await out.write( Buffer.concat([ Buffer.from([ compression ]), data ]))
         log( 'Cluster saved', id, offset )
         return indexerDb.run(
@@ -397,7 +396,6 @@ class Cluster {
                 offset
             ]
         )
-        }) ()
     }
 }
@@ -415,7 +413,7 @@ var ClusterWriter = {
         { max: 8, }
     ),
-    append: function ( mimeType, data, id /* for debugging */ ) {
+    append: async function ( mimeType, data, id /* for debugging */ ) {
         //~ log( 'ClusterWriter.append', arguments )
         var compressible = ClusterWriter.isCompressible( mimeType, data, id )
@@ -425,14 +423,12 @@ var ClusterWriter = {
         if ( blobNum === false ) { // store to a new cluster
             ClusterWriter[ compressible ] = new Cluster( compressible )
-            const ready = ClusterWriter.pool.acquire()
-            ready.then( token => cluster.save()
-                .then( () => ClusterWriter.pool.release( token ))
-            )
-            return ready
-            .then( () => ClusterWriter.append( mimeType, data, id ))
+            const token = await ClusterWriter.pool.acquire()
+            cluster.save()
+            .then( () => ClusterWriter.pool.release( token ))
+            return ClusterWriter.append( mimeType, data, id )
         }
         log( 'ClusterWriter.append', compressible, clusterNum, blobNum, data.length, id )
@@ -458,28 +454,29 @@ var ClusterWriter = {
     // <nth Cluster> integer (n-1)*8 8 pointer to the <nth Cluster>
     // ... integer ... 8 ...
-    storeIndex: function () {
-        return saveIndex (
-            `
-            SELECT
-            offset
-            FROM clusters
-            ORDER BY id
-            ;
-            `,
-            8, 'offset', header.clusterCount, 'storeClusterIndex'
-        )
+    storeIndex: async function () {
+        return saveIndex ({
+            query:
+                'SELECT ' +
+                'offset ' +
+                'FROM clusters ' +
+                'ORDER BY id ' +
+                ';',
+            byteLength: 8,
+            rowField: 'offset',
+            count: header.clusterCount,
+            logInfo: 'storeClusterIndex',
+        })
         .then( offset => header.clusterPtrPos = offset )
     },
-    finish: function () {
+    finish: async function () {
         //~ log( 'ClusterWriter.finish', ClusterWriter )
-        return ClusterWriter[ true ].save() // save last compressible cluster
-        .then( () => ClusterWriter[ false ].save()) // save last uncompressible cluster
-        .then( () => ClusterWriter.pool.drain())
-        .then( () => ClusterWriter.pool.clear())
-        .then( () => ClusterWriter.storeIndex())
+        await ClusterWriter[ true ].save() // save last compressible cluster
+        await ClusterWriter[ false ].save() // save last uncompressible cluster
+        await ClusterWriter.pool.drain()
+        await ClusterWriter.pool.clear()
+        return ClusterWriter.storeIndex()
    },
 }
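A note on the append() path above: a full cluster's save() is started but deliberately not awaited, so compression and writing proceed in the background while append() retries against the fresh cluster; the pool token (max: 8) is what bounds how many saves run at once. The idiom, sketched with hypothetical names (assuming a generic-pool style acquire/release):

    // Bounded fire-and-forget: at most pool-max jobs in flight at a time.
    async function saveInBackground ( pool, job ) {
        const token = await pool.acquire()      // blocks while the pool is exhausted
        job()                                   // intentionally not awaited
        .then( () => pool.release( token ))     // free the slot when the job settles
        // NB: a production version would also .catch() to avoid losing errors
    }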
@@ -530,8 +527,7 @@ class Item {
     saveItemIndex () {
         if ( ! this.path ) {
-            console.trace( 'Item no url', this )
-            process.exit( 1 )
+            fatal( 'Item no url', this )
         }
         const row = [
@@ -564,11 +560,10 @@ class Item {
     // title string n/a zero terminated string with an title as refered in the Title pointer list or empty; in case it is empty, the URL is used as title
     // parameter data see parameter len (not used) extra parameters
-    storeDirEntry ( clusterIdx, blobIdx, redirectTarget ) {
+    async storeDirEntry ( clusterIdx, blobIdx, redirectTarget ) {
         if ( clusterIdx == null ) {
-            console.error( 'storeDirEntry error: clusterIdx == null', this )
-            pprocess.exit( 1 )
-            return Promise.resolve()
+            fatal( 'storeDirEntry error: clusterIdx == null', this )
+            return
         }
         header.articleCount++
@@ -589,30 +584,25 @@ class Item {
         var urlBuf = Buffer.from( this.path + '\0' )
         var titleBuf = Buffer.from( this.title + '\0' )
-        return out.write( Buffer.concat([ buf, urlBuf, titleBuf ]))
-        .then( offset => {
-            log( 'storeDirEntry done', offset, buf.length, this.path )
-            return this.dirEntry = offset
-        })
-        .then( offset => this.saveDirEntryIndex( offset ))
+        this.dirEntry = await out.write( Buffer.concat([ buf, urlBuf, titleBuf ]))
+        log( 'storeDirEntry done', this.dirEntry, buf.length, this.path )
+        return this.saveDirEntryIndex( this.dirEntry )
     }
-    saveDirEntryIndex ( offset ) {
-        return this.getId()
-        .then( id => {
+    async saveDirEntryIndex ( offset ) {
+        const id = await this.getId()
+        try {
             log( 'saveDirEntryIndex', id, offset, this.path )
-            return indexerDb.run(
+            return await indexerDb.run(
                 'INSERT INTO dirEntries (id, offset) VALUES (?,?)',
                 [
                     id,
                     offset,
                 ]
            )
-            .catch( err => {
-                console.error( 'saveDirEntryIndex error', err, this )
-                process.exit( 1 )
-            })
-        })
+        } catch ( err ) {
+            fatal( 'saveDirEntryIndex error', err, this )
+        }
     }
 }
@@ -686,9 +676,8 @@ class Redirect extends Item {
         return this.saveRedirectIndex()
     }
-    saveRedirectIndex () {
-        return this.getId()
-        .then( id => {
+    async saveRedirectIndex () {
+        const id = await this.getId()
         return indexerDb.run(
             'INSERT INTO redirects (id, targetKey, fragment) VALUES (?,?,?)',
             [
@@ -697,7 +686,6 @@ class Redirect extends Item {
                 this.target.fragment,
             ]
         )
-        })
     }
 }
@@ -743,31 +731,28 @@ class DataItem extends Item {
         super( params )
     }
-    process () {
+    async process () {
         //~ log( 'DataItem process', this.path )
-        return this.store()
-        .then( () => super.process())
-        .catch( err => {
+        try {
+            await this.store()
+            await super.process()
+        } catch ( err ) {
             if ( err instanceof NoProcessingRequired )
                 return
-            console.error( 'Item process error', this.path, err )
-            process.exit( 1 )
-        })
+            fatal( 'Item process error', this.path, err )
+        }
     }
-    store () {
-        return this.getData()
-        .then( data => {
+    async store () {
+        let data = await this.getData()
         if ( data == null ) {
-            console.error( 'DataItem.store error: data == null', this )
-            process.exit( 1 )
+            fatal( 'DataItem.store error: data == null', this )
         }
         if ( !( data instanceof Buffer )) {
            data = Buffer.from( data )
         }
-        return ClusterWriter.append( this.mimeType, data, this.path )
-        .then( ([ clusterIdx, blobIdx ]) => Object.assign( this, { clusterIdx, blobIdx }))
-        })
+        const [ clusterIdx, blobIdx ] = await ClusterWriter.append( this.mimeType, data, this.path )
+        Object.assign( this, { clusterIdx, blobIdx })
     }
     getData () {
@@ -812,39 +797,37 @@ class File extends DataItem {
         }
     }
-    processJpeg ( data ) {
+    async processJpeg ( data ) {
         if ( ! argv.optimg )
             return data
         this.mimeType = 'image/jpeg'
-        return spawn(
+        try {
+            return await spawn(
                 mozjpeg,
                 [ '-quality', argv.jpegquality, data.length < 20000 ? '-baseline' : '-progressive' ],
                 data
             )
-        .catch( err => {
+        } catch ( err ) {
             log( 'Error otimizing jpeg', err, this )
             return data
-        })
+        }
     }
-    processImage ( data ) {
+    async processImage ( data ) {
         if ( ! argv.optimg )
             return data
-        return Promise.coroutine( function* () {
+        try {
             const image = sharp( data )
-            const metadata = yield image.metadata()
-            if ( metadata.format == 'gif' && isAnimated( data ))
+            const metadata = await image.metadata()
+            if ( metadata.format == 'gif' && isAnimated( data )) {
                 return data
+            }
             if ( metadata.hasAlpha && metadata.channels == 1 ) {
                 log( 'metadata.channels == 1', this.path )
             } else if ( metadata.hasAlpha && metadata.channels > 1 ) {
                 if ( data.length > 20000 ) {
                     // Is this rather opaque?
-                    const alpha = yield image
+                    const alpha = await image
                     .clone()
                     .extractChannel( metadata.channels - 1 )
                     .raw()
@@ -856,7 +839,7 @@ class File extends DataItem {
                     if ( isOpaque ) { // convert to JPEG
                         log( 'isOpaque', this.path )
                         if ( metadata.format == 'gif' )
-                            data = yield image.toBuffer()
+                            data = await image.toBuffer()
                         return this.processJpeg ( data )
                     }
                 }
@@ -864,11 +847,10 @@ class File extends DataItem {
             if ( metadata.format == 'gif' )
                 return data
             return image.toBuffer()
-        }).call( this )
-        .catch( err => {
+        } catch ( err ) {
             log( 'Error otimizing image', err, this )
             return data
-        })
+        }
     }
 }
@@ -890,18 +872,16 @@ class RawFile extends File {
         return fullPath( this.path )
     }
-    preProcess ( data ) {
-        return Promise.coroutine( function* () {
+    async preProcess ( data ) {
         if ( ! this.mimeType ) {
-            this.mimeType = yield mimeFromData( data )
+            this.mimeType = await mimeFromData( data )
             this.nameSpace = this.nameSpace || getNameSpace( this.mimeType )
         }
-        if ( argv.inflateHtml && this.mimeType == 'text/html' )
-            data = yield zlib.inflate( data ) // inflateData
-            return data
-        }).call( this )
-        .then( data => this.preProcessHtml( data ))
-        .then( data => super.preProcess( data ))
+        if ( argv.inflateHtml && this.mimeType == 'text/html' ) {
+            data = await zlib.inflate( data ) // inflateData
+        }
+        await this.preProcessHtml( data )
+        return super.preProcess( data )
     }
     preProcessHtml ( data ) {
@@ -1018,7 +998,7 @@ class RawFile extends File {
 //
 // Favicon a favicon (48x48) is also mandatory and should be located at /-/favicon
-function loadMetadata () {
+function fillInMetadata () {
     const outParsed = osPath.parse( outPath )
     const metadata = [
         [ 'Name', outParsed.base ],
@@ -1066,11 +1046,8 @@ function loadMetadata () {
     return Promise.all( done )
 }
-function openMetadata( dbName ) {
-    return Promise.resolve()
-    .then( () => sqlite.open( dbName ))
-    .then( db => {
-        indexerDb = db
+async function openMetadata( dbName ) {
+    indexerDb = await sqlite.open( dbName )
     return indexerDb.exec(`
         PRAGMA synchronous = OFF;
         PRAGMA journal_mode = WAL;
@@ -1093,20 +1070,18 @@ function openMetadata( dbName ) {
         `
     )
-    }
-    )
 }
-function newMetadata() {
+async function newMetadata() {
     var dbName = ''
     if ( argv.verbose ) {
         var parsed = osPath.parse( outPath )
         dbName = osPath.join( parsed.dir, parsed.base + '.db' )
     }
-    return fs.unlink( dbName )
-    .catch( () => null )
-    .then( () => sqlite.open( dbName ))
-    .then( db => {
-        indexerDb = db
+    try {
+        await fs.unlink( dbName )
+    } catch ( err ) {
+    }
+    indexerDb = await sqlite.open( dbName )
     return indexerDb.exec(
         'PRAGMA synchronous = OFF;' +
         'PRAGMA journal_mode = OFF;' +
@@ -1135,8 +1110,6 @@ function newMetadata() {
         ');'
     )
-    }
-    )
 }
 function sortArticles () {
     return indexerDb.exec(`
@@ -1156,14 +1129,14 @@ function sortArticles () {
     )
 }
-function loadRedirects () {
+async function loadRedirects () {
     var redirectsFile
     if ( preProcessed )
         redirectsFile = osPath.join( srcPath, 'redirects.csv' )
     else if ( argv.redirects )
         redirectsFile = expandHomeDir( argv.redirects )
     else
-        return Promise.resolve()
+        return
     const getRow = cvsReader( redirectsFile, {
         columns:[ 'nameSpace', 'path', 'title', 'to' ],
@@ -1171,24 +1144,15 @@ function loadRedirects () {
         relax_column_count: true
     })
-    return Promise.coroutine( function* () {
-        while ( true ) {
-            const row = yield getRow()
-            log( 'loadRedirects', row )
-            if ( row ) {
-                yield new Redirect( row )
-                .process()
-            } else {
-                return
-            }
-        }
-    }) ()
+    let row
+    while ( row = await getRow() ) {
+        log( 'loadRedirects', row )
+        await new Redirect( row ).process()
+    }
 }
-function resolveRedirects () {
-    return Promise.coroutine( function* () {
-        var stmt = yield indexerDb.prepare( `
+async function resolveRedirects () {
+    var stmt = await indexerDb.prepare( `
         SELECT
             src.id AS id,
             src.urlKey AS urlKey,
@@ -1217,52 +1181,41 @@ function resolveRedirects () {
         USING (id)
         WHERE targetId IS NOT NULL
     ;`)
-    while ( true ) {
-        const row = yield stmt.get()
-        if ( ! row ) {
-            break
-        }
+    let row
+    while ( row = await stmt.get() ) {
         var nameSpace = row.urlKey[ 0 ]
         var path = row.urlKey.substr( 1 )
         var title = ( row.titleKey == row.urlKey ) ? '' : row.titleKey.substr( 1 )
         var target = row.targetRow - 1
-        if ( path == 'mainpage' ) {
-            mainPage.target = target
-        }
-        yield new ResolvedRedirect ( row.id, nameSpace, path, title, target, row.revision )
+        await new ResolvedRedirect ( row.id, nameSpace, path, title, target, row.revision )
         .process()
     }
-    yield stmt.finalize()
-    }) ()
+    return stmt.finalize()
 }
-function saveIndex ( query, byteLength, rowField, count, logInfo ) {
-    logInfo = logInfo || 'saveIndex'
-    log( logInfo, 'start', count )
-    return Promise.coroutine( function* () {
+async function saveIndex ( params ) {
+    const logInfo = params.logInfo || 'saveIndex'
+    log( logInfo, 'start', params.count )
     var startOffset
+    var stmt = await indexerDb.prepare( params.query )
     var i = 0
-    var stmt = yield indexerDb.prepare( query )
-    while ( true ) {
-        const row = yield stmt.get()
-        if ( ! row )
-            break
-        log( logInfo, i, row )
-        i++
-        var buf = Buffer.allocUnsafe( byteLength )
-        writeUIntLE( buf, row[ rowField ], 0, byteLength )
-        var offset = yield out.write( buf )
+    for ( let row; row = await stmt.get(); i++ ) {
+        log( logInfo, i, row )
+        if ( params.rowCb )
+            params.rowCb( row, i )
+        var buf = Buffer.allocUnsafe( params.byteLength )
+        writeUIntLE( buf, row[ params.rowField ], 0, params.byteLength )
+        var offset = await out.write( buf )
         if ( ! startOffset )
             startOffset = offset
     }
-    yield stmt.finalize()
-    log( logInfo, 'done', i, count, startOffset )
-    return Promise.resolve( startOffset )
-    }) ()
+    await stmt.finalize()
+    log( logInfo, 'done', i, params.count, startOffset )
+    return startOffset
 }
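saveIndex now takes a single params object — { query, byteLength, rowField, count, logInfo, rowCb } — instead of five positional arguments, and the SQL moves from template literals to concatenated strings. The new optional rowCb hook replaces the old mainPage.target bookkeeping in resolveRedirects: storeUrlIndex (below) uses it to record the index of the row whose urlKey matches mainPage.urlKey, which getHeader later writes out as header.mainPage. An illustrative call, with made-up values:

    const startOffset = await saveIndex ({
        query: 'SELECT offset FROM clusters ORDER BY id;', // any query yielding rowField
        byteLength: 8,                  // bytes per pointer written to the index
        rowField: 'offset',             // column emitted for each row
        count: header.clusterCount,     // expected row count, used for logging
        logInfo: 'storeClusterIndex',
        rowCb: ( row, index ) => {},    // optional per-row hook
    })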
 // URL Pointer List (urlPtrPos)
@@ -1278,21 +1231,28 @@ function saveIndex ( query, byteLength, rowField, count, logInfo ) {
 // <nth URL> integer (n-1)*8 8 pointer to the directory entry of <nth URL>
 // ... integer ... 8 ...
-function storeUrlIndex () {
-    return saveIndex (`
-        SELECT
-            urlSorted.rowid,
-            id,
-            urlKey,
-            offset
-        FROM urlSorted
-        LEFT OUTER JOIN dirEntries
-        USING (id)
-        ORDER BY urlSorted.rowid
-    ;`,
-    8, 'offset', header.articleCount, 'storeUrlIndex'
-    )
-    .then( offset => header.urlPtrPos = offset )
+async function storeUrlIndex () {
+    header.urlPtrPos = await saveIndex ({
+        query:
+            'SELECT ' +
+            'urlSorted.rowid, ' +
+            'id, ' +
+            'urlKey, ' +
+            'offset ' +
+            'FROM urlSorted ' +
+            'LEFT OUTER JOIN dirEntries ' +
+            'USING (id) ' +
+            'ORDER BY urlSorted.rowid ' +
+            ';',
+        byteLength: 8,
+        rowField: 'offset',
+        count: header.articleCount,
+        logInfo: 'storeUrlIndex',
+        rowCb: (row, index) => {
+            if ( row.urlKey == mainPage.urlKey )
+                mainPage.index = index
+        }
+    })
 }
 // Title Pointer List (titlePtrPos)
@@ -1304,8 +1264,9 @@ function storeUrlIndex () {
 // <nth Title> integer (n-1)*4 4 pointer to the URL pointer of <nth Title>
 // ... integer ... 4 ...
-function storeTitleIndex () {
-    return saveIndex (
+async function storeTitleIndex () {
+    header.titlePtrPos = await saveIndex ({
+        query:
         'SELECT ' +
         'titleKey, ' +
         'urlSorted.rowid - 1 AS articleNumber ' +
@@ -1314,9 +1275,11 @@ function storeTitleIndex () {
         'USING (id) ' +
         'ORDER BY titleKey ' +
         ';',
-        4, 'articleNumber', header.articleCount, 'storeTitleIndex'
-    )
-    .then( offset => header.titlePtrPos = offset )
+        byteLength: 4,
+        rowField: 'articleNumber',
+        count: header.articleCount,
+        logInfo: 'storeTitleIndex',
+    })
 }
 // MIME Type List (mimeListPos)
@@ -1335,14 +1298,13 @@ function getMimeTypes () {
     log( 'MimeTypes', mimeTypeList.length, buf.length )
     if ( buf.length > maxMimeLength ) {
-        console.error( 'Error: mime type list length >', maxMimeLength )
-        process.exit( 1 )
+        fatal( 'Error: mime type list length >', maxMimeLength )
     }
     return buf
 }
 function getHeader () {
-    header.mainPage = mainPage.target || header.mainPage
+    header.mainPage = mainPage.index || header.mainPage
     //~ log( 'Header', 'articleCount', header.articleCount, 'clusterCount', header.clusterCount, 'mainPage', mainPage )
     log( 'Header', header )
@@ -1370,68 +1332,56 @@ function getHeader () {
     return buf
 }
-function storeHeader() {
+async function storeHeader() {
     var buf = Buffer.concat([ getHeader(), getMimeTypes() ])
-    var fd = fs.openSync( outPath, 'r+' )
-    fs.writeSync( fd, buf, 0, buf.length, 0 )
-    fs.closeSync( fd )
-    return Promise.resolve()
+    var fd = await fs.open( outPath, 'r+' )
+    await fs.writeSync( fd, buf, 0, buf.length, 0 )
+    return fs.close( fd )
 }
 function calculateFileHash () {
     var outHash
     var hash = crypto.createHash( 'md5' )
     var stream = fs.createReadStream( outPath )
-    var resolve
     stream.on( 'data', data => hash.update( data ))
-    stream.on( 'end', () => {
-        outHash = hash.digest()
-        log( 'outHash', outHash )
-        fs.appendFileSync( outPath, outHash )
-        resolve()
-    })
-    return new Promise( r => resolve = r )
+    return new Promise( (resolve, reject ) => stream.on( 'end', async () => {
+        outHash = hash.digest()
+        await fs.appendFile( outPath, outHash )
+        log( 'outHash', outHash )
+        resolve()
+    }))
 }
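One caveat worth flagging in calculateFileHash(): the 'end' handler is async, so if fs.appendFile rejects, reject is never called and the returned promise never settles; stream errors are likewise unhandled. A stricter variant might look like this (a sketch, not part of the commit; it assumes the same promisified fs):

    function calculateFileHash () {
        const hash = crypto.createHash( 'md5' )
        const stream = fs.createReadStream( outPath )
        return new Promise(( resolve, reject ) => {
            stream.on( 'data', data => hash.update( data ))
            stream.on( 'error', reject )                     // surface read errors
            stream.on( 'end', () => {
                const outHash = hash.digest()
                fs.appendFile( outPath, outHash )            // promisified fs assumed
                .then( () => resolve( outHash ), reject )    // propagate append errors
            })
        })
    }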
-function initialise () {
-    var stat = fs.statSync( srcPath ) // check source
+async function initialise () {
+    var stat = await fs.stat( srcPath )
     if ( ! stat.isDirectory() ) {
-        return Promise.reject( new Error( srcPath + ' is not a directory' ))
+        throw new Error( srcPath + ' is not a directory' )
     }
     out = new Writer( outPath ); // create output file
     log( 'reserving space for header and mime type list' )
-    out.write( Buffer.alloc( headerLength + maxMimeLength ))
+    await out.write( Buffer.alloc( headerLength + maxMimeLength ))
     var metadata = osPath.join( srcPath, 'metadata.db' )
-    if ( fs.existsSync( metadata )) {
+    if ( await fs.exists( metadata )) {
         preProcessed = true
-        return openMetadata( metadata )
-        .then( () => loadMimeTypes())
+        try {
+            mainPage.urlKey = await fs.readFile( osPath.join( srcPath, 'mainpage' ))
+        } catch ( err ) {
+        }
+        await openMetadata( metadata )
+        return loadMimeTypes()
+    } else {
+        await newMetadata()
+        return fillInMetadata()
     }
-    return newMetadata()
-    .then( () => loadMetadata())
 }
-function rawLoader () {
+async function rawLoader () {
     const dirs = [ '' ]
-    function scanDirectories ( path ) {
-        return Promise.coroutine( function* () {
-            for ( let path; ( path = dirs.shift()) != null; ) {
-                log( 'scanDirectory', path )
-                yield Promise.map(
-                    fs.readdir( fullPath( path )),
-                    fname => parseDirEntry( osPath.join( path, fname )),
-                    { concurrency: 8 }
-                )
-            }
-        }) ()
-    }
     function parseDirEntry ( path ) {
         if ( path == 'metadata.csv' || path == 'redirects.csv' )
             return Promise.resolve()
@@ -1448,13 +1398,22 @@ function rawLoader () {
     }
     log( 'rawLoader start' )
-    return scanDirectories() // scan Directories
-    .then( () => log( 'rawLoader finished !!!!!!!!!' ))
+    for ( let path; ( path = dirs.shift()) != null; ) {
+        log( 'scanDirectory', path )
+        await Promise.map(
+            fs.readdir( fullPath( path )),
+            fname => parseDirEntry( osPath.join( path, fname )),
+            { concurrency: 8 }
+        )
+    }
+    log( 'rawLoader finished !!!!!!!!!' )
 }
-function loadPreProcessedArticles () {
-    return Promise.coroutine( function* () {
-        var stmt = yield indexerDb.prepare( `
+async function loadPreProcessedArticles () {
+    var stmt = await indexerDb.prepare( `
         SELECT
             id ,
             mimeId ,
@@ -1465,14 +1424,14 @@ function loadPreProcessedArticles () {
         WHERE mimeId IS NOT 0xffff
     ;`)
     while ( true ) {
-        const row = yield stmt.get()
+        const row = await stmt.get()
         if ( ! row ) {
             break
         }
         var nameSpace = row.urlKey[ 0 ]
         var path = row.urlKey.substr( 1 )
         var title = ( row.titleKey == row.urlKey ) ? '' : row.titleKey.substr( 1 )
-        yield new File( {
+        await new File( {
             nameSpace,
             path,
             title,
@@ -1482,13 +1441,11 @@ function loadPreProcessedArticles () {
         } )
         .process()
     }
-    yield stmt.finalize()
-    }) ()
+    return stmt.finalize()
 }
-function loadMimeTypes () {
-    return Promise.coroutine( function * () {
-        var stmt = yield indexerDb.prepare( `
+async function loadMimeTypes () {
+    var stmt = await indexerDb.prepare( `
         SELECT
             id ,
             value
@@ -1496,48 +1453,40 @@ function loadMimeTypes () {
         ORDER BY id
     ;`)
     while ( true ) {
-        const row = yield stmt.get()
+        const row = await stmt.get()
         if ( ! row ) {
             break
         }
         mimeTypeList.push( row.value )
     }
-    yield stmt.finalize()
-    }) ()
+    return stmt.finalize()
 }
-function loadRawArticles () {
-    return Promise.resolve()
-    .then( () => rawLoader())
-    .then( () => loadRedirects())
+async function loadRawArticles () {
+    await rawLoader()
+    return loadRedirects()
 }
-function postProcess () {
-    return Promise.coroutine( function* () {
-        yield ClusterWriter.finish()
-        yield sortArticles()
-        yield resolveRedirects()
-        yield storeUrlIndex()
-        yield storeTitleIndex()
-    }) ()
+async function postProcess () {
+    await ClusterWriter.finish()
+    await sortArticles()
+    await resolveRedirects()
+    await storeUrlIndex()
+    return storeTitleIndex()
 }
-function finalise () {
-    return Promise.coroutine( function* () {
-        header.checksumPos = yield out.close() // close the output stream
-        yield indexerDb.close()
-        yield storeHeader()
-        yield calculateFileHash()
-    }) ()
+async function finalise () {
+    header.checksumPos = await out.close() // close the output stream
+    await indexerDb.close()
+    await storeHeader()
+    return calculateFileHash()
 }
-function core () {
-    return Promise.coroutine( function* () {
-        yield initialise()
-        yield preProcessed ? loadPreProcessedArticles() : loadRawArticles()
-        yield postProcess()
-        yield finalise()
-    }) ()
+async function core () {
+    await initialise()
+    await ( preProcessed ? loadPreProcessedArticles() : loadRawArticles() )
+    await postProcess()
+    await finalise()
 }
 // Mandatory arguments:
@@ -1607,12 +1556,8 @@ function main () {
         outPath = parsed.base + '.zim'
     }
-    if ( argv.minChunkSize ) {
-        ClusterSizeThreshold = argv.minChunkSize * 1024
-    }
-    //~ mainPage = {
-    //~ title: argv.welcome
-    //~ }
+    //~ if ( argv.minChunkSize ) {
+    //~ ClusterSizeThreshold = argv.minChunkSize * 1024
+    //~ }
     core ()