wikizimmer.js: async/await

This commit is contained in:
v 2018-11-12 20:19:04 +03:00
parent 60c584ead6
commit 7efe44d042
3 changed files with 172 additions and 183 deletions

View File

@@ -11,7 +11,7 @@ The major point is that `wikizimmer.js` unlike to [mwoffliner](https://github.co
The package is relatively easy to install and it can even process some wikis running rather old versions of the Mediawiki engine. The package is relatively easy to install and it can even process some wikis running rather old versions of the Mediawiki engine.
## Installation ## Installation
Requirement: `node` version >= 6.x. Requirement: `node` version >= 8.x.
### With npm globally ### With npm globally

View File

@@ -42,6 +42,9 @@
"sqlite3": "*", "sqlite3": "*",
"uuid": "*" "uuid": "*"
}, },
"engines" : {
"node" : ">=8.0.0"
},
"bin": { "bin": {
"zimmer": "./zimmer.js", "zimmer": "./zimmer.js",
"wikizimmer": "./wikizimmer.js" "wikizimmer": "./wikizimmer.js"

View File

@@ -796,10 +796,9 @@ class GlobalCss extends StyleItem {
.then( chunks => chunks.join( '\n' )) .then( chunks => chunks.join( '\n' ))
} }
transformCss( cssUrl ) { async transformCss( cssUrl ) {
return Promise.coroutine( function* () {
let css = new StyleItem( cssUrl ) let css = new StyleItem( cssUrl )
const src = yield css.data() const src = await css.data()
// collect urls using dummy replacements // collect urls using dummy replacements
const urlre = /(url\(['"]?)([^\)]*[^\)'"])(['"]?\))/g const urlre = /(url\(['"]?)([^\)]*[^\)'"])(['"]?\))/g
@@ -811,7 +810,7 @@ class GlobalCss extends StyleItem {
} }
return match return match
}) })
const resolvedUrls = yield Promise.all( requests ) const resolvedUrls = await Promise.all( requests )
const transformed = src.replace( urlre, ( match, start, url, end ) => { const transformed = src.replace( urlre, ( match, start, url, end ) => {
const rurl = resolvedUrls.shift() const rurl = resolvedUrls.shift()
if ( rurl == null ) if ( rurl == null )
@@ -827,13 +826,11 @@ class GlobalCss extends StyleItem {
${transformed} ${transformed}
` `
return outcss return outcss
}) ()
} }
} }
function processSamplePage ( samplePageUrl, rmdir) { async function processSamplePage ( samplePageUrl, rmdir) {
return Promise.coroutine( function* () { const resp = await requestPromise({
const resp = yield requestPromise({
url: encodeurl( samplePageUrl ), url: encodeurl( samplePageUrl ),
resolveWithFullResponse: true, resolveWithFullResponse: true,
}) })
@@ -847,8 +844,8 @@ function processSamplePage ( samplePageUrl, rmdir) {
const urlp = urlconv.parse( realUrl ) const urlp = urlconv.parse( realUrl )
wiki.saveDir = sanitizeFN( urlp.hostname ) wiki.saveDir = sanitizeFN( urlp.hostname )
if ( rmdir ) if ( rmdir )
yield fs.remove( wiki.saveDir ) await fs.remove( wiki.saveDir )
yield fs.mkdirs( wiki.saveDir ) await fs.mkdirs( wiki.saveDir )
const dom = cheerio.load( resp.body ) const dom = cheerio.load( resp.body )
const historyLink = dom('#ca-history a').attr('href') const historyLink = dom('#ca-history a').attr('href')
@@ -864,7 +861,6 @@ function processSamplePage ( samplePageUrl, rmdir) {
log(indexPhp, wiki.apiUrl) log(indexPhp, wiki.apiUrl)
return dom return dom
})()
} }
function loadTemplate () { function loadTemplate () {
@@ -873,9 +869,8 @@ function loadTemplate () {
.then( stub => (wiki.pageTemplate = stub)) .then( stub => (wiki.pageTemplate = stub))
} }
function getSiteInfo () { async function getSiteInfo () {
return Promise.coroutine(function* () { const resp = await api ({
const resp = yield api ({
action: 'query', action: 'query',
meta: 'siteinfo', meta: 'siteinfo',
siprop: 'general|namespaces|namespacealiases', siprop: 'general|namespaces|namespacealiases',
@@ -890,10 +885,9 @@ function getSiteInfo () {
wiki.articleBase = info.general.base.split( wiki.articlePath )[0] + wiki.articlePath wiki.articleBase = info.general.base.split( wiki.articlePath )[0] + wiki.articlePath
wiki.baseParsed = urlconv.parse( wiki.articleBase ) wiki.baseParsed = urlconv.parse( wiki.articleBase )
wiki.nameSpaces = new NameSpaceSet( info ) wiki.nameSpaces = new NameSpaceSet( info )
}) ()
} }
function saveMetadata () { async function saveMetadata () {
// Name yes A human readable identifier for the resource. It's the same across versions (should be stable across time). MUST be prefixed by the packager name. kiwix.wikipedia_en.nopics // Name yes A human readable identifier for the resource. It's the same across versions (should be stable across time). MUST be prefixed by the packager name. kiwix.wikipedia_en.nopics
// Title yes title of zim file English Wikipedia // Title yes title of zim file English Wikipedia
@@ -929,25 +923,21 @@ function saveMetadata () {
Source: urlconv.resolve( wiki.articleBase, wiki.info.general.server ), Source: urlconv.resolve( wiki.articleBase, wiki.info.general.server ),
} }
return Promise.coroutine( function * () { await new MainPage().process()
yield new MainPage().process() await new FavIcon().process()
yield new FavIcon().process()
for ( let i in metadata ) { for ( let i in metadata ) {
yield new Metadata( i, metadata[i] ).process() await new Metadata( i, metadata[i] ).process()
} }
}) ()
} }
function saveMimeTypes () { async function saveMimeTypes () {
return Promise.coroutine( function * () {
for ( let i=0, li=mimeIds.length; i < li; i++ ) { for ( let i=0, li=mimeIds.length; i < li; i++ ) {
yield indexerDb.run( await indexerDb.run(
'INSERT INTO mimeTypes (id, value) VALUES (?,?)', 'INSERT INTO mimeTypes (id, value) VALUES (?,?)',
[ i + 1, mimeIds[ i ]] [ i + 1, mimeIds[ i ]]
) )
} }
}) ()
} }
function batchRedirects ( pageInfos ) { function batchRedirects ( pageInfos ) {
@@ -998,11 +988,10 @@ function batchRedirects ( pageInfos ) {
}) })
} }
function batchPages ( nameSpace ) { async function batchPages ( nameSpace ) {
const queryPageLimit = 500 const queryPageLimit = 500
const queryMaxTitles = 50 const queryMaxTitles = 50
return Promise.coroutine( function* () {
const exclude = command.exclude ? const exclude = command.exclude ?
new RegExp( command.exclude ) : new RegExp( command.exclude ) :
{ test: () => false } { test: () => false }
@@ -1025,16 +1014,16 @@ function batchPages ( nameSpace ) {
let continueFrom = '' let continueFrom = ''
while ( true ) { while ( true ) {
yield indexerDb.run( await indexerDb.run(
'INSERT OR REPLACE INTO continue (id, "from") VALUES (1, ?)', 'INSERT OR REPLACE INTO continue (id, "from") VALUES (1, ?)',
[ continueFrom ] [ continueFrom ]
) )
if ( continueFrom == null ) if ( continueFrom == null )
break break
yield indexerDb.run( 'BEGIN' ) await indexerDb.run( 'BEGIN' )
const resp = yield api( query ) const resp = await api( query )
let pages = {} let pages = {}
try { try {
pages = resp.query.pages pages = resp.query.pages
@@ -1067,9 +1056,9 @@ function batchPages ( nameSpace ) {
return new Article( pageInfo ).process() return new Article( pageInfo ).process()
}) })
done.push( batchRedirects( redirects )) done.push( batchRedirects( redirects ))
yield Promise.all( done ) await Promise.all( done )
yield indexerDb.run( 'COMMIT' ) await indexerDb.run( 'COMMIT' )
continueFrom = null continueFrom = null
try { try {
@@ -1082,23 +1071,20 @@ function batchPages ( nameSpace ) {
log( 'getPages', 'No continue key' ) log( 'getPages', 'No continue key' )
} }
} }
})()
} }
function getPages () { async function getPages () {
return Promise.coroutine( function* () {
if ( command.titles ) { if ( command.titles ) {
log( 'Titles', command.titles ) log( 'Titles', command.titles )
yield batchPages() await batchPages()
} else { } else {
wiki.nameSpaces.init( command.nameSpaces ) wiki.nameSpaces.init( command.nameSpaces )
for ( let nameSpace of wiki.nameSpaces ) { for ( let nameSpace of wiki.nameSpaces ) {
log( 'Name Space', nameSpace ) log( 'Name Space', nameSpace )
yield batchPages( nameSpace ) await batchPages( nameSpace )
} }
} }
log( '**************** done' ) log( '**************** done' )
})()
} }
function loadCss( dom ) { function loadCss( dom ) {