wikizimmer.js: async/await
This commit is contained in:
parent
60c584ead6
commit
7efe44d042
|
@ -11,7 +11,7 @@ The major point is that `wikizimmer.js` unlike to [mwoffliner](https://github.co
|
|||
The package is relatively easy to install and it can even process some wikis running rather old versions of the MediaWiki engine.
|
||||
|
||||
## Installation
|
||||
Requirement: `node` version >= 6.x.
|
||||
Requirement: `node` version >= 8.x.
|
||||
|
||||
### With npm globally
|
||||
|
||||
|
|
|
@ -42,6 +42,9 @@
|
|||
"sqlite3": "*",
|
||||
"uuid": "*"
|
||||
},
|
||||
"engines" : {
|
||||
"node" : ">=8.0.0"
|
||||
},
|
||||
"bin": {
|
||||
"zimmer": "./zimmer.js",
|
||||
"wikizimmer": "./wikizimmer.js"
|
||||
|
|
|
@ -796,10 +796,9 @@ class GlobalCss extends StyleItem {
|
|||
.then( chunks => chunks.join( '\n' ))
|
||||
}
|
||||
|
||||
transformCss( cssUrl ) {
|
||||
return Promise.coroutine( function* () {
|
||||
async transformCss( cssUrl ) {
|
||||
let css = new StyleItem( cssUrl )
|
||||
const src = yield css.data()
|
||||
const src = await css.data()
|
||||
|
||||
// collect urls using dummy replacements
|
||||
const urlre = /(url\(['"]?)([^\)]*[^\)'"])(['"]?\))/g
|
||||
|
@ -811,7 +810,7 @@ class GlobalCss extends StyleItem {
|
|||
}
|
||||
return match
|
||||
})
|
||||
const resolvedUrls = yield Promise.all( requests )
|
||||
const resolvedUrls = await Promise.all( requests )
|
||||
const transformed = src.replace( urlre, ( match, start, url, end ) => {
|
||||
const rurl = resolvedUrls.shift()
|
||||
if ( rurl == null )
|
||||
|
@ -827,13 +826,11 @@ class GlobalCss extends StyleItem {
|
|||
${transformed}
|
||||
`
|
||||
return outcss
|
||||
}) ()
|
||||
}
|
||||
}
|
||||
|
||||
function processSamplePage ( samplePageUrl, rmdir) {
|
||||
return Promise.coroutine( function* () {
|
||||
const resp = yield requestPromise({
|
||||
async function processSamplePage ( samplePageUrl, rmdir) {
|
||||
const resp = await requestPromise({
|
||||
url: encodeurl( samplePageUrl ),
|
||||
resolveWithFullResponse: true,
|
||||
})
|
||||
|
@ -847,8 +844,8 @@ function processSamplePage ( samplePageUrl, rmdir) {
|
|||
const urlp = urlconv.parse( realUrl )
|
||||
wiki.saveDir = sanitizeFN( urlp.hostname )
|
||||
if ( rmdir )
|
||||
yield fs.remove( wiki.saveDir )
|
||||
yield fs.mkdirs( wiki.saveDir )
|
||||
await fs.remove( wiki.saveDir )
|
||||
await fs.mkdirs( wiki.saveDir )
|
||||
|
||||
const dom = cheerio.load( resp.body )
|
||||
const historyLink = dom('#ca-history a').attr('href')
|
||||
|
@ -864,7 +861,6 @@ function processSamplePage ( samplePageUrl, rmdir) {
|
|||
log(indexPhp, wiki.apiUrl)
|
||||
|
||||
return dom
|
||||
})()
|
||||
}
|
||||
|
||||
function loadTemplate () {
|
||||
|
@ -873,9 +869,8 @@ function loadTemplate () {
|
|||
.then( stub => (wiki.pageTemplate = stub))
|
||||
}
|
||||
|
||||
function getSiteInfo () {
|
||||
return Promise.coroutine(function* () {
|
||||
const resp = yield api ({
|
||||
async function getSiteInfo () {
|
||||
const resp = await api ({
|
||||
action: 'query',
|
||||
meta: 'siteinfo',
|
||||
siprop: 'general|namespaces|namespacealiases',
|
||||
|
@ -890,10 +885,9 @@ function getSiteInfo () {
|
|||
wiki.articleBase = info.general.base.split( wiki.articlePath )[0] + wiki.articlePath
|
||||
wiki.baseParsed = urlconv.parse( wiki.articleBase )
|
||||
wiki.nameSpaces = new NameSpaceSet( info )
|
||||
}) ()
|
||||
}
|
||||
|
||||
function saveMetadata () {
|
||||
async function saveMetadata () {
|
||||
|
||||
// Name yes A human readable identifier for the resource. It's the same across versions (should be stable across time). MUST be prefixed by the packager name. kiwix.wikipedia_en.nopics
|
||||
// Title yes title of zim file English Wikipedia
|
||||
|
@ -929,25 +923,21 @@ function saveMetadata () {
|
|||
Source: urlconv.resolve( wiki.articleBase, wiki.info.general.server ),
|
||||
}
|
||||
|
||||
return Promise.coroutine( function * () {
|
||||
yield new MainPage().process()
|
||||
yield new FavIcon().process()
|
||||
await new MainPage().process()
|
||||
await new FavIcon().process()
|
||||
|
||||
for ( let i in metadata ) {
|
||||
yield new Metadata( i, metadata[i] ).process()
|
||||
await new Metadata( i, metadata[i] ).process()
|
||||
}
|
||||
}) ()
|
||||
}
|
||||
|
||||
/**
 * Store every entry of `mimeIds` in the `mimeTypes` table of the indexer
 * database.
 *
 * Rows are written one at a time, each insert being awaited before the next
 * one starts. The stored id is the 1-based position of the entry in
 * `mimeIds` (index + 1).
 *
 * @returns {Promise<void>} resolves once the last insert has finished;
 *     rejects with whatever `indexerDb.run` rejects with.
 */
async function saveMimeTypes () {
    for ( let i=0, li=mimeIds.length; i < li; i++ ) {
        await indexerDb.run(
            'INSERT INTO mimeTypes (id, value) VALUES (?,?)',
            [ i + 1, mimeIds[ i ]]
        )
    }
}
|
||||
|
||||
function batchRedirects ( pageInfos ) {
|
||||
|
@ -998,11 +988,10 @@ function batchRedirects ( pageInfos ) {
|
|||
})
|
||||
}
|
||||
|
||||
function batchPages ( nameSpace ) {
|
||||
async function batchPages ( nameSpace ) {
|
||||
const queryPageLimit = 500
|
||||
const queryMaxTitles = 50
|
||||
|
||||
return Promise.coroutine( function* () {
|
||||
const exclude = command.exclude ?
|
||||
new RegExp( command.exclude ) :
|
||||
{ test: () => false }
|
||||
|
@ -1025,16 +1014,16 @@ function batchPages ( nameSpace ) {
|
|||
|
||||
let continueFrom = ''
|
||||
while ( true ) {
|
||||
yield indexerDb.run(
|
||||
await indexerDb.run(
|
||||
'INSERT OR REPLACE INTO continue (id, "from") VALUES (1, ?)',
|
||||
[ continueFrom ]
|
||||
)
|
||||
if ( continueFrom == null )
|
||||
break
|
||||
|
||||
yield indexerDb.run( 'BEGIN' )
|
||||
await indexerDb.run( 'BEGIN' )
|
||||
|
||||
const resp = yield api( query )
|
||||
const resp = await api( query )
|
||||
let pages = {}
|
||||
try {
|
||||
pages = resp.query.pages
|
||||
|
@ -1067,9 +1056,9 @@ function batchPages ( nameSpace ) {
|
|||
return new Article( pageInfo ).process()
|
||||
})
|
||||
done.push( batchRedirects( redirects ))
|
||||
yield Promise.all( done )
|
||||
await Promise.all( done )
|
||||
|
||||
yield indexerDb.run( 'COMMIT' )
|
||||
await indexerDb.run( 'COMMIT' )
|
||||
|
||||
continueFrom = null
|
||||
try {
|
||||
|
@ -1082,23 +1071,20 @@ function batchPages ( nameSpace ) {
|
|||
log( 'getPages', 'No continue key' )
|
||||
}
|
||||
}
|
||||
})()
|
||||
}
|
||||
|
||||
/**
 * Drive the page download, either for an explicit title list or for the
 * selected name spaces.
 *
 * When `command.titles` is set, `batchPages` is called once without a name
 * space argument. Otherwise `wiki.nameSpaces` is initialised from
 * `command.nameSpaces` and `batchPages` is awaited for each name space in
 * turn, so name spaces are processed sequentially rather than in parallel.
 *
 * @returns {Promise<void>} resolves after the final "done" message has been
 *     logged; rejects if any `batchPages` call rejects.
 */
async function getPages () {
    if ( command.titles ) {
        log( 'Titles', command.titles )
        await batchPages()
    } else {
        wiki.nameSpaces.init( command.nameSpaces )
        for ( let nameSpace of wiki.nameSpaces ) {
            log( 'Name Space', nameSpace )
            await batchPages( nameSpace )
        }
    }
    log( '**************** done' )
}
|
||||
|
||||
function loadCss( dom ) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user