From 959f71d0eace657899f9ab6b148c213067140f2a Mon Sep 17 00:00:00 2001 From: v Date: Sun, 3 Sep 2017 15:48:28 +0300 Subject: [PATCH] scripts installation --- README.md | 131 ++++++++++++++++++++++++++++++++++++++------------- package.json | 9 +++- 2 files changed, 105 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 6006a46..97bd593 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,112 @@ -# zimmer +***zimmer*** package is primarily a tool for creating a [ZIM](http://www.openzim.org/wiki/OpenZIM) dump from a Mediawiki-based wiki. -This is a nodejs [ZIM](http://www.openzim.org/wiki/OpenZIM) file creator -- mostly a drop-in replacement for [zimwriterfs](https://github.com/wikimedia/openzim/tree/master/zimwriterfs). +The package consists of 2 scripts: -A notable exception: it does't support *withFullTextIndex* option (as index format is not documented at the [OpenZIM wiki](http://www.openzim.org/wiki/ZIM_Index_Format)). +- wikizimmer.js dumps the wiki's articles (namespace 0) into a collection of static HTML files. + +- zimmer.js builds a ZIM file from a collection of static HTML files. Historically, zimmer.js is mostly a drop-in replacement for [zimwriterfs](https://github.com/wikimedia/openzim/tree/master/zimwriterfs) with a notable exception: it doesn't support *withFullTextIndex* option (index format is [not documented](http://www.openzim.org/wiki/ZIM_Index_Format)). + +The major point is that `wikizimmer.js`, unlike [mwoffliner](https://github.com/openzim/mwoffliner), doesn't depend on [Parsoid](https://www.mediawiki.org/wiki/Parsoid) and [Redis](https://redis.io/), and `zimmer.js`, unlike [zimwriterfs](https://github.com/wikimedia/openzim/tree/master/zimwriterfs), doesn't depend on [zimlib](http://www.openzim.org/wiki/Zimlib). + +The package is relatively easy to install and it can even process some wikis running rather old versions of the Mediawiki engine. 
## Installation -Requirement: `node` version >=6.x +Requirement: `node` version >= 6.x. + +### With npm globally + +``` +npm i -g git+https://github.com/vadp/zimmer +``` + +or + +### Manually * Clone *zimmer* from Github or download ZIP * Install dependencies: `npm install` -* Make `zimmer.js` executable +* Make `wikizimmer.js` and `zimmer.js` executable +* Optionally symlink both scripts into some directory available in your $PATH: -Optionaly to make it work as a replacement for *zimwriterfs*: -* Symlink *zimmer* as *zimwriterfs*: `ln -s zimmer.js /zimwriterfs` -* Make sure genuine *zimwriterfs* is not in the $PATH - -[mwoffliner](https://github.com/kiwix/mwoffliner), for example, then should pick the zimmer up instead of the *zimwriterfs* when it creates ZIM file. +``` + ln -s wikizimmer.js /wikizimmer + ln -s zimmer.js /zimmer +``` ## Usage -``` -zimmer.js [options]... HTML_DIRECTORY ZIM_FILE -Mandatory arguments: - -w, --welcome path of default/main HTML page. The path must be relative to HTML_DIRECTORY. - -f, --favicon path of ZIM file favicon. The path must be relative to HTML_DIRECTORY and the image a 48x48 PNG. - -l, --language language code of the content in ISO639-3 - -t, --title title of the ZIM file - -d, --description short description of the content - -c, --creator creator(s) of the content - -p, --publisher creator of the ZIM file itself - - HTML_DIRECTORY is the path of the directory containing the HTML pages you want to put in the ZIM file, - ZIM_FILE is the path of the ZIM file you want to obtain. - - Optional arguments: - -v, --verbose print processing details on STDOUT - -h, --help print this help - -m, --minChunkSize number of bytes per ZIM cluster (defaul: 4096) - -x, --inflateHtml try to inflate HTML files before packing (*.html, *.htm, ...) - -u, --uniqueNamespace put everything in the same namespace 'A'. Might be necessary to avoid problems with dynamic/javascript data loading. 
- -r, --redirects path to the CSV file with the list of redirects (url, title, target_url tab separated). -``` +The process of creating a ZIM file from a wiki consists of 2 parts. Example: -`./zimmer.js -t 'some title' [name_of_your_zim.zim] ` +* Dumping a wiki to a local collection of static HTML files: + +`wikizimmer https://en.wikivoyage.org/wiki/Pisa` + + will dump ***all*** `https://en.wikivoyage.org` articles to the directory `en.wikivoyage.org`. The URL to a particular page is quite important in this case as this page's styling is used as a template for all other pages in the dump, so wikivoyage listings, for example, are rendered correctly on the static pages of the dump. + +* Building a ZIM file: + +`zimmer --optimg en.wikivoyage.org` + +will pack the content of the `en.wikivoyage.org` into the `en.wikivoyage.org.zim`. zimmer.js with `--optimg` option will recompress the images in the dump to save some space. + + +## Command line options + +Run either of the scripts with the '--help' switch to see the list of all options available: + +``` +$ wikizimmer -h + + Usage: wikizimmer [options] + + Dump a static-HTML snapshot of a MediaWiki-powered wiki. + + Where: + wiki-page-URL URL of a sample page at the wiki to be dumped. + This page's styling will be used as a template for all pages in the dump. + + Options: + + -V, --version output the version number + -t, --titles get only titles listed (separated by "|") + -r, --rmdir delete destination directory before processing the source + -noimages don't download images + -nocss don't download page styling + -nopages don't save downloaded pages + -h, --help output usage information +``` + +``` +$ zimmer -h + + Usage: zimmer [options] [zim-file...] + + Pack a directory into a zim file + + Where: + source-directory path to the directory with HTML pages to pack into a ZIM file + zim-file optional path for the output + + Options: + + -V, --version output the version number + -w, --welcome path of default/main HTML page. 
The path must be relative to HTML_DIRECTORY + -f, --favicon path of ZIM file favicon. The path must be relative to HTML_DIRECTORY and the image a 48x48 PNG + -l, --language language code of the content in ISO639-3 + -t, --title title of the ZIM file + -d, --description <text> short description of the content + -c, --creator <text> creator(s) of the content + -p, --publisher <text> creator of the ZIM file itself + -v, --verbose print processing details on STDOUT + -m, --minChunkSize <size> number of bytes per ZIM cluster (default: 2048) + -x, --inflateHtml try to inflate HTML files before packing (*.html, *.htm, ...) + -u, --uniqueNamespace put everything in the same namespace "A". Might be necessary to avoid problems with dynamic/javascript data loading + -r, --redirects <path> path to the CSV file with the list of redirects (url, title, target_url tab separated) + --optimg optimise images + --jpegquality <factor> JPEG quality + -h, --help output usage information +``` + +**NB:** Most of the zimmer.js options are really optional if it's used in combination with wikizimmer.js, as the latter saves the relevant metadata into the dump directory. Perhaps only `--optimg` is important if you want to save some space. diff --git a/package.json b/package.json index 6e4084c..0cbe072 100644 --- a/package.json +++ b/package.json @@ -13,9 +13,11 @@ "pack" ], "dependencies": { + "animated-gif-detector": "^1.2.0", "bluebird": "*", "cheerio": "*", "child-process": "*", + "commander": "^2.11.0", "csv-parse": "*", "encodeurl": "^1.0.1", "expand-home-dir": "*", @@ -36,8 +38,11 @@ "sharp": "^0.17.3", "sqlite": "^2.8.0", "sqlite3": "*", - "uuid": "*", - "commander": "^2.11.0" + "uuid": "*" + }, + "bin": { + "zimmer": "./zimmer.js", + "wikizimmer": "./wikizimmer.js" }, "author": "Vadim Shlykahov", "license": "ISC"