diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..0aaaec0 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,21 @@ +name: Lint +on: + push: + tags: + - v* + branches: + - main + pull_request: +jobs: + golangci: + name: lint + runs-on: ubuntu-latest + steps: + - uses: actions/setup-go@v3 + with: + go-version: 1.20.x + - uses: actions/checkout@v3 + - uses: golangci/golangci-lint-action@v3 + with: + version: v1.52 + skip-go-installation: true diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..a535393 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,18 @@ +name: Test +on: + push: + tags: + - v* + branches: + - main + pull_request: +jobs: + test: + name: test + runs-on: ubuntu-latest + steps: + - uses: actions/setup-go@v3 + with: + go-version: 1.20.x + - uses: actions/checkout@v3 + - run: go test --race ./... diff --git a/.gitignore b/.gitignore index bb4765d..89f9ac0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1 @@ -clone -node_modules -package-lock.json +out/ diff --git a/LICENSE b/LICENSE index 895f945..2a9d64d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019 Henrique Dias +Copyright (c) 2023 Henrique Dias Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 4da3608..bd2af96 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,19 @@ -# xkcd-clone +# xkcd-archiver -[![](https://img.shields.io/npm/v/xkcd-clone.svg?style=flat-square)](https://www.npmjs.com/package/xkcd-clone) - -> Clones the xkcd comics. +> A cloner of the [XKCD](https://xkcd.com/) comics for archiving purposes. 
## Usage ``` -npx xkcd-clone -d [--empty] +Usage: + xkcd-archiver output [flags] + +Flags: + --empty empty output directory + -f, --from uint first comic to download (default 1) + -h, --help help for xkcd-archiver + --skip-html do not generate HTML files + -t, --to uint last comic to download ``` ## Contributing diff --git a/assets/comic.html b/assets/comic.html new file mode 100644 index 0000000..0349db2 --- /dev/null +++ b/assets/comic.html @@ -0,0 +1,39 @@ + + + + {{ .Num }} - {{ .Title }} - XKCD Archive + + + + + + +
+

{{ .Num }} - {{ .Title }}

+ + {{ if .Image }} +
+ +
+ {{ end }} +

+ This is an archival copy of the works + from xkcd.com. +

+
+ + diff --git a/assets/home.html b/assets/home.html new file mode 100644 index 0000000..237620f --- /dev/null +++ b/assets/home.html @@ -0,0 +1,24 @@ + + + + XKCD Archive + + + + + + +
+

XKCD Archive

+ +

+ This is an archival copy of the works + from xkcd.com. +

+
+ + diff --git a/assets/styles.css b/assets/styles.css new file mode 100644 index 0000000..94bcb4c --- /dev/null +++ b/assets/styles.css @@ -0,0 +1,68 @@ +*, +*::before, +*::after { + box-sizing: border-box; +} + +:root { + --accent: #96A8C8; + --accent-darker: #6E7B91; +} + +a { + color: var(--accent); + text-decoration: none; + font-weight: 800; +} + +body { + background-color: var(--accent); + text-align: center; + font-variant: small-caps; + font-family: Lucida, Helvetica, sans-serif; + margin: 0; + padding: 1rem; +} + +main { + background: white; + border-style: solid; + border-width: 1.5px; + border-color: #333; + border-radius: 12px; + max-width: 50rem; + margin: 0 auto; +} + +img { + max-width: 100%; +} + +nav a { + background-color: var(--accent-darker); + color: #FFF; + border: 1.5px solid #333; + font-weight: bold; + padding: 1.5px 12px; + margin: 0 4px; + text-decoration: none; + border-radius: 3px; + box-shadow: 0 0 5px 0 gray; + display: inline-block; +} + +nav a:hover { + background-color: #FFF; + color: var(--accent-darker); + box-shadow: none; +} + +ul { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(20rem, 1fr)); + text-align: left; + grid-column-gap: 1rem; + padding: 0; + list-style: none; + margin: 1rem; +} \ No newline at end of file diff --git a/bin/index.js b/bin/index.js deleted file mode 100644 index c40dae1..0000000 --- a/bin/index.js +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env node - -import fs from 'fs-extra' -import yargs from 'yargs' -import * as url from 'url' -import { hideBin } from 'yargs/helpers' -import { basename, extname, join } from 'path' -import { getLatestId, getComic } from '../lib/xkcd.js' -import { homePage, comicPage } from '../lib/html.js' -import { pad, progress } from '../lib/helpers.js' -import { createRequire } from 'module' - -const argv = yargs(hideBin(process.argv)) - .usage('$0', 'Clones XKCD comics. 
By default it only downloads the missing comics.') - .scriptName('xkcd-clone') - .option('dir', { - alias: 'd', - describe: 'Output directory', - type: 'string', - demandOption: true - }).option('empty', { - alias: 'e', - describe: 'Redownload all comics', - type: 'boolean' - }) - .help() - .argv - -async function write ({ data, img }, dir, latest) { - const hasImage = img !== null - - try { - await fs.outputJSON(join(dir, 'info.json'), data, { spaces: '\t' }) - await fs.outputFile(join(dir, 'index.html'), comicPage(data, latest, hasImage)) - - if (hasImage) { - await fs.outputFile(join(dir, basename(data.img)), Buffer.from(img)) - await fs.outputFile(join(dir, `image${extname(data.img)}`), Buffer.from(img)) - } - } catch (err) { - await fs.remove(dir) - throw err - } -} - -async function run () { - console.log(`😊 Going to clone XKCD to ${argv.dir}`) - - let added = [] - const errored = [] - - let latest = null - - try { - console.log('🔍 Finding the latest comic') - latest = await getLatestId() - console.log(`😁 Found! 
We're on comic number ${latest}!`) - - await fs.ensureDir(argv.dir) - if (argv.empty) { - await fs.emptyDir(argv.dir) - } - - for (let i = 1; i <= latest; i++) { - const num = pad(i, 4) - const dir = join(argv.dir, num) - - progress(`📦 Fetching ${i} out of ${latest}`) - - if (await fs.pathExists(dir)) { - const data = await fs.readJSON(join(dir, 'info.json')) - added.push({ id: i, title: data.title, num }) - await fs.outputFile(join(dir, 'index.html'), comicPage(data, latest)) - continue - } else if (i === 404) { - continue - } - - let comic = null - - const info = { - id: i, - dir: dir, - num: num - } - - try { - comic = await getComic(i) - info.title = comic.data.title - await write(comic, dir, latest) - added.push(info) - } catch (err) { - progress(`😢 Could not fetch ${i}, will try again later\n`) - errored.push(info) - } - } - } catch (err) { - console.log(`🐉 ${err.stack}`) - process.exit(1) - } - - for (const info of errored) { - const { id, dir, num } = info - for (let i = 0; i < 3; i++) { - try { - const comic = await getComic(id) - await write(comic, dir, latest) - added.push(info) - break - } catch (err) { - if (i === 2) { - console.log(`😢 ${num} could not be fetched: ${err.toString()}`) - } - } - } - } - - if (errored.length === 0) { - progress('📦 All comics fetched\n') - } else { - progress('📦 Some comics fetched\n') - } - - const require = createRequire(import.meta.url) - - added = added.sort((a, b) => a.num - b.num) - await fs.remove(join(argv.dir, 'latest')) - await fs.copy(join(argv.dir, pad(latest, 4)), join(argv.dir, 'latest')) - await fs.copyFile(join(require.resolve('tachyons'), '../tachyons.min.css'), join(argv.dir, 'tachyons.css')) - await fs.copyFile(join(require.resolve('tachyons-columns'), '../../css/tachyons-columns.min.css'), join(argv.dir, 'tachyons-columns.css')) - await fs.outputFile(join(argv.dir, 'index.html'), homePage(added)) -} - -run() diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..5da0de4 --- /dev/null +++ 
b/go.mod @@ -0,0 +1,13 @@ +module github.com/hacdias/xkcd-archiver + +go 1.20 + +require ( + github.com/karlseguin/typed v1.1.8 + github.com/spf13/cobra v1.7.0 +) + +require ( + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..b45094c --- /dev/null +++ b/go.sum @@ -0,0 +1,12 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/karlseguin/typed v1.1.8 h1:ND0eDpwiUFIrm/n1ehxUyh/XNGs9zkYrLxtGqENSalY= +github.com/karlseguin/typed v1.1.8/go.mod h1:pZlmYaWQ7MVpwfIOP88fASh3LopVxKeE+uNXW3hQ2D8= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/lib/helpers.js b/lib/helpers.js deleted file mode 100644 index a05803e..0000000 --- a/lib/helpers.js +++ /dev/null @@ -1,24 +0,0 @@ -export const progress = (str) => { - if (process.stdout.isTTY) { - process.stdout.clearLine() - process.stdout.cursorTo(0) - process.stdout.write(str) - } else { - console.log(str) - } -} - -export const pad = (str, max) => { - str = str.toString() - return str.length < max ? 
pad('0' + str, max) : str -} - -const tagsToReplace = { - '&': '&', - '<': '<', - '>': '>' -} - -export const escapeHtml = (html) => { - html.replace(/[&<>]/g, tag => tagsToReplace[tag] || tag) -} diff --git a/lib/html.js b/lib/html.js deleted file mode 100644 index 37f804d..0000000 --- a/lib/html.js +++ /dev/null @@ -1,73 +0,0 @@ -import { basename } from 'path' -import { pad, escapeHtml } from './helpers.js' - -const credits = '

This work is licensed under a Creative Commons Attribution-NonCommercial 2.5 License.
Originally from xkcd.com.

' - -const classes = { - body: 'bg-washed-blue navy sans-serif ml-auto mr-auto w-90', - title: 'tc mh0 mt4 mb3 f2 small-caps tracked', - btn: 'dib navy mh2 pa2 bg-light-blue hover-bg-lightest-blue br2 ba bw1 b--navy no-underline' -} - -export const comicPage = ({ alt, title, transcript, num, img }, latest) => ` - - ${num} - ${title} - - - - - - - -

${title} #${num}

- - - - -

${escapeHtml(transcript)}

- ${credits} - -` - -export const homePage = (list) => ` - - XKCD - - - - - - - - -

XKCD

- - - - - - - ${credits} - -` diff --git a/lib/xkcd.js b/lib/xkcd.js deleted file mode 100644 index 79701b0..0000000 --- a/lib/xkcd.js +++ /dev/null @@ -1,38 +0,0 @@ -import fetch from 'node-fetch' -import path from 'path' - -export async function getLatestId () { - const raw = await fetch('https://xkcd.com/info.0.json') - const data = await raw.json() - return data.num -} - -async function getImage (url) { - const ext = path.extname(url) - const url2x = `${path.dirname(url)}/${path.basename(url, ext)}_2x${ext}` - - let res = await fetch(url2x) - if (!res.ok) { - res = await fetch(url) - } - - if (!res.ok) { - throw new Error('bad image request') - } - - return res.arrayBuffer() -} - -export async function getComic (id) { - const raw = await fetch(`https://xkcd.com/${id}/info.0.json`) - const data = await raw.json() - let img = null - - // Some comics, such as 1608 and 1663, are composed by interactive - // games and cannot be downloaded as images, so we just ignore them. - if (data.img !== 'https://imgs.xkcd.com/comics/') { - img = await getImage(data.img) - } - - return { data, img } -} diff --git a/main.go b/main.go new file mode 100644 index 0000000..944eee3 --- /dev/null +++ b/main.go @@ -0,0 +1,203 @@ +package main + +import ( + "bytes" + "embed" + "fmt" + "html/template" + "io/fs" + "log" + "os" + "path/filepath" + "sort" + + "github.com/spf13/cobra" +) + +const ( + dirPermissions fs.FileMode = 0744 + filePermissions fs.FileMode = 0666 +) + +var ( + //go:embed assets/* + assets embed.FS +) + +func init() { + cmd.Flags().Bool("empty", false, "empty output directory") + cmd.Flags().Bool("skip-html", false, "do not generate HTML files") + cmd.Flags().UintP("from", "f", 1, "first comic to download") + cmd.Flags().UintP("to", "t", 0, "last comic to download") +} + +func main() { + if err := cmd.Execute(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} + +var cmd = &cobra.Command{ + Use: "xkcd-archiver output", + Args: cobra.ExactArgs(1), + RunE: 
func(cmd *cobra.Command, args []string) error {
+		// Comic 404 does not exist, so it is never downloaded or linked to.
+		const missingComic = 404
+
+		// These flags are registered in init with matching types, so the
+		// getters cannot fail and their errors are deliberately ignored.
+		empty, _ := cmd.Flags().GetBool("empty")
+		skipHTML, _ := cmd.Flags().GetBool("skip-html")
+		from, _ := cmd.Flags().GetUint("from")
+		to, _ := cmd.Flags().GetUint("to")
+		out := args[0]
+
+		latest, err := getLatestID()
+		if err != nil {
+			return fmt.Errorf("finding the latest comic: %w", err)
+		}
+
+		// Zero means "not set": start at the first comic, stop at the latest.
+		if from == 0 {
+			from = 1
+		}
+		if to == 0 {
+			to = latest
+		}
+
+		// Reject impossible ranges before touching the output directory.
+		if to > latest {
+			return fmt.Errorf("comic %d does not exist yet, the latest is %d", to, latest)
+		}
+		if from > to {
+			return fmt.Errorf("first comic (%d) is after last comic (%d)", from, to)
+		}
+
+		if empty {
+			if err := os.RemoveAll(out); err != nil {
+				return err
+			}
+		}
+
+		if err := os.MkdirAll(out, dirPermissions); err != nil {
+			return err
+		}
+
+		log.Printf("Downloading comics from %d to %d\n", from, to)
+
+		// Templates are only needed when HTML output is requested.
+		var comicTemplate, homeTemplate *template.Template
+		if !skipHTML {
+			if comicTemplate, err = getTemplate("comic"); err != nil {
+				return err
+			}
+			if homeTemplate, err = getTemplate("home"); err != nil {
+				return err
+			}
+		}
+
+		home := &homeData{}
+
+		// TODO: the previous implementation retried failed downloads a few
+		// times before giving up; here the first failure aborts the run.
+		for id := from; id <= to; id++ {
+			if id == missingComic {
+				continue
+			}
+
+			comicDir := filepath.Join(out, fmt.Sprintf("%d", id))
+			metadata, err := getComic(comicDir, id)
+			if err != nil {
+				return fmt.Errorf("comic %d: %w", id, err)
+			}
+
+			// With --skip-html only the image and info.json are archived.
+			if skipHTML {
+				continue
+			}
+
+			data := &comicData{
+				Num:   id,
+				Title: metadata.String("title"),
+				Alt:   metadata.String("alt"),
+				Image: metadata.String("img"),
+			}
+
+			// Neighbour links step over the missing comic so that they
+			// never point at a page that is not generated.
+			prev := id - 1
+			if prev == missingComic {
+				prev--
+			}
+			if prev >= 1 {
+				data.Prev = fmt.Sprintf("../%d/", prev)
+			}
+
+			next := id + 1
+			if next == missingComic {
+				next++
+			}
+			if next <= latest {
+				data.Next = fmt.Sprintf("../%d/", next)
+			}
+
+			if err := writeHTML(filepath.Join(comicDir, "index.html"), comicTemplate, data); err != nil {
+				return err
+			}
+
+			home.Comics = append(home.Comics, data)
+		}
+
+		if skipHTML {
+			return nil
+		}
+
+		// The home page lists the newest comics first.
+		sort.Slice(home.Comics, func(i, j int) bool {
+			return home.Comics[i].Num > home.Comics[j].Num
+		})
+
+		if err := writeHTML(filepath.Join(out, "index.html"), homeTemplate, home); err != nil {
+			return err
+		}
+
+		styles, err := assets.ReadFile("assets/styles.css")
+		if err != nil {
+			return err
+		}
+
+		return os.WriteFile(filepath.Join(out, "styles.css"), styles, filePermissions)
+	},
+}
+
+// comicData is the data passed to the comic page template.
+type comicData struct {
+	Num   uint   // comic number
+	Title string // "title" field of the comic metadata
+	Alt   string // "alt" field of the comic metadata
+	Prev  string // relative link to the previous comic, empty if there is none
+	Next  string // relative link to the next comic, empty if there is none
+	Image string // "img" field of the comic metadata, empty if there is no image
+}
+
+// homeData is the data passed to the home page template.
+type homeData struct {
+	Comics []*comicData
+}
+
+// getTemplate parses the embedded HTML template assets/<name>.html.
+func getTemplate(name string) (*template.Template, error) {
+	// Embedded file system paths always use forward slashes.
+	raw, err := assets.ReadFile("assets/" + name + ".html")
+	if err != nil {
+		return nil, err
+	}
+	return template.New(name).Parse(string(raw))
+}
+
+// writeHTML renders tmpl with data and writes the result to filename. The
+// template is rendered into memory first so that a failed render does not
+// leave a truncated file behind.
+func writeHTML(filename string, tmpl *template.Template, data any) error {
+	var b bytes.Buffer
+	if err := tmpl.Execute(&b, data); err != nil {
+		return err
+	}
+	return os.WriteFile(filename, b.Bytes(), filePermissions)
+}
diff --git a/package.json b/package.json
deleted file mode 100644
index ebe74ec..0000000
--- a/package.json
+++ /dev/null
@@ -1,19 +0,0 @@
-{
-  "name": "xkcd-clone",
-  "version": "2.0.1",
-  "description": "Clone xkcd comics.",
-  "main": "./lib/index.js",
-  "type": "module",
-  "bin": {
-    "xkcd-clone": "./bin/index.js"
-  },
-  "license": "MIT",
-  "author": "Henrique Dias ",
-  "dependencies": {
-    "fs-extra": "^10.1.0",
-    "node-fetch": "^3.2.10",
-    "tachyons": "^4.12.0",
-    "tachyons-columns": "^1.0.5",
-    "yargs": "^17.5.1"
-  }
-}
diff --git a/xkcd.go b/xkcd.go
new file mode 100644
index 0000000..95f305e
--- /dev/null
+++ b/xkcd.go
@@ -0,0 +1,140 @@
+package main
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"github.com/karlseguin/typed"
+)
+ +func get(url string) ([]byte, error) { + res, err := http.DefaultClient.Get(url) + if err != nil { + return nil, err + } + defer res.Body.Close() + + if res.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", res.StatusCode) + } + + data, err := io.ReadAll(res.Body) + if err != nil { + return nil, err + } + + return data, nil +} + +func getJSON(url string) (typed.Typed, error) { + data, err := get(url) + if err != nil { + return nil, err + } + + m, err := typed.Json(data) + if err != nil { + return nil, err + } + + return m, nil +} + +func getComicMetadata(id uint) (typed.Typed, error) { + return getJSON(fmt.Sprintf("https://xkcd.com/%d/info.0.json", id)) +} + +func getLatestID() (uint, error) { + m, err := getJSON("https://xkcd.com/info.0.json") + if err != nil { + return 0, err + } + + id, ok := m.IntIf("num") + if !ok { + return 0, errors.New("key 'num' does not exist") + } + + return uint(id), nil +} + +func getImage(url string) ([]byte, error) { + ext := path.Ext(url) + url = strings.TrimSuffix(url, ext) + + data, err := get(url + "_2x" + ext) + if err != nil { + data, err = get(url + ext) + } + + return data, err +} + +// ensureComic fetches comic #id and stores it in out. Returns metadata. +func ensureComic(out string, id uint) (typed.Typed, error) { + err := os.MkdirAll(out, dirPermissions) + if err != nil { + return nil, err + } + + metadata, err := getComicMetadata(id) + if err != nil { + return nil, err + } + + // Some comics, such as 1608 and 1663, are composed by interactive + // games and cannot be downloaded as images, so we just ignore them. 
+ if imgURL := metadata.StringOr("img", "https://imgs.xkcd.com/comics/"); imgURL != "https://imgs.xkcd.com/comics/" { + imgBytes, err := getImage(imgURL) + if err != nil { + return nil, err + } + + imgName := path.Base(imgURL) + err = os.WriteFile(filepath.Join(out, imgName), imgBytes, filePermissions) + if err != nil { + return nil, err + } + + metadata["img"] = "./" + imgName + } else { + metadata["img"] = "" + } + + infoBytes, err := json.MarshalIndent(metadata, "", " ") + if err != nil { + return nil, err + } + + err = os.WriteFile(filepath.Join(out, "info.json"), infoBytes, filePermissions) + if err != nil { + return nil, err + } + + return metadata, nil +} + +// getComic gets comic #id metadata from out, or fetches from Internet. +func getComic(out string, id uint) (typed.Typed, error) { + _, err := os.Stat(out) + + if os.IsNotExist(err) { + return ensureComic(out, id) + } else if err == nil { + data, err := os.ReadFile(filepath.Join(out, "info.json")) + if err != nil { + return nil, err + } + + return typed.Json(data) + } + + return nil, err +}