feat: rewrite in Go

This commit is contained in:
Henrique Dias 2023-06-01 21:24:08 +02:00 committed by GitHub
parent e07dbb23e0
commit 708864997d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 551 additions and 297 deletions

21
.github/workflows/lint.yml vendored Normal file
View File

@ -0,0 +1,21 @@
name: Lint
on:
push:
tags:
- v*
branches:
- main
pull_request:
jobs:
golangci:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/setup-go@v3
with:
go-version: 1.20.x
- uses: actions/checkout@v3
- uses: golangci/golangci-lint-action@v3
with:
version: v1.52
skip-go-installation: true

18
.github/workflows/test.yml vendored Normal file
View File

@ -0,0 +1,18 @@
name: Test
on:
push:
tags:
- v*
branches:
- main
pull_request:
jobs:
test:
name: test
runs-on: ubuntu-latest
steps:
- uses: actions/setup-go@v3
with:
go-version: 1.20.x
- uses: actions/checkout@v3
- run: go test --race ./...

4
.gitignore vendored
View File

@ -1,3 +1 @@
clone
node_modules
package-lock.json
out/

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019 Henrique Dias
Copyright (c) 2023 Henrique Dias
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,13 +1,19 @@
# xkcd-clone
# xkcd-archiver
[![](https://img.shields.io/npm/v/xkcd-clone.svg?style=flat-square)](https://www.npmjs.com/package/xkcd-clone)
> Clones the xkcd comics.
> A cloner of the [XKCD](https://xkcd.com/) comics for archiving purposes.
## Usage
```
npx xkcd-clone -d <directory> [--empty]
Usage:
xkcd-archiver output [flags]
Flags:
--empty empty output directory
-f, --from uint first comic to download (default 1)
-h, --help help for xkcd-archiver
--skip-html do not generate HTML files
-t, --to uint last comic to download
```
## Contributing

39
assets/comic.html Normal file
View File

@ -0,0 +1,39 @@
{{- /*
  Page for a single comic. Executed with a comicData value:
    .Num, .Title  comic number and title
    .Alt          the comic's hover text (the "alt" field of xkcd's JSON)
    .Image        file name of the stored image, empty when there is none
    .Prev, .Next  relative links to the neighbouring comics, empty at the ends
*/ -}}
<!DOCTYPE html>
<html lang="en">
  <head>
    <title>{{ .Num }} - {{ .Title }} - XKCD Archive</title>
    <meta charset=utf-8>
    <meta name=viewport content="width=device-width,initial-scale=1">
    <meta name="robots" content="noindex">
    <link rel="stylesheet" href="../styles.css"/>
  </head>
  <body>
    <main>
      <h1>{{ .Num }} - {{ .Title }}</h1>
      <nav>
        {{- with .Prev }}
        <a href="{{ . }}" aria-label="Previous" role="button"><span aria-hidden="true">← Prev</span></a>
        {{- else }}
        <a aria-disabled="true" aria-label="Previous" role="button" tabindex="-1"><span aria-hidden="true">← Prev</span></a>
        {{- end }}
        <a role="button" href="../">Home</a>
        {{- with .Next }}
        <a href="{{ . }}" aria-label="Next" role="button"><span aria-hidden="true">Next →</span></a>
        {{- else }}
        <a aria-disabled="true" aria-label="Next" role="button" tabindex="-1"><span aria-hidden="true">Next →</span></a>
        {{- end }}
      </nav>
      {{ if .Image }}
      <figure>
        {{- /* As on xkcd.com: the hover text goes in title (tooltip), the comic title is the text alternative. */}}
        <img src="./{{ .Image }}" {{ with .Alt }}title="{{ . }}"{{ end }} alt="{{ .Title }}">
      </figure>
      {{ end }}
      <p>
        This is an <a rel="noopener noreferrer" href="https://github.com/hacdias/xkcd-archiver">archival copy</a> of the works
        from <a rel="noopener noreferrer" href="https://xkcd.com/">xkcd.com</a>.
      </p>
    </main>
  </body>
</html>

24
assets/home.html Normal file
View File

@ -0,0 +1,24 @@
{{- /*
  Index page of the archive. Executed with a value whose .Comics field is the
  list of comics to link; each entry provides .Num and .Title and is linked
  at ./<Num>/.
*/ -}}
<!DOCTYPE html>
<html>
<head>
<title>XKCD Archive</title>
<meta charset=utf-8>
<meta name=viewport content="width=device-width,initial-scale=1">
<meta name="robots" content="noindex">
<link rel="stylesheet" href="./styles.css"/>
</head>
<body>
<main>
<h1>XKCD Archive</h1>
<ul>
{{ range .Comics }}
<li><a href="./{{ .Num }}/">{{ .Num }} - {{ .Title }}</a></li>
{{ end }}
</ul>
<p>
This is an <a rel="noopener noreferrer" href="https://github.com/hacdias/xkcd-archiver">archival copy</a> of the works
from <a rel="noopener noreferrer" href="https://xkcd.com/">xkcd.com</a>.
</p>
</main>
</body>
</html>

68
assets/styles.css Normal file
View File

@ -0,0 +1,68 @@
/* Stylesheet shared by the index page and the per-comic pages. */

/* Size every element, including generated content, by its border box. */
*,
*::before,
*::after {
box-sizing: border-box;
}
/* Palette: --accent for the page background and links, --accent-darker for nav buttons. */
:root {
--accent: #96A8C8;
--accent-darker: #6E7B91;
}
a {
color: var(--accent);
text-decoration: none;
font-weight: 800;
}
body {
background-color: var(--accent);
text-align: center;
font-variant: small-caps;
font-family: Lucida, Helvetica, sans-serif;
margin: 0;
padding: 1rem;
}
/* White, rounded, bordered card that holds the page content, centred and capped at 50rem. */
main {
background: white;
border-style: solid;
border-width: 1.5px;
border-color: #333;
border-radius: 12px;
max-width: 50rem;
margin: 0 auto;
}
/* Never let an image overflow its container. */
img {
max-width: 100%;
}
/* Links inside <nav> are drawn as buttons. */
nav a {
background-color: var(--accent-darker);
color: #FFF;
border: 1.5px solid #333;
font-weight: bold;
padding: 1.5px 12px;
margin: 0 4px;
text-decoration: none;
border-radius: 3px;
box-shadow: 0 0 5px 0 gray;
display: inline-block;
}
/* Hover inverts the button colours and drops the shadow. */
nav a:hover {
background-color: #FFF;
color: var(--accent-darker);
box-shadow: none;
}
/* Lists are laid out as a grid of as many columns (at least 20rem wide) as fit, without bullets. */
ul {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(20rem, 1fr));
text-align: left;
grid-column-gap: 1rem;
padding: 0;
list-style: none;
margin: 1rem;
}

View File

@ -1,134 +0,0 @@
#!/usr/bin/env node
import fs from 'fs-extra'
import yargs from 'yargs'
import * as url from 'url'
import { hideBin } from 'yargs/helpers'
import { basename, extname, join } from 'path'
import { getLatestId, getComic } from '../lib/xkcd.js'
import { homePage, comicPage } from '../lib/html.js'
import { pad, progress } from '../lib/helpers.js'
import { createRequire } from 'module'
// Parsed command line: `--dir`/`-d` (required) is the output directory and
// `--empty`/`-e` asks for every comic to be downloaded again.
const argv = yargs(hideBin(process.argv))
  .usage('$0', 'Clones XKCD comics. By default it only downloads the missing comics.')
  .scriptName('xkcd-clone')
  .options({
    dir: {
      alias: 'd',
      describe: 'Output directory',
      type: 'string',
      demandOption: true
    },
    empty: {
      alias: 'e',
      describe: 'Redownload all comics',
      type: 'boolean'
    }
  })
  .help()
  .argv
/**
 * Stores one comic in `dir`: `info.json`, `index.html` and, when the comic has
 * an image, the image twice (under its original file name and as
 * `image.<ext>`). If anything fails the whole directory is removed and the
 * error is rethrown.
 *
 * @param {{data: object, img: ?ArrayBuffer}} comic metadata and raw image (null when there is none)
 * @param {string} dir directory of this comic
 * @param {number} latest number of the most recent comic
 */
async function write ({ data, img }, dir, latest) {
  const hasImage = img !== null
  const inDir = (name) => join(dir, name)

  try {
    await fs.outputJSON(inDir('info.json'), data, { spaces: '\t' })
    await fs.outputFile(inDir('index.html'), comicPage(data, latest, hasImage))
    if (!hasImage) {
      return
    }
    for (const name of [basename(data.img), `image${extname(data.img)}`]) {
      await fs.outputFile(inDir(name), Buffer.from(img))
    }
  } catch (err) {
    await fs.remove(dir)
    throw err
  }
}
/**
 * Entry point: clones every comic from 1 to the latest one into `argv.dir`,
 * then writes the `latest` copy, the stylesheets and the home page.
 *
 * Comics whose directory already exists are not downloaded again; only their
 * `index.html` is regenerated from the stored `info.json`. Comics that fail
 * on the first pass are retried up to three times after the main loop.
 * Any error outside the per-comic try/catch ends the process with status 1.
 */
async function run () {
console.log(`😊 Going to clone XKCD to ${argv.dir}`)
// `added` feeds the home page; `errored` collects comics to retry later.
let added = []
const errored = []
let latest = null
try {
console.log('🔍 Finding the latest comic')
latest = await getLatestId()
console.log(`😁 Found! We're on comic number ${latest}!`)
await fs.ensureDir(argv.dir)
if (argv.empty) {
await fs.emptyDir(argv.dir)
}
for (let i = 1; i <= latest; i++) {
// Directories are named after the zero-padded, four-digit comic number.
const num = pad(i, 4)
const dir = join(argv.dir, num)
progress(`📦 Fetching ${i} out of ${latest}`)
if (await fs.pathExists(dir)) {
// Already cloned: reuse the stored metadata and only refresh the page
// (the "next" link depends on `latest`).
const data = await fs.readJSON(join(dir, 'info.json'))
added.push({ id: i, title: data.title, num })
await fs.outputFile(join(dir, 'index.html'), comicPage(data, latest))
continue
} else if (i === 404) {
// Number 404 is skipped without any request.
continue
}
let comic = null
const info = {
id: i,
dir: dir,
num: num
}
try {
comic = await getComic(i)
info.title = comic.data.title
await write(comic, dir, latest)
added.push(info)
} catch (err) {
progress(`😢 Could not fetch ${i}, will try again later\n`)
errored.push(info)
}
}
} catch (err) {
console.log(`🐉 ${err.stack}`)
process.exit(1)
}
// Second pass: up to three attempts per failed comic; only the last failure is reported.
// NOTE(review): `info.title` is not assigned on this path, so a comic whose
// first `getComic` call failed reaches the home page without a title.
for (const info of errored) {
const { id, dir, num } = info
for (let i = 0; i < 3; i++) {
try {
const comic = await getComic(id)
await write(comic, dir, latest)
added.push(info)
break
} catch (err) {
if (i === 2) {
console.log(`😢 ${num} could not be fetched: ${err.toString()}`)
}
}
}
}
if (errored.length === 0) {
progress('📦 All comics fetched\n')
} else {
progress('📦 Some comics fetched\n')
}
// `require.resolve` is used to locate the stylesheets shipped by the tachyons packages.
const require = createRequire(import.meta.url)
// `num` is a zero-padded string; the subtraction coerces it to a number.
added = added.sort((a, b) => a.num - b.num)
// Replace the `latest` directory with a copy of the newest comic's directory.
await fs.remove(join(argv.dir, 'latest'))
await fs.copy(join(argv.dir, pad(latest, 4)), join(argv.dir, 'latest'))
await fs.copyFile(join(require.resolve('tachyons'), '../tachyons.min.css'), join(argv.dir, 'tachyons.css'))
await fs.copyFile(join(require.resolve('tachyons-columns'), '../../css/tachyons-columns.min.css'), join(argv.dir, 'tachyons-columns.css'))
await fs.outputFile(join(argv.dir, 'index.html'), homePage(added))
}
run()

13
go.mod Normal file
View File

@ -0,0 +1,13 @@
module github.com/hacdias/xkcd-archiver
go 1.20
require (
github.com/karlseguin/typed v1.1.8
github.com/spf13/cobra v1.7.0
)
require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
)

12
go.sum Normal file
View File

@ -0,0 +1,12 @@
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/karlseguin/typed v1.1.8 h1:ND0eDpwiUFIrm/n1ehxUyh/XNGs9zkYrLxtGqENSalY=
github.com/karlseguin/typed v1.1.8/go.mod h1:pZlmYaWQ7MVpwfIOP88fASh3LopVxKeE+uNXW3hQ2D8=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -1,24 +0,0 @@
/**
 * Prints a progress message. On a terminal the current line is cleared and
 * overwritten in place; otherwise the message is logged as a regular line.
 *
 * @param {string} str message to show
 */
export const progress = (str) => {
  const out = process.stdout
  if (!out.isTTY) {
    console.log(str)
    return
  }
  out.clearLine()
  out.cursorTo(0)
  out.write(str)
}
/**
 * Left-pads a value with zeros until it is at least `max` characters long.
 * Values that are already long enough are returned unchanged (as a string).
 *
 * @param {number|string} str value to pad
 * @param {number} max minimum length of the result
 * @returns {string}
 */
export const pad = (str, max) => {
  let padded = str.toString()
  while (padded.length < max) {
    padded = '0' + padded
  }
  return padded
}
// Characters with a special meaning in HTML text and the entities that replace them.
const tagsToReplace = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;'
}

/**
 * Escapes `&`, `<` and `>` so a string can be placed inside an HTML element.
 *
 * The previous version computed the escaped string but never returned it, so
 * every caller received `undefined`.
 *
 * @param {?string} html raw text; `null`/`undefined` are treated as empty
 * @returns {string} the escaped text
 */
export const escapeHtml = (html) => {
  if (html === undefined || html === null) {
    return ''
  }
  return String(html).replace(/[&<>]/g, tag => tagsToReplace[tag] || tag)
}

View File

@ -1,73 +0,0 @@
import { basename } from 'path'
import { pad, escapeHtml } from './helpers.js'
// Footer paragraph appended to every generated page.
const credits = '<p class="mv4 tc f6 small-caps">This work is licensed under a Creative Commons Attribution-NonCommercial 2.5 License.<br>Originally from <a target="_blank" class="blue hover-dark-blue no-underline" href="https://xkcd.com/">xkcd.com</a>.</p>'
// CSS class lists shared by the comic page and the home page.
const classes = {
body: 'bg-washed-blue navy sans-serif ml-auto mr-auto w-90',
title: 'tc mh0 mt4 mb3 f2 small-caps tracked',
btn: 'dib navy mh2 pa2 bg-light-blue hover-bg-lightest-blue br2 ba bw1 b--navy no-underline'
}
/**
 * Renders the HTML page of one comic.
 *
 * Fixes over the previous version: every text value is escaped (all
 * occurrences, not just the first double quote of the hover text), the
 * transcript no longer renders as "undefined", and comics without an image
 * do not get a broken <img> tag.
 *
 * @param {object} comic xkcd metadata: `alt` (hover text), `title`, `transcript`, `num`, `img` (image URL)
 * @param {number} latest number of the most recent comic; it gets no "Next" target
 * @param {boolean} [hasImage=true] false when no image was downloaded for this comic
 * @returns {string} the page markup
 */
export const comicPage = ({ alt, title, transcript, num, img }, latest, hasImage = true) => {
  // Local on purpose: safe for both element content and double-quoted attributes.
  const esc = (value) => String(value === undefined || value === null ? '' : value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')

  const prevHref = num !== 1 ? `../${pad(num - 1, 4)}/index.html` : '#'
  const nextHref = num !== latest ? `../${pad(num + 1, 4)}/index.html` : '#'
  // Image-less comics carry a bare directory URL in `img`, which has no file name to link.
  const showImage = hasImage && typeof img === 'string' && !img.endsWith('/')
  const image = showImage ? `<img src="./${basename(img)}" title="${esc(alt)}">` : ''

  return `<html>
  <head>
    <title>${num} - ${esc(title)}</title>
    <meta charset=utf-8>
    <meta name=viewport content="width=device-width,initial-scale=1">
    <meta name="robots" content="noindex">
    <link rel="stylesheet" href="../tachyons.css"/>
    <link rel="stylesheet" href="../tachyons-columns.css"/>
  </head>
  <body class="${classes.body} mw7 tc">
    <h1 class="${classes.title}">${esc(title)} <span class="light-blue">#${num}</span></h1>
    <nav class="mv3">
      <a class="${classes.btn}" href="${prevHref}"><span class="gray"></span> Prev</a>
      <a class="${classes.btn}" href="../index.html">Home</a>
      <a class="${classes.btn}" href="${nextHref}">Next <span class="gray"></span></a>
    </nav>
    ${image}
    <p class="dn">${esc(transcript)}</p>
    ${credits}
  </body>
</html>`
}
/**
 * Renders the home page: a "Random" and a "Latest" button followed by one
 * link per comic.
 *
 * The "Latest" link is built from the last element of `list`, so the list
 * must be non-empty and ordered with the newest comic last. The "Random"
 * link is filled in by the inline script at the bottom of the page, which
 * picks one of the rendered list items.
 * NOTE(review): the `goToRandom` helper in the first inline script is not
 * called anywhere in this markup.
 *
 * @param {Array<{id: number, title: string, num: string}>} list comics to link; `num` is the directory name
 * @returns {string} the page markup
 */
export const homePage = (list) => `<html>
<head>
<title>XKCD</title>
<meta charset=utf-8>
<meta name=viewport content="width=device-width,initial-scale=1">
<meta name="robots" content="noindex">
<link rel="stylesheet" href="./tachyons.css"/>
<link rel="stylesheet" href="./tachyons-columns.css"/>
</head>
<body class="${classes.body} mw8">
<script>
function goToRandom(min, max) {
min = Math.ceil(min);
max = Math.floor(max);
return Math.floor(Math.random() * (max - min)) + min; //The maximum is exclusive and the minimum is inclusive
}
</script>
<h1 class="${classes.title}">XKCD</h1>
<nav class="mv3 tc">
<a class="${classes.btn}" id="random" href="">Random</a>
<a class="${classes.btn}" href="${pad(list[list.length - 1].num, 4)}/index.html">Latest</a>
</nav>
<ul class="list pa0 ma0 cc2-m cc3-l">
${list.map(({ id, title, num }) => `<li class="mv1">
<a class="blue hover-dark-blue no-underline" href="./${num}/index.html"><span class="b">${id}</span> - ${title}</a>
</li>`).join('\n')}
</ul>
<script>
var el = document.getElementById('random')
var comics = document.querySelector('.list').children
el.href = comics[Math.floor(Math.random() * (comics.length))].querySelector('a').href
</script>
${credits}
</body>
</html>`

View File

@ -1,38 +0,0 @@
import fetch from 'node-fetch'
import path from 'path'
/**
 * Asks xkcd for the metadata of the current comic and returns its number.
 *
 * @returns {Promise<number>} number of the most recent comic
 * @throws {Error} when the server does not answer with a success status;
 *   previously such a response was handed to the JSON parser and surfaced
 *   as an unrelated parse error
 */
export async function getLatestId () {
  const res = await fetch('https://xkcd.com/info.0.json')
  if (!res.ok) {
    throw new Error(`could not fetch the latest comic: HTTP ${res.status}`)
  }
  const data = await res.json()
  return data.num
}
/**
 * Downloads a comic image, preferring the `_2x` variant of the given URL and
 * falling back to the URL itself when that request is not successful.
 *
 * @param {string} url image URL from the comic metadata
 * @returns {Promise<ArrayBuffer>} raw image bytes
 * @throws {Error} 'bad image request' when neither request succeeds
 */
async function getImage (url) {
  const ext = path.extname(url)
  const candidates = [
    `${path.dirname(url)}/${path.basename(url, ext)}_2x${ext}`,
    url
  ]

  let res = null
  for (const candidate of candidates) {
    res = await fetch(candidate)
    if (res.ok) {
      break
    }
  }

  if (!res.ok) {
    throw new Error('bad image request')
  }
  return res.arrayBuffer()
}
/**
 * Fetches the metadata of comic `id` and, when it has one, its image.
 *
 * @param {number} id comic number
 * @returns {Promise<{data: object, img: ?ArrayBuffer}>} metadata plus raw image bytes (`null` when there is no image)
 * @throws {Error} when the metadata request does not succeed (previously the
 *   error body was passed to the JSON parser) or the image cannot be fetched
 */
export async function getComic (id) {
  const res = await fetch(`https://xkcd.com/${id}/info.0.json`)
  if (!res.ok) {
    throw new Error(`could not fetch comic ${id}: HTTP ${res.status}`)
  }
  const data = await res.json()

  let img = null

  // Some comics, such as 1608 and 1663, are composed by interactive
  // games and cannot be downloaded as images, so we just ignore them.
  if (data.img !== 'https://imgs.xkcd.com/comics/') {
    img = await getImage(data.img)
  }

  return { data, img }
}

203
main.go Normal file
View File

@ -0,0 +1,203 @@
package main
import (
"bytes"
"embed"
"fmt"
"html/template"
"io/fs"
"log"
"os"
"path/filepath"
"sort"
"github.com/spf13/cobra"
)
const (
	// dirPermissions is the mode of every directory the archiver creates.
	// Group and others need the execute bit to enter a directory and open the
	// files inside it; the previous 0744 let them list names but nothing else,
	// which breaks serving the archive from another account.
	dirPermissions fs.FileMode = 0755
	// filePermissions is the mode requested for every written file, before
	// the process umask is applied.
	filePermissions fs.FileMode = 0666
)
var (
//go:embed assets/*
assets embed.FS
)
// init registers the command-line flags on the root command.
func init() {
	flags := cmd.Flags()

	flags.Bool("empty", false, "empty output directory")
	flags.Bool("skip-html", false, "do not generate HTML files")
	flags.UintP("from", "f", 1, "first comic to download")
	flags.UintP("to", "t", 0, "last comic to download")
}
// main runs the root command and exits with status 1, after reporting the
// error on standard error, when it fails.
func main() {
	err := cmd.Execute()
	if err == nil {
		return
	}

	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}
// missingComic is the one number in the sequence without a comic behind it,
// so it is neither fetched nor linked to.
const missingComic uint = 404

// cmd is the root command. Its single argument is the output directory. It
// makes sure every comic in [--from, --to] is stored there (comics that are
// already present are not downloaded again) and, unless --skip-html is set,
// renders one page per comic, the index page and the stylesheet.
//
// The first failure aborts the run; comics stored so far are reused by the
// next invocation.
var cmd = &cobra.Command{
	Use:  "xkcd-archiver output",
	Args: cobra.ExactArgs(1),
	// main prints the returned error itself; without this cobra prints it too.
	SilenceErrors: true,
	RunE: func(cmd *cobra.Command, args []string) error {
		// Once RunE runs the invocation was well-formed, so a failure from
		// here on is a runtime problem and the usage text would only be noise.
		cmd.SilenceUsage = true

		// The flags are registered in init with exactly these names and
		// types, so the lookups cannot fail.
		empty, _ := cmd.Flags().GetBool("empty")
		skipHTML, _ := cmd.Flags().GetBool("skip-html")
		from, _ := cmd.Flags().GetUint("from")
		to, _ := cmd.Flags().GetUint("to")
		out := args[0]

		latest, err := getLatestID()
		if err != nil {
			return fmt.Errorf("finding the latest comic: %w", err)
		}

		// Zero means "not set": start at the first comic, stop at the latest.
		if from == 0 {
			from = 1
		}
		if to == 0 {
			to = latest
		}
		if to > latest {
			return fmt.Errorf("comic %d does not exist yet, the latest is %d", to, latest)
		}
		if from > to {
			return fmt.Errorf("first comic (%d) is after last comic (%d)", from, to)
		}

		if empty {
			if err := os.RemoveAll(out); err != nil {
				return err
			}
		}
		if err := os.MkdirAll(out, dirPermissions); err != nil {
			return err
		}

		log.Printf("Downloading comics from %d to %d\n", from, to)

		comicTemplate, err := getTemplate("comic")
		if err != nil {
			return err
		}
		homeTemplate, err := getTemplate("home")
		if err != nil {
			return err
		}

		var comics []*comicData
		for id := from; id <= to; id++ {
			if id == missingComic {
				continue
			}

			comicDir := filepath.Join(out, fmt.Sprintf("%d", id))
			metadata, err := getComic(comicDir, id)
			if err != nil {
				return fmt.Errorf("comic %d: %w", id, err)
			}
			if skipHTML {
				continue
			}

			data := &comicData{
				Num:   id,
				Title: metadata.String("title"),
				Alt:   metadata.String("alt"),
				Image: metadata.String("img"),
			}

			// Neighbour links step over the missing comic so that they never
			// point at a directory that is not created.
			prev, next := id-1, id+1
			if prev == missingComic {
				prev--
			}
			if next == missingComic {
				next++
			}
			if prev >= 1 {
				data.Prev = fmt.Sprintf("../%d/", prev)
			}
			if next <= latest {
				data.Next = fmt.Sprintf("../%d/", next)
			}

			if err := renderToFile(comicTemplate, data, filepath.Join(comicDir, "index.html")); err != nil {
				return fmt.Errorf("comic %d: %w", id, err)
			}
			comics = append(comics, data)
		}

		if skipHTML {
			return nil
		}

		// Newest comic first on the index page.
		sort.Slice(comics, func(i, j int) bool {
			return comics[i].Num > comics[j].Num
		})
		if err := renderToFile(homeTemplate, &homeData{Comics: comics}, filepath.Join(out, "index.html")); err != nil {
			return err
		}

		styles, err := assets.ReadFile("assets/styles.css")
		if err != nil {
			return err
		}
		return os.WriteFile(filepath.Join(out, "styles.css"), styles, filePermissions)
	},
}

// renderToFile executes tmpl with data and writes the result to name. The
// output is buffered first so that a failing template does not leave a
// truncated file behind.
func renderToFile(tmpl *template.Template, data any, name string) error {
	var b bytes.Buffer
	if err := tmpl.Execute(&b, data); err != nil {
		return err
	}
	return os.WriteFile(name, b.Bytes(), filePermissions)
}
type (
	// comicData is the value the comic page template is executed with.
	comicData struct {
		// Num is the comic number.
		Num uint
		// Title and Alt are the "title" and "alt" fields of the comic metadata.
		Title, Alt string
		// Prev and Next are relative links to the neighbouring comics; an
		// empty string means there is no such neighbour.
		Prev, Next string
		// Image is the "img" field of the stored metadata; empty when the
		// comic has no image.
		Image string
	}

	// homeData is the value the home page template is executed with.
	homeData struct {
		// Comics are the entries of the index, in display order.
		Comics []*comicData
	}
)
// getTemplate parses the embedded template assets/<name>.html.
func getTemplate(name string) (*template.Template, error) {
	file := "assets/" + name + ".html"

	source, err := assets.ReadFile(file)
	if err != nil {
		return nil, err
	}

	tmpl := template.New("")
	return tmpl.Parse(string(source))
}

View File

@ -1,19 +0,0 @@
{
"name": "xkcd-clone",
"version": "2.0.1",
"description": "Clone xkcd comics.",
"main": "./lib/index.js",
"type": "module",
"bin": {
"xkcd-clone": "./bin/index.js"
},
"license": "MIT",
"author": "Henrique Dias <hacdias@gmail.com>",
"dependencies": {
"fs-extra": "^10.1.0",
"node-fetch": "^3.2.10",
"tachyons": "^4.12.0",
"tachyons-columns": "^1.0.5",
"yargs": "^17.5.1"
}
}

140
xkcd.go Normal file
View File

@ -0,0 +1,140 @@
package main
import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"path"
	"path/filepath"
	"strings"
	"time"

	"github.com/karlseguin/typed"
)
// httpClient is shared by all requests so that connections are reused. The
// timeout covers the whole exchange, including reading the body, so a stalled
// server cannot hang the archiver forever (http.DefaultClient never times out).
var httpClient = &http.Client{Timeout: time.Minute}

// get downloads url and returns the response body. Any status other than
// 200 OK is reported as an error that names the URL.
func get(url string) ([]byte, error) {
	res, err := httpClient.Get(url)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("GET %s: unexpected status code: %d", url, res.StatusCode)
	}

	data, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, fmt.Errorf("GET %s: %w", url, err)
	}
	return data, nil
}
// getJSON downloads url and decodes the body as a JSON object.
func getJSON(url string) (typed.Typed, error) {
	body, err := get(url)
	if err != nil {
		return nil, err
	}

	if object, err := typed.Json(body); err == nil {
		return object, nil
	} else {
		return nil, err
	}
}
// getComicMetadata fetches the JSON metadata of comic #id from xkcd.com.
func getComicMetadata(id uint) (typed.Typed, error) {
	endpoint := fmt.Sprintf("https://xkcd.com/%d/info.0.json", id)
	return getJSON(endpoint)
}
// getLatestID returns the number of the most recent comic, read from the
// "num" field of the current comic's metadata.
func getLatestID() (uint, error) {
	current, err := getJSON("https://xkcd.com/info.0.json")
	if err != nil {
		return 0, err
	}

	if num, found := current.IntIf("num"); found {
		return uint(num), nil
	}
	return 0, errors.New("key 'num' does not exist")
}
// getImage downloads the image at url, preferring its "_2x" variant (the
// suffix is inserted before the file extension) and falling back to url
// itself when that download fails.
func getImage(url string) ([]byte, error) {
	ext := path.Ext(url)
	stem := strings.TrimSuffix(url, ext)

	if doubled, err := get(stem + "_2x" + ext); err == nil {
		return doubled, nil
	}
	return get(stem + ext)
}
// ensureComic fetches comic #id and stores it in out. Returns metadata.
//
// The image, when the comic has one, is saved under its original file name
// and the "img" field of the metadata is rewritten to that relative path (or
// to "" when there is no image). info.json is written last.
//
// If any step fails, out is removed again: a directory left behind without a
// complete comic would otherwise be mistaken for an already stored one and
// make every later run fail on it.
func ensureComic(out string, id uint) (metadata typed.Typed, err error) {
	if err = os.MkdirAll(out, dirPermissions); err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			// Best effort: the original error is the one worth reporting.
			_ = os.RemoveAll(out)
		}
	}()

	metadata, err = getComicMetadata(id)
	if err != nil {
		return nil, err
	}

	// Some comics, such as 1608 and 1663, are composed by interactive
	// games and cannot be downloaded as images, so we just ignore them.
	const noImage = "https://imgs.xkcd.com/comics/"
	if imgURL := metadata.StringOr("img", noImage); imgURL != noImage {
		var imgBytes []byte
		imgBytes, err = getImage(imgURL)
		if err != nil {
			return nil, err
		}

		imgName := path.Base(imgURL)
		err = os.WriteFile(filepath.Join(out, imgName), imgBytes, filePermissions)
		if err != nil {
			return nil, err
		}

		metadata["img"] = "./" + imgName
	} else {
		metadata["img"] = ""
	}

	var infoBytes []byte
	infoBytes, err = json.MarshalIndent(metadata, "", " ")
	if err != nil {
		return nil, err
	}

	err = os.WriteFile(filepath.Join(out, "info.json"), infoBytes, filePermissions)
	if err != nil {
		return nil, err
	}

	return metadata, nil
}
// getComic gets comic #id metadata from out, or fetches from Internet.
//
// A comic counts as stored when out/info.json exists, because that is the
// last file written when a comic is saved. Checking the file rather than the
// directory means an interrupted or partial download is fetched again instead
// of failing forever on the missing info.json.
func getComic(out string, id uint) (typed.Typed, error) {
	data, err := os.ReadFile(filepath.Join(out, "info.json"))
	if os.IsNotExist(err) {
		return ensureComic(out, id)
	}
	if err != nil {
		return nil, err
	}
	return typed.Json(data)
}