racket/collects/meta/web/www/code.rkt
Eli Barzilay 9d8b0b3051 Add rel="nofollow" to short example links.
The examples make up the majority of the front page, and it turns out
that google used some of these links in "sitelinks" in search result.
These links should be the main entry points, and we ended up having such
gems as "Printf" be there.  They provide a way to "demote" URLS so they
won't show there, but doing that for one link means that another will
pop in (and there's a limit of a 100 such demotions).  So add this in an
attempt to make it not consider them as things that can be used in the
sitelinks.  This might be reverted if it doesn't help.

(Note that this is discouraged as a way to "sculpt pagerank", but the
purpose here is not to affect how they crawl the web pages and make page
reputation flow, but only to cover up for a missing feature that ends up
with horrible sitelinks.  The pages are scanned anyway from the doc
pages.)
2012-06-27 09:26:28 -04:00

117 lines
4.9 KiB
Racket

#lang meta/web
(require syntax-color/module-lexer setup/xref scribble/xref)
(provide code)
(define doc-root "http://docs.racket-lang.org/")
(define expand-namespace (make-base-namespace))
(define xref (load-collections-xref))
(define (code . strs)
(define str
(let* ([str (string-append* strs)]
[N (- 6 (length (regexp-match-positions* "\n" str)))])
(cond [(N . > . 0) (string-append str (make-string N #\newline))]
[(N . < . 0) (error 'code "too many lines in example: ~e" str)]
[else str])))
(define bstr (string->bytes/utf-8 (regexp-replace* #rx"(?m:^$)" str "\xA0")))
(define in (open-input-bytes bstr))
(define (substring* bstr start [end (bytes-length bstr)])
(bytes->string/utf-8 (subbytes bstr start end)))
(define e
(parameterize ([read-accept-reader #t] [current-namespace expand-namespace])
(expand (read-syntax 'prog (open-input-bytes bstr)))))
(define ids
(let loop ([e e])
(cond
[(and (identifier? e) (syntax-original? e))
(define pos (sub1 (syntax-position e)))
(define b (identifier-binding e))
(define imp?
(and (list? b)
(let-values ([(name base) (module-path-index-split (car b))])
(or name base))))
(define tag (and imp? (xref-binding->definition-tag xref e 0)))
(list (list (cond [(not imp?) 'id]
[(not tag) 'importid]
[else (define-values [p a]
(xref-tag->path+anchor
xref tag #:external-root-url doc-root))
(cons (if (eq? (car tag) 'form)
'linkimportform 'linkimportid)
(if a (format "~a#~a" p a) p))])
pos (+ pos (syntax-span e)) 1))]
[(syntax? e)
(append (loop (syntax-e e))
(loop (or (syntax-property e 'origin) null))
(loop (or (syntax-property e 'disappeared-use) null)))]
[(pair? e) (append (loop (car e)) (loop (cdr e)))]
[else null])))
(define (link-mod mp-stx priority #:orig? [always-orig? #f])
(if (or always-orig? (syntax-original? mp-stx))
(let ([mp (syntax->datum mp-stx)])
(define-values [p a]
(xref-tag->path+anchor xref `(mod-path ,(format "~s" mp))
#:external-root-url doc-root))
(if p
(list (let ([pos (sub1 (syntax-position mp-stx))])
(list (cons 'modpath (if a (format "~a#~a" p a) p))
pos (+ pos (syntax-span mp-stx)) priority)))
null))
null))
(define mods
(let loop ([e e])
(syntax-case e (module #%require begin)
[(module name lang (mod-beg form ...))
(append* (link-mod #'lang 2) (map loop (syntax->list #'(form ...))))]
[(#%require spec ...)
(append-map (λ (spec)
;; Need to add support for renaming forms, etc.:
(if (module-path? (syntax->datum spec))
(link-mod spec 2)
null))
(syntax->list #'(spec ...)))]
[(begin form ...) (append-map loop (syntax->list #'(form ...)))]
[else null])))
(define language
(let ([m (regexp-match #rx"^#lang ([-a-zA-Z/._+]+)" bstr)])
(if m
(link-mod #:orig? #t
(datum->syntax #f
(string->symbol (bytes->string/utf-8 (cadr m)))
(vector 'in 1 6 7 (bytes-length (cadr m))))
3)
null)))
(define raw-tokens
(let loop ([mode #f])
(define-values [lexeme type data start end backup-delta mode*]
(module-lexer in 0 mode))
(if (eof-object? lexeme)
null
(cons (list type (sub1 start) (sub1 end) 0) (loop mode*)))))
(define tokens
(sort (append ids mods language
(filter (λ (x) (not (eq? (car x) 'symbol)))
;; Drop #lang entry:
(cdr raw-tokens)))
(λ (a b) (or (< (cadr a) (cadr b))
(and (= (cadr a) (cadr b))
(> (cadddr a) (cadddr b)))))))
(let loop ([pos 0] [tokens tokens])
(cond
[(null? tokens) (list (substring* bstr pos))]
[(eq? (caar tokens) 'white-space) (loop pos (cdr tokens))]
[(= pos (cadar tokens))
(define style (caar tokens))
(define s (substring* bstr (cadar tokens) (caddar tokens)))
(cons (if (pair? style)
(a href: (cdr style) class: @list{code@(car style)}
rel: 'nofollow s)
(span class: @list{code@style} s))
(loop (caddar tokens) (cdr tokens)))]
[(> pos (cadar tokens)) (loop pos (cdr tokens))]
[else (cons (substring* bstr pos (cadar tokens))
(loop (cadar tokens) tokens))])))