racket/collects/games/gobblet/explore.rkt
2010-04-27 16:50:15 -06:00

631 lines
23 KiB
Racket

;; This is the main search engine for auto-play.
;; See `make-search' for the main entry point.
(module explore mzscheme
(require mzlib/unitsig
mzlib/etc
mzlib/list
"sig.ss"
"test.ss")
(provide explore-unit)
;; Debugging:
(define-syntax (log-printf stx)
(syntax-case stx ()
[(_ n i arg ...)
(<= (syntax-e #'n) 0) ; adjust this number for print levels
#'(begin
(when (i . < . 100)
(printf arg ...))
(void))]
[(_ n i arg ...)
#'(void)]))
(define explore-unit
(unit/sig explore^
(import config^ model^)
(define-struct config (max-depth memory canonicalize rate-board canned-moves))
;; make-search : (canonicalize-proc -> (board sym num num -> num))
;; (canonicalize-proc hash-table -> (board sym compact xform -> plan))
;; -> search-proc
;; where canonicalize-proc = (board sym -> (cons compact xform))
;; and search-proc is below.
;; Returns a search procedure that embeds a canonicalization table and
;; a memory of canned moves from the `make-canned-moves' procedure.
;; The `make-canned-moves' proc can add to the given hash table, mapping
;; canonical compact boards to (listof (cons num plan)), where the
;; num for a plan is a rating for how good the plan is; +inf.0 means
;; forced win, and -inf.0 means forced loss. A plan is created
;; with `make-plan', described below.
(define (make-search make-rate-board make-canned-moves)
;; Long-term memory (i.e., spans searches)
(define init-memory (make-hash-table 'equal))
(define canonicalize (make-canonicalize))
(define rate-board (make-rate-board canonicalize))
(define canned-moves (make-canned-moves canonicalize init-memory))
(when learn?
(load-memory init-memory canonicalize))
;; search-proc : num num num sym board (listof board) -> play
;; Finds a move given search parameters, whose turn it is,
;; the current board, and a list of past boards (not
;; including the current one, used to avoid cycles in the game).
;; The result is a play, which can be applied to a board with
;; `apply-play'.
(lambda (timeout max-steps one-step-depth
me board history)
(let* ([result #f]
[once-sema (make-semaphore)]
[result-sema (make-semaphore)]
;; Short-term memory (i.e., discarded after this search)
[memory (make-hash-table 'equal)])
;; Record game-history boards as loop ties
(let loop ([history history][me (other me)])
(unless (null? history)
(let ([key+xform (canonicalize (car history) me)])
(hash-table-put! memory (car key+xform) LOOP-TIE))
(loop (cdr history) (other me))))
;; Copy canned and learned info into short-term memory:
(hash-table-for-each init-memory (lambda (k v) (hash-table-put! memory k v)))
;; Search in a background thread:
(let ([t (thread
(lambda ()
;; Try just one chunk of lookaheads, then loop
;; for more ambitious searches (if there's time)
(let loop ([steps (if (= timeout +inf.0)
max-steps
1)]
[max-depth (if (= timeout +inf.0)
one-step-depth
2)])
(set! result
;; ======== Here's where we get a move ============
(let ([v (multi-step-minmax
steps
3 ; span
(make-config
(min max-depth one-step-depth)
memory canonicalize rate-board canned-moves)
0 ; indent
init-memory
me board)])
(log-printf 1 0 "> ~a/~a Result: ~a~n"
steps (min max-depth one-step-depth)
(play->string v))
v))
;; We have at least one result, now.
(semaphore-post once-sema)
;; If we could learn more by searching deeper, then
;; do so.
(unless (or (and (= steps max-steps)
(one-step-depth . <= . max-depth))
((car result) . = . +inf.0)
((car result) . = . -inf.0))
(if (one-step-depth . <= . max-depth)
(loop (add1 steps) 2)
(loop steps (add1 max-depth)))))
(semaphore-post result-sema)))])
;; Sync with the background thread and return the result:
(sync/timeout timeout result-sema)
(semaphore-wait once-sema)
(kill-thread t)
(when (null? (cdr result))
(error 'search "didn't find a move!?"))
(cdr result)))))
;; `make-plan' takes a piece size, the source position (#f and #f for off
;; the board), the destination position, a xform inidcating how to
;; transform the positions into canonical positions, and a number
;; that estimates how many more steps until the end of game.
(define-struct plan (size from-i from-j to-i to-j xform turns))
;; apply-play : board play -> board
;; A play is (list piece from-i from-j to-i to-j turns)
;; where turns is an estimate of how many moves remain in
;; the game; the turns part is not used here (it can be left
;; out), but it is returned by a search-proc.
(define (apply-play board m)
(move board
(list-ref m 0)
(list-ref m 1)
(list-ref m 2)
(list-ref m 3)
(list-ref m 4)
(lambda (new-board)
new-board)
(lambda ()
(error 'apply-play "bad move: ~a" m))))
;; ------------------------------------------------------------
;; Checking and combining move plans
(define delay-loss? #t)
;; Discourage loops:
(define LOOP-TIE `((-1000.0 loop!)))
;; Translates a plan into the given coordinate xform
(define (xlate m xform)
(let-values ([(from-i from-j)
(if (plan-from-i m)
(unapply-xform xform (apply-xform (plan-xform m)
(plan-from-i m)
(plan-from-j m)))
(values #f #f))]
[(to-i to-j)
(unapply-xform xform (apply-xform (plan-xform m)
(plan-to-i m)
(plan-to-j m)))])
(make-plan (plan-size m) from-i from-j to-i to-j xform (plan-turns m))))
(define (found-win? v)
(and (pair? v)
(= (caar v) +inf.0)))
(define (immediate? v)
(and (pair? v) (zero? (get-depth (car v)))))
(define (found-lose? v)
(and (pair? v)
(= (caar v) -inf.0)))
(define (get-depth a)
(if (plan? (cdr a))
(plan-turns (cdr a))
0))
;; Keeps the best move --- up to `span' of them --- in `a' and `b'.
;; The two lists are sorted, and the result should keep them sorted.
;; For -inf.0 ratings, prefer the move farthest from the end of the
;; game, otherwise prefer the move closest.
(define (best span a b)
(cond
;; First, cases where span, a, or b goes to zero/null:
[(zero? span) null]
[(null? a)
(if (null? b)
null
(cons (car b) (best (sub1 span) null (cdr b))))]
[(null? b)
(cons (car a) (best (sub1 span) null (cdr a)))]
;; Pick best from first of a and first of b:
;; - Case 1: a is rated better
[(> (caar a) (caar b))
(cons (car a) (best (sub1 span) (cdr a) b))]
;; - Case 2: b is rated better
[(< (caar a) (caar b))
(cons (car b) (best (sub1 span) a (cdr b)))]
;; - Case 3: same ratings, so pick based on distance to end-of-game
;; - Subcase 1: we're picking between losses, and we want to delay the loss
[(and delay-loss?
(= (caar a) -inf.0))
(if (> (get-depth (car a)) (get-depth (car b)))
(cons (car a) (best (sub1 span) (cdr a) b))
(cons (car b) (best (sub1 span) a (cdr b))))]
;; - Subcase 2: a reaches the end first
[(< (get-depth (car a)) (get-depth (car b)))
(cons (car a) (best (sub1 span) (cdr a) b))]
;; - Subcase 3: b reaches the end first (or no later than a)
[else (cons (car b) (best (sub1 span) a (cdr b)))]))
;; --- TESTS ---
#;
(let* ([plan1 (make-plan 0 0 0 0 0 0 1)]
[plan2 (make-plan 0 0 0 0 0 0 2)]
[plan1s (list (cons 2 plan1) (cons 1 plan1))])
;; Check empty/zero combinations:
(test null (best 20 null null))
(test plan1s (best 2 plan1s null))
(test plan1s (best 2 null plan1s))
(test plan1s (best 20 null plan1s))
(test null (best 0 plan1s plan1s))
;; Check rating choice
(test (list (cons 2 plan1)) (best 1 (list (cons 2 plan1)) (list (cons 1 plan1))))
(test (list (cons 1 plan1)) (best 1 (list (cons 1 plan1)) (list (cons -inf.0 plan2))))
(test (list (cons 1 plan2)) (best 1 (list (cons -inf.0 plan1)) (list (cons 1 plan2))))
(test (list (cons 10 plan2) (cons 2 plan1)) (best 2
(list (cons 2 plan1) (cons 1 plan2))
(list (cons 10 plan2) (cons 1 plan1))))
;; Check time-til-end choice:
(test (list (cons 1 plan1)) (best 1 (list (cons 1 plan1)) (list (cons 1 plan2))))
(test (list (cons -inf.0 plan2)) (best 1 (list (cons -inf.0 plan1))
(list (cons -inf.0 plan2)))))
;; ------------------------------------------------------------
;; Multi-step minmax (non-exhaustive):
(define hit-count 0)
(define depth-count 0)
(define explore-count 0)
(define enter-count 0)
(define move-count 0)
;; ...state and search params... -> (values (listof (cons num plan)) xform)
;; Minimax search up to the given max-depth, returning up to span
;; choices of move.
(define (minmax depth span config me board last-to-i last-to-j)
(set! hit-count (add1 hit-count))
(let* ([board-key+xform ((config-canonicalize config) board me)]
[board-key (car board-key+xform)]
[xform (cdr board-key+xform)]
[key (vector board-key (- (config-max-depth config) depth) span)])
(let ([choices
(cond
;; Check for known win/loss at arbitrary depth:
[(hash-table-get (config-memory config) board-key (lambda () #f))
=> (lambda (x) x)]
;; Check for known result at specific remaining depth:
[(hash-table-get (config-memory config) key (lambda () #f))
=> (lambda (x) x)]
;; Check for immediate loss (only rating matters; plan is never used)
[(winner? board (other me))
(hash-table-put! (config-memory config) board-key '((-inf.0)))
'((-inf.0))]
;; Check for immediate loss (only rating matters)
[(winner? board me)
(hash-table-put! (config-memory config) board-key '((+inf.0)))
'((+inf.0))]
;; Check for depth
[(depth . >= . (config-max-depth config))
(set! depth-count (add1 depth-count))
(let ([l (list
(list ((config-rate-board config) board me last-to-i last-to-j)))])
(hash-table-put! (config-memory config) key l)
l)]
;; Otherwise, we explore this state...
[else
(set! depth-count (add1 depth-count))
(set! explore-count (add1 explore-count))
;; In case we get back here while we're looking, claim an unknown tie:
(hash-table-put! (config-memory config) board-key LOOP-TIE)
(let* ([choices
(map (lambda (g)
;; Make sure each canned move is in our coordinate system:
(cons (car g) (xlate (cdr g) xform)))
((config-canned-moves config) board me board-key xform))]
[choices
(if (found-win? choices)
choices
(try-all-enters choices depth span config
me board xform))]
[choices
(if (found-win? choices)
choices
(try-all-moves choices depth span config
me board xform))]
[choices (if (null? choices)
;; No moves! We lose
'((-inf.0))
;; We have at least one move
choices)])
(hash-table-remove! (config-memory config) board-key)
(let ([key (if (and ((caar choices) . < . +inf.0)
((caar choices) . > . -inf.0))
;; Result is only valid to current depth limit:
key
;; Win or loss: result is valid to any depth:
board-key)])
(hash-table-put! (config-memory config) key choices)
choices))])])
(values choices xform))))
;; try-all-enters : ... -> (listof (cons num plan))
;; Try moving each available off-board piece onto each board position
(define (try-all-enters choices depth span config me board xform)
(let loop ([enters (pick-enters board me)]
[choices choices])
(if (null? enters)
choices
;; For this piece....
(let ([p (list-ref (if (eq? me 'red) red-pieces yellow-pieces)
(car enters))])
(loop (cdr enters)
;; ... try every target position:
(fold-board/choices
span choices
(lambda (i j)
(try-move depth config
board me
p #f #f i j xform))))))))
;; try-all-moves : ... -> (listof (cons num plan))
;; Try moving each on-board piece onto each other board position
(define (try-all-moves choices depth span config me board xform)
;; From each source...
(fold-board/choices
span choices
(lambda (from-i from-j)
;; ... if it has my piece...
(let ([l (board-ref board from-i from-j)])
(if (and (pair? l)
(eq? me (piece-color (car l))))
;; ... try every target position:
(fold-board/choices
span choices
(lambda (to-i to-j)
(try-move depth config
board me
(car l) from-i from-j to-i to-j xform)))
;; Can't move from here:
null)))))
;; Try the move, and if it's ok, call `minmax' with the other
;; player and invert the result
(define (try-move depth config
board me
p from-i from-j to-i to-j xform)
(move board p from-i from-j to-i to-j
(lambda (new-board)
;; Move is ok; rate it
(set! move-count (add1 move-count))
;; Min-max recur for other player:
(let-values ([(his-choices sxform)
(minmax (add1 depth) 1 config
(other me) new-board
to-i to-j)])
#;
(when (zero? depth)
(show-recur (piece-size p) from-i from-j to-i to-j his-choices))
;; Construct a plan for this choice, and rate it
;; opposite of the minmax result
(list (cons (- (caar his-choices))
(make-plan (piece-size p) from-i from-j to-i to-j
xform
(add1 (get-depth (car his-choices))))))))
(lambda ()
;; Move isn't ok
null)))
;; pick-enters: board -> (listof num)
(define (pick-enters board me)
(let loop ([avail-pieces (available-off-board board me)]
[played-sizes null])
(cond
[(null? avail-pieces) null]
[(memq (caar avail-pieces) played-sizes)
(loop (cdr avail-pieces)
played-sizes)]
[else
(cons
;; piece to move:
(caar avail-pieces)
;; Try pieces from other stacks:
(loop (cdr avail-pieces)
(cons (caar avail-pieces) played-sizes)))])))
;; Like `fold-board', but auto combines choices and
;; handles shortcut for known immediate wins
(define (fold-board/choices span choices f)
(fold-board
(lambda (i j choices)
(if (and (found-win? choices)
(immediate? choices))
choices
(best span
choices
(f i j))))
choices))
;; --- TESTS ---
#;
(let* ([plan (make-plan 0 0 0 0 0 0 1)])
;; fold-board/choices
(test (if (= BOARD-SIZE 3)
(list (cons 4 plan) (cons 3 plan))
(list (cons 6 plan) (cons 5 plan)))
(fold-board/choices 2 null (lambda (i j)
(list (cons (+ i j) plan)))))
;; pick-enters
(let* ([one-red (move empty-board (list-ref red-pieces (sub1 BOARD-SIZE))
#f #f 0 0 values void)]
[two-red (move one-red (list-ref red-pieces (- BOARD-SIZE 2))
#f #f 1 1 values void)]
[three-red (move two-red (list-ref red-pieces (sub1 BOARD-SIZE))
#f #f 2 2 values void)]
[place-all (lambda (l)
(cdr
(fold-board (lambda (i j l+b)
(if (null? (car l+b))
l+b
(cons (cdr (car l+b))
(move (cdr l+b) (caar l+b)
#f #f i j values void))))
(cons l empty-board))))])
(test (if (= BOARD-SIZE 3) '(2 1 0) '(3))
(pick-enters empty-board 'red))
(test (if (= BOARD-SIZE 3) '(2 1 0) '(3 2))
(pick-enters one-red 'red))
(test (if (= BOARD-SIZE 3) '(2 1 0) '(3 1))
(pick-enters two-red 'red))
(test (if (= BOARD-SIZE 3) '(1 0) '(3 2 1))
(pick-enters three-red 'red))
(let ([all-red-pieces (apply append
(vector->list (make-vector (sub1 BOARD-SIZE) red-pieces)))])
(test null (pick-enters (place-all all-red-pieces) 'red))
(test '(2) (pick-enters (place-all (remq (list-ref red-pieces 2)
all-red-pieces))
'red))
(test (if (= BOARD-SIZE 3) '(1 0) '(1))
(pick-enters (place-all (remq (list-ref red-pieces 0)
(remq (list-ref red-pieces 1)
all-red-pieces)))
'red)))))
;; ------------------------------------------------------------
;; Multi-step minmax (non-exhaustive):
;; Apply minmax, and if steps > 1, rate resulting moves by applying
;; minmax to them. Meanwhile, in learning mode, record any resulting
;; move that is known to lead to winning or losing.
(define (multi-step-minmax steps span config indent init-memory me board)
(define first-move?
((fold-board (lambda (i j v) (+ v (length (board-ref board i j)))) 0) . < . 2))
(define now (current-inexact-milliseconds))
(set! hit-count 0)
(set! depth-count 0)
(set! explore-count 0)
(set! enter-count 0)
(set! move-count 0)
(log-printf 1 indent "~a> ~a Exploring for ~a~n" (make-string indent #\space) steps me)
(let-values ([(vs xform)
(minmax 0
(if (or (steps . <= . 1) first-move?)
1
span)
config
me
board #f #f)])
(log-printf 2 indent "~a>> Done ~a ~a ~a ~a+~a [~a secs]~n"
(make-string indent #\space)
hit-count depth-count explore-count enter-count move-count
(float->string (/ (- (current-inexact-milliseconds) now) 1000)))
(let ([plays
(map (lambda (v)
;; Transform each result, and turn it into a list
(cons (car v)
(let ([m (xlate (cdr v) xform)])
(list (list-ref (if (eq? me 'red)
red-pieces
yellow-pieces)
(plan-size m))
(plan-from-i m)
(plan-from-j m)
(plan-to-i m)
(plan-to-j m)
(get-depth v)))))
(filter (lambda (v) (plan? (cdr v))) vs))])
(log-printf 3 indent "~a>> Best Plays: ~a\n"
(make-string indent #\space) (plays->string
(make-string (+ 15 indent) #\space)
plays))
;; Record what we've learned...
(when (and learn?
(= steps 1))
(record-result plays board me config init-memory))
(if (or (steps . <= . 1) first-move?)
(car plays)
(let ([nexts
;; See what the other player thinks about our candidate moves,
;; and pick the one that looks worst to the other player.
(if ((caar plays) . < . +inf.0)
(sort
(map
(lambda (play)
(log-printf 4 indent " ~a>>> Checking: ~a\n"
(make-string indent #\space) (play->string play))
(if (= -inf.0 (car play))
(begin
(log-printf 4 indent " ~a>>>> losing\n"
(make-string indent #\space))
play)
(let ([r (cons (- (car (multi-step-minmax
(sub1 steps) span config
(+ 3 indent) init-memory
(other me)
(apply-play board (cdr play)))))
(cdr play))])
(log-printf 4 indent " ~a>>>> deeper = ~a\n"
(make-string indent #\space)
(float->string (car r)))
r)))
plays)
(lambda (a b) (> (car a) (car b))))
(list (car plays)))])
(car nexts))))))
;; ------------------------------------------------------------
;; Multi-run memory:
(define learn? #f)
(define MEMORY-FILE (and learn?
(build-path (find-system-path 'addon-dir)
(format "gobblet-memory-~a.ss" BOARD-SIZE))))
(define (record-result plays board me config init-memory)
(when (or (found-win? plays)
(found-lose? plays))
(let ([board-key+xform ((config-canonicalize config) board me)])
(hash-table-get init-memory
(car board-key+xform)
(lambda ()
;; This is new...
(with-output-to-file MEMORY-FILE
(lambda ()
(let ([m (cdar plays)])
(printf "(~a ~a ~a)~n#|~n~a|#~n"
(if (found-win? plays) 'win 'lose)
(car board-key+xform)
(list
(piece-color (list-ref m 0))
(piece-size (list-ref m 0))
(list-ref m 1) (list-ref m 2)
(list-ref m 3) (list-ref m 4)
(list-ref m 5))
(board->string 0 board))))
'append))))))
;; to load what we've learned from previous runs
(define (load-memory init-memory canonicalize)
(with-handlers ([exn:fail:filesystem? void])
(with-input-from-file MEMORY-FILE
(lambda ()
(let loop ()
(let ([v (read)])
(unless (eof-object? v)
(let ([board-key+xform (canonicalize (cadr v) #f)])
(hash-table-put! init-memory
(car board-key+xform)
(list
(cons (if (eq? 'win (car v)) +inf.0 -inf.0)
(let ([n (caddr v)])
(make-plan
(cadr n)
(list-ref n 2) (list-ref n 3)
(list-ref n 4) (list-ref n 5)
(cdr board-key+xform)
(list-ref n 6)))))))
(loop))))))))
;; ------------------------------------------------------------
;; Debugging helpers
(define (float->string v)
(let ([s (string-append (number->string v) "000000")])
(substring s 0 (min 6 (string-length s)))))
(define (play->string p)
(format "~a (~a,~a)->(~a,~a) [~a/~a]"
(piece-size (list-ref p 1))
(list-ref p 2) (list-ref p 3) (list-ref p 4) (list-ref p 5)
(float->string (car p))
(list-ref p 6)))
(define (plays->string is p)
(if (null? p)
"()"
(let ([s (plays->string is (cdr p))])
(if (null? (cdr p))
(play->string (car p))
(string-append (play->string (car p))
"\n"
is
s)))))
(define (show-recur sz from-i from-j to-i to-j sv)
(if (not (plan? (cdar sv)))
(printf " Recur ~a (~a,~a)->(~a,~a) ; ??? = ~a/~a~n"
sz from-i from-j to-i to-j
(caar sv) (get-depth (car sv)))
(printf " Recur ~a (~a,~a)->(~a,~a) ; (~a,~a)->(~a,~a) = ~a/~a~n"
sz from-i from-j to-i to-j
(plan-from-i (cdar sv)) (plan-from-j (cdar sv))
(plan-to-i (cdar sv)) (plan-to-j (cdar sv))
(caar sv) (get-depth (car sv))))))))