From aec409001844b4ab4c20bea2352c65d556d2f358 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones
Date: Fri, 1 Jun 2018 19:01:52 +0100
Subject: [PATCH] Make a nicer little monitoring script

---
Note: on-ping-service-failure.sh below was hardened (quoted expansions,
guarded cd) after this patch was exported, so the blob id on its index
line is stale.

 on-ping-service-failure.sh | 35 +++++++++++++++++++++++++++++++++++
 ping-service.sh            |  2 +-
 2 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100755 on-ping-service-failure.sh

diff --git a/on-ping-service-failure.sh b/on-ping-service-failure.sh
new file mode 100755
index 0000000..ede5736
--- /dev/null
+++ b/on-ping-service-failure.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# I am potentially invoked when ping-service.sh (which see) fails to
+# contact the racket-pkg-website service.
+#
+# I take no arguments; every path I touch is derived from $HOME. Each
+# step is best-effort: a failure in an earlier step should not stop
+# the service restart further down, so "set -e" is not used here.
+#
+
+# Request a dump of running threads:
+touch "$HOME/racket-pkg-website/signals/.dumpinfo"
+
+# Wait a few seconds for the dump to complete:
+sleep 10
+
+# Tar up the most recent few hours' worth of logs:
+logarchive="$HOME/ping-failure-logs-$(date +%Y%m%d%H%M%S).tar.gz"
+(
+    # The subshell keeps the cd from leaking into the rest of the script.
+    # If the log directory is unavailable, leave the subshell (only)
+    # instead of archiving whatever directory we were started in.
+    cd "$HOME/service/racket-pkg-website/log/main/" || exit 1
+    # ls -tr lists oldest first, so the last ten names are the ten most
+    # recently modified entries.
+    ls -tr | tail -n 10 | xargs tar -zcf "$logarchive"
+)
+
+# Restart the service using daemontools:
+svc -du "$HOME/service/racket-pkg-website"
+
+# Finally, complain out loud. We expect to be running in some kind of
+# cron-ish context, so the output we produce here will likely find its
+# way into an email to a responsible party.
+echo "racket-pkg-website on-ping-service-failure.sh invoked. logarchive=$logarchive"
diff --git a/ping-service.sh b/ping-service.sh
index 0325788..4dd38f5 100755
--- a/ping-service.sh
+++ b/ping-service.sh
@@ -6,7 +6,7 @@
 #
 # For example, to monitor racket-pkg-website, try
 #
-# ./ping-service.sh https://localhost:8444/ping 'touch .../signals/.dumpinfo; sleep 10; svc -du ...'
+# ./ping-service.sh https://localhost:8444/ping $HOME/racket-pkg-website/on-ping-service-failure.sh
 
 if [ "$#" != "2" ]
 then