mirror of
https://github.com/mailcow/mailcow-dockerized.git
synced 2024-12-23 02:04:46 +02:00
unbound: fix healthcheck logging + added fail tolerance to checks (#6004)
* unbound: fix healthcheck logging to stdout + rewrote healthcheck logic
* compose: bump unbound tag
* unbound: fixed healthcheck logic
This commit is contained in:
parent
b1c1e403d2
commit
b26ccc2019
@ -5,11 +5,14 @@ LABEL maintainer = "The Infrastructure Company GmbH <info@servercow.de>"
|
|||||||
RUN apk add --update --no-cache \
|
RUN apk add --update --no-cache \
|
||||||
curl \
|
curl \
|
||||||
bind-tools \
|
bind-tools \
|
||||||
|
coreutils \
|
||||||
unbound \
|
unbound \
|
||||||
bash \
|
bash \
|
||||||
openssl \
|
openssl \
|
||||||
drill \
|
drill \
|
||||||
tzdata \
|
tzdata \
|
||||||
|
syslog-ng \
|
||||||
|
supervisor \
|
||||||
&& curl -o /etc/unbound/root.hints https://www.internic.net/domain/named.cache \
|
&& curl -o /etc/unbound/root.hints https://www.internic.net/domain/named.cache \
|
||||||
&& chown root:unbound /etc/unbound \
|
&& chown root:unbound /etc/unbound \
|
||||||
&& adduser unbound tty \
|
&& adduser unbound tty \
|
||||||
@ -21,9 +24,13 @@ COPY docker-entrypoint.sh /docker-entrypoint.sh
|
|||||||
|
|
||||||
# healthcheck (dig, ping)
|
# healthcheck (dig, ping)
|
||||||
COPY healthcheck.sh /healthcheck.sh
|
COPY healthcheck.sh /healthcheck.sh
|
||||||
|
COPY syslog-ng.conf /etc/syslog-ng/syslog-ng.conf
|
||||||
|
COPY supervisord.conf /etc/supervisor/supervisord.conf
|
||||||
|
COPY stop-supervisor.sh /usr/local/sbin/stop-supervisor.sh
|
||||||
|
|
||||||
RUN chmod +x /healthcheck.sh
|
RUN chmod +x /healthcheck.sh
|
||||||
HEALTHCHECK --interval=30s --timeout=30s CMD [ "/healthcheck.sh" ]
|
HEALTHCHECK --interval=30s --timeout=10s \
|
||||||
|
CMD sh -c '[ -f /tmp/healthcheck_status ] && [ "$(cat /tmp/healthcheck_status)" -eq 0 ] || exit 1'
|
||||||
|
|
||||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||||
|
CMD exec /usr/bin/supervisord -c /etc/supervisor/supervisord.conf
|
||||||
CMD ["/usr/sbin/unbound"]
|
|
||||||
|
@ -1,76 +1,102 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# Skip Unbound (DNS Resolver) Healthchecks (NOT Recommended!)
|
STATUS_FILE="/tmp/healthcheck_status"
|
||||||
if [[ "${SKIP_UNBOUND_HEALTHCHECK}" =~ ^([yY][eE][sS]|[yY])+$ ]]; then
|
RUNS=0
|
||||||
SKIP_UNBOUND_HEALTHCHECK=y
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Reset logfile
|
# Declare log function that writes timestamped healthcheck messages to stdout
|
||||||
echo "$(date +"%Y-%m-%d %H:%M:%S"): Starting health check - logs can be found in /var/log/healthcheck.log"
|
function log_to_stdout() {
|
||||||
echo "$(date +"%Y-%m-%d %H:%M:%S"): Starting health check" > /var/log/healthcheck.log
|
echo "$(date +"%Y-%m-%d %H:%M:%S"): $1"
|
||||||
|
|
||||||
# Declare log function for logfile inside container
|
|
||||||
function log_to_file() {
|
|
||||||
echo "$(date +"%Y-%m-%d %H:%M:%S"): $1" >> /var/log/healthcheck.log
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Ping check: verifies that well-known public IPs are reachable
|
# Ping check: verifies that well-known public IPs are reachable
|
||||||
function check_ping() {
|
function check_ping() {
|
||||||
declare -a ipstoping=("1.1.1.1" "8.8.8.8" "9.9.9.9")
|
declare -a ipstoping=("1.1.1.1" "8.8.8.8" "9.9.9.9")
|
||||||
|
local fail_tolerance=1
|
||||||
|
local failures=0
|
||||||
|
|
||||||
for ip in "${ipstoping[@]}" ; do
|
for ip in "${ipstoping[@]}" ; do
|
||||||
ping -q -c 3 -w 5 "$ip"
|
success=false
|
||||||
if [ $? -ne 0 ]; then
|
for ((i=1; i<=3; i++)); do
|
||||||
log_to_file "Healthcheck: Couldn't ping $ip for 5 seconds... Gave up!"
|
ping -q -c 3 -w 5 "$ip" > /dev/null
|
||||||
log_to_file "Please check your internet connection or firewall rules to fix this error, because a simple ping test should always go through from the unbound container!"
|
if [ $? -eq 0 ]; then
|
||||||
return 1
|
success=true
|
||||||
|
break
|
||||||
|
else
|
||||||
|
log_to_stdout "Healthcheck: Failed to ping $ip on attempt $i. Trying again..."
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
log_to_file "Healthcheck: Ping Checks WORKING properly!"
|
if [ "$success" = false ]; then
|
||||||
return 0
|
log_to_stdout "Healthcheck: Couldn't ping $ip after 3 attempts. Marking this IP as failed."
|
||||||
|
((failures++))
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ $failures -gt $fail_tolerance ]; then
|
||||||
|
log_to_stdout "Healthcheck: Too many ping failures ($fail_tolerance failures allowed, you got $failures failures), marking Healthcheck as unhealthy..."
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# General DNS resolution check against the Unbound resolver itself
|
# General DNS resolution check against the Unbound resolver itself
|
||||||
function check_dns() {
|
function check_dns() {
|
||||||
declare -a domains=("mailcow.email" "github.com" "hub.docker.com")
|
declare -a domains=("fuzzy.mailcow.email" "github.com" "hub.docker.com")
|
||||||
|
local fail_tolerance=1
|
||||||
|
local failures=0
|
||||||
|
|
||||||
for domain in "${domains[@]}" ; do
|
for domain in "${domains[@]}" ; do
|
||||||
|
success=false
|
||||||
for ((i=1; i<=3; i++)); do
|
for ((i=1; i<=3; i++)); do
|
||||||
dig +short +timeout=2 +tries=1 "$domain" @127.0.0.1 > /dev/null
|
dig_output=$(dig +short +timeout=2 +tries=1 "$domain" @127.0.0.1 2>/dev/null)
|
||||||
if [ $? -ne 0 ]; then
|
dig_rc=$?
|
||||||
log_to_file "Healthcheck: DNS Resolution Failed on $i attempt! Trying again..."
|
|
||||||
if [ $i -eq 3 ]; then
|
if [ $dig_rc -ne 0 ] || [ -z "$dig_output" ]; then
|
||||||
log_to_file "Healthcheck: DNS Resolution not possible after $i attempts... Gave up!"
|
log_to_stdout "Healthcheck: DNS Resolution Failed on attempt $i for $domain! Trying again..."
|
||||||
log_to_file "Maybe check your outbound firewall, as it needs to resolve DNS over TCP AND UDP!"
|
else
|
||||||
|
success=true
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ "$success" = false ]; then
|
||||||
|
log_to_stdout "Healthcheck: DNS Resolution not possible after 3 attempts for $domain... Gave up!"
|
||||||
|
((failures++))
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ $failures -gt $fail_tolerance ]; then
|
||||||
|
log_to_stdout "Healthcheck: Too many DNS failures ($fail_tolerance failures allowed, you got $failures failures), marking Healthcheck as unhealthy..."
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
fi
|
|
||||||
done
|
|
||||||
done
|
|
||||||
|
|
||||||
log_to_file "Healthcheck: DNS Resolver WORKING properly!"
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
if [[ ${SKIP_UNBOUND_HEALTHCHECK} == "y" ]]; then
|
while true; do
|
||||||
log_to_file "Healthcheck: ALL CHECKS WERE SKIPPED! Unbound is healthy!"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# run checks, if check is not returning 0 (return value if check is ok), healthcheck will exit with 1 (marked in docker as unhealthy)
|
if [[ ${SKIP_UNBOUND_HEALTHCHECK} == "y" ]]; then
|
||||||
check_ping
|
log_to_stdout "Healthcheck: ALL CHECKS WERE SKIPPED! Unbound is healthy!"
|
||||||
|
echo "0" > $STATUS_FILE
|
||||||
|
sleep 365d
|
||||||
|
fi
|
||||||
|
|
||||||
if [ $? -ne 0 ]; then
|
# Run the checks; if either one returns non-zero, write 1 to the status file (the Docker HEALTHCHECK reads it and marks the container unhealthy), otherwise write 0
|
||||||
exit 1
|
check_ping
|
||||||
fi
|
PING_STATUS=$?
|
||||||
|
|
||||||
check_dns
|
check_dns
|
||||||
|
DNS_STATUS=$?
|
||||||
|
|
||||||
if [ $? -ne 0 ]; then
|
if [ $PING_STATUS -ne 0 ] || [ $DNS_STATUS -ne 0 ]; then
|
||||||
exit 1
|
echo "1" > $STATUS_FILE
|
||||||
fi
|
|
||||||
|
|
||||||
log_to_file "Healthcheck: ALL CHECKS WERE SUCCESSFUL! Unbound is healthy!"
|
else
|
||||||
exit 0
|
echo "0" > $STATUS_FILE
|
||||||
|
fi
|
||||||
|
|
||||||
|
sleep 30
|
||||||
|
|
||||||
|
done
|
10
data/Dockerfiles/unbound/stop-supervisor.sh
Executable file
10
data/Dockerfiles/unbound/stop-supervisor.sh
Executable file
@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
printf "READY\n";
|
||||||
|
|
||||||
|
while read line; do
|
||||||
|
echo "Processing Event: $line" >&2;
|
||||||
|
kill -3 $(cat "/var/run/supervisord.pid")
|
||||||
|
done < /dev/stdin
|
||||||
|
|
||||||
|
rm -rf /tmp/healthcheck_status
|
32
data/Dockerfiles/unbound/supervisord.conf
Normal file
32
data/Dockerfiles/unbound/supervisord.conf
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
[supervisord]
|
||||||
|
nodaemon=true
|
||||||
|
user=root
|
||||||
|
pidfile=/var/run/supervisord.pid
|
||||||
|
|
||||||
|
[program:syslog-ng]
|
||||||
|
command=/usr/sbin/syslog-ng --foreground --no-caps
|
||||||
|
stdout_logfile=/dev/stdout
|
||||||
|
stdout_logfile_maxbytes=0
|
||||||
|
stderr_logfile=/dev/stderr
|
||||||
|
stderr_logfile_maxbytes=0
|
||||||
|
autostart=true
|
||||||
|
|
||||||
|
[program:unbound]
|
||||||
|
command=/usr/sbin/unbound
|
||||||
|
stdout_logfile=/dev/stdout
|
||||||
|
stdout_logfile_maxbytes=0
|
||||||
|
stderr_logfile=/dev/stderr
|
||||||
|
stderr_logfile_maxbytes=0
|
||||||
|
autorestart=true
|
||||||
|
|
||||||
|
[program:unbound-healthcheck]
|
||||||
|
command=/bin/bash /healthcheck.sh
|
||||||
|
stdout_logfile=/dev/stdout
|
||||||
|
stdout_logfile_maxbytes=0
|
||||||
|
stderr_logfile=/dev/stderr
|
||||||
|
stderr_logfile_maxbytes=0
|
||||||
|
autorestart=true
|
||||||
|
|
||||||
|
[eventlistener:processes]
|
||||||
|
command=/usr/local/sbin/stop-supervisor.sh
|
||||||
|
events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL
|
21
data/Dockerfiles/unbound/syslog-ng.conf
Normal file
21
data/Dockerfiles/unbound/syslog-ng.conf
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
@version: 4.5
|
||||||
|
@include "scl.conf"
|
||||||
|
options {
|
||||||
|
chain_hostnames(off);
|
||||||
|
flush_lines(0);
|
||||||
|
use_dns(no);
|
||||||
|
use_fqdn(no);
|
||||||
|
owner("root"); group("adm"); perm(0640);
|
||||||
|
stats(freq(0));
|
||||||
|
keep_timestamp(no);
|
||||||
|
bad_hostname("^gconfd$");
|
||||||
|
};
|
||||||
|
source s_dgram {
|
||||||
|
unix-dgram("/dev/log");
|
||||||
|
internal();
|
||||||
|
};
|
||||||
|
destination d_stdout { pipe("/dev/stdout"); };
|
||||||
|
log {
|
||||||
|
source(s_dgram);
|
||||||
|
destination(d_stdout);
|
||||||
|
};
|
@ -1,7 +1,7 @@
|
|||||||
services:
|
services:
|
||||||
|
|
||||||
unbound-mailcow:
|
unbound-mailcow:
|
||||||
image: mailcow/unbound:1.22
|
image: mailcow/unbound:1.23
|
||||||
environment:
|
environment:
|
||||||
- TZ=${TZ}
|
- TZ=${TZ}
|
||||||
- SKIP_UNBOUND_HEALTHCHECK=${SKIP_UNBOUND_HEALTHCHECK:-n}
|
- SKIP_UNBOUND_HEALTHCHECK=${SKIP_UNBOUND_HEALTHCHECK:-n}
|
||||||
|
Loading…
Reference in New Issue
Block a user