From 74f00fcf0ac596c5e4ca91e7adee82232e3c7091 Mon Sep 17 00:00:00 2001
From: Christof Gerber
Date: Thu, 3 Jul 2025 17:06:52 +0200
Subject: [PATCH 1/2] Add a docker-compose setup

---
 Dockerfile.alertmanager |  49 ++++++++++++
 Dockerfile.webhook      |  28 +++++++
 docker-helper.sh        | 172 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 249 insertions(+)
 create mode 100644 Dockerfile.alertmanager
 create mode 100644 Dockerfile.webhook
 create mode 100755 docker-helper.sh

diff --git a/Dockerfile.alertmanager b/Dockerfile.alertmanager
new file mode 100644
index 0000000000..37c2894ae9
--- /dev/null
+++ b/Dockerfile.alertmanager
@@ -0,0 +1,49 @@
+# Multi-stage Dockerfile for Alertmanager
+FROM golang:1.23-alpine AS builder
+
+# Install build dependencies (no nodejs/npm: the UI build is skipped below)
+RUN apk add --no-cache git make bash
+
+# Set working directory
+WORKDIR /app
+
+# Copy go mod files first so the module download layer stays cached
+COPY go.mod go.sum ./
+RUN go mod download
+
+# Copy source code
+COPY . .
+
+# Build the binaries (skip UI build for faster compilation)
+# Static Linux binaries; GOARCH defaults to the build platform's architecture
+RUN CGO_ENABLED=0 GOOS=linux go build -o alertmanager ./cmd/alertmanager
+RUN CGO_ENABLED=0 GOOS=linux go build -o amtool ./cmd/amtool
+
+# Final stage (base image pinned for reproducible builds)
+FROM alpine:3.21
+
+# Install ca-certificates for TLS, retrying once after a short pause
+RUN apk add --no-cache ca-certificates tzdata || \
+    (sleep 5 && apk add --no-cache ca-certificates tzdata)
+
+# Create alertmanager user
+RUN addgroup -S alertmanager && adduser -S alertmanager -G alertmanager
+
+# Copy binaries from builder stage
+COPY --from=builder /app/alertmanager /bin/alertmanager
+COPY --from=builder /app/amtool /bin/amtool
+
+# Copy default configuration
+COPY examples/ha/alertmanager.yml /etc/alertmanager/alertmanager.yml
+
+# Create directories and set permissions
+RUN mkdir -p /alertmanager && \
+    chown -R alertmanager:alertmanager /etc/alertmanager /alertmanager
+
+USER alertmanager
+EXPOSE 9093
+VOLUME ["/alertmanager"]
+WORKDIR /alertmanager
+ENTRYPOINT ["/bin/alertmanager"]
+CMD ["--config.file=/etc/alertmanager/alertmanager.yml", \
+     "--storage.path=/alertmanager"]
diff --git a/Dockerfile.webhook b/Dockerfile.webhook
new file mode 100644
index 0000000000..05832edc02
--- /dev/null
+++ b/Dockerfile.webhook
@@ -0,0 +1,28 @@
+# Dockerfile for webhook echo server
+FROM golang:1.23-alpine AS builder
+
+# Set working directory
+WORKDIR /app
+
+# Copy the echo.go file
+COPY examples/webhook/echo.go .
+
+# Build the binary
+RUN CGO_ENABLED=0 go build -o webhook-server echo.go
+
+# Final stage (base image pinned for reproducible builds)
+FROM alpine:3.21
+
+# Install ca-certificates for TLS, retrying once after a short pause
+RUN apk add --no-cache ca-certificates || \
+    (sleep 5 && apk add --no-cache ca-certificates)
+
+# Create webhook user
+RUN addgroup -S webhook && adduser -S webhook -G webhook
+
+# Copy binary from builder stage
+COPY --from=builder /app/webhook-server /bin/webhook-server
+
+USER webhook
+EXPOSE 5001
+ENTRYPOINT ["/bin/webhook-server"]
diff --git a/docker-helper.sh b/docker-helper.sh
new file mode 100755
index 0000000000..445c7fdf60
--- /dev/null
+++ b/docker-helper.sh
@@ -0,0 +1,172 @@
+#!/usr/bin/env bash
+
+# Helper script to manage Alertmanager Docker Compose setup
+
+set -e
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+log_info() {
+    echo -e "${GREEN}[INFO]${NC} $1"
+}
+
+log_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Function to start services
+start_services() {
+    log_info "Starting Alertmanager cluster and webhook server..."
+    docker compose up -d
+
+    log_info "Waiting for services to be ready..."
+    sleep 10
+
+    # Print container state; this does not gate on the health checks
+    log_info "Checking service health..."
+    docker compose ps
+
+    echo
+    log_info "Services are running!"
+    echo "Alertmanager instances:"
+    echo " - Instance 1: http://localhost:9093"
+    echo " - Instance 2: http://localhost:9094"
+    echo " - Instance 3: http://localhost:9095"
+    echo "Webhook server: http://localhost:5001"
+}
+
+# Function to stop services
+stop_services() {
+    log_info "Stopping Alertmanager cluster..."
+    docker compose down
+    log_info "Services stopped."
+}
+
+# Function to restart services
+restart_services() {
+    log_info "Restarting Alertmanager cluster..."
+    docker compose restart
+    log_info "Services restarted."
+}
+
+# Function to show logs ($1: optional service name; all services if empty)
+show_logs() {
+    if [ -n "$1" ]; then
+        docker compose logs -f "$1"
+    else
+        docker compose logs -f
+    fi
+}
+
+# Function to send test alerts
+send_alerts() {
+    log_info "Sending test alerts..."
+
+    # Check if send_alerts.sh exists
+    if [ ! -f "examples/ha/send_alerts.sh" ]; then
+        log_error "send_alerts.sh not found at examples/ha/send_alerts.sh"
+        exit 1
+    fi
+
+    # Make it executable
+    chmod +x examples/ha/send_alerts.sh
+
+    # Run the script
+    ./examples/ha/send_alerts.sh
+
+    log_info "Test alerts sent!"
+    log_info "Check webhook server logs: docker compose logs webhook-server"
+}
+
+# Function to check service status
+check_status() {
+    log_info "Checking service status..."
+    docker compose ps
+
+    echo
+    log_info "Service endpoints:"
+    echo " - Alertmanager 1: http://localhost:9093"
+    echo " - Alertmanager 2: http://localhost:9094"
+    echo " - Alertmanager 3: http://localhost:9095"
+    echo " - Webhook server: http://localhost:5001"
+
+    echo
+    log_info "Health checks:"
+    for port in 9093 9094 9095; do
+        if curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port/-/healthy" | grep -q "200"; then
+            echo -e " - Alertmanager ($port): ${GREEN}✓ Healthy${NC}"
+        else
+            echo -e " - Alertmanager ($port): ${RED}✗ Unhealthy${NC}"
+        fi
+    done
+
+    if curl -s -o /dev/null -w "%{http_code}" "http://localhost:5001/" | grep -q "200"; then
+        echo -e " - Webhook server: ${GREEN}✓ Healthy${NC}"
+    else
+        echo -e " - Webhook server: ${RED}✗ Unhealthy${NC}"
+    fi
+}
+
+# Function to clean up everything
+cleanup() {
+    log_info "Cleaning up containers, images, and volumes..."
+    docker compose down -v --rmi all
+    log_info "Cleanup complete."
+}
+
+# Main function: dispatch on the first argument, print usage otherwise
+main() {
+    case "${1:-}" in
+        start)
+            start_services
+            ;;
+        stop)
+            stop_services
+            ;;
+        restart)
+            restart_services
+            ;;
+        logs)
+            show_logs "$2"
+            ;;
+        send-alerts)
+            send_alerts
+            ;;
+        status)
+            check_status
+            ;;
+        cleanup)
+            cleanup
+            ;;
+        *)
+            echo "Usage: $0 {start|stop|restart|logs [service]|send-alerts|status|cleanup}"
+            echo
+            echo "Commands:"
+            echo " start - Start all services"
+            echo " stop - Stop all services"
+            echo " restart - Restart all services"
+            echo " logs - Show logs for all services"
+            echo " logs SERVICE - Show logs for specific service"
+            echo " send-alerts - Send test alerts"
+            echo " status - Check service status"
+            echo " cleanup - Remove containers, images, and volumes"
+            echo
+            echo "Examples:"
+            echo " $0 start"
+            echo " $0 logs webhook-server"
+            echo " $0 send-alerts"
+            exit 1
+            ;;
+    esac
+}
+
+# Run main function
+main "$@"
From 0ca87d1f4674eae933192bf97922b69c78cb42de Mon Sep 17 00:00:00 2001
From: Christof Gerber
Date: Thu, 3 Jul 2025 17:54:16 +0200
Subject: [PATCH 2/2] Add missing files

---
 alertmanager-1.yml | 16 ++++++++
 alertmanager-2.yml | 16 ++++++++
 alertmanager-3.yml | 16 ++++++++
 docker-compose.yml | 99 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 147 insertions(+)
 create mode 100644 alertmanager-1.yml
 create mode 100644 alertmanager-2.yml
 create mode 100644 alertmanager-3.yml
 create mode 100644 docker-compose.yml

diff --git a/alertmanager-1.yml b/alertmanager-1.yml
new file mode 100644
index 0000000000..de2bbd60a9
--- /dev/null
+++ b/alertmanager-1.yml
@@ -0,0 +1,16 @@
+route:
+  group_by: ['alertname']
+  group_wait: 30s
+  group_interval: 5m
+  repeat_interval: 1h
+  receiver: 'web.hook'
+receivers:
+  - name: 'web.hook'
+    webhook_configs:
+      - url: 'http://webhook-server:5001/'
+inhibit_rules:
+  - source_match:
+      severity: 'critical'
+    target_match:
+      severity: 'warning'
+    equal: ['alertname', 'dev', 'instance']
diff --git a/alertmanager-2.yml b/alertmanager-2.yml
new file mode 100644
index 0000000000..de2bbd60a9
--- /dev/null
+++ b/alertmanager-2.yml
@@ -0,0 +1,16 @@
+route:
+  group_by: ['alertname']
+  group_wait: 30s
+  group_interval: 5m
+  repeat_interval: 1h
+  receiver: 'web.hook'
+receivers:
+  - name: 'web.hook'
+    webhook_configs:
+      - url: 'http://webhook-server:5001/'
+inhibit_rules:
+  - source_match:
+      severity: 'critical'
+    target_match:
+      severity: 'warning'
+    equal: ['alertname', 'dev', 'instance']
diff --git a/alertmanager-3.yml b/alertmanager-3.yml
new file mode 100644
index 0000000000..de2bbd60a9
--- /dev/null
+++ b/alertmanager-3.yml
@@ -0,0 +1,16 @@
+route:
+  group_by: ['alertname']
+  group_wait: 30s
+  group_interval: 5m
+  repeat_interval: 1h
+  receiver: 'web.hook'
+receivers:
+  - name: 'web.hook'
+    webhook_configs:
+      - url: 'http://webhook-server:5001/'
+inhibit_rules:
+  - source_match:
+      severity: 'critical'
+    target_match:
+      severity: 'warning'
+    equal: ['alertname', 'dev', 'instance']
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000000..da0f589866
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,99 @@
+services:
+  alertmanager-1:
+    build:
+      context: .
+      dockerfile: Dockerfile.alertmanager
+    platform: linux/amd64
+    ports:
+      - "9093:9093"
+    volumes:
+      - ./alertmanager-1.yml:/etc/alertmanager/alertmanager.yml
+      - alertmanager-1-data:/alertmanager
+    command:
+      - '--config.file=/etc/alertmanager/alertmanager.yml'
+      - '--storage.path=/alertmanager'
+      - '--web.listen-address=0.0.0.0:9093'
+      - '--cluster.listen-address=0.0.0.0:9094'
+      - '--cluster.peer=alertmanager-2:9094'
+      - '--cluster.peer=alertmanager-3:9094'
+    networks:
+      - alertmanager
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:9093/-/healthy"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+  alertmanager-2:
+    build:
+      context: .
+      dockerfile: Dockerfile.alertmanager
+    platform: linux/amd64
+    ports:
+      - "9094:9093"
+    volumes:
+      - ./alertmanager-2.yml:/etc/alertmanager/alertmanager.yml
+      - alertmanager-2-data:/alertmanager
+    command:
+      - '--config.file=/etc/alertmanager/alertmanager.yml'
+      - '--storage.path=/alertmanager'
+      - '--web.listen-address=0.0.0.0:9093'
+      - '--cluster.listen-address=0.0.0.0:9094'
+      - '--cluster.peer=alertmanager-1:9094'
+      - '--cluster.peer=alertmanager-3:9094'
+    networks:
+      - alertmanager
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:9093/-/healthy"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+  alertmanager-3:
+    build:
+      context: .
+      dockerfile: Dockerfile.alertmanager
+    platform: linux/amd64
+    ports:
+      - "9095:9093"
+    volumes:
+      - ./alertmanager-3.yml:/etc/alertmanager/alertmanager.yml
+      - alertmanager-3-data:/alertmanager
+    command:
+      - '--config.file=/etc/alertmanager/alertmanager.yml'
+      - '--storage.path=/alertmanager'
+      - '--web.listen-address=0.0.0.0:9093'
+      - '--cluster.listen-address=0.0.0.0:9094'
+      - '--cluster.peer=alertmanager-1:9094'
+      - '--cluster.peer=alertmanager-2:9094'
+    networks:
+      - alertmanager
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:9093/-/healthy"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+  webhook-server:
+    build:
+      context: .
+      dockerfile: Dockerfile.webhook
+    platform: linux/amd64
+    ports:
+      - "5001:5001"
+    networks:
+      - alertmanager
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:5001/"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+volumes:
+  alertmanager-1-data:
+  alertmanager-2-data:
+  alertmanager-3-data:
+
+networks:
+  alertmanager:
+    driver: bridge