v0.3.0 - Complete deployment documentation and examples - Added comprehensive deployment guide, automated deployment scripts, nginx SSL proxy setup, backup automation, and monitoring tools. Includes VPS deployment, cloud platform guides, and practical examples for production deployment of event-based configuration system.
This commit is contained in:
460
examples/deployment/monitoring/monitor-relay.sh
Executable file
460
examples/deployment/monitoring/monitor-relay.sh
Executable file
@@ -0,0 +1,460 @@
|
||||
#!/bin/bash

# C Nostr Relay - Monitoring Script
# Comprehensive monitoring for event-based configuration relay
#
# Defines the default settings and counters used by the health-check
# functions in this file.

# Exit on any unhandled command failure.
set -e

# Colors for output (stored as backslash escapes; expanded when printed)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
RELAY_DIR="/opt/c-relay"               # directory searched for *.nrdb files
SERVICE_NAME="c-relay"                 # systemd unit name
RELAY_PORT="8888"                      # TCP port the relay listens on
LOG_FILE="/var/log/relay-monitor.log"  # monitor log, appended to
ALERT_EMAIL=""                         # empty disables e-mail alerts
WEBHOOK_URL=""                         # empty disables webhook alerts
CHECK_INTERVAL="60"                    # seconds between runs in continuous mode
MAX_MEMORY_MB="1024"                   # memory usage alert threshold
MAX_DB_SIZE_MB="10240"                 # database size alert threshold
MIN_DISK_SPACE_MB="1024"               # free disk space alert threshold

# Counters for statistics
TOTAL_CHECKS=0
FAILED_CHECKS=0
ALERTS_SENT=0

# Functions
# Print an informational message to stdout and record it in the log.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text
# Outputs:   "[INFO] <message>", with the tag wrapped in the BLUE/NC colors
print_step() {
  # %b expands the escapes stored in the color variables; %s prints the
  # message verbatim so backslashes inside it are not reinterpreted.
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"
  log_message "INFO" "$1"
}
# Print a success message to stdout and record it in the log.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text
# Outputs:   "[OK] <message>", with the tag wrapped in the GREEN/NC colors
print_success() {
  # %b expands the color escapes; %s keeps the message text literal.
  printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$1"
  log_message "OK" "$1"
}
# Print a warning message to stdout and record it in the log.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text
# Outputs:   "[WARN] <message>", with the tag wrapped in the YELLOW/NC colors
print_warning() {
  # %b expands the color escapes; %s keeps the message text literal.
  printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1"
  log_message "WARN" "$1"
}
# Print an error message to stdout and record it in the log.
# Globals:   RED, NC (read)
# Arguments: $1 - message text
# Outputs:   "[ERROR] <message>", with the tag wrapped in the RED/NC colors
print_error() {
  # %b expands the color escapes; %s keeps the message text literal.
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1"
  log_message "ERROR" "$1"
}
# Append one timestamped line to the monitor log.
# Globals:   LOG_FILE (read)
# Arguments: $1 - level tag (e.g. INFO, OK, WARN, ERROR)
#            $2 - message text
# Outputs:   appends "YYYY-MM-DD HH:MM:SS [level] message" to LOG_FILE
log_message() {
  local level="$1"
  local message="$2"
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s [%s] %s\n' "$stamp" "$level" "$message" >> "$LOG_FILE"
}
# Print the command-line usage summary to stdout.
# Arguments: none (uses $0 as the program name in the text)
show_help() {
  # One printf call; each argument becomes one output line.
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    "" \
    "Options:" \
    " -d, --relay-dir DIR Relay directory (default: /opt/c-relay)" \
    " -p, --port PORT Relay port (default: 8888)" \
    " -i, --interval SECONDS Check interval (default: 60)" \
    " -e, --email EMAIL Alert email address" \
    " -w, --webhook URL Webhook URL for alerts" \
    " -m, --max-memory MB Max memory usage alert (default: 1024MB)" \
    " -s, --max-db-size MB Max database size alert (default: 10240MB)" \
    " -f, --min-free-space MB Min disk space alert (default: 1024MB)" \
    " -c, --continuous Run continuously (daemon mode)" \
    " -h, --help Show this help message" \
    "" \
    "Examples:" \
    " $0 # Single check" \
    " $0 -c -i 30 -e admin@example.com # Continuous monitoring" \
    " $0 -w https://hooks.slack.com/... # Webhook notifications"
}
# Parse command-line options into the global configuration variables.
# Globals:   CONTINUOUS, RELAY_DIR, RELAY_PORT, CHECK_INTERVAL, ALERT_EMAIL,
#            WEBHOOK_URL, MAX_MEMORY_MB, MAX_DB_SIZE_MB, MIN_DISK_SPACE_MB
#            (written)
# Arguments: the script's command-line arguments
# Exits:     0 after printing help for -h/--help; 1 on an unknown option or
#            when an option that takes a value is given without one
parse_args() {
  CONTINUOUS="false"

  while [[ $# -gt 0 ]]; do
    # Options that take a value must be followed by one; otherwise "shift 2"
    # below would fail and never consume the option.
    case "$1" in
      -d|--relay-dir|-p|--port|-i|--interval|-e|--email|-w|--webhook|\
      -m|--max-memory|-s|--max-db-size|-f|--min-free-space)
        if (( $# < 2 )); then
          print_error "Option $1 requires a value"
          show_help
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -d|--relay-dir)
        RELAY_DIR="$2"
        shift 2
        ;;
      -p|--port)
        RELAY_PORT="$2"
        shift 2
        ;;
      -i|--interval)
        CHECK_INTERVAL="$2"
        shift 2
        ;;
      -e|--email)
        ALERT_EMAIL="$2"
        shift 2
        ;;
      -w|--webhook)
        WEBHOOK_URL="$2"
        shift 2
        ;;
      -m|--max-memory)
        MAX_MEMORY_MB="$2"
        shift 2
        ;;
      -s|--max-db-size)
        MAX_DB_SIZE_MB="$2"
        shift 2
        ;;
      -f|--min-free-space)
        MIN_DISK_SPACE_MB="$2"
        shift 2
        ;;
      -c|--continuous)
        CONTINUOUS="true"
        shift
        ;;
      -h|--help)
        show_help
        exit 0
        ;;
      *)
        print_error "Unknown option: $1"
        show_help
        exit 1
        ;;
    esac
  done
}
# Check whether a process whose command line matches "c_relay_x86" exists.
# Returns: 0 if pgrep finds a match, 1 otherwise
check_process_running() {
  print_step "Checking if relay process is running..."

  if pgrep -f "c_relay_x86" > /dev/null; then
    print_success "Relay process is running"
    return 0
  fi

  print_error "Relay process is not running"
  return 1
}
# Check whether a TCP listener exists on the relay port.
# Globals: RELAY_PORT (read)
# Returns: 0 if netstat or ss lists a listening socket on the port, else 1
check_port_listening() {
  print_step "Checking if port $RELAY_PORT is listening..."

  # The trailing space anchors the end of the port number, and the leading
  # colon its start, so ":8888 " does not match ":18888 " or ":88880 ".
  local needle=":$RELAY_PORT "
  local tool
  # Try netstat first, then ss; a missing tool simply yields no match.
  for tool in netstat ss; do
    if "$tool" -tln 2>/dev/null | grep -qF -- "$needle"; then
      print_success "Port $RELAY_PORT is listening"
      return 0
    fi
  done

  print_error "Port $RELAY_PORT is not listening"
  return 1
}
# Check whether the systemd unit is active.
# Globals: SERVICE_NAME (read)
# Returns: 0 if "systemctl is-active" succeeds, 1 otherwise
check_service_status() {
  print_step "Checking systemd service status..."

  if systemctl is-active --quiet "$SERVICE_NAME"; then
    print_success "Service $SERVICE_NAME is active"
    return 0
  fi

  # "is-active" prints the state and also exits non-zero for non-active
  # units, so capture its output and only fall back to "unknown" when it
  # printed nothing (appending a fallback would yield two lines).
  local status
  status=$(systemctl is-active "$SERVICE_NAME" 2>/dev/null) || true
  print_error "Service $SERVICE_NAME status: ${status:-unknown}"
  return 1
}
# Compare the relay's resident memory against the configured limit.
# Globals: MAX_MEMORY_MB (read)
# Returns: 0 if usage is within the limit; 1 if it exceeds the limit or the
#          usage could not be determined
check_memory_usage() {
  print_step "Checking memory usage..."

  local pids memory_kb
  # Comma-separated PIDs of every process matching the relay binary name.
  pids=$(pgrep -d, -f "c_relay_x86") || pids=""
  if [[ -n "$pids" ]]; then
    # Sum the RSS column (KB) over all matched processes; print nothing if
    # ps produced no rows so the validation below reports the problem.
    memory_kb=$(ps -o rss= -p "$pids" 2>/dev/null \
      | awk '{sum += $1} END {if (NR > 0) print sum}')
  fi

  if [[ ! "${memory_kb:-}" =~ ^[0-9]+$ ]]; then
    print_warning "Could not determine memory usage"
    return 1
  fi

  local memory_mb=$((memory_kb / 1024))

  if [[ $memory_mb -gt $MAX_MEMORY_MB ]]; then
    print_error "High memory usage: ${memory_mb}MB (limit: ${MAX_MEMORY_MB}MB)"
    return 1
  fi

  print_success "Memory usage: ${memory_mb}MB"
  return 0
}
# Compare the combined size of all *.nrdb files against the configured limit.
# Globals: RELAY_DIR, MAX_DB_SIZE_MB (read)
# Returns: 0 if the total is within the limit; 1 if it exceeds the limit or
#          no database files were found
check_database_size() {
  print_step "Checking database size..."

  # Read NUL-delimited paths so names containing whitespace stay intact.
  local -a db_files=()
  local db_file
  while IFS= read -r -d '' db_file; do
    db_files+=("$db_file")
  done < <(find "$RELAY_DIR" -name "*.nrdb" -print0 2>/dev/null)

  if [[ ${#db_files[@]} -eq 0 ]]; then
    print_warning "No database files found"
    return 1
  fi

  local total_size=0
  local size_kb
  for db_file in "${db_files[@]}"; do
    [[ -r "$db_file" ]] || continue
    size_kb=$(du -k "$db_file" 2>/dev/null | cut -f1)
    # Skip files whose size could not be read instead of breaking the sum.
    [[ "$size_kb" =~ ^[0-9]+$ ]] || continue
    total_size=$((total_size + size_kb))
  done

  local total_size_mb=$((total_size / 1024))

  if [[ $total_size_mb -gt $MAX_DB_SIZE_MB ]]; then
    print_error "Large database size: ${total_size_mb}MB (limit: ${MAX_DB_SIZE_MB}MB)"
    return 1
  fi

  print_success "Database size: ${total_size_mb}MB"
  return 0
}
# Compare free space on the filesystem holding RELAY_DIR with the minimum.
# Globals: RELAY_DIR, MIN_DISK_SPACE_MB (read)
# Returns: 0 if enough space is free; 1 if below the minimum or the free
#          space could not be determined
check_disk_space() {
  print_step "Checking disk space..."

  # -P keeps each filesystem on one line and -k fixes the unit to 1K blocks,
  # so column 4 of line 2 is reliably the available space in KB.
  local free_space_kb
  free_space_kb=$(df -Pk "$RELAY_DIR" | awk 'NR==2 {print $4}')

  if [[ ! "$free_space_kb" =~ ^[0-9]+$ ]]; then
    print_warning "Could not determine free disk space"
    return 1
  fi

  local free_space_mb=$((free_space_kb / 1024))

  if [[ $free_space_mb -lt $MIN_DISK_SPACE_MB ]]; then
    print_error "Low disk space: ${free_space_mb}MB (minimum: ${MIN_DISK_SPACE_MB}MB)"
    return 1
  fi

  print_success "Free disk space: ${free_space_mb}MB"
  return 0
}
# Run SQLite's integrity check on every readable *.nrdb file.
# Globals: RELAY_DIR (read)
# Returns: 0 if every checked file reports "ok"; 1 if any check fails or no
#          database files were found
check_database_integrity() {
  print_step "Checking database integrity..."

  # Read NUL-delimited paths so names containing whitespace stay intact.
  local -a db_files=()
  local db_file
  while IFS= read -r -d '' db_file; do
    db_files+=("$db_file")
  done < <(find "$RELAY_DIR" -name "*.nrdb" -print0 2>/dev/null)

  if [[ ${#db_files[@]} -eq 0 ]]; then
    print_warning "No database files to check"
    return 1
  fi

  local integrity_ok=true
  local result
  for db_file in "${db_files[@]}"; do
    [[ -r "$db_file" ]] || continue

    result=$(timeout 30 sqlite3 "$db_file" "PRAGMA integrity_check;") || true
    # Require the output to be exactly "ok"; a substring match would also
    # accept error text that merely contains those letters.
    if [[ "$result" == "ok" ]]; then
      print_success "Database integrity OK: $(basename "$db_file")"
    else
      print_error "Database integrity failed: $(basename "$db_file")"
      integrity_ok=false
    fi
  done

  if [[ "$integrity_ok" == "true" ]]; then
    return 0
  fi
  return 1
}
# Attempt a WebSocket opening handshake against the local relay port.
# Globals: RELAY_PORT (read)
# Returns: 0 if the server answers the upgrade request with HTTP 101,
#          1 otherwise
check_websocket_connection() {
  print_step "Checking WebSocket connection..."

  # Judge the handshake by the HTTP status rather than curl's exit code: an
  # accepted upgrade leaves the connection open, so curl only ends when
  # --max-time expires (non-zero exit), while a rejected request can still
  # finish with exit code 0.
  # The key is the sample nonce from RFC 6455 (base64 of 16 bytes).
  local http_code
  http_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 \
    -H "Connection: Upgrade" \
    -H "Upgrade: websocket" \
    -H "Sec-WebSocket-Key: dGhlIHNhbXBsZSBub25jZQ==" \
    -H "Sec-WebSocket-Version: 13" \
    "http://localhost:$RELAY_PORT/" 2>/dev/null) || true

  if [[ "$http_code" == "101" ]]; then
    print_success "WebSocket connection test passed"
    return 0
  fi

  print_warning "WebSocket connection test failed (may be normal)"
  return 1
}
# Count kind-33334 events across all readable *.nrdb files.
# Globals: RELAY_DIR (read)
# Returns: 0 if at least one such event exists; 1 if none exist or no
#          database files were found
check_configuration_events() {
  print_step "Checking configuration events..."

  # Read NUL-delimited paths so names containing whitespace stay intact.
  local -a db_files=()
  local db_file
  while IFS= read -r -d '' db_file; do
    db_files+=("$db_file")
  done < <(find "$RELAY_DIR" -name "*.nrdb" -print0 2>/dev/null)

  if [[ ${#db_files[@]} -eq 0 ]]; then
    print_warning "No database files found"
    return 1
  fi

  local config_count=0
  local count
  for db_file in "${db_files[@]}"; do
    [[ -r "$db_file" ]] || continue
    count=$(sqlite3 "$db_file" \
      "SELECT COUNT(*) FROM events WHERE kind = 33334;" 2>/dev/null) || count=0
    # Treat anything that is not a plain number as zero so the sum below
    # cannot fail on unexpected output.
    [[ "$count" =~ ^[0-9]+$ ]] || count=0
    config_count=$((config_count + count))
  done

  if [[ $config_count -gt 0 ]]; then
    print_success "Configuration events found: $config_count"
    return 0
  fi

  print_warning "No configuration events found"
  return 1
}
# Escape a string for embedding inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   the text with backslash, double quote, newline, carriage return
#            and tab replaced by their JSON escape sequences
_json_escape() {
  local s=$1
  s=${s//\\/\\\\}   # backslash first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# Send an alert by e-mail and/or webhook, depending on what is configured.
# Globals:   ALERT_EMAIL, WEBHOOK_URL (read); ALERTS_SENT (incremented)
# Arguments: $1 - subject
#            $2 - message body
#            $3 - severity, sent as the attachment "color" in the webhook
# Returns:   0 even when delivery fails; a failure is reported as a warning
#            so that an unreachable mail or webhook endpoint does not stop
#            the monitor
send_alert() {
  local subject="$1"
  local message="$2"
  local severity="$3"

  ALERTS_SENT=$((ALERTS_SENT + 1))

  # Email alert
  if [[ -n "$ALERT_EMAIL" ]] && command -v mail >/dev/null 2>&1; then
    # %b expands backslash escapes in the message, as "echo -e" would.
    if printf '%b\n' "$message" | mail -s "$subject" "$ALERT_EMAIL"; then
      print_step "Alert sent to $ALERT_EMAIL"
    else
      print_warning "Failed to send alert email to $ALERT_EMAIL"
    fi
  fi

  # Webhook alert
  if [[ -n "$WEBHOOK_URL" ]] && command -v curl >/dev/null 2>&1; then
    # Escape every interpolated value; the message is multi-line and raw
    # newlines or quotes would make the payload invalid JSON.
    local webhook_data
    webhook_data=$(printf '{"text":"%s","attachments":[{"color":"%s","text":"%s"}]}' \
      "$(_json_escape "$subject")" \
      "$(_json_escape "$severity")" \
      "$(_json_escape "$message")")

    if curl -X POST -H 'Content-type: application/json' --max-time 10 \
      --data "$webhook_data" "$WEBHOOK_URL" >/dev/null 2>&1; then
      print_step "Alert sent to webhook"
    else
      print_warning "Failed to send webhook alert"
    fi
  fi
}
# Restart the relay's systemd unit.
# Globals: SERVICE_NAME (read)
# Returns: 0 if "systemctl restart" succeeds, 1 otherwise
restart_service() {
  print_step "Attempting to restart service..."

  if ! systemctl restart "$SERVICE_NAME"; then
    print_error "Failed to restart service"
    return 1
  fi

  print_success "Service restarted successfully"
  sleep 5 # Wait for service to stabilize
  return 0
}
# Run one full round of health checks, print a summary, send an alert when
# any counted check fails, and try a restart when the process is down.
# Globals: TOTAL_CHECKS, FAILED_CHECKS (updated); ALERT_EMAIL, WEBHOOK_URL,
#          SERVICE_NAME, RELAY_PORT (read)
# Returns: 0 if every counted check passed, 1 otherwise
run_checks() {
  local timestamp
  timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  local failed_checks=0

  # Counted checks, in execution order: core functionality, resources,
  # then database.
  local -a critical_checks=(
    check_process_running
    check_service_status
    check_port_listening
    check_memory_usage
    check_disk_space
    check_database_size
    check_database_integrity
    check_configuration_events
  )
  local total_checks=${#critical_checks[@]}

  echo
  echo "🔍 Relay Health Check - $timestamp"
  echo "=================================="

  local check
  for check in "${critical_checks[@]}"; do
    # Plain assignment instead of ((failed_checks++)): the latter returns a
    # non-zero status when the counter is 0, which aborts under "set -e".
    "$check" || failed_checks=$((failed_checks + 1))
  done

  # Optional check: not counted, and its failure must not abort the run.
  check_websocket_connection || true

  TOTAL_CHECKS=$((TOTAL_CHECKS + total_checks))
  FAILED_CHECKS=$((FAILED_CHECKS + failed_checks))

  # Summary
  echo
  if [[ $failed_checks -eq 0 ]]; then
    print_success "All checks passed ($total_checks/$total_checks)"
    return 0
  fi

  print_error "Failed checks: $failed_checks/$total_checks"

  # Send alert if configured
  if [[ -n "$ALERT_EMAIL" || -n "$WEBHOOK_URL" ]]; then
    local alert_subject="C Nostr Relay Health Alert"
    local alert_message="Relay health check failed.

Failed checks: $failed_checks/$total_checks
Time: $timestamp
Host: $(hostname)
Service: $SERVICE_NAME
Port: $RELAY_PORT

Please check the relay logs:
sudo journalctl -u $SERVICE_NAME --since '10 minutes ago'
"
    send_alert "$alert_subject" "$alert_message" "danger"
  fi

  # Auto-restart if service is down. restart_service reports its own
  # failure; "|| true" keeps that failure from aborting under "set -e".
  if ! check_process_running >/dev/null 2>&1; then
    print_step "Process is down, attempting restart..."
    restart_service || true
  fi

  return 1
}
# Print cumulative monitoring statistics; prints nothing before the first
# round of checks has been counted.
# Globals: TOTAL_CHECKS, FAILED_CHECKS, ALERTS_SENT (read)
show_statistics() {
  # Guard: also avoids a division by zero in the success-rate calculation.
  if [[ $TOTAL_CHECKS -le 0 ]]; then
    return 0
  fi

  # Integer percentage of checks that passed (rounded down).
  local success_rate=$(( (TOTAL_CHECKS - FAILED_CHECKS) * 100 / TOTAL_CHECKS ))

  printf '%s\n' \
    "" \
    "📊 Monitoring Statistics" \
    "=======================" \
    "Total Checks: $TOTAL_CHECKS" \
    "Failed Checks: $FAILED_CHECKS" \
    "Success Rate: ${success_rate}%" \
    "Alerts Sent: $ALERTS_SENT"
}
# Shutdown handler: report that monitoring stopped, print the statistics,
# and exit the script successfully.
# Exits: 0
cleanup() {
  echo
  print_step "Monitoring stopped"
  show_statistics
  exit 0
}
# Main execution
#
# Print the banner, prepare the log file, parse options, then run the checks
# either once or repeatedly.
# Globals:   LOG_FILE, CONTINUOUS, CHECK_INTERVAL (read)
# Arguments: the script's command-line arguments
# Returns:   in single-check mode, the status of run_checks; continuous mode
#            loops until the script is interrupted
main() {
  echo
  echo "📡 C Nostr Relay - Health Monitor"
  echo "================================="
  echo

  # Initialize log file
  mkdir -p "$(dirname "$LOG_FILE")"
  touch "$LOG_FILE"

  parse_args "$@"

  # Trap signals for cleanup
  trap cleanup SIGINT SIGTERM

  local status=0
  if [[ "$CONTINUOUS" == "true" ]]; then
    print_step "Starting continuous monitoring (interval: ${CHECK_INTERVAL}s)"
    print_step "Press Ctrl+C to stop"

    while true; do
      # A failed round must not end the loop: under "set -e" an unguarded
      # non-zero return would terminate the monitor on the first failure.
      run_checks || true
      sleep "$CHECK_INTERVAL"
    done
  else
    # Capture the result so the statistics are still printed, then pass the
    # failure on as the return status.
    run_checks || status=$?
  fi

  show_statistics
  return "$status"
}
# Run main function, forwarding all command-line arguments unchanged.
main "$@"
Reference in New Issue
Block a user