#!/usr/bin/env bash
#
# ThrillWiki Deployment Automation Service Script
# Comprehensive automated deployment management with preset integration
#
# Features:
# - Cross-shell compatible (bash/zsh)
# - Deployment preset integration
# - Health monitoring and recovery
# - Smart deployment coordination
# - Systemd service integration
# - GitHub authentication management
# - Server lifecycle management
#

set -e

# ============================================================
# SCRIPT CONFIGURATION
# ============================================================

# Cross-shell compatible script directory detection
if [ -n "${BASH_SOURCE:-}" ]; then
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  SCRIPT_NAME="$(basename "${BASH_SOURCE[0]}")"
elif [ -n "${ZSH_NAME:-}" ]; then
  SCRIPT_DIR="$(cd "$(dirname "${(%):-%x}")" && pwd)"
  SCRIPT_NAME="$(basename "${(%):-%x}")"
else
  SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
  SCRIPT_NAME="$(basename "$0")"
fi

# The project root is two directory levels above this script.
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Default configuration (can be overridden by environment)
# NOTE(review): PULL_INTERVAL, HEALTH_CHECK_INTERVAL, DEBUG_MODE and LOG_LEVEL
# are always non-empty after this point, so any later "${VAR:-fallback}"
# expansion of them (as done per preset in apply_preset_configuration) keeps
# the value assigned here. Confirm whether presets are meant to override.
DEPLOYMENT_PRESET="${DEPLOYMENT_PRESET:-dev}"
PULL_INTERVAL="${PULL_INTERVAL:-300}"
HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-60}"
DEBUG_MODE="${DEBUG_MODE:-false}"
LOG_LEVEL="${LOG_LEVEL:-INFO}"
MAX_RESTART_ATTEMPTS="${MAX_RESTART_ATTEMPTS:-3}"
RESTART_COOLDOWN="${RESTART_COOLDOWN:-300}"

# Logging configuration
LOG_DIR="${LOG_DIR:-$PROJECT_DIR/logs}"
LOG_FILE="${LOG_FILE:-$LOG_DIR/deployment-automation.log}"
LOCK_FILE="${LOCK_FILE:-/tmp/thrillwiki-deployment.lock}"

# ============================================================
# COLOR DEFINITIONS
# ============================================================

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m' # No Color

# ============================================================
# LOGGING FUNCTIONS
# ============================================================

# Emit one log record.
# The record is appended to $LOG_FILE without colors. It is also printed with
# a colored prefix when stdout is a terminal and SYSTEMD_EXEC_PID is empty, or
# printed as the bare message when SYSTEMD_EXEC_PID is set.
# Arguments:
#   $1 - level label (e.g. INFO)
#   $2 - escape sequence used to color the console prefix
#   $3 - message text
deploy_log() {
  local level="$1"
  local color="$2"
  local message="$3"
  local timestamp
  local line

  # Declared separately from the assignment so the status of date is not
  # masked by 'local'.
  timestamp="$(date '+%Y-%m-%d %H:%M:%S')" || timestamp="unknown-time"
  line="[$timestamp] [$level] [DEPLOY-AUTO] $message"

  # Log to file (without colors). A logging failure must not abort the
  # service under 'set -e', so an unwritable log falls back to stderr instead
  # of propagating the error.
  if ! { mkdir -p "$(dirname "$LOG_FILE")" \
    && printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null; then
    printf '%s\n' "$line" >&2
  fi

  # Log to console (with colors) if not running as systemd service.
  # %b expands the escapes in the color codes only; the message is printed
  # verbatim with %s so backslashes in it are not interpreted.
  if [ -t 1 ] && [ -z "${SYSTEMD_EXEC_PID:-}" ]; then
    printf '%b[%s] [DEPLOY-AUTO-%s]%b %s\n' \
      "$color" "$timestamp" "$level" "$NC" "$message"
  fi

  # Log to systemd journal if running as service
  if [ -n "${SYSTEMD_EXEC_PID:-}" ]; then
    printf '%s\n' "$message"
  fi
}

# Log $1 at INFO level.
deploy_info() {
  deploy_log "INFO" "$BLUE" "$1"
}

# Log $1 at SUCCESS level.
deploy_success() {
  deploy_log "SUCCESS" "$GREEN" "✅ $1"
}

# Log $1 at WARNING level.
deploy_warning() {
  deploy_log "WARNING" "$YELLOW" "⚠️ $1"
}

# Log $1 at ERROR level.
deploy_error() {
  deploy_log "ERROR" "$RED" "❌ $1"
}

# Log $1 at DEBUG level, only when DEBUG_MODE is "true" or LOG_LEVEL is "DEBUG".
deploy_debug() {
  if [ "${DEBUG_MODE:-false}" = "true" ] || [ "${LOG_LEVEL:-INFO}" = "DEBUG" ]; then
    deploy_log "DEBUG" "$PURPLE" "🔍 $1"
  fi
}

# Log $1 at PROGRESS level.
deploy_progress() {
  deploy_log "PROGRESS" "$CYAN" "🚀 $1"
}

# ============================================================
# UTILITY FUNCTIONS
# ============================================================

# Cross-shell compatible command existence check
# Arguments: $1 - command name
# Returns:   0 if the command resolves, non-zero otherwise
command_exists() {
  command -v "$1" >/dev/null 2>&1
}

# Lock file management

# Claim $LOCK_FILE for this process by writing its PID into it.
# An existing lock whose recorded PID is still alive blocks acquisition; an
# existing lock with no live PID is treated as stale and removed.
# Returns: 0 when the lock was written, 1 when another instance holds it or
#          the lock file could not be created.
acquire_lock() {
  local lock_pid=""

  if [ -f "$LOCK_FILE" ]; then
    lock_pid=$(cat "$LOCK_FILE" 2>/dev/null || echo "")
    if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then
      deploy_warning "Another deployment automation instance is already running (PID: $lock_pid)"
      return 1
    fi
    deploy_info "Removing stale lock file"
    rm -f "$LOCK_FILE"
  fi

  # noclobber makes the redirection fail if the file already exists, so an
  # instance that created the lock after the check above is not overwritten.
  # The subshell keeps the option from leaking into the rest of the script;
  # $$ still expands to the PID of the main shell there.
  if ! (set -o noclobber; printf '%s\n' "$$" > "$LOCK_FILE") 2>/dev/null; then
    deploy_warning "Unable to create lock file: $LOCK_FILE"
    return 1
  fi

  deploy_debug "Lock acquired (PID: $$)"
  return 0
}

# Remove $LOCK_FILE, but only when it records this process's PID, so a lock
# held by a different instance is never deleted by mistake.
release_lock() {
  local lock_pid=""

  if [ -f "$LOCK_FILE" ]; then
    lock_pid=$(cat "$LOCK_FILE" 2>/dev/null || echo "")
    if [ "$lock_pid" = "$$" ]; then
      rm -f "$LOCK_FILE"
      deploy_debug "Lock released"
    fi
  fi
}

# Trap for cleanup
# Logs the shutdown, releases the lock and exits with status 0.
cleanup_and_exit() {
  deploy_info "Deployment automation service stopping"
  release_lock
  exit 0
}

# ============================================================
# PRESET CONFIGURATION FUNCTIONS
# ============================================================

# Apply deployment preset configuration
# Reads DEPLOYMENT_PRESET (default "dev") and fills in any setting that is
# still unset or empty. Values already present in the environment win.
# NOTE(review): the script's top-level configuration already assigns
# non-empty defaults to PULL_INTERVAL, HEALTH_CHECK_INTERVAL, DEBUG_MODE and
# LOG_LEVEL, so for those four the per-preset fallbacks below never take
# effect; only AUTO_MIGRATE and AUTO_UPDATE_DEPENDENCIES are actually
# preset-driven. Confirm the intended precedence.
apply_preset_configuration() {
  local preset="${DEPLOYMENT_PRESET:-dev}"

  deploy_info "Applying deployment preset: $preset"

  case "$preset" in
    "dev")
      PULL_INTERVAL="${PULL_INTERVAL:-60}"
      HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-30}"
      DEBUG_MODE="${DEBUG_MODE:-true}"
      LOG_LEVEL="${LOG_LEVEL:-DEBUG}"
      AUTO_MIGRATE="${AUTO_MIGRATE:-true}"
      AUTO_UPDATE_DEPENDENCIES="${AUTO_UPDATE_DEPENDENCIES:-true}"
      ;;
    "prod")
      PULL_INTERVAL="${PULL_INTERVAL:-300}"
      HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-60}"
      DEBUG_MODE="${DEBUG_MODE:-false}"
      LOG_LEVEL="${LOG_LEVEL:-WARNING}"
      AUTO_MIGRATE="${AUTO_MIGRATE:-true}"
      AUTO_UPDATE_DEPENDENCIES="${AUTO_UPDATE_DEPENDENCIES:-false}"
      ;;
    "demo")
      PULL_INTERVAL="${PULL_INTERVAL:-120}"
      HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-45}"
      DEBUG_MODE="${DEBUG_MODE:-false}"
      LOG_LEVEL="${LOG_LEVEL:-INFO}"
      AUTO_MIGRATE="${AUTO_MIGRATE:-true}"
      AUTO_UPDATE_DEPENDENCIES="${AUTO_UPDATE_DEPENDENCIES:-true}"
      ;;
    "testing")
      PULL_INTERVAL="${PULL_INTERVAL:-180}"
      HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-30}"
      DEBUG_MODE="${DEBUG_MODE:-true}"
      LOG_LEVEL="${LOG_LEVEL:-DEBUG}"
      AUTO_MIGRATE="${AUTO_MIGRATE:-true}"
      AUTO_UPDATE_DEPENDENCIES="${AUTO_UPDATE_DEPENDENCIES:-true}"
      ;;
    *)
      deploy_warning "Unknown preset '$preset', using development defaults"
      PULL_INTERVAL="${PULL_INTERVAL:-60}"
      HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-30}"
      DEBUG_MODE="${DEBUG_MODE:-true}"
      LOG_LEVEL="${LOG_LEVEL:-DEBUG}"
      # Same values as the "dev" arm, so "development defaults" is complete.
      AUTO_MIGRATE="${AUTO_MIGRATE:-true}"
      AUTO_UPDATE_DEPENDENCIES="${AUTO_UPDATE_DEPENDENCIES:-true}"
      ;;
  esac

  deploy_success "Preset configuration applied successfully"
  deploy_debug "Configuration: interval=${PULL_INTERVAL}s, health=${HEALTH_CHECK_INTERVAL}s, debug=$DEBUG_MODE"
}

# ============================================================
# HEALTH CHECK FUNCTIONS
# ============================================================

# Check if smart deployment service is healthy
# Returns: 0 when scripts/smart-deploy.sh is executable and, where systemctl
#          exists, thrillwiki-smart-deploy.timer is active; 1 otherwise.
check_smart_deployment_health() {
  local smart_deploy_script="$PROJECT_DIR/scripts/smart-deploy.sh"

  deploy_debug "Checking smart deployment service health"

  # Check if smart-deploy script exists and is executable
  if [ ! -x "$smart_deploy_script" ]; then
    deploy_warning "Smart deployment script not found or not executable: $smart_deploy_script"
    return 1
  fi

  # Check if systemd timer is active (skipped on hosts without systemctl)
  if command_exists systemctl; then
    if ! systemctl is-active --quiet thrillwiki-smart-deploy.timer 2>/dev/null; then
      deploy_warning "Smart deployment timer is not active"
      return 1
    fi
    deploy_debug "Smart deployment timer is active"
  fi

  return 0
}

# Check if development server is healthy
# Requests HEALTH_CHECK_URL (default http://localhost:8000/) with curl.
# HEALTH_CHECK_TIMEOUT (default 30) bounds both the connect phase and the
# whole request, so a server that accepts the connection but never answers
# cannot stall the monitoring loop.
# Returns: 0 when curl succeeds, 1 when it fails or curl is missing.
check_development_server_health() {
  local health_url="${HEALTH_CHECK_URL:-http://localhost:8000/}"
  local timeout="${HEALTH_CHECK_TIMEOUT:-30}"

  deploy_debug "Checking development server health"

  if ! command_exists curl; then
    deploy_warning "curl not available for health checks"
    return 1
  fi

  if curl -s --connect-timeout "$timeout" --max-time "$timeout" \
    "$health_url" >/dev/null 2>&1; then
    deploy_debug "Development server health check passed"
    return 0
  fi

  deploy_warning "Development server health check failed"
  return 1
}

# Check GitHub authentication
# The token is read from GITHUB_TOKEN_FILE (default $PROJECT_DIR/.github-pat),
# falling back to the GITHUB_TOKEN environment variable, and is verified by
# requesting https://api.github.com/user and looking for a "login" field.
# Returns: 0 when verified; 1 when no token is found, curl is missing, or the
#          response contains no "login" field.
check_github_authentication() {
  local token_file="${GITHUB_TOKEN_FILE:-$PROJECT_DIR/.github-pat}"
  local github_token=""
  local response=""

  deploy_debug "Checking GitHub authentication"

  # Try to get token from file (an unreadable file counts as "no token")
  if [ -f "$token_file" ]; then
    github_token=$({ tr -d '\n\r' < "$token_file"; } 2>/dev/null) || github_token=""
  fi

  # Try environment variable
  if [ -z "$github_token" ] && [ -n "${GITHUB_TOKEN:-}" ]; then
    github_token="$GITHUB_TOKEN"
  fi

  if [ -z "$github_token" ]; then
    deploy_warning "No GitHub token found"
    return 1
  fi

  if ! command_exists curl; then
    deploy_warning "Cannot verify GitHub authentication - curl not available"
    return 1
  fi

  # Test GitHub API access. --max-time keeps an unresponsive network from
  # blocking the health check; a failed request leaves the response empty.
  response=$(curl -s --max-time "${HEALTH_CHECK_TIMEOUT:-30}" \
    -H "Authorization: token $github_token" \
    https://api.github.com/user 2>/dev/null) || response=""

  if printf '%s' "$response" | grep -q '"login"'; then
    deploy_debug "GitHub authentication verified"
    return 0
  fi

  deploy_warning "GitHub authentication failed"
  return 1
}

# Comprehensive system health check
# Runs all three checks (smart deployment, development server, GitHub
# authentication) and counts the failures.
# Returns: 0 when every check passes, 1 otherwise.
perform_health_check() {
  local health_issues=0

  deploy_debug "Performing comprehensive health check"

  # Counters use $(( )) assignments: '((n++))' returns status 1 when n is 0,
  # which aborts the script under 'set -e' outside a conditional context.

  # Check smart deployment
  if ! check_smart_deployment_health; then
    health_issues=$((health_issues + 1))
  fi

  # Check development server
  if ! check_development_server_health; then
    health_issues=$((health_issues + 1))
  fi

  # Check GitHub authentication
  if ! check_github_authentication; then
    health_issues=$((health_issues + 1))
  fi

  if [ "$health_issues" -eq 0 ]; then
    deploy_success "All health checks passed"
    return 0
  fi

  deploy_warning "Health check found $health_issues issue(s)"
  return 1
}

# ============================================================
# RECOVERY FUNCTIONS
# ============================================================

# Restart smart deployment timer
# Returns: 0 when systemctl restarted thrillwiki-smart-deploy.timer, 1 when
#          the restart failed or systemctl is unavailable.
restart_smart_deployment() {
  deploy_info "Restarting smart deployment timer"

  if ! command_exists systemctl; then
    deploy_warning "systemctl not available - cannot restart smart deployment"
    return 1
  fi

  if systemctl restart thrillwiki-smart-deploy.timer 2>/dev/null; then
    deploy_success "Smart deployment timer restarted"
    return 0
  fi

  deploy_error "Failed to restart smart deployment timer"
  return 1
}

# Restart development server through smart deployment
# Runs "scripts/smart-deploy.sh restart-server" and forwards each output line
# to the debug log.
# Returns: 0 when the script exits successfully, 1 when it fails or is not
#          executable.
restart_development_server() {
  local smart_deploy_script="$PROJECT_DIR/scripts/smart-deploy.sh"
  local line

  deploy_info "Restarting development server"

  if [ ! -x "$smart_deploy_script" ]; then
    deploy_warning "Smart deployment script not available"
    return 1
  fi

  # Without pipefail the pipeline reports the status of the 'while' loop, so
  # a failing smart-deploy run would look like a success. The subshell keeps
  # the option local to this pipeline.
  if (
    set -o pipefail
    "$smart_deploy_script" restart-server 2>&1 | while IFS= read -r line; do
      deploy_debug "Smart deploy: $line"
    done
  ); then
    deploy_success "Development server restart initiated"
    return 0
  fi

  deploy_error "Failed to restart development server"
  return 1
}

# Attempt recovery from health check failures
# Restarts the smart deployment timer, then the development server, then
# re-runs the health check, pausing after each restart.
# Arguments:
#   $1 - number of this attempt (used in log messages)
#   $2 - maximum number of attempts (used in log messages)
# Returns: 0 when the final health check passes, 1 otherwise.
attempt_recovery() {
  local attempt="$1"
  local max_attempts="$2"

  deploy_info "Attempting recovery (attempt $attempt/$max_attempts)"

  # Try restarting smart deployment
  if restart_smart_deployment; then
    sleep 30 # Wait for service to stabilize

    # Try restarting development server
    if restart_development_server; then
      sleep 60 # Wait for server to start

      # Recheck health
      if perform_health_check; then
        deploy_success "Recovery successful"
        return 0
      fi
    fi
  fi

  deploy_warning "Recovery attempt $attempt failed"
  return 1
}

# ============================================================
# MAIN AUTOMATION LOOP
# ============================================================

# Main deployment automation service
# Loops forever: runs a health check every HEALTH_CHECK_INTERVAL seconds and,
# after 3 consecutive failures, makes up to MAX_RESTART_ATTEMPTS recovery
# attempts. Recovery rounds are spaced at least RESTART_COOLDOWN seconds
# apart. Does not return.
run_deployment_automation() {
  local consecutive_failures=0
  local last_recovery_attempt=0
  local current_time
  local recovery_attempt

  deploy_info "Starting deployment automation service"
  deploy_info "Preset: $DEPLOYMENT_PRESET, Pull interval: ${PULL_INTERVAL}s, Health check: ${HEALTH_CHECK_INTERVAL}s"

  while true; do
    # Perform health check
    if perform_health_check; then
      consecutive_failures=0
      deploy_debug "System healthy - continuing monitoring"
    else
      # Plain assignment rather than '((consecutive_failures++))': the
      # post-increment evaluates to 0 on the first failure, which yields
      # status 1 and would terminate the service under 'set -e'.
      consecutive_failures=$((consecutive_failures + 1))
      deploy_warning "Health check failed (consecutive failures: $consecutive_failures)"

      # Attempt recovery if we have consecutive failures
      if [ "$consecutive_failures" -ge 3 ]; then
        current_time=$(date +%s)

        # Check if enough time has passed since last recovery attempt
        if [ $((current_time - last_recovery_attempt)) -ge "$RESTART_COOLDOWN" ]; then
          deploy_info "Too many consecutive failures, attempting recovery"

          recovery_attempt=1
          while [ "$recovery_attempt" -le "$MAX_RESTART_ATTEMPTS" ]; do
            if attempt_recovery "$recovery_attempt" "$MAX_RESTART_ATTEMPTS"; then
              consecutive_failures=0
              last_recovery_attempt=$current_time
              break
            fi

            recovery_attempt=$((recovery_attempt + 1))
            if [ "$recovery_attempt" -le "$MAX_RESTART_ATTEMPTS" ]; then
              sleep 60 # Wait between recovery attempts
            fi
          done

          # The counter only exceeds the maximum when no attempt succeeded.
          if [ "$recovery_attempt" -gt "$MAX_RESTART_ATTEMPTS" ]; then
            deploy_error "All recovery attempts failed - manual intervention may be required"
            # Reset failure count to prevent continuous recovery attempts
            consecutive_failures=0
            last_recovery_attempt=$current_time
          fi
        else
          deploy_debug "Recovery cooldown in effect, waiting before next attempt"
        fi
      fi
    fi

    # Wait for next health check cycle
    sleep "$HEALTH_CHECK_INTERVAL"
  done
}

# ============================================================
# INITIALIZATION AND STARTUP
# ============================================================

# Initialize deployment automation
# Changes into the project directory, applies the preset, installs the
# INT/TERM handler, acquires the instance lock and runs a first health check.
# Exits with status 1 when the project directory or the lock is unavailable.
initialize_automation() {
  deploy_info "Initializing ThrillWiki deployment automation"

  # Ensure we're in the project directory
  if ! cd "$PROJECT_DIR"; then
    deploy_error "Cannot change to project directory: $PROJECT_DIR"
    exit 1
  fi

  # Apply preset configuration
  apply_preset_configuration

  # Set up signal handlers
  trap cleanup_and_exit INT TERM

  # Acquire lock
  if ! acquire_lock; then
    deploy_error "Failed to acquire deployment lock"
    exit 1
  fi

  # Perform initial health check
  deploy_info "Performing initial system health check"
  if ! perform_health_check; then
    deploy_warning "Initial health check detected issues - will monitor and attempt recovery"
  fi

  deploy_success "Deployment automation initialized successfully"
}

# ============================================================
# COMMAND HANDLING
# ============================================================

# Report whether the instance recorded in $LOCK_FILE is running.
# Lives in a function because 'local' is only valid inside one.
# Returns: 0 when the recorded PID is alive, 1 otherwise.
show_automation_status() {
  local lock_pid=""

  if [ ! -f "$LOCK_FILE" ]; then
    echo "Deployment automation is not running"
    return 1
  fi

  lock_pid=$(cat "$LOCK_FILE" 2>/dev/null || echo "")
  if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then
    echo "Deployment automation is running (PID: $lock_pid)"
    return 0
  fi

  echo "Deployment automation is not running (stale lock file)"
  return 1
}

# Stop the instance recorded in $LOCK_FILE: send TERM, wait 5 seconds, send
# KILL if it is still alive, then remove the lock file.
# Lives in a function because 'local' is only valid inside one.
stop_automation() {
  local lock_pid=""

  if [ ! -f "$LOCK_FILE" ]; then
    echo "Deployment automation is not running"
    return 0
  fi

  lock_pid=$(cat "$LOCK_FILE" 2>/dev/null || echo "")
  if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then
    echo "Stopping deployment automation (PID: $lock_pid)"
    # The process may exit between the liveness check and the signal; that
    # is not an error, so a failing kill must not abort under 'set -e'.
    kill -TERM "$lock_pid" 2>/dev/null || true
    sleep 5
    if kill -0 "$lock_pid" 2>/dev/null; then
      kill -KILL "$lock_pid" 2>/dev/null || true
    fi
    rm -f "$LOCK_FILE"
    echo "Deployment automation stopped"
  else
    echo "Deployment automation is not running"
    rm -f "$LOCK_FILE"
  fi
  return 0
}

# Handle script commands
case "${1:-start}" in
  start)
    initialize_automation
    run_deployment_automation
    ;;
  health-check)
    if perform_health_check; then
      echo "System is healthy"
      exit 0
    else
      echo "System health check failed"
      exit 1
    fi
    ;;
  restart-smart-deploy)
    restart_smart_deployment
    ;;
  restart-server)
    restart_development_server
    ;;
  status)
    if show_automation_status; then
      exit 0
    else
      exit 1
    fi
    ;;
  stop)
    stop_automation
    ;;
  *)
    echo "Usage: $0 {start|stop|status|health-check|restart-smart-deploy|restart-server}"
    exit 1
    ;;
esac