Files
thrillwiki_django_no_react/shared/scripts/vm/deploy-automation.sh
pacnpal d504d41de2 feat: complete monorepo structure with frontend and shared resources
- Add complete backend/ directory with full Django application
- Add frontend/ directory with Vite + TypeScript setup ready for Next.js
- Add comprehensive shared/ directory with:
  - Complete documentation and memory-bank archives
  - Media files and avatars (letters, park/ride images)
  - Deployment scripts and automation tools
  - Shared types and utilities
- Add architecture/ directory with migration guides
- Configure pnpm workspace for monorepo development
- Update .gitignore to exclude .django_tailwind_cli/ build artifacts
- Preserve all historical documentation in shared/docs/memory-bank/
- Set up proper structure for full-stack development with shared resources
2025-08-23 18:40:07 -04:00

560 lines
17 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# ThrillWiki Deployment Automation Service Script
# Comprehensive automated deployment management with preset integration
#
# Features:
# - Cross-shell compatible (bash/zsh)
# - Deployment preset integration
# - Health monitoring and recovery
# - Smart deployment coordination
# - Systemd service integration
# - GitHub authentication management
# - Server lifecycle management
#
set -e

# ============================================================
# SCRIPT CONFIGURATION
# ============================================================

# Work out the path of this script in a shell-specific way first, then derive
# the directory and file name from that single value.
if [ -n "${BASH_SOURCE:-}" ]; then
  # bash: BASH_SOURCE[0] is the file currently being read
  _deploy_self_path="${BASH_SOURCE[0]}"
elif [ -n "${ZSH_NAME:-}" ]; then
  # zsh: the %x prompt escape expands to the file being executed
  _deploy_self_path="${(%):-%x}"
else
  # any other shell: fall back to $0
  _deploy_self_path="$0"
fi
SCRIPT_DIR="$(cd "$(dirname "$_deploy_self_path")" && pwd)"
SCRIPT_NAME="$(basename "$_deploy_self_path")"
unset _deploy_self_path

# The project root is two directory levels above the script directory.
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Default configuration (can be overridden by environment)
#
# PULL_INTERVAL, HEALTH_CHECK_INTERVAL, DEBUG_MODE and LOG_LEVEL are
# intentionally left empty when the environment does not provide them.
# apply_preset_configuration fills them in with "${VAR:-<preset value>}";
# assigning a non-empty fallback here would make every preset value
# unreachable. deploy_debug carries its own fallbacks for DEBUG_MODE and
# LOG_LEVEL, so commands that never apply a preset keep working.
DEPLOYMENT_PRESET="${DEPLOYMENT_PRESET:-dev}"
PULL_INTERVAL="${PULL_INTERVAL:-}"
HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-}"
DEBUG_MODE="${DEBUG_MODE:-}"
LOG_LEVEL="${LOG_LEVEL:-}"

# Recovery tuning: number of recovery attempts per incident and the minimum
# number of seconds between two recovery rounds.
MAX_RESTART_ATTEMPTS="${MAX_RESTART_ATTEMPTS:-3}"
RESTART_COOLDOWN="${RESTART_COOLDOWN:-300}"

# Logging configuration
LOG_DIR="${LOG_DIR:-$PROJECT_DIR/logs}"
LOG_FILE="${LOG_FILE:-$LOG_DIR/deployment-automation.log}"

# PID lock file used to keep a single automation instance running
LOCK_FILE="${LOCK_FILE:-/tmp/thrillwiki-deployment.lock}"
# ============================================================
# COLOR DEFINITIONS
# ============================================================
# ANSI escape sequences, stored with a literal backslash so they are only
# turned into control characters by the command that prints them.

# Foreground colors
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
PURPLE="\033[0;35m"
CYAN="\033[0;36m"

# Text attributes
BOLD="\033[1m"
NC="\033[0m" # No Color (reset)
# ============================================================
# LOGGING FUNCTIONS
# ============================================================
#######################################
# Write one log record to LOG_FILE and, depending on context, to stdout.
# Globals:   LOG_FILE (read), NC (read), SYSTEMD_EXEC_PID (read)
# Arguments: $1 - level label (e.g. INFO)
#            $2 - color escape sequence used for the console prefix
#            $3 - message text (always printed verbatim)
# Outputs:   appends "[timestamp] [level] [DEPLOY-AUTO] message" to LOG_FILE;
#            prints the bare message when SYSTEMD_EXEC_PID is set, or a
#            colored line when stdout is a terminal
#######################################
deploy_log() {
  local level="$1"
  local color="$2"
  local message="$3"
  local timestamp
  # Declared separately so the assignment reports date's exit status
  timestamp="$(date '+%Y-%m-%d %H:%M:%S')"

  # Ensure log directory exists
  mkdir -p "$(dirname "$LOG_FILE")"

  # Log to file (without colors)
  printf '[%s] [%s] [DEPLOY-AUTO] %s\n' "$timestamp" "$level" "$message" >> "$LOG_FILE"

  if [ -n "${SYSTEMD_EXEC_PID:-}" ]; then
    # Running as a systemd service: plain message for the journal
    printf '%s\n' "$message"
  elif [ -t 1 ]; then
    # Interactive console: only the color codes go through %b, so escape
    # sequences that happen to appear in the message are not interpreted
    printf '%b[%s] [DEPLOY-AUTO-%s]%b %s\n' "$color" "$timestamp" "$level" "$NC" "$message"
  fi
}
# Log an informational message.
# Arguments: $1 - message text
deploy_info() {
  local text="$1"
  deploy_log "INFO" "$BLUE" "$text"
}
# Log a success message.
# Arguments: $1 - message text
deploy_success() {
  local text="$1"
  deploy_log "SUCCESS" "$GREEN" "$text"
}
# Log a warning; the message is prefixed with a warning sign.
# Arguments: $1 - message text
deploy_warning() {
  local text="⚠️ $1"
  deploy_log "WARNING" "$YELLOW" "$text"
}
# Log an error message.
# Arguments: $1 - message text
deploy_error() {
  local text="$1"
  deploy_log "ERROR" "$RED" "$text"
}
# Log a debug message, but only when DEBUG_MODE is "true" or LOG_LEVEL is
# "DEBUG"; otherwise do nothing and succeed.
# Arguments: $1 - message text
deploy_debug() {
  # Guard clause: neither switch is on, so stay silent
  if [ "${DEBUG_MODE:-false}" != "true" ] && [ "${LOG_LEVEL:-INFO}" != "DEBUG" ]; then
    return 0
  fi
  deploy_log "DEBUG" "$PURPLE" "🔍 $1"
}
# Log a progress message; the message is prefixed with a rocket.
# Arguments: $1 - message text
deploy_progress() {
  local text="🚀 $1"
  deploy_log "PROGRESS" "$CYAN" "$text"
}
# ============================================================
# UTILITY FUNCTIONS
# ============================================================
# Tell whether a command name resolves to something runnable.
# Uses "command -v", which is available in bash, zsh and POSIX sh.
# Arguments: $1 - command name
# Returns:   0 when the command is found, 1 otherwise
command_exists() {
  local tool="$1"
  if command -v "$tool" >/dev/null 2>&1; then
    return 0
  fi
  return 1
}
# Lock file management

# Create LOCK_FILE containing this shell's PID, failing if it already exists.
# noclobber makes the redirection an exclusive create, so two instances
# starting at the same moment cannot both succeed. The subshell keeps the
# option from leaking into the caller; $$ is still the main shell's PID there.
_try_create_lock_file() {
  ( set -o noclobber; printf '%s\n' "$$" > "$LOCK_FILE" ) 2>/dev/null
}

#######################################
# Take the single-instance lock.
# Globals:   LOCK_FILE (read; file is created/replaced)
# Returns:   0 when this process now holds the lock;
#            1 when a live process holds it or the file cannot be created
#######################################
acquire_lock() {
  local lock_pid=""

  # Fast path: nobody holds the lock
  if _try_create_lock_file; then
    deploy_debug "Lock acquired (PID: $$)"
    return 0
  fi

  if [ -f "$LOCK_FILE" ]; then
    lock_pid=$(cat "$LOCK_FILE" 2>/dev/null || echo "")
    # kill -0 only probes whether the recorded PID can be signalled
    if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then
      deploy_warning "Another deployment automation instance is already running (PID: $lock_pid)"
      return 1
    fi

    deploy_info "Removing stale lock file"
    rm -f "$LOCK_FILE"
    if _try_create_lock_file; then
      deploy_debug "Lock acquired (PID: $$)"
      return 0
    fi
  fi

  # Either the location is not writable or another instance won the race
  deploy_warning "Unable to create lock file: $LOCK_FILE"
  return 1
}
# Delete LOCK_FILE if it exists; a missing file is not an error.
# Globals: LOCK_FILE (read; file is removed)
release_lock() {
  # Nothing to release
  [ -f "$LOCK_FILE" ] || return 0

  rm -f "$LOCK_FILE"
  deploy_debug "Lock released"
}
# Signal handler: announce shutdown, drop the lock, and end the script
# with exit status 0.
cleanup_and_exit() {
  deploy_info "Deployment automation service stopping"

  release_lock

  exit 0
}
# ============================================================
# PRESET CONFIGURATION FUNCTIONS
# ============================================================
#######################################
# Apply deployment preset configuration.
# Every setting uses "${VAR:-value}", so a preset value is applied only when
# the variable is unset or empty at the time this function runs; a value
# that is already set always wins.
# Globals:   DEPLOYMENT_PRESET (read)
#            PULL_INTERVAL, HEALTH_CHECK_INTERVAL, DEBUG_MODE, LOG_LEVEL,
#            AUTO_MIGRATE, AUTO_UPDATE_DEPENDENCIES (written)
# Presets:   dev, prod, demo, testing; anything else logs a warning and is
#            treated exactly like dev
#######################################
apply_preset_configuration() {
  local preset="${DEPLOYMENT_PRESET:-dev}"
  deploy_info "Applying deployment preset: $preset"

  # Map unknown names onto dev up front so the fallback gets the complete
  # dev settings (including the AUTO_* flags) from a single definition.
  case "$preset" in
    dev | prod | demo | testing) ;;
    *)
      deploy_warning "Unknown preset '$preset', using development defaults"
      preset="dev"
      ;;
  esac

  case "$preset" in
    dev)
      PULL_INTERVAL="${PULL_INTERVAL:-60}"
      HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-30}"
      DEBUG_MODE="${DEBUG_MODE:-true}"
      LOG_LEVEL="${LOG_LEVEL:-DEBUG}"
      AUTO_MIGRATE="${AUTO_MIGRATE:-true}"
      AUTO_UPDATE_DEPENDENCIES="${AUTO_UPDATE_DEPENDENCIES:-true}"
      ;;
    prod)
      PULL_INTERVAL="${PULL_INTERVAL:-300}"
      HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-60}"
      DEBUG_MODE="${DEBUG_MODE:-false}"
      LOG_LEVEL="${LOG_LEVEL:-WARNING}"
      AUTO_MIGRATE="${AUTO_MIGRATE:-true}"
      AUTO_UPDATE_DEPENDENCIES="${AUTO_UPDATE_DEPENDENCIES:-false}"
      ;;
    demo)
      PULL_INTERVAL="${PULL_INTERVAL:-120}"
      HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-45}"
      DEBUG_MODE="${DEBUG_MODE:-false}"
      LOG_LEVEL="${LOG_LEVEL:-INFO}"
      AUTO_MIGRATE="${AUTO_MIGRATE:-true}"
      AUTO_UPDATE_DEPENDENCIES="${AUTO_UPDATE_DEPENDENCIES:-true}"
      ;;
    testing)
      PULL_INTERVAL="${PULL_INTERVAL:-180}"
      HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-30}"
      DEBUG_MODE="${DEBUG_MODE:-true}"
      LOG_LEVEL="${LOG_LEVEL:-DEBUG}"
      AUTO_MIGRATE="${AUTO_MIGRATE:-true}"
      AUTO_UPDATE_DEPENDENCIES="${AUTO_UPDATE_DEPENDENCIES:-true}"
      ;;
  esac

  deploy_success "Preset configuration applied successfully"
  deploy_debug "Configuration: interval=${PULL_INTERVAL}s, health=${HEALTH_CHECK_INTERVAL}s, debug=$DEBUG_MODE"
}
# ============================================================
# HEALTH CHECK FUNCTIONS
# ============================================================
#######################################
# Check that the smart deployment machinery is usable.
# Globals:   PROJECT_DIR (read)
# Returns:   1 when scripts/smart-deploy.sh under PROJECT_DIR is missing or
#            not executable, or when systemctl exists and reports the
#            thrillwiki-smart-deploy.timer unit as not active; 0 otherwise
#            (the timer check is skipped when systemctl is unavailable)
#######################################
check_smart_deployment_health() {
  deploy_debug "Checking smart deployment service health"

  local script_path="$PROJECT_DIR/scripts/smart-deploy.sh"

  # The deploy script itself has to be runnable
  if [ ! -x "$script_path" ]; then
    deploy_warning "Smart deployment script not found or not executable: $script_path"
    return 1
  fi

  # Without systemctl there is no timer to inspect
  command_exists systemctl || return 0

  if ! systemctl is-active --quiet thrillwiki-smart-deploy.timer 2>/dev/null; then
    deploy_warning "Smart deployment timer is not active"
    return 1
  fi

  deploy_debug "Smart deployment timer is active"
  return 0
}
#######################################
# Check if development server is healthy by requesting its health URL.
# Globals:   HEALTH_CHECK_URL (read, default http://localhost:8000/)
#            HEALTH_CHECK_TIMEOUT (read, default 30 seconds)
# Returns:   0 when curl completes the request successfully; 1 when curl is
#            missing, the request fails or times out, or the server answers
#            with an HTTP error status (400 or above)
#######################################
check_development_server_health() {
  deploy_debug "Checking development server health"

  local health_url="${HEALTH_CHECK_URL:-http://localhost:8000/}"
  local timeout="${HEALTH_CHECK_TIMEOUT:-30}"

  if ! command_exists curl; then
    deploy_warning "curl not available for health checks"
    return 1
  fi

  # --fail:     an HTTP error response counts as unhealthy instead of passing
  # --max-time: bound the whole transfer, so a server that accepts the
  #             connection but never answers cannot stall the monitor
  if curl -s --fail \
    --connect-timeout "$timeout" \
    --max-time "$timeout" \
    "$health_url" > /dev/null 2>&1; then
    deploy_debug "Development server health check passed"
    return 0
  fi

  deploy_warning "Development server health check failed"
  return 1
}
#######################################
# Check GitHub authentication by calling the GitHub "user" API endpoint.
# The token is taken from the file named by GITHUB_TOKEN_FILE (default
# $PROJECT_DIR/.github-pat) and, if that yields nothing, from GITHUB_TOKEN.
# Globals:   GITHUB_TOKEN_FILE, PROJECT_DIR, GITHUB_TOKEN (read)
# Returns:   0 when the API response contains a "login" field; 1 when no
#            token is found, curl is missing, or the request is rejected,
#            fails or times out
#######################################
check_github_authentication() {
  deploy_debug "Checking GitHub authentication"

  local token_file="${GITHUB_TOKEN_FILE:-$PROJECT_DIR/.github-pat}"
  local github_token=""
  local response=""

  # Try to get token from file (newlines and carriage returns stripped)
  if [ -f "$token_file" ]; then
    github_token=$({ tr -d '\n\r' < "$token_file"; } 2>/dev/null) || github_token=""
  fi

  # Try environment variable
  if [ -z "$github_token" ] && [ -n "${GITHUB_TOKEN:-}" ]; then
    github_token="$GITHUB_TOKEN"
  fi

  if [ -z "$github_token" ]; then
    deploy_warning "No GitHub token found"
    return 1
  fi

  if ! command_exists curl; then
    deploy_warning "Cannot verify GitHub authentication - curl not available"
    return 1
  fi

  # Test GitHub API access. The Authorization header is fed to curl as a
  # config line on stdin (--config -) so the token never shows up in the
  # process argument list; the timeouts keep a stalled connection from
  # blocking the caller.
  response=$(printf 'header = "Authorization: token %s"\n' "$github_token" \
    | curl -s --connect-timeout 10 --max-time 30 --config - \
      https://api.github.com/user 2>/dev/null) || response=""

  if printf '%s\n' "$response" | grep -q '"login"'; then
    deploy_debug "GitHub authentication verified"
    return 0
  fi

  deploy_warning "GitHub authentication failed"
  return 1
}
#######################################
# Comprehensive system health check: runs the smart deployment, development
# server and GitHub authentication checks and counts how many fail.
# Returns:   0 when every check passes, 1 otherwise
#######################################
perform_health_check() {
  deploy_debug "Performing comprehensive health check"

  local health_issues=0
  local check

  for check in \
    check_smart_deployment_health \
    check_development_server_health \
    check_github_authentication; do
    if ! "$check"; then
      # Plain assignment on purpose: "((health_issues++))" returns a
      # non-zero status while the counter is still 0, which aborts the
      # script under "set -e" when this function runs outside a condition.
      health_issues=$((health_issues + 1))
    fi
  done

  if [ "$health_issues" -eq 0 ]; then
    deploy_success "All health checks passed"
    return 0
  fi

  deploy_warning "Health check found $health_issues issue(s)"
  return 1
}
# ============================================================
# RECOVERY FUNCTIONS
# ============================================================
#######################################
# Restart the thrillwiki-smart-deploy.timer systemd unit.
# Returns:   0 when systemctl restarted the timer; 1 when systemctl is not
#            available or the restart command failed
#######################################
restart_smart_deployment() {
  deploy_info "Restarting smart deployment timer"

  # No systemctl, nothing we can restart
  if ! command_exists systemctl; then
    deploy_warning "systemctl not available - cannot restart smart deployment"
    return 1
  fi

  if ! systemctl restart thrillwiki-smart-deploy.timer 2>/dev/null; then
    deploy_error "Failed to restart smart deployment timer"
    return 1
  fi

  deploy_success "Smart deployment timer restarted"
  return 0
}
#######################################
# Restart development server through smart deployment by running
# "$PROJECT_DIR/scripts/smart-deploy.sh restart-server".
# Globals:   PROJECT_DIR (read)
# Outputs:   each line the script prints (stdout and stderr) is forwarded to
#            deploy_debug once the script has finished
# Returns:   0 when the script exits successfully; 1 when the script is
#            missing, not executable, or exits with a non-zero status
#######################################
restart_development_server() {
  deploy_info "Restarting development server"

  local smart_deploy_script="$PROJECT_DIR/scripts/smart-deploy.sh"
  local output=""
  local status=0
  local line

  if [ ! -x "$smart_deploy_script" ]; then
    deploy_warning "Smart deployment script not available"
    return 1
  fi

  # Capture the output rather than piping it into a loop: in a pipeline the
  # exit status comes from the last stage, so the script's own failure
  # would be invisible.
  output=$("$smart_deploy_script" restart-server 2>&1) || status=$?

  if [ -n "$output" ]; then
    while IFS= read -r line; do
      deploy_debug "Smart deploy: $line"
    done <<< "$output"
  fi

  if [ "$status" -ne 0 ]; then
    deploy_error "Failed to restart development server"
    return 1
  fi

  deploy_success "Development server restart initiated"
  return 0
}
#######################################
# Run one recovery round: restart the smart deployment timer, wait 30s,
# restart the development server, wait 60s, then re-run the health check.
# The round stops at the first step that fails.
# Arguments: $1 - number of this attempt (used in log messages)
#            $2 - maximum number of attempts (used in log messages)
# Returns:   0 when all three steps succeed, 1 otherwise
#######################################
attempt_recovery() {
  local attempt="$1"
  local max_attempts="$2"
  local failure_note="Recovery attempt $attempt failed"

  deploy_info "Attempting recovery (attempt $attempt/$max_attempts)"

  # Step 1: smart deployment timer
  restart_smart_deployment || {
    deploy_warning "$failure_note"
    return 1
  }
  sleep 30 # Wait for service to stabilize

  # Step 2: development server
  restart_development_server || {
    deploy_warning "$failure_note"
    return 1
  }
  sleep 60 # Wait for server to start

  # Step 3: confirm the system is healthy again
  perform_health_check || {
    deploy_warning "$failure_note"
    return 1
  }

  deploy_success "Recovery successful"
  return 0
}
# ============================================================
# MAIN AUTOMATION LOOP
# ============================================================
#######################################
# Main deployment automation service: an endless monitoring loop.
# Each cycle runs perform_health_check and then sleeps for
# HEALTH_CHECK_INTERVAL seconds. After three consecutive failed checks, and
# provided RESTART_COOLDOWN seconds have passed since the previous recovery
# round ended, it calls attempt_recovery up to MAX_RESTART_ATTEMPTS times
# with a 60s pause between attempts.
# Globals:   DEPLOYMENT_PRESET, PULL_INTERVAL, HEALTH_CHECK_INTERVAL,
#            RESTART_COOLDOWN, MAX_RESTART_ATTEMPTS (read)
# Returns:   never returns on its own
#######################################
run_deployment_automation() {
  deploy_info "Starting deployment automation service"
  deploy_info "Preset: $DEPLOYMENT_PRESET, Pull interval: ${PULL_INTERVAL}s, Health check: ${HEALTH_CHECK_INTERVAL}s"

  local consecutive_failures=0
  local last_recovery_attempt=0
  local current_time
  local recovery_attempt
  local recovered

  while true; do
    # Perform health check
    if perform_health_check; then
      consecutive_failures=0
      deploy_debug "System healthy - continuing monitoring"
    else
      # Plain assignment on purpose: "((consecutive_failures++))" returns a
      # non-zero status when the counter is 0, and under "set -e" that
      # terminated the whole service on the first failed health check.
      consecutive_failures=$((consecutive_failures + 1))
      deploy_warning "Health check failed (consecutive failures: $consecutive_failures)"

      # Attempt recovery if we have consecutive failures
      if [ "$consecutive_failures" -ge 3 ]; then
        current_time=$(date +%s)

        # Check if enough time has passed since last recovery attempt
        if [ $((current_time - last_recovery_attempt)) -ge "$RESTART_COOLDOWN" ]; then
          deploy_info "Too many consecutive failures, attempting recovery"

          recovered=false
          recovery_attempt=1
          while [ "$recovery_attempt" -le "$MAX_RESTART_ATTEMPTS" ]; do
            if attempt_recovery "$recovery_attempt" "$MAX_RESTART_ATTEMPTS"; then
              recovered=true
              break
            fi
            recovery_attempt=$((recovery_attempt + 1))
            if [ "$recovery_attempt" -le "$MAX_RESTART_ATTEMPTS" ]; then
              sleep 60 # Wait between recovery attempts
            fi
          done

          if [ "$recovered" != "true" ]; then
            deploy_error "All recovery attempts failed - manual intervention may be required"
          fi

          # Reset the failure count either way so recovery is not retried
          # on every cycle, and start the cooldown when the round ENDS; a
          # round can take longer than the cooldown itself, so a timestamp
          # taken before it would already be expired.
          consecutive_failures=0
          last_recovery_attempt=$(date +%s)
        else
          deploy_debug "Recovery cooldown in effect, waiting before next attempt"
        fi
      fi
    fi

    # Wait for next health check cycle
    sleep "$HEALTH_CHECK_INTERVAL"
  done
}
# ============================================================
# INITIALIZATION AND STARTUP
# ============================================================
#######################################
# Initialize deployment automation: enter the project directory, apply the
# preset, take the single-instance lock, install the INT/TERM handler and
# run a first health check.
# Globals:   PROJECT_DIR (read)
# Returns:   0 on success (a failing initial health check only logs a
#            warning); exits the script with status 1 when the project
#            directory cannot be entered or the lock cannot be acquired
#######################################
initialize_automation() {
  deploy_info "Initializing ThrillWiki deployment automation"

  # Ensure we're in the project directory
  if ! cd "$PROJECT_DIR"; then
    deploy_error "Cannot change to project directory: $PROJECT_DIR"
    exit 1
  fi

  # Apply preset configuration
  apply_preset_configuration

  # Acquire lock
  if ! acquire_lock; then
    deploy_error "Failed to acquire deployment lock"
    exit 1
  fi

  # Set up signal handlers only once the lock is ours: the handler removes
  # the lock file, so installing it earlier would let a signal that arrives
  # before acquisition delete a lock held by another running instance.
  trap cleanup_and_exit INT TERM

  # Perform initial health check
  deploy_info "Performing initial system health check"
  if ! perform_health_check; then
    deploy_warning "Initial health check detected issues - will monitor and attempt recovery"
  fi

  deploy_success "Deployment automation initialized successfully"
}
# ============================================================
# COMMAND HANDLING
# ============================================================
#######################################
# Report whether the instance recorded in LOCK_FILE is alive.
# Lives in a function because "local" is only valid inside one; at the top
# level it fails, and with "set -e" that aborted the command.
# Globals:   LOCK_FILE (read)
# Outputs:   one status line on stdout
# Returns:   0 when the recorded PID is running, 1 otherwise
#######################################
automation_status() {
  local lock_pid=""

  if [ ! -f "$LOCK_FILE" ]; then
    echo "Deployment automation is not running"
    return 1
  fi

  lock_pid=$(cat "$LOCK_FILE" 2>/dev/null || echo "")
  if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then
    echo "Deployment automation is running (PID: $lock_pid)"
    return 0
  fi

  echo "Deployment automation is not running (stale lock file)"
  return 1
}

#######################################
# Stop the instance recorded in LOCK_FILE: send TERM, wait 5 seconds, send
# KILL if the process is still there, then remove the lock file.
# Globals:   LOCK_FILE (read; file is removed)
# Outputs:   progress lines on stdout
# Returns:   0
#######################################
automation_stop() {
  local lock_pid=""

  if [ ! -f "$LOCK_FILE" ]; then
    echo "Deployment automation is not running"
    return 0
  fi

  lock_pid=$(cat "$LOCK_FILE" 2>/dev/null || echo "")
  if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then
    echo "Stopping deployment automation (PID: $lock_pid)"
    # The process may exit between the probe and the signal; that must not
    # abort the command before the lock file is cleaned up.
    kill -TERM "$lock_pid" 2>/dev/null || true
    sleep 5
    if kill -0 "$lock_pid" 2>/dev/null; then
      kill -KILL "$lock_pid" 2>/dev/null || true
    fi
    rm -f "$LOCK_FILE"
    echo "Deployment automation stopped"
  else
    echo "Deployment automation is not running"
    rm -f "$LOCK_FILE"
  fi
  return 0
}

# Handle script commands (default: start)
case "${1:-start}" in
  start)
    initialize_automation
    run_deployment_automation
    ;;
  health-check)
    if perform_health_check; then
      echo "System is healthy"
      exit 0
    else
      echo "System health check failed"
      exit 1
    fi
    ;;
  restart-smart-deploy)
    restart_smart_deployment
    ;;
  restart-server)
    restart_development_server
    ;;
  status)
    automation_status || exit 1
    exit 0
    ;;
  stop)
    automation_stop
    ;;
  *)
    echo "Usage: $0 {start|stop|status|health-check|restart-smart-deploy|restart-server}"
    exit 1
    ;;
esac