Files
thrillwiki_django_no_react/scripts/vm/diagnose-systemd-architecture.sh
pacnpal f4f8ec8f9b Configure PostgreSQL with PostGIS support
- Updated database settings to use dj_database_url for environment-based configuration
- Added dj-database-url dependency
- Configured PostGIS backend for spatial data support
- Set default DATABASE_URL for production PostgreSQL connection
2025-08-19 18:51:33 -04:00

113 lines
4.6 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Systemd Service Architecture Diagnosis Script
# Validates assumptions about timeout/restart cycles
#
# Abort on the first command failure that is not explicitly guarded.
set -o errexit

# Report banner, the current timestamp, then a blank separator line.
printf '%s\n' "=== ThrillWiki Systemd Service Architecture Diagnosis ==="
printf 'Timestamp: %s\n\n' "$(date)"
# Section 1: show `systemctl status` for each deployment unit. A non-zero
# status from systemctl is reported with a short fallback message instead of
# aborting the script.
echo "1. CHECKING SERVICE STATUS"
echo "=========================="
for unit_name in \
  thrillwiki-deployment.service \
  thrillwiki-smart-deploy.service \
  thrillwiki-smart-deploy.timer; do
  echo "${unit_name} status:"
  if ! systemctl status "${unit_name}" --no-pager -l; then
    # The fallback wording depends on the kind of unit being inspected.
    case "${unit_name}" in
      *.timer) echo "Timer not active" ;;
      *) echo "Service not active" ;;
    esac
  fi
  echo
done
#######################################
# Keep only log lines that hint at a timeout/restart cycle.
# Reads log text on stdin and writes the matching lines to stdout.
# Matching is case-insensitive so capitalised messages such as
# "Failed with result ..." or "Stopped ..." are not missed, and the
# "timed out" wording is accepted alongside "timeout".
# Returns: grep's status - 0 if at least one line matched, 1 if none did.
#######################################
filter_restart_patterns() {
  grep -iE "(timeout|timed out|restart|failed|stopped)"
}

# Check recent journal logs for timeout/restart patterns
echo "2. CHECKING RECENT SYSTEMD LOGS (LAST 50 LINES)"
echo "==============================================="
echo "Looking for timeout and restart patterns:"
# Only the last 50 journal entries of the deployment service are scanned; the
# fallback message is printed when nothing in that window matches.
journalctl -u thrillwiki-deployment.service --no-pager -n 50 \
  | filter_restart_patterns \
  || echo "No timeout/restart patterns found in recent logs"
echo
#######################################
# Report whether a script contains a literal `while true` loop.
# Arguments: $1 - path of the script to inspect
# Outputs:   the matching lines (with line numbers) plus a verdict line on
#            stdout; an explicit "cannot read" line if the file is missing
#            or unreadable, so that case is never reported as "no loops".
# Returns:   0 always, so the diagnosis continues under `set -e`.
#######################################
report_infinite_loops() {
  local script_path=$1

  # Without this guard a missing file would fall through to the "✓" verdict.
  if [[ ! -r "${script_path}" ]]; then
    echo "✗ Cannot read ${script_path} - unable to check for infinite loops"
    return 0
  fi

  if grep -n -e "while true" -- "${script_path}"; then
    echo "✗ FOUND: Script contains 'while true' infinite loop - this conflicts with systemd service expectations"
  else
    echo "✓ No infinite loops found"
  fi
}

# Check if deploy-automation.sh is designed as infinite loop
echo "3. ANALYZING SCRIPT DESIGN"
echo "=========================="
echo "Checking if deploy-automation.sh contains infinite loops:"
# NOTE(review): this path appears to have been partially replaced by a secret
# scrubber ("[AWS-SECRET-REMOVED]" prefix) - restore the real absolute path.
deploy_automation_script="[AWS-SECRET-REMOVED]eploy-automation.sh"
report_infinite_loops "${deploy_automation_script}"
echo
#######################################
# Print selected settings from a systemd unit file.
# Arguments: $1  - path to the unit file
#            $2+ - setting names to look up (e.g. Type, Restart)
# Outputs:   one "- NAME: <matching NAME=... line(s)>" row per setting, or
#            "- NAME: Not specified" when the file has no such line; a single
#            "not found or not readable" row if the file cannot be read, so a
#            missing file is not mistaken for unset options.
# Returns:   0 always, so the diagnosis continues under `set -e`.
#######################################
show_unit_settings() {
  local unit_file=$1
  shift
  local setting matched

  if [[ ! -r "${unit_file}" ]]; then
    echo "✗ Unit file not found or not readable: ${unit_file}"
    return 0
  fi

  for setting in "$@"; do
    matched="$(grep -e "^${setting}=" -- "${unit_file}")" || matched='Not specified'
    echo "- ${setting}: ${matched}"
  done
}

# NOTE(review): this directory appears to have been partially replaced by a
# secret scrubber ("[AWS-SECRET-REMOVED]" prefix) - restore the real path.
unit_dir="[AWS-SECRET-REMOVED]emd"

# Check service configuration issues
echo "4. ANALYZING SERVICE CONFIGURATION"
echo "=================================="
echo "Checking thrillwiki-deployment.service configuration:"
show_unit_settings "${unit_dir}/thrillwiki-deployment.service" \
  Type Restart RestartSec RuntimeMaxSec WatchdogSec
echo
# Check smart-deploy configuration (correct approach)
echo "Checking thrillwiki-smart-deploy.service configuration:"
show_unit_settings "${unit_dir}/thrillwiki-smart-deploy.service" \
  Type ExecStart
echo
# Check timer configuration
echo "Checking thrillwiki-smart-deploy.timer configuration:"
show_unit_settings "${unit_dir}/thrillwiki-smart-deploy.timer" \
  OnBootSec OnUnitActiveSec
echo
# Section 5: verify that the script the timer is meant to run is present and
# carries the executable bit.
echo "5. CHECKING TIMER TARGET SCRIPT"
echo "==============================="
# NOTE(review): this path appears to have been partially replaced by a secret
# scrubber ("[AWS-SECRET-REMOVED]" prefix) - restore the real absolute path.
smart_deploy_script="[AWS-SECRET-REMOVED]t-deploy.sh"
if [[ ! -f "${smart_deploy_script}" ]]; then
  echo "✗ smart-deploy.sh does not exist"
elif [[ -x "${smart_deploy_script}" ]]; then
  echo "✓ smart-deploy.sh exists and is executable"
else
  echo "✗ smart-deploy.sh exists but is not executable"
fi
echo
# Section 6: list running deployment processes and report the lock file.
echo "6. CHECKING SYSTEM RESOURCES"
echo "============================"
echo "Current process using deployment automation:"
# The final `grep -v grep` filters out lines containing "grep", such as this
# pipeline's own grep process; the pipeline's status is that last grep's.
if ! ps aux | grep -E "(deploy-automation|smart-deploy)" | grep -v grep; then
  echo "No deployment processes running"
fi
echo

echo "Lock file status:"
deployment_lock="/tmp/thrillwiki-deployment.lock"
if [[ ! -f "${deployment_lock}" ]]; then
  echo "✓ No lock file present"
else
  echo "✗ Lock file exists: ${deployment_lock}"
  # The lock file's contents are printed as the PID; read errors are hidden
  # and replaced with the word "unreadable".
  lock_contents="$(cat "${deployment_lock}" 2>/dev/null || echo 'unreadable')"
  echo "Lock PID: ${lock_contents}"
fi
echo
# Section 7: static summary contrasting the looping service with the
# timer-driven oneshot layout. The text is fixed and does not depend on the
# results of the checks above. The quoted delimiter keeps the here-doc literal.
cat <<'EOF'
7. ARCHITECTURE ANALYSIS
========================
CURRENT PROBLEMATIC ARCHITECTURE:
thrillwiki-deployment.service (Type=simple, Restart=always)
 └── deploy-automation.sh (infinite loop script)
 └── RESULT: Service times out and restarts continuously

RECOMMENDED CORRECT ARCHITECTURE:
thrillwiki-smart-deploy.timer (every 5 minutes)
 └── thrillwiki-smart-deploy.service (Type=oneshot)
 └── smart-deploy.sh (runs once, exits cleanly)

DIAGNOSIS COMPLETE
==================
EOF