Skip to content

Quick Reference Cards

> [!tip] Quick Access
> Bookmark this page for fast access to commonly used commands and procedures.

Docker Swarm Commands

Cluster Management

# View cluster nodes
docker node ls

# Inspect node details
docker node inspect <node-name>

# View node labels
docker node inspect <node-name> --format '{{.Spec.Labels}}'

# Apply all node labels
./sh-label-nodes.sh

# Drain node for maintenance
docker node update --availability drain <node-name>

# Activate node after maintenance
docker node update --availability active <node-name>

Service Management

# List all services
docker service ls

# List services with custom format
docker service ls --format "table {{.Name}}\t{{.Replicas}}\t{{.Image}}"

# View service tasks and their state
docker service ps <service-name>

# View service logs
docker service logs -f <service-name>

# Force service restart
docker service update --force <service-name>

# Scale service
docker service scale <service-name>=<replicas>

# Remove service
docker service rm <service-name>

Stack Management

# Deploy stack
docker stack deploy -c <stack-file>.yml <stack-name> --with-registry-auth

# List stacks
docker stack ls

# List services in stack
docker stack services <stack-name>

# View stack tasks
docker stack ps <stack-name>

# Remove stack
docker stack rm <stack-name>

Infrastructure Scripts

Quick Deployment

# Start all infrastructure (from manager node)
./docker-start.sh

# Start specific stack
docker stack deploy -c stack-homenet1.yml homenet1 --with-registry-auth

# Start ARR stack (SSH to node 202)
ssh 100.1.100.202
cd ~/homenet-docker-services
docker-compose -f docker-compose.worker2.yml up -d --remove-orphans

Configuration Sync

# Full bidirectional sync (scripts, YML, config)
./sh-sync.sh

# Config-only sync
./sh-sync-config.sh

# Scripts and YML only
./sh-sync-scripts.sh

Session Management

# Open tmux session to all 5 nodes
./sh-session.sh

# Monitor services across nodes
./sh-status.sh

Node Access

SSH Quick Access

# Manager node
ssh 100.1.100.201
ssh homenet-ubuntu1

# Worker nodes
ssh 100.1.100.202  # Media node
ssh 100.1.100.203  # Surveillance node
ssh 100.1.100.204  # Dashboard node
ssh 100.1.100.205  # General workload node

Node-Specific Operations

Node 201 (Manager):

# Check database services
docker service ps homenet1_mariadb
docker service ps homenet1_influxdb

# View Prometheus targets
curl http://localhost:9090/api/v1/targets

# Check Grafana
curl http://localhost:3010/api/health

Node 202 (Media):

# ARR stack status (Docker Compose)
docker-compose -f docker-compose.worker2.yml ps

# Plex status
docker service ps homenet4_plex

# Check GPU availability
nvidia-smi

Node 203 (Surveillance):

# iSpy status
docker service ps homenet3_ispy

# Check camera storage
du -sh /nfs_cams/ispy/*


Monitoring & Logs

Prometheus

# Access Prometheus
http://100.1.100.201:9090

# Query API
curl 'http://100.1.100.201:9090/api/v1/query?query=up'

# Check targets
http://100.1.100.201:9090/targets

# View alerts
http://100.1.100.201:9090/alerts

Grafana

# Access Grafana
http://100.1.100.201:3010

# Backup dashboards
tar -czf grafana-backup-$(date +%Y%m%d).tar.gz /homenet_config/grafana/

# Restore dashboards
tar -xzf grafana-backup-*.tar.gz -C /homenet_config/

Service Logs

# Tail service logs
docker service logs -f <service-name>

# View logs from the last hour
docker service logs <service-name> --since 1h

# Follow multiple services
docker service logs -f homenet1_mariadb &
docker service logs -f homenet1_influxdb &

# All Docker logs
./sh-tail-docker-logs.sh

System Logs

# Docker daemon logs
journalctl -u docker.service -f

# System messages
journalctl -f

# Filter by service
journalctl -u homenet-stack.service

Storage Management

NFS Mounts

# Check all NFS mounts
df -h | grep nfs

# Verify and correct mounts
./sh-correct-mounts.sh

# Show mount details
mount | grep nfs

# Remount if needed
sudo mount -a

Storage Usage

# Check storage capacity
df -h

# Find large directories
du -h --max-depth=1 /nfs_data | sort -hr | head -20

# Find large files
find /nfs_data -type f -size +1G -exec ls -lh {} \;

# Clean temp files
./sh-delete-temp.sh

Docker Volumes

# List volumes
docker volume ls

# Inspect volume
docker volume inspect <volume-name>

# Remove unused volumes
docker volume prune

# Backup volume
docker run --rm -v <volume-name>:/data -v $(pwd):/backup \
  alpine tar czf /backup/<volume-name>.tar.gz /data

Database Operations

Backups

# Run database backup script
./sh-backup-databases.sh

# Vaultwarden backup
./sh-backup-vaultwarden.sh

# Monitor backup health
./sh-monitor-backups.sh

MariaDB

# Access MariaDB container
docker exec -it $(docker ps -qf "name=mariadb") mysql -p

# Backup database
docker exec $(docker ps -qf "name=mariadb") \
  mysqldump -p<password> <database> > backup.sql

# Restore database
docker exec -i $(docker ps -qf "name=mariadb") \
  mysql -p<password> <database> < backup.sql

PostgreSQL

# Access PostgreSQL container
docker exec -it $(docker ps -qf "name=postgres") psql -U <user>

# Backup database
docker exec $(docker ps -qf "name=postgres") \
  pg_dump -U <user> <database> > backup.sql

# Restore database
docker exec -i $(docker ps -qf "name=postgres") \
  psql -U <user> <database> < backup.sql

InfluxDB

# Access InfluxDB
http://100.1.100.201:8086

# Backup via API
curl -XPOST http://100.1.100.201:8086/api/v2/backup

# Query data
influx query 'from(bucket:"<bucket>") |> range(start: -1h)'

Troubleshooting

Service Not Starting

# Check service status (detailed)
docker service ps <service-name> --no-trunc

# View service logs
docker service logs <service-name> -f

# Inspect service configuration
docker service inspect <service-name>

# Force restart
docker service update --force <service-name>

# Remove and redeploy
docker service rm <service-name>
docker stack deploy -c <stack-file>.yml <stack-name> --with-registry-auth

Network Issues

# List networks
docker network ls

# Inspect network
docker network inspect <network-name>

# Test connectivity
docker run --rm --network <network-name> alpine ping <service-name>

# Check DNS resolution
docker run --rm --network <network-name> alpine nslookup <service-name>

Storage Issues

# Check disk space
df -h

# Check NFS server connectivity
ping 100.1.100.199

# List NFS exports offered by the server
showmount -e 100.1.100.199

# Remount NFS
sudo umount /nfs_data
sudo mount -a

Node Issues

# Check node status
docker node ls

# Inspect node
docker node inspect <node-name>

# View tasks on node
docker node ps <node-name>

# Check node resources
ssh <node-ip>
top
df -h
free -h

ARR Stack (Docker Compose on Node 202)

Management Commands

# SSH to node 202
ssh 100.1.100.202
cd ~/homenet-docker-services

# View status
docker-compose -f docker-compose.worker2.yml ps

# Start stack
docker-compose -f docker-compose.worker2.yml up -d --remove-orphans

# Stop stack
docker-compose -f docker-compose.worker2.yml down

# Restart service
docker-compose -f docker-compose.worker2.yml restart <service>

# View logs
docker-compose -f docker-compose.worker2.yml logs -f <service>

# Pull updates
docker-compose -f docker-compose.worker2.yml pull
docker-compose -f docker-compose.worker2.yml up -d

ARR Services

# Radarr logs
docker-compose -f docker-compose.worker2.yml logs -f radarr

# Sonarr logs
docker-compose -f docker-compose.worker2.yml logs -f sonarr

# Transmission logs
docker-compose -f docker-compose.worker2.yml logs -f transmission

# Check Wireguard VPN
docker-compose -f docker-compose.worker2.yml exec wireguard curl ifconfig.me

Traefik

Access & Management

# Traefik dashboard
http://100.1.100.201:8080/dashboard/

# View routers
curl http://100.1.100.201:8080/api/http/routers

# View services
curl http://100.1.100.201:8080/api/http/services

# Check certificates
docker service logs traefik_traefik | grep -i cert

# Force certificate renewal
docker service update --force traefik_traefik

Maintenance Procedures

Update Service Image

# Method 1: Update stack file and redeploy
# Edit stack-homenet4.yml, change image version
docker stack deploy -c stack-homenet4.yml homenet4 --with-registry-auth

# Method 2: Update the image directly without changing the stack file
docker service update --image <new-image>:<tag> <service-name>

# Method 3: Pull latest tag
docker service update --force --image <image>:latest <service-name>

Node Reboot

# 1. Drain node (services migrate to other nodes)
docker node update --availability drain <node-name>

# 2. Wait for services to migrate
watch docker node ps <node-name>

# 3. Reboot node
ssh <node-ip> sudo reboot

# 4. After reboot, reactivate node
docker node update --availability active <node-name>

Stack Update

# 1. Edit stack file
nano stack-<name>.yml

# 2. Validate YAML
docker stack config -c stack-<name>.yml

# 3. Deploy update
docker stack deploy -c stack-<name>.yml <stack-name> --with-registry-auth

# 4. Monitor rollout
watch docker stack ps <stack-name>

Emergency Procedures

Critical Service Down

# 1. Check service status
docker service ps <service> --no-trunc

# 2. Check logs
docker service logs <service> -f

# 3. Quick restart
docker service update --force <service>

# 4. If still failing, check node
docker node ls
docker node ps <node-with-service>

# 5. Check storage
df -h | grep nfs

# 6. Last resort: remove and redeploy
docker service rm <service>
docker stack deploy -c <stack-file>.yml <stack-name> --with-registry-auth

Cluster Manager Down

# If manager node (201) is down:
# 1. Check Proxmox console
https://100.1.100.10:8006

# 2. Start VM if stopped
# 3. Check NFS mounts after boot
ssh 100.1.100.201
df -h | grep nfs

# 4. Verify Swarm status
docker node ls

# 5. Restart critical services if needed
docker service update --force homenet1_mariadb
docker service update --force homenet1_influxdb

Storage Full

# 1. Immediate cleanup
./sh-delete-temp.sh

# 2. Find space hogs
du -h --max-depth=1 /nfs_data | sort -hr | head -20

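# 3. Clean Docker (WARNING: removes all stopped containers, unused networks, unused images, and unused volumes without prompting)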
docker system prune -af --volumes

# 4. Archive old data
# Move old media/photos to external storage

# 5. Expand storage (OMV server)
# See [[05-Storage/Storage-Critical-Warning]]

Common Workflows

Adding a New Service

# 1. Choose appropriate stack file
# 2. Add service definition
nano stack-homenet4.yml

# 3. Create config directory
mkdir -p config/<service-name>

# 4. Add node label if needed
# Edit sh-label-nodes.sh and run it

# 5. Test deployment
docker stack deploy -c stack-homenet4.yml homenet4 --with-registry-auth

# 6. Verify logs
docker service logs -f homenet4_<service>

# 7. Sync to all nodes
./sh-sync.sh

Rotating Service Password

# 1. Generate new password
export BW_SESSION=$(bw unlock --raw)
./sh-generate-passwords.sh <service> <FIELD>

# 2. Update service environment
# Edit stack file with new ${SERVICE_FIELD}

# 3. Redeploy service
docker stack deploy -c stack-<service>.yml <service> --with-registry-auth

# 4. Verify service started
docker service ps <service>

Investigating Service Failure

# 1. Check service status
docker service ps <service> --no-trunc

# 2. View recent logs
docker service logs <service> --tail 100

# 3. Check node placement
docker service inspect <service> | grep -i constraint

# 4. Verify node labels
docker node inspect <node> --format '{{.Spec.Labels}}'

# 5. Check network connectivity
docker network inspect <network>

# 6. Verify storage
df -h | grep nfs

# 7. Check resource constraints
ssh <node-ip>
free -h
df -h

Useful One-Liners

# List all running services with ports
docker service ls --format "table {{.Name}}\t{{.Replicas}}\t{{.Ports}}"

# Count services by stack
docker service ls --format "{{.Name}}" | cut -d'_' -f1 | sort | uniq -c

# Find services on specific node
docker service ps $(docker service ls -q) --filter "node=homenet-ubuntu2"

# Show services with 0 replicas
docker service ls | grep "0/0"

# Restart all services in stack
for svc in $(docker stack services -q <stack>); do docker service update --force $svc; done

# Check all NFS mounts across cluster
for node in 201 202 203 204 205; do ssh 100.1.100.$node "df -h | grep nfs"; done

# View the last 10 log lines from every service
docker service ls -q | xargs -I {} sh -c 'docker service logs {} --tail 10'

  • [[01-Infrastructure/Cluster-Overview|Cluster Overview]]
  • [[02-Services/Service-Catalog|Service Catalog]]
  • [[03-Operations/Stack-Deployment|Stack Deployment]]
  • [[06-Troubleshooting/Known-Issues|Known Issues]]
  • [[06-Troubleshooting/Service-Restart-Runbook|Service Restart Runbook]]

Last Updated: 2026-01-11

Quick Tip: Press Ctrl+F in Obsidian to search this page for specific commands.