public-ready-init

This commit is contained in:
Damien Coles 2026-01-26 00:44:31 -05:00
commit 0c77379ab3
51 changed files with 4079 additions and 0 deletions

42
.gitignore vendored Normal file

@ -0,0 +1,42 @@
# Terraform
terraform/.terraform/
terraform/*.tfstate
terraform/*.tfstate.*
terraform/*.tfvars
!terraform/*.tfvars.example
terraform/crash.log
# Ansible
ansible/inventory.ini
!ansible/inventory.ini.example
ansible/services.yml
!ansible/services.yml.example
ansible/vault/secrets.yml
ansible/vault/*pass*
ansible/databases/
# Nebula
nebula/ca.key
nebula/**/*.key
*.key
*.pem
# Secrets
secrets/
.env
.env.*
token.txt
# Backups
backup/
# IDE
.idea/
.vscode/
*.swp
*.swo
*~
# OS
.DS_Store
Thumbs.db

146
README.md Normal file

@ -0,0 +1,146 @@
# Arvandor
Production-grade infrastructure-as-code for running services on Proxmox with enterprise HA patterns.
## Overview
Arvandor provides a complete infrastructure stack:
- **Terraform** - VM provisioning on Proxmox
- **Ansible** - Configuration management
- **Nebula** - Encrypted overlay network
- **Vault** - Secrets management (3-node Raft cluster)
- **PostgreSQL** - Database (3-node Patroni + etcd)
- **Valkey** - Cache/queue (3-node Sentinel)
- **Garage** - S3-compatible storage (3-node cluster)
## Architecture
```
┌─────────────────────────────────────────────────────────────────────────┐
│ Proxmox Host │
├─────────────────────────────────────────────────────────────────────────┤
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ Management │ │ Services │ │ Data │ │ Workloads │ │
│ │ 1000-1999 │ │ 2000-2999 │ │ 3000-3999 │ │ 4000-4999 │ │
│ │ │ │ │ │ │ │ │ │
│ │ DNS, Caddy │ │ Vault │ │ PostgreSQL │ │ Your Apps │ │
│ │ Lighthouse │ │ Gitea │ │ Valkey │ │ │ │
│ │ │ │ │ │ Garage │ │ │ │
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
│ │ │ │ │ │
│ └────────────────┴────────────────┴────────────────┘ │
│ │ │
│ Nebula Overlay (10.10.10.0/24) │
└─────────────────────────────────────────────────────────────────────────┘
```
## Quick Start
### 1. Prerequisites
- Proxmox VE host
- Arch Linux VM template (VMID 9000)
- Terraform and Ansible installed locally
- `nebula-cert` binary for certificate generation
### 2. Configure
```bash
# Clone repository
git clone <repo-url> arvandor
cd arvandor
# Configure Terraform
cp terraform/terraform.tfvars.example terraform/terraform.tfvars
vim terraform/terraform.tfvars
# Configure Ansible
cp ansible/inventory.ini.example ansible/inventory.ini
vim ansible/inventory.ini
# Generate Nebula CA
cd nebula
nebula-cert ca -name "Arvandor CA"
```
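Each VM also needs a host certificate signed by this CA. The nebula playbook looks for them under `nebula/configs/<vmid>/<hostname>/`; here is a minimal sketch for one host, run from the `nebula/` directory so `nebula-cert` picks up `ca.crt`/`ca.key` by default (IP, group, and VMID taken from the example inventory):

```bash
# Sign a host certificate for vault-01 (Nebula IP 10.10.10.20, group "infrastructure", VMID 2000)
mkdir -p configs/2000/vault-01
nebula-cert sign \
  -name "vault-01" \
  -ip "10.10.10.20/24" \
  -groups "infrastructure" \
  -out-crt configs/2000/vault-01/vault-01.crt \
  -out-key configs/2000/vault-01/vault-01.key
```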
### 3. Provision
```bash
# Create VMs
cd terraform
terraform init
terraform plan
terraform apply
# Bootstrap VMs (in order)
cd ../ansible
ansible-playbook -i inventory.ini playbooks/bootstrap.yml
ansible-playbook -i inventory.ini playbooks/security.yml
ansible-playbook -i inventory.ini playbooks/nebula.yml
```
### 4. Deploy Services
```bash
# DNS server
ansible-playbook -i inventory.ini playbooks/dns.yml
# PostgreSQL HA cluster
ansible-playbook -i inventory.ini playbooks/postgres-ha.yml
# Valkey Sentinel
ansible-playbook -i inventory.ini playbooks/valkey-sentinel.yml
# Garage S3
ansible-playbook -i inventory.ini playbooks/garage.yml
```
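With the data tier running, per-application databases, Valkey ACL users, and S3 buckets are provisioned from `services.yml` by the data-service playbook. For example, for the `myapp` entry shipped in `services.yml.example` (assumes `VAULT_ADDR` and `VAULT_TOKEN` are exported):

```bash
# Provision PostgreSQL, Valkey, and S3 resources for one service
ansible-playbook -i inventory.ini playbooks/data-service.yml -e "service=myapp"

# Read back the generated credentials from Vault
vault read database/creds/myapp-app      # dynamic PostgreSQL credentials (1h TTL)
vault kv get secret/myapp/valkey         # Valkey ACL user and password
vault kv get secret/myapp/s3             # Garage access/secret key and endpoint
```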
## Directory Structure
```
arvandor/
├── terraform/ # VM provisioning
│ ├── modules/vm/ # Reusable VM module
│ ├── management.tf # DNS, Caddy
│ ├── services.tf # Vault, Gitea
│ └── data.tf # PostgreSQL, Valkey, Garage
├── ansible/ # Configuration management
│ ├── playbooks/ # Core playbooks
│ ├── templates/ # Jinja2 templates
│ └── vault/ # Ansible Vault secrets
├── nebula/ # Overlay network
│ └── configs/ # Per-host certificates
├── network/ # Host networking
└── docs/ # Documentation
```
## Network Design
### Two-Network Model
| Network | CIDR | Purpose |
|---------|------|---------|
| Bridge (vmbr1) | 192.168.100.0/24 | Provisioning only |
| Nebula | 10.10.10.0/24 | All application traffic |
VMs accept traffic only from the Proxmox host (for Ansible) and the Nebula overlay, which keeps them isolated even if someone gains access to the bridge network.
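In practice, application traffic always targets the overlay addresses: each VM's bridge address is filtered by its iptables rules for everything except the Proxmox host, and services bind to their Nebula IPs. A quick check from another VM (a sketch, assuming `nc` is installed; addresses from the example inventory):

```bash
# From app-server: postgres-01 answers on its Nebula address...
nc -zv -w 3 10.10.10.30 5432      # succeeds (arrives via the nebula1 interface)
# ...while the same VM's bridge address is dropped before it reaches the service
nc -zv -w 3 192.168.100.30 5432   # times out
```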
### Security Groups (Nebula)
| Group | Purpose |
|-------|---------|
| `admin` | Full access (your devices) |
| `infrastructure` | Core services |
| `projects` | Application workloads |
| `games` | Isolated game servers |
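To confirm which groups a deployed host actually carries, inspect its certificate in place (a sketch; `nebula-cert print` is part of the same binary used for signing, and `/etc/nebula/config.crt` is where the nebula playbook installs the host cert):

```bash
# Show the details (name, IP, groups, expiry) baked into a host certificate
nebula-cert print -path /etc/nebula/config.crt
```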
## Documentation
- [Getting Started](docs/getting-started.md) - Detailed setup guide
- [Architecture](docs/architecture.md) - Design decisions
- [Provisioning Guide](docs/provisioning-guide.md) - Adding new VMs
## License
MIT

20
ansible/.gitignore vendored Normal file

@ -0,0 +1,20 @@
# Inventory (contains IPs and hostnames)
inventory.ini
!inventory.ini.example
# Services (contains real service configs)
services.yml
!services.yml.example
# Ansible Vault secrets
vault/secrets.yml
vault/ansible_vault_pass
vault/*.pass
# Database dumps
databases/
# SSH keys
*.key
*.pem
id_*

89
ansible/inventory.ini.example Normal file

@ -0,0 +1,89 @@
# Arvandor Infrastructure Inventory
#
# Groups:
# infrastructure - Core services (Nebula group: infrastructure)
# projects - Application workloads (Nebula group: projects)
# games - Game servers (Nebula group: games)
# all - All managed VMs
#
# Variables per host:
# ansible_host - Bridge network IP (for SSH via Proxmox jump)
# nebula_ip - Overlay network IP (for inter-service communication)
# vmid - Proxmox VM ID (used for cert paths)
[infrastructure]
dns ansible_host=192.168.100.11 nebula_ip=10.10.10.11 vmid=1001
caddy ansible_host=192.168.100.12 nebula_ip=10.10.10.12 vmid=1002
vault-01 ansible_host=192.168.100.20 nebula_ip=10.10.10.20 vmid=2000
vault-02 ansible_host=192.168.100.21 nebula_ip=10.10.10.21 vmid=2001
vault-03 ansible_host=192.168.100.22 nebula_ip=10.10.10.22 vmid=2002
gitea ansible_host=192.168.100.23 nebula_ip=10.10.10.23 vmid=2003
postgres-01 ansible_host=192.168.100.30 nebula_ip=10.10.10.30 vmid=3000
postgres-02 ansible_host=192.168.100.31 nebula_ip=10.10.10.31 vmid=3001
postgres-03 ansible_host=192.168.100.32 nebula_ip=10.10.10.32 vmid=3002
valkey-01 ansible_host=192.168.100.33 nebula_ip=10.10.10.33 vmid=3003
valkey-02 ansible_host=192.168.100.34 nebula_ip=10.10.10.34 vmid=3004
valkey-03 ansible_host=192.168.100.35 nebula_ip=10.10.10.35 vmid=3005
garage-01 ansible_host=192.168.100.39 nebula_ip=10.10.10.39 vmid=3009
garage-02 ansible_host=192.168.100.40 nebula_ip=10.10.10.40 vmid=3010
garage-03 ansible_host=192.168.100.41 nebula_ip=10.10.10.41 vmid=3011
[projects]
app-server ansible_host=192.168.100.50 nebula_ip=10.10.10.50 vmid=4050
[games]
# Example game servers (firewall disabled, use host DNAT + guest ufw)
# minecraft ansible_host=192.168.100.52 nebula_ip=10.10.10.52 vmid=4052
[docker]
gitea
app-server
[vault]
vault-01
vault-02
vault-03
# PostgreSQL HA Cluster (Patroni + etcd)
[postgres]
postgres-01
postgres-02
postgres-03
# Valkey Sentinel (1 master + 2 replicas)
[valkey]
valkey-01
valkey-02
valkey-03
# Garage S3 Cluster
[garage]
garage-01
garage-02
garage-03
[all:children]
infrastructure
projects
games
[all:vars]
# SSH jumps through the Proxmox host to reach VMs on bridge network
ansible_user=admin
ansible_ssh_common_args='-o ProxyCommand="ssh -W %h:%p -q admin@10.10.10.1"'
ansible_ssh_extra_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
ansible_python_interpreter=/usr/bin/python3
# Infrastructure context (update these for your environment)
lighthouse_nebula_ip=10.10.10.10
lighthouse_bridge_ip=192.168.100.10
lighthouse_public_ip=203.0.113.10
# Optional: secondary lighthouse for reduced latency
home_lighthouse_nebula_ip=10.10.10.102
home_lighthouse_public_ip=203.0.113.20
proxmox_host_ip=192.168.100.1
bridge_network=192.168.100.0/24
# Caddy reverse proxy (for iptables rules)
caddy_nebula_ip=10.10.10.12

63
ansible/playbooks/bootstrap.yml Normal file

@ -0,0 +1,63 @@
---
# Bootstrap playbook for new VMs
#
# Run FIRST on newly provisioned VMs before security/nebula playbooks.
# Updates system packages and reboots if kernel changed.
#
# Usage: ansible-playbook -i inventory.ini playbooks/bootstrap.yml --limit "new-vm"
- name: Bootstrap New VMs
hosts: all
become: true
tasks:
- name: Initialize pacman keyring
command: pacman-key --init
args:
creates: /etc/pacman.d/gnupg/trustdb.gpg
- name: Populate pacman keyring with Arch Linux keys
command: pacman-key --populate archlinux
register: populate_result
changed_when: "'locally signed' in populate_result.stdout"
- name: Update archlinux-keyring package first
community.general.pacman:
name: archlinux-keyring
state: latest
update_cache: true
- name: Get current running kernel version
command: uname -r
register: running_kernel
changed_when: false
- name: Update all packages
community.general.pacman:
update_cache: true
upgrade: true
register: update_result
- name: Install essential packages
community.general.pacman:
name:
- rsync
state: present
- name: Get installed kernel version
shell: pacman -Q linux | awk '{print $2}' | sed 's/\.arch/-arch/'
register: installed_kernel
changed_when: false
- name: Check if reboot is needed (kernel updated)
set_fact:
reboot_needed: "{{ running_kernel.stdout not in installed_kernel.stdout }}"
- name: Display kernel status
debug:
msg: "Running: {{ running_kernel.stdout }}, Installed: {{ installed_kernel.stdout }}, Reboot needed: {{ reboot_needed }}"
- name: Reboot if kernel was updated
reboot:
msg: "Kernel updated, rebooting"
reboot_timeout: 300
when: reboot_needed | bool

337
ansible/playbooks/data-service.yml Normal file

@ -0,0 +1,337 @@
---
# Data Service Provisioning Playbook
#
# Provisions PostgreSQL database, Valkey ACL user, Garage S3 bucket/key,
# and Vault credentials for a service defined in services.yml.
#
# Usage:
# ansible-playbook -i inventory.ini playbooks/data-service.yml -e "service=myapp"
#
# With database restore:
# ansible-playbook -i inventory.ini playbooks/data-service.yml -e "service=myapp" -e "restore=true"
#
# Prerequisites:
# - postgres-primary running (run playbooks/postgres.yml first)
# - valkey-primary running with ACLs (run playbooks/valkey.yml first)
# - Vault cluster initialized and unsealed (run playbooks/vault.yml first)
# - Database secrets engine enabled: vault secrets enable database
# - VAULT_ADDR and VAULT_TOKEN environment variables set
- name: Load Service Configuration
hosts: localhost
gather_facts: false
vars_files:
- ../services.yml
tasks:
- name: Validate service parameter
fail:
msg: "Service '{{ service }}' not found in services.yml"
when: service not in services
- name: Set service facts
set_fact:
svc: "{{ services[service] }}"
postgres_enabled: "{{ services[service].postgres.enabled | default(false) }}"
valkey_enabled: "{{ services[service].valkey.enabled | default(false) }}"
s3_enabled: "{{ services[service].s3.enabled | default(false) }}"
vault_roles: "{{ services[service].vault_roles | default(['app', 'migrate']) }}"
- name: Display service info
debug:
msg: |
Service: {{ service }}
Description: {{ svc.description }}
PostgreSQL: {{ postgres_enabled }}
Valkey: {{ valkey_enabled }} (prefix: {{ svc.valkey.key_prefix | default(service) }}:*)
S3: {{ s3_enabled }} (bucket: {{ svc.s3.bucket | default(service + '-media') }})
Vault roles: {{ vault_roles | join(', ') }}
- name: Setup PostgreSQL Database and Roles
hosts: postgres-01
become: true
vars_files:
- ../vault/secrets.yml
- ../services.yml
vars:
svc: "{{ services[service] }}"
tasks:
- name: Skip if PostgreSQL not enabled
meta: end_host
when: not (svc.postgres.enabled | default(false))
- name: Check if database exists
become_user: postgres
shell: psql -tAc "SELECT 1 FROM pg_database WHERE datname='{{ service }}'"
register: db_exists
changed_when: false
- name: Template static roles SQL
template:
src: ../templates/pg-static-roles.sql.j2
dest: "/tmp/{{ service }}-roles.sql"
mode: '0644'
when: db_exists.stdout != "1"
- name: Create database and static roles
become_user: postgres
shell: psql -f /tmp/{{ service }}-roles.sql
when: db_exists.stdout != "1"
- name: Create common extensions (requires superuser)
become_user: postgres
shell: |
psql -d {{ service }} -c "CREATE EXTENSION IF NOT EXISTS btree_gist;"
psql -d {{ service }} -c 'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";'
when: db_exists.stdout != "1"
- name: Clean up SQL file
file:
path: "/tmp/{{ service }}-roles.sql"
state: absent
- name: Check for dump file
delegate_to: localhost
become: false
stat:
path: "{{ playbook_dir }}/../{{ svc.postgres.restore_from }}"
register: dump_file
when: restore | default(false) | bool
- name: Copy dump to server
copy:
src: "{{ playbook_dir }}/../{{ svc.postgres.restore_from }}"
dest: "/tmp/{{ service }}.dump"
mode: '0644'
when:
- restore | default(false) | bool
- dump_file.stat.exists | default(false)
- name: Restore database from dump
become_user: postgres
shell: pg_restore --no-owner --no-privileges -d {{ service }} /tmp/{{ service }}.dump
when:
- restore | default(false) | bool
- dump_file.stat.exists | default(false)
ignore_errors: true # May fail if data already exists
- name: Clean up dump file
file:
path: "/tmp/{{ service }}.dump"
state: absent
when: restore | default(false) | bool
- name: Setup Valkey ACL User
hosts: valkey-01
become: true
vars_files:
- ../vault/secrets.yml
- ../services.yml
vars:
svc: "{{ services[service] }}"
valkey_nebula_ip: "{{ hostvars['valkey-01']['nebula_ip'] }}"
tasks:
- name: Skip if Valkey not enabled
meta: end_host
when: not (svc.valkey.enabled | default(false))
- name: Generate service password
set_fact:
valkey_service_password: "{{ lookup('password', '/dev/null length=32 chars=hexdigits') }}"
- name: Check if ACL user exists
command: valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} ACL GETUSER {{ service }}
register: acl_user_check
changed_when: false
failed_when: false
no_log: true
- name: Create ACL user for service
shell: |
valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} \
ACL SETUSER {{ service }} on '>{{ valkey_service_password }}' '~{{ svc.valkey.key_prefix | default(service) }}:*' '&*' '+@all'
when: acl_user_check.rc != 0
no_log: true
- name: Update ACL user password if exists
shell: |
valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} \
ACL SETUSER {{ service }} on '>{{ valkey_service_password }}' '~{{ svc.valkey.key_prefix | default(service) }}:*' '&*' '+@all'
when: acl_user_check.rc == 0
no_log: true
- name: Persist ACL to disk
command: valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} ACL SAVE
no_log: true
- name: Store credentials in Vault
delegate_to: localhost
become: false
shell: |
vault kv put secret/{{ service }}/valkey \
host={{ valkey_nebula_ip }} \
port=6379 \
username={{ service }} \
password={{ valkey_service_password }} \
key_prefix={{ svc.valkey.key_prefix | default(service) }}
environment:
VAULT_ADDR: "{{ lookup('env', 'VAULT_ADDR') | default('http://' + hostvars['vault-01']['nebula_ip'] + ':8200', true) }}"
VAULT_TOKEN: "{{ lookup('env', 'VAULT_TOKEN') }}"
no_log: true
- name: Setup Garage S3 Bucket and Key
hosts: garage-01
become: true
vars_files:
- ../services.yml
vars:
svc: "{{ services[service] }}"
garage_nebula_ip: "{{ hostvars['garage-01']['nebula_ip'] }}"
tasks:
- name: Skip if S3 not enabled
meta: end_host
when: not (svc.s3.enabled | default(false))
- name: Set bucket name
set_fact:
bucket_name: "{{ svc.s3.bucket | default(service + '-media') }}"
- name: Check if bucket exists
command: garage -c /etc/garage/garage.toml bucket list
register: bucket_list
changed_when: false
- name: Create bucket if needed
command: garage -c /etc/garage/garage.toml bucket create {{ bucket_name }}
when: bucket_name not in bucket_list.stdout
- name: Check if key exists
command: garage -c /etc/garage/garage.toml key list
register: key_list
changed_when: false
- name: Create API key for service
command: garage -c /etc/garage/garage.toml key create {{ service }}-key
register: key_create
when: (service + '-key') not in key_list.stdout
- name: Get key info
command: garage -c /etc/garage/garage.toml key info {{ service }}-key --show-secret
register: key_info
changed_when: false
no_log: true
- name: Parse key credentials
set_fact:
s3_access_key: "{{ key_info.stdout | regex_search('Key ID: ([A-Za-z0-9]+)', '\\1') | first }}"
s3_secret_key: "{{ key_info.stdout | regex_search('Secret key: ([a-f0-9]+)', '\\1') | first }}"
no_log: true
- name: Grant bucket permissions to key
command: >
garage -c /etc/garage/garage.toml bucket allow {{ bucket_name }}
--read --write --key {{ service }}-key
register: bucket_allow
changed_when: "'already' not in bucket_allow.stderr"
- name: Store S3 credentials in Vault
delegate_to: localhost
become: false
shell: |
vault kv put secret/{{ service }}/s3 \
access_key={{ s3_access_key }} \
secret_key={{ s3_secret_key }} \
bucket={{ bucket_name }} \
endpoint=http://{{ garage_nebula_ip }}:3900
environment:
VAULT_ADDR: "{{ lookup('env', 'VAULT_ADDR') | default('http://' + hostvars['vault-01']['nebula_ip'] + ':8200', true) }}"
VAULT_TOKEN: "{{ lookup('env', 'VAULT_TOKEN') }}"
no_log: true
- name: Configure Vault Database Credentials
hosts: localhost
gather_facts: false
vars_files:
- ../vault/secrets.yml
- ../services.yml
vars:
svc: "{{ services[service] }}"
postgres_nebula_ip: "{{ hostvars['postgres-01']['nebula_ip'] }}"
vault_nebula_ip: "{{ hostvars['vault-01']['nebula_ip'] }}"
environment:
VAULT_ADDR: "{{ vault_addr | default('http://' + vault_nebula_ip + ':8200') }}"
tasks:
- name: Skip if PostgreSQL not enabled
meta: end_play
when: not (svc.postgres.enabled | default(false))
- name: Check if VAULT_TOKEN is set
fail:
msg: "VAULT_TOKEN environment variable must be set"
when: lookup('env', 'VAULT_TOKEN') == ''
- name: Configure Vault database connection
shell: |
vault write database/config/{{ service }} \
plugin_name="postgresql-database-plugin" \
allowed_roles="{{ service }}-app,{{ service }}-migrate" \
connection_url="postgresql://{% raw %}{{username}}:{{password}}{% endraw %}@{{ postgres_nebula_ip }}:5432/{{ service }}" \
username="vault_admin" \
password="{{ vault_admin_password }}"
register: vault_config
changed_when: vault_config.rc == 0
- name: Create Vault app role
shell: |
vault write database/roles/{{ service }}-app \
db_name="{{ service }}" \
creation_statements="CREATE ROLE \"{% raw %}{{name}}{% endraw %}\" WITH LOGIN PASSWORD '{% raw %}{{password}}{% endraw %}' VALID UNTIL '{% raw %}{{expiration}}{% endraw %}' INHERIT; GRANT {{ service }}_app TO \"{% raw %}{{name}}{% endraw %}\"; ALTER ROLE \"{% raw %}{{name}}{% endraw %}\" SET ROLE = {{ service }}_app;" \
revocation_statements="REASSIGN OWNED BY \"{% raw %}{{name}}{% endraw %}\" TO {{ service }}_owner; REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE USAGE ON SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE CONNECT ON DATABASE {{ service }} FROM \"{% raw %}{{name}}{% endraw %}\"; DROP ROLE IF EXISTS \"{% raw %}{{name}}{% endraw %}\";" \
default_ttl="1h" \
max_ttl="24h"
when: "'app' in (svc.vault_roles | default(['app', 'migrate']))"
- name: Create Vault migrate role
shell: |
vault write database/roles/{{ service }}-migrate \
db_name="{{ service }}" \
creation_statements="CREATE ROLE \"{% raw %}{{name}}{% endraw %}\" WITH LOGIN PASSWORD '{% raw %}{{password}}{% endraw %}' VALID UNTIL '{% raw %}{{expiration}}{% endraw %}' INHERIT; GRANT {{ service }}_migrate TO \"{% raw %}{{name}}{% endraw %}\"; ALTER ROLE \"{% raw %}{{name}}{% endraw %}\" SET ROLE = {{ service }}_migrate;" \
revocation_statements="REASSIGN OWNED BY \"{% raw %}{{name}}{% endraw %}\" TO {{ service }}_owner; REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE CONNECT ON DATABASE {{ service }} FROM \"{% raw %}{{name}}{% endraw %}\"; DROP ROLE IF EXISTS \"{% raw %}{{name}}{% endraw %}\";" \
default_ttl="15m" \
max_ttl="1h"
when: "'migrate' in (svc.vault_roles | default(['app', 'migrate']))"
- name: Display Service Summary
hosts: localhost
gather_facts: false
vars_files:
- ../services.yml
vars:
svc: "{{ services[service] }}"
postgres_ip: "{{ hostvars['postgres-01']['nebula_ip'] }}"
valkey_ip: "{{ hostvars['valkey-01']['nebula_ip'] }}"
garage_ip: "{{ hostvars['garage-01']['nebula_ip'] }}"
tasks:
- name: Service provisioning complete
debug:
msg:
- "=========================================="
- "Service: {{ service }}"
- "Description: {{ svc.description }}"
- "=========================================="
- ""
- "PostgreSQL:"
- " Database: {{ service }} @ {{ postgres_ip }}:5432"
- " App credentials: vault read database/creds/{{ service }}-app"
- " Migrate credentials: vault read database/creds/{{ service }}-migrate"
- ""
- "Valkey:"
- " Host: {{ valkey_ip }}:6379"
- " User: {{ service }}"
- " Key prefix: {{ svc.valkey.key_prefix | default(service) }}:*"
- " Credentials: vault kv get secret/{{ service }}/valkey"
- ""
- "S3:"
- " Bucket: {{ svc.s3.bucket | default(service + '-media') }} @ http://{{ garage_ip }}:3900"
- " Credentials: vault kv get secret/{{ service }}/s3"
- ""
- "=========================================="

35
ansible/playbooks/dns-client.yml Normal file

@ -0,0 +1,35 @@
---
# DNS Client Configuration Playbook
#
# Usage: ansible-playbook -i inventory.ini playbooks/dns-client.yml
#
# Configures all VMs to use the internal Unbound DNS server.
# Run AFTER dns.yml has configured the server.
- name: Configure DNS Clients
hosts: all
become: true
vars:
dns_server: "{{ hostvars['dns']['nebula_ip'] }}"
tasks:
- name: Configure resolv.conf to use internal DNS
copy:
dest: /etc/resolv.conf
content: |
# Managed by Ansible - changes will be overwritten
# Internal DNS server on Nebula overlay
nameserver {{ dns_server }}
# Fallback to public DNS if internal is unreachable
nameserver 1.1.1.1
nameserver 8.8.8.8
# Search domain for short hostnames
search nebula
owner: root
group: root
mode: '0644'
- name: Test DNS resolution
command: getent hosts lighthouse.nebula
register: dns_test
changed_when: false
failed_when: dns_test.rc != 0

77
ansible/playbooks/dns.yml Normal file

@ -0,0 +1,77 @@
---
# Unbound DNS Server Playbook
#
# Usage: ansible-playbook -i inventory.ini playbooks/dns.yml
#
# Configures Unbound as a recursive resolver with local DNS records
# for the Nebula overlay network.
- name: Setup Unbound DNS Server
hosts: dns
become: true
tasks:
- name: Install unbound and bind
community.general.pacman:
name:
- unbound
- bind # provides dig for verification
state: present
- name: Create unbound config directory
file:
path: /etc/unbound
state: directory
owner: root
group: root
mode: '0755'
- name: Deploy main unbound configuration
template:
src: ../templates/unbound.conf.j2
dest: /etc/unbound/unbound.conf
owner: root
group: root
mode: '0644'
notify: Restart unbound
- name: Deploy local zones configuration
template:
src: ../templates/unbound-local-zones.conf.j2
dest: /etc/unbound/local-zones.conf
owner: root
group: root
mode: '0644'
notify: Restart unbound
- name: Deploy unbound systemd service
template:
src: ../templates/unbound.service.j2
dest: /etc/systemd/system/unbound.service
owner: root
group: root
mode: '0644'
notify:
- Reload systemd
- Restart unbound
- name: Enable and start unbound
systemd:
name: unbound
state: started
enabled: true
- name: Verify unbound is responding
command: dig @127.0.0.1 dns.nebula +short
register: dns_test
changed_when: false
failed_when: dns_test.stdout != hostvars['dns']['nebula_ip']
handlers:
- name: Reload systemd
systemd:
daemon_reload: true
- name: Restart unbound
systemd:
name: unbound
state: restarted

50
ansible/playbooks/docker.yml Normal file

@ -0,0 +1,50 @@
---
# Docker playbook for VMs that need containerization
#
# Usage: ansible-playbook -i inventory.ini playbooks/docker.yml --limit "docker"
- name: Install and Configure Docker
hosts: docker
become: true
tasks:
- name: Install Docker and Docker Compose
community.general.pacman:
name:
- docker
- docker-compose
- docker-buildx
state: present
- name: Create Docker daemon configuration directory
file:
path: /etc/docker
state: directory
mode: '0755'
- name: Configure Docker daemon with DNS
copy:
content: |
{
"dns": ["1.1.1.1", "8.8.8.8"]
}
dest: /etc/docker/daemon.json
mode: '0644'
notify: Restart Docker
- name: Enable and start Docker service
systemd:
name: docker
enabled: true
state: started
- name: Add user to docker group
user:
name: "{{ ansible_user }}"
groups: docker
append: true
handlers:
- name: Restart Docker
systemd:
name: docker
state: restarted

187
ansible/playbooks/garage.yml Normal file

@ -0,0 +1,187 @@
---
# Garage S3 Cluster Setup (3 nodes, replication factor 3)
#
# Usage:
# # Full deployment:
# ansible-playbook -i inventory.ini playbooks/garage.yml
#
# # Just install/configure (no layout):
# ansible-playbook -i inventory.ini playbooks/garage.yml --tags install
#
# # Just configure layout (after install):
# ansible-playbook -i inventory.ini playbooks/garage.yml --tags layout
- name: Install and Configure Garage on All Nodes
hosts: garage
become: true
tags: [install]
vars_files:
- ../vault/secrets.yml
tasks:
- name: Download Garage binary
get_url:
url: "https://garagehq.deuxfleurs.fr/_releases/v1.0.1/x86_64-unknown-linux-musl/garage"
dest: /usr/local/bin/garage
mode: '0755'
- name: Create garage user
user:
name: garage
system: true
shell: /sbin/nologin
home: /var/lib/garage
create_home: false
- name: Create garage directories
file:
path: "{{ item }}"
state: directory
owner: garage
group: garage
mode: '0750'
loop:
- /var/lib/garage
- /var/lib/garage/meta
- /var/lib/garage/data
- /etc/garage
- name: Deploy garage configuration
template:
src: ../templates/garage.toml.j2
dest: /etc/garage/garage.toml
owner: garage
group: garage
mode: '0600'
notify: restart garage
- name: Deploy garage systemd service
copy:
dest: /etc/systemd/system/garage.service
content: |
[Unit]
Description=Garage S3-compatible object storage
Documentation=https://garagehq.deuxfleurs.fr/
After=network.target nebula.service
Wants=network-online.target
[Service]
Type=simple
User=garage
Group=garage
ExecStart=/usr/local/bin/garage -c /etc/garage/garage.toml server
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
mode: '0644'
notify:
- reload systemd
- restart garage
- name: Flush handlers to apply config before starting
meta: flush_handlers
- name: Start and enable garage
systemd:
name: garage
state: started
enabled: true
daemon_reload: true
- name: Wait for Garage RPC to be ready
wait_for:
host: "{{ nebula_ip }}"
port: 3901
timeout: 30
- name: Get node ID
command: garage -c /etc/garage/garage.toml node id -q
register: node_id
changed_when: false
- name: Display node ID
debug:
msg: "Node {{ inventory_hostname }}: {{ node_id.stdout }}"
handlers:
- name: reload systemd
systemd:
daemon_reload: true
- name: restart garage
systemd:
name: garage
state: restarted
- name: Configure Garage Cluster Layout
hosts: garage-01
become: true
tags: [layout]
vars_files:
- ../vault/secrets.yml
tasks:
- name: Wait for all nodes to connect
pause:
seconds: 10
- name: Check cluster status
command: garage -c /etc/garage/garage.toml status
register: cluster_status
changed_when: false
- name: Display cluster status
debug:
msg: "{{ cluster_status.stdout_lines }}"
- name: Get current layout
command: garage -c /etc/garage/garage.toml layout show
register: layout_show
changed_when: false
- name: Check if layout needs configuration
set_fact:
layout_needs_config: "{{ 'no role' in layout_show.stdout }}"
- name: Get node IDs for layout
command: garage -c /etc/garage/garage.toml status
register: status_output
changed_when: false
when: layout_needs_config
- name: Parse node IDs
set_fact:
node_ids: "{{ status_output.stdout | regex_findall('([a-f0-9]{16})\\s+' + item + '\\s') }}"
loop:
- "{{ hostvars['garage-01']['nebula_ip'] }}"
- "{{ hostvars['garage-02']['nebula_ip'] }}"
- "{{ hostvars['garage-03']['nebula_ip'] }}"
register: parsed_nodes
when: layout_needs_config
- name: Assign layout to nodes
command: >
garage -c /etc/garage/garage.toml layout assign
-z dc1 -c 200GB -t {{ item.item | regex_replace('10\\.10\\.10\\.(\\d+)', 'garage-\\1') | regex_replace('garage-39', 'garage-01') | regex_replace('garage-40', 'garage-02') | regex_replace('garage-41', 'garage-03') }}
{{ item.ansible_facts.node_ids[0] }}
loop: "{{ parsed_nodes.results }}"
when: layout_needs_config and item.ansible_facts.node_ids is defined and item.ansible_facts.node_ids | length > 0
- name: Apply layout
command: garage -c /etc/garage/garage.toml layout apply --version 1
when: layout_needs_config
register: layout_apply
- name: Display layout result
debug:
var: layout_apply.stdout_lines
when: layout_apply is changed
- name: Show final layout
command: garage -c /etc/garage/garage.toml layout show
register: final_layout
changed_when: false
- name: Display final layout
debug:
msg: "{{ final_layout.stdout_lines }}"

43
ansible/playbooks/nebula.yml Normal file

@ -0,0 +1,43 @@
---
- name: Configure Nebula Overlay Network
hosts: all
become: true
tasks:
- name: Install the Nebula network overlay
community.general.pacman:
name: nebula
state: present
- name: Ensure the Nebula configuration directory exists
file:
path: /etc/nebula
state: directory
mode: '0755'
- name: Copy over the Nebula CA certificate
copy:
src: ../../nebula/ca.crt
dest: /etc/nebula/ca.crt
mode: '0644'
- name: Copy over certificates and keys for the nodes
copy:
src: "../../nebula/configs/{{ vmid }}/{{ inventory_hostname }}/{{ inventory_hostname }}.{{ item }}"
dest: "/etc/nebula/config.{{ item }}"
mode: '0600'
loop:
- crt
- key
- name: Create new node configurations
template:
src: ../templates/nebula-config.yml.j2
dest: /etc/nebula/config.yml
notify: restart nebula
handlers:
- name: restart nebula
systemd:
name: nebula
state: restarted
enabled: true

277
ansible/playbooks/postgres-ha.yml Normal file

@ -0,0 +1,277 @@
---
# PostgreSQL High Availability with Patroni + etcd
# Run on postgres group hosts
#
# Usage:
# # Initialize first node (with existing data):
# ansible-playbook -i inventory.ini playbooks/postgres-ha.yml --limit postgres-01 -e "patroni_bootstrap=true"
#
# # Join additional nodes:
# ansible-playbook -i inventory.ini playbooks/postgres-ha.yml --limit postgres-02
#
# # All nodes at once (after bootstrap):
# ansible-playbook -i inventory.ini playbooks/postgres-ha.yml --limit postgres
- name: Configure PostgreSQL HA with Patroni + etcd
hosts: postgres
become: true
vars:
patroni_superuser_password: "{{ lookup('env', 'PATRONI_SUPERUSER_PASSWORD') | default('changeme', true) }}"
patroni_replicator_password: "{{ lookup('env', 'PATRONI_REPLICATOR_PASSWORD') | default('changeme', true) }}"
patroni_bootstrap: false
etcd_version: "3.5.17"
tasks:
# ============================================
# ETCD SETUP
# ============================================
- name: Check if etcd is installed
stat:
path: /usr/local/bin/etcd
register: etcd_binary
- name: Download etcd
get_url:
url: "https://github.com/etcd-io/etcd/releases/download/v{{ etcd_version }}/etcd-v{{ etcd_version }}-linux-amd64.tar.gz"
dest: /tmp/etcd.tar.gz
mode: '0644'
when: not etcd_binary.stat.exists
- name: Extract etcd
unarchive:
src: /tmp/etcd.tar.gz
dest: /tmp
remote_src: true
when: not etcd_binary.stat.exists
- name: Install etcd binaries
copy:
src: "/tmp/etcd-v{{ etcd_version }}-linux-amd64/{{ item }}"
dest: "/usr/local/bin/{{ item }}"
mode: '0755'
remote_src: true
loop:
- etcd
- etcdctl
- etcdutl
when: not etcd_binary.stat.exists
- name: Create symlinks for etcd binaries
file:
src: "/usr/local/bin/{{ item }}"
dest: "/usr/bin/{{ item }}"
state: link
loop:
- etcd
- etcdctl
- etcdutl
- name: Create etcd user
user:
name: etcd
system: true
shell: /sbin/nologin
home: /var/lib/etcd
create_home: true
- name: Create etcd config directory
file:
path: /etc/etcd
state: directory
mode: '0755'
- name: Create etcd data directory
file:
path: /var/lib/etcd
state: directory
owner: etcd
group: etcd
mode: '0700'
- name: Deploy etcd configuration
template:
src: ../templates/etcd.conf.j2
dest: /etc/etcd/etcd.conf
mode: '0644'
notify: restart etcd
- name: Deploy etcd systemd service
template:
src: ../templates/etcd.service.j2
dest: /etc/systemd/system/etcd.service
mode: '0644'
notify:
- reload systemd
- restart etcd
- name: Enable and start etcd
systemd:
name: etcd
state: started
enabled: true
daemon_reload: true
- name: Wait for etcd to be healthy
command: etcdctl endpoint health --endpoints=http://127.0.0.1:2379
register: etcd_health
until: etcd_health.rc == 0
retries: 30
delay: 2
changed_when: false
# ============================================
# POSTGRESQL SETUP
# ============================================
- name: Install PostgreSQL
community.general.pacman:
name: postgresql
state: present
# ============================================
# PATRONI SETUP
# ============================================
- name: Install Patroni dependencies
community.general.pacman:
name:
- python
- python-pip
- python-psycopg2
- python-yaml
- python-urllib3
- python-certifi
- python-virtualenv
state: present
- name: Create Patroni virtual environment
command: python -m venv /opt/patroni
args:
creates: /opt/patroni/bin/python
- name: Install Patroni in virtual environment
pip:
name:
- patroni[etcd3]
- psycopg2-binary
state: present
virtualenv: /opt/patroni
- name: Create PostgreSQL run directory
file:
path: /run/postgresql
state: directory
owner: postgres
group: postgres
mode: '0755'
- name: Create tmpfiles config for postgresql run directory
copy:
content: "d /run/postgresql 0755 postgres postgres -"
dest: /etc/tmpfiles.d/postgresql.conf
mode: '0644'
- name: Create patroni symlink
file:
src: /opt/patroni/bin/patroni
dest: /usr/local/bin/patroni
state: link
- name: Create patroni config directory
file:
path: /etc/patroni
state: directory
mode: '0755'
- name: Stop PostgreSQL service (Patroni will manage it)
systemd:
name: postgresql
state: stopped
enabled: false
ignore_errors: true
# For bootstrap node with existing data
- name: Prepare existing data directory for Patroni takeover
block:
- name: Ensure postgres owns data directory
file:
path: /var/lib/postgres/data
owner: postgres
group: postgres
recurse: true
- name: Create replicator role
become_user: postgres
command: >
psql -c "DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'replicator') THEN
CREATE ROLE replicator WITH REPLICATION LOGIN PASSWORD '{{ patroni_replicator_password }}';
END IF;
END $$;"
when: patroni_bootstrap | bool
ignore_errors: true
- name: Set postgres superuser password
become_user: postgres
command: psql -c "ALTER USER postgres WITH PASSWORD '{{ patroni_superuser_password }}';"
when: patroni_bootstrap | bool
ignore_errors: true
when: patroni_bootstrap | bool
- name: Deploy Patroni configuration
template:
src: ../templates/patroni.yml.j2
dest: /etc/patroni/patroni.yml
owner: postgres
group: postgres
mode: '0600'
notify: restart patroni
- name: Create .pgpass file for postgres user
copy:
content: |
*:*:*:postgres:{{ patroni_superuser_password }}
*:*:*:replicator:{{ patroni_replicator_password }}
dest: /var/lib/postgres/.pgpass
owner: postgres
group: postgres
mode: '0600'
- name: Deploy Patroni systemd service
template:
src: ../templates/patroni.service.j2
dest: /etc/systemd/system/patroni.service
mode: '0644'
notify:
- reload systemd
- restart patroni
- name: Enable and start Patroni
systemd:
name: patroni
state: started
enabled: true
daemon_reload: true
- name: Wait for Patroni to be healthy
uri:
url: "http://{{ nebula_ip }}:8008/health"
status_code: 200
register: patroni_health
until: patroni_health.status == 200
retries: 30
delay: 5
handlers:
- name: reload systemd
systemd:
daemon_reload: true
- name: restart etcd
systemd:
name: etcd
state: restarted
- name: restart patroni
systemd:
name: patroni
state: restarted

103
ansible/playbooks/security.yml Normal file

@ -0,0 +1,103 @@
---
# Security playbook: iptables + fail2ban for all VMs
#
# Run: ansible-playbook -i inventory.ini playbooks/security.yml
#
# This playbook uses direct iptables rules instead of ufw to ensure
# bridge network traffic is properly blocked (ufw's before.rules allows
# ICMP before custom deny rules can take effect).
- name: Configure Security for All VMs
hosts: all
become: true
tasks:
# Load netfilter kernel modules (required on fresh VMs)
- name: Load netfilter kernel modules
community.general.modprobe:
name: "{{ item }}"
state: present
loop:
- ip_tables
- ip6_tables
- iptable_filter
- ip6table_filter
# Install security packages
- name: Install iptables and fail2ban
community.general.pacman:
name:
- iptables
- fail2ban
state: present
# Stop and disable ufw if present (migrating to iptables)
- name: Check if ufw is installed
command: pacman -Q ufw
register: ufw_check
ignore_errors: true
changed_when: false
- name: Stop ufw if running
systemd:
name: ufw
state: stopped
when: ufw_check.rc == 0
ignore_errors: true
- name: Disable ufw
systemd:
name: ufw
enabled: false
when: ufw_check.rc == 0
ignore_errors: true
# Deploy iptables rules
- name: Deploy iptables rules
template:
src: ../templates/iptables.rules.j2
dest: /etc/iptables/iptables.rules
mode: '0644'
notify: reload iptables
# Enable and start iptables service
- name: Enable and start iptables
systemd:
name: iptables
state: started
enabled: true
# Configure fail2ban
- name: Create fail2ban local config
copy:
dest: /etc/fail2ban/jail.local
content: |
[DEFAULT]
bantime = 1h
findtime = 10m
maxretry = 5
[sshd]
enabled = true
port = ssh
filter = sshd
backend = systemd
mode: '0644'
notify: restart fail2ban
# Enable fail2ban service
- name: Enable and start fail2ban
systemd:
name: fail2ban
state: started
enabled: true
handlers:
- name: reload iptables
systemd:
name: iptables
state: restarted
- name: restart fail2ban
systemd:
name: fail2ban
state: restarted

155
ansible/playbooks/valkey-sentinel.yml Normal file

@ -0,0 +1,155 @@
---
# Valkey Sentinel Setup (1 master + 2 replicas + Sentinel on each)
#
# Provides automatic failover without requiring cluster-aware clients.
# Apps connect directly to master or use Sentinel-aware clients.
#
# Usage:
# ansible-playbook -i inventory.ini playbooks/valkey-sentinel.yml
- name: Configure Valkey with Sentinel
hosts: valkey
become: true
vars_files:
- ../vault/secrets.yml
vars:
valkey_maxmemory: "256mb"
valkey_maxmemory_policy: "allkeys-lru"
valkey_role: "{{ 'master' if inventory_hostname == 'valkey-01' else 'replica' }}"
tasks:
- name: Stop valkey service
systemd:
name: valkey
state: stopped
ignore_errors: true
- name: Remove cluster data files
file:
path: "{{ item }}"
state: absent
loop:
- /var/lib/valkey/nodes.conf
- /var/lib/valkey/dump.rdb
- name: Deploy standalone Valkey configuration
template:
src: ../templates/valkey-standalone.conf.j2
dest: /etc/valkey/valkey.conf
owner: valkey
group: valkey
mode: '0640'
- name: Deploy ACL file
template:
src: ../templates/valkey-acl.j2
dest: /etc/valkey/users.acl
owner: valkey
group: valkey
mode: '0600'
- name: Create Sentinel data directory
file:
path: /var/lib/valkey/sentinel
state: directory
owner: valkey
group: valkey
mode: '0750'
- name: Deploy Sentinel configuration
template:
src: ../templates/valkey-sentinel.conf.j2
dest: /etc/valkey/sentinel.conf
owner: valkey
group: valkey
mode: '0640'
- name: Deploy Sentinel systemd service
copy:
dest: /etc/systemd/system/valkey-sentinel.service
content: |
[Unit]
Description=Valkey Sentinel
Documentation=https://valkey.io/
After=network.target valkey.service nebula.service
Wants=network-online.target
[Service]
Type=simple
User=valkey
Group=valkey
ExecStart=/usr/bin/valkey-sentinel /etc/valkey/sentinel.conf
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
mode: '0644'
- name: Reload systemd
systemd:
daemon_reload: true
- name: Start Valkey service
systemd:
name: valkey
state: started
enabled: true
- name: Wait for Valkey to be ready
wait_for:
host: "{{ nebula_ip }}"
port: 6379
timeout: 30
- name: Start Sentinel on all nodes
hosts: valkey
become: true
serial: 1
tasks:
- name: Wait for master to be ready (replicas only)
wait_for:
host: "{{ hostvars['valkey-01']['nebula_ip'] }}"
port: 6379
timeout: 30
when: inventory_hostname != 'valkey-01'
- name: Start Sentinel service
systemd:
name: valkey-sentinel
state: started
enabled: true
- name: Wait for Sentinel to be ready
wait_for:
host: "{{ nebula_ip }}"
port: 26379
timeout: 30
- name: Verify Sentinel Setup
hosts: valkey-01
become: true
vars_files:
- ../vault/secrets.yml
tasks:
- name: Check replication status
command: >
valkey-cli -h {{ nebula_ip }} -p 6379
--user admin --pass {{ valkey_admin_password }}
info replication
register: replication_info
changed_when: false
- name: Display replication status
debug:
msg: "{{ replication_info.stdout_lines }}"
- name: Check Sentinel status
command: >
valkey-cli -h {{ nebula_ip }} -p 26379
sentinel master valkey-ha
register: sentinel_info
changed_when: false
- name: Display Sentinel status
debug:
msg: "{{ sentinel_info.stdout_lines }}"

80
ansible/playbooks/valkey.yml Normal file

@ -0,0 +1,80 @@
---
# Valkey (Redis fork) Primary Setup
#
# Usage: ansible-playbook -i inventory.ini playbooks/valkey.yml
#
# Creates:
# - Valkey server on valkey-primary
# - Configured for Nebula network access
# - 16 databases (0-15) for multi-tenant use
- name: Setup Valkey Primary
hosts: valkey-primary
become: true
vars_files:
- ../vault/secrets.yml
vars:
valkey_maxmemory: "256mb"
valkey_maxmemory_policy: "allkeys-lru"
tasks:
- name: Install valkey
community.general.pacman:
name: valkey
state: present
- name: Create systemd override directory
file:
path: /etc/systemd/system/valkey.service.d
state: directory
mode: '0755'
- name: Add systemd override for ACL write access
copy:
dest: /etc/systemd/system/valkey.service.d/override.conf
content: |
[Service]
ReadWritePaths=/etc/valkey
mode: '0644'
notify:
- reload systemd
- restart valkey
- name: Deploy ACL file
template:
src: ../templates/valkey-acl.j2
dest: /etc/valkey/users.acl
owner: valkey
group: valkey
mode: '0600'
notify: restart valkey
- name: Deploy valkey.conf
template:
src: ../templates/valkey.conf.j2
dest: /etc/valkey/valkey.conf
owner: valkey
group: valkey
mode: '0640'
notify: restart valkey
- name: Start and enable valkey
systemd:
name: valkey
state: started
enabled: true
- name: Wait for Valkey to be ready
wait_for:
host: "{{ nebula_ip }}"
port: 6379
timeout: 30
handlers:
- name: reload systemd
systemd:
daemon_reload: true
- name: restart valkey
systemd:
name: valkey
state: restarted

57
ansible/services.yml.example Normal file

@ -0,0 +1,57 @@
# Service Manifest
#
# Defines applications, their git repos, data services, and deployment targets.
#
# Usage:
# ansible-playbook playbooks/data-service.yml -e "service=myapp"
git_base_url: "git@git.infra.example:org"
services:
myapp:
description: "Example web application"
host: app-server
deploy_path: /opt/myapp
repos:
- name: myapp
dest: myapp
version: main
postgres:
enabled: true
# restore_from: "databases/dumps/myapp.dump" # Optional: restore from backup
valkey:
enabled: true
key_prefix: "myapp" # Access to myapp:* keys only
s3:
enabled: true
bucket: "myapp-media"
vault_roles:
- app # 1h TTL, DML only (SELECT, INSERT, UPDATE, DELETE)
- migrate # 15m TTL, DDL+DML (for migrations)
another-service:
description: "Another example service"
host: another-server
deploy_path: /opt/another
repos:
- name: another
dest: another
version: main
postgres:
enabled: true
valkey:
enabled: true
key_prefix: "another"
vault_roles:
- app
- migrate
# Valkey key prefix allocation:
# All services use database /0 with key prefixes for namespace isolation.
# Each service gets an ACL user that can only access {service}:* keys.
# Credentials are provisioned by data-service.yml and stored in Vault.
# S3 bucket allocation:
# Each service gets its own bucket (default: {service}-media) with a dedicated API key.
# Buckets are created on the Garage cluster with read/write permissions.
# Credentials are provisioned by data-service.yml and stored in Vault at secret/{service}/s3.

21
ansible/templates/etcd.conf.j2 Normal file

@ -0,0 +1,21 @@
# etcd configuration for Patroni cluster
# Node: {{ inventory_hostname }}
name: '{{ inventory_hostname }}'
data-dir: /var/lib/etcd
# Cluster communication
initial-advertise-peer-urls: http://{{ nebula_ip }}:2380
listen-peer-urls: http://{{ nebula_ip }}:2380
listen-client-urls: http://{{ nebula_ip }}:2379,http://127.0.0.1:2379
advertise-client-urls: http://{{ nebula_ip }}:2379
# Cluster bootstrap
initial-cluster-token: 'patroni-etcd-cluster'
initial-cluster: {% for host in groups['postgres'] %}{{ host }}=http://{{ hostvars[host]['nebula_ip'] }}:2380{% if not loop.last %},{% endif %}{% endfor %}
initial-cluster-state: 'new'
# Performance tuning
heartbeat-interval: 1000
election-timeout: 5000

16
ansible/templates/etcd.service.j2 Normal file

@ -0,0 +1,16 @@
[Unit]
Description=etcd distributed key-value store
Documentation=https://etcd.io/docs/
After=network.target nebula.service
Wants=network-online.target
[Service]
Type=notify
User=etcd
ExecStart=/usr/local/bin/etcd --config-file=/etc/etcd/etcd.conf
Restart=always
RestartSec=5
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target

48
ansible/templates/garage.toml.j2 Normal file

@ -0,0 +1,48 @@
# Garage S3 Configuration
# Generated by Ansible - do not edit manually
# Metadata directory (LMDB database)
metadata_dir = "/var/lib/garage/meta"
# Data directory (actual S3 objects)
data_dir = "/var/lib/garage/data"
# Database engine (LMDB recommended for clusters with replication)
db_engine = "lmdb"
# Replication factor (3 = all nodes have all data)
replication_factor = 3
# Compression level (1-19, higher = more CPU, smaller files)
compression_level = 2
# Automatic metadata snapshots (protects against LMDB corruption)
metadata_auto_snapshot_interval = "6h"
# RPC configuration (inter-node communication)
rpc_bind_addr = "{{ nebula_ip }}:3901"
rpc_public_addr = "{{ nebula_ip }}:3901"
rpc_secret = "{{ garage_rpc_secret }}"
# Bootstrap peers (connect to other nodes)
bootstrap_peers = [
{% for host in groups['garage'] if host != inventory_hostname %}
"{{ hostvars[host]['nebula_ip'] }}:3901",
{% endfor %}
]
# S3 API endpoint
[s3_api]
api_bind_addr = "{{ nebula_ip }}:3900"
s3_region = "garage"
root_domain = ".s3.garage.nebula"
# S3 Web endpoint (for static website hosting)
[s3_web]
bind_addr = "{{ nebula_ip }}:3902"
root_domain = ".web.garage.nebula"
# Admin API (for bucket management)
[admin]
api_bind_addr = "{{ nebula_ip }}:3903"
admin_token = "{{ garage_admin_token }}"

71
ansible/templates/iptables.rules.j2 Normal file

@ -0,0 +1,71 @@
*filter
:INPUT DROP [0:0]
:FORWARD DROP [0:0]
:OUTPUT ACCEPT [0:0]
# Allow established and related connections
-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
# Allow loopback
-A INPUT -i lo -j ACCEPT
# Allow all traffic on Nebula interface (encrypted overlay)
-A INPUT -i nebula1 -j ACCEPT
# ============================================================
# Bridge network rules (192.168.100.0/24)
# Only allow Proxmox host - block all other VMs on the bridge
# ============================================================
# Allow Proxmox host for management/Ansible
-A INPUT -s {{ proxmox_host_ip }} -j ACCEPT
# Allow Nebula UDP from lighthouse (required for overlay connectivity)
-A INPUT -s {{ lighthouse_bridge_ip }} -p udp --dport 4242 -j ACCEPT
# DROP everything else from bridge network (force Nebula for inter-VM)
-A INPUT -s {{ bridge_network }} -j DROP
# ============================================================
# Caddy-proxied ports (Nebula only - reverse proxy traffic)
# These ports are NOT public; only Caddy can reach them
# ============================================================
{% if caddy_proxied_ports_tcp is defined %}
# Web services proxied through Caddy (Nebula only)
{% for port in caddy_proxied_ports_tcp %}
-A INPUT -s {{ caddy_nebula_ip }} -p tcp --dport {{ port }} -j ACCEPT
{% endfor %}
{% endif %}
{% if caddy_proxied_ports_udp is defined %}
# UDP services proxied through Caddy (Nebula only)
{% for port in caddy_proxied_ports_udp %}
-A INPUT -s {{ caddy_nebula_ip }} -p udp --dport {{ port }} -j ACCEPT
{% endfor %}
{% endif %}
# ============================================================
# Public-facing ports (for DNAT'd traffic from internet)
# ============================================================
{% if game_ports_tcp is defined %}
# Game server TCP ports (internet -> Proxmox DNAT -> VM)
{% for port in game_ports_tcp %}
-A INPUT -p tcp --dport {{ port }} -j ACCEPT
{% endfor %}
{% endif %}
{% if game_ports_udp is defined %}
# Game server UDP ports (internet -> Proxmox DNAT -> VM)
{% for port in game_ports_udp %}
-A INPUT -p udp --dport {{ port }} -j ACCEPT
{% endfor %}
{% endif %}
# ============================================================
# Default deny - drop everything not explicitly allowed
# ============================================================
-A INPUT -j DROP
COMMIT

67
ansible/templates/nebula-config.yml.j2 Normal file

@ -0,0 +1,67 @@
pki:
ca: /etc/nebula/ca.crt
cert: /etc/nebula/config.crt
key: /etc/nebula/config.key
static_host_map:
# Primary lighthouse (InterServer datacenter)
"{{ lighthouse_nebula_ip }}": ["{{ lighthouse_bridge_ip }}:4242", "{{ lighthouse_public_ip }}:4242"]
# Home lighthouse (Dell) for reduced latency from home
"{{ home_lighthouse_nebula_ip }}": ["{{ home_lighthouse_public_ip }}:4242"]
lighthouse:
am_lighthouse: false
interval: 60
hosts:
- "{{ lighthouse_nebula_ip }}"
- "{{ home_lighthouse_nebula_ip }}"
punchy:
punch: true
respond: true
relay:
relays:
- "{{ lighthouse_nebula_ip }}"
- "{{ home_lighthouse_nebula_ip }}"
listen:
host: 0.0.0.0
port: 0
tun:
dev: nebula1
drop_local_broadcast: true
drop_multicast: true
firewall:
conntrack:
tcp_timeout: 12h
udp_timeout: 3m
default_timeout: 10m
outbound:
- port: any
proto: any
host: any
inbound:
# Admin (laptop) has full access
- port: any
proto: any
group: admin
# Infrastructure can talk to each other
- port: any
proto: any
group: infrastructure
# Projects can access infrastructure services
- port: any
proto: any
group: projects
# Allow ICMP from anyone (ping)
- port: any
proto: icmp
host: any

19
ansible/templates/patroni.service.j2 Normal file

@ -0,0 +1,19 @@
[Unit]
Description=Patroni PostgreSQL HA Manager
Documentation=https://patroni.readthedocs.io/
After=network.target etcd.service nebula.service
Wants=network-online.target
[Service]
Type=simple
User=postgres
Group=postgres
ExecStart=/opt/patroni/bin/patroni /etc/patroni/patroni.yml
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=always
RestartSec=5
TimeoutSec=30
[Install]
WantedBy=multi-user.target

63
ansible/templates/patroni.yml.j2 Normal file

@ -0,0 +1,63 @@
scope: postgres-cluster
name: {{ inventory_hostname }}
restapi:
listen: {{ nebula_ip }}:8008
connect_address: {{ nebula_ip }}:8008
etcd3:
hosts: {% for host in groups['postgres'] %}{{ hostvars[host]['nebula_ip'] }}:2379{% if not loop.last %},{% endif %}{% endfor %}
bootstrap:
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
postgresql:
use_pg_rewind: true
use_slots: true
parameters:
wal_level: replica
hot_standby: "on"
max_connections: 200
max_worker_processes: 8
max_wal_senders: 10
max_replication_slots: 10
hot_standby_feedback: "on"
wal_log_hints: "on"
listen_addresses: '*'
initdb:
- encoding: UTF8
- data-checksums
- locale: C
pg_hba:
- host replication replicator 10.10.10.0/24 md5
- host all all 10.10.10.0/24 md5
- host all all 127.0.0.1/32 md5
- local all all trust
postgresql:
listen: {{ nebula_ip }}:5432
connect_address: {{ nebula_ip }}:5432
data_dir: /var/lib/postgres/data
bin_dir: /usr/bin
pgpass: /var/lib/postgres/.pgpass
authentication:
replication:
username: replicator
password: {{ patroni_replicator_password }}
superuser:
username: postgres
password: {{ patroni_superuser_password }}
parameters:
unix_socket_directories: '/run/postgresql'
tags:
nofailover: false
noloadbalance: false
clonefrom: false
nosync: false

83
ansible/templates/pg-static-roles.sql.j2 Normal file

@ -0,0 +1,83 @@
-- PostgreSQL Static Roles for {{ service }}
-- Generated by Ansible - run once per service
--
-- Creates:
-- {{ service }}_owner - Owns database and all objects (NOLOGIN)
-- {{ service }}_app - DML permissions (SELECT, INSERT, UPDATE, DELETE)
-- {{ service }}_migrate - DDL+DML permissions (for migrations)
--
-- Vault dynamic roles inherit from _app or _migrate
-- =============================================================================
-- 1. Create owner role (NOLOGIN, owns all objects)
-- =============================================================================
CREATE ROLE {{ service }}_owner NOLOGIN;
-- =============================================================================
-- 2. Create database owned by the owner role
-- =============================================================================
CREATE DATABASE {{ service }} OWNER {{ service }}_owner;
-- =============================================================================
-- 3. Connect to the new database for schema grants
-- =============================================================================
\c {{ service }}
-- =============================================================================
-- 4. Create app role template (DML only - SELECT, INSERT, UPDATE, DELETE)
-- =============================================================================
CREATE ROLE {{ service }}_app NOLOGIN NOINHERIT;
-- Grant DML permissions
GRANT CONNECT ON DATABASE {{ service }} TO {{ service }}_app;
GRANT USAGE ON SCHEMA public TO {{ service }}_app;
GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO {{ service }}_app;
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO {{ service }}_app;
-- =============================================================================
-- 5. Create migrate role template (DDL+DML - for migrations)
-- =============================================================================
CREATE ROLE {{ service }}_migrate NOLOGIN NOINHERIT;
-- Grant all privileges
GRANT CONNECT ON DATABASE {{ service }} TO {{ service }}_migrate;
GRANT ALL PRIVILEGES ON SCHEMA public TO {{ service }}_migrate;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO {{ service }}_migrate;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO {{ service }}_migrate;
GRANT ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA public TO {{ service }}_migrate;
-- =============================================================================
-- 6. Set default privileges for future objects (CRITICAL)
-- Ensures new tables created during migrations are accessible
-- =============================================================================
-- Default privileges for objects created by _owner role
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO {{ service }}_app;
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public
GRANT USAGE, SELECT ON SEQUENCES TO {{ service }}_app;
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public
GRANT ALL PRIVILEGES ON TABLES TO {{ service }}_migrate;
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public
GRANT ALL PRIVILEGES ON SEQUENCES TO {{ service }}_migrate;
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public
GRANT ALL PRIVILEGES ON FUNCTIONS TO {{ service }}_migrate;
-- Default privileges for objects created by _migrate role
-- (migrations run as _migrate via SET ROLE, so tables are owned by _migrate)
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_migrate IN SCHEMA public
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO {{ service }}_app;
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_migrate IN SCHEMA public
GRANT USAGE, SELECT ON SEQUENCES TO {{ service }}_app;
-- =============================================================================
-- 7. Grant roles to vault_admin (WITH ADMIN OPTION for dynamic role creation)
-- =============================================================================
GRANT {{ service }}_app TO vault_admin WITH ADMIN OPTION;
GRANT {{ service }}_migrate TO vault_admin WITH ADMIN OPTION;
GRANT {{ service }}_owner TO vault_admin; -- For REASSIGN OWNED during revocation

34
ansible/templates/unbound-local-zones.conf.j2 Normal file

@ -0,0 +1,34 @@
# Local zones for Nebula overlay network
# Generated by Ansible from inventory - do not edit manually
# ============================================================
# .nebula zone - All VMs
# ============================================================
local-zone: "nebula." static
# Lighthouse (not in inventory, uses variable)
local-data: "lighthouse.nebula. IN A {{ lighthouse_nebula_ip }}"
local-data-ptr: "{{ lighthouse_nebula_ip }} lighthouse.nebula"
# Proxmox host (not in inventory)
local-data: "proxmox.nebula. IN A 10.10.10.1"
local-data-ptr: "10.10.10.1 proxmox.nebula"
# All VMs from inventory
{% for host in groups['all'] %}
local-data: "{{ host }}.nebula. IN A {{ hostvars[host]['nebula_ip'] }}"
local-data-ptr: "{{ hostvars[host]['nebula_ip'] }} {{ host }}.nebula"
{% endfor %}
# ============================================================
# Custom domain aliases (optional)
# Add your own domain mappings here
# ============================================================
# Example:
# local-zone: "myapp.infra.example." static
# local-data: "myapp.infra.example. IN A {{ hostvars['app-server']['nebula_ip'] }}"
# ============================================================
# Reverse DNS zone for 10.10.10.0/24
# ============================================================
local-zone: "10.10.10.in-addr.arpa." static

View File

@ -0,0 +1,55 @@
# Unbound DNS configuration for Nebula overlay network
# Deployed by Ansible - do not edit manually
server:
    # Network settings - bind to Nebula interface only
    interface: 127.0.0.1
    interface: {{ hostvars['dns']['nebula_ip'] }}
    port: 53
    do-ip4: yes
    do-ip6: no
    do-udp: yes
    do-tcp: yes

    # Access control - Nebula network only
    access-control: 127.0.0.0/8 allow
    access-control: 10.10.10.0/24 allow
    access-control: 0.0.0.0/0 refuse

    # Performance tuning
    num-threads: 2
    msg-cache-size: 16m
    rrset-cache-size: 32m
    cache-min-ttl: 300
    cache-max-ttl: 86400

    # Privacy
    hide-identity: yes
    hide-version: yes

    # Security hardening
    harden-glue: yes
    harden-dnssec-stripped: yes
    harden-referral-path: yes
    use-caps-for-id: yes

    # Rate limiting
    ip-ratelimit: 100

    # Logging
    verbosity: 1
    logfile: ""
    use-syslog: yes

    # Include local zone definitions
    include: /etc/unbound/local-zones.conf

# Forward external queries to public DNS
# Cloudflare primary (faster - we're behind their proxy)
# Google fallback
forward-zone:
    name: "."
    forward-addr: 1.1.1.1
    forward-addr: 1.0.0.1
    forward-addr: 8.8.8.8
    forward-addr: 8.8.4.4

View File

@ -0,0 +1,15 @@
[Unit]
Description=Unbound DNS resolver
Documentation=man:unbound(8)
After=network.target nebula.service
Wants=nebula.service
[Service]
Type=simple
ExecStart=/usr/bin/unbound -d -c /etc/unbound/unbound.conf
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,10 @@
# Valkey ACL Configuration
# Admin user has full access
user admin on >{{ valkey_admin_password }} ~* &* +@all
# Disable default user
user default off
# Service users are created dynamically by data-service.yml
# Example format:
# user myapp on >password ~myapp:* &* +@all

View File

@ -0,0 +1,44 @@
# Valkey Cluster Configuration
# Generated by Ansible - do not edit manually
# Network
bind {{ nebula_ip }}
port 6379
protected-mode yes
# Cluster mode
cluster-enabled yes
cluster-config-file /var/lib/valkey/nodes.conf
cluster-node-timeout 5000
cluster-announce-ip {{ nebula_ip }}
cluster-announce-port 6379
cluster-announce-bus-port 16379
# General
daemonize no
pidfile /run/valkey/valkey.pid
loglevel notice
logfile ""
# Databases (cluster mode only uses db 0)
databases 1
# Memory Management
maxmemory {{ valkey_maxmemory }}
maxmemory-policy {{ valkey_maxmemory_policy }}
# Persistence (minimal for cluster mode)
save ""
appendonly no
# Security - ACL-based authentication
aclfile /etc/valkey/users.acl
# Limits
maxclients 1000
timeout 0
tcp-keepalive 300
# Slow log
slowlog-log-slower-than 10000
slowlog-max-len 128

View File

@ -0,0 +1,21 @@
# Valkey Sentinel Configuration
# Generated by Ansible - do not edit manually
port 26379
bind {{ nebula_ip }}
# Sentinel monitoring configuration
sentinel monitor valkey-ha {{ hostvars['valkey-01']['nebula_ip'] }} 6379 2
sentinel auth-pass valkey-ha {{ valkey_admin_password }}
sentinel auth-user valkey-ha admin
sentinel down-after-milliseconds valkey-ha 5000
sentinel failover-timeout valkey-ha 60000
sentinel parallel-syncs valkey-ha 1
# Sentinel authentication
sentinel sentinel-user admin
sentinel sentinel-pass {{ valkey_admin_password }}
# Announce IP for Nebula network
sentinel announce-ip {{ nebula_ip }}
sentinel announce-port 26379

View File

@ -0,0 +1,46 @@
# Valkey Standalone Configuration (Master-Replica mode)
# Generated by Ansible - do not edit manually
# Network
bind {{ nebula_ip }}
port 6379
protected-mode yes
# Disable cluster mode
cluster-enabled no
# General
daemonize no
pidfile /run/valkey/valkey.pid
loglevel notice
logfile ""
# Databases
databases 16
# Memory Management
maxmemory {{ valkey_maxmemory }}
maxmemory-policy {{ valkey_maxmemory_policy }}
# Persistence (minimal for caching)
save ""
appendonly no
# Security - ACL-based authentication
aclfile /etc/valkey/users.acl
# Replication (configured on replicas only)
{% if valkey_role == 'replica' %}
replicaof {{ hostvars['valkey-01']['nebula_ip'] }} 6379
masterauth {{ valkey_admin_password }}
masteruser admin
{% endif %}
# Limits
maxclients 1000
timeout 0
tcp-keepalive 300
# Slow log
slowlog-log-slower-than 10000
slowlog-max-len 128

View File

@ -0,0 +1,40 @@
# Valkey Configuration
# Generated by Ansible - do not edit manually
# Network
bind {{ nebula_ip }}
port 6379
protected-mode yes
# General
daemonize no
pidfile /run/valkey/valkey.pid
loglevel notice
logfile ""
# Databases (0-15 available for multi-tenant use)
# See services.yml for DB allocation
databases 16
# Memory Management
maxmemory {{ valkey_maxmemory }}
maxmemory-policy {{ valkey_maxmemory_policy }}
# Persistence (disable for pure caching)
# Enable if you need persistence
save ""
appendonly no
# Security - ACL-based authentication
# Each service gets its own ACL user with scoped key prefix access
# Users are provisioned by data-service.yml and stored in Vault
aclfile /etc/valkey/users.acl
# Limits
maxclients 1000
timeout 0
tcp-keepalive 300
# Slow log
slowlog-log-slower-than 10000
slowlog-max-len 128

44
ansible/vault/README.md Normal file
View File

@ -0,0 +1,44 @@
# Ansible Vault Secrets
This directory stores encrypted secrets used by playbooks.
## Setup
1. Create a password file (excluded from git):
```bash
echo "your-vault-password" > ansible_vault_pass
chmod 600 ansible_vault_pass
```
2. Create the secrets file:
```bash
ansible-vault create secrets.yml --vault-password-file ansible_vault_pass
```
3. Add your secrets (example structure):
```yaml
# Valkey admin password (used by valkey.yml)
valkey_admin_password: "your-strong-password"
# Vault admin database password (used by data-service.yml)
vault_admin_password: "your-vault-admin-password"
```
## Usage
Reference in playbooks:
```yaml
vars_files:
- ../vault/secrets.yml
```
Run playbooks with vault password:
```bash
ansible-playbook -i inventory.ini playbooks/valkey.yml --vault-password-file vault/ansible_vault_pass
```
Or set the environment variable:
```bash
export ANSIBLE_VAULT_PASSWORD_FILE=vault/ansible_vault_pass
ansible-playbook -i inventory.ini playbooks/valkey.yml
```

139
docs/architecture.md Normal file
View File

@ -0,0 +1,139 @@
# Architecture
This document explains the design decisions behind Arvandor.
## Network Separation
### Why Two Networks?
```
Internet ──► Proxmox Host ──► vmbr1 (192.168.100.0/24)
                          └──► Nebula (10.10.10.0/24)
```
**Bridge Network (vmbr1)**
- Used only for Terraform provisioning and Ansible access
- Each VM's firewall blocks all bridge traffic except from the Proxmox host
- No inter-VM communication on this network
**Nebula Overlay**
- All application traffic uses encrypted Nebula tunnels
- Group-based firewall rules for segmentation
- Works across any network boundary (cloud, datacenter, home)
### Benefits
1. **Defense in depth** - Compromise of bridge network doesn't expose services
2. **Migration ready** - Move VMs anywhere, Nebula handles connectivity
3. **Zero-trust** - VMs authenticate via certificates, not network position
## VMID Allocation
VMIDs follow a logical pattern:
| Range | Purpose | Example |
|-------|---------|---------|
| 1000-1999 | Management | DNS, Caddy |
| 2000-2999 | Services | Vault, Gitea |
| 3000-3999 | Data | PostgreSQL, Valkey |
| 4000-4999 | Workloads | Applications |
| 5000-5999 | Monitoring | Prometheus |
The last digits determine the IP address:
- VMID 1001 → x.x.x.11
- VMID 3000 → x.x.x.30
## High Availability
All data services run as 3-node clusters:
### PostgreSQL (Patroni + etcd)
```
┌─────────────┐    ┌─────────────┐    ┌─────────────┐
│ postgres-01 │    │ postgres-02 │    │ postgres-03 │
│   Leader    │◄───│   Replica   │◄───│   Replica   │
│   + etcd    │    │   + etcd    │    │   + etcd    │
└─────────────┘    └─────────────┘    └─────────────┘
```
- Patroni handles leader election
- etcd provides distributed consensus
- Automatic failover on leader failure
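A hedged sketch for checking cluster health from any database node (assumes `patronictl` and `etcdctl` are installed by the playbook and that the Patroni config lives at `/etc/patroni/patroni.yml`, which these docs do not pin down):
```bash
# Show which node currently holds the leader role
ssh admin@10.10.10.30 'patronictl -c /etc/patroni/patroni.yml list'

# Confirm etcd consensus across the three nodes (v3 API, default client port 2379 assumed)
ssh admin@10.10.10.30 'etcdctl endpoint health \
  --endpoints=http://10.10.10.30:2379,http://10.10.10.31:2379,http://10.10.10.32:2379'
```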
### Valkey (Sentinel)
```
┌─────────────┐    ┌─────────────┐    ┌─────────────┐
│  valkey-01  │    │  valkey-02  │    │  valkey-03  │
│   Master    │────│   Replica   │    │   Replica   │
│ + Sentinel  │    │ + Sentinel  │    │ + Sentinel  │
└─────────────┘    └─────────────┘    └─────────────┘
```
- Sentinel monitors master health
- Automatic promotion on master failure
- ACL-based per-service key isolation
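A hedged way to see the failover state at runtime (assumes `valkey-cli` is available and that the Sentinel port accepts unauthenticated queries, since no `requirepass` is set on the Sentinels in this repo's template):
```bash
# Ask any Sentinel which node it currently considers the master
valkey-cli -h 10.10.10.33 -p 26379 SENTINEL get-master-addr-by-name valkey-ha

# List the known replicas and their replication state
valkey-cli -h 10.10.10.33 -p 26379 SENTINEL replicas valkey-ha
```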
### Vault (Raft)
```
┌─────────────┐    ┌─────────────┐    ┌─────────────┐
│  vault-01   │    │  vault-02   │    │  vault-03   │
│   Leader    │────│   Standby   │────│   Standby   │
└─────────────┘    └─────────────┘    └─────────────┘
```
- Integrated Raft storage (no external backend)
- Automatic leader election
- Unseal required after restart
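A hedged operational sketch (the listener address and port 8200 are assumptions; adjust to the Vault configuration your playbook deploys):
```bash
export VAULT_ADDR=http://10.10.10.20:8200

vault status                    # seal state, HA mode, and Raft leader
vault operator raft list-peers  # requires an authenticated token
vault operator unseal           # run on each node after a restart
```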
## Security Model
### Three-Layer Firewall
```
┌────────────────────────────────────────────┐
│ 1. Proxmox VM Firewall → Egress control    │
│ 2. Nebula Groups → East-west segmentation  │
│ 3. Guest iptables → Defense in depth       │
└────────────────────────────────────────────┘
```
### Nebula Groups
| Group | Can Access |
|-------|------------|
| admin | Everything |
| infrastructure | infrastructure |
| projects | infrastructure |
| games | Nothing (isolated) |
### Vault Integration
Applications use Vault for:
- Dynamic database credentials (short-lived)
- Service secrets (API keys, etc.)
- AppRole authentication
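A hedged sketch of the application-side flow; the mount paths (`auth/approle`, `database/creds/myapp-app`) mirror the examples used elsewhere in these docs and should be confirmed against your Vault setup:
```bash
# Exchange an AppRole role_id/secret_id for a token, then request
# short-lived database credentials.
VAULT_TOKEN=$(vault write -field=token auth/approle/login \
  role_id="$ROLE_ID" secret_id="$SECRET_ID")

VAULT_TOKEN="$VAULT_TOKEN" vault read database/creds/myapp-app
```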
## Service Discovery
Internal DNS provides hostname resolution:
```
<hostname>.nebula → Nebula IP
```
VMs query 10.10.10.11 (DNS server) via Nebula. External queries forward to Cloudflare (1.1.1.1).
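For example (a sketch, assuming the DNS playbook has already been applied and `postgres-01` is in the inventory):
```bash
dig @10.10.10.11 postgres-01.nebula +short   # internal name → Nebula IP
dig @10.10.10.11 -x 10.10.10.30 +short       # reverse lookup via the PTR records
dig @10.10.10.11 example.com +short          # forwarded to the public resolvers
```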
## Provisioning Flow
```
1. terraform apply → Create VM
2. bootstrap.yml → Update packages
3. security.yml → Configure firewall
4. nebula.yml → Join overlay network
5. <service>.yml → Deploy service
6. data-service.yml → Provision credentials
```

197
docs/getting-started.md Normal file
View File

@ -0,0 +1,197 @@
# Getting Started
This guide walks through setting up Arvandor from scratch.
## Prerequisites
### Proxmox Host
- Proxmox VE 7.x or 8.x
- Two network bridges:
- `vmbr0` - Public interface
- `vmbr1` - Internal VM network (192.168.100.0/24)
- IP forwarding enabled
### VM Template
Create an Arch Linux template (VMID 9000):
1. Download Arch Linux ISO
2. Create VM, install Arch with basic setup
3. Install `openssh`, `python` (for Ansible)
4. Enable cloud-init or configure static user
5. Convert to template
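A hedged sketch of the Proxmox-side finishing steps (run on the host; assumes the VM was installed as VMID 9000 and that you use cloud-init rather than a static user):
```bash
qm set 9000 --agent enabled=1            # optional: QEMU guest agent
qm set 9000 --ide2 local-zfs:cloudinit   # attach a cloud-init drive (storage name is an assumption)
qm set 9000 --boot order=scsi0
qm template 9000                         # convert the VM into a template
```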
### Local Tools
```bash
# Terraform
wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
sudo apt update && sudo apt install terraform
# Ansible
pip install ansible
# Nebula
wget https://github.com/slackhq/nebula/releases/download/v1.9.0/nebula-linux-amd64.tar.gz
tar xzf nebula-linux-amd64.tar.gz
sudo mv nebula nebula-cert /usr/local/bin/
```
## Step 1: Configure Terraform
```bash
cd terraform
# Copy example configuration
cp terraform.tfvars.example terraform.tfvars
# Edit with your values
vim terraform.tfvars
```
Required variables:
- `proxmox_endpoint` - Your Proxmox API URL
- `proxmox_api_token_id` - API token ID
- `proxmox_api_token_secret` - API token secret
- `proxmox_node` - Node name (e.g., "pve")
- `username` - Default VM username
- `password` - Default VM password
- `ssh_key_path` - Path to your SSH public key
## Step 2: Create Proxmox API Token
In Proxmox:
1. Datacenter → Permissions → API Tokens
2. Add token for a user with `PVEAdmin` or `Administrator` role
3. Copy the token ID and secret
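Alternatively, a hedged CLI sketch (run on the Proxmox host); the user and token names match the `terraform.tfvars.example` value `terraform@pve!terraform`:
```bash
pveum user add terraform@pve
pveum aclmod / -user terraform@pve -role Administrator
pveum user token add terraform@pve terraform --privsep 0   # disable privilege separation so the token inherits the role
```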
## Step 3: Generate Nebula CA
```bash
cd nebula
# Generate Certificate Authority
nebula-cert ca -name "Arvandor CA" -duration 87600h
# This creates:
# - ca.crt (share with all hosts)
# - ca.key (keep secure!)
```
## Step 4: Provision VMs
```bash
cd terraform
terraform init
terraform plan
terraform apply
```
This creates all VMs defined in the .tf files.
## Step 5: Generate Nebula Certificates
For each VM, generate a certificate:
```bash
cd nebula
# DNS server
nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
-name "dns" -networks "10.10.10.11/24" -groups "infrastructure" \
-out-crt configs/1001/dns/dns.crt \
-out-key configs/1001/dns/dns.key
# Repeat for all VMs...
```
## Step 6: Configure Ansible Inventory
```bash
cd ansible
cp inventory.ini.example inventory.ini
vim inventory.ini
```
Update:
- VM hostnames and IPs
- SSH jump host configuration
- Infrastructure variables
## Step 7: Bootstrap VMs
Run playbooks in order:
```bash
# Update packages, reboot if kernel changed
ansible-playbook -i inventory.ini playbooks/bootstrap.yml
# Configure iptables and fail2ban
ansible-playbook -i inventory.ini playbooks/security.yml
# Join Nebula overlay network
ansible-playbook -i inventory.ini playbooks/nebula.yml
```
## Step 8: Deploy Core Services
```bash
# DNS (required for hostname resolution)
ansible-playbook -i inventory.ini playbooks/dns.yml
ansible-playbook -i inventory.ini playbooks/dns-client.yml
# PostgreSQL HA cluster
ansible-playbook -i inventory.ini playbooks/postgres-ha.yml
# Valkey with Sentinel
ansible-playbook -i inventory.ini playbooks/valkey-sentinel.yml
# Garage S3 storage
ansible-playbook -i inventory.ini playbooks/garage.yml
```
## Step 9: Configure Host Port Forwarding
On the Proxmox host:
```bash
# Copy and configure the script
cp network/port-forward.sh.example /root/network/port-forward.sh
chmod +x /root/network/port-forward.sh
vim /root/network/port-forward.sh
# Test
./port-forward.sh --dry-run
# Apply
./port-forward.sh
```
## Verification
Test connectivity:
```bash
# SSH to VM via Nebula
ssh admin@10.10.10.11
# Test DNS resolution
dig @10.10.10.11 vault-01.nebula
# Test PostgreSQL
psql -h 10.10.10.30 -U postgres -c "SELECT 1"
# Test Valkey
valkey-cli -h 10.10.10.33 PING
```
## Next Steps
- Add your application VMs to `terraform/workloads.tf`
- Create services in `ansible/services.yml`
- Provision app credentials with `data-service.yml`

199
docs/provisioning-guide.md Normal file
View File

@ -0,0 +1,199 @@
# Provisioning Guide
Step-by-step guide for adding new VMs to the infrastructure.
## Adding a New VM
### 1. Choose VMID and IP
Select a VMID based on the VM's purpose:
| Purpose | VMID Range | Example |
|---------|------------|---------|
| Management | 1000-1999 | 1003 |
| Services | 2000-2999 | 2004 |
| Data | 3000-3999 | 3012 |
| Workloads | 4000-4999 | 4056 |
| Monitoring | 5000-5999 | 5001 |
IP is derived from VMID:
- Bridge: 192.168.100.XX
- Nebula: 10.10.10.XX
Where XX is the last 2 digits of VMID.
### 2. Add to Terraform
Edit the appropriate .tf file:
```hcl
module "myapp" {
source = "./modules/vm"
name = "myapp"
vmid = 4056
node_name = var.proxmox_node
bridge_ip = "192.168.100.56"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 4096
disk_size = 50
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
```
Apply:
```bash
cd terraform
terraform plan
terraform apply
```
### 3. Generate Nebula Certificate
```bash
cd nebula
nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
-name "myapp" \
-networks "10.10.10.56/24" \
-groups "projects" \
-out-crt configs/4056/myapp/myapp.crt \
-out-key configs/4056/myapp/myapp.key
```
Choose the appropriate group:
- `infrastructure` - Core services
- `projects` - Applications needing infrastructure access
- `games` - Isolated workloads
### 4. Add to Ansible Inventory
Edit `ansible/inventory.ini`:
```ini
[projects]
myapp ansible_host=192.168.100.56 nebula_ip=10.10.10.56 vmid=4056
[docker]
myapp
```
### 5. Run Bootstrap Playbooks
```bash
cd ansible
# Update packages
ansible-playbook -i inventory.ini playbooks/bootstrap.yml --limit "myapp"
# Configure firewall
ansible-playbook -i inventory.ini playbooks/security.yml --limit "myapp"
# Join Nebula
ansible-playbook -i inventory.ini playbooks/nebula.yml --limit "myapp"
# Configure DNS client
ansible-playbook -i inventory.ini playbooks/dns-client.yml --limit "myapp"
# Install Docker (if needed)
ansible-playbook -i inventory.ini playbooks/docker.yml --limit "myapp"
```
### 6. Update DNS (Optional)
If you want a `.nebula` hostname, re-run the DNS playbook:
```bash
ansible-playbook -i inventory.ini playbooks/dns.yml
```
### 7. Verify
```bash
# Test SSH via Nebula
ssh admin@10.10.10.56
# Test hostname resolution
dig @10.10.10.11 myapp.nebula
```
## Adding a Service with Database
### 1. Define in services.yml
```yaml
services:
  myapp:
    description: "My Application"
    host: myapp
    deploy_path: /opt/myapp
    postgres:
      enabled: true
    valkey:
      enabled: true
      key_prefix: "myapp"
    s3:
      enabled: true
      bucket: "myapp-media"
    vault_roles:
      - app
      - migrate
```
### 2. Provision Data Services
```bash
ansible-playbook -i inventory.ini playbooks/data-service.yml -e "service=myapp"
```
This creates:
- PostgreSQL database with static roles
- Valkey ACL user with key prefix
- Garage S3 bucket with API key
- Vault database engine roles
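A hedged way to confirm what was provisioned (role and path names follow the retrieval examples below):
```bash
vault list database/roles                         # expect myapp-app and myapp-migrate
vault kv list secret/myapp                        # expect valkey and s3 entries
psql -h 10.10.10.30 -U postgres -l | grep myapp   # the database itself
```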
### 3. Retrieve Credentials
```bash
# Database credentials (dynamic)
vault read database/creds/myapp-app
vault read database/creds/myapp-migrate
# Valkey credentials (static, stored in Vault)
vault kv get secret/myapp/valkey
# S3 credentials (static, stored in Vault)
vault kv get secret/myapp/s3
```
## Removing a VM
### 1. Remove from Terraform
Comment out or delete the module from .tf file, then:
```bash
terraform plan
terraform apply
```
### 2. Remove from Inventory
Edit `ansible/inventory.ini` and remove the host.
### 3. Clean up Certificates
```bash
rm -rf nebula/configs/<vmid>/
```
### 4. Update DNS
```bash
ansible-playbook -i inventory.ini playbooks/dns.yml
```

8
nebula/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# CA private key - NEVER COMMIT
ca.key
# All private keys
*.key
# Host private keys in configs/
configs/*/*/*.key

164
nebula/README.md Normal file
View File

@ -0,0 +1,164 @@
# Nebula Overlay Network
Nebula is a scalable overlay network that provides encrypted connectivity between all VMs regardless of their physical location.
## Architecture
```
                     ┌─────────────┐
                     │ Lighthouse  │
                     │ 10.10.10.10 │
                     └──────┬──────┘
          ┌─────────────────┼─────────────────┐
          │                 │                 │
   ┌──────▼──────┐   ┌──────▼──────┐   ┌──────▼──────┐
   │    VM 1     │   │    VM 2     │   │    VM 3     │
   │ 10.10.10.11 │   │ 10.10.10.20 │   │ 10.10.10.30 │
   └─────────────┘   └─────────────┘   └─────────────┘
```
## Groups
Nebula uses groups for firewall segmentation:
| Group | Purpose | Can Access |
|-------|---------|------------|
| `admin` | Personal devices | Everything |
| `infrastructure` | Core services | Each other |
| `projects` | Application workloads | Infrastructure |
| `lighthouse` | Nebula relays | - |
| `games` | Game servers | Isolated |
## Setup
### 1. Generate Certificate Authority
```bash
nebula-cert ca -name "Arvandor CA" -duration 87600h
```
This creates:
- `ca.crt` - Certificate (share with all hosts)
- `ca.key` - Private key (keep secure, do not commit!)
### 2. Generate Host Certificates
```bash
# Infrastructure VM example
nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
-name "dns" \
-networks "10.10.10.11/24" \
-groups "infrastructure" \
-out-crt configs/1001/dns/dns.crt \
-out-key configs/1001/dns/dns.key
# Application VM example
nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
-name "app-server" \
-networks "10.10.10.50/24" \
-groups "projects" \
-out-crt configs/4050/app-server/app-server.crt \
-out-key configs/4050/app-server/app-server.key
# Lighthouse
nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
-name "lighthouse" \
-networks "10.10.10.10/24" \
-groups "infrastructure,lighthouse" \
-out-crt configs/1000/lighthouse/lighthouse.crt \
-out-key configs/1000/lighthouse/lighthouse.key
```
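To double-check a certificate's name, network, and groups before deploying it (a small sketch using `nebula-cert print`):
```bash
nebula-cert print -path ca.crt
nebula-cert print -path configs/1001/dns/dns.crt
```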
### 3. Directory Structure
```
nebula/
├── ca.crt # Certificate authority (commit this)
├── ca.key # CA private key (DO NOT COMMIT)
├── configs/
│ ├── 1000/lighthouse/
│ │ ├── lighthouse.crt
│ │ └── lighthouse.key
│ ├── 1001/dns/
│ │ ├── dns.crt
│ │ └── dns.key
│ └── ...
└── README.md
```
### 4. Deploy with Ansible
The `nebula.yml` playbook deploys certificates and configuration:
```bash
ansible-playbook -i inventory.ini playbooks/nebula.yml --limit "new-vm"
```
## Lighthouse Configuration
The lighthouse requires manual configuration (not managed by Ansible):
```yaml
# /etc/nebula/config.yml on lighthouse
pki:
  ca: /etc/nebula/ca.crt
  cert: /etc/nebula/config.crt
  key: /etc/nebula/config.key

static_host_map: {}

lighthouse:
  am_lighthouse: true
  serve_dns: false

listen:
  host: 0.0.0.0
  port: 4242

punchy:
  punch: true
  respond: true

relay:
  am_relay: true

tun:
  dev: nebula1
  drop_local_broadcast: true
  drop_multicast: true

firewall:
  conntrack:
    tcp_timeout: 12h
    udp_timeout: 3m
    default_timeout: 10m

  outbound:
    - port: any
      proto: any
      host: any

  inbound:
    - port: any
      proto: any
      group: admin

    - port: any
      proto: any
      group: infrastructure

    - port: any
      proto: icmp
      host: any
```
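A hedged sketch of putting this into place on the lighthouse (assumes the `nebula` binary is installed and a systemd unit named `nebula.service` exists, which this repo does not ship for the lighthouse):
```bash
sudo install -d /etc/nebula
sudo cp ca.crt /etc/nebula/ca.crt
sudo cp configs/1000/lighthouse/lighthouse.crt /etc/nebula/config.crt
sudo cp configs/1000/lighthouse/lighthouse.key /etc/nebula/config.key
# ...plus the config.yml shown above copied to /etc/nebula/config.yml

sudo nebula -test -config /etc/nebula/config.yml   # validate before starting
sudo systemctl enable --now nebula
```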
## IP Allocation
| VMID Range | Network Segment | Last Octet |
|------------|-----------------|------------|
| 1000-1999 | Management | 10-19 |
| 2000-2999 | Services | 20-29 |
| 3000-3999 | Data | 30-49 |
| 4000-4999 | Workloads | 50-59 |
| 5000-5999 | Monitoring | 90-99 |
Example: VMID 3000 → 10.10.10.30

61
network/ip-schema.example Normal file
View File

@ -0,0 +1,61 @@
# Arvandor IP Schema
#
# This documents the IP addressing scheme for the infrastructure.
## Networks
| Network | CIDR | Purpose |
|---------|------|---------|
| Public | 203.0.113.10 | External access (vmbr0) |
| Bridge | 192.168.100.0/24 | VM provisioning network (vmbr1) |
| Nebula | 10.10.10.0/24 | Encrypted overlay network |
## VMID Ranges
| Range | Domain | Nebula Group | Purpose |
|-------|--------|--------------|---------|
| 1000-1999 | Management | infrastructure | DNS, Caddy, Lighthouse |
| 2000-2999 | Services | infrastructure | Vault, Gitea |
| 3000-3999 | Data | infrastructure | PostgreSQL, Valkey, Garage |
| 4000-4999 | Workloads | projects/games | Applications, game servers |
| 5000-5999 | Monitoring | infrastructure | Prometheus, Grafana, Loki |
## IP Pattern
VMID determines IP address. Last 2-3 digits become the last octet:
- VMID 1001 → 192.168.100.11 / 10.10.10.11
- VMID 2000 → 192.168.100.20 / 10.10.10.20
- VMID 3009 → 192.168.100.39 / 10.10.10.39
## Reserved Addresses
| IP | Host | Purpose |
|----|------|---------|
| 192.168.100.1 | Proxmox host | Gateway, Ansible jump host |
| 10.10.10.1 | Proxmox host | Nebula endpoint for management |
| 10.10.10.10 | Lighthouse | Nebula discovery/relay |
| 10.10.10.11 | DNS | Internal DNS server |
| 10.10.10.12 | Caddy | Reverse proxy |
| 10.10.10.20-22 | Vault cluster | Secrets management |
| 10.10.10.30-32 | PostgreSQL | Database cluster |
| 10.10.10.33-35 | Valkey | Cache/queue cluster |
| 10.10.10.39-41 | Garage | S3 storage cluster |
## Example VM Allocation
```
VMID 1001 - dns
Bridge: 192.168.100.11
Nebula: 10.10.10.11
Group: infrastructure
VMID 2000 - vault-01
Bridge: 192.168.100.20
Nebula: 10.10.10.20
Group: infrastructure
VMID 4050 - app-server
Bridge: 192.168.100.50
Nebula: 10.10.10.50
Group: projects
```

View File

@ -0,0 +1,230 @@
#!/bin/bash
set -euo pipefail
# =============================================================================
# Arvandor Port Forwarding Script
# =============================================================================
# Configures NAT (DNAT/SNAT) and FORWARD rules for Proxmox host.
# Uses a custom chain (ARVANDOR-FORWARD) to avoid conflicts with PVE firewall.
#
# Usage:
# ./port-forward.sh # Apply rules
# ./port-forward.sh --dry-run # Show what would be done
# ./port-forward.sh --restore # Restore backup
# ./port-forward.sh --status # Show current rules
# =============================================================================
# -----------------------------------------------------------------------------
# Configuration - UPDATE THESE FOR YOUR ENVIRONMENT
# -----------------------------------------------------------------------------
NETWORK_INTERFACE="vmbr0"
INTERNAL_NETWORK="192.168.100.0/24"
PUBLIC_IP="203.0.113.10" # Your public IP
CUSTOM_CHAIN="ARVANDOR-FORWARD"
BACKUP_FILE="/root/network/iptables.backup"
# Nebula Lighthouse
NEBULA_IP="192.168.100.10"
NEBULA_PORT="4242"
# Caddy (Reverse Proxy)
CADDY_IP="192.168.100.12"
CADDY_HTTP_PORT="80"
CADDY_HTTPS_PORT="443"
# Gitea (Optional)
GITEA_IP="192.168.100.23"
GITEA_SSH_PORT="2222"
# Security - restrict SSH to specific IP
ALLOWED_SSH_IP="203.0.113.20" # Your home IP
# -----------------------------------------------------------------------------
# Functions
# -----------------------------------------------------------------------------
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
}
error() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $*" >&2
}
die() {
error "$*"
exit 1
}
check_root() {
[[ $EUID -eq 0 ]] || die "This script must be run as root"
}
check_interface() {
local iface=$1
ip link show "$iface" &>/dev/null || die "Interface $iface does not exist"
}
backup_rules() {
log "Backing up current iptables rules to $BACKUP_FILE"
mkdir -p "$(dirname "$BACKUP_FILE")"
iptables-save > "$BACKUP_FILE"
}
restore_rules() {
[[ -f "$BACKUP_FILE" ]] || die "Backup file $BACKUP_FILE not found"
log "Restoring iptables rules from $BACKUP_FILE"
iptables-restore < "$BACKUP_FILE"
log "Rules restored successfully"
}
setup_custom_chain() {
# Create custom chain if it doesn't exist
if ! iptables -L "$CUSTOM_CHAIN" -n &>/dev/null; then
log "Creating custom chain: $CUSTOM_CHAIN"
iptables -N "$CUSTOM_CHAIN"
fi
# Ensure chain is jumped to from FORWARD (only once)
if ! iptables -C FORWARD -j "$CUSTOM_CHAIN" &>/dev/null; then
log "Inserting jump to $CUSTOM_CHAIN in FORWARD chain"
iptables -I FORWARD 1 -j "$CUSTOM_CHAIN"
fi
# Flush the custom chain
log "Flushing custom chain: $CUSTOM_CHAIN"
iptables -F "$CUSTOM_CHAIN"
}
apply_rules() {
local dry_run=${1:-false}
if [[ "$dry_run" == "true" ]]; then
log "=== DRY RUN MODE - No changes will be made ==="
echo ""
echo "Would apply the following rules:"
echo ""
echo "NAT PREROUTING (DNAT):"
echo " - UDP $NEBULA_PORT → $NEBULA_IP:$NEBULA_PORT (Nebula)"
echo " - TCP $CADDY_HTTP_PORT → $CADDY_IP:$CADDY_HTTP_PORT (HTTP)"
echo " - TCP $CADDY_HTTPS_PORT → $CADDY_IP:$CADDY_HTTPS_PORT (HTTPS)"
echo " - TCP $GITEA_SSH_PORT → $GITEA_IP:$GITEA_SSH_PORT (Gitea SSH)"
echo ""
echo "FORWARD chain ($CUSTOM_CHAIN):"
echo " - Allow traffic to all above destinations"
echo ""
echo "INPUT:"
echo " - Allow Nebula (nebula1 interface)"
echo " - Allow SSH from $ALLOWED_SSH_IP"
echo " - Drop SSH from all others"
echo " - Block Proxmox UI from $NETWORK_INTERFACE"
return
fi
# --- NAT Rules ---
log "Flushing NAT rules..."
iptables -t nat -F PREROUTING
iptables -t nat -F POSTROUTING
log "Setting up NAT masquerading..."
iptables -t nat -A POSTROUTING -s "$INTERNAL_NETWORK" -o "$NETWORK_INTERFACE" -j MASQUERADE
log "Setting up hairpin NAT for Nebula..."
iptables -t nat -A PREROUTING -s "$INTERNAL_NETWORK" -d "$PUBLIC_IP" -p udp --dport "$NEBULA_PORT" -j DNAT --to-destination "$NEBULA_IP:$NEBULA_PORT"
iptables -t nat -A POSTROUTING -s "$INTERNAL_NETWORK" -d "$NEBULA_IP" -p udp --dport "$NEBULA_PORT" -j SNAT --to-source "$PUBLIC_IP"
log "Setting up hairpin NAT for Gitea SSH..."
iptables -t nat -A PREROUTING -s "$INTERNAL_NETWORK" -d "$PUBLIC_IP" -p tcp --dport "$GITEA_SSH_PORT" -j DNAT --to-destination "$GITEA_IP:$GITEA_SSH_PORT"
iptables -t nat -A POSTROUTING -s "$INTERNAL_NETWORK" -d "$GITEA_IP" -p tcp --dport "$GITEA_SSH_PORT" -j SNAT --to-source "$PUBLIC_IP"
log "Setting up DNAT rules..."
# Nebula
iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p udp --dport "$NEBULA_PORT" -j DNAT --to-destination "$NEBULA_IP:$NEBULA_PORT"
# Caddy
iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p tcp --dport "$CADDY_HTTP_PORT" -j DNAT --to-destination "$CADDY_IP:$CADDY_HTTP_PORT"
iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p tcp --dport "$CADDY_HTTPS_PORT" -j DNAT --to-destination "$CADDY_IP:$CADDY_HTTPS_PORT"
# Gitea SSH
iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p tcp --dport "$GITEA_SSH_PORT" -j DNAT --to-destination "$GITEA_IP:$GITEA_SSH_PORT"
# --- FORWARD Rules (custom chain) ---
setup_custom_chain
log "Adding FORWARD rules to $CUSTOM_CHAIN..."
iptables -A "$CUSTOM_CHAIN" -d "$CADDY_IP" -p tcp --dport "$CADDY_HTTP_PORT" -j ACCEPT
iptables -A "$CUSTOM_CHAIN" -d "$CADDY_IP" -p tcp --dport "$CADDY_HTTPS_PORT" -j ACCEPT
iptables -A "$CUSTOM_CHAIN" -d "$NEBULA_IP" -p udp --dport "$NEBULA_PORT" -j ACCEPT
iptables -A "$CUSTOM_CHAIN" -d "$GITEA_IP" -p tcp --dport "$GITEA_SSH_PORT" -j ACCEPT
# --- INPUT Rules ---
log "Flushing INPUT rules..."
iptables -F INPUT
log "Setting up INPUT rules..."
iptables -A INPUT -i nebula1 -j ACCEPT
iptables -A INPUT -p tcp --dport 22 -s "$ALLOWED_SSH_IP" -j ACCEPT
iptables -A INPUT -p tcp --dport 22 -j DROP
iptables -I INPUT -i "$NETWORK_INTERFACE" -p tcp --dport 8006 -j DROP
iptables -I INPUT -i vmbr1 -p tcp --dport 8006 -j ACCEPT
}
save_rules() {
log "Saving iptables rules persistently..."
if command -v netfilter-persistent &>/dev/null; then
netfilter-persistent save
log "Rules saved via netfilter-persistent"
else
die "netfilter-persistent not found. Install with: apt install iptables-persistent"
fi
}
show_status() {
echo ""
echo "=== Port Forwarding Status ==="
echo ""
echo "NAT PREROUTING rules:"
iptables -t nat -L PREROUTING -n --line-numbers 2>/dev/null | head -20
echo ""
echo "FORWARD chain ($CUSTOM_CHAIN):"
iptables -L "$CUSTOM_CHAIN" -n --line-numbers 2>/dev/null || echo "Chain not found"
echo ""
echo "=== Services ==="
echo " HTTP/HTTPS: 80,443 → Caddy ($CADDY_IP)"
echo " Nebula: $NEBULA_PORT → Lighthouse ($NEBULA_IP)"
echo " Gitea SSH: $GITEA_SSH_PORT → $GITEA_IP"
}
# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
main() {
local action="${1:-apply}"
case "$action" in
--dry-run|-n)
check_root
check_interface "$NETWORK_INTERFACE"
apply_rules true
;;
--restore|-r)
check_root
restore_rules
;;
--status|-s)
show_status
;;
apply|"")
check_root
check_interface "$NETWORK_INTERFACE"
backup_rules
apply_rules false
save_rules
log "Setup complete!"
show_status
;;
*)
echo "Usage: $0 [--dry-run|--restore|--status]"
exit 1
;;
esac
}
main "$@"

15
terraform/.gitignore vendored Normal file
View File

@ -0,0 +1,15 @@
# Terraform
.terraform/
*.tfstate
*.tfstate.*
*.tfvars
!*.tfvars.example
*.tfvars.json
crash.log
crash.*.log
override.tf
override.tf.json
*_override.tf
*_override.tf.json
.terraformrc
terraform.rc

184
terraform/data.tf Normal file
View File

@ -0,0 +1,184 @@
# Data Tier (3000-3999)
#
# Enterprise HA data services with automatic failover.
# All VMs communicate via Nebula overlay (10.10.10.x) for migration-ready architecture.
#
# PostgreSQL Cluster (Patroni + etcd):
# 3000 postgres-01 10.10.10.30 - Patroni node (primary/replica elected dynamically)
# 3001 postgres-02 10.10.10.31 - Patroni node
# 3002 postgres-03 10.10.10.32 - Patroni node
#
# Valkey Sentinel (1 master + 2 replicas, Sentinel on each):
# 3003 valkey-01 10.10.10.33 - Master + Sentinel
# 3004 valkey-02 10.10.10.34 - Replica + Sentinel
# 3005 valkey-03 10.10.10.35 - Replica + Sentinel
#
# Garage S3 Cluster:
# 3009 garage-01 10.10.10.39 - S3-compatible storage node
# 3010 garage-02 10.10.10.40 - S3-compatible storage node
# 3011 garage-03 10.10.10.41 - S3-compatible storage node
# =============================================================================
# PostgreSQL HA Cluster (3 nodes)
# =============================================================================
module "postgres-01" {
source = "./modules/vm"
name = "postgres-01"
vmid = 3000
node_name = var.proxmox_node
bridge_ip = "192.168.100.30"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 4096
disk_size = 100
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
module "postgres-02" {
source = "./modules/vm"
name = "postgres-02"
vmid = 3001
node_name = var.proxmox_node
bridge_ip = "192.168.100.31"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 4096
disk_size = 100
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
module "postgres-03" {
source = "./modules/vm"
name = "postgres-03"
vmid = 3002
node_name = var.proxmox_node
bridge_ip = "192.168.100.32"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 4096
disk_size = 100
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
# =============================================================================
# Valkey Sentinel (3 nodes: 1 master + 2 replicas + Sentinel on each)
# =============================================================================
module "valkey-01" {
source = "./modules/vm"
name = "valkey-01"
vmid = 3003
node_name = var.proxmox_node
bridge_ip = "192.168.100.33"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 2048
disk_size = 50
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
module "valkey-02" {
source = "./modules/vm"
name = "valkey-02"
vmid = 3004
node_name = var.proxmox_node
bridge_ip = "192.168.100.34"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 2048
disk_size = 50
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
module "valkey-03" {
source = "./modules/vm"
name = "valkey-03"
vmid = 3005
node_name = var.proxmox_node
bridge_ip = "192.168.100.35"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 2048
disk_size = 50
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
# =============================================================================
# Garage S3 Cluster (3 nodes)
# =============================================================================
module "garage-01" {
source = "./modules/vm"
name = "garage-01"
vmid = 3009
node_name = var.proxmox_node
bridge_ip = "192.168.100.39"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 2048
disk_size = 200
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
module "garage-02" {
source = "./modules/vm"
name = "garage-02"
vmid = 3010
node_name = var.proxmox_node
bridge_ip = "192.168.100.40"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 2048
disk_size = 200
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
module "garage-03" {
source = "./modules/vm"
name = "garage-03"
vmid = 3011
node_name = var.proxmox_node
bridge_ip = "192.168.100.41"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 2048
disk_size = 200
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}

11
terraform/firewall.tf Normal file
View File

@ -0,0 +1,11 @@
# Firewall Configuration
#
# Security groups are managed manually in Proxmox UI:
# Datacenter Firewall Security Group
#
# Groups:
# - base-egress: HTTP, HTTPS, DNS, NTP (default for VMs)
# - restricted: UDP 4242 only (Nebula tunnels, no internet)
#
# VMs reference these groups via the firewall_security_group variable.
# East-west segmentation (VM-to-VM) is handled by Nebula groups.
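#
# A hedged CLI sketch for creating the (initially empty) groups from the Proxmox
# host before filling in their rules via the UI; verify the parameters with
# `pvesh usage /cluster/firewall/groups -v`:
#
#   pvesh create /cluster/firewall/groups --group base-egress --comment "HTTP, HTTPS, DNS, NTP egress"
#   pvesh create /cluster/firewall/groups --group restricted --comment "Nebula UDP 4242 only"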

37
terraform/management.tf Normal file
View File

@ -0,0 +1,37 @@
# Management (1000-1999)
#
# Core infrastructure services that other VMs depend on.
# Lighthouse and DNS should be provisioned first.
#
# VMs:
# 1000 lighthouse 192.168.100.10 - Nebula lighthouse/relay
# 1001 dns 192.168.100.11 - Internal DNS server
# 1002 caddy 192.168.100.12 - Reverse proxy
module "dns" {
source = "./modules/vm"
name = "dns"
vmid = 1001
node_name = var.proxmox_node
bridge_ip = "192.168.100.11"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
module "caddy" {
source = "./modules/vm"
name = "caddy"
vmid = 1002
node_name = var.proxmox_node
bridge_ip = "192.168.100.12"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}

View File

@ -0,0 +1,76 @@
terraform {
required_providers {
proxmox = {
source = "bpg/proxmox"
}
}
}
resource "proxmox_virtual_environment_vm" "vm" {
name = var.name
node_name = var.node_name
vm_id = var.vmid
clone {
vm_id = var.clone_vmid
}
cpu {
cores = var.cores
}
memory {
dedicated = var.memory
floating = var.memory_floating
}
disk {
datastore_id = var.datastore_id
interface = "scsi0"
iothread = true
discard = "on"
size = var.disk_size
}
network_device {
bridge = var.network_bridge
}
initialization {
datastore_id = var.datastore_id
ip_config {
ipv4 {
address = "${var.bridge_ip}/24"
gateway = var.gateway
}
}
user_account {
username = var.username
password = var.password
keys = [trimspace(file(var.ssh_key_path))]
}
}
}
# Firewall configuration - always manage options to explicitly enable/disable
resource "proxmox_virtual_environment_firewall_options" "vm" {
node_name = var.node_name
vm_id = proxmox_virtual_environment_vm.vm.vm_id
enabled = var.firewall_enabled
input_policy = var.firewall_enabled ? var.firewall_input_policy : "ACCEPT"
output_policy = var.firewall_enabled ? var.firewall_output_policy : "ACCEPT"
}
resource "proxmox_virtual_environment_firewall_rules" "vm" {
count = var.firewall_enabled ? 1 : 0
node_name = var.node_name
vm_id = proxmox_virtual_environment_vm.vm.vm_id
rule {
security_group = var.firewall_security_group
}
depends_on = [proxmox_virtual_environment_firewall_options.vm]
}

View File

@ -0,0 +1,14 @@
output "vm_id" {
value = proxmox_virtual_environment_vm.vm.vm_id
description = "The Proxmox VM ID"
}
output "ip_address" {
value = var.bridge_ip
description = "The bridge IP address"
}
output "name" {
value = proxmox_virtual_environment_vm.vm.name
description = "The VM name"
}

View File

@ -0,0 +1,107 @@
variable "name" {
type = string
description = "VM name"
}
variable "vmid" {
type = number
description = "Proxmox VM ID"
}
variable "node_name" {
type = string
description = "Proxmox node name"
}
variable "bridge_ip" {
type = string
description = "IP address on bridge network (without CIDR)"
}
variable "gateway" {
type = string
default = "192.168.100.1"
description = "Gateway IP address"
}
variable "network_bridge" {
type = string
default = "vmbr1"
description = "Network bridge name"
}
variable "datastore_id" {
type = string
default = "local-zfs"
description = "Proxmox datastore for disks"
}
variable "cores" {
type = number
default = 1
description = "Number of CPU cores"
}
variable "memory" {
type = number
default = 2048
description = "Memory in MB"
}
variable "memory_floating" {
type = number
default = null
description = "Floating memory (ballooning) in MB"
}
variable "disk_size" {
type = number
default = 50
description = "Disk size in GB"
}
variable "clone_vmid" {
type = number
default = 9000
description = "Template VM ID to clone from"
}
variable "username" {
type = string
description = "VM user account name"
}
variable "password" {
type = string
sensitive = true
description = "VM user account password"
}
variable "ssh_key_path" {
type = string
description = "Path to SSH public key file"
}
variable "firewall_enabled" {
type = bool
default = true
description = "Enable Proxmox firewall for this VM"
}
variable "firewall_security_group" {
type = string
default = "base-egress"
description = "Security group to assign (base-egress, infrastructure, restricted)"
}
variable "firewall_input_policy" {
type = string
default = "DROP"
description = "Default policy for inbound traffic"
}
variable "firewall_output_policy" {
type = string
default = "DROP"
description = "Default policy for outbound traffic"
}

14
terraform/providers.tf Normal file
View File

@ -0,0 +1,14 @@
terraform {
required_providers {
proxmox = {
source = "bpg/proxmox"
version = "0.66.1"
}
}
}
provider "proxmox" {
endpoint = var.proxmox_endpoint
api_token = "${var.proxmox_api_token_id}=${var.proxmox_api_token_secret}"
insecure = var.proxmox_insecure
}

78
terraform/services.tf Normal file
View File

@ -0,0 +1,78 @@
# Trusted Services (2000-2999)
#
# Infrastructure services that support development and operations.
# All VMs in this tier use the "infrastructure" Nebula group.
#
# VMs:
# 2000 vault-01 192.168.100.20 - Vault cluster node 1
# 2001 vault-02 192.168.100.21 - Vault cluster node 2
# 2002 vault-03 192.168.100.22 - Vault cluster node 3
# 2003 gitea 192.168.100.23 - Git hosting
module "vault-01" {
source = "./modules/vm"
name = "vault-01"
vmid = 2000
node_name = var.proxmox_node
bridge_ip = "192.168.100.20"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 2048
disk_size = 50
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
module "vault-02" {
source = "./modules/vm"
name = "vault-02"
vmid = 2001
node_name = var.proxmox_node
bridge_ip = "192.168.100.21"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 2048
disk_size = 50
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
module "vault-03" {
source = "./modules/vm"
name = "vault-03"
vmid = 2002
node_name = var.proxmox_node
bridge_ip = "192.168.100.22"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 2048
disk_size = 50
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}
module "gitea" {
source = "./modules/vm"
name = "gitea"
vmid = 2003
node_name = var.proxmox_node
bridge_ip = "192.168.100.23"
gateway = var.gateway
datastore_id = var.datastore_id
clone_vmid = var.template_vmid
cores = 2
memory = 2048
disk_size = 100
username = var.username
password = var.password
ssh_key_path = var.ssh_key_path
}

View File

@ -0,0 +1,13 @@
# Proxmox Connection
proxmox_endpoint = "https://proxmox.example:8006/"
proxmox_api_token_id = "terraform@pve!terraform"
proxmox_api_token_secret = "your-api-token-secret-here"
proxmox_insecure = true
proxmox_node = "pve"
# VM Defaults
username = "admin"
password = "changeme"
ssh_key_path = "~/.ssh/id_ed25519.pub"
datastore_id = "local-zfs"
template_vmid = 9000

74
terraform/vars.tf Normal file
View File

@ -0,0 +1,74 @@
# =============================================================================
# Proxmox Connection
# =============================================================================
variable "proxmox_endpoint" {
type = string
description = "Proxmox API endpoint (e.g., https://proxmox.example:8006/)"
}
variable "proxmox_api_token_id" {
type = string
description = "Proxmox API token ID (e.g., terraform@pve!terraform)"
}
variable "proxmox_api_token_secret" {
type = string
sensitive = true
description = "Proxmox API token secret"
}
variable "proxmox_insecure" {
type = bool
default = true
description = "Skip TLS verification for self-signed certificates"
}
variable "proxmox_node" {
type = string
description = "Proxmox node name to deploy VMs on"
}
# =============================================================================
# VM Defaults
# =============================================================================
variable "username" {
type = string
description = "Default VM user account name"
}
variable "password" {
type = string
sensitive = true
description = "Default VM user account password"
}
variable "ssh_key_path" {
type = string
description = "Path to SSH public key file"
}
variable "datastore_id" {
type = string
default = "local-zfs"
description = "Default Proxmox datastore for VM disks"
}
variable "network_bridge" {
type = string
default = "vmbr1"
description = "Default network bridge for VMs"
}
variable "gateway" {
type = string
default = "192.168.100.1"
description = "Default gateway for VMs"
}
variable "template_vmid" {
type = number
default = 9000
description = "Template VM ID to clone from"
}