From 0c77379ab3127b3bc81fb28e4614eddc8240df6f Mon Sep 17 00:00:00 2001 From: Damien Coles Date: Mon, 26 Jan 2026 00:44:31 -0500 Subject: [PATCH] public-ready-init --- .gitignore | 42 +++ README.md | 146 ++++++++ ansible/.gitignore | 20 ++ ansible/inventory.ini.example | 89 +++++ ansible/playbooks/bootstrap.yml | 63 ++++ ansible/playbooks/data-service.yml | 337 ++++++++++++++++++ ansible/playbooks/dns-client.yml | 35 ++ ansible/playbooks/dns.yml | 77 ++++ ansible/playbooks/docker.yml | 50 +++ ansible/playbooks/garage.yml | 187 ++++++++++ ansible/playbooks/nebula.yml | 43 +++ ansible/playbooks/postgres-ha.yml | 277 ++++++++++++++ ansible/playbooks/security.yml | 103 ++++++ ansible/playbooks/valkey-sentinel.yml | 155 ++++++++ ansible/playbooks/valkey.yml | 80 +++++ ansible/services.yml.example | 57 +++ ansible/templates/etcd.conf.j2 | 21 ++ ansible/templates/etcd.service.j2 | 16 + ansible/templates/garage.toml.j2 | 48 +++ ansible/templates/iptables.rules.j2 | 71 ++++ ansible/templates/nebula-config.yml.j2 | 67 ++++ ansible/templates/patroni.service.j2 | 19 + ansible/templates/patroni.yml.j2 | 63 ++++ ansible/templates/pg-static-roles.sql.j2 | 83 +++++ ansible/templates/unbound-local-zones.conf.j2 | 34 ++ ansible/templates/unbound.conf.j2 | 55 +++ ansible/templates/unbound.service.j2 | 15 + ansible/templates/valkey-acl.j2 | 10 + ansible/templates/valkey-cluster.conf.j2 | 44 +++ ansible/templates/valkey-sentinel.conf.j2 | 21 ++ ansible/templates/valkey-standalone.conf.j2 | 46 +++ ansible/templates/valkey.conf.j2 | 40 +++ ansible/vault/README.md | 44 +++ docs/architecture.md | 139 ++++++++ docs/getting-started.md | 197 ++++++++++ docs/provisioning-guide.md | 199 +++++++++++ nebula/.gitignore | 8 + nebula/README.md | 164 +++++++++ network/ip-schema.example | 61 ++++ network/port-forward.sh.example | 230 ++++++++++++ terraform/.gitignore | 15 + terraform/data.tf | 184 ++++++++++ terraform/firewall.tf | 11 + terraform/management.tf | 37 ++ terraform/modules/vm/main.tf | 76 ++++ terraform/modules/vm/outputs.tf | 14 + terraform/modules/vm/variables.tf | 107 ++++++ terraform/providers.tf | 14 + terraform/services.tf | 78 ++++ terraform/terraform.tfvars.example | 13 + terraform/vars.tf | 74 ++++ 51 files changed, 4079 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 ansible/.gitignore create mode 100644 ansible/inventory.ini.example create mode 100644 ansible/playbooks/bootstrap.yml create mode 100644 ansible/playbooks/data-service.yml create mode 100644 ansible/playbooks/dns-client.yml create mode 100644 ansible/playbooks/dns.yml create mode 100644 ansible/playbooks/docker.yml create mode 100644 ansible/playbooks/garage.yml create mode 100644 ansible/playbooks/nebula.yml create mode 100644 ansible/playbooks/postgres-ha.yml create mode 100644 ansible/playbooks/security.yml create mode 100644 ansible/playbooks/valkey-sentinel.yml create mode 100644 ansible/playbooks/valkey.yml create mode 100644 ansible/services.yml.example create mode 100644 ansible/templates/etcd.conf.j2 create mode 100644 ansible/templates/etcd.service.j2 create mode 100644 ansible/templates/garage.toml.j2 create mode 100644 ansible/templates/iptables.rules.j2 create mode 100644 ansible/templates/nebula-config.yml.j2 create mode 100644 ansible/templates/patroni.service.j2 create mode 100644 ansible/templates/patroni.yml.j2 create mode 100644 ansible/templates/pg-static-roles.sql.j2 create mode 100644 ansible/templates/unbound-local-zones.conf.j2 create mode 100644 
ansible/templates/unbound.conf.j2 create mode 100644 ansible/templates/unbound.service.j2 create mode 100644 ansible/templates/valkey-acl.j2 create mode 100644 ansible/templates/valkey-cluster.conf.j2 create mode 100644 ansible/templates/valkey-sentinel.conf.j2 create mode 100644 ansible/templates/valkey-standalone.conf.j2 create mode 100644 ansible/templates/valkey.conf.j2 create mode 100644 ansible/vault/README.md create mode 100644 docs/architecture.md create mode 100644 docs/getting-started.md create mode 100644 docs/provisioning-guide.md create mode 100644 nebula/.gitignore create mode 100644 nebula/README.md create mode 100644 network/ip-schema.example create mode 100644 network/port-forward.sh.example create mode 100644 terraform/.gitignore create mode 100644 terraform/data.tf create mode 100644 terraform/firewall.tf create mode 100644 terraform/management.tf create mode 100644 terraform/modules/vm/main.tf create mode 100644 terraform/modules/vm/outputs.tf create mode 100644 terraform/modules/vm/variables.tf create mode 100644 terraform/providers.tf create mode 100644 terraform/services.tf create mode 100644 terraform/terraform.tfvars.example create mode 100644 terraform/vars.tf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f6fa091 --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ +# Terraform +terraform/.terraform/ +terraform/*.tfstate +terraform/*.tfstate.* +terraform/*.tfvars +!terraform/*.tfvars.example +terraform/crash.log + +# Ansible +ansible/inventory.ini +!ansible/inventory.ini.example +ansible/services.yml +!ansible/services.yml.example +ansible/vault/secrets.yml +ansible/vault/*pass* +ansible/databases/ + +# Nebula +nebula/ca.key +nebula/**/*.key +*.key +*.pem + +# Secrets +secrets/ +.env +.env.* +token.txt + +# Backups +backup/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db diff --git a/README.md b/README.md new file mode 100644 index 0000000..8e53067 --- /dev/null +++ b/README.md @@ -0,0 +1,146 @@ +# Arvandor + +Production-grade infrastructure-as-code for running services on Proxmox with enterprise HA patterns. + +## Overview + +Arvandor provides a complete infrastructure stack: + +- **Terraform** - VM provisioning on Proxmox +- **Ansible** - Configuration management +- **Nebula** - Encrypted overlay network +- **Vault** - Secrets management (3-node Raft cluster) +- **PostgreSQL** - Database (3-node Patroni + etcd) +- **Valkey** - Cache/queue (3-node Sentinel) +- **Garage** - S3-compatible storage (3-node cluster) + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ Proxmox Host │ +├─────────────────────────────────────────────────────────────────────────┤ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Management │ │ Services │ │ Data │ │ Workloads │ │ +│ │ 1000-1999 │ │ 2000-2999 │ │ 3000-3999 │ │ 4000-4999 │ │ +│ │ │ │ │ │ │ │ │ │ +│ │ DNS, Caddy │ │ Vault │ │ PostgreSQL │ │ Your Apps │ │ +│ │ Lighthouse │ │ Gitea │ │ Valkey │ │ │ │ +│ │ │ │ │ │ Garage │ │ │ │ +│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ +│ │ │ │ │ │ +│ └────────────────┴────────────────┴────────────────┘ │ +│ │ │ +│ Nebula Overlay (10.10.10.0/24) │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +## Quick Start + +### 1. Prerequisites + +- Proxmox VE host +- Arch Linux VM template (VMID 9000) +- Terraform, Ansible installed locally +- Nebula binary for certificate generation + +### 2. 
Configure + +```bash +# Clone repository +git clone arvandor +cd arvandor + +# Configure Terraform +cp terraform/terraform.tfvars.example terraform/terraform.tfvars +vim terraform/terraform.tfvars + +# Configure Ansible +cp ansible/inventory.ini.example ansible/inventory.ini +vim ansible/inventory.ini + +# Generate Nebula CA +cd nebula +nebula-cert ca -name "Arvandor CA" +``` + +### 3. Provision + +```bash +# Create VMs +cd terraform +terraform init +terraform plan +terraform apply + +# Bootstrap VMs (in order) +cd ../ansible +ansible-playbook -i inventory.ini playbooks/bootstrap.yml +ansible-playbook -i inventory.ini playbooks/security.yml +ansible-playbook -i inventory.ini playbooks/nebula.yml +``` + +### 4. Deploy Services + +```bash +# DNS server +ansible-playbook -i inventory.ini playbooks/dns.yml + +# PostgreSQL HA cluster +ansible-playbook -i inventory.ini playbooks/postgres-ha.yml + +# Valkey Sentinel +ansible-playbook -i inventory.ini playbooks/valkey-sentinel.yml + +# Garage S3 +ansible-playbook -i inventory.ini playbooks/garage.yml +``` + +## Directory Structure + +``` +arvandor/ +├── terraform/ # VM provisioning +│ ├── modules/vm/ # Reusable VM module +│ ├── management.tf # DNS, Caddy +│ ├── services.tf # Vault, Gitea +│ └── data.tf # PostgreSQL, Valkey, Garage +├── ansible/ # Configuration management +│ ├── playbooks/ # Core playbooks +│ ├── templates/ # Jinja2 templates +│ └── vault/ # Ansible Vault secrets +├── nebula/ # Overlay network +│ └── configs/ # Per-host certificates +├── network/ # Host networking +└── docs/ # Documentation +``` + +## Network Design + +### Two-Network Model + +| Network | CIDR | Purpose | +|---------|------|---------| +| Bridge (vmbr1) | 192.168.100.0/24 | Provisioning only | +| Nebula | 10.10.10.0/24 | All application traffic | + +VMs only accept traffic from the Proxmox host (for Ansible) and the Nebula overlay. This provides isolation even if someone gains bridge network access. 
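A quick way to sanity-check this isolation (a sketch using the example addressing from `ansible/inventory.ini.example`; substitute your own IPs):

```bash
# From one VM, another VM's bridge address should be unreachable
# (its iptables rules drop all bridge traffic except the Proxmox host)
ping -c 2 -W 2 192.168.100.30 || echo "bridge peer filtered, as expected"

# The same peer answers over the Nebula overlay (ICMP is allowed overlay-wide)
ping -c 2 10.10.10.30

# From your workstation, SSH reaches VMs only by jumping through the Proxmox host,
# mirroring the ProxyCommand set in ansible/inventory.ini.example
ssh -o ProxyCommand="ssh -W %h:%p -q admin@10.10.10.1" admin@192.168.100.30
```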
+ +### Security Groups (Nebula) + +| Group | Purpose | +|-------|---------| +| `admin` | Full access (your devices) | +| `infrastructure` | Core services | +| `projects` | Application workloads | +| `games` | Isolated game servers | + +## Documentation + +- [Getting Started](docs/getting-started.md) - Detailed setup guide +- [Architecture](docs/architecture.md) - Design decisions +- [Provisioning Guide](docs/provisioning-guide.md) - Adding new VMs + +## License + +MIT diff --git a/ansible/.gitignore b/ansible/.gitignore new file mode 100644 index 0000000..f118bf2 --- /dev/null +++ b/ansible/.gitignore @@ -0,0 +1,20 @@ +# Inventory (contains IPs and hostnames) +inventory.ini +!inventory.ini.example + +# Services (contains real service configs) +services.yml +!services.yml.example + +# Ansible Vault secrets +vault/secrets.yml +vault/ansible_vault_pass +vault/*.pass + +# Database dumps +databases/ + +# SSH keys +*.key +*.pem +id_* diff --git a/ansible/inventory.ini.example b/ansible/inventory.ini.example new file mode 100644 index 0000000..acdc7c2 --- /dev/null +++ b/ansible/inventory.ini.example @@ -0,0 +1,89 @@ +# Arvandor Infrastructure Inventory +# +# Groups: +# infrastructure - Core services (Nebula group: infrastructure) +# projects - Application workloads (Nebula group: projects) +# games - Game servers (Nebula group: games) +# all - All managed VMs +# +# Variables per host: +# ansible_host - Bridge network IP (for SSH via Proxmox jump) +# nebula_ip - Overlay network IP (for inter-service communication) +# vmid - Proxmox VM ID (used for cert paths) + +[infrastructure] +dns ansible_host=192.168.100.11 nebula_ip=10.10.10.11 vmid=1001 +caddy ansible_host=192.168.100.12 nebula_ip=10.10.10.12 vmid=1002 +vault-01 ansible_host=192.168.100.20 nebula_ip=10.10.10.20 vmid=2000 +vault-02 ansible_host=192.168.100.21 nebula_ip=10.10.10.21 vmid=2001 +vault-03 ansible_host=192.168.100.22 nebula_ip=10.10.10.22 vmid=2002 +gitea ansible_host=192.168.100.23 nebula_ip=10.10.10.23 vmid=2003 +postgres-01 ansible_host=192.168.100.30 nebula_ip=10.10.10.30 vmid=3000 +postgres-02 ansible_host=192.168.100.31 nebula_ip=10.10.10.31 vmid=3001 +postgres-03 ansible_host=192.168.100.32 nebula_ip=10.10.10.32 vmid=3002 +valkey-01 ansible_host=192.168.100.33 nebula_ip=10.10.10.33 vmid=3003 +valkey-02 ansible_host=192.168.100.34 nebula_ip=10.10.10.34 vmid=3004 +valkey-03 ansible_host=192.168.100.35 nebula_ip=10.10.10.35 vmid=3005 +garage-01 ansible_host=192.168.100.39 nebula_ip=10.10.10.39 vmid=3009 +garage-02 ansible_host=192.168.100.40 nebula_ip=10.10.10.40 vmid=3010 +garage-03 ansible_host=192.168.100.41 nebula_ip=10.10.10.41 vmid=3011 + +[projects] +app-server ansible_host=192.168.100.50 nebula_ip=10.10.10.50 vmid=4050 + +[games] +# Example game servers (firewall disabled, use host DNAT + guest ufw) +# minecraft ansible_host=192.168.100.52 nebula_ip=10.10.10.52 vmid=4052 + +[docker] +gitea +app-server + +[vault] +vault-01 +vault-02 +vault-03 + +# PostgreSQL HA Cluster (Patroni + etcd) +[postgres] +postgres-01 +postgres-02 +postgres-03 + +# Valkey Sentinel (1 master + 2 replicas) +[valkey] +valkey-01 +valkey-02 +valkey-03 + +# Garage S3 Cluster +[garage] +garage-01 +garage-02 +garage-03 + +[all:children] +infrastructure +projects +games + +[all:vars] +# SSH jumps through the Proxmox host to reach VMs on bridge network +ansible_user=admin +ansible_ssh_common_args='-o ProxyCommand="ssh -W %h:%p -q admin@10.10.10.1"' +ansible_ssh_extra_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null' 
+ansible_python_interpreter=/usr/bin/python3 + +# Infrastructure context (update these for your environment) +lighthouse_nebula_ip=10.10.10.10 +lighthouse_bridge_ip=192.168.100.10 +lighthouse_public_ip=203.0.113.10 + +# Optional: secondary lighthouse for reduced latency +home_lighthouse_nebula_ip=10.10.10.102 +home_lighthouse_public_ip=203.0.113.20 +proxmox_host_ip=192.168.100.1 +bridge_network=192.168.100.0/24 + +# Caddy reverse proxy (for iptables rules) +caddy_nebula_ip=10.10.10.12 diff --git a/ansible/playbooks/bootstrap.yml b/ansible/playbooks/bootstrap.yml new file mode 100644 index 0000000..1df1da6 --- /dev/null +++ b/ansible/playbooks/bootstrap.yml @@ -0,0 +1,63 @@ +--- +# Bootstrap playbook for new VMs +# +# Run FIRST on newly provisioned VMs before security/nebula playbooks. +# Updates system packages and reboots if kernel changed. +# +# Usage: ansible-playbook -i inventory.ini playbooks/bootstrap.yml --limit "new-vm" + +- name: Bootstrap New VMs + hosts: all + become: true + tasks: + - name: Initialize pacman keyring + command: pacman-key --init + args: + creates: /etc/pacman.d/gnupg/trustdb.gpg + + - name: Populate pacman keyring with Arch Linux keys + command: pacman-key --populate archlinux + register: populate_result + changed_when: "'locally signed' in populate_result.stdout" + + - name: Update archlinux-keyring package first + community.general.pacman: + name: archlinux-keyring + state: latest + update_cache: true + + - name: Get current running kernel version + command: uname -r + register: running_kernel + changed_when: false + + - name: Update all packages + community.general.pacman: + update_cache: true + upgrade: true + register: update_result + + - name: Install essential packages + community.general.pacman: + name: + - rsync + state: present + + - name: Get installed kernel version + shell: pacman -Q linux | awk '{print $2}' | sed 's/\.arch/-arch/' + register: installed_kernel + changed_when: false + + - name: Check if reboot is needed (kernel updated) + set_fact: + reboot_needed: "{{ running_kernel.stdout not in installed_kernel.stdout }}" + + - name: Display kernel status + debug: + msg: "Running: {{ running_kernel.stdout }}, Installed: {{ installed_kernel.stdout }}, Reboot needed: {{ reboot_needed }}" + + - name: Reboot if kernel was updated + reboot: + msg: "Kernel updated, rebooting" + reboot_timeout: 300 + when: reboot_needed | bool diff --git a/ansible/playbooks/data-service.yml b/ansible/playbooks/data-service.yml new file mode 100644 index 0000000..439b6c3 --- /dev/null +++ b/ansible/playbooks/data-service.yml @@ -0,0 +1,337 @@ +--- +# Data Service Provisioning Playbook +# +# Provisions PostgreSQL database, Valkey ACL user, Garage S3 bucket/key, +# and Vault credentials for a service defined in services.yml. 
+# +# Usage: +# ansible-playbook -i inventory.ini playbooks/data-service.yml -e "service=myapp" +# +# With database restore: +# ansible-playbook -i inventory.ini playbooks/data-service.yml -e "service=myapp" -e "restore=true" +# +# Prerequisites: +# - postgres-primary running (run playbooks/postgres.yml first) +# - valkey-primary running with ACLs (run playbooks/valkey.yml first) +# - Vault cluster initialized and unsealed (run playbooks/vault.yml first) +# - Database secrets engine enabled: vault secrets enable database +# - VAULT_ADDR and VAULT_TOKEN environment variables set + +- name: Load Service Configuration + hosts: localhost + gather_facts: false + vars_files: + - ../services.yml + tasks: + - name: Validate service parameter + fail: + msg: "Service '{{ service }}' not found in services.yml" + when: service not in services + + - name: Set service facts + set_fact: + svc: "{{ services[service] }}" + postgres_enabled: "{{ services[service].postgres.enabled | default(false) }}" + valkey_enabled: "{{ services[service].valkey.enabled | default(false) }}" + s3_enabled: "{{ services[service].s3.enabled | default(false) }}" + vault_roles: "{{ services[service].vault_roles | default(['app', 'migrate']) }}" + + - name: Display service info + debug: + msg: | + Service: {{ service }} + Description: {{ svc.description }} + PostgreSQL: {{ postgres_enabled }} + Valkey: {{ valkey_enabled }} (prefix: {{ svc.valkey.key_prefix | default(service) }}:*) + S3: {{ s3_enabled }} (bucket: {{ svc.s3.bucket | default(service + '-media') }}) + Vault roles: {{ vault_roles | join(', ') }} + +- name: Setup PostgreSQL Database and Roles + hosts: postgres-01 + become: true + vars_files: + - ../vault/secrets.yml + - ../services.yml + vars: + svc: "{{ services[service] }}" + tasks: + - name: Skip if PostgreSQL not enabled + meta: end_host + when: not (svc.postgres.enabled | default(false)) + + - name: Check if database exists + become_user: postgres + shell: psql -tAc "SELECT 1 FROM pg_database WHERE datname='{{ service }}'" + register: db_exists + changed_when: false + + - name: Template static roles SQL + template: + src: ../templates/pg-static-roles.sql.j2 + dest: "/tmp/{{ service }}-roles.sql" + mode: '0644' + when: db_exists.stdout != "1" + + - name: Create database and static roles + become_user: postgres + shell: psql -f /tmp/{{ service }}-roles.sql + when: db_exists.stdout != "1" + + - name: Create common extensions (requires superuser) + become_user: postgres + shell: | + psql -d {{ service }} -c "CREATE EXTENSION IF NOT EXISTS btree_gist;" + psql -d {{ service }} -c 'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";' + when: db_exists.stdout != "1" + + - name: Clean up SQL file + file: + path: "/tmp/{{ service }}-roles.sql" + state: absent + + - name: Check for dump file + delegate_to: localhost + become: false + stat: + path: "{{ playbook_dir }}/../{{ svc.postgres.restore_from }}" + register: dump_file + when: restore | default(false) | bool + + - name: Copy dump to server + copy: + src: "{{ playbook_dir }}/../{{ svc.postgres.restore_from }}" + dest: "/tmp/{{ service }}.dump" + mode: '0644' + when: + - restore | default(false) | bool + - dump_file.stat.exists | default(false) + + - name: Restore database from dump + become_user: postgres + shell: pg_restore --no-owner --no-privileges -d {{ service }} /tmp/{{ service }}.dump + when: + - restore | default(false) | bool + - dump_file.stat.exists | default(false) + ignore_errors: true # May fail if data already exists + + - name: Clean up dump file + file: + path: 
"/tmp/{{ service }}.dump" + state: absent + when: restore | default(false) | bool + +- name: Setup Valkey ACL User + hosts: valkey-01 + become: true + vars_files: + - ../vault/secrets.yml + - ../services.yml + vars: + svc: "{{ services[service] }}" + valkey_nebula_ip: "{{ hostvars['valkey-01']['nebula_ip'] }}" + tasks: + - name: Skip if Valkey not enabled + meta: end_host + when: not (svc.valkey.enabled | default(false)) + + - name: Generate service password + set_fact: + valkey_service_password: "{{ lookup('password', '/dev/null length=32 chars=hexdigits') }}" + + - name: Check if ACL user exists + command: valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} ACL GETUSER {{ service }} + register: acl_user_check + changed_when: false + failed_when: false + no_log: true + + - name: Create ACL user for service + shell: | + valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} \ + ACL SETUSER {{ service }} on '>{{ valkey_service_password }}' '~{{ svc.valkey.key_prefix | default(service) }}:*' '&*' '+@all' + when: acl_user_check.rc != 0 + no_log: true + + - name: Update ACL user password if exists + shell: | + valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} \ + ACL SETUSER {{ service }} on '>{{ valkey_service_password }}' '~{{ svc.valkey.key_prefix | default(service) }}:*' '&*' '+@all' + when: acl_user_check.rc == 0 + no_log: true + + - name: Persist ACL to disk + command: valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} ACL SAVE + no_log: true + + - name: Store credentials in Vault + delegate_to: localhost + become: false + shell: | + vault kv put secret/{{ service }}/valkey \ + host={{ valkey_nebula_ip }} \ + port=6379 \ + username={{ service }} \ + password={{ valkey_service_password }} \ + key_prefix={{ svc.valkey.key_prefix | default(service) }} + environment: + VAULT_ADDR: "{{ lookup('env', 'VAULT_ADDR') | default('http://' + hostvars['vault-01']['nebula_ip'] + ':8200', true) }}" + VAULT_TOKEN: "{{ lookup('env', 'VAULT_TOKEN') }}" + no_log: true + +- name: Setup Garage S3 Bucket and Key + hosts: garage-01 + become: true + vars_files: + - ../services.yml + vars: + svc: "{{ services[service] }}" + garage_nebula_ip: "{{ hostvars['garage-01']['nebula_ip'] }}" + tasks: + - name: Skip if S3 not enabled + meta: end_host + when: not (svc.s3.enabled | default(false)) + + - name: Set bucket name + set_fact: + bucket_name: "{{ svc.s3.bucket | default(service + '-media') }}" + + - name: Check if bucket exists + command: garage -c /etc/garage/garage.toml bucket list + register: bucket_list + changed_when: false + + - name: Create bucket if needed + command: garage -c /etc/garage/garage.toml bucket create {{ bucket_name }} + when: bucket_name not in bucket_list.stdout + + - name: Check if key exists + command: garage -c /etc/garage/garage.toml key list + register: key_list + changed_when: false + + - name: Create API key for service + command: garage -c /etc/garage/garage.toml key create {{ service }}-key + register: key_create + when: (service + '-key') not in key_list.stdout + + - name: Get key info + command: garage -c /etc/garage/garage.toml key info {{ service }}-key --show-secret + register: key_info + changed_when: false + no_log: true + + - name: Parse key credentials + set_fact: + s3_access_key: "{{ key_info.stdout | regex_search('Key ID: ([A-Za-z0-9]+)', '\\1') | first }}" + s3_secret_key: "{{ key_info.stdout | regex_search('Secret key: ([a-f0-9]+)', '\\1') 
| first }}" + no_log: true + + - name: Grant bucket permissions to key + command: > + garage -c /etc/garage/garage.toml bucket allow {{ bucket_name }} + --read --write --key {{ service }}-key + register: bucket_allow + changed_when: "'already' not in bucket_allow.stderr" + + - name: Store S3 credentials in Vault + delegate_to: localhost + become: false + shell: | + vault kv put secret/{{ service }}/s3 \ + access_key={{ s3_access_key }} \ + secret_key={{ s3_secret_key }} \ + bucket={{ bucket_name }} \ + endpoint=http://{{ garage_nebula_ip }}:3900 + environment: + VAULT_ADDR: "{{ lookup('env', 'VAULT_ADDR') | default('http://' + hostvars['vault-01']['nebula_ip'] + ':8200', true) }}" + VAULT_TOKEN: "{{ lookup('env', 'VAULT_TOKEN') }}" + no_log: true + +- name: Configure Vault Database Credentials + hosts: localhost + gather_facts: false + vars_files: + - ../vault/secrets.yml + - ../services.yml + vars: + svc: "{{ services[service] }}" + postgres_nebula_ip: "{{ hostvars['postgres-01']['nebula_ip'] }}" + vault_nebula_ip: "{{ hostvars['vault-01']['nebula_ip'] }}" + environment: + VAULT_ADDR: "{{ vault_addr | default('http://' + vault_nebula_ip + ':8200') }}" + tasks: + - name: Skip if PostgreSQL not enabled + meta: end_play + when: not (svc.postgres.enabled | default(false)) + + - name: Check if VAULT_TOKEN is set + fail: + msg: "VAULT_TOKEN environment variable must be set" + when: lookup('env', 'VAULT_TOKEN') == '' + + - name: Configure Vault database connection + shell: | + vault write database/config/{{ service }} \ + plugin_name="postgresql-database-plugin" \ + allowed_roles="{{ service }}-app,{{ service }}-migrate" \ + connection_url="postgresql://{% raw %}{{username}}:{{password}}{% endraw %}@{{ postgres_nebula_ip }}:5432/{{ service }}" \ + username="vault_admin" \ + password="{{ vault_admin_password }}" + register: vault_config + changed_when: vault_config.rc == 0 + + - name: Create Vault app role + shell: | + vault write database/roles/{{ service }}-app \ + db_name="{{ service }}" \ + creation_statements="CREATE ROLE \"{% raw %}{{name}}{% endraw %}\" WITH LOGIN PASSWORD '{% raw %}{{password}}{% endraw %}' VALID UNTIL '{% raw %}{{expiration}}{% endraw %}' INHERIT; GRANT {{ service }}_app TO \"{% raw %}{{name}}{% endraw %}\"; ALTER ROLE \"{% raw %}{{name}}{% endraw %}\" SET ROLE = {{ service }}_app;" \ + revocation_statements="REASSIGN OWNED BY \"{% raw %}{{name}}{% endraw %}\" TO {{ service }}_owner; REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE USAGE ON SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE CONNECT ON DATABASE {{ service }} FROM \"{% raw %}{{name}}{% endraw %}\"; DROP ROLE IF EXISTS \"{% raw %}{{name}}{% endraw %}\";" \ + default_ttl="1h" \ + max_ttl="24h" + when: "'app' in (svc.vault_roles | default(['app', 'migrate']))" + + - name: Create Vault migrate role + shell: | + vault write database/roles/{{ service }}-migrate \ + db_name="{{ service }}" \ + creation_statements="CREATE ROLE \"{% raw %}{{name}}{% endraw %}\" WITH LOGIN PASSWORD '{% raw %}{{password}}{% endraw %}' VALID UNTIL '{% raw %}{{expiration}}{% endraw %}' INHERIT; GRANT {{ service }}_migrate TO \"{% raw %}{{name}}{% endraw %}\"; ALTER ROLE \"{% raw %}{{name}}{% endraw %}\" SET ROLE = {{ service }}_migrate;" \ + revocation_statements="REASSIGN OWNED BY \"{% raw %}{{name}}{% endraw %}\" TO {{ service }}_owner; REVOKE ALL PRIVILEGES ON ALL TABLES IN 
SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE CONNECT ON DATABASE {{ service }} FROM \"{% raw %}{{name}}{% endraw %}\"; DROP ROLE IF EXISTS \"{% raw %}{{name}}{% endraw %}\";" \ + default_ttl="15m" \ + max_ttl="1h" + when: "'migrate' in (svc.vault_roles | default(['app', 'migrate']))" + +- name: Display Service Summary + hosts: localhost + gather_facts: false + vars_files: + - ../services.yml + vars: + svc: "{{ services[service] }}" + postgres_ip: "{{ hostvars['postgres-01']['nebula_ip'] }}" + valkey_ip: "{{ hostvars['valkey-01']['nebula_ip'] }}" + garage_ip: "{{ hostvars['garage-01']['nebula_ip'] }}" + tasks: + - name: Service provisioning complete + debug: + msg: + - "==========================================" + - "Service: {{ service }}" + - "Description: {{ svc.description }}" + - "==========================================" + - "" + - "PostgreSQL:" + - " Database: {{ service }} @ {{ postgres_ip }}:5432" + - " App credentials: vault read database/creds/{{ service }}-app" + - " Migrate credentials: vault read database/creds/{{ service }}-migrate" + - "" + - "Valkey:" + - " Host: {{ valkey_ip }}:6379" + - " User: {{ service }}" + - " Key prefix: {{ svc.valkey.key_prefix | default(service) }}:*" + - " Credentials: vault kv get secret/{{ service }}/valkey" + - "" + - "S3:" + - " Bucket: {{ svc.s3.bucket | default(service + '-media') }} @ http://{{ garage_ip }}:3900" + - " Credentials: vault kv get secret/{{ service }}/s3" + - "" + - "==========================================" diff --git a/ansible/playbooks/dns-client.yml b/ansible/playbooks/dns-client.yml new file mode 100644 index 0000000..bcad860 --- /dev/null +++ b/ansible/playbooks/dns-client.yml @@ -0,0 +1,35 @@ +--- +# DNS Client Configuration Playbook +# +# Usage: ansible-playbook -i inventory.ini playbooks/dns-client.yml +# +# Configures all VMs to use the internal Unbound DNS server. +# Run AFTER dns.yml has configured the server. + +- name: Configure DNS Clients + hosts: all + become: true + vars: + dns_server: "{{ hostvars['dns']['nebula_ip'] }}" + tasks: + - name: Configure resolv.conf to use internal DNS + copy: + dest: /etc/resolv.conf + content: | + # Managed by Ansible - changes will be overwritten + # Internal DNS server on Nebula overlay + nameserver {{ dns_server }} + # Fallback to public DNS if internal is unreachable + nameserver 1.1.1.1 + nameserver 8.8.8.8 + # Search domain for short hostnames + search nebula + owner: root + group: root + mode: '0644' + + - name: Test DNS resolution + command: getent hosts lighthouse.nebula + register: dns_test + changed_when: false + failed_when: dns_test.rc != 0 diff --git a/ansible/playbooks/dns.yml b/ansible/playbooks/dns.yml new file mode 100644 index 0000000..756c0c1 --- /dev/null +++ b/ansible/playbooks/dns.yml @@ -0,0 +1,77 @@ +--- +# Unbound DNS Server Playbook +# +# Usage: ansible-playbook -i inventory.ini playbooks/dns.yml +# +# Configures Unbound as a recursive resolver with local DNS records +# for the Nebula overlay network. 
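#
# Manual spot-check after a run (a sketch; 10.10.10.11 is the dns host's overlay
# IP from inventory.ini.example, and lighthouse.nebula is the record that
# dns-client.yml also uses for its resolution test):
#   dig @10.10.10.11 dns.nebula +short          # should print the dns host's nebula_ip
#   dig @10.10.10.11 lighthouse.nebula +short   # local zone record for the lighthouse
#   dig @10.10.10.11 archlinux.org +short       # recursion for external names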
+ +- name: Setup Unbound DNS Server + hosts: dns + become: true + tasks: + - name: Install unbound and bind-tools + community.general.pacman: + name: + - unbound + - bind # provides dig for verification + state: present + + - name: Create unbound config directory + file: + path: /etc/unbound + state: directory + owner: root + group: root + mode: '0755' + + - name: Deploy main unbound configuration + template: + src: ../templates/unbound.conf.j2 + dest: /etc/unbound/unbound.conf + owner: root + group: root + mode: '0644' + notify: Restart unbound + + - name: Deploy local zones configuration + template: + src: ../templates/unbound-local-zones.conf.j2 + dest: /etc/unbound/local-zones.conf + owner: root + group: root + mode: '0644' + notify: Restart unbound + + - name: Deploy unbound systemd service + template: + src: ../templates/unbound.service.j2 + dest: /etc/systemd/system/unbound.service + owner: root + group: root + mode: '0644' + notify: + - Reload systemd + - Restart unbound + + - name: Enable and start unbound + systemd: + name: unbound + state: started + enabled: true + + - name: Verify unbound is responding + command: dig @127.0.0.1 dns.nebula +short + register: dns_test + changed_when: false + failed_when: dns_test.stdout != hostvars['dns']['nebula_ip'] + + handlers: + - name: Reload systemd + systemd: + daemon_reload: true + + - name: Restart unbound + systemd: + name: unbound + state: restarted diff --git a/ansible/playbooks/docker.yml b/ansible/playbooks/docker.yml new file mode 100644 index 0000000..8b4030a --- /dev/null +++ b/ansible/playbooks/docker.yml @@ -0,0 +1,50 @@ +--- +# Docker playbook for VMs that need containerization +# +# Usage: ansible-playbook -i inventory.ini playbooks/docker.yml --limit "docker" + +- name: Install and Configure Docker + hosts: docker + become: true + tasks: + - name: Install Docker and Docker Compose + community.general.pacman: + name: + - docker + - docker-compose + - docker-buildx + state: present + + - name: Create Docker daemon configuration directory + file: + path: /etc/docker + state: directory + mode: '0755' + + - name: Configure Docker daemon with DNS + copy: + content: | + { + "dns": ["1.1.1.1", "8.8.8.8"] + } + dest: /etc/docker/daemon.json + mode: '0644' + notify: Restart Docker + + - name: Enable and start Docker service + systemd: + name: docker + enabled: true + state: started + + - name: Add user to docker group + user: + name: "{{ ansible_user }}" + groups: docker + append: true + + handlers: + - name: Restart Docker + systemd: + name: docker + state: restarted diff --git a/ansible/playbooks/garage.yml b/ansible/playbooks/garage.yml new file mode 100644 index 0000000..b8f6b4c --- /dev/null +++ b/ansible/playbooks/garage.yml @@ -0,0 +1,187 @@ +--- +# Garage S3 Cluster Setup (3 nodes, replication factor 3) +# +# Usage: +# # Full deployment: +# ansible-playbook -i inventory.ini playbooks/garage.yml +# +# # Just install/configure (no layout): +# ansible-playbook -i inventory.ini playbooks/garage.yml --tags install +# +# # Just configure layout (after install): +# ansible-playbook -i inventory.ini playbooks/garage.yml --tags layout + +- name: Install and Configure Garage on All Nodes + hosts: garage + become: true + tags: [install] + vars_files: + - ../vault/secrets.yml + tasks: + - name: Download Garage binary + get_url: + url: "https://garagehq.deuxfleurs.fr/_releases/v1.0.1/x86_64-unknown-linux-musl/garage" + dest: /usr/local/bin/garage + mode: '0755' + + - name: Create garage user + user: + name: garage + system: true + shell: 
/sbin/nologin + home: /var/lib/garage + create_home: false + + - name: Create garage directories + file: + path: "{{ item }}" + state: directory + owner: garage + group: garage + mode: '0750' + loop: + - /var/lib/garage + - /var/lib/garage/meta + - /var/lib/garage/data + - /etc/garage + + - name: Deploy garage configuration + template: + src: ../templates/garage.toml.j2 + dest: /etc/garage/garage.toml + owner: garage + group: garage + mode: '0600' + notify: restart garage + + - name: Deploy garage systemd service + copy: + dest: /etc/systemd/system/garage.service + content: | + [Unit] + Description=Garage S3-compatible object storage + Documentation=https://garagehq.deuxfleurs.fr/ + After=network.target nebula.service + Wants=network-online.target + + [Service] + Type=simple + User=garage + Group=garage + ExecStart=/usr/local/bin/garage -c /etc/garage/garage.toml server + Restart=always + RestartSec=5 + + [Install] + WantedBy=multi-user.target + mode: '0644' + notify: + - reload systemd + - restart garage + + - name: Flush handlers to apply config before starting + meta: flush_handlers + + - name: Start and enable garage + systemd: + name: garage + state: started + enabled: true + daemon_reload: true + + - name: Wait for Garage RPC to be ready + wait_for: + host: "{{ nebula_ip }}" + port: 3901 + timeout: 30 + + - name: Get node ID + command: garage -c /etc/garage/garage.toml node id -q + register: node_id + changed_when: false + + - name: Display node ID + debug: + msg: "Node {{ inventory_hostname }}: {{ node_id.stdout }}" + + handlers: + - name: reload systemd + systemd: + daemon_reload: true + + - name: restart garage + systemd: + name: garage + state: restarted + +- name: Configure Garage Cluster Layout + hosts: garage-01 + become: true + tags: [layout] + vars_files: + - ../vault/secrets.yml + tasks: + - name: Wait for all nodes to connect + pause: + seconds: 10 + + - name: Check cluster status + command: garage -c /etc/garage/garage.toml status + register: cluster_status + changed_when: false + + - name: Display cluster status + debug: + msg: "{{ cluster_status.stdout_lines }}" + + - name: Get current layout + command: garage -c /etc/garage/garage.toml layout show + register: layout_show + changed_when: false + + - name: Check if layout needs configuration + set_fact: + layout_needs_config: "{{ 'no role' in layout_show.stdout }}" + + - name: Get node IDs for layout + command: garage -c /etc/garage/garage.toml status + register: status_output + changed_when: false + when: layout_needs_config + + - name: Parse node IDs + set_fact: + node_ids: "{{ status_output.stdout | regex_findall('([a-f0-9]{16})\\s+' + item + '\\s') }}" + loop: + - "{{ hostvars['garage-01']['nebula_ip'] }}" + - "{{ hostvars['garage-02']['nebula_ip'] }}" + - "{{ hostvars['garage-03']['nebula_ip'] }}" + register: parsed_nodes + when: layout_needs_config + + - name: Assign layout to nodes + command: > + garage -c /etc/garage/garage.toml layout assign + -z dc1 -c 200GB -t {{ item.item | regex_replace('10\\.10\\.10\\.(\\d+)', 'garage-\\1') | regex_replace('garage-39', 'garage-01') | regex_replace('garage-40', 'garage-02') | regex_replace('garage-41', 'garage-03') }} + {{ item.ansible_facts.node_ids[0] }} + loop: "{{ parsed_nodes.results }}" + when: layout_needs_config and item.ansible_facts.node_ids is defined and item.ansible_facts.node_ids | length > 0 + + - name: Apply layout + command: garage -c /etc/garage/garage.toml layout apply --version 1 + when: layout_needs_config + register: layout_apply + + - name: Display 
layout result + debug: + var: layout_apply.stdout_lines + when: layout_apply is changed + + - name: Show final layout + command: garage -c /etc/garage/garage.toml layout show + register: final_layout + changed_when: false + + - name: Display final layout + debug: + msg: "{{ final_layout.stdout_lines }}" diff --git a/ansible/playbooks/nebula.yml b/ansible/playbooks/nebula.yml new file mode 100644 index 0000000..43f6be9 --- /dev/null +++ b/ansible/playbooks/nebula.yml @@ -0,0 +1,43 @@ +--- +- name: Configure Nebula Overlay Network + hosts: all + become: true + tasks: + - name: Install the Nebula network overlay + community.general.pacman: + name: nebula + state: present + + - name: Make sure configuration directories exists + file: + path: /etc/nebula + state: directory + mode: '0755' + + - name: Copy over the Nebula CA certificate + copy: + src: ../../nebula/ca.crt + dest: /etc/nebula/ca.crt + mode: '0644' + + - name: Copy over certificates and keys for the nodes + copy: + src: "../../nebula/configs/{{ vmid }}/{{ inventory_hostname }}/{{ inventory_hostname }}.{{ item }}" + dest: "/etc/nebula/config.{{ item }}" + mode: '0600' + loop: + - crt + - key + + - name: Create new node configurations + template: + src: ../templates/nebula-config.yml.j2 + dest: /etc/nebula/config.yml + notify: restart nebula + + handlers: + - name: restart nebula + systemd: + name: nebula + state: restarted + enabled: true diff --git a/ansible/playbooks/postgres-ha.yml b/ansible/playbooks/postgres-ha.yml new file mode 100644 index 0000000..1bf4986 --- /dev/null +++ b/ansible/playbooks/postgres-ha.yml @@ -0,0 +1,277 @@ +--- +# PostgreSQL High Availability with Patroni + etcd +# Run on postgres group hosts +# +# Usage: +# # Initialize first node (with existing data): +# ansible-playbook -i inventory.ini playbooks/postgres-ha.yml --limit postgres-01 -e "patroni_bootstrap=true" +# +# # Join additional nodes: +# ansible-playbook -i inventory.ini playbooks/postgres-ha.yml --limit postgres-02 +# +# # All nodes at once (after bootstrap): +# ansible-playbook -i inventory.ini playbooks/postgres-ha.yml --limit postgres + +- name: Configure PostgreSQL HA with Patroni + etcd + hosts: postgres + become: true + vars: + patroni_superuser_password: "{{ lookup('env', 'PATRONI_SUPERUSER_PASSWORD') | default('changeme', true) }}" + patroni_replicator_password: "{{ lookup('env', 'PATRONI_REPLICATOR_PASSWORD') | default('changeme', true) }}" + patroni_bootstrap: false + etcd_version: "3.5.17" + + tasks: + # ============================================ + # ETCD SETUP + # ============================================ + - name: Check if etcd is installed + stat: + path: /usr/local/bin/etcd + register: etcd_binary + + - name: Download etcd + get_url: + url: "https://github.com/etcd-io/etcd/releases/download/v{{ etcd_version }}/etcd-v{{ etcd_version }}-linux-amd64.tar.gz" + dest: /tmp/etcd.tar.gz + mode: '0644' + when: not etcd_binary.stat.exists + + - name: Extract etcd + unarchive: + src: /tmp/etcd.tar.gz + dest: /tmp + remote_src: true + when: not etcd_binary.stat.exists + + - name: Install etcd binaries + copy: + src: "/tmp/etcd-v{{ etcd_version }}-linux-amd64/{{ item }}" + dest: "/usr/local/bin/{{ item }}" + mode: '0755' + remote_src: true + loop: + - etcd + - etcdctl + - etcdutl + when: not etcd_binary.stat.exists + + - name: Create symlinks for etcd binaries + file: + src: "/usr/local/bin/{{ item }}" + dest: "/usr/bin/{{ item }}" + state: link + loop: + - etcd + - etcdctl + - etcdutl + + - name: Create etcd user + user: + name: etcd + 
system: true + shell: /sbin/nologin + home: /var/lib/etcd + create_home: true + + - name: Create etcd config directory + file: + path: /etc/etcd + state: directory + mode: '0755' + + - name: Create etcd data directory + file: + path: /var/lib/etcd + state: directory + owner: etcd + group: etcd + mode: '0700' + + - name: Deploy etcd configuration + template: + src: ../templates/etcd.conf.j2 + dest: /etc/etcd/etcd.conf + mode: '0644' + notify: restart etcd + + - name: Deploy etcd systemd service + template: + src: ../templates/etcd.service.j2 + dest: /etc/systemd/system/etcd.service + mode: '0644' + notify: + - reload systemd + - restart etcd + + - name: Enable and start etcd + systemd: + name: etcd + state: started + enabled: true + daemon_reload: true + + - name: Wait for etcd to be healthy + command: etcdctl endpoint health --endpoints=http://127.0.0.1:2379 + register: etcd_health + until: etcd_health.rc == 0 + retries: 30 + delay: 2 + changed_when: false + + # ============================================ + # POSTGRESQL SETUP + # ============================================ + - name: Install PostgreSQL + community.general.pacman: + name: postgresql + state: present + + # ============================================ + # PATRONI SETUP + # ============================================ + - name: Install Patroni dependencies + community.general.pacman: + name: + - python + - python-pip + - python-psycopg2 + - python-yaml + - python-urllib3 + - python-certifi + - python-virtualenv + state: present + + - name: Create Patroni virtual environment + command: python -m venv /opt/patroni + args: + creates: /opt/patroni/bin/python + + - name: Install Patroni in virtual environment + pip: + name: + - patroni[etcd3] + - psycopg2-binary + state: present + virtualenv: /opt/patroni + + - name: Create PostgreSQL run directory + file: + path: /run/postgresql + state: directory + owner: postgres + group: postgres + mode: '0755' + + - name: Create tmpfiles config for postgresql run directory + copy: + content: "d /run/postgresql 0755 postgres postgres -" + dest: /etc/tmpfiles.d/postgresql.conf + mode: '0644' + + - name: Create patroni symlink + file: + src: /opt/patroni/bin/patroni + dest: /usr/local/bin/patroni + state: link + + - name: Create patroni config directory + file: + path: /etc/patroni + state: directory + mode: '0755' + + - name: Stop PostgreSQL service (Patroni will manage it) + systemd: + name: postgresql + state: stopped + enabled: false + ignore_errors: true + + # For bootstrap node with existing data + - name: Prepare existing data directory for Patroni takeover + block: + - name: Ensure postgres owns data directory + file: + path: /var/lib/postgres/data + owner: postgres + group: postgres + recurse: true + + - name: Create replicator role + become_user: postgres + command: > + psql -c "DO $$ + BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'replicator') THEN + CREATE ROLE replicator WITH REPLICATION LOGIN PASSWORD '{{ patroni_replicator_password }}'; + END IF; + END $$;" + when: patroni_bootstrap | bool + ignore_errors: true + + - name: Set postgres superuser password + become_user: postgres + command: psql -c "ALTER USER postgres WITH PASSWORD '{{ patroni_superuser_password }}';" + when: patroni_bootstrap | bool + ignore_errors: true + when: patroni_bootstrap | bool + + - name: Deploy Patroni configuration + template: + src: ../templates/patroni.yml.j2 + dest: /etc/patroni/patroni.yml + owner: postgres + group: postgres + mode: '0600' + notify: restart patroni + + - 
name: Create .pgpass file for postgres user + copy: + content: | + *:*:*:postgres:{{ patroni_superuser_password }} + *:*:*:replicator:{{ patroni_replicator_password }} + dest: /var/lib/postgres/.pgpass + owner: postgres + group: postgres + mode: '0600' + + - name: Deploy Patroni systemd service + template: + src: ../templates/patroni.service.j2 + dest: /etc/systemd/system/patroni.service + mode: '0644' + notify: + - reload systemd + - restart patroni + + - name: Enable and start Patroni + systemd: + name: patroni + state: started + enabled: true + daemon_reload: true + + - name: Wait for Patroni to be healthy + uri: + url: "http://{{ nebula_ip }}:8008/health" + status_code: 200 + register: patroni_health + until: patroni_health.status == 200 + retries: 30 + delay: 5 + + handlers: + - name: reload systemd + systemd: + daemon_reload: true + + - name: restart etcd + systemd: + name: etcd + state: restarted + + - name: restart patroni + systemd: + name: patroni + state: restarted diff --git a/ansible/playbooks/security.yml b/ansible/playbooks/security.yml new file mode 100644 index 0000000..2b754d7 --- /dev/null +++ b/ansible/playbooks/security.yml @@ -0,0 +1,103 @@ +--- +# Security playbook: iptables + fail2ban for all VMs +# +# Run: ansible-playbook -i inventory.ini playbooks/security.yml +# +# This playbook uses direct iptables rules instead of ufw to ensure +# bridge network traffic is properly blocked (ufw's before.rules allows +# ICMP before custom deny rules can take effect). + +- name: Configure Security for All VMs + hosts: all + become: true + tasks: + # Load netfilter kernel modules (required on fresh VMs) + - name: Load netfilter kernel modules + community.general.modprobe: + name: "{{ item }}" + state: present + loop: + - ip_tables + - ip6_tables + - iptable_filter + - ip6table_filter + + # Install security packages + - name: Install iptables and fail2ban + community.general.pacman: + name: + - iptables + - fail2ban + state: present + + # Stop and disable ufw if present (migrating to iptables) + - name: Check if ufw is installed + command: pacman -Q ufw + register: ufw_check + ignore_errors: true + changed_when: false + + - name: Stop ufw if running + systemd: + name: ufw + state: stopped + when: ufw_check.rc == 0 + ignore_errors: true + + - name: Disable ufw + systemd: + name: ufw + enabled: false + when: ufw_check.rc == 0 + ignore_errors: true + + # Deploy iptables rules + - name: Deploy iptables rules + template: + src: ../templates/iptables.rules.j2 + dest: /etc/iptables/iptables.rules + mode: '0644' + notify: reload iptables + + # Enable and start iptables service + - name: Enable and start iptables + systemd: + name: iptables + state: started + enabled: true + + # Configure fail2ban + - name: Create fail2ban local config + copy: + dest: /etc/fail2ban/jail.local + content: | + [DEFAULT] + bantime = 1h + findtime = 10m + maxretry = 5 + + [sshd] + enabled = true + port = ssh + filter = sshd + backend = systemd + mode: '0644' + notify: restart fail2ban + + # Enable fail2ban service + - name: Enable and start fail2ban + systemd: + name: fail2ban + state: started + enabled: true + + handlers: + - name: reload iptables + systemd: + name: iptables + state: restarted + + - name: restart fail2ban + systemd: + name: fail2ban + state: restarted diff --git a/ansible/playbooks/valkey-sentinel.yml b/ansible/playbooks/valkey-sentinel.yml new file mode 100644 index 0000000..b9ab150 --- /dev/null +++ b/ansible/playbooks/valkey-sentinel.yml @@ -0,0 +1,155 @@ +--- +# Valkey Sentinel Setup (1 
master + 2 replicas + Sentinel on each) +# +# Provides automatic failover without requiring cluster-aware clients. +# Apps connect directly to master or use Sentinel-aware clients. +# +# Usage: +# ansible-playbook -i inventory.ini playbooks/valkey-sentinel.yml + +- name: Configure Valkey with Sentinel + hosts: valkey + become: true + vars_files: + - ../vault/secrets.yml + vars: + valkey_maxmemory: "256mb" + valkey_maxmemory_policy: "allkeys-lru" + valkey_role: "{{ 'master' if inventory_hostname == 'valkey-01' else 'replica' }}" + tasks: + - name: Stop valkey service + systemd: + name: valkey + state: stopped + ignore_errors: true + + - name: Remove cluster data files + file: + path: "{{ item }}" + state: absent + loop: + - /var/lib/valkey/nodes.conf + - /var/lib/valkey/dump.rdb + + - name: Deploy standalone Valkey configuration + template: + src: ../templates/valkey-standalone.conf.j2 + dest: /etc/valkey/valkey.conf + owner: valkey + group: valkey + mode: '0640' + + - name: Deploy ACL file + template: + src: ../templates/valkey-acl.j2 + dest: /etc/valkey/users.acl + owner: valkey + group: valkey + mode: '0600' + + - name: Create Sentinel data directory + file: + path: /var/lib/valkey/sentinel + state: directory + owner: valkey + group: valkey + mode: '0750' + + - name: Deploy Sentinel configuration + template: + src: ../templates/valkey-sentinel.conf.j2 + dest: /etc/valkey/sentinel.conf + owner: valkey + group: valkey + mode: '0640' + + - name: Deploy Sentinel systemd service + copy: + dest: /etc/systemd/system/valkey-sentinel.service + content: | + [Unit] + Description=Valkey Sentinel + Documentation=https://valkey.io/ + After=network.target valkey.service nebula.service + Wants=network-online.target + + [Service] + Type=simple + User=valkey + Group=valkey + ExecStart=/usr/bin/valkey-sentinel /etc/valkey/sentinel.conf + Restart=always + RestartSec=5 + + [Install] + WantedBy=multi-user.target + mode: '0644' + + - name: Reload systemd + systemd: + daemon_reload: true + + - name: Start Valkey service + systemd: + name: valkey + state: started + enabled: true + + - name: Wait for Valkey to be ready + wait_for: + host: "{{ nebula_ip }}" + port: 6379 + timeout: 30 + +- name: Start Sentinel on all nodes + hosts: valkey + become: true + serial: 1 + tasks: + - name: Wait for master to be ready (replicas only) + wait_for: + host: "{{ hostvars['valkey-01']['nebula_ip'] }}" + port: 6379 + timeout: 30 + when: inventory_hostname != 'valkey-01' + + - name: Start Sentinel service + systemd: + name: valkey-sentinel + state: started + enabled: true + + - name: Wait for Sentinel to be ready + wait_for: + host: "{{ nebula_ip }}" + port: 26379 + timeout: 30 + +- name: Verify Sentinel Setup + hosts: valkey-01 + become: true + vars_files: + - ../vault/secrets.yml + tasks: + - name: Check replication status + command: > + valkey-cli -h {{ nebula_ip }} -p 6379 + --user admin --pass {{ valkey_admin_password }} + info replication + register: replication_info + changed_when: false + + - name: Display replication status + debug: + msg: "{{ replication_info.stdout_lines }}" + + - name: Check Sentinel status + command: > + valkey-cli -h {{ nebula_ip }} -p 26379 + sentinel master valkey-ha + register: sentinel_info + changed_when: false + + - name: Display Sentinel status + debug: + msg: "{{ sentinel_info.stdout_lines }}" diff --git a/ansible/playbooks/valkey.yml b/ansible/playbooks/valkey.yml new file mode 100644 index 0000000..ab7405e --- /dev/null +++ b/ansible/playbooks/valkey.yml @@ -0,0 +1,80 @@ +--- +# Valkey 
(Redis fork) Primary Setup +# +# Usage: ansible-playbook -i inventory.ini playbooks/valkey.yml +# +# Creates: +# - Valkey server on valkey-primary +# - Configured for Nebula network access +# - 16 databases (0-15) for multi-tenant use + +- name: Setup Valkey Primary + hosts: valkey-primary + become: true + vars_files: + - ../vault/secrets.yml + vars: + valkey_maxmemory: "256mb" + valkey_maxmemory_policy: "allkeys-lru" + tasks: + - name: Install valkey + community.general.pacman: + name: valkey + state: present + + - name: Create systemd override directory + file: + path: /etc/systemd/system/valkey.service.d + state: directory + mode: '0755' + + - name: Add systemd override for ACL write access + copy: + dest: /etc/systemd/system/valkey.service.d/override.conf + content: | + [Service] + ReadWritePaths=/etc/valkey + mode: '0644' + notify: + - reload systemd + - restart valkey + + - name: Deploy ACL file + template: + src: ../templates/valkey-acl.j2 + dest: /etc/valkey/users.acl + owner: valkey + group: valkey + mode: '0600' + notify: restart valkey + + - name: Deploy valkey.conf + template: + src: ../templates/valkey.conf.j2 + dest: /etc/valkey/valkey.conf + owner: valkey + group: valkey + mode: '0640' + notify: restart valkey + + - name: Start and enable valkey + systemd: + name: valkey + state: started + enabled: true + + - name: Wait for Valkey to be ready + wait_for: + host: "{{ nebula_ip }}" + port: 6379 + timeout: 30 + + handlers: + - name: reload systemd + systemd: + daemon_reload: true + + - name: restart valkey + systemd: + name: valkey + state: restarted diff --git a/ansible/services.yml.example b/ansible/services.yml.example new file mode 100644 index 0000000..4a45f1a --- /dev/null +++ b/ansible/services.yml.example @@ -0,0 +1,57 @@ +# Service Manifest +# +# Defines applications, their git repos, data services, and deployment targets. +# +# Usage: +# ansible-playbook playbooks/data-service.yml -e "service=myapp" + +git_base_url: "git@git.infra.example:org" + +services: + myapp: + description: "Example web application" + host: app-server + deploy_path: /opt/myapp + repos: + - name: myapp + dest: myapp + version: main + postgres: + enabled: true + # restore_from: "databases/dumps/myapp.dump" # Optional: restore from backup + valkey: + enabled: true + key_prefix: "myapp" # Access to myapp:* keys only + s3: + enabled: true + bucket: "myapp-media" + vault_roles: + - app # 1h TTL, DML only (SELECT, INSERT, UPDATE, DELETE) + - migrate # 15m TTL, DDL+DML (for migrations) + + another-service: + description: "Another example service" + host: another-server + deploy_path: /opt/another + repos: + - name: another + dest: another + version: main + postgres: + enabled: true + valkey: + enabled: true + key_prefix: "another" + vault_roles: + - app + - migrate + +# Valkey key prefix allocation: +# All services use database /0 with key prefixes for namespace isolation. +# Each service gets an ACL user that can only access {service}:* keys. +# Credentials are provisioned by data-service.yml and stored in Vault. + +# S3 bucket allocation: +# Each service gets its own bucket (default: {service}-media) with a dedicated API key. +# Buckets are created on the Garage cluster with read/write permissions. +# Credentials are provisioned by data-service.yml and stored in Vault at secret/{service}/s3. 
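# Retrieving provisioned credentials (a sketch using the "myapp" example above;
# requires VAULT_ADDR and VAULT_TOKEN, as noted in playbooks/data-service.yml):
#   vault read database/creds/myapp-app        # dynamic PostgreSQL creds (1h TTL, DML)
#   vault read database/creds/myapp-migrate    # dynamic PostgreSQL creds (15m TTL, DDL+DML)
#   vault kv get secret/myapp/valkey           # Valkey ACL user, password, key prefix
#   vault kv get secret/myapp/s3               # Garage access/secret key, bucket, endpoint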
diff --git a/ansible/templates/etcd.conf.j2 b/ansible/templates/etcd.conf.j2 new file mode 100644 index 0000000..1f19d4e --- /dev/null +++ b/ansible/templates/etcd.conf.j2 @@ -0,0 +1,21 @@ +# etcd configuration for Patroni cluster +# Node: {{ inventory_hostname }} + +name: '{{ inventory_hostname }}' +data-dir: /var/lib/etcd + +# Cluster communication +initial-advertise-peer-urls: http://{{ nebula_ip }}:2380 +listen-peer-urls: http://{{ nebula_ip }}:2380 +listen-client-urls: http://{{ nebula_ip }}:2379,http://127.0.0.1:2379 +advertise-client-urls: http://{{ nebula_ip }}:2379 + +# Cluster bootstrap +initial-cluster-token: 'patroni-etcd-cluster' +initial-cluster: {% for host in groups['postgres'] %}{{ host }}=http://{{ hostvars[host]['nebula_ip'] }}:2380{% if not loop.last %},{% endif %}{% endfor %} + +initial-cluster-state: 'new' + +# Performance tuning +heartbeat-interval: 1000 +election-timeout: 5000 diff --git a/ansible/templates/etcd.service.j2 b/ansible/templates/etcd.service.j2 new file mode 100644 index 0000000..5bf5606 --- /dev/null +++ b/ansible/templates/etcd.service.j2 @@ -0,0 +1,16 @@ +[Unit] +Description=etcd distributed key-value store +Documentation=https://etcd.io/docs/ +After=network.target nebula.service +Wants=network-online.target + +[Service] +Type=notify +User=etcd +ExecStart=/usr/local/bin/etcd --config-file=/etc/etcd/etcd.conf +Restart=always +RestartSec=5 +LimitNOFILE=65536 + +[Install] +WantedBy=multi-user.target diff --git a/ansible/templates/garage.toml.j2 b/ansible/templates/garage.toml.j2 new file mode 100644 index 0000000..01bad44 --- /dev/null +++ b/ansible/templates/garage.toml.j2 @@ -0,0 +1,48 @@ +# Garage S3 Configuration +# Generated by Ansible - do not edit manually + +# Metadata directory (LMDB database) +metadata_dir = "/var/lib/garage/meta" + +# Data directory (actual S3 objects) +data_dir = "/var/lib/garage/data" + +# Database engine (LMDB recommended for clusters with replication) +db_engine = "lmdb" + +# Replication factor (3 = all nodes have all data) +replication_factor = 3 + +# Compression level (1-19, higher = more CPU, smaller files) +compression_level = 2 + +# Automatic metadata snapshots (protects against LMDB corruption) +metadata_auto_snapshot_interval = "6h" + +# RPC configuration (inter-node communication) +rpc_bind_addr = "{{ nebula_ip }}:3901" +rpc_public_addr = "{{ nebula_ip }}:3901" +rpc_secret = "{{ garage_rpc_secret }}" + +# Bootstrap peers (connect to other nodes) +bootstrap_peers = [ +{% for host in groups['garage'] if host != inventory_hostname %} + "{{ hostvars[host]['nebula_ip'] }}:3901", +{% endfor %} +] + +# S3 API endpoint +[s3_api] +api_bind_addr = "{{ nebula_ip }}:3900" +s3_region = "garage" +root_domain = ".s3.garage.nebula" + +# S3 Web endpoint (for static website hosting) +[s3_web] +bind_addr = "{{ nebula_ip }}:3902" +root_domain = ".web.garage.nebula" + +# Admin API (for bucket management) +[admin] +api_bind_addr = "{{ nebula_ip }}:3903" +admin_token = "{{ garage_admin_token }}" diff --git a/ansible/templates/iptables.rules.j2 b/ansible/templates/iptables.rules.j2 new file mode 100644 index 0000000..7131b86 --- /dev/null +++ b/ansible/templates/iptables.rules.j2 @@ -0,0 +1,71 @@ +*filter +:INPUT DROP [0:0] +:FORWARD DROP [0:0] +:OUTPUT ACCEPT [0:0] + +# Allow established and related connections +-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT + +# Allow loopback +-A INPUT -i lo -j ACCEPT + +# Allow all traffic on Nebula interface (encrypted overlay) +-A INPUT -i nebula1 -j ACCEPT + +# 
============================================================ +# Bridge network rules (192.168.100.0/24) +# Only allow Proxmox host - block all other VMs on the bridge +# ============================================================ + +# Allow Proxmox host for management/Ansible +-A INPUT -s {{ proxmox_host_ip }} -j ACCEPT + +# Allow Nebula UDP from lighthouse (required for overlay connectivity) +-A INPUT -s {{ lighthouse_bridge_ip }} -p udp --dport 4242 -j ACCEPT + +# DROP everything else from bridge network (force Nebula for inter-VM) +-A INPUT -s {{ bridge_network }} -j DROP + +# ============================================================ +# Caddy-proxied ports (Nebula only - reverse proxy traffic) +# These ports are NOT public; only Caddy can reach them +# ============================================================ + +{% if caddy_proxied_ports_tcp is defined %} +# Web services proxied through Caddy (Nebula only) +{% for port in caddy_proxied_ports_tcp %} +-A INPUT -s {{ caddy_nebula_ip }} -p tcp --dport {{ port }} -j ACCEPT +{% endfor %} +{% endif %} + +{% if caddy_proxied_ports_udp is defined %} +# UDP services proxied through Caddy (Nebula only) +{% for port in caddy_proxied_ports_udp %} +-A INPUT -s {{ caddy_nebula_ip }} -p udp --dport {{ port }} -j ACCEPT +{% endfor %} +{% endif %} + +# ============================================================ +# Public-facing ports (for DNAT'd traffic from internet) +# ============================================================ + +{% if game_ports_tcp is defined %} +# Game server TCP ports (internet -> Proxmox DNAT -> VM) +{% for port in game_ports_tcp %} +-A INPUT -p tcp --dport {{ port }} -j ACCEPT +{% endfor %} +{% endif %} + +{% if game_ports_udp is defined %} +# Game server UDP ports (internet -> Proxmox DNAT -> VM) +{% for port in game_ports_udp %} +-A INPUT -p udp --dport {{ port }} -j ACCEPT +{% endfor %} +{% endif %} + +# ============================================================ +# Default deny - drop everything not explicitly allowed +# ============================================================ +-A INPUT -j DROP + +COMMIT diff --git a/ansible/templates/nebula-config.yml.j2 b/ansible/templates/nebula-config.yml.j2 new file mode 100644 index 0000000..0bfe52c --- /dev/null +++ b/ansible/templates/nebula-config.yml.j2 @@ -0,0 +1,67 @@ +pki: + ca: /etc/nebula/ca.crt + cert: /etc/nebula/config.crt + key: /etc/nebula/config.key + +static_host_map: + # Primary lighthouse (InterServer datacenter) + "{{ lighthouse_nebula_ip }}": ["{{ lighthouse_bridge_ip }}:4242", "{{ lighthouse_public_ip }}:4242"] + # Home lighthouse (Dell) for reduced latency from home + "{{ home_lighthouse_nebula_ip }}": ["{{ home_lighthouse_public_ip }}:4242"] + +lighthouse: + am_lighthouse: false + interval: 60 + hosts: + - "{{ lighthouse_nebula_ip }}" + - "{{ home_lighthouse_nebula_ip }}" + +punchy: + punch: true + respond: true + +relay: + relays: + - "{{ lighthouse_nebula_ip }}" + - "{{ home_lighthouse_nebula_ip }}" + +listen: + host: 0.0.0.0 + port: 0 + +tun: + dev: nebula1 + drop_local_broadcast: true + drop_multicast: true + +firewall: + conntrack: + tcp_timeout: 12h + udp_timeout: 3m + default_timeout: 10m + + outbound: + - port: any + proto: any + host: any + + inbound: + # Admin (laptop) has full access + - port: any + proto: any + group: admin + + # Infrastructure can talk to each other + - port: any + proto: any + group: infrastructure + + # Projects can access infrastructure services + - port: any + proto: any + group: projects + + # Allow ICMP from 
anyone (ping) + - port: any + proto: icmp + host: any \ No newline at end of file diff --git a/ansible/templates/patroni.service.j2 b/ansible/templates/patroni.service.j2 new file mode 100644 index 0000000..02e9131 --- /dev/null +++ b/ansible/templates/patroni.service.j2 @@ -0,0 +1,19 @@ +[Unit] +Description=Patroni PostgreSQL HA Manager +Documentation=https://patroni.readthedocs.io/ +After=network.target etcd.service nebula.service +Wants=network-online.target + +[Service] +Type=simple +User=postgres +Group=postgres +ExecStart=/opt/patroni/bin/patroni /etc/patroni/patroni.yml +ExecReload=/bin/kill -HUP $MAINPID +KillMode=process +Restart=always +RestartSec=5 +TimeoutSec=30 + +[Install] +WantedBy=multi-user.target diff --git a/ansible/templates/patroni.yml.j2 b/ansible/templates/patroni.yml.j2 new file mode 100644 index 0000000..c0308ba --- /dev/null +++ b/ansible/templates/patroni.yml.j2 @@ -0,0 +1,63 @@ +scope: postgres-cluster +name: {{ inventory_hostname }} + +restapi: + listen: {{ nebula_ip }}:8008 + connect_address: {{ nebula_ip }}:8008 + +etcd3: + hosts: {% for host in groups['postgres'] %}{{ hostvars[host]['nebula_ip'] }}:2379{% if not loop.last %},{% endif %}{% endfor %} + + +bootstrap: + dcs: + ttl: 30 + loop_wait: 10 + retry_timeout: 10 + maximum_lag_on_failover: 1048576 + postgresql: + use_pg_rewind: true + use_slots: true + parameters: + wal_level: replica + hot_standby: "on" + max_connections: 200 + max_worker_processes: 8 + max_wal_senders: 10 + max_replication_slots: 10 + hot_standby_feedback: "on" + wal_log_hints: "on" + listen_addresses: '*' + + initdb: + - encoding: UTF8 + - data-checksums + - locale: C + + pg_hba: + - host replication replicator 10.10.10.0/24 md5 + - host all all 10.10.10.0/24 md5 + - host all all 127.0.0.1/32 md5 + - local all all trust + +postgresql: + listen: {{ nebula_ip }}:5432 + connect_address: {{ nebula_ip }}:5432 + data_dir: /var/lib/postgres/data + bin_dir: /usr/bin + pgpass: /var/lib/postgres/.pgpass + authentication: + replication: + username: replicator + password: {{ patroni_replicator_password }} + superuser: + username: postgres + password: {{ patroni_superuser_password }} + parameters: + unix_socket_directories: '/run/postgresql' + +tags: + nofailover: false + noloadbalance: false + clonefrom: false + nosync: false diff --git a/ansible/templates/pg-static-roles.sql.j2 b/ansible/templates/pg-static-roles.sql.j2 new file mode 100644 index 0000000..186f4ae --- /dev/null +++ b/ansible/templates/pg-static-roles.sql.j2 @@ -0,0 +1,83 @@ +-- PostgreSQL Static Roles for {{ service }} +-- Generated by Ansible - run once per service +-- +-- Creates: +-- {{ service }}_owner - Owns database and all objects (NOLOGIN) +-- {{ service }}_app - DML permissions (SELECT, INSERT, UPDATE, DELETE) +-- {{ service }}_migrate - DDL+DML permissions (for migrations) +-- +-- Vault dynamic roles inherit from _app or _migrate + +-- ============================================================================= +-- 1. Create owner role (NOLOGIN, owns all objects) +-- ============================================================================= +CREATE ROLE {{ service }}_owner NOLOGIN; + +-- ============================================================================= +-- 2. Create database owned by the owner role +-- ============================================================================= +CREATE DATABASE {{ service }} OWNER {{ service }}_owner; + +-- ============================================================================= +-- 3. 
Connect to the new database for schema grants +-- ============================================================================= +\c {{ service }} + +-- ============================================================================= +-- 4. Create app role template (DML only - SELECT, INSERT, UPDATE, DELETE) +-- ============================================================================= +CREATE ROLE {{ service }}_app NOLOGIN NOINHERIT; + +-- Grant DML permissions +GRANT CONNECT ON DATABASE {{ service }} TO {{ service }}_app; +GRANT USAGE ON SCHEMA public TO {{ service }}_app; +GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO {{ service }}_app; +GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO {{ service }}_app; + +-- ============================================================================= +-- 5. Create migrate role template (DDL+DML - for migrations) +-- ============================================================================= +CREATE ROLE {{ service }}_migrate NOLOGIN NOINHERIT; + +-- Grant all privileges +GRANT CONNECT ON DATABASE {{ service }} TO {{ service }}_migrate; +GRANT ALL PRIVILEGES ON SCHEMA public TO {{ service }}_migrate; +GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO {{ service }}_migrate; +GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO {{ service }}_migrate; +GRANT ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA public TO {{ service }}_migrate; + +-- ============================================================================= +-- 6. Set default privileges for future objects (CRITICAL) +-- Ensures new tables created during migrations are accessible +-- ============================================================================= + +-- Default privileges for objects created by _owner role +ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public + GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO {{ service }}_app; + +ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public + GRANT USAGE, SELECT ON SEQUENCES TO {{ service }}_app; + +ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public + GRANT ALL PRIVILEGES ON TABLES TO {{ service }}_migrate; + +ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public + GRANT ALL PRIVILEGES ON SEQUENCES TO {{ service }}_migrate; + +ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public + GRANT ALL PRIVILEGES ON FUNCTIONS TO {{ service }}_migrate; + +-- Default privileges for objects created by _migrate role +-- (migrations run as _migrate via SET ROLE, so tables are owned by _migrate) +ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_migrate IN SCHEMA public + GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO {{ service }}_app; + +ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_migrate IN SCHEMA public + GRANT USAGE, SELECT ON SEQUENCES TO {{ service }}_app; + +-- ============================================================================= +-- 7. 
Grant roles to vault_admin (WITH ADMIN OPTION for dynamic role creation) +-- ============================================================================= +GRANT {{ service }}_app TO vault_admin WITH ADMIN OPTION; +GRANT {{ service }}_migrate TO vault_admin WITH ADMIN OPTION; +GRANT {{ service }}_owner TO vault_admin; -- For REASSIGN OWNED during revocation diff --git a/ansible/templates/unbound-local-zones.conf.j2 b/ansible/templates/unbound-local-zones.conf.j2 new file mode 100644 index 0000000..daa56c4 --- /dev/null +++ b/ansible/templates/unbound-local-zones.conf.j2 @@ -0,0 +1,34 @@ +# Local zones for Nebula overlay network +# Generated by Ansible from inventory - do not edit manually + +# ============================================================ +# .nebula zone - All VMs +# ============================================================ +local-zone: "nebula." static + +# Lighthouse (not in inventory, uses variable) +local-data: "lighthouse.nebula. IN A {{ lighthouse_nebula_ip }}" +local-data-ptr: "{{ lighthouse_nebula_ip }} lighthouse.nebula" + +# Proxmox host (not in inventory) +local-data: "proxmox.nebula. IN A 10.10.10.1" +local-data-ptr: "10.10.10.1 proxmox.nebula" + +# All VMs from inventory +{% for host in groups['all'] %} +local-data: "{{ host }}.nebula. IN A {{ hostvars[host]['nebula_ip'] }}" +local-data-ptr: "{{ hostvars[host]['nebula_ip'] }} {{ host }}.nebula" +{% endfor %} + +# ============================================================ +# Custom domain aliases (optional) +# Add your own domain mappings here +# ============================================================ +# Example: +# local-zone: "myapp.infra.example." static +# local-data: "myapp.infra.example. IN A {{ hostvars['app-server']['nebula_ip'] }}" + +# ============================================================ +# Reverse DNS zone for 10.10.10.0/24 +# ============================================================ +local-zone: "10.10.10.in-addr.arpa." static diff --git a/ansible/templates/unbound.conf.j2 b/ansible/templates/unbound.conf.j2 new file mode 100644 index 0000000..6cc44cc --- /dev/null +++ b/ansible/templates/unbound.conf.j2 @@ -0,0 +1,55 @@ +# Unbound DNS configuration for Nebula overlay network +# Deployed by Ansible - do not edit manually + +server: + # Network settings - bind to Nebula interface only + interface: 127.0.0.1 + interface: {{ hostvars['dns']['nebula_ip'] }} + port: 53 + do-ip4: yes + do-ip6: no + do-udp: yes + do-tcp: yes + + # Access control - Nebula network only + access-control: 127.0.0.0/8 allow + access-control: 10.10.10.0/24 allow + access-control: 0.0.0.0/0 refuse + + # Performance tuning + num-threads: 2 + msg-cache-size: 16m + rrset-cache-size: 32m + cache-min-ttl: 300 + cache-max-ttl: 86400 + + # Privacy + hide-identity: yes + hide-version: yes + + # Security hardening + harden-glue: yes + harden-dnssec-stripped: yes + harden-referral-path: yes + use-caps-for-id: yes + + # Rate limiting + ip-ratelimit: 100 + + # Logging + verbosity: 1 + logfile: "" + use-syslog: yes + + # Include local zone definitions + include: /etc/unbound/local-zones.conf + +# Forward external queries to public DNS +# Cloudflare primary (faster - we're behind their proxy) +# Google fallback +forward-zone: + name: "." 
+ forward-addr: 1.1.1.1 + forward-addr: 1.0.0.1 + forward-addr: 8.8.8.8 + forward-addr: 8.8.4.4 diff --git a/ansible/templates/unbound.service.j2 b/ansible/templates/unbound.service.j2 new file mode 100644 index 0000000..74c25a3 --- /dev/null +++ b/ansible/templates/unbound.service.j2 @@ -0,0 +1,15 @@ +[Unit] +Description=Unbound DNS resolver +Documentation=man:unbound(8) +After=network.target nebula.service +Wants=nebula.service + +[Service] +Type=simple +ExecStart=/usr/bin/unbound -d -c /etc/unbound/unbound.conf +ExecReload=/bin/kill -HUP $MAINPID +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target diff --git a/ansible/templates/valkey-acl.j2 b/ansible/templates/valkey-acl.j2 new file mode 100644 index 0000000..ab933b3 --- /dev/null +++ b/ansible/templates/valkey-acl.j2 @@ -0,0 +1,10 @@ +# Valkey ACL Configuration +# Admin user has full access +user admin on >{{ valkey_admin_password }} ~* &* +@all + +# Disable default user +user default off + +# Service users are created dynamically by data-service.yml +# Example format: +# user myapp on >password ~myapp:* &* +@all diff --git a/ansible/templates/valkey-cluster.conf.j2 b/ansible/templates/valkey-cluster.conf.j2 new file mode 100644 index 0000000..9dd5e7a --- /dev/null +++ b/ansible/templates/valkey-cluster.conf.j2 @@ -0,0 +1,44 @@ +# Valkey Cluster Configuration +# Generated by Ansible - do not edit manually + +# Network +bind {{ nebula_ip }} +port 6379 +protected-mode yes + +# Cluster mode +cluster-enabled yes +cluster-config-file /var/lib/valkey/nodes.conf +cluster-node-timeout 5000 +cluster-announce-ip {{ nebula_ip }} +cluster-announce-port 6379 +cluster-announce-bus-port 16379 + +# General +daemonize no +pidfile /run/valkey/valkey.pid +loglevel notice +logfile "" + +# Databases (cluster mode only uses db 0) +databases 1 + +# Memory Management +maxmemory {{ valkey_maxmemory }} +maxmemory-policy {{ valkey_maxmemory_policy }} + +# Persistence (minimal for cluster mode) +save "" +appendonly no + +# Security - ACL-based authentication +aclfile /etc/valkey/users.acl + +# Limits +maxclients 1000 +timeout 0 +tcp-keepalive 300 + +# Slow log +slowlog-log-slower-than 10000 +slowlog-max-len 128 diff --git a/ansible/templates/valkey-sentinel.conf.j2 b/ansible/templates/valkey-sentinel.conf.j2 new file mode 100644 index 0000000..2295897 --- /dev/null +++ b/ansible/templates/valkey-sentinel.conf.j2 @@ -0,0 +1,21 @@ +# Valkey Sentinel Configuration +# Generated by Ansible - do not edit manually + +port 26379 +bind {{ nebula_ip }} + +# Sentinel monitoring configuration +sentinel monitor valkey-ha {{ hostvars['valkey-01']['nebula_ip'] }} 6379 2 +sentinel auth-pass valkey-ha {{ valkey_admin_password }} +sentinel auth-user valkey-ha admin +sentinel down-after-milliseconds valkey-ha 5000 +sentinel failover-timeout valkey-ha 60000 +sentinel parallel-syncs valkey-ha 1 + +# Sentinel authentication +sentinel sentinel-user admin +sentinel sentinel-pass {{ valkey_admin_password }} + +# Announce IP for Nebula network +sentinel announce-ip {{ nebula_ip }} +sentinel announce-port 26379 diff --git a/ansible/templates/valkey-standalone.conf.j2 b/ansible/templates/valkey-standalone.conf.j2 new file mode 100644 index 0000000..3f14aa3 --- /dev/null +++ b/ansible/templates/valkey-standalone.conf.j2 @@ -0,0 +1,46 @@ +# Valkey Standalone Configuration (Master-Replica mode) +# Generated by Ansible - do not edit manually + +# Network +bind {{ nebula_ip }} +port 6379 +protected-mode yes + +# Disable cluster mode +cluster-enabled no + +# General 
+daemonize no +pidfile /run/valkey/valkey.pid +loglevel notice +logfile "" + +# Databases +databases 16 + +# Memory Management +maxmemory {{ valkey_maxmemory }} +maxmemory-policy {{ valkey_maxmemory_policy }} + +# Persistence (minimal for caching) +save "" +appendonly no + +# Security - ACL-based authentication +aclfile /etc/valkey/users.acl + +# Replication (configured on replicas only) +{% if valkey_role == 'replica' %} +replicaof {{ hostvars['valkey-01']['nebula_ip'] }} 6379 +masterauth {{ valkey_admin_password }} +masteruser admin +{% endif %} + +# Limits +maxclients 1000 +timeout 0 +tcp-keepalive 300 + +# Slow log +slowlog-log-slower-than 10000 +slowlog-max-len 128 diff --git a/ansible/templates/valkey.conf.j2 b/ansible/templates/valkey.conf.j2 new file mode 100644 index 0000000..b0d31a4 --- /dev/null +++ b/ansible/templates/valkey.conf.j2 @@ -0,0 +1,40 @@ +# Valkey Configuration +# Generated by Ansible - do not edit manually + +# Network +bind {{ nebula_ip }} +port 6379 +protected-mode yes + +# General +daemonize no +pidfile /run/valkey/valkey.pid +loglevel notice +logfile "" + +# Databases (0-15 available for multi-tenant use) +# See services.yml for DB allocation +databases 16 + +# Memory Management +maxmemory {{ valkey_maxmemory }} +maxmemory-policy {{ valkey_maxmemory_policy }} + +# Persistence (disable for pure caching) +# Enable if you need persistence +save "" +appendonly no + +# Security - ACL-based authentication +# Each service gets its own ACL user with scoped key prefix access +# Users are provisioned by data-service.yml and stored in Vault +aclfile /etc/valkey/users.acl + +# Limits +maxclients 1000 +timeout 0 +tcp-keepalive 300 + +# Slow log +slowlog-log-slower-than 10000 +slowlog-max-len 128 diff --git a/ansible/vault/README.md b/ansible/vault/README.md new file mode 100644 index 0000000..9d744fb --- /dev/null +++ b/ansible/vault/README.md @@ -0,0 +1,44 @@ +# Ansible Vault Secrets + +This directory stores encrypted secrets used by playbooks. + +## Setup + +1. Create a password file (excluded from git): + ```bash + echo "your-vault-password" > ansible_vault_pass + chmod 600 ansible_vault_pass + ``` + +2. Create the secrets file: + ```bash + ansible-vault create secrets.yml --vault-password-file ansible_vault_pass + ``` + +3. Add your secrets (example structure): + ```yaml + # Valkey admin password (used by valkey.yml) + valkey_admin_password: "your-strong-password" + + # Vault admin database password (used by data-service.yml) + vault_admin_password: "your-vault-admin-password" + ``` + +## Usage + +Reference in playbooks: +```yaml +vars_files: + - ../vault/secrets.yml +``` + +Run playbooks with vault password: +```bash +ansible-playbook -i inventory.ini playbooks/valkey.yml --vault-password-file vault/ansible_vault_pass +``` + +Or set the environment variable: +```bash +export ANSIBLE_VAULT_PASSWORD_FILE=vault/ansible_vault_pass +ansible-playbook -i inventory.ini playbooks/valkey.yml +``` diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..5d668dd --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,139 @@ +# Architecture + +This document explains the design decisions behind Arvandor. + +## Network Separation + +### Why Two Networks? 
+ +``` +Internet ──► Proxmox Host ──► vmbr1 (192.168.100.0/24) + │ + └──► Nebula (10.10.10.0/24) +``` + +**Bridge Network (vmbr1)** +- Used only for Terraform provisioning and Ansible access +- VMs firewall blocks all bridge traffic except from Proxmox host +- No inter-VM communication on this network + +**Nebula Overlay** +- All application traffic uses encrypted Nebula tunnels +- Group-based firewall rules for segmentation +- Works across any network boundary (cloud, datacenter, home) + +### Benefits + +1. **Defense in depth** - Compromise of bridge network doesn't expose services +2. **Migration ready** - Move VMs anywhere, Nebula handles connectivity +3. **Zero-trust** - VMs authenticate via certificates, not network position + +## VMID Allocation + +VMIDs follow a logical pattern: + +| Range | Purpose | Example | +|-------|---------|---------| +| 1000-1999 | Management | DNS, Caddy | +| 2000-2999 | Services | Vault, Gitea | +| 3000-3999 | Data | PostgreSQL, Valkey | +| 4000-4999 | Workloads | Applications | +| 5000-5999 | Monitoring | Prometheus | + +The last digits determine the IP address: +- VMID 1001 → x.x.x.11 +- VMID 3000 → x.x.x.30 + +## High Availability + +All data services run as 3-node clusters: + +### PostgreSQL (Patroni + etcd) + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ postgres-01 │ │ postgres-02 │ │ postgres-03 │ +│ Leader │◄─│ Replica │◄─│ Replica │ +│ + etcd │ │ + etcd │ │ + etcd │ +└─────────────┘ └─────────────┘ └─────────────┘ +``` + +- Patroni handles leader election +- etcd provides distributed consensus +- Automatic failover on leader failure + +### Valkey (Sentinel) + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ valkey-01 │ │ valkey-02 │ │ valkey-03 │ +│ Master │──│ Replica │ │ Replica │ +│ + Sentinel │ │ + Sentinel │ │ + Sentinel │ +└─────────────┘ └─────────────┘ └─────────────┘ +``` + +- Sentinel monitors master health +- Automatic promotion on master failure +- ACL-based per-service key isolation + +### Vault (Raft) + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ vault-01 │ │ vault-02 │ │ vault-03 │ +│ Leader │──│ Standby │──│ Standby │ +└─────────────┘ └─────────────┘ └─────────────┘ +``` + +- Integrated Raft storage (no external backend) +- Automatic leader election +- Unseal required after restart + +## Security Model + +### Three-Layer Firewall + +``` +┌─────────────────────────────────────────────────────────────┐ +│ 1. Proxmox VM Firewall → Egress control │ +│ 2. Nebula Groups → East-west segmentation │ +│ 3. Guest iptables → Defense in depth │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Nebula Groups + +| Group | Can Access | +|-------|------------| +| admin | Everything | +| infrastructure | infrastructure | +| projects | infrastructure | +| games | Nothing (isolated) | + +### Vault Integration + +Applications use Vault for: +- Dynamic database credentials (short-lived) +- Service secrets (API keys, etc.) +- AppRole authentication + +## Service Discovery + +Internal DNS provides hostname resolution: + +``` +.nebula → Nebula IP +``` + +VMs query 10.10.10.11 (DNS server) via Nebula. External queries forward to Cloudflare (1.1.1.1). + +## Provisioning Flow + +``` +1. terraform apply → Create VM +2. bootstrap.yml → Update packages +3. security.yml → Configure firewall +4. nebula.yml → Join overlay network +5. .yml → Deploy service +6. 
data-service.yml → Provision credentials +``` diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000..1006084 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,197 @@ +# Getting Started + +This guide walks through setting up Arvandor from scratch. + +## Prerequisites + +### Proxmox Host + +- Proxmox VE 7.x or 8.x +- Two network bridges: + - `vmbr0` - Public interface + - `vmbr1` - Internal VM network (192.168.100.0/24) +- IP forwarding enabled + +### VM Template + +Create an Arch Linux template (VMID 9000): + +1. Download Arch Linux ISO +2. Create VM, install Arch with basic setup +3. Install `openssh`, `python` (for Ansible) +4. Enable cloud-init or configure static user +5. Convert to template + +### Local Tools + +```bash +# Terraform +wget -O- https://apt.releases.hashicorp.com/gpg | gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg +echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list +sudo apt update && sudo apt install terraform + +# Ansible +pip install ansible + +# Nebula +wget https://github.com/slackhq/nebula/releases/download/v1.9.0/nebula-linux-amd64.tar.gz +tar xzf nebula-linux-amd64.tar.gz +sudo mv nebula nebula-cert /usr/local/bin/ +``` + +## Step 1: Configure Terraform + +```bash +cd terraform + +# Copy example configuration +cp terraform.tfvars.example terraform.tfvars + +# Edit with your values +vim terraform.tfvars +``` + +Required variables: +- `proxmox_endpoint` - Your Proxmox API URL +- `proxmox_api_token_id` - API token ID +- `proxmox_api_token_secret` - API token secret +- `proxmox_node` - Node name (e.g., "pve") +- `username` - Default VM username +- `password` - Default VM password +- `ssh_key_path` - Path to your SSH public key + +## Step 2: Create Proxmox API Token + +In Proxmox: + +1. Datacenter → Permissions → API Tokens +2. Add token for a user with `PVEAdmin` or `Administrator` role +3. Copy the token ID and secret + +## Step 3: Generate Nebula CA + +```bash +cd nebula + +# Generate Certificate Authority +nebula-cert ca -name "Arvandor CA" -duration 87600h + +# This creates: +# - ca.crt (share with all hosts) +# - ca.key (keep secure!) +``` + +## Step 4: Provision VMs + +```bash +cd terraform + +terraform init +terraform plan +terraform apply +``` + +This creates all VMs defined in the .tf files. + +## Step 5: Generate Nebula Certificates + +For each VM, generate a certificate: + +```bash +cd nebula + +# DNS server +nebula-cert sign -ca-crt ca.crt -ca-key ca.key \ + -name "dns" -networks "10.10.10.11/24" -groups "infrastructure" \ + -out-crt configs/1001/dns/dns.crt \ + -out-key configs/1001/dns/dns.key + +# Repeat for all VMs... 
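# (Illustrative sketch, not part of the original guide.) One way to sign the rest in
# bulk is to loop over "name vmid nebula_ip group" tuples that mirror your inventory:
while read -r name vmid ip group; do
  mkdir -p "configs/${vmid}/${name}"
  nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
    -name "$name" -networks "${ip}/24" -groups "$group" \
    -out-crt "configs/${vmid}/${name}/${name}.crt" \
    -out-key "configs/${vmid}/${name}/${name}.key"
done <<'EOF'
caddy 1002 10.10.10.12 infrastructure
vault-01 2000 10.10.10.20 infrastructure
postgres-01 3000 10.10.10.30 infrastructure
EOF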
+``` + +## Step 6: Configure Ansible Inventory + +```bash +cd ansible + +cp inventory.ini.example inventory.ini +vim inventory.ini +``` + +Update: +- VM hostnames and IPs +- SSH jump host configuration +- Infrastructure variables + +## Step 7: Bootstrap VMs + +Run playbooks in order: + +```bash +# Update packages, reboot if kernel changed +ansible-playbook -i inventory.ini playbooks/bootstrap.yml + +# Configure iptables and fail2ban +ansible-playbook -i inventory.ini playbooks/security.yml + +# Join Nebula overlay network +ansible-playbook -i inventory.ini playbooks/nebula.yml +``` + +## Step 8: Deploy Core Services + +```bash +# DNS (required for hostname resolution) +ansible-playbook -i inventory.ini playbooks/dns.yml +ansible-playbook -i inventory.ini playbooks/dns-client.yml + +# PostgreSQL HA cluster +ansible-playbook -i inventory.ini playbooks/postgres-ha.yml + +# Valkey with Sentinel +ansible-playbook -i inventory.ini playbooks/valkey-sentinel.yml + +# Garage S3 storage +ansible-playbook -i inventory.ini playbooks/garage.yml +``` + +## Step 9: Configure Host Port Forwarding + +On the Proxmox host: + +```bash +# Copy and configure the script +cp network/port-forward.sh.example /root/network/port-forward.sh +chmod +x /root/network/port-forward.sh +vim /root/network/port-forward.sh + +# Test +./port-forward.sh --dry-run + +# Apply +./port-forward.sh +``` + +## Verification + +Test connectivity: + +```bash +# SSH to VM via Nebula +ssh admin@10.10.10.11 + +# Test DNS resolution +dig @10.10.10.11 vault-01.nebula + +# Test PostgreSQL +psql -h 10.10.10.30 -U postgres -c "SELECT 1" + +# Test Valkey +valkey-cli -h 10.10.10.33 PING +``` + +## Next Steps + +- Add your application VMs to `terraform/workloads.tf` +- Create services in `ansible/services.yml` +- Provision app credentials with `data-service.yml` diff --git a/docs/provisioning-guide.md b/docs/provisioning-guide.md new file mode 100644 index 0000000..74a23fa --- /dev/null +++ b/docs/provisioning-guide.md @@ -0,0 +1,199 @@ +# Provisioning Guide + +Step-by-step guide for adding new VMs to the infrastructure. + +## Adding a New VM + +### 1. Choose VMID and IP + +Select a VMID based on the VM's purpose: + +| Purpose | VMID Range | Example | +|---------|------------|---------| +| Management | 1000-1999 | 1003 | +| Services | 2000-2999 | 2004 | +| Data | 3000-3999 | 3012 | +| Workloads | 4000-4999 | 4056 | +| Monitoring | 5000-5999 | 5001 | + +IP is derived from VMID: +- Bridge: 192.168.100.XX +- Nebula: 10.10.10.XX + +Where XX is the last 2 digits of VMID. + +### 2. Add to Terraform + +Edit the appropriate .tf file: + +```hcl +module "myapp" { + source = "./modules/vm" + name = "myapp" + vmid = 4056 + node_name = var.proxmox_node + bridge_ip = "192.168.100.56" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 4096 + disk_size = 50 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} +``` + +Apply: + +```bash +cd terraform +terraform plan +terraform apply +``` + +### 3. Generate Nebula Certificate + +```bash +cd nebula + +nebula-cert sign -ca-crt ca.crt -ca-key ca.key \ + -name "myapp" \ + -networks "10.10.10.56/24" \ + -groups "projects" \ + -out-crt configs/4056/myapp/myapp.crt \ + -out-key configs/4056/myapp/myapp.key +``` + +Choose the appropriate group: +- `infrastructure` - Core services +- `projects` - Applications needing infrastructure access +- `games` - Isolated workloads + +### 4. 
Add to Ansible Inventory + +Edit `ansible/inventory.ini`: + +```ini +[projects] +myapp ansible_host=192.168.100.56 nebula_ip=10.10.10.56 vmid=4056 + +[docker] +myapp +``` + +### 5. Run Bootstrap Playbooks + +```bash +cd ansible + +# Update packages +ansible-playbook -i inventory.ini playbooks/bootstrap.yml --limit "myapp" + +# Configure firewall +ansible-playbook -i inventory.ini playbooks/security.yml --limit "myapp" + +# Join Nebula +ansible-playbook -i inventory.ini playbooks/nebula.yml --limit "myapp" + +# Configure DNS client +ansible-playbook -i inventory.ini playbooks/dns-client.yml --limit "myapp" + +# Install Docker (if needed) +ansible-playbook -i inventory.ini playbooks/docker.yml --limit "myapp" +``` + +### 6. Update DNS (Optional) + +If you want a `.nebula` hostname, re-run the DNS playbook: + +```bash +ansible-playbook -i inventory.ini playbooks/dns.yml +``` + +### 7. Verify + +```bash +# Test SSH via Nebula +ssh admin@10.10.10.56 + +# Test hostname resolution +dig @10.10.10.11 myapp.nebula +``` + +## Adding a Service with Database + +### 1. Define in services.yml + +```yaml +services: + myapp: + description: "My Application" + host: myapp + deploy_path: /opt/myapp + postgres: + enabled: true + valkey: + enabled: true + key_prefix: "myapp" + s3: + enabled: true + bucket: "myapp-media" + vault_roles: + - app + - migrate +``` + +### 2. Provision Data Services + +```bash +ansible-playbook -i inventory.ini playbooks/data-service.yml -e "service=myapp" +``` + +This creates: +- PostgreSQL database with static roles +- Valkey ACL user with key prefix +- Garage S3 bucket with API key +- Vault database engine roles + +### 3. Retrieve Credentials + +```bash +# Database credentials (dynamic) +vault read database/creds/myapp-app +vault read database/creds/myapp-migrate + +# Valkey credentials (static, stored in Vault) +vault kv get secret/myapp/valkey + +# S3 credentials (static, stored in Vault) +vault kv get secret/myapp/s3 +``` + +## Removing a VM + +### 1. Remove from Terraform + +Comment out or delete the module from .tf file, then: + +```bash +terraform plan +terraform apply +``` + +### 2. Remove from Inventory + +Edit `ansible/inventory.ini` and remove the host. + +### 3. Clean up Certificates + +```bash +rm -rf nebula/configs// +``` + +### 4. Update DNS + +```bash +ansible-playbook -i inventory.ini playbooks/dns.yml +``` diff --git a/nebula/.gitignore b/nebula/.gitignore new file mode 100644 index 0000000..6e67784 --- /dev/null +++ b/nebula/.gitignore @@ -0,0 +1,8 @@ +# CA private key - NEVER COMMIT +ca.key + +# All private keys +*.key + +# Host certificates in configs/ +configs/*/*/*.key diff --git a/nebula/README.md b/nebula/README.md new file mode 100644 index 0000000..771b873 --- /dev/null +++ b/nebula/README.md @@ -0,0 +1,164 @@ +# Nebula Overlay Network + +Nebula is a scalable overlay network that provides encrypted connectivity between all VMs regardless of their physical location. 
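Once a host has been enrolled (see the Setup and Deploy sections below), a quick sanity check from that host might look like the following. This is a minimal sketch; it assumes the systemd unit is named `nebula`, as in the Ansible templates, and that the primary lighthouse sits at 10.10.10.10.

```bash
# Overlay interface should exist and carry the host's 10.10.10.x address
ip addr show nebula1

# Nebula service should be active
systemctl status nebula --no-pager

# Lighthouse should answer over the overlay
ping -c 3 10.10.10.10
```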
+ +## Architecture + +``` + ┌─────────────┐ + │ Lighthouse │ + │ 10.10.10.10 │ + └──────┬──────┘ + │ + ┌─────────────────┼─────────────────┐ + │ │ │ + ┌────▼────┐ ┌────▼────┐ ┌────▼────┐ + │ VM 1 │ │ VM 2 │ │ VM 3 │ + │ 10.10.10.11 │ 10.10.10.20 │ 10.10.10.30 + └─────────┘ └─────────┘ └─────────┘ +``` + +## Groups + +Nebula uses groups for firewall segmentation: + +| Group | Purpose | Can Access | +|-------|---------|------------| +| `admin` | Personal devices | Everything | +| `infrastructure` | Core services | Each other | +| `projects` | Application workloads | Infrastructure | +| `lighthouse` | Nebula relays | - | +| `games` | Game servers | Isolated | + +## Setup + +### 1. Generate Certificate Authority + +```bash +nebula-cert ca -name "Arvandor CA" -duration 87600h +``` + +This creates: +- `ca.crt` - Certificate (share with all hosts) +- `ca.key` - Private key (keep secure, do not commit!) + +### 2. Generate Host Certificates + +```bash +# Infrastructure VM example +nebula-cert sign -ca-crt ca.crt -ca-key ca.key \ + -name "dns" \ + -networks "10.10.10.11/24" \ + -groups "infrastructure" \ + -out-crt configs/1001/dns/dns.crt \ + -out-key configs/1001/dns/dns.key + +# Application VM example +nebula-cert sign -ca-crt ca.crt -ca-key ca.key \ + -name "app-server" \ + -networks "10.10.10.50/24" \ + -groups "projects" \ + -out-crt configs/4050/app-server/app-server.crt \ + -out-key configs/4050/app-server/app-server.key + +# Lighthouse +nebula-cert sign -ca-crt ca.crt -ca-key ca.key \ + -name "lighthouse" \ + -networks "10.10.10.10/24" \ + -groups "infrastructure,lighthouse" \ + -out-crt configs/1000/lighthouse/lighthouse.crt \ + -out-key configs/1000/lighthouse/lighthouse.key +``` + +### 3. Directory Structure + +``` +nebula/ +├── ca.crt # Certificate authority (commit this) +├── ca.key # CA private key (DO NOT COMMIT) +├── configs/ +│ ├── 1000/lighthouse/ +│ │ ├── lighthouse.crt +│ │ └── lighthouse.key +│ ├── 1001/dns/ +│ │ ├── dns.crt +│ │ └── dns.key +│ └── ... +└── README.md +``` + +### 4. 
Deploy with Ansible + +The `nebula.yml` playbook deploys certificates and configuration: + +```bash +ansible-playbook -i inventory.ini playbooks/nebula.yml --limit "new-vm" +``` + +## Lighthouse Configuration + +The lighthouse requires manual configuration (not managed by Ansible): + +```yaml +# /etc/nebula/config.yml on lighthouse +pki: + ca: /etc/nebula/ca.crt + cert: /etc/nebula/config.crt + key: /etc/nebula/config.key + +static_host_map: {} + +lighthouse: + am_lighthouse: true + serve_dns: false + +listen: + host: 0.0.0.0 + port: 4242 + +punchy: + punch: true + respond: true + +relay: + am_relay: true + +tun: + dev: nebula1 + drop_local_broadcast: true + drop_multicast: true + +firewall: + conntrack: + tcp_timeout: 12h + udp_timeout: 3m + default_timeout: 10m + + outbound: + - port: any + proto: any + host: any + + inbound: + - port: any + proto: any + group: admin + - port: any + proto: any + group: infrastructure + - port: any + proto: icmp + host: any +``` + +## IP Allocation + +| VMID Range | Network Segment | Last Octet | +|------------|-----------------|------------| +| 1000-1999 | Management | 10-19 | +| 2000-2999 | Services | 20-29 | +| 3000-3999 | Data | 30-49 | +| 4000-4999 | Workloads | 50-59 | +| 5000-5999 | Monitoring | 90-99 | + +Example: VMID 3000 → 10.10.10.30 diff --git a/network/ip-schema.example b/network/ip-schema.example new file mode 100644 index 0000000..f24e232 --- /dev/null +++ b/network/ip-schema.example @@ -0,0 +1,61 @@ +# Arvandor IP Schema +# +# This documents the IP addressing scheme for the infrastructure. + +## Networks + +| Network | CIDR | Purpose | +|---------|------|---------| +| Public | 203.0.113.10 | External access (vmbr0) | +| Bridge | 192.168.100.0/24 | VM provisioning network (vmbr1) | +| Nebula | 10.10.10.0/24 | Encrypted overlay network | + +## VMID Ranges + +| Range | Domain | Nebula Group | Purpose | +|-------|--------|--------------|---------| +| 1000-1999 | Management | infrastructure | DNS, Caddy, Lighthouse | +| 2000-2999 | Services | infrastructure | Vault, Gitea | +| 3000-3999 | Data | infrastructure | PostgreSQL, Valkey, Garage | +| 4000-4999 | Workloads | projects/games | Applications, game servers | +| 5000-5999 | Monitoring | infrastructure | Prometheus, Grafana, Loki | + +## IP Pattern + +VMID determines IP address. 
Last 2-3 digits become the last octet: +- VMID 1001 → 192.168.100.11 / 10.10.10.11 +- VMID 2000 → 192.168.100.20 / 10.10.10.20 +- VMID 3009 → 192.168.100.39 / 10.10.10.39 + +## Reserved Addresses + +| IP | Host | Purpose | +|----|------|---------| +| 192.168.100.1 | Proxmox host | Gateway, Ansible jump host | +| 10.10.10.1 | Proxmox host | Nebula endpoint for management | +| 10.10.10.10 | Lighthouse | Nebula discovery/relay | +| 10.10.10.11 | DNS | Internal DNS server | +| 10.10.10.12 | Caddy | Reverse proxy | +| 10.10.10.20-22 | Vault cluster | Secrets management | +| 10.10.10.30-32 | PostgreSQL | Database cluster | +| 10.10.10.33-35 | Valkey | Cache/queue cluster | +| 10.10.10.39-41 | Garage | S3 storage cluster | + +## Example VM Allocation + +``` +VMID 1001 - dns + Bridge: 192.168.100.11 + Nebula: 10.10.10.11 + Group: infrastructure + +VMID 2000 - vault-01 + Bridge: 192.168.100.20 + Nebula: 10.10.10.20 + Group: infrastructure + +VMID 4050 - app-server + Bridge: 192.168.100.50 + Nebula: 10.10.10.50 + Group: projects +``` diff --git a/network/port-forward.sh.example b/network/port-forward.sh.example new file mode 100644 index 0000000..68018d8 --- /dev/null +++ b/network/port-forward.sh.example @@ -0,0 +1,230 @@ +#!/bin/bash +set -euo pipefail + +# ============================================================================= +# Arvandor Port Forwarding Script +# ============================================================================= +# Configures NAT (DNAT/SNAT) and FORWARD rules for Proxmox host. +# Uses a custom chain (ARVANDOR-FORWARD) to avoid conflicts with PVE firewall. +# +# Usage: +# ./port-forward.sh # Apply rules +# ./port-forward.sh --dry-run # Show what would be done +# ./port-forward.sh --restore # Restore backup +# ./port-forward.sh --status # Show current rules +# ============================================================================= + +# ----------------------------------------------------------------------------- +# Configuration - UPDATE THESE FOR YOUR ENVIRONMENT +# ----------------------------------------------------------------------------- +NETWORK_INTERFACE="vmbr0" +INTERNAL_NETWORK="192.168.100.0/24" +PUBLIC_IP="203.0.113.10" # Your public IP +CUSTOM_CHAIN="ARVANDOR-FORWARD" +BACKUP_FILE="/root/network/iptables.backup" + +# Nebula Lighthouse +NEBULA_IP="192.168.100.10" +NEBULA_PORT="4242" + +# Caddy (Reverse Proxy) +CADDY_IP="192.168.100.12" +CADDY_HTTP_PORT="80" +CADDY_HTTPS_PORT="443" + +# Gitea (Optional) +GITEA_IP="192.168.100.23" +GITEA_SSH_PORT="2222" + +# Security - restrict SSH to specific IP +ALLOWED_SSH_IP="203.0.113.20" # Your home IP + +# ----------------------------------------------------------------------------- +# Functions +# ----------------------------------------------------------------------------- +log() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" +} + +error() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $*" >&2 +} + +die() { + error "$*" + exit 1 +} + +check_root() { + [[ $EUID -eq 0 ]] || die "This script must be run as root" +} + +check_interface() { + local iface=$1 + ip link show "$iface" &>/dev/null || die "Interface $iface does not exist" +} + +backup_rules() { + log "Backing up current iptables rules to $BACKUP_FILE" + mkdir -p "$(dirname "$BACKUP_FILE")" + iptables-save > "$BACKUP_FILE" +} + +restore_rules() { + [[ -f "$BACKUP_FILE" ]] || die "Backup file $BACKUP_FILE not found" + log "Restoring iptables rules from $BACKUP_FILE" + iptables-restore < "$BACKUP_FILE" + log "Rules restored successfully" +} + 
+setup_custom_chain() { + # Create custom chain if it doesn't exist + if ! iptables -L "$CUSTOM_CHAIN" -n &>/dev/null; then + log "Creating custom chain: $CUSTOM_CHAIN" + iptables -N "$CUSTOM_CHAIN" + fi + + # Ensure chain is jumped to from FORWARD (only once) + if ! iptables -C FORWARD -j "$CUSTOM_CHAIN" &>/dev/null; then + log "Inserting jump to $CUSTOM_CHAIN in FORWARD chain" + iptables -I FORWARD 1 -j "$CUSTOM_CHAIN" + fi + + # Flush the custom chain + log "Flushing custom chain: $CUSTOM_CHAIN" + iptables -F "$CUSTOM_CHAIN" +} + +apply_rules() { + local dry_run=${1:-false} + + if [[ "$dry_run" == "true" ]]; then + log "=== DRY RUN MODE - No changes will be made ===" + echo "" + echo "Would apply the following rules:" + echo "" + echo "NAT PREROUTING (DNAT):" + echo " - UDP $NEBULA_PORT → $NEBULA_IP:$NEBULA_PORT (Nebula)" + echo " - TCP $CADDY_HTTP_PORT → $CADDY_IP:$CADDY_HTTP_PORT (HTTP)" + echo " - TCP $CADDY_HTTPS_PORT → $CADDY_IP:$CADDY_HTTPS_PORT (HTTPS)" + echo " - TCP $GITEA_SSH_PORT → $GITEA_IP:$GITEA_SSH_PORT (Gitea SSH)" + echo "" + echo "FORWARD chain ($CUSTOM_CHAIN):" + echo " - Allow traffic to all above destinations" + echo "" + echo "INPUT:" + echo " - Allow Nebula (nebula1 interface)" + echo " - Allow SSH from $ALLOWED_SSH_IP" + echo " - Drop SSH from all others" + echo " - Block Proxmox UI from $NETWORK_INTERFACE" + return + fi + + # --- NAT Rules --- + log "Flushing NAT rules..." + iptables -t nat -F PREROUTING + iptables -t nat -F POSTROUTING + + log "Setting up NAT masquerading..." + iptables -t nat -A POSTROUTING -s "$INTERNAL_NETWORK" -o "$NETWORK_INTERFACE" -j MASQUERADE + + log "Setting up hairpin NAT for Nebula..." + iptables -t nat -A PREROUTING -s "$INTERNAL_NETWORK" -d "$PUBLIC_IP" -p udp --dport "$NEBULA_PORT" -j DNAT --to-destination "$NEBULA_IP:$NEBULA_PORT" + iptables -t nat -A POSTROUTING -s "$INTERNAL_NETWORK" -d "$NEBULA_IP" -p udp --dport "$NEBULA_PORT" -j SNAT --to-source "$PUBLIC_IP" + + log "Setting up hairpin NAT for Gitea SSH..." + iptables -t nat -A PREROUTING -s "$INTERNAL_NETWORK" -d "$PUBLIC_IP" -p tcp --dport "$GITEA_SSH_PORT" -j DNAT --to-destination "$GITEA_IP:$GITEA_SSH_PORT" + iptables -t nat -A POSTROUTING -s "$INTERNAL_NETWORK" -d "$GITEA_IP" -p tcp --dport "$GITEA_SSH_PORT" -j SNAT --to-source "$PUBLIC_IP" + + log "Setting up DNAT rules..." + # Nebula + iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p udp --dport "$NEBULA_PORT" -j DNAT --to-destination "$NEBULA_IP:$NEBULA_PORT" + # Caddy + iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p tcp --dport "$CADDY_HTTP_PORT" -j DNAT --to-destination "$CADDY_IP:$CADDY_HTTP_PORT" + iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p tcp --dport "$CADDY_HTTPS_PORT" -j DNAT --to-destination "$CADDY_IP:$CADDY_HTTPS_PORT" + # Gitea SSH + iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p tcp --dport "$GITEA_SSH_PORT" -j DNAT --to-destination "$GITEA_IP:$GITEA_SSH_PORT" + + # --- FORWARD Rules (custom chain) --- + setup_custom_chain + + log "Adding FORWARD rules to $CUSTOM_CHAIN..." + iptables -A "$CUSTOM_CHAIN" -d "$CADDY_IP" -p tcp --dport "$CADDY_HTTP_PORT" -j ACCEPT + iptables -A "$CUSTOM_CHAIN" -d "$CADDY_IP" -p tcp --dport "$CADDY_HTTPS_PORT" -j ACCEPT + iptables -A "$CUSTOM_CHAIN" -d "$NEBULA_IP" -p udp --dport "$NEBULA_PORT" -j ACCEPT + iptables -A "$CUSTOM_CHAIN" -d "$GITEA_IP" -p tcp --dport "$GITEA_SSH_PORT" -j ACCEPT + + # --- INPUT Rules --- + log "Flushing INPUT rules..." + iptables -F INPUT + + log "Setting up INPUT rules..." 
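    # Note (added): -A rules are evaluated in order, so SSH from $ALLOWED_SSH_IP is
    # accepted before the blanket port-22 DROP. The two -I inserts below each go to
    # the top of INPUT; because the vmbr1 ACCEPT is inserted last, it ends up above
    # the DROP for the Proxmox UI port (8006) on $NETWORK_INTERFACE.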
+ iptables -A INPUT -i nebula1 -j ACCEPT + iptables -A INPUT -p tcp --dport 22 -s "$ALLOWED_SSH_IP" -j ACCEPT + iptables -A INPUT -p tcp --dport 22 -j DROP + iptables -I INPUT -i "$NETWORK_INTERFACE" -p tcp --dport 8006 -j DROP + iptables -I INPUT -i vmbr1 -p tcp --dport 8006 -j ACCEPT +} + +save_rules() { + log "Saving iptables rules persistently..." + if command -v netfilter-persistent &>/dev/null; then + netfilter-persistent save + log "Rules saved via netfilter-persistent" + else + die "netfilter-persistent not found. Install with: apt install iptables-persistent" + fi +} + +show_status() { + echo "" + echo "=== Port Forwarding Status ===" + echo "" + echo "NAT PREROUTING rules:" + iptables -t nat -L PREROUTING -n --line-numbers 2>/dev/null | head -20 + echo "" + echo "FORWARD chain ($CUSTOM_CHAIN):" + iptables -L "$CUSTOM_CHAIN" -n --line-numbers 2>/dev/null || echo "Chain not found" + echo "" + echo "=== Services ===" + echo " HTTP/HTTPS: 80,443 → Caddy ($CADDY_IP)" + echo " Nebula: $NEBULA_PORT → Lighthouse ($NEBULA_IP)" + echo " Gitea SSH: $GITEA_SSH_PORT → $GITEA_IP" +} + +# ----------------------------------------------------------------------------- +# Main +# ----------------------------------------------------------------------------- +main() { + local action="${1:-apply}" + + case "$action" in + --dry-run|-n) + check_root + check_interface "$NETWORK_INTERFACE" + apply_rules true + ;; + --restore|-r) + check_root + restore_rules + ;; + --status|-s) + show_status + ;; + apply|"") + check_root + check_interface "$NETWORK_INTERFACE" + backup_rules + apply_rules false + save_rules + log "Setup complete!" + show_status + ;; + *) + echo "Usage: $0 [--dry-run|--restore|--status]" + exit 1 + ;; + esac +} + +main "$@" diff --git a/terraform/.gitignore b/terraform/.gitignore new file mode 100644 index 0000000..425b179 --- /dev/null +++ b/terraform/.gitignore @@ -0,0 +1,15 @@ +# Terraform +.terraform/ +*.tfstate +*.tfstate.* +*.tfvars +!*.tfvars.example +*.tfvars.json +crash.log +crash.*.log +override.tf +override.tf.json +*_override.tf +*_override.tf.json +.terraformrc +terraform.rc diff --git a/terraform/data.tf b/terraform/data.tf new file mode 100644 index 0000000..91927fb --- /dev/null +++ b/terraform/data.tf @@ -0,0 +1,184 @@ +# Data Tier (3000-3999) +# +# Enterprise HA data services with automatic failover. +# All VMs communicate via Nebula overlay (10.10.10.x) for migration-ready architecture. 
+# +# PostgreSQL Cluster (Patroni + etcd): +# 3000 postgres-01 10.10.10.30 - Patroni node (primary/replica elected dynamically) +# 3001 postgres-02 10.10.10.31 - Patroni node +# 3002 postgres-03 10.10.10.32 - Patroni node +# +# Valkey Sentinel (1 master + 2 replicas, Sentinel on each): +# 3003 valkey-01 10.10.10.33 - Master + Sentinel +# 3004 valkey-02 10.10.10.34 - Replica + Sentinel +# 3005 valkey-03 10.10.10.35 - Replica + Sentinel +# +# Garage S3 Cluster: +# 3009 garage-01 10.10.10.39 - S3-compatible storage node +# 3010 garage-02 10.10.10.40 - S3-compatible storage node +# 3011 garage-03 10.10.10.41 - S3-compatible storage node + +# ============================================================================= +# PostgreSQL HA Cluster (3 nodes) +# ============================================================================= + +module "postgres-01" { + source = "./modules/vm" + name = "postgres-01" + vmid = 3000 + node_name = var.proxmox_node + bridge_ip = "192.168.100.30" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 4096 + disk_size = 100 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +module "postgres-02" { + source = "./modules/vm" + name = "postgres-02" + vmid = 3001 + node_name = var.proxmox_node + bridge_ip = "192.168.100.31" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 4096 + disk_size = 100 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +module "postgres-03" { + source = "./modules/vm" + name = "postgres-03" + vmid = 3002 + node_name = var.proxmox_node + bridge_ip = "192.168.100.32" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 4096 + disk_size = 100 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +# ============================================================================= +# Valkey Sentinel (3 nodes: 1 master + 2 replicas + Sentinel on each) +# ============================================================================= + +module "valkey-01" { + source = "./modules/vm" + name = "valkey-01" + vmid = 3003 + node_name = var.proxmox_node + bridge_ip = "192.168.100.33" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 2048 + disk_size = 50 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +module "valkey-02" { + source = "./modules/vm" + name = "valkey-02" + vmid = 3004 + node_name = var.proxmox_node + bridge_ip = "192.168.100.34" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 2048 + disk_size = 50 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +module "valkey-03" { + source = "./modules/vm" + name = "valkey-03" + vmid = 3005 + node_name = var.proxmox_node + bridge_ip = "192.168.100.35" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 2048 + disk_size = 50 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +# ============================================================================= +# Garage S3 Cluster (3 nodes) +# ============================================================================= + +module "garage-01" { + source = 
"./modules/vm" + name = "garage-01" + vmid = 3009 + node_name = var.proxmox_node + bridge_ip = "192.168.100.39" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 2048 + disk_size = 200 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +module "garage-02" { + source = "./modules/vm" + name = "garage-02" + vmid = 3010 + node_name = var.proxmox_node + bridge_ip = "192.168.100.40" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 2048 + disk_size = 200 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +module "garage-03" { + source = "./modules/vm" + name = "garage-03" + vmid = 3011 + node_name = var.proxmox_node + bridge_ip = "192.168.100.41" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 2048 + disk_size = 200 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} diff --git a/terraform/firewall.tf b/terraform/firewall.tf new file mode 100644 index 0000000..6b30a8a --- /dev/null +++ b/terraform/firewall.tf @@ -0,0 +1,11 @@ +# Firewall Configuration +# +# Security groups are managed manually in Proxmox UI: +# Datacenter → Firewall → Security Group +# +# Groups: +# - base-egress: HTTP, HTTPS, DNS, NTP (default for VMs) +# - restricted: UDP 4242 only (Nebula tunnels, no internet) +# +# VMs reference these groups via the firewall_security_group variable. +# East-west segmentation (VM-to-VM) is handled by Nebula groups. diff --git a/terraform/management.tf b/terraform/management.tf new file mode 100644 index 0000000..a392c12 --- /dev/null +++ b/terraform/management.tf @@ -0,0 +1,37 @@ +# Management (1000-1999) +# +# Core infrastructure services that other VMs depend on. +# Lighthouse and DNS should be provisioned first. 
+# +# VMs: +# 1000 lighthouse 192.168.100.10 - Nebula lighthouse/relay +# 1001 dns 192.168.100.11 - Internal DNS server +# 1002 caddy 192.168.100.12 - Reverse proxy + +module "dns" { + source = "./modules/vm" + name = "dns" + vmid = 1001 + node_name = var.proxmox_node + bridge_ip = "192.168.100.11" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +module "caddy" { + source = "./modules/vm" + name = "caddy" + vmid = 1002 + node_name = var.proxmox_node + bridge_ip = "192.168.100.12" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} diff --git a/terraform/modules/vm/main.tf b/terraform/modules/vm/main.tf new file mode 100644 index 0000000..d84c2e7 --- /dev/null +++ b/terraform/modules/vm/main.tf @@ -0,0 +1,76 @@ +terraform { + required_providers { + proxmox = { + source = "bpg/proxmox" + } + } +} + +resource "proxmox_virtual_environment_vm" "vm" { + name = var.name + node_name = var.node_name + vm_id = var.vmid + + clone { + vm_id = var.clone_vmid + } + + cpu { + cores = var.cores + } + + memory { + dedicated = var.memory + floating = var.memory_floating + } + + disk { + datastore_id = var.datastore_id + interface = "scsi0" + iothread = true + discard = "on" + size = var.disk_size + } + + network_device { + bridge = var.network_bridge + } + + initialization { + datastore_id = var.datastore_id + ip_config { + ipv4 { + address = "${var.bridge_ip}/24" + gateway = var.gateway + } + } + user_account { + username = var.username + password = var.password + keys = [trimspace(file(var.ssh_key_path))] + } + } +} + +# Firewall configuration - always manage options to explicitly enable/disable +resource "proxmox_virtual_environment_firewall_options" "vm" { + node_name = var.node_name + vm_id = proxmox_virtual_environment_vm.vm.vm_id + + enabled = var.firewall_enabled + input_policy = var.firewall_enabled ? var.firewall_input_policy : "ACCEPT" + output_policy = var.firewall_enabled ? var.firewall_output_policy : "ACCEPT" +} + +resource "proxmox_virtual_environment_firewall_rules" "vm" { + count = var.firewall_enabled ? 
1 : 0 + + node_name = var.node_name + vm_id = proxmox_virtual_environment_vm.vm.vm_id + + rule { + security_group = var.firewall_security_group + } + + depends_on = [proxmox_virtual_environment_firewall_options.vm] +} diff --git a/terraform/modules/vm/outputs.tf b/terraform/modules/vm/outputs.tf new file mode 100644 index 0000000..108f8de --- /dev/null +++ b/terraform/modules/vm/outputs.tf @@ -0,0 +1,14 @@ +output "vm_id" { + value = proxmox_virtual_environment_vm.vm.vm_id + description = "The Proxmox VM ID" +} + +output "ip_address" { + value = var.bridge_ip + description = "The bridge IP address" +} + +output "name" { + value = proxmox_virtual_environment_vm.vm.name + description = "The VM name" +} diff --git a/terraform/modules/vm/variables.tf b/terraform/modules/vm/variables.tf new file mode 100644 index 0000000..d9b427c --- /dev/null +++ b/terraform/modules/vm/variables.tf @@ -0,0 +1,107 @@ +variable "name" { + type = string + description = "VM name" +} + +variable "vmid" { + type = number + description = "Proxmox VM ID" +} + +variable "node_name" { + type = string + description = "Proxmox node name" +} + +variable "bridge_ip" { + type = string + description = "IP address on bridge network (without CIDR)" +} + +variable "gateway" { + type = string + default = "192.168.100.1" + description = "Gateway IP address" +} + +variable "network_bridge" { + type = string + default = "vmbr1" + description = "Network bridge name" +} + +variable "datastore_id" { + type = string + default = "local-zfs" + description = "Proxmox datastore for disks" +} + +variable "cores" { + type = number + default = 1 + description = "Number of CPU cores" +} + +variable "memory" { + type = number + default = 2048 + description = "Memory in MB" +} + +variable "memory_floating" { + type = number + default = null + description = "Floating memory (ballooning) in MB" +} + +variable "disk_size" { + type = number + default = 50 + description = "Disk size in GB" +} + +variable "clone_vmid" { + type = number + default = 9000 + description = "Template VM ID to clone from" +} + +variable "username" { + type = string + description = "VM user account name" +} + +variable "password" { + type = string + sensitive = true + description = "VM user account password" +} + +variable "ssh_key_path" { + type = string + description = "Path to SSH public key file" +} + +variable "firewall_enabled" { + type = bool + default = true + description = "Enable Proxmox firewall for this VM" +} + +variable "firewall_security_group" { + type = string + default = "base-egress" + description = "Security group to assign (base-egress, infrastructure, restricted)" +} + +variable "firewall_input_policy" { + type = string + default = "DROP" + description = "Default policy for inbound traffic" +} + +variable "firewall_output_policy" { + type = string + default = "DROP" + description = "Default policy for outbound traffic" +} diff --git a/terraform/providers.tf b/terraform/providers.tf new file mode 100644 index 0000000..74f7756 --- /dev/null +++ b/terraform/providers.tf @@ -0,0 +1,14 @@ +terraform { + required_providers { + proxmox = { + source = "bpg/proxmox" + version = "0.66.1" + } + } +} + +provider "proxmox" { + endpoint = var.proxmox_endpoint + api_token = "${var.proxmox_api_token_id}=${var.proxmox_api_token_secret}" + insecure = var.proxmox_insecure +} diff --git a/terraform/services.tf b/terraform/services.tf new file mode 100644 index 0000000..0215ab2 --- /dev/null +++ b/terraform/services.tf @@ -0,0 +1,78 @@ +# Trusted Services (2000-2999) +# +# 
Infrastructure services that support development and operations. +# All VMs in this tier use the "infrastructure" Nebula group. +# +# VMs: +# 2000 vault-01 192.168.100.20 - Vault cluster node 1 +# 2001 vault-02 192.168.100.21 - Vault cluster node 2 +# 2002 vault-03 192.168.100.22 - Vault cluster node 3 +# 2003 gitea 192.168.100.23 - Git hosting + +module "vault-01" { + source = "./modules/vm" + name = "vault-01" + vmid = 2000 + node_name = var.proxmox_node + bridge_ip = "192.168.100.20" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 2048 + disk_size = 50 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +module "vault-02" { + source = "./modules/vm" + name = "vault-02" + vmid = 2001 + node_name = var.proxmox_node + bridge_ip = "192.168.100.21" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 2048 + disk_size = 50 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +module "vault-03" { + source = "./modules/vm" + name = "vault-03" + vmid = 2002 + node_name = var.proxmox_node + bridge_ip = "192.168.100.22" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 2048 + disk_size = 50 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} + +module "gitea" { + source = "./modules/vm" + name = "gitea" + vmid = 2003 + node_name = var.proxmox_node + bridge_ip = "192.168.100.23" + gateway = var.gateway + datastore_id = var.datastore_id + clone_vmid = var.template_vmid + cores = 2 + memory = 2048 + disk_size = 100 + username = var.username + password = var.password + ssh_key_path = var.ssh_key_path +} diff --git a/terraform/terraform.tfvars.example b/terraform/terraform.tfvars.example new file mode 100644 index 0000000..71bed6c --- /dev/null +++ b/terraform/terraform.tfvars.example @@ -0,0 +1,13 @@ +# Proxmox Connection +proxmox_endpoint = "https://proxmox.example:8006/" +proxmox_api_token_id = "terraform@pve!terraform" +proxmox_api_token_secret = "your-api-token-secret-here" +proxmox_insecure = true +proxmox_node = "pve" + +# VM Defaults +username = "admin" +password = "changeme" +ssh_key_path = "~/.ssh/id_ed25519.pub" +datastore_id = "local-zfs" +template_vmid = 9000 diff --git a/terraform/vars.tf b/terraform/vars.tf new file mode 100644 index 0000000..59f7001 --- /dev/null +++ b/terraform/vars.tf @@ -0,0 +1,74 @@ +# ============================================================================= +# Proxmox Connection +# ============================================================================= + +variable "proxmox_endpoint" { + type = string + description = "Proxmox API endpoint (e.g., https://proxmox.example:8006/)" +} + +variable "proxmox_api_token_id" { + type = string + description = "Proxmox API token ID (e.g., terraform@pve!terraform)" +} + +variable "proxmox_api_token_secret" { + type = string + sensitive = true + description = "Proxmox API token secret" +} + +variable "proxmox_insecure" { + type = bool + default = true + description = "Skip TLS verification for self-signed certificates" +} + +variable "proxmox_node" { + type = string + description = "Proxmox node name to deploy VMs on" +} + +# ============================================================================= +# VM Defaults +# ============================================================================= + +variable "username" { 
  type        = string
  description = "Default VM user account name"
}

variable "password" {
  type        = string
  sensitive   = true
  description = "Default VM user account password"
}

variable "ssh_key_path" {
  type        = string
  description = "Path to SSH public key file"
}

variable "datastore_id" {
  type        = string
  default     = "local-zfs"
  description = "Default Proxmox datastore for VM disks"
}

variable "network_bridge" {
  type        = string
  default     = "vmbr1"
  description = "Default network bridge for VMs"
}

variable "gateway" {
  type        = string
  default     = "192.168.100.1"
  description = "Default gateway for VMs"
}

variable "template_vmid" {
  type        = number
  default     = 9000
  description = "Template VM ID to clone from"
}
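
Since every tier above instantiates the same `modules/vm` interface, adding a VM to the workload tier (4000-4999, the one tier without a `.tf` file in this patch) is just one more module block at the root. The sketch below is illustrative only and is not part of the patch: the file name (`workloads.tf`), VMID 4000, bridge IP `192.168.100.40`, the sizing, and the choice of the "restricted" security group are assumptions layered on top of the module variables defined above, not values taken from the repository.

```hcl
# workloads.tf (hypothetical) - Workloads tier (4000-4999)
#
# Only name, vmid, node_name, bridge_ip, username, password, and
# ssh_key_path lack defaults in modules/vm/variables.tf; everything else
# (bridge, datastore, template, firewall policies) falls back to the
# module defaults.

module "app-01" {
  source       = "./modules/vm"
  name         = "app-01"
  vmid         = 4000              # assumed: next tier in the VMID scheme
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.40"  # assumed: continues the .1x/.2x pattern
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 4096
  disk_size    = 50
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path

  # "restricted" is one of the groups named in the firewall_security_group
  # variable description; the module default is "base-egress".
  firewall_security_group = "restricted"
}

# Optional root-level output wiring up the module outputs defined in
# modules/vm/outputs.tf, e.g. for feeding the address to Ansible by hand.
output "app_01_ip" {
  value       = module.app-01.ip_address
  description = "Bridge IP of the hypothetical app-01 VM"
}
```

Because each VM is its own module instance, a change like this can be previewed in isolation with `terraform plan -target=module.app-01` before running a full apply.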