public-ready-init
Commit: 0c77379ab3

.gitignore (new file)
@@ -0,0 +1,42 @@
# Terraform
terraform/.terraform/
terraform/*.tfstate
terraform/*.tfstate.*
terraform/*.tfvars
!terraform/*.tfvars.example
terraform/crash.log

# Ansible
ansible/inventory.ini
!ansible/inventory.ini.example
ansible/services.yml
!ansible/services.yml.example
ansible/vault/secrets.yml
ansible/vault/*pass*
ansible/databases/

# Nebula
nebula/ca.key
nebula/**/*.key
*.key
*.pem

# Secrets
secrets/
.env
.env.*
token.txt

# Backups
backup/

# IDE
.idea/
.vscode/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

README.md (new file)
@@ -0,0 +1,146 @@
# Arvandor

Production-grade infrastructure-as-code for running services on Proxmox with enterprise HA patterns.

## Overview

Arvandor provides a complete infrastructure stack:

- **Terraform** - VM provisioning on Proxmox
- **Ansible** - Configuration management
- **Nebula** - Encrypted overlay network
- **Vault** - Secrets management (3-node Raft cluster)
- **PostgreSQL** - Database (3-node Patroni + etcd)
- **Valkey** - Cache/queue (3-node Sentinel)
- **Garage** - S3-compatible storage (3-node cluster)

## Architecture

```
┌─────────────────────────────────────────────────────────────────────────┐
│                              Proxmox Host                               │
├─────────────────────────────────────────────────────────────────────────┤
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐     │
│  │ Management  │  │  Services   │  │    Data     │  │  Workloads  │     │
│  │  1000-1999  │  │  2000-2999  │  │  3000-3999  │  │  4000-4999  │     │
│  │             │  │             │  │             │  │             │     │
│  │ DNS, Caddy  │  │ Vault       │  │ PostgreSQL  │  │ Your Apps   │     │
│  │ Lighthouse  │  │ Gitea       │  │ Valkey      │  │             │     │
│  │             │  │             │  │ Garage      │  │             │     │
│  └──────┬──────┘  └──────┬──────┘  └──────┬──────┘  └──────┬──────┘     │
│         │                │                │                │            │
│         └────────────────┴────────────────┴────────────────┘            │
│                                   │                                     │
│                     Nebula Overlay (10.10.10.0/24)                      │
└─────────────────────────────────────────────────────────────────────────┘
```

## Quick Start

### 1. Prerequisites

- Proxmox VE host
- Arch Linux VM template (VMID 9000)
- Terraform, Ansible installed locally
- Nebula binary for certificate generation

### 2. Configure

```bash
# Clone repository
git clone <repo-url> arvandor
cd arvandor

# Configure Terraform
cp terraform/terraform.tfvars.example terraform/terraform.tfvars
vim terraform/terraform.tfvars

# Configure Ansible
cp ansible/inventory.ini.example ansible/inventory.ini
vim ansible/inventory.ini

# Generate Nebula CA
cd nebula
nebula-cert ca -name "Arvandor CA"
```

### 3. Provision

```bash
# Create VMs
cd terraform
terraform init
terraform plan
terraform apply

# Bootstrap VMs (in order)
cd ../ansible
ansible-playbook -i inventory.ini playbooks/bootstrap.yml
ansible-playbook -i inventory.ini playbooks/security.yml
ansible-playbook -i inventory.ini playbooks/nebula.yml
```

### 4. Deploy Services

```bash
# DNS server
ansible-playbook -i inventory.ini playbooks/dns.yml

# PostgreSQL HA cluster
ansible-playbook -i inventory.ini playbooks/postgres-ha.yml

# Valkey Sentinel
ansible-playbook -i inventory.ini playbooks/valkey-sentinel.yml

# Garage S3
ansible-playbook -i inventory.ini playbooks/garage.yml
```

## Directory Structure

```
arvandor/
├── terraform/           # VM provisioning
│   ├── modules/vm/      # Reusable VM module
│   ├── management.tf    # DNS, Caddy
│   ├── services.tf      # Vault, Gitea
│   └── data.tf          # PostgreSQL, Valkey, Garage
├── ansible/             # Configuration management
│   ├── playbooks/       # Core playbooks
│   ├── templates/       # Jinja2 templates
│   └── vault/           # Ansible Vault secrets
├── nebula/              # Overlay network
│   └── configs/         # Per-host certificates
├── network/             # Host networking
└── docs/                # Documentation
```

## Network Design

### Two-Network Model

| Network | CIDR | Purpose |
|---------|------|---------|
| Bridge (vmbr1) | 192.168.100.0/24 | Provisioning only |
| Nebula | 10.10.10.0/24 | All application traffic |

VMs only accept traffic from the Proxmox host (for Ansible) and the Nebula overlay. This provides isolation even if someone gains bridge network access.

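Each VM therefore carries two addresses in its inventory entry: `ansible_host` (bridge IP, used only for SSH through the Proxmox jump host) and `nebula_ip` (overlay IP, used for all service traffic). As an illustration, the same values from `ansible/inventory.ini.example` expressed in Ansible's YAML inventory form (the repo itself ships an INI inventory; this sketch is equivalent, not an additional file):

```yaml
# Illustrative only - mirrors the dns host from ansible/inventory.ini.example.
infrastructure:
  hosts:
    dns:
      ansible_host: 192.168.100.11   # bridge network, provisioning/SSH only
      nebula_ip: 10.10.10.11         # overlay network, application traffic
      vmid: 1001
```
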
### Security Groups (Nebula)

| Group | Purpose |
|-------|---------|
| `admin` | Full access (your devices) |
| `infrastructure` | Core services |
| `projects` | Application workloads |
| `games` | Isolated game servers |

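These groups are embedded in each host's Nebula certificate (via `nebula-cert sign ... -groups ...`) and enforced by the per-host Nebula firewall. A minimal sketch of group-based inbound rules in a Nebula `config.yml` (illustrative only, not the exact template rendered from `ansible/templates/nebula-config.yml.j2`):

```yaml
firewall:
  outbound:
    - port: any
      proto: any
      host: any
  inbound:
    # admin devices may reach everything
    - port: any
      proto: any
      groups:
        - admin
    # infrastructure hosts may talk to each other (Vault, Patroni, Valkey, Garage ports)
    - port: any
      proto: any
      groups:
        - infrastructure
```
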
## Documentation

- [Getting Started](docs/getting-started.md) - Detailed setup guide
- [Architecture](docs/architecture.md) - Design decisions
- [Provisioning Guide](docs/provisioning-guide.md) - Adding new VMs

## License

MIT

ansible/.gitignore (new file)
@@ -0,0 +1,20 @@
# Inventory (contains IPs and hostnames)
inventory.ini
!inventory.ini.example

# Services (contains real service configs)
services.yml
!services.yml.example

# Ansible Vault secrets
vault/secrets.yml
vault/ansible_vault_pass
vault/*.pass

# Database dumps
databases/

# SSH keys
*.key
*.pem
id_*

ansible/inventory.ini.example (new file)
@@ -0,0 +1,89 @@
# Arvandor Infrastructure Inventory
#
# Groups:
#   infrastructure - Core services (Nebula group: infrastructure)
#   projects       - Application workloads (Nebula group: projects)
#   games          - Game servers (Nebula group: games)
#   all            - All managed VMs
#
# Variables per host:
#   ansible_host - Bridge network IP (for SSH via Proxmox jump)
#   nebula_ip    - Overlay network IP (for inter-service communication)
#   vmid         - Proxmox VM ID (used for cert paths)

[infrastructure]
dns          ansible_host=192.168.100.11 nebula_ip=10.10.10.11 vmid=1001
caddy        ansible_host=192.168.100.12 nebula_ip=10.10.10.12 vmid=1002
vault-01     ansible_host=192.168.100.20 nebula_ip=10.10.10.20 vmid=2000
vault-02     ansible_host=192.168.100.21 nebula_ip=10.10.10.21 vmid=2001
vault-03     ansible_host=192.168.100.22 nebula_ip=10.10.10.22 vmid=2002
gitea        ansible_host=192.168.100.23 nebula_ip=10.10.10.23 vmid=2003
postgres-01  ansible_host=192.168.100.30 nebula_ip=10.10.10.30 vmid=3000
postgres-02  ansible_host=192.168.100.31 nebula_ip=10.10.10.31 vmid=3001
postgres-03  ansible_host=192.168.100.32 nebula_ip=10.10.10.32 vmid=3002
valkey-01    ansible_host=192.168.100.33 nebula_ip=10.10.10.33 vmid=3003
valkey-02    ansible_host=192.168.100.34 nebula_ip=10.10.10.34 vmid=3004
valkey-03    ansible_host=192.168.100.35 nebula_ip=10.10.10.35 vmid=3005
garage-01    ansible_host=192.168.100.39 nebula_ip=10.10.10.39 vmid=3009
garage-02    ansible_host=192.168.100.40 nebula_ip=10.10.10.40 vmid=3010
garage-03    ansible_host=192.168.100.41 nebula_ip=10.10.10.41 vmid=3011

[projects]
app-server   ansible_host=192.168.100.50 nebula_ip=10.10.10.50 vmid=4050

[games]
# Example game servers (firewall disabled, use host DNAT + guest ufw)
# minecraft  ansible_host=192.168.100.52 nebula_ip=10.10.10.52 vmid=4052

[docker]
gitea
app-server

[vault]
vault-01
vault-02
vault-03

# PostgreSQL HA Cluster (Patroni + etcd)
[postgres]
postgres-01
postgres-02
postgres-03

# Valkey Sentinel (1 master + 2 replicas)
[valkey]
valkey-01
valkey-02
valkey-03

# Garage S3 Cluster
[garage]
garage-01
garage-02
garage-03

[all:children]
infrastructure
projects
games

[all:vars]
# SSH jumps through the Proxmox host to reach VMs on bridge network
ansible_user=admin
ansible_ssh_common_args='-o ProxyCommand="ssh -W %h:%p -q admin@10.10.10.1"'
ansible_ssh_extra_args='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
ansible_python_interpreter=/usr/bin/python3

# Infrastructure context (update these for your environment)
lighthouse_nebula_ip=10.10.10.10
lighthouse_bridge_ip=192.168.100.10
lighthouse_public_ip=203.0.113.10

# Optional: secondary lighthouse for reduced latency
home_lighthouse_nebula_ip=10.10.10.102
home_lighthouse_public_ip=203.0.113.20
proxmox_host_ip=192.168.100.1
bridge_network=192.168.100.0/24

# Caddy reverse proxy (for iptables rules)
caddy_nebula_ip=10.10.10.12

ansible/playbooks/bootstrap.yml (new file)
@@ -0,0 +1,63 @@
---
# Bootstrap playbook for new VMs
#
# Run FIRST on newly provisioned VMs before security/nebula playbooks.
# Updates system packages and reboots if kernel changed.
#
# Usage: ansible-playbook -i inventory.ini playbooks/bootstrap.yml --limit "new-vm"

- name: Bootstrap New VMs
  hosts: all
  become: true
  tasks:
    - name: Initialize pacman keyring
      command: pacman-key --init
      args:
        creates: /etc/pacman.d/gnupg/trustdb.gpg

    - name: Populate pacman keyring with Arch Linux keys
      command: pacman-key --populate archlinux
      register: populate_result
      changed_when: "'locally signed' in populate_result.stdout"

    - name: Update archlinux-keyring package first
      community.general.pacman:
        name: archlinux-keyring
        state: latest
        update_cache: true

    - name: Get current running kernel version
      command: uname -r
      register: running_kernel
      changed_when: false

    - name: Update all packages
      community.general.pacman:
        update_cache: true
        upgrade: true
      register: update_result

    - name: Install essential packages
      community.general.pacman:
        name:
          - rsync
        state: present

    - name: Get installed kernel version
      shell: pacman -Q linux | awk '{print $2}' | sed 's/\.arch/-arch/'
      register: installed_kernel
      changed_when: false

    - name: Check if reboot is needed (kernel updated)
      set_fact:
        reboot_needed: "{{ running_kernel.stdout not in installed_kernel.stdout }}"

    - name: Display kernel status
      debug:
        msg: "Running: {{ running_kernel.stdout }}, Installed: {{ installed_kernel.stdout }}, Reboot needed: {{ reboot_needed }}"

    - name: Reboot if kernel was updated
      reboot:
        msg: "Kernel updated, rebooting"
        reboot_timeout: 300
      when: reboot_needed | bool

ansible/playbooks/data-service.yml (new file)
@@ -0,0 +1,337 @@
---
# Data Service Provisioning Playbook
#
# Provisions PostgreSQL database, Valkey ACL user, Garage S3 bucket/key,
# and Vault credentials for a service defined in services.yml.
#
# Usage:
#   ansible-playbook -i inventory.ini playbooks/data-service.yml -e "service=myapp"
#
# With database restore:
#   ansible-playbook -i inventory.ini playbooks/data-service.yml -e "service=myapp" -e "restore=true"
#
# Prerequisites:
#   - postgres-primary running (run playbooks/postgres.yml first)
#   - valkey-primary running with ACLs (run playbooks/valkey.yml first)
#   - Vault cluster initialized and unsealed (run playbooks/vault.yml first)
#   - Database secrets engine enabled: vault secrets enable database
#   - VAULT_ADDR and VAULT_TOKEN environment variables set

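# The service definitions this playbook reads live in ansible/services.yml
# (see services.yml.example). For illustration only, a minimal entry looks like
# (the "myapp" name and values are placeholders):
#
#   services:
#     myapp:
#       postgres:
#         enabled: true
#       valkey:
#         enabled: true
#         key_prefix: "myapp"
#       s3:
#         enabled: true
#         bucket: "myapp-media"
#       vault_roles: [app, migrate]
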
- name: Load Service Configuration
  hosts: localhost
  gather_facts: false
  vars_files:
    - ../services.yml
  tasks:
    - name: Validate service parameter
      fail:
        msg: "Service '{{ service }}' not found in services.yml"
      when: service not in services

    - name: Set service facts
      set_fact:
        svc: "{{ services[service] }}"
        postgres_enabled: "{{ services[service].postgres.enabled | default(false) }}"
        valkey_enabled: "{{ services[service].valkey.enabled | default(false) }}"
        s3_enabled: "{{ services[service].s3.enabled | default(false) }}"
        vault_roles: "{{ services[service].vault_roles | default(['app', 'migrate']) }}"

    - name: Display service info
      debug:
        msg: |
          Service: {{ service }}
          Description: {{ svc.description }}
          PostgreSQL: {{ postgres_enabled }}
          Valkey: {{ valkey_enabled }} (prefix: {{ svc.valkey.key_prefix | default(service) }}:*)
          S3: {{ s3_enabled }} (bucket: {{ svc.s3.bucket | default(service + '-media') }})
          Vault roles: {{ vault_roles | join(', ') }}

- name: Setup PostgreSQL Database and Roles
  hosts: postgres-01
  become: true
  vars_files:
    - ../vault/secrets.yml
    - ../services.yml
  vars:
    svc: "{{ services[service] }}"
  tasks:
    - name: Skip if PostgreSQL not enabled
      meta: end_host
      when: not (svc.postgres.enabled | default(false))

    - name: Check if database exists
      become_user: postgres
      shell: psql -tAc "SELECT 1 FROM pg_database WHERE datname='{{ service }}'"
      register: db_exists
      changed_when: false

    - name: Template static roles SQL
      template:
        src: ../templates/pg-static-roles.sql.j2
        dest: "/tmp/{{ service }}-roles.sql"
        mode: '0644'
      when: db_exists.stdout != "1"

    - name: Create database and static roles
      become_user: postgres
      shell: psql -f /tmp/{{ service }}-roles.sql
      when: db_exists.stdout != "1"

    - name: Create common extensions (requires superuser)
      become_user: postgres
      shell: |
        psql -d {{ service }} -c "CREATE EXTENSION IF NOT EXISTS btree_gist;"
        psql -d {{ service }} -c 'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";'
      when: db_exists.stdout != "1"

    - name: Clean up SQL file
      file:
        path: "/tmp/{{ service }}-roles.sql"
        state: absent

    - name: Check for dump file
      delegate_to: localhost
      become: false
      stat:
        path: "{{ playbook_dir }}/../{{ svc.postgres.restore_from }}"
      register: dump_file
      when: restore | default(false) | bool

    - name: Copy dump to server
      copy:
        src: "{{ playbook_dir }}/../{{ svc.postgres.restore_from }}"
        dest: "/tmp/{{ service }}.dump"
        mode: '0644'
      when:
        - restore | default(false) | bool
        - dump_file.stat.exists | default(false)

    - name: Restore database from dump
      become_user: postgres
      shell: pg_restore --no-owner --no-privileges -d {{ service }} /tmp/{{ service }}.dump
      when:
        - restore | default(false) | bool
        - dump_file.stat.exists | default(false)
      ignore_errors: true  # May fail if data already exists

    - name: Clean up dump file
      file:
        path: "/tmp/{{ service }}.dump"
        state: absent
      when: restore | default(false) | bool

- name: Setup Valkey ACL User
  hosts: valkey-01
  become: true
  vars_files:
    - ../vault/secrets.yml
    - ../services.yml
  vars:
    svc: "{{ services[service] }}"
    valkey_nebula_ip: "{{ hostvars['valkey-01']['nebula_ip'] }}"
  tasks:
    - name: Skip if Valkey not enabled
      meta: end_host
      when: not (svc.valkey.enabled | default(false))

    - name: Generate service password
      set_fact:
        valkey_service_password: "{{ lookup('password', '/dev/null length=32 chars=hexdigits') }}"

    - name: Check if ACL user exists
      command: valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} ACL GETUSER {{ service }}
      register: acl_user_check
      changed_when: false
      failed_when: false
      no_log: true

    - name: Create ACL user for service
      shell: |
        valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} \
          ACL SETUSER {{ service }} on '>{{ valkey_service_password }}' '~{{ svc.valkey.key_prefix | default(service) }}:*' '&*' '+@all'
      when: acl_user_check.rc != 0
      no_log: true

    - name: Update ACL user password if exists
      shell: |
        valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} \
          ACL SETUSER {{ service }} on '>{{ valkey_service_password }}' '~{{ svc.valkey.key_prefix | default(service) }}:*' '&*' '+@all'
      when: acl_user_check.rc == 0
      no_log: true

    - name: Persist ACL to disk
      command: valkey-cli -h {{ valkey_nebula_ip }} --user admin --pass {{ valkey_admin_password }} ACL SAVE
      no_log: true

    - name: Store credentials in Vault
      delegate_to: localhost
      become: false
      shell: |
        vault kv put secret/{{ service }}/valkey \
          host={{ valkey_nebula_ip }} \
          port=6379 \
          username={{ service }} \
          password={{ valkey_service_password }} \
          key_prefix={{ svc.valkey.key_prefix | default(service) }}
      environment:
        VAULT_ADDR: "{{ lookup('env', 'VAULT_ADDR') | default('http://' + hostvars['vault-01']['nebula_ip'] + ':8200', true) }}"
        VAULT_TOKEN: "{{ lookup('env', 'VAULT_TOKEN') }}"
      no_log: true

- name: Setup Garage S3 Bucket and Key
  hosts: garage-01
  become: true
  vars_files:
    - ../services.yml
  vars:
    svc: "{{ services[service] }}"
    garage_nebula_ip: "{{ hostvars['garage-01']['nebula_ip'] }}"
  tasks:
    - name: Skip if S3 not enabled
      meta: end_host
      when: not (svc.s3.enabled | default(false))

    - name: Set bucket name
      set_fact:
        bucket_name: "{{ svc.s3.bucket | default(service + '-media') }}"

    - name: Check if bucket exists
      command: garage -c /etc/garage/garage.toml bucket list
      register: bucket_list
      changed_when: false

    - name: Create bucket if needed
      command: garage -c /etc/garage/garage.toml bucket create {{ bucket_name }}
      when: bucket_name not in bucket_list.stdout

    - name: Check if key exists
      command: garage -c /etc/garage/garage.toml key list
      register: key_list
      changed_when: false

    - name: Create API key for service
      command: garage -c /etc/garage/garage.toml key create {{ service }}-key
      register: key_create
      when: (service + '-key') not in key_list.stdout

    - name: Get key info
      command: garage -c /etc/garage/garage.toml key info {{ service }}-key --show-secret
      register: key_info
      changed_when: false
      no_log: true

    - name: Parse key credentials
      set_fact:
        s3_access_key: "{{ key_info.stdout | regex_search('Key ID: ([A-Za-z0-9]+)', '\\1') | first }}"
        s3_secret_key: "{{ key_info.stdout | regex_search('Secret key: ([a-f0-9]+)', '\\1') | first }}"
      no_log: true

    - name: Grant bucket permissions to key
      command: >
        garage -c /etc/garage/garage.toml bucket allow {{ bucket_name }}
        --read --write --key {{ service }}-key
      register: bucket_allow
      changed_when: "'already' not in bucket_allow.stderr"

    - name: Store S3 credentials in Vault
      delegate_to: localhost
      become: false
      shell: |
        vault kv put secret/{{ service }}/s3 \
          access_key={{ s3_access_key }} \
          secret_key={{ s3_secret_key }} \
          bucket={{ bucket_name }} \
          endpoint=http://{{ garage_nebula_ip }}:3900
      environment:
        VAULT_ADDR: "{{ lookup('env', 'VAULT_ADDR') | default('http://' + hostvars['vault-01']['nebula_ip'] + ':8200', true) }}"
        VAULT_TOKEN: "{{ lookup('env', 'VAULT_TOKEN') }}"
      no_log: true

- name: Configure Vault Database Credentials
  hosts: localhost
  gather_facts: false
  vars_files:
    - ../vault/secrets.yml
    - ../services.yml
  vars:
    svc: "{{ services[service] }}"
    postgres_nebula_ip: "{{ hostvars['postgres-01']['nebula_ip'] }}"
    vault_nebula_ip: "{{ hostvars['vault-01']['nebula_ip'] }}"
  environment:
    VAULT_ADDR: "{{ vault_addr | default('http://' + vault_nebula_ip + ':8200') }}"
  tasks:
    - name: Skip if PostgreSQL not enabled
      meta: end_play
      when: not (svc.postgres.enabled | default(false))

    - name: Check if VAULT_TOKEN is set
      fail:
        msg: "VAULT_TOKEN environment variable must be set"
      when: lookup('env', 'VAULT_TOKEN') == ''

    - name: Configure Vault database connection
      shell: |
        vault write database/config/{{ service }} \
          plugin_name="postgresql-database-plugin" \
          allowed_roles="{{ service }}-app,{{ service }}-migrate" \
          connection_url="postgresql://{% raw %}{{username}}:{{password}}{% endraw %}@{{ postgres_nebula_ip }}:5432/{{ service }}" \
          username="vault_admin" \
          password="{{ vault_admin_password }}"
      register: vault_config
      changed_when: vault_config.rc == 0

    - name: Create Vault app role
      shell: |
        vault write database/roles/{{ service }}-app \
          db_name="{{ service }}" \
          creation_statements="CREATE ROLE \"{% raw %}{{name}}{% endraw %}\" WITH LOGIN PASSWORD '{% raw %}{{password}}{% endraw %}' VALID UNTIL '{% raw %}{{expiration}}{% endraw %}' INHERIT; GRANT {{ service }}_app TO \"{% raw %}{{name}}{% endraw %}\"; ALTER ROLE \"{% raw %}{{name}}{% endraw %}\" SET ROLE = {{ service }}_app;" \
          revocation_statements="REASSIGN OWNED BY \"{% raw %}{{name}}{% endraw %}\" TO {{ service }}_owner; REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE USAGE ON SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE CONNECT ON DATABASE {{ service }} FROM \"{% raw %}{{name}}{% endraw %}\"; DROP ROLE IF EXISTS \"{% raw %}{{name}}{% endraw %}\";" \
          default_ttl="1h" \
          max_ttl="24h"
      when: "'app' in (svc.vault_roles | default(['app', 'migrate']))"

    - name: Create Vault migrate role
      shell: |
        vault write database/roles/{{ service }}-migrate \
          db_name="{{ service }}" \
          creation_statements="CREATE ROLE \"{% raw %}{{name}}{% endraw %}\" WITH LOGIN PASSWORD '{% raw %}{{password}}{% endraw %}' VALID UNTIL '{% raw %}{{expiration}}{% endraw %}' INHERIT; GRANT {{ service }}_migrate TO \"{% raw %}{{name}}{% endraw %}\"; ALTER ROLE \"{% raw %}{{name}}{% endraw %}\" SET ROLE = {{ service }}_migrate;" \
          revocation_statements="REASSIGN OWNED BY \"{% raw %}{{name}}{% endraw %}\" TO {{ service }}_owner; REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE ALL PRIVILEGES ON SCHEMA public FROM \"{% raw %}{{name}}{% endraw %}\"; REVOKE CONNECT ON DATABASE {{ service }} FROM \"{% raw %}{{name}}{% endraw %}\"; DROP ROLE IF EXISTS \"{% raw %}{{name}}{% endraw %}\";" \
          default_ttl="15m" \
          max_ttl="1h"
      when: "'migrate' in (svc.vault_roles | default(['app', 'migrate']))"

- name: Display Service Summary
  hosts: localhost
  gather_facts: false
  vars_files:
    - ../services.yml
  vars:
    svc: "{{ services[service] }}"
    postgres_ip: "{{ hostvars['postgres-01']['nebula_ip'] }}"
    valkey_ip: "{{ hostvars['valkey-01']['nebula_ip'] }}"
    garage_ip: "{{ hostvars['garage-01']['nebula_ip'] }}"
  tasks:
    - name: Service provisioning complete
      debug:
        msg:
          - "=========================================="
          - "Service: {{ service }}"
          - "Description: {{ svc.description }}"
          - "=========================================="
          - ""
          - "PostgreSQL:"
          - "  Database: {{ service }} @ {{ postgres_ip }}:5432"
          - "  App credentials:     vault read database/creds/{{ service }}-app"
          - "  Migrate credentials: vault read database/creds/{{ service }}-migrate"
          - ""
          - "Valkey:"
          - "  Host: {{ valkey_ip }}:6379"
          - "  User: {{ service }}"
          - "  Key prefix: {{ svc.valkey.key_prefix | default(service) }}:*"
          - "  Credentials: vault kv get secret/{{ service }}/valkey"
          - ""
          - "S3:"
          - "  Bucket: {{ svc.s3.bucket | default(service + '-media') }} @ http://{{ garage_ip }}:3900"
          - "  Credentials: vault kv get secret/{{ service }}/s3"
          - ""
          - "=========================================="

ansible/playbooks/dns-client.yml (new file)
@@ -0,0 +1,35 @@
---
# DNS Client Configuration Playbook
#
# Usage: ansible-playbook -i inventory.ini playbooks/dns-client.yml
#
# Configures all VMs to use the internal Unbound DNS server.
# Run AFTER dns.yml has configured the server.

- name: Configure DNS Clients
  hosts: all
  become: true
  vars:
    dns_server: "{{ hostvars['dns']['nebula_ip'] }}"
  tasks:
    - name: Configure resolv.conf to use internal DNS
      copy:
        dest: /etc/resolv.conf
        content: |
          # Managed by Ansible - changes will be overwritten
          # Internal DNS server on Nebula overlay
          nameserver {{ dns_server }}
          # Fallback to public DNS if internal is unreachable
          nameserver 1.1.1.1
          nameserver 8.8.8.8
          # Search domain for short hostnames
          search nebula
        owner: root
        group: root
        mode: '0644'

    - name: Test DNS resolution
      command: getent hosts lighthouse.nebula
      register: dns_test
      changed_when: false
      failed_when: dns_test.rc != 0

ansible/playbooks/dns.yml (new file)
@@ -0,0 +1,77 @@
---
# Unbound DNS Server Playbook
#
# Usage: ansible-playbook -i inventory.ini playbooks/dns.yml
#
# Configures Unbound as a recursive resolver with local DNS records
# for the Nebula overlay network.

- name: Setup Unbound DNS Server
  hosts: dns
  become: true
  tasks:
    - name: Install unbound and bind-tools
      community.general.pacman:
        name:
          - unbound
          - bind  # provides dig for verification
        state: present

    - name: Create unbound config directory
      file:
        path: /etc/unbound
        state: directory
        owner: root
        group: root
        mode: '0755'

    - name: Deploy main unbound configuration
      template:
        src: ../templates/unbound.conf.j2
        dest: /etc/unbound/unbound.conf
        owner: root
        group: root
        mode: '0644'
      notify: Restart unbound

    - name: Deploy local zones configuration
      template:
        src: ../templates/unbound-local-zones.conf.j2
        dest: /etc/unbound/local-zones.conf
        owner: root
        group: root
        mode: '0644'
      notify: Restart unbound

    - name: Deploy unbound systemd service
      template:
        src: ../templates/unbound.service.j2
        dest: /etc/systemd/system/unbound.service
        owner: root
        group: root
        mode: '0644'
      notify:
        - Reload systemd
        - Restart unbound

    - name: Enable and start unbound
      systemd:
        name: unbound
        state: started
        enabled: true

    - name: Verify unbound is responding
      command: dig @127.0.0.1 dns.nebula +short
      register: dns_test
      changed_when: false
      failed_when: dns_test.stdout != hostvars['dns']['nebula_ip']

  handlers:
    - name: Reload systemd
      systemd:
        daemon_reload: true

    - name: Restart unbound
      systemd:
        name: unbound
        state: restarted

ansible/playbooks/docker.yml (new file)
@@ -0,0 +1,50 @@
---
# Docker playbook for VMs that need containerization
#
# Usage: ansible-playbook -i inventory.ini playbooks/docker.yml --limit "docker"

- name: Install and Configure Docker
  hosts: docker
  become: true
  tasks:
    - name: Install Docker and Docker Compose
      community.general.pacman:
        name:
          - docker
          - docker-compose
          - docker-buildx
        state: present

    - name: Create Docker daemon configuration directory
      file:
        path: /etc/docker
        state: directory
        mode: '0755'

    - name: Configure Docker daemon with DNS
      copy:
        content: |
          {
            "dns": ["1.1.1.1", "8.8.8.8"]
          }
        dest: /etc/docker/daemon.json
        mode: '0644'
      notify: Restart Docker

    - name: Enable and start Docker service
      systemd:
        name: docker
        enabled: true
        state: started

    - name: Add user to docker group
      user:
        name: "{{ ansible_user }}"
        groups: docker
        append: true

  handlers:
    - name: Restart Docker
      systemd:
        name: docker
        state: restarted

ansible/playbooks/garage.yml (new file)
@@ -0,0 +1,187 @@
---
# Garage S3 Cluster Setup (3 nodes, replication factor 3)
#
# Usage:
#   # Full deployment:
#   ansible-playbook -i inventory.ini playbooks/garage.yml
#
#   # Just install/configure (no layout):
#   ansible-playbook -i inventory.ini playbooks/garage.yml --tags install
#
#   # Just configure layout (after install):
#   ansible-playbook -i inventory.ini playbooks/garage.yml --tags layout

- name: Install and Configure Garage on All Nodes
  hosts: garage
  become: true
  tags: [install]
  vars_files:
    - ../vault/secrets.yml
  tasks:
    - name: Download Garage binary
      get_url:
        url: "https://garagehq.deuxfleurs.fr/_releases/v1.0.1/x86_64-unknown-linux-musl/garage"
        dest: /usr/local/bin/garage
        mode: '0755'

    - name: Create garage user
      user:
        name: garage
        system: true
        shell: /sbin/nologin
        home: /var/lib/garage
        create_home: false

    - name: Create garage directories
      file:
        path: "{{ item }}"
        state: directory
        owner: garage
        group: garage
        mode: '0750'
      loop:
        - /var/lib/garage
        - /var/lib/garage/meta
        - /var/lib/garage/data
        - /etc/garage

    - name: Deploy garage configuration
      template:
        src: ../templates/garage.toml.j2
        dest: /etc/garage/garage.toml
        owner: garage
        group: garage
        mode: '0600'
      notify: restart garage

    - name: Deploy garage systemd service
      copy:
        dest: /etc/systemd/system/garage.service
        content: |
          [Unit]
          Description=Garage S3-compatible object storage
          Documentation=https://garagehq.deuxfleurs.fr/
          After=network.target nebula.service
          Wants=network-online.target

          [Service]
          Type=simple
          User=garage
          Group=garage
          ExecStart=/usr/local/bin/garage -c /etc/garage/garage.toml server
          Restart=always
          RestartSec=5

          [Install]
          WantedBy=multi-user.target
        mode: '0644'
      notify:
        - reload systemd
        - restart garage

    - name: Flush handlers to apply config before starting
      meta: flush_handlers

    - name: Start and enable garage
      systemd:
        name: garage
        state: started
        enabled: true
        daemon_reload: true

    - name: Wait for Garage RPC to be ready
      wait_for:
        host: "{{ nebula_ip }}"
        port: 3901
        timeout: 30

    - name: Get node ID
      command: garage -c /etc/garage/garage.toml node id -q
      register: node_id
      changed_when: false

    - name: Display node ID
      debug:
        msg: "Node {{ inventory_hostname }}: {{ node_id.stdout }}"

  handlers:
    - name: reload systemd
      systemd:
        daemon_reload: true

    - name: restart garage
      systemd:
        name: garage
        state: restarted

- name: Configure Garage Cluster Layout
  hosts: garage-01
  become: true
  tags: [layout]
  vars_files:
    - ../vault/secrets.yml
  tasks:
    - name: Wait for all nodes to connect
      pause:
        seconds: 10

    - name: Check cluster status
      command: garage -c /etc/garage/garage.toml status
      register: cluster_status
      changed_when: false

    - name: Display cluster status
      debug:
        msg: "{{ cluster_status.stdout_lines }}"

    - name: Get current layout
      command: garage -c /etc/garage/garage.toml layout show
      register: layout_show
      changed_when: false

    - name: Check if layout needs configuration
      set_fact:
        layout_needs_config: "{{ 'no role' in layout_show.stdout }}"

    - name: Get node IDs for layout
      command: garage -c /etc/garage/garage.toml status
      register: status_output
      changed_when: false
      when: layout_needs_config

    - name: Parse node IDs
      set_fact:
        node_ids: "{{ status_output.stdout | regex_findall('([a-f0-9]{16})\\s+' + item + '\\s') }}"
      loop:
        - "{{ hostvars['garage-01']['nebula_ip'] }}"
        - "{{ hostvars['garage-02']['nebula_ip'] }}"
        - "{{ hostvars['garage-03']['nebula_ip'] }}"
      register: parsed_nodes
      when: layout_needs_config

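    # The chained regex_replace filters below map each node's Nebula IP back to its
    # inventory hostname (10.10.10.39 -> garage-01, .40 -> garage-02, .41 -> garage-03)
    # so the hostname can be used as the node tag (-t) when assigning the layout.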
    - name: Assign layout to nodes
      command: >
        garage -c /etc/garage/garage.toml layout assign
        -z dc1 -c 200GB -t {{ item.item | regex_replace('10\\.10\\.10\\.(\\d+)', 'garage-\\1') | regex_replace('garage-39', 'garage-01') | regex_replace('garage-40', 'garage-02') | regex_replace('garage-41', 'garage-03') }}
        {{ item.ansible_facts.node_ids[0] }}
      loop: "{{ parsed_nodes.results }}"
      when: layout_needs_config and item.ansible_facts.node_ids is defined and item.ansible_facts.node_ids | length > 0

    - name: Apply layout
      command: garage -c /etc/garage/garage.toml layout apply --version 1
      when: layout_needs_config
      register: layout_apply

    - name: Display layout result
      debug:
        var: layout_apply.stdout_lines
      when: layout_apply is changed

    - name: Show final layout
      command: garage -c /etc/garage/garage.toml layout show
      register: final_layout
      changed_when: false

    - name: Display final layout
      debug:
        msg: "{{ final_layout.stdout_lines }}"

ansible/playbooks/nebula.yml (new file)
@@ -0,0 +1,43 @@
---
- name: Configure Nebula Overlay Network
  hosts: all
  become: true
  tasks:
    - name: Install the Nebula network overlay
      community.general.pacman:
        name: nebula
        state: present

    - name: Make sure the configuration directory exists
      file:
        path: /etc/nebula
        state: directory
        mode: '0755'

    - name: Copy over the Nebula CA certificate
      copy:
        src: ../../nebula/ca.crt
        dest: /etc/nebula/ca.crt
        mode: '0644'

    - name: Copy over certificates and keys for the nodes
      copy:
        src: "../../nebula/configs/{{ vmid }}/{{ inventory_hostname }}/{{ inventory_hostname }}.{{ item }}"
        dest: "/etc/nebula/config.{{ item }}"
        mode: '0600'
      loop:
        - crt
        - key

    - name: Create new node configurations
      template:
        src: ../templates/nebula-config.yml.j2
        dest: /etc/nebula/config.yml
      notify: restart nebula

  handlers:
    - name: restart nebula
      systemd:
        name: nebula
        state: restarted
        enabled: true

ansible/playbooks/postgres-ha.yml (new file)
@@ -0,0 +1,277 @@
---
# PostgreSQL High Availability with Patroni + etcd
# Run on postgres group hosts
#
# Usage:
#   # Initialize first node (with existing data):
#   ansible-playbook -i inventory.ini playbooks/postgres-ha.yml --limit postgres-01 -e "patroni_bootstrap=true"
#
#   # Join additional nodes:
#   ansible-playbook -i inventory.ini playbooks/postgres-ha.yml --limit postgres-02
#
#   # All nodes at once (after bootstrap):
#   ansible-playbook -i inventory.ini playbooks/postgres-ha.yml --limit postgres

- name: Configure PostgreSQL HA with Patroni + etcd
  hosts: postgres
  become: true
  vars:
    patroni_superuser_password: "{{ lookup('env', 'PATRONI_SUPERUSER_PASSWORD') | default('changeme', true) }}"
    patroni_replicator_password: "{{ lookup('env', 'PATRONI_REPLICATOR_PASSWORD') | default('changeme', true) }}"
    patroni_bootstrap: false
    etcd_version: "3.5.17"

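  # Export PATRONI_SUPERUSER_PASSWORD and PATRONI_REPLICATOR_PASSWORD in the shell
  # running ansible-playbook; the 'changeme' defaults above are placeholders and
  # should not reach a real cluster.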
  tasks:
    # ============================================
    # ETCD SETUP
    # ============================================
    - name: Check if etcd is installed
      stat:
        path: /usr/local/bin/etcd
      register: etcd_binary

    - name: Download etcd
      get_url:
        url: "https://github.com/etcd-io/etcd/releases/download/v{{ etcd_version }}/etcd-v{{ etcd_version }}-linux-amd64.tar.gz"
        dest: /tmp/etcd.tar.gz
        mode: '0644'
      when: not etcd_binary.stat.exists

    - name: Extract etcd
      unarchive:
        src: /tmp/etcd.tar.gz
        dest: /tmp
        remote_src: true
      when: not etcd_binary.stat.exists

    - name: Install etcd binaries
      copy:
        src: "/tmp/etcd-v{{ etcd_version }}-linux-amd64/{{ item }}"
        dest: "/usr/local/bin/{{ item }}"
        mode: '0755'
        remote_src: true
      loop:
        - etcd
        - etcdctl
        - etcdutl
      when: not etcd_binary.stat.exists

    - name: Create symlinks for etcd binaries
      file:
        src: "/usr/local/bin/{{ item }}"
        dest: "/usr/bin/{{ item }}"
        state: link
      loop:
        - etcd
        - etcdctl
        - etcdutl

    - name: Create etcd user
      user:
        name: etcd
        system: true
        shell: /sbin/nologin
        home: /var/lib/etcd
        create_home: true

    - name: Create etcd config directory
      file:
        path: /etc/etcd
        state: directory
        mode: '0755'

    - name: Create etcd data directory
      file:
        path: /var/lib/etcd
        state: directory
        owner: etcd
        group: etcd
        mode: '0700'

    - name: Deploy etcd configuration
      template:
        src: ../templates/etcd.conf.j2
        dest: /etc/etcd/etcd.conf
        mode: '0644'
      notify: restart etcd

    - name: Deploy etcd systemd service
      template:
        src: ../templates/etcd.service.j2
        dest: /etc/systemd/system/etcd.service
        mode: '0644'
      notify:
        - reload systemd
        - restart etcd

    - name: Enable and start etcd
      systemd:
        name: etcd
        state: started
        enabled: true
        daemon_reload: true

    - name: Wait for etcd to be healthy
      command: etcdctl endpoint health --endpoints=http://127.0.0.1:2379
      register: etcd_health
      until: etcd_health.rc == 0
      retries: 30
      delay: 2
      changed_when: false

    # ============================================
    # POSTGRESQL SETUP
    # ============================================
    - name: Install PostgreSQL
      community.general.pacman:
        name: postgresql
        state: present

    # ============================================
    # PATRONI SETUP
    # ============================================
    - name: Install Patroni dependencies
      community.general.pacman:
        name:
          - python
          - python-pip
          - python-psycopg2
          - python-yaml
          - python-urllib3
          - python-certifi
          - python-virtualenv
        state: present

    - name: Create Patroni virtual environment
      command: python -m venv /opt/patroni
      args:
        creates: /opt/patroni/bin/python

    - name: Install Patroni in virtual environment
      pip:
        name:
          - patroni[etcd3]
          - psycopg2-binary
        state: present
        virtualenv: /opt/patroni

    - name: Create PostgreSQL run directory
      file:
        path: /run/postgresql
        state: directory
        owner: postgres
        group: postgres
        mode: '0755'

    - name: Create tmpfiles config for postgresql run directory
      copy:
        content: "d /run/postgresql 0755 postgres postgres -"
        dest: /etc/tmpfiles.d/postgresql.conf
        mode: '0644'

    - name: Create patroni symlink
      file:
        src: /opt/patroni/bin/patroni
        dest: /usr/local/bin/patroni
        state: link

    - name: Create patroni config directory
      file:
        path: /etc/patroni
        state: directory
        mode: '0755'

    - name: Stop PostgreSQL service (Patroni will manage it)
      systemd:
        name: postgresql
        state: stopped
        enabled: false
      ignore_errors: true

    # For bootstrap node with existing data
    - name: Prepare existing data directory for Patroni takeover
      block:
        - name: Ensure postgres owns data directory
          file:
            path: /var/lib/postgres/data
            owner: postgres
            group: postgres
            recurse: true

        - name: Create replicator role
          become_user: postgres
          command: >
            psql -c "DO $$
            BEGIN
              IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'replicator') THEN
                CREATE ROLE replicator WITH REPLICATION LOGIN PASSWORD '{{ patroni_replicator_password }}';
              END IF;
            END $$;"
          when: patroni_bootstrap | bool
          ignore_errors: true

        - name: Set postgres superuser password
          become_user: postgres
          command: psql -c "ALTER USER postgres WITH PASSWORD '{{ patroni_superuser_password }}';"
          when: patroni_bootstrap | bool
          ignore_errors: true
      when: patroni_bootstrap | bool

    - name: Deploy Patroni configuration
      template:
        src: ../templates/patroni.yml.j2
        dest: /etc/patroni/patroni.yml
        owner: postgres
        group: postgres
        mode: '0600'
      notify: restart patroni

    - name: Create .pgpass file for postgres user
      copy:
        content: |
          *:*:*:postgres:{{ patroni_superuser_password }}
          *:*:*:replicator:{{ patroni_replicator_password }}
        dest: /var/lib/postgres/.pgpass
        owner: postgres
        group: postgres
        mode: '0600'

    - name: Deploy Patroni systemd service
      template:
        src: ../templates/patroni.service.j2
        dest: /etc/systemd/system/patroni.service
        mode: '0644'
      notify:
        - reload systemd
        - restart patroni

    - name: Enable and start Patroni
      systemd:
        name: patroni
        state: started
        enabled: true
        daemon_reload: true

    - name: Wait for Patroni to be healthy
      uri:
        url: "http://{{ nebula_ip }}:8008/health"
        status_code: 200
      register: patroni_health
      until: patroni_health.status == 200
      retries: 30
      delay: 5

  handlers:
    - name: reload systemd
      systemd:
        daemon_reload: true

    - name: restart etcd
      systemd:
        name: etcd
        state: restarted

    - name: restart patroni
      systemd:
        name: patroni
        state: restarted

ansible/playbooks/security.yml (new file)
@@ -0,0 +1,103 @@
---
# Security playbook: iptables + fail2ban for all VMs
#
# Run: ansible-playbook -i inventory.ini playbooks/security.yml
#
# This playbook uses direct iptables rules instead of ufw to ensure
# bridge network traffic is properly blocked (ufw's before.rules allows
# ICMP before custom deny rules can take effect).

- name: Configure Security for All VMs
  hosts: all
  become: true
  tasks:
    # Load netfilter kernel modules (required on fresh VMs)
    - name: Load netfilter kernel modules
      community.general.modprobe:
        name: "{{ item }}"
        state: present
      loop:
        - ip_tables
        - ip6_tables
        - iptable_filter
        - ip6table_filter

    # Install security packages
    - name: Install iptables and fail2ban
      community.general.pacman:
        name:
          - iptables
          - fail2ban
        state: present

    # Stop and disable ufw if present (migrating to iptables)
    - name: Check if ufw is installed
      command: pacman -Q ufw
      register: ufw_check
      ignore_errors: true
      changed_when: false

    - name: Stop ufw if running
      systemd:
        name: ufw
        state: stopped
      when: ufw_check.rc == 0
      ignore_errors: true

    - name: Disable ufw
      systemd:
        name: ufw
        enabled: false
      when: ufw_check.rc == 0
      ignore_errors: true

    # Deploy iptables rules
    - name: Deploy iptables rules
      template:
        src: ../templates/iptables.rules.j2
        dest: /etc/iptables/iptables.rules
        mode: '0644'
      notify: reload iptables

    # Enable and start iptables service
    - name: Enable and start iptables
      systemd:
        name: iptables
        state: started
        enabled: true

    # Configure fail2ban
    - name: Create fail2ban local config
      copy:
        dest: /etc/fail2ban/jail.local
        content: |
          [DEFAULT]
          bantime = 1h
          findtime = 10m
          maxretry = 5

          [sshd]
          enabled = true
          port = ssh
          filter = sshd
          backend = systemd
        mode: '0644'
      notify: restart fail2ban

    # Enable fail2ban service
    - name: Enable and start fail2ban
      systemd:
        name: fail2ban
        state: started
        enabled: true

  handlers:
    - name: reload iptables
      systemd:
        name: iptables
        state: restarted

    - name: restart fail2ban
      systemd:
        name: fail2ban
        state: restarted

ansible/playbooks/valkey-sentinel.yml (new file)
@@ -0,0 +1,155 @@
---
# Valkey Sentinel Setup (1 master + 2 replicas + Sentinel on each)
#
# Provides automatic failover without requiring cluster-aware clients.
# Apps connect directly to master or use Sentinel-aware clients.
#
# Usage:
#   ansible-playbook -i inventory.ini playbooks/valkey-sentinel.yml

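# For Sentinel-aware clients, the connection details as configured here are:
# Sentinel on port 26379 of each valkey-01..03 host's Nebula IP, monitored
# master name "valkey-ha" (exact client syntax varies by library).
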
- name: Configure Valkey with Sentinel
  hosts: valkey
  become: true
  vars_files:
    - ../vault/secrets.yml
  vars:
    valkey_maxmemory: "256mb"
    valkey_maxmemory_policy: "allkeys-lru"
    valkey_role: "{{ 'master' if inventory_hostname == 'valkey-01' else 'replica' }}"
  tasks:
    - name: Stop valkey service
      systemd:
        name: valkey
        state: stopped
      ignore_errors: true

    - name: Remove cluster data files
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - /var/lib/valkey/nodes.conf
        - /var/lib/valkey/dump.rdb

    - name: Deploy standalone Valkey configuration
      template:
        src: ../templates/valkey-standalone.conf.j2
        dest: /etc/valkey/valkey.conf
        owner: valkey
        group: valkey
        mode: '0640'

    - name: Deploy ACL file
      template:
        src: ../templates/valkey-acl.j2
        dest: /etc/valkey/users.acl
        owner: valkey
        group: valkey
        mode: '0600'

    - name: Create Sentinel data directory
      file:
        path: /var/lib/valkey/sentinel
        state: directory
        owner: valkey
        group: valkey
        mode: '0750'

    - name: Deploy Sentinel configuration
      template:
        src: ../templates/valkey-sentinel.conf.j2
        dest: /etc/valkey/sentinel.conf
        owner: valkey
        group: valkey
        mode: '0640'

    - name: Deploy Sentinel systemd service
      copy:
        dest: /etc/systemd/system/valkey-sentinel.service
        content: |
          [Unit]
          Description=Valkey Sentinel
          Documentation=https://valkey.io/
          After=network.target valkey.service nebula.service
          Wants=network-online.target

          [Service]
          Type=simple
          User=valkey
          Group=valkey
          ExecStart=/usr/bin/valkey-sentinel /etc/valkey/sentinel.conf
          Restart=always
          RestartSec=5

          [Install]
          WantedBy=multi-user.target
        mode: '0644'

    - name: Reload systemd
      systemd:
        daemon_reload: true

    - name: Start Valkey service
      systemd:
        name: valkey
        state: started
        enabled: true

    - name: Wait for Valkey to be ready
      wait_for:
        host: "{{ nebula_ip }}"
        port: 6379
        timeout: 30

- name: Start Sentinel on all nodes
  hosts: valkey
  become: true
  serial: 1
  tasks:
    - name: Wait for master to be ready (replicas only)
      wait_for:
        host: "{{ hostvars['valkey-01']['nebula_ip'] }}"
        port: 6379
        timeout: 30
      when: inventory_hostname != 'valkey-01'

    - name: Start Sentinel service
      systemd:
        name: valkey-sentinel
        state: started
        enabled: true

    - name: Wait for Sentinel to be ready
      wait_for:
        host: "{{ nebula_ip }}"
        port: 26379
        timeout: 30

- name: Verify Sentinel Setup
  hosts: valkey-01
  become: true
  vars_files:
    - ../vault/secrets.yml
  tasks:
    - name: Check replication status
      command: >
        valkey-cli -h {{ nebula_ip }} -p 6379
        --user admin --pass {{ valkey_admin_password }}
        info replication
      register: replication_info
      changed_when: false

    - name: Display replication status
      debug:
        msg: "{{ replication_info.stdout_lines }}"

    - name: Check Sentinel status
      command: >
        valkey-cli -h {{ nebula_ip }} -p 26379
        sentinel master valkey-ha
      register: sentinel_info
      changed_when: false

    - name: Display Sentinel status
      debug:
        msg: "{{ sentinel_info.stdout_lines }}"

ansible/playbooks/valkey.yml (new file)
@@ -0,0 +1,80 @@
---
# Valkey (Redis fork) Primary Setup
#
# Usage: ansible-playbook -i inventory.ini playbooks/valkey.yml
#
# Creates:
#   - Valkey server on valkey-primary
#   - Configured for Nebula network access
#   - 16 databases (0-15) for multi-tenant use

- name: Setup Valkey Primary
  hosts: valkey-primary
  become: true
  vars_files:
    - ../vault/secrets.yml
  vars:
    valkey_maxmemory: "256mb"
    valkey_maxmemory_policy: "allkeys-lru"
  tasks:
    - name: Install valkey
      community.general.pacman:
        name: valkey
        state: present

    - name: Create systemd override directory
      file:
        path: /etc/systemd/system/valkey.service.d
        state: directory
        mode: '0755'

    - name: Add systemd override for ACL write access
      copy:
        dest: /etc/systemd/system/valkey.service.d/override.conf
        content: |
          [Service]
          ReadWritePaths=/etc/valkey
        mode: '0644'
      notify:
        - reload systemd
        - restart valkey

    - name: Deploy ACL file
      template:
        src: ../templates/valkey-acl.j2
        dest: /etc/valkey/users.acl
        owner: valkey
        group: valkey
        mode: '0600'
      notify: restart valkey

    - name: Deploy valkey.conf
      template:
        src: ../templates/valkey.conf.j2
        dest: /etc/valkey/valkey.conf
        owner: valkey
        group: valkey
        mode: '0640'
      notify: restart valkey

    - name: Start and enable valkey
      systemd:
        name: valkey
        state: started
        enabled: true

    - name: Wait for Valkey to be ready
      wait_for:
        host: "{{ nebula_ip }}"
        port: 6379
        timeout: 30

  handlers:
    - name: reload systemd
      systemd:
        daemon_reload: true

    - name: restart valkey
      systemd:
        name: valkey
        state: restarted

ansible/services.yml.example (new file)
@@ -0,0 +1,57 @@
# Service Manifest
#
# Defines applications, their git repos, data services, and deployment targets.
#
# Usage:
#   ansible-playbook playbooks/data-service.yml -e "service=myapp"

git_base_url: "git@git.infra.example:org"

services:
  myapp:
    description: "Example web application"
    host: app-server
    deploy_path: /opt/myapp
    repos:
      - name: myapp
        dest: myapp
        version: main
    postgres:
      enabled: true
      # restore_from: "databases/dumps/myapp.dump"  # Optional: restore from backup
    valkey:
      enabled: true
      key_prefix: "myapp"  # Access to myapp:* keys only
    s3:
      enabled: true
      bucket: "myapp-media"
    vault_roles:
      - app      # 1h TTL, DML only (SELECT, INSERT, UPDATE, DELETE)
      - migrate  # 15m TTL, DDL+DML (for migrations)

  another-service:
    description: "Another example service"
    host: another-server
    deploy_path: /opt/another
    repos:
      - name: another
        dest: another
        version: main
    postgres:
      enabled: true
    valkey:
      enabled: true
      key_prefix: "another"
    vault_roles:
      - app
      - migrate

# Valkey key prefix allocation:
#   All services use database 0 with key prefixes for namespace isolation.
#   Each service gets an ACL user that can only access {service}:* keys.
#   Credentials are provisioned by data-service.yml and stored in Vault.

# S3 bucket allocation:
#   Each service gets its own bucket (default: {service}-media) with a dedicated API key.
#   Buckets are created on the Garage cluster with read/write permissions.
#   Credentials are provisioned by data-service.yml and stored in Vault at secret/{service}/s3.

21
ansible/templates/etcd.conf.j2
Normal file
21
ansible/templates/etcd.conf.j2
Normal file
@ -0,0 +1,21 @@
|
||||
# etcd configuration for Patroni cluster
|
||||
# Node: {{ inventory_hostname }}
|
||||
|
||||
name: '{{ inventory_hostname }}'
|
||||
data-dir: /var/lib/etcd
|
||||
|
||||
# Cluster communication
|
||||
initial-advertise-peer-urls: http://{{ nebula_ip }}:2380
|
||||
listen-peer-urls: http://{{ nebula_ip }}:2380
|
||||
listen-client-urls: http://{{ nebula_ip }}:2379,http://127.0.0.1:2379
|
||||
advertise-client-urls: http://{{ nebula_ip }}:2379
|
||||
|
||||
# Cluster bootstrap
|
||||
initial-cluster-token: 'patroni-etcd-cluster'
|
||||
initial-cluster: {% for host in groups['postgres'] %}{{ host }}=http://{{ hostvars[host]['nebula_ip'] }}:2380{% if not loop.last %},{% endif %}{% endfor %}
|
||||
|
||||
initial-cluster-state: 'new'
|
||||
|
||||
# Performance tuning
|
||||
heartbeat-interval: 1000
|
||||
election-timeout: 5000
|
||||
16
ansible/templates/etcd.service.j2
Normal file
16
ansible/templates/etcd.service.j2
Normal file
@ -0,0 +1,16 @@
|
||||
[Unit]
|
||||
Description=etcd distributed key-value store
|
||||
Documentation=https://etcd.io/docs/
|
||||
After=network.target nebula.service
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
User=etcd
|
||||
ExecStart=/usr/local/bin/etcd --config-file=/etc/etcd/etcd.conf
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
LimitNOFILE=65536
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
48
ansible/templates/garage.toml.j2
Normal file
48
ansible/templates/garage.toml.j2
Normal file
@ -0,0 +1,48 @@
|
||||
# Garage S3 Configuration
|
||||
# Generated by Ansible - do not edit manually
|
||||
|
||||
# Metadata directory (LMDB database)
|
||||
metadata_dir = "/var/lib/garage/meta"
|
||||
|
||||
# Data directory (actual S3 objects)
|
||||
data_dir = "/var/lib/garage/data"
|
||||
|
||||
# Database engine (LMDB recommended for clusters with replication)
|
||||
db_engine = "lmdb"
|
||||
|
||||
# Replication factor (3 = all nodes have all data)
|
||||
replication_factor = 3
|
||||
|
||||
# Compression level (1-19, higher = more CPU, smaller files)
|
||||
compression_level = 2
|
||||
|
||||
# Automatic metadata snapshots (protects against LMDB corruption)
|
||||
metadata_auto_snapshot_interval = "6h"
|
||||
|
||||
# RPC configuration (inter-node communication)
|
||||
rpc_bind_addr = "{{ nebula_ip }}:3901"
|
||||
rpc_public_addr = "{{ nebula_ip }}:3901"
|
||||
rpc_secret = "{{ garage_rpc_secret }}"
|
||||
|
||||
# Bootstrap peers (connect to other nodes)
|
||||
bootstrap_peers = [
|
||||
{% for host in groups['garage'] if host != inventory_hostname %}
|
||||
"{{ hostvars[host]['nebula_ip'] }}:3901",
|
||||
{% endfor %}
|
||||
]
|
||||
|
||||
# S3 API endpoint
|
||||
[s3_api]
|
||||
api_bind_addr = "{{ nebula_ip }}:3900"
|
||||
s3_region = "garage"
|
||||
root_domain = ".s3.garage.nebula"
|
||||
|
||||
# S3 Web endpoint (for static website hosting)
|
||||
[s3_web]
|
||||
bind_addr = "{{ nebula_ip }}:3902"
|
||||
root_domain = ".web.garage.nebula"
|
||||
|
||||
# Admin API (for bucket management)
|
||||
[admin]
|
||||
api_bind_addr = "{{ nebula_ip }}:3903"
|
||||
admin_token = "{{ garage_admin_token }}"
|
||||
71
ansible/templates/iptables.rules.j2
Normal file
71
ansible/templates/iptables.rules.j2
Normal file
@ -0,0 +1,71 @@
|
||||
*filter
|
||||
:INPUT DROP [0:0]
|
||||
:FORWARD DROP [0:0]
|
||||
:OUTPUT ACCEPT [0:0]
|
||||
|
||||
# Allow established and related connections
|
||||
-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
|
||||
|
||||
# Allow loopback
|
||||
-A INPUT -i lo -j ACCEPT
|
||||
|
||||
# Allow all traffic on Nebula interface (encrypted overlay)
|
||||
-A INPUT -i nebula1 -j ACCEPT
|
||||
|
||||
# ============================================================
|
||||
# Bridge network rules (192.168.100.0/24)
|
||||
# Only allow Proxmox host - block all other VMs on the bridge
|
||||
# ============================================================
|
||||
|
||||
# Allow Proxmox host for management/Ansible
|
||||
-A INPUT -s {{ proxmox_host_ip }} -j ACCEPT
|
||||
|
||||
# Allow Nebula UDP from lighthouse (required for overlay connectivity)
|
||||
-A INPUT -s {{ lighthouse_bridge_ip }} -p udp --dport 4242 -j ACCEPT
|
||||
|
||||
# DROP everything else from bridge network (force Nebula for inter-VM)
|
||||
-A INPUT -s {{ bridge_network }} -j DROP
|
||||
|
||||
# ============================================================
|
||||
# Caddy-proxied ports (Nebula only - reverse proxy traffic)
|
||||
# These ports are NOT public; only Caddy can reach them
|
||||
# ============================================================
|
||||
|
||||
{% if caddy_proxied_ports_tcp is defined %}
|
||||
# Web services proxied through Caddy (Nebula only)
|
||||
{% for port in caddy_proxied_ports_tcp %}
|
||||
-A INPUT -s {{ caddy_nebula_ip }} -p tcp --dport {{ port }} -j ACCEPT
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
{% if caddy_proxied_ports_udp is defined %}
|
||||
# UDP services proxied through Caddy (Nebula only)
|
||||
{% for port in caddy_proxied_ports_udp %}
|
||||
-A INPUT -s {{ caddy_nebula_ip }} -p udp --dport {{ port }} -j ACCEPT
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
# ============================================================
|
||||
# Public-facing ports (for DNAT'd traffic from internet)
|
||||
# ============================================================
|
||||
|
||||
{% if game_ports_tcp is defined %}
|
||||
# Game server TCP ports (internet -> Proxmox DNAT -> VM)
|
||||
{% for port in game_ports_tcp %}
|
||||
-A INPUT -p tcp --dport {{ port }} -j ACCEPT
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
{% if game_ports_udp is defined %}
|
||||
# Game server UDP ports (internet -> Proxmox DNAT -> VM)
|
||||
{% for port in game_ports_udp %}
|
||||
-A INPUT -p udp --dport {{ port }} -j ACCEPT
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
# ============================================================
|
||||
# Default deny - drop everything not explicitly allowed
|
||||
# ============================================================
|
||||
-A INPUT -j DROP
|
||||
|
||||
COMMIT
|
||||
67
ansible/templates/nebula-config.yml.j2
Normal file
67
ansible/templates/nebula-config.yml.j2
Normal file
@ -0,0 +1,67 @@
|
||||
pki:
|
||||
ca: /etc/nebula/ca.crt
|
||||
cert: /etc/nebula/config.crt
|
||||
key: /etc/nebula/config.key
|
||||
|
||||
static_host_map:
|
||||
# Primary lighthouse (InterServer datacenter)
|
||||
"{{ lighthouse_nebula_ip }}": ["{{ lighthouse_bridge_ip }}:4242", "{{ lighthouse_public_ip }}:4242"]
|
||||
# Home lighthouse (Dell) for reduced latency from home
|
||||
"{{ home_lighthouse_nebula_ip }}": ["{{ home_lighthouse_public_ip }}:4242"]
|
||||
|
||||
lighthouse:
|
||||
am_lighthouse: false
|
||||
interval: 60
|
||||
hosts:
|
||||
- "{{ lighthouse_nebula_ip }}"
|
||||
- "{{ home_lighthouse_nebula_ip }}"
|
||||
|
||||
punchy:
|
||||
punch: true
|
||||
respond: true
|
||||
|
||||
relay:
|
||||
relays:
|
||||
- "{{ lighthouse_nebula_ip }}"
|
||||
- "{{ home_lighthouse_nebula_ip }}"
|
||||
|
||||
listen:
|
||||
host: 0.0.0.0
|
||||
port: 0
|
||||
|
||||
tun:
|
||||
dev: nebula1
|
||||
drop_local_broadcast: true
|
||||
drop_multicast: true
|
||||
|
||||
firewall:
|
||||
conntrack:
|
||||
tcp_timeout: 12h
|
||||
udp_timeout: 3m
|
||||
default_timeout: 10m
|
||||
|
||||
outbound:
|
||||
- port: any
|
||||
proto: any
|
||||
host: any
|
||||
|
||||
inbound:
|
||||
# Admin (laptop) has full access
|
||||
- port: any
|
||||
proto: any
|
||||
group: admin
|
||||
|
||||
# Infrastructure can talk to each other
|
||||
- port: any
|
||||
proto: any
|
||||
group: infrastructure
|
||||
|
||||
# Projects can access infrastructure services
|
||||
- port: any
|
||||
proto: any
|
||||
group: projects
|
||||
|
||||
# Allow ICMP from anyone (ping)
|
||||
- port: any
|
||||
proto: icmp
|
||||
host: any
|
||||
19
ansible/templates/patroni.service.j2
Normal file
19
ansible/templates/patroni.service.j2
Normal file
@ -0,0 +1,19 @@
|
||||
[Unit]
|
||||
Description=Patroni PostgreSQL HA Manager
|
||||
Documentation=https://patroni.readthedocs.io/
|
||||
After=network.target etcd.service nebula.service
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=postgres
|
||||
Group=postgres
|
||||
ExecStart=/opt/patroni/bin/patroni /etc/patroni/patroni.yml
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
KillMode=process
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
TimeoutSec=30
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
63
ansible/templates/patroni.yml.j2
Normal file
63
ansible/templates/patroni.yml.j2
Normal file
@ -0,0 +1,63 @@
|
||||
scope: postgres-cluster
|
||||
name: {{ inventory_hostname }}
|
||||
|
||||
restapi:
|
||||
listen: {{ nebula_ip }}:8008
|
||||
connect_address: {{ nebula_ip }}:8008
|
||||
|
||||
etcd3:
|
||||
hosts: {% for host in groups['postgres'] %}{{ hostvars[host]['nebula_ip'] }}:2379{% if not loop.last %},{% endif %}{% endfor %}
|
||||
|
||||
|
||||
bootstrap:
|
||||
dcs:
|
||||
ttl: 30
|
||||
loop_wait: 10
|
||||
retry_timeout: 10
|
||||
maximum_lag_on_failover: 1048576
|
||||
postgresql:
|
||||
use_pg_rewind: true
|
||||
use_slots: true
|
||||
parameters:
|
||||
wal_level: replica
|
||||
hot_standby: "on"
|
||||
max_connections: 200
|
||||
max_worker_processes: 8
|
||||
max_wal_senders: 10
|
||||
max_replication_slots: 10
|
||||
hot_standby_feedback: "on"
|
||||
wal_log_hints: "on"
|
||||
listen_addresses: '*'
|
||||
|
||||
initdb:
|
||||
- encoding: UTF8
|
||||
- data-checksums
|
||||
- locale: C
|
||||
|
||||
pg_hba:
|
||||
- host replication replicator 10.10.10.0/24 md5
|
||||
- host all all 10.10.10.0/24 md5
|
||||
- host all all 127.0.0.1/32 md5
|
||||
- local all all trust
|
||||
|
||||
postgresql:
|
||||
listen: {{ nebula_ip }}:5432
|
||||
connect_address: {{ nebula_ip }}:5432
|
||||
data_dir: /var/lib/postgres/data
|
||||
bin_dir: /usr/bin
|
||||
pgpass: /var/lib/postgres/.pgpass
|
||||
authentication:
|
||||
replication:
|
||||
username: replicator
|
||||
password: {{ patroni_replicator_password }}
|
||||
superuser:
|
||||
username: postgres
|
||||
password: {{ patroni_superuser_password }}
|
||||
parameters:
|
||||
unix_socket_directories: '/run/postgresql'
|
||||
|
||||
tags:
|
||||
nofailover: false
|
||||
noloadbalance: false
|
||||
clonefrom: false
|
||||
nosync: false
|
||||
83
ansible/templates/pg-static-roles.sql.j2
Normal file
83
ansible/templates/pg-static-roles.sql.j2
Normal file
@ -0,0 +1,83 @@
|
||||
-- PostgreSQL Static Roles for {{ service }}
|
||||
-- Generated by Ansible - run once per service
|
||||
--
|
||||
-- Creates:
|
||||
-- {{ service }}_owner - Owns database and all objects (NOLOGIN)
|
||||
-- {{ service }}_app - DML permissions (SELECT, INSERT, UPDATE, DELETE)
|
||||
-- {{ service }}_migrate - DDL+DML permissions (for migrations)
|
||||
--
|
||||
-- Vault dynamic roles inherit from _app or _migrate
|
||||
|
||||
-- =============================================================================
|
||||
-- 1. Create owner role (NOLOGIN, owns all objects)
|
||||
-- =============================================================================
|
||||
CREATE ROLE {{ service }}_owner NOLOGIN;
|
||||
|
||||
-- =============================================================================
|
||||
-- 2. Create database owned by the owner role
|
||||
-- =============================================================================
|
||||
CREATE DATABASE {{ service }} OWNER {{ service }}_owner;
|
||||
|
||||
-- =============================================================================
|
||||
-- 3. Connect to the new database for schema grants
|
||||
-- =============================================================================
|
||||
\c {{ service }}
|
||||
|
||||
-- =============================================================================
|
||||
-- 4. Create app role template (DML only - SELECT, INSERT, UPDATE, DELETE)
|
||||
-- =============================================================================
|
||||
CREATE ROLE {{ service }}_app NOLOGIN NOINHERIT;
|
||||
|
||||
-- Grant DML permissions
|
||||
GRANT CONNECT ON DATABASE {{ service }} TO {{ service }}_app;
|
||||
GRANT USAGE ON SCHEMA public TO {{ service }}_app;
|
||||
GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO {{ service }}_app;
|
||||
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO {{ service }}_app;
|
||||
|
||||
-- =============================================================================
|
||||
-- 5. Create migrate role template (DDL+DML - for migrations)
|
||||
-- =============================================================================
|
||||
CREATE ROLE {{ service }}_migrate NOLOGIN NOINHERIT;
|
||||
|
||||
-- Grant all privileges
|
||||
GRANT CONNECT ON DATABASE {{ service }} TO {{ service }}_migrate;
|
||||
GRANT ALL PRIVILEGES ON SCHEMA public TO {{ service }}_migrate;
|
||||
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO {{ service }}_migrate;
|
||||
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO {{ service }}_migrate;
|
||||
GRANT ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA public TO {{ service }}_migrate;
|
||||
|
||||
-- =============================================================================
|
||||
-- 6. Set default privileges for future objects (CRITICAL)
|
||||
-- Ensures new tables created during migrations are accessible
|
||||
-- =============================================================================
|
||||
|
||||
-- Default privileges for objects created by _owner role
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public
|
||||
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO {{ service }}_app;
|
||||
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public
|
||||
GRANT USAGE, SELECT ON SEQUENCES TO {{ service }}_app;
|
||||
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public
|
||||
GRANT ALL PRIVILEGES ON TABLES TO {{ service }}_migrate;
|
||||
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public
|
||||
GRANT ALL PRIVILEGES ON SEQUENCES TO {{ service }}_migrate;
|
||||
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_owner IN SCHEMA public
|
||||
GRANT ALL PRIVILEGES ON FUNCTIONS TO {{ service }}_migrate;
|
||||
|
||||
-- Default privileges for objects created by _migrate role
|
||||
-- (migrations run as _migrate via SET ROLE, so tables are owned by _migrate)
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_migrate IN SCHEMA public
|
||||
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO {{ service }}_app;
|
||||
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE {{ service }}_migrate IN SCHEMA public
|
||||
GRANT USAGE, SELECT ON SEQUENCES TO {{ service }}_app;
|
||||
|
||||
-- =============================================================================
|
||||
-- 7. Grant roles to vault_admin (WITH ADMIN OPTION for dynamic role creation)
|
||||
-- =============================================================================
|
||||
GRANT {{ service }}_app TO vault_admin WITH ADMIN OPTION;
|
||||
GRANT {{ service }}_migrate TO vault_admin WITH ADMIN OPTION;
|
||||
GRANT {{ service }}_owner TO vault_admin; -- For REASSIGN OWNED during revocation
|
||||
34
ansible/templates/unbound-local-zones.conf.j2
Normal file
34
ansible/templates/unbound-local-zones.conf.j2
Normal file
@ -0,0 +1,34 @@
|
||||
# Local zones for Nebula overlay network
|
||||
# Generated by Ansible from inventory - do not edit manually
|
||||
|
||||
# ============================================================
|
||||
# .nebula zone - All VMs
|
||||
# ============================================================
|
||||
local-zone: "nebula." static
|
||||
|
||||
# Lighthouse (not in inventory, uses variable)
|
||||
local-data: "lighthouse.nebula. IN A {{ lighthouse_nebula_ip }}"
|
||||
local-data-ptr: "{{ lighthouse_nebula_ip }} lighthouse.nebula"
|
||||
|
||||
# Proxmox host (not in inventory)
|
||||
local-data: "proxmox.nebula. IN A 10.10.10.1"
|
||||
local-data-ptr: "10.10.10.1 proxmox.nebula"
|
||||
|
||||
# All VMs from inventory
|
||||
{% for host in groups['all'] %}
|
||||
local-data: "{{ host }}.nebula. IN A {{ hostvars[host]['nebula_ip'] }}"
|
||||
local-data-ptr: "{{ hostvars[host]['nebula_ip'] }} {{ host }}.nebula"
|
||||
{% endfor %}
|
||||
|
||||
# ============================================================
|
||||
# Custom domain aliases (optional)
|
||||
# Add your own domain mappings here
|
||||
# ============================================================
|
||||
# Example:
|
||||
# local-zone: "myapp.infra.example." static
|
||||
# local-data: "myapp.infra.example. IN A {{ hostvars['app-server']['nebula_ip'] }}"
|
||||
|
||||
# ============================================================
|
||||
# Reverse DNS zone for 10.10.10.0/24
|
||||
# ============================================================
|
||||
local-zone: "10.10.10.in-addr.arpa." static
|
||||
55
ansible/templates/unbound.conf.j2
Normal file
55
ansible/templates/unbound.conf.j2
Normal file
@ -0,0 +1,55 @@
|
||||
# Unbound DNS configuration for Nebula overlay network
|
||||
# Deployed by Ansible - do not edit manually
|
||||
|
||||
server:
|
||||
# Network settings - bind to Nebula interface only
|
||||
interface: 127.0.0.1
|
||||
interface: {{ hostvars['dns']['nebula_ip'] }}
|
||||
port: 53
|
||||
do-ip4: yes
|
||||
do-ip6: no
|
||||
do-udp: yes
|
||||
do-tcp: yes
|
||||
|
||||
# Access control - Nebula network only
|
||||
access-control: 127.0.0.0/8 allow
|
||||
access-control: 10.10.10.0/24 allow
|
||||
access-control: 0.0.0.0/0 refuse
|
||||
|
||||
# Performance tuning
|
||||
num-threads: 2
|
||||
msg-cache-size: 16m
|
||||
rrset-cache-size: 32m
|
||||
cache-min-ttl: 300
|
||||
cache-max-ttl: 86400
|
||||
|
||||
# Privacy
|
||||
hide-identity: yes
|
||||
hide-version: yes
|
||||
|
||||
# Security hardening
|
||||
harden-glue: yes
|
||||
harden-dnssec-stripped: yes
|
||||
harden-referral-path: yes
|
||||
use-caps-for-id: yes
|
||||
|
||||
# Rate limiting
|
||||
ip-ratelimit: 100
|
||||
|
||||
# Logging
|
||||
verbosity: 1
|
||||
logfile: ""
|
||||
use-syslog: yes
|
||||
|
||||
# Include local zone definitions
|
||||
include: /etc/unbound/local-zones.conf
|
||||
|
||||
# Forward external queries to public DNS
|
||||
# Cloudflare primary (faster - we're behind their proxy)
|
||||
# Google fallback
|
||||
forward-zone:
|
||||
name: "."
|
||||
forward-addr: 1.1.1.1
|
||||
forward-addr: 1.0.0.1
|
||||
forward-addr: 8.8.8.8
|
||||
forward-addr: 8.8.4.4
|
||||
15
ansible/templates/unbound.service.j2
Normal file
15
ansible/templates/unbound.service.j2
Normal file
@ -0,0 +1,15 @@
|
||||
[Unit]
|
||||
Description=Unbound DNS resolver
|
||||
Documentation=man:unbound(8)
|
||||
After=network.target nebula.service
|
||||
Wants=nebula.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/usr/bin/unbound -d -c /etc/unbound/unbound.conf
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
10
ansible/templates/valkey-acl.j2
Normal file
10
ansible/templates/valkey-acl.j2
Normal file
@ -0,0 +1,10 @@
|
||||
# Valkey ACL Configuration
|
||||
# Admin user has full access
|
||||
user admin on >{{ valkey_admin_password }} ~* &* +@all
|
||||
|
||||
# Disable default user
|
||||
user default off
|
||||
|
||||
# Service users are created dynamically by data-service.yml
|
||||
# Example format:
|
||||
# user myapp on >password ~myapp:* &* +@all
|
||||
44
ansible/templates/valkey-cluster.conf.j2
Normal file
44
ansible/templates/valkey-cluster.conf.j2
Normal file
@ -0,0 +1,44 @@
|
||||
# Valkey Cluster Configuration
|
||||
# Generated by Ansible - do not edit manually
|
||||
|
||||
# Network
|
||||
bind {{ nebula_ip }}
|
||||
port 6379
|
||||
protected-mode yes
|
||||
|
||||
# Cluster mode
|
||||
cluster-enabled yes
|
||||
cluster-config-file /var/lib/valkey/nodes.conf
|
||||
cluster-node-timeout 5000
|
||||
cluster-announce-ip {{ nebula_ip }}
|
||||
cluster-announce-port 6379
|
||||
cluster-announce-bus-port 16379
|
||||
|
||||
# General
|
||||
daemonize no
|
||||
pidfile /run/valkey/valkey.pid
|
||||
loglevel notice
|
||||
logfile ""
|
||||
|
||||
# Databases (cluster mode only uses db 0)
|
||||
databases 1
|
||||
|
||||
# Memory Management
|
||||
maxmemory {{ valkey_maxmemory }}
|
||||
maxmemory-policy {{ valkey_maxmemory_policy }}
|
||||
|
||||
# Persistence (minimal for cluster mode)
|
||||
save ""
|
||||
appendonly no
|
||||
|
||||
# Security - ACL-based authentication
|
||||
aclfile /etc/valkey/users.acl
|
||||
|
||||
# Limits
|
||||
maxclients 1000
|
||||
timeout 0
|
||||
tcp-keepalive 300
|
||||
|
||||
# Slow log
|
||||
slowlog-log-slower-than 10000
|
||||
slowlog-max-len 128
|
||||
21
ansible/templates/valkey-sentinel.conf.j2
Normal file
21
ansible/templates/valkey-sentinel.conf.j2
Normal file
@ -0,0 +1,21 @@
|
||||
# Valkey Sentinel Configuration
|
||||
# Generated by Ansible - do not edit manually
|
||||
|
||||
port 26379
|
||||
bind {{ nebula_ip }}
|
||||
|
||||
# Sentinel monitoring configuration
|
||||
sentinel monitor valkey-ha {{ hostvars['valkey-01']['nebula_ip'] }} 6379 2
|
||||
sentinel auth-pass valkey-ha {{ valkey_admin_password }}
|
||||
sentinel auth-user valkey-ha admin
|
||||
sentinel down-after-milliseconds valkey-ha 5000
|
||||
sentinel failover-timeout valkey-ha 60000
|
||||
sentinel parallel-syncs valkey-ha 1
|
||||
|
||||
# Sentinel authentication
|
||||
sentinel sentinel-user admin
|
||||
sentinel sentinel-pass {{ valkey_admin_password }}
|
||||
|
||||
# Announce IP for Nebula network
|
||||
sentinel announce-ip {{ nebula_ip }}
|
||||
sentinel announce-port 26379
|
||||
46
ansible/templates/valkey-standalone.conf.j2
Normal file
46
ansible/templates/valkey-standalone.conf.j2
Normal file
@ -0,0 +1,46 @@
|
||||
# Valkey Standalone Configuration (Master-Replica mode)
|
||||
# Generated by Ansible - do not edit manually
|
||||
|
||||
# Network
|
||||
bind {{ nebula_ip }}
|
||||
port 6379
|
||||
protected-mode yes
|
||||
|
||||
# Disable cluster mode
|
||||
cluster-enabled no
|
||||
|
||||
# General
|
||||
daemonize no
|
||||
pidfile /run/valkey/valkey.pid
|
||||
loglevel notice
|
||||
logfile ""
|
||||
|
||||
# Databases
|
||||
databases 16
|
||||
|
||||
# Memory Management
|
||||
maxmemory {{ valkey_maxmemory }}
|
||||
maxmemory-policy {{ valkey_maxmemory_policy }}
|
||||
|
||||
# Persistence (minimal for caching)
|
||||
save ""
|
||||
appendonly no
|
||||
|
||||
# Security - ACL-based authentication
|
||||
aclfile /etc/valkey/users.acl
|
||||
|
||||
# Replication (configured on replicas only)
|
||||
{% if valkey_role == 'replica' %}
|
||||
replicaof {{ hostvars['valkey-01']['nebula_ip'] }} 6379
|
||||
masterauth {{ valkey_admin_password }}
|
||||
masteruser admin
|
||||
{% endif %}
|
||||
|
||||
# Limits
|
||||
maxclients 1000
|
||||
timeout 0
|
||||
tcp-keepalive 300
|
||||
|
||||
# Slow log
|
||||
slowlog-log-slower-than 10000
|
||||
slowlog-max-len 128
|
||||
40
ansible/templates/valkey.conf.j2
Normal file
40
ansible/templates/valkey.conf.j2
Normal file
@ -0,0 +1,40 @@
|
||||
# Valkey Configuration
|
||||
# Generated by Ansible - do not edit manually
|
||||
|
||||
# Network
|
||||
bind {{ nebula_ip }}
|
||||
port 6379
|
||||
protected-mode yes
|
||||
|
||||
# General
|
||||
daemonize no
|
||||
pidfile /run/valkey/valkey.pid
|
||||
loglevel notice
|
||||
logfile ""
|
||||
|
||||
# Databases (0-15 available for multi-tenant use)
|
||||
# See services.yml for DB allocation
|
||||
databases 16
|
||||
|
||||
# Memory Management
|
||||
maxmemory {{ valkey_maxmemory }}
|
||||
maxmemory-policy {{ valkey_maxmemory_policy }}
|
||||
|
||||
# Persistence (disable for pure caching)
|
||||
# Enable if you need persistence
|
||||
save ""
|
||||
appendonly no
|
||||
|
||||
# Security - ACL-based authentication
|
||||
# Each service gets its own ACL user with scoped key prefix access
|
||||
# Users are provisioned by data-service.yml and stored in Vault
|
||||
aclfile /etc/valkey/users.acl
|
||||
|
||||
# Limits
|
||||
maxclients 1000
|
||||
timeout 0
|
||||
tcp-keepalive 300
|
||||
|
||||
# Slow log
|
||||
slowlog-log-slower-than 10000
|
||||
slowlog-max-len 128
|
||||
44
ansible/vault/README.md
Normal file
44
ansible/vault/README.md
Normal file
@ -0,0 +1,44 @@
|
||||
# Ansible Vault Secrets
|
||||
|
||||
This directory stores encrypted secrets used by playbooks.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Create a password file (excluded from git):
|
||||
```bash
|
||||
echo "your-vault-password" > ansible_vault_pass
|
||||
chmod 600 ansible_vault_pass
|
||||
```
|
||||
|
||||
2. Create the secrets file:
|
||||
```bash
|
||||
ansible-vault create secrets.yml --vault-password-file ansible_vault_pass
|
||||
```
|
||||
|
||||
3. Add your secrets (example structure):
|
||||
```yaml
|
||||
# Valkey admin password (used by valkey.yml)
|
||||
valkey_admin_password: "your-strong-password"
|
||||
|
||||
# Vault admin database password (used by data-service.yml)
|
||||
vault_admin_password: "your-vault-admin-password"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Reference in playbooks:
|
||||
```yaml
|
||||
vars_files:
|
||||
- ../vault/secrets.yml
|
||||
```
|
||||
|
||||
Run playbooks with vault password:
|
||||
```bash
|
||||
ansible-playbook -i inventory.ini playbooks/valkey.yml --vault-password-file vault/ansible_vault_pass
|
||||
```
|
||||
|
||||
Or set the environment variable:
|
||||
```bash
|
||||
export ANSIBLE_VAULT_PASSWORD_FILE=vault/ansible_vault_pass
|
||||
ansible-playbook -i inventory.ini playbooks/valkey.yml
|
||||
```
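
To inspect or change secrets later, work on the encrypted file in place:
```bash
ansible-vault view secrets.yml --vault-password-file ansible_vault_pass
ansible-vault edit secrets.yml --vault-password-file ansible_vault_pass
```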
|
||||
139
docs/architecture.md
Normal file
139
docs/architecture.md
Normal file
@ -0,0 +1,139 @@
|
||||
# Architecture
|
||||
|
||||
This document explains the design decisions behind Arvandor.
|
||||
|
||||
## Network Separation
|
||||
|
||||
### Why Two Networks?
|
||||
|
||||
```
|
||||
Internet ──► Proxmox Host ──► vmbr1 (192.168.100.0/24)
|
||||
│
|
||||
└──► Nebula (10.10.10.0/24)
|
||||
```
|
||||
|
||||
**Bridge Network (vmbr1)**
|
||||
- Used only for Terraform provisioning and Ansible access
|
||||
- Each VM's firewall blocks all bridge traffic except from the Proxmox host
|
||||
- No inter-VM communication on this network
|
||||
|
||||
**Nebula Overlay**
|
||||
- All application traffic uses encrypted Nebula tunnels
|
||||
- Group-based firewall rules for segmentation
|
||||
- Works across any network boundary (cloud, datacenter, home)
|
||||
|
||||
### Benefits
|
||||
|
||||
1. **Defense in depth** - Compromise of bridge network doesn't expose services
|
||||
2. **Migration ready** - Move VMs anywhere, Nebula handles connectivity
|
||||
3. **Zero-trust** - VMs authenticate via certificates, not network position
|
||||
|
||||
## VMID Allocation
|
||||
|
||||
VMIDs follow a logical pattern:
|
||||
|
||||
| Range | Purpose | Example |
|
||||
|-------|---------|---------|
|
||||
| 1000-1999 | Management | DNS, Caddy |
|
||||
| 2000-2999 | Services | Vault, Gitea |
|
||||
| 3000-3999 | Data | PostgreSQL, Valkey |
|
||||
| 4000-4999 | Workloads | Applications |
|
||||
| 5000-5999 | Monitoring | Prometheus |
|
||||
|
||||
The last digits determine the IP address:
|
||||
- VMID 1001 → x.x.x.11
|
||||
- VMID 3000 → x.x.x.30
|
||||
|
||||
## High Availability
|
||||
|
||||
All data services run as 3-node clusters:
|
||||
|
||||
### PostgreSQL (Patroni + etcd)
|
||||
|
||||
```
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ postgres-01 │ │ postgres-02 │ │ postgres-03 │
|
||||
│ Leader │◄─│ Replica │◄─│ Replica │
|
||||
│ + etcd │ │ + etcd │ │ + etcd │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘
|
||||
```
|
||||
|
||||
- Patroni handles leader election
|
||||
- etcd provides distributed consensus
|
||||
- Automatic failover on leader failure
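
A quick health check from any cluster node (a sketch; assumes `patronictl` is installed alongside Patroni and the REST API listens on the Nebula IP as in `patroni.yml.j2`):

```bash
# Show cluster members and which node currently holds the leader role
patronictl -c /etc/patroni/patroni.yml list

# The REST API answers 200 on the primary and 503 on replicas
curl -s -o /dev/null -w "%{http_code}\n" http://10.10.10.30:8008/primary
```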
|
||||
|
||||
### Valkey (Sentinel)
|
||||
|
||||
```
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ valkey-01 │ │ valkey-02 │ │ valkey-03 │
|
||||
│ Master │──│ Replica │ │ Replica │
|
||||
│ + Sentinel │ │ + Sentinel │ │ + Sentinel │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘
|
||||
```
|
||||
|
||||
- Sentinel monitors master health
|
||||
- Automatic promotion on master failure
|
||||
- ACL-based per-service key isolation
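
For example (a sketch; uses the `valkey-ha` master name from `valkey-sentinel.conf.j2`, and the password variable is a placeholder):

```bash
# Ask any Sentinel which node it currently considers the master
valkey-cli -h 10.10.10.33 -p 26379 SENTINEL get-master-addr-by-name valkey-ha

# A service ACL user can only touch keys under its own prefix
valkey-cli -h 10.10.10.33 --user myapp --pass "$MYAPP_VALKEY_PASSWORD" SET myapp:healthcheck ok
```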
|
||||
|
||||
### Vault (Raft)
|
||||
|
||||
```
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ vault-01 │ │ vault-02 │ │ vault-03 │
|
||||
│ Leader │──│ Standby │──│ Standby │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘
|
||||
```
|
||||
|
||||
- Integrated Raft storage (no external backend)
|
||||
- Automatic leader election
|
||||
- Unseal required after restart
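
Typical checks after a restart (a sketch; assumes Vault's default port 8200 on the Nebula IPs):

```bash
export VAULT_ADDR=http://10.10.10.20:8200

vault status                     # shows seal state and HA mode
vault operator unseal            # repeat with key shares until unsealed
vault operator raft list-peers   # confirm all three nodes joined the Raft cluster
```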
|
||||
|
||||
## Security Model
|
||||
|
||||
### Three-Layer Firewall
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ 1. Proxmox VM Firewall → Egress control │
|
||||
│ 2. Nebula Groups → East-west segmentation │
|
||||
│ 3. Guest iptables → Defense in depth │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Nebula Groups
|
||||
|
||||
| Group | Can Access |
|
||||
|-------|------------|
|
||||
| admin | Everything |
|
||||
| infrastructure | Other infrastructure hosts |
|
||||
| projects | infrastructure |
|
||||
| games | Nothing (isolated) |
|
||||
|
||||
### Vault Integration
|
||||
|
||||
Applications use Vault for:
|
||||
- Dynamic database credentials (short-lived)
|
||||
- Service secrets (API keys, etc.)
|
||||
- AppRole authentication
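
A typical application flow (sketch; assumes the default `approle` mount, while the `database/creds/*` and `secret/<service>/*` paths match the provisioning guide):

```bash
# Log in with AppRole and capture a client token
VAULT_TOKEN=$(vault write -field=token auth/approle/login \
  role_id="$ROLE_ID" secret_id="$SECRET_ID")
export VAULT_TOKEN

# Short-lived database credentials and static service secrets
vault read database/creds/myapp-app
vault kv get secret/myapp/valkey
```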
|
||||
|
||||
## Service Discovery
|
||||
|
||||
Internal DNS provides hostname resolution:
|
||||
|
||||
```
|
||||
<hostname>.nebula → Nebula IP
|
||||
```
|
||||
|
||||
VMs query 10.10.10.11 (DNS server) via Nebula. External queries forward to Cloudflare (1.1.1.1).
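
For example, from any VM on the overlay (assuming `postgres-01` is in the inventory):

```bash
dig @10.10.10.11 postgres-01.nebula +short   # → 10.10.10.30
dig @10.10.10.11 example.com +short          # forwarded to 1.1.1.1
```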
|
||||
|
||||
## Provisioning Flow
|
||||
|
||||
```
|
||||
1. terraform apply → Create VM
|
||||
2. bootstrap.yml → Update packages
|
||||
3. security.yml → Configure firewall
|
||||
4. nebula.yml → Join overlay network
|
||||
5. <service>.yml → Deploy service
|
||||
6. data-service.yml → Provision credentials
|
||||
```
|
||||
197
docs/getting-started.md
Normal file
197
docs/getting-started.md
Normal file
@ -0,0 +1,197 @@
|
||||
# Getting Started
|
||||
|
||||
This guide walks through setting up Arvandor from scratch.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### Proxmox Host
|
||||
|
||||
- Proxmox VE 7.x or 8.x
|
||||
- Two network bridges:
|
||||
- `vmbr0` - Public interface
|
||||
- `vmbr1` - Internal VM network (192.168.100.0/24)
|
||||
- IP forwarding enabled
|
||||
|
||||
### VM Template
|
||||
|
||||
Create an Arch Linux template (VMID 9000):
|
||||
|
||||
1. Download Arch Linux ISO
|
||||
2. Create VM, install Arch with basic setup
|
||||
3. Install `openssh`, `python` (for Ansible)
|
||||
4. Enable cloud-init or configure static user
|
||||
5. Convert to template
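
The last step can be done on the Proxmox host with `qm` (a minimal sketch):

```bash
# After the guest is installed and shut down
qm set 9000 --agent enabled=1   # optional: enable the QEMU guest agent
qm template 9000                # convert VM 9000 into the clone template
```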
|
||||
|
||||
### Local Tools
|
||||
|
||||
```bash
|
||||
# Terraform
|
||||
wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg
|
||||
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
|
||||
sudo apt update && sudo apt install terraform
|
||||
|
||||
# Ansible
|
||||
pip install ansible
|
||||
|
||||
# Nebula
|
||||
wget https://github.com/slackhq/nebula/releases/download/v1.9.0/nebula-linux-amd64.tar.gz
|
||||
tar xzf nebula-linux-amd64.tar.gz
|
||||
sudo mv nebula nebula-cert /usr/local/bin/
|
||||
```
|
||||
|
||||
## Step 1: Configure Terraform
|
||||
|
||||
```bash
|
||||
cd terraform
|
||||
|
||||
# Copy example configuration
|
||||
cp terraform.tfvars.example terraform.tfvars
|
||||
|
||||
# Edit with your values
|
||||
vim terraform.tfvars
|
||||
```
|
||||
|
||||
Required variables:
|
||||
- `proxmox_endpoint` - Your Proxmox API URL
|
||||
- `proxmox_api_token_id` - API token ID
|
||||
- `proxmox_api_token_secret` - API token secret
|
||||
- `proxmox_node` - Node name (e.g., "pve")
|
||||
- `username` - Default VM username
|
||||
- `password` - Default VM password
|
||||
- `ssh_key_path` - Path to your SSH public key
|
||||
|
||||
## Step 2: Create Proxmox API Token
|
||||
|
||||
In Proxmox:
|
||||
|
||||
1. Datacenter → Permissions → API Tokens
|
||||
2. Add token for a user with `PVEAdmin` or `Administrator` role
|
||||
3. Copy the token ID and secret
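
The same can be done from the Proxmox shell (a sketch; `terraform@pve` and the `provisioner` token ID are just example names):

```bash
pveum user add terraform@pve
pveum acl modify / --users terraform@pve --roles Administrator
pveum user token add terraform@pve provisioner --privsep 0
```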
|
||||
|
||||
## Step 3: Generate Nebula CA
|
||||
|
||||
```bash
|
||||
cd nebula
|
||||
|
||||
# Generate Certificate Authority
|
||||
nebula-cert ca -name "Arvandor CA" -duration 87600h
|
||||
|
||||
# This creates:
|
||||
# - ca.crt (share with all hosts)
|
||||
# - ca.key (keep secure!)
|
||||
```
|
||||
|
||||
## Step 4: Provision VMs
|
||||
|
||||
```bash
|
||||
cd terraform
|
||||
|
||||
terraform init
|
||||
terraform plan
|
||||
terraform apply
|
||||
```
|
||||
|
||||
This creates all VMs defined in the .tf files.
|
||||
|
||||
## Step 5: Generate Nebula Certificates
|
||||
|
||||
For each VM, generate a certificate:
|
||||
|
||||
```bash
|
||||
cd nebula
|
||||
|
||||
# DNS server
|
||||
nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
|
||||
-name "dns" -networks "10.10.10.11/24" -groups "infrastructure" \
|
||||
-out-crt configs/1001/dns/dns.crt \
|
||||
-out-key configs/1001/dns/dns.key
|
||||
|
||||
# Repeat for all VMs...
|
||||
```
|
||||
|
||||
## Step 6: Configure Ansible Inventory
|
||||
|
||||
```bash
|
||||
cd ansible
|
||||
|
||||
cp inventory.ini.example inventory.ini
|
||||
vim inventory.ini
|
||||
```
|
||||
|
||||
Update:
|
||||
- VM hostnames and IPs
|
||||
- SSH jump host configuration
|
||||
- Infrastructure variables
|
||||
|
||||
## Step 7: Bootstrap VMs
|
||||
|
||||
Run playbooks in order:
|
||||
|
||||
```bash
|
||||
# Update packages, reboot if kernel changed
|
||||
ansible-playbook -i inventory.ini playbooks/bootstrap.yml
|
||||
|
||||
# Configure iptables and fail2ban
|
||||
ansible-playbook -i inventory.ini playbooks/security.yml
|
||||
|
||||
# Join Nebula overlay network
|
||||
ansible-playbook -i inventory.ini playbooks/nebula.yml
|
||||
```
|
||||
|
||||
## Step 8: Deploy Core Services
|
||||
|
||||
```bash
|
||||
# DNS (required for hostname resolution)
|
||||
ansible-playbook -i inventory.ini playbooks/dns.yml
|
||||
ansible-playbook -i inventory.ini playbooks/dns-client.yml
|
||||
|
||||
# PostgreSQL HA cluster
|
||||
ansible-playbook -i inventory.ini playbooks/postgres-ha.yml
|
||||
|
||||
# Valkey with Sentinel
|
||||
ansible-playbook -i inventory.ini playbooks/valkey-sentinel.yml
|
||||
|
||||
# Garage S3 storage
|
||||
ansible-playbook -i inventory.ini playbooks/garage.yml
|
||||
```
|
||||
|
||||
## Step 9: Configure Host Port Forwarding
|
||||
|
||||
On the Proxmox host:
|
||||
|
||||
```bash
|
||||
# Copy and configure the script
|
||||
cp network/port-forward.sh.example /root/network/port-forward.sh
|
||||
chmod +x /root/network/port-forward.sh
|
||||
vim /root/network/port-forward.sh
|
||||
|
||||
# Test
|
||||
./port-forward.sh --dry-run
|
||||
|
||||
# Apply
|
||||
./port-forward.sh
|
||||
```
|
||||
|
||||
## Verification
|
||||
|
||||
Test connectivity:
|
||||
|
||||
```bash
|
||||
# SSH to VM via Nebula
|
||||
ssh admin@10.10.10.11
|
||||
|
||||
# Test DNS resolution
|
||||
dig @10.10.10.11 vault-01.nebula
|
||||
|
||||
# Test PostgreSQL
|
||||
psql -h 10.10.10.30 -U postgres -c "SELECT 1"
|
||||
|
||||
# Test Valkey
|
||||
valkey-cli -h 10.10.10.33 PING
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
- Add your application VMs to `terraform/workloads.tf`
|
||||
- Create services in `ansible/services.yml`
|
||||
- Provision app credentials with `data-service.yml`
|
||||
199
docs/provisioning-guide.md
Normal file
199
docs/provisioning-guide.md
Normal file
@ -0,0 +1,199 @@
|
||||
# Provisioning Guide
|
||||
|
||||
Step-by-step guide for adding new VMs to the infrastructure.
|
||||
|
||||
## Adding a New VM
|
||||
|
||||
### 1. Choose VMID and IP
|
||||
|
||||
Select a VMID based on the VM's purpose:
|
||||
|
||||
| Purpose | VMID Range | Example |
|
||||
|---------|------------|---------|
|
||||
| Management | 1000-1999 | 1003 |
|
||||
| Services | 2000-2999 | 2004 |
|
||||
| Data | 3000-3999 | 3012 |
|
||||
| Workloads | 4000-4999 | 4056 |
|
||||
| Monitoring | 5000-5999 | 5001 |
|
||||
|
||||
IP is derived from VMID:
|
||||
- Bridge: 192.168.100.XX
|
||||
- Nebula: 10.10.10.XX
|
||||
|
||||
Where XX is the last 2 digits of VMID.
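
A quick way to derive both addresses, following the last-two-digits convention used here:

```bash
vmid=4056
octet=${vmid: -2}                      # "56"
echo "Bridge: 192.168.100.${octet#0}"  # 192.168.100.56
echo "Nebula: 10.10.10.${octet#0}"     # 10.10.10.56
```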
|
||||
|
||||
### 2. Add to Terraform
|
||||
|
||||
Edit the appropriate .tf file:
|
||||
|
||||
```hcl
|
||||
module "myapp" {
|
||||
source = "./modules/vm"
|
||||
name = "myapp"
|
||||
vmid = 4056
|
||||
node_name = var.proxmox_node
|
||||
bridge_ip = "192.168.100.56"
|
||||
gateway = var.gateway
|
||||
datastore_id = var.datastore_id
|
||||
clone_vmid = var.template_vmid
|
||||
cores = 2
|
||||
memory = 4096
|
||||
disk_size = 50
|
||||
username = var.username
|
||||
password = var.password
|
||||
ssh_key_path = var.ssh_key_path
|
||||
}
|
||||
```
|
||||
|
||||
Apply:
|
||||
|
||||
```bash
|
||||
cd terraform
|
||||
terraform plan
|
||||
terraform apply
|
||||
```
|
||||
|
||||
### 3. Generate Nebula Certificate
|
||||
|
||||
```bash
|
||||
cd nebula
|
||||
|
||||
nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
|
||||
-name "myapp" \
|
||||
-networks "10.10.10.56/24" \
|
||||
-groups "projects" \
|
||||
-out-crt configs/4056/myapp/myapp.crt \
|
||||
-out-key configs/4056/myapp/myapp.key
|
||||
```
|
||||
|
||||
Choose the appropriate group:
|
||||
- `infrastructure` - Core services
|
||||
- `projects` - Applications needing infrastructure access
|
||||
- `games` - Isolated workloads
|
||||
|
||||
### 4. Add to Ansible Inventory
|
||||
|
||||
Edit `ansible/inventory.ini`:
|
||||
|
||||
```ini
|
||||
[projects]
|
||||
myapp ansible_host=192.168.100.56 nebula_ip=10.10.10.56 vmid=4056
|
||||
|
||||
[docker]
|
||||
myapp
|
||||
```
|
||||
|
||||
### 5. Run Bootstrap Playbooks
|
||||
|
||||
```bash
|
||||
cd ansible
|
||||
|
||||
# Update packages
|
||||
ansible-playbook -i inventory.ini playbooks/bootstrap.yml --limit "myapp"
|
||||
|
||||
# Configure firewall
|
||||
ansible-playbook -i inventory.ini playbooks/security.yml --limit "myapp"
|
||||
|
||||
# Join Nebula
|
||||
ansible-playbook -i inventory.ini playbooks/nebula.yml --limit "myapp"
|
||||
|
||||
# Configure DNS client
|
||||
ansible-playbook -i inventory.ini playbooks/dns-client.yml --limit "myapp"
|
||||
|
||||
# Install Docker (if needed)
|
||||
ansible-playbook -i inventory.ini playbooks/docker.yml --limit "myapp"
|
||||
```
|
||||
|
||||
### 6. Update DNS (Optional)
|
||||
|
||||
If you want a `.nebula` hostname, re-run the DNS playbook:
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory.ini playbooks/dns.yml
|
||||
```
|
||||
|
||||
### 7. Verify
|
||||
|
||||
```bash
|
||||
# Test SSH via Nebula
|
||||
ssh admin@10.10.10.56
|
||||
|
||||
# Test hostname resolution
|
||||
dig @10.10.10.11 myapp.nebula
|
||||
```
|
||||
|
||||
## Adding a Service with Database
|
||||
|
||||
### 1. Define in services.yml
|
||||
|
||||
```yaml
|
||||
services:
|
||||
myapp:
|
||||
description: "My Application"
|
||||
host: myapp
|
||||
deploy_path: /opt/myapp
|
||||
postgres:
|
||||
enabled: true
|
||||
valkey:
|
||||
enabled: true
|
||||
key_prefix: "myapp"
|
||||
s3:
|
||||
enabled: true
|
||||
bucket: "myapp-media"
|
||||
vault_roles:
|
||||
- app
|
||||
- migrate
|
||||
```
|
||||
|
||||
### 2. Provision Data Services
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory.ini playbooks/data-service.yml -e "service=myapp"
|
||||
```
|
||||
|
||||
This creates:
|
||||
- PostgreSQL database with static roles
|
||||
- Valkey ACL user with key prefix
|
||||
- Garage S3 bucket with API key
|
||||
- Vault database engine roles
|
||||
|
||||
### 3. Retrieve Credentials
|
||||
|
||||
```bash
|
||||
# Database credentials (dynamic)
|
||||
vault read database/creds/myapp-app
|
||||
vault read database/creds/myapp-migrate
|
||||
|
||||
# Valkey credentials (static, stored in Vault)
|
||||
vault kv get secret/myapp/valkey
|
||||
|
||||
# S3 credentials (static, stored in Vault)
|
||||
vault kv get secret/myapp/s3
|
||||
```
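
The S3 credentials work with any S3-compatible client pointed at the Garage endpoint (a sketch; assumes the AWS CLI and the port/region from `garage.toml.j2`):

```bash
export AWS_ACCESS_KEY_ID=...       # from: vault kv get secret/myapp/s3
export AWS_SECRET_ACCESS_KEY=...
aws s3 ls s3://myapp-media --endpoint-url http://10.10.10.39:3900 --region garage
```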
|
||||
|
||||
## Removing a VM
|
||||
|
||||
### 1. Remove from Terraform
|
||||
|
||||
Comment out or delete the module from .tf file, then:
|
||||
|
||||
```bash
|
||||
terraform plan
|
||||
terraform apply
|
||||
```
|
||||
|
||||
### 2. Remove from Inventory
|
||||
|
||||
Edit `ansible/inventory.ini` and remove the host.
|
||||
|
||||
### 3. Clean up Certificates
|
||||
|
||||
```bash
|
||||
rm -rf nebula/configs/<vmid>/
|
||||
```
|
||||
|
||||
### 4. Update DNS
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory.ini playbooks/dns.yml
|
||||
```
|
||||
8
nebula/.gitignore
vendored
Normal file
8
nebula/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# CA private key - NEVER COMMIT
|
||||
ca.key
|
||||
|
||||
# All private keys
|
||||
*.key
|
||||
|
||||
# Host certificates in configs/
|
||||
configs/*/*/*.key
|
||||
164
nebula/README.md
Normal file
164
nebula/README.md
Normal file
@ -0,0 +1,164 @@
|
||||
# Nebula Overlay Network
|
||||
|
||||
Nebula is a scalable overlay network that provides encrypted connectivity between all VMs regardless of their physical location.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
                 ┌─────────────┐
                 │ Lighthouse  │
                 │ 10.10.10.10 │
                 └──────┬──────┘
                        │
       ┌────────────────┼────────────────┐
       │                │                │
┌──────▼──────┐  ┌──────▼──────┐  ┌──────▼──────┐
│    VM 1     │  │    VM 2     │  │    VM 3     │
│ 10.10.10.11 │  │ 10.10.10.20 │  │ 10.10.10.30 │
└─────────────┘  └─────────────┘  └─────────────┘
|
||||
```
|
||||
|
||||
## Groups
|
||||
|
||||
Nebula uses groups for firewall segmentation:
|
||||
|
||||
| Group | Purpose | Can Access |
|
||||
|-------|---------|------------|
|
||||
| `admin` | Personal devices | Everything |
|
||||
| `infrastructure` | Core services | Each other |
|
||||
| `projects` | Application workloads | Infrastructure |
|
||||
| `lighthouse` | Nebula relays | - |
|
||||
| `games` | Game servers | Isolated |
|
||||
|
||||
## Setup
|
||||
|
||||
### 1. Generate Certificate Authority
|
||||
|
||||
```bash
|
||||
nebula-cert ca -name "Arvandor CA" -duration 87600h
|
||||
```
|
||||
|
||||
This creates:
|
||||
- `ca.crt` - Certificate (share with all hosts)
|
||||
- `ca.key` - Private key (keep secure, do not commit!)
|
||||
|
||||
### 2. Generate Host Certificates
|
||||
|
||||
```bash
|
||||
# Infrastructure VM example
|
||||
nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
|
||||
-name "dns" \
|
||||
-networks "10.10.10.11/24" \
|
||||
-groups "infrastructure" \
|
||||
-out-crt configs/1001/dns/dns.crt \
|
||||
-out-key configs/1001/dns/dns.key
|
||||
|
||||
# Application VM example
|
||||
nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
|
||||
-name "app-server" \
|
||||
-networks "10.10.10.50/24" \
|
||||
-groups "projects" \
|
||||
-out-crt configs/4050/app-server/app-server.crt \
|
||||
-out-key configs/4050/app-server/app-server.key
|
||||
|
||||
# Lighthouse
|
||||
nebula-cert sign -ca-crt ca.crt -ca-key ca.key \
|
||||
-name "lighthouse" \
|
||||
-networks "10.10.10.10/24" \
|
||||
-groups "infrastructure,lighthouse" \
|
||||
-out-crt configs/1000/lighthouse/lighthouse.crt \
|
||||
-out-key configs/1000/lighthouse/lighthouse.key
|
||||
```
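
After signing, you can verify what a certificate contains (name, network, groups) before deploying it:

```bash
nebula-cert print -path configs/1001/dns/dns.crt
```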
|
||||
|
||||
### 3. Directory Structure
|
||||
|
||||
```
|
||||
nebula/
|
||||
├── ca.crt # Certificate authority (commit this)
|
||||
├── ca.key # CA private key (DO NOT COMMIT)
|
||||
├── configs/
|
||||
│ ├── 1000/lighthouse/
|
||||
│ │ ├── lighthouse.crt
|
||||
│ │ └── lighthouse.key
|
||||
│ ├── 1001/dns/
|
||||
│ │ ├── dns.crt
|
||||
│ │ └── dns.key
|
||||
│ └── ...
|
||||
└── README.md
|
||||
```
|
||||
|
||||
### 4. Deploy with Ansible
|
||||
|
||||
The `nebula.yml` playbook deploys certificates and configuration:
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory.ini playbooks/nebula.yml --limit "new-vm"
|
||||
```
|
||||
|
||||
## Lighthouse Configuration
|
||||
|
||||
The lighthouse requires manual configuration (not managed by Ansible):
|
||||
|
||||
```yaml
|
||||
# /etc/nebula/config.yml on lighthouse
|
||||
pki:
|
||||
ca: /etc/nebula/ca.crt
|
||||
cert: /etc/nebula/config.crt
|
||||
key: /etc/nebula/config.key
|
||||
|
||||
static_host_map: {}
|
||||
|
||||
lighthouse:
|
||||
am_lighthouse: true
|
||||
serve_dns: false
|
||||
|
||||
listen:
|
||||
host: 0.0.0.0
|
||||
port: 4242
|
||||
|
||||
punchy:
|
||||
punch: true
|
||||
respond: true
|
||||
|
||||
relay:
|
||||
am_relay: true
|
||||
|
||||
tun:
|
||||
dev: nebula1
|
||||
drop_local_broadcast: true
|
||||
drop_multicast: true
|
||||
|
||||
firewall:
|
||||
conntrack:
|
||||
tcp_timeout: 12h
|
||||
udp_timeout: 3m
|
||||
default_timeout: 10m
|
||||
|
||||
outbound:
|
||||
- port: any
|
||||
proto: any
|
||||
host: any
|
||||
|
||||
inbound:
|
||||
- port: any
|
||||
proto: any
|
||||
group: admin
|
||||
- port: any
|
||||
proto: any
|
||||
group: infrastructure
|
||||
- port: any
|
||||
proto: icmp
|
||||
host: any
|
||||
```
|
||||
|
||||
## IP Allocation
|
||||
|
||||
| VMID Range | Network Segment | Last Octet |
|
||||
|------------|-----------------|------------|
|
||||
| 1000-1999 | Management | 10-19 |
|
||||
| 2000-2999 | Services | 20-29 |
|
||||
| 3000-3999 | Data | 30-49 |
|
||||
| 4000-4999 | Workloads | 50-59 |
|
||||
| 5000-5999 | Monitoring | 90-99 |
|
||||
|
||||
Example: VMID 3000 → 10.10.10.30
|
||||
61
network/ip-schema.example
Normal file
61
network/ip-schema.example
Normal file
@ -0,0 +1,61 @@
|
||||
# Arvandor IP Schema
|
||||
#
|
||||
# This documents the IP addressing scheme for the infrastructure.
|
||||
|
||||
## Networks
|
||||
|
||||
| Network | Address / CIDR | Purpose |
|
||||
|---------|------|---------|
|
||||
| Public | 203.0.113.10 | External access (vmbr0) |
|
||||
| Bridge | 192.168.100.0/24 | VM provisioning network (vmbr1) |
|
||||
| Nebula | 10.10.10.0/24 | Encrypted overlay network |
|
||||
|
||||
## VMID Ranges
|
||||
|
||||
| Range | Domain | Nebula Group | Purpose |
|
||||
|-------|--------|--------------|---------|
|
||||
| 1000-1999 | Management | infrastructure | DNS, Caddy, Lighthouse |
|
||||
| 2000-2999 | Services | infrastructure | Vault, Gitea |
|
||||
| 3000-3999 | Data | infrastructure | PostgreSQL, Valkey, Garage |
|
||||
| 4000-4999 | Workloads | projects/games | Applications, game servers |
|
||||
| 5000-5999 | Monitoring | infrastructure | Prometheus, Grafana, Loki |
|
||||
|
||||
## IP Pattern
|
||||
|
||||
The VMID determines the IP address: the last octet is the range's base octet plus the VMID's offset within its range:
|
||||
- VMID 1001 → 192.168.100.11 / 10.10.10.11
|
||||
- VMID 2000 → 192.168.100.20 / 10.10.10.20
|
||||
- VMID 3009 → 192.168.100.39 / 10.10.10.39
|
||||
|
||||
## Reserved Addresses
|
||||
|
||||
| IP | Host | Purpose |
|
||||
|----|------|---------|
|
||||
| 192.168.100.1 | Proxmox host | Gateway, Ansible jump host |
|
||||
| 10.10.10.1 | Proxmox host | Nebula endpoint for management |
|
||||
| 10.10.10.10 | Lighthouse | Nebula discovery/relay |
|
||||
| 10.10.10.11 | DNS | Internal DNS server |
|
||||
| 10.10.10.12 | Caddy | Reverse proxy |
|
||||
| 10.10.10.20-22 | Vault cluster | Secrets management |
|
||||
| 10.10.10.30-32 | PostgreSQL | Database cluster |
|
||||
| 10.10.10.33-35 | Valkey | Cache/queue cluster |
|
||||
| 10.10.10.39-41 | Garage | S3 storage cluster |
|
||||
|
||||
## Example VM Allocation
|
||||
|
||||
```
|
||||
VMID 1001 - dns
|
||||
Bridge: 192.168.100.11
|
||||
Nebula: 10.10.10.11
|
||||
Group: infrastructure
|
||||
|
||||
VMID 2000 - vault-01
|
||||
Bridge: 192.168.100.20
|
||||
Nebula: 10.10.10.20
|
||||
Group: infrastructure
|
||||
|
||||
VMID 4050 - app-server
|
||||
Bridge: 192.168.100.50
|
||||
Nebula: 10.10.10.50
|
||||
Group: projects
|
||||
```
|
||||
230
network/port-forward.sh.example
Normal file
230
network/port-forward.sh.example
Normal file
@ -0,0 +1,230 @@
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
# =============================================================================
|
||||
# Arvandor Port Forwarding Script
|
||||
# =============================================================================
|
||||
# Configures NAT (DNAT/SNAT) and FORWARD rules for Proxmox host.
|
||||
# Uses a custom chain (ARVANDOR-FORWARD) to avoid conflicts with PVE firewall.
|
||||
#
|
||||
# Usage:
|
||||
# ./port-forward.sh # Apply rules
|
||||
# ./port-forward.sh --dry-run # Show what would be done
|
||||
# ./port-forward.sh --restore # Restore backup
|
||||
# ./port-forward.sh --status # Show current rules
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Configuration - UPDATE THESE FOR YOUR ENVIRONMENT
|
||||
# -----------------------------------------------------------------------------
|
||||
NETWORK_INTERFACE="vmbr0"
|
||||
INTERNAL_NETWORK="192.168.100.0/24"
|
||||
PUBLIC_IP="203.0.113.10" # Your public IP
|
||||
CUSTOM_CHAIN="ARVANDOR-FORWARD"
|
||||
BACKUP_FILE="/root/network/iptables.backup"
|
||||
|
||||
# Nebula Lighthouse
|
||||
NEBULA_IP="192.168.100.10"
|
||||
NEBULA_PORT="4242"
|
||||
|
||||
# Caddy (Reverse Proxy)
|
||||
CADDY_IP="192.168.100.12"
|
||||
CADDY_HTTP_PORT="80"
|
||||
CADDY_HTTPS_PORT="443"
|
||||
|
||||
# Gitea (Optional)
|
||||
GITEA_IP="192.168.100.23"
|
||||
GITEA_SSH_PORT="2222"
|
||||
|
||||
# Security - restrict SSH to specific IP
|
||||
ALLOWED_SSH_IP="203.0.113.20" # Your home IP
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Functions
|
||||
# -----------------------------------------------------------------------------
|
||||
log() {
|
||||
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
|
||||
}
|
||||
|
||||
error() {
|
||||
echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $*" >&2
|
||||
}
|
||||
|
||||
die() {
|
||||
error "$*"
|
||||
exit 1
|
||||
}
|
||||
|
||||
check_root() {
|
||||
[[ $EUID -eq 0 ]] || die "This script must be run as root"
|
||||
}
|
||||
|
||||
check_interface() {
|
||||
local iface=$1
|
||||
ip link show "$iface" &>/dev/null || die "Interface $iface does not exist"
|
||||
}
|
||||
|
||||
backup_rules() {
|
||||
log "Backing up current iptables rules to $BACKUP_FILE"
|
||||
mkdir -p "$(dirname "$BACKUP_FILE")"
|
||||
iptables-save > "$BACKUP_FILE"
|
||||
}
|
||||
|
||||
restore_rules() {
|
||||
[[ -f "$BACKUP_FILE" ]] || die "Backup file $BACKUP_FILE not found"
|
||||
log "Restoring iptables rules from $BACKUP_FILE"
|
||||
iptables-restore < "$BACKUP_FILE"
|
||||
log "Rules restored successfully"
|
||||
}
|
||||
|
||||
setup_custom_chain() {
|
||||
# Create custom chain if it doesn't exist
|
||||
if ! iptables -L "$CUSTOM_CHAIN" -n &>/dev/null; then
|
||||
log "Creating custom chain: $CUSTOM_CHAIN"
|
||||
iptables -N "$CUSTOM_CHAIN"
|
||||
fi
|
||||
|
||||
# Ensure chain is jumped to from FORWARD (only once)
|
||||
if ! iptables -C FORWARD -j "$CUSTOM_CHAIN" &>/dev/null; then
|
||||
log "Inserting jump to $CUSTOM_CHAIN in FORWARD chain"
|
||||
iptables -I FORWARD 1 -j "$CUSTOM_CHAIN"
|
||||
fi
|
||||
|
||||
# Flush the custom chain
|
||||
log "Flushing custom chain: $CUSTOM_CHAIN"
|
||||
iptables -F "$CUSTOM_CHAIN"
|
||||
}
|
||||
|
||||
apply_rules() {
|
||||
local dry_run=${1:-false}
|
||||
|
||||
if [[ "$dry_run" == "true" ]]; then
|
||||
log "=== DRY RUN MODE - No changes will be made ==="
|
||||
echo ""
|
||||
echo "Would apply the following rules:"
|
||||
echo ""
|
||||
echo "NAT PREROUTING (DNAT):"
|
||||
echo " - UDP $NEBULA_PORT → $NEBULA_IP:$NEBULA_PORT (Nebula)"
|
||||
echo " - TCP $CADDY_HTTP_PORT → $CADDY_IP:$CADDY_HTTP_PORT (HTTP)"
|
||||
echo " - TCP $CADDY_HTTPS_PORT → $CADDY_IP:$CADDY_HTTPS_PORT (HTTPS)"
|
||||
echo " - TCP $GITEA_SSH_PORT → $GITEA_IP:$GITEA_SSH_PORT (Gitea SSH)"
|
||||
echo ""
|
||||
echo "FORWARD chain ($CUSTOM_CHAIN):"
|
||||
echo " - Allow traffic to all above destinations"
|
||||
echo ""
|
||||
echo "INPUT:"
|
||||
echo " - Allow Nebula (nebula1 interface)"
|
||||
echo " - Allow SSH from $ALLOWED_SSH_IP"
|
||||
echo " - Drop SSH from all others"
|
||||
echo " - Block Proxmox UI from $NETWORK_INTERFACE"
|
||||
return
|
||||
fi
|
||||
|
||||
# --- NAT Rules ---
|
||||
log "Flushing NAT rules..."
|
||||
iptables -t nat -F PREROUTING
|
||||
iptables -t nat -F POSTROUTING
|
||||
|
||||
log "Setting up NAT masquerading..."
|
||||
    iptables -t nat -A POSTROUTING -s "$INTERNAL_NETWORK" -o "$NETWORK_INTERFACE" -j MASQUERADE

    log "Setting up hairpin NAT for Nebula..."
    iptables -t nat -A PREROUTING -s "$INTERNAL_NETWORK" -d "$PUBLIC_IP" -p udp --dport "$NEBULA_PORT" -j DNAT --to-destination "$NEBULA_IP:$NEBULA_PORT"
    iptables -t nat -A POSTROUTING -s "$INTERNAL_NETWORK" -d "$NEBULA_IP" -p udp --dport "$NEBULA_PORT" -j SNAT --to-source "$PUBLIC_IP"

    log "Setting up hairpin NAT for Gitea SSH..."
    iptables -t nat -A PREROUTING -s "$INTERNAL_NETWORK" -d "$PUBLIC_IP" -p tcp --dport "$GITEA_SSH_PORT" -j DNAT --to-destination "$GITEA_IP:$GITEA_SSH_PORT"
    iptables -t nat -A POSTROUTING -s "$INTERNAL_NETWORK" -d "$GITEA_IP" -p tcp --dport "$GITEA_SSH_PORT" -j SNAT --to-source "$PUBLIC_IP"

    log "Setting up DNAT rules..."
    # Nebula
    iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p udp --dport "$NEBULA_PORT" -j DNAT --to-destination "$NEBULA_IP:$NEBULA_PORT"
    # Caddy
    iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p tcp --dport "$CADDY_HTTP_PORT" -j DNAT --to-destination "$CADDY_IP:$CADDY_HTTP_PORT"
    iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p tcp --dport "$CADDY_HTTPS_PORT" -j DNAT --to-destination "$CADDY_IP:$CADDY_HTTPS_PORT"
    # Gitea SSH
    iptables -t nat -A PREROUTING -i "$NETWORK_INTERFACE" -p tcp --dport "$GITEA_SSH_PORT" -j DNAT --to-destination "$GITEA_IP:$GITEA_SSH_PORT"

    # --- FORWARD Rules (custom chain) ---
    setup_custom_chain

    log "Adding FORWARD rules to $CUSTOM_CHAIN..."
    iptables -A "$CUSTOM_CHAIN" -d "$CADDY_IP" -p tcp --dport "$CADDY_HTTP_PORT" -j ACCEPT
    iptables -A "$CUSTOM_CHAIN" -d "$CADDY_IP" -p tcp --dport "$CADDY_HTTPS_PORT" -j ACCEPT
    iptables -A "$CUSTOM_CHAIN" -d "$NEBULA_IP" -p udp --dport "$NEBULA_PORT" -j ACCEPT
    iptables -A "$CUSTOM_CHAIN" -d "$GITEA_IP" -p tcp --dport "$GITEA_SSH_PORT" -j ACCEPT

    # --- INPUT Rules ---
    log "Flushing INPUT rules..."
    iptables -F INPUT

    log "Setting up INPUT rules..."
    iptables -A INPUT -i nebula1 -j ACCEPT
    iptables -A INPUT -p tcp --dport 22 -s "$ALLOWED_SSH_IP" -j ACCEPT
    iptables -A INPUT -p tcp --dport 22 -j DROP
    iptables -I INPUT -i "$NETWORK_INTERFACE" -p tcp --dport 8006 -j DROP
    iptables -I INPUT -i vmbr1 -p tcp --dport 8006 -j ACCEPT
}

save_rules() {
    log "Saving iptables rules persistently..."
    if command -v netfilter-persistent &>/dev/null; then
        netfilter-persistent save
        log "Rules saved via netfilter-persistent"
    else
        die "netfilter-persistent not found. Install with: apt install iptables-persistent"
    fi
}

show_status() {
    echo ""
    echo "=== Port Forwarding Status ==="
    echo ""
    echo "NAT PREROUTING rules:"
    iptables -t nat -L PREROUTING -n --line-numbers 2>/dev/null | head -20
    echo ""
    echo "FORWARD chain ($CUSTOM_CHAIN):"
    iptables -L "$CUSTOM_CHAIN" -n --line-numbers 2>/dev/null || echo "Chain not found"
    echo ""
    echo "=== Services ==="
    echo " HTTP/HTTPS: 80,443 → Caddy ($CADDY_IP)"
    echo " Nebula: $NEBULA_PORT → Lighthouse ($NEBULA_IP)"
    echo " Gitea SSH: $GITEA_SSH_PORT → $GITEA_IP"
}

# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
main() {
    local action="${1:-apply}"

    case "$action" in
        --dry-run|-n)
            check_root
            check_interface "$NETWORK_INTERFACE"
            apply_rules true
            ;;
        --restore|-r)
            check_root
            restore_rules
            ;;
        --status|-s)
            show_status
            ;;
        apply|"")
            check_root
            check_interface "$NETWORK_INTERFACE"
            backup_rules
            apply_rules false
            save_rules
            log "Setup complete!"
            show_status
            ;;
        *)
            echo "Usage: $0 [--dry-run|--restore|--status]"
            exit 1
            ;;
    esac
}

main "$@"
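For reference, a typical invocation looks like the following; the script's file name is not shown in this hunk, so the path below is an assumption.

```bash
# Hypothetical path; adjust to wherever the script lives in the repo.
sudo ./port-forward.sh --dry-run   # print the rules that would be applied
sudo ./port-forward.sh             # backup, apply, and persist the rules
sudo ./port-forward.sh --status    # show NAT/FORWARD state and the service map
sudo ./port-forward.sh --restore   # roll back to the saved backup
```

The flags mirror the main() dispatch above (--dry-run/-n, --restore/-r, --status/-s, with apply as the default action).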
15
terraform/.gitignore
vendored
Normal file
@ -0,0 +1,15 @@
# Terraform
.terraform/
*.tfstate
*.tfstate.*
*.tfvars
!*.tfvars.example
*.tfvars.json
crash.log
crash.*.log
override.tf
override.tf.json
*_override.tf
*_override.tf.json
.terraformrc
terraform.rc
184
terraform/data.tf
Normal file
@ -0,0 +1,184 @@
# Data Tier (3000-3999)
#
# Enterprise HA data services with automatic failover.
# All VMs communicate over the Nebula overlay (10.10.10.x), which keeps the architecture migration-ready.
#
# PostgreSQL Cluster (Patroni + etcd):
#   3000  postgres-01  10.10.10.30 - Patroni node (primary/replica elected dynamically)
#   3001  postgres-02  10.10.10.31 - Patroni node
#   3002  postgres-03  10.10.10.32 - Patroni node
#
# Valkey Sentinel (1 master + 2 replicas, Sentinel on each):
#   3003  valkey-01    10.10.10.33 - Master + Sentinel
#   3004  valkey-02    10.10.10.34 - Replica + Sentinel
#   3005  valkey-03    10.10.10.35 - Replica + Sentinel
#
# Garage S3 Cluster:
#   3009  garage-01    10.10.10.39 - S3-compatible storage node
#   3010  garage-02    10.10.10.40 - S3-compatible storage node
#   3011  garage-03    10.10.10.41 - S3-compatible storage node

# =============================================================================
# PostgreSQL HA Cluster (3 nodes)
# =============================================================================

module "postgres-01" {
  source       = "./modules/vm"
  name         = "postgres-01"
  vmid         = 3000
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.30"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 4096
  disk_size    = 100
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

module "postgres-02" {
  source       = "./modules/vm"
  name         = "postgres-02"
  vmid         = 3001
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.31"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 4096
  disk_size    = 100
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

module "postgres-03" {
  source       = "./modules/vm"
  name         = "postgres-03"
  vmid         = 3002
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.32"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 4096
  disk_size    = 100
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

# =============================================================================
# Valkey Sentinel (3 nodes: 1 master + 2 replicas + Sentinel on each)
# =============================================================================

module "valkey-01" {
  source       = "./modules/vm"
  name         = "valkey-01"
  vmid         = 3003
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.33"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 2048
  disk_size    = 50
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

module "valkey-02" {
  source       = "./modules/vm"
  name         = "valkey-02"
  vmid         = 3004
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.34"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 2048
  disk_size    = 50
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

module "valkey-03" {
  source       = "./modules/vm"
  name         = "valkey-03"
  vmid         = 3005
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.35"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 2048
  disk_size    = 50
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

# =============================================================================
# Garage S3 Cluster (3 nodes)
# =============================================================================

module "garage-01" {
  source       = "./modules/vm"
  name         = "garage-01"
  vmid         = 3009
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.39"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 2048
  disk_size    = 200
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

module "garage-02" {
  source       = "./modules/vm"
  name         = "garage-02"
  vmid         = 3010
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.40"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 2048
  disk_size    = 200
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

module "garage-03" {
  source       = "./modules/vm"
  name         = "garage-03"
  vmid         = 3011
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.41"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 2048
  disk_size    = 200
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}
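Because each cluster node is its own module instance, a single tier can be planned in isolation with Terraform's standard -target flag; a sketch for the PostgreSQL nodes (with the usual caveat that -target is intended for exceptional use, not routine workflow):

```bash
cd terraform
terraform plan \
  -target=module.postgres-01 \
  -target=module.postgres-02 \
  -target=module.postgres-03
```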
11
terraform/firewall.tf
Normal file
@ -0,0 +1,11 @@
# Firewall Configuration
#
# Security groups are managed manually in the Proxmox UI:
#   Datacenter → Firewall → Security Group
#
# Groups:
#   - base-egress: HTTP, HTTPS, DNS, NTP (default for VMs)
#   - restricted:  UDP 4242 only (Nebula tunnels, no internet)
#
# VMs reference these groups via the firewall_security_group variable.
# East-west segmentation (VM-to-VM) is handled by Nebula groups.
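Since the groups themselves are created by hand, it can be worth confirming they exist on the host before an apply. Assuming shell access to the Proxmox node, the pvesh CLI can read them through the datacenter firewall API (verify the paths against your PVE version):

```bash
# List security groups defined at the datacenter level.
pvesh get /cluster/firewall/groups

# Show the rules inside a specific group, e.g. base-egress.
pvesh get /cluster/firewall/groups/base-egress
```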
37
terraform/management.tf
Normal file
@ -0,0 +1,37 @@
# Management (1000-1999)
#
# Core infrastructure services that other VMs depend on.
# Lighthouse and DNS should be provisioned first.
#
# VMs:
#   1000  lighthouse  192.168.100.10 - Nebula lighthouse/relay
#   1001  dns         192.168.100.11 - Internal DNS server
#   1002  caddy       192.168.100.12 - Reverse proxy

module "dns" {
  source       = "./modules/vm"
  name         = "dns"
  vmid         = 1001
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.11"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

module "caddy" {
  source       = "./modules/vm"
  name         = "caddy"
  vmid         = 1002
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.12"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}
76
terraform/modules/vm/main.tf
Normal file
@ -0,0 +1,76 @@
terraform {
  required_providers {
    proxmox = {
      source = "bpg/proxmox"
    }
  }
}

resource "proxmox_virtual_environment_vm" "vm" {
  name      = var.name
  node_name = var.node_name
  vm_id     = var.vmid

  clone {
    vm_id = var.clone_vmid
  }

  cpu {
    cores = var.cores
  }

  memory {
    dedicated = var.memory
    floating  = var.memory_floating
  }

  disk {
    datastore_id = var.datastore_id
    interface    = "scsi0"
    iothread     = true
    discard      = "on"
    size         = var.disk_size
  }

  network_device {
    bridge = var.network_bridge
  }

  initialization {
    datastore_id = var.datastore_id
    ip_config {
      ipv4 {
        address = "${var.bridge_ip}/24"
        gateway = var.gateway
      }
    }
    user_account {
      username = var.username
      password = var.password
      keys     = [trimspace(file(var.ssh_key_path))]
    }
  }
}

# Firewall configuration - always manage options to explicitly enable/disable
resource "proxmox_virtual_environment_firewall_options" "vm" {
  node_name = var.node_name
  vm_id     = proxmox_virtual_environment_vm.vm.vm_id

  enabled       = var.firewall_enabled
  input_policy  = var.firewall_enabled ? var.firewall_input_policy : "ACCEPT"
  output_policy = var.firewall_enabled ? var.firewall_output_policy : "ACCEPT"
}

resource "proxmox_virtual_environment_firewall_rules" "vm" {
  count = var.firewall_enabled ? 1 : 0

  node_name = var.node_name
  vm_id     = proxmox_virtual_environment_vm.vm.vm_id

  rule {
    security_group = var.firewall_security_group
  }

  depends_on = [proxmox_virtual_environment_firewall_options.vm]
}
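After an apply, the per-VM firewall resources created by this module can be inspected from state to confirm the enable/disable logic took effect; a sketch using the gitea instance defined in services.tf:

```bash
cd terraform
terraform state list | grep firewall
terraform state show 'module.gitea.proxmox_virtual_environment_firewall_options.vm'
```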
14
terraform/modules/vm/outputs.tf
Normal file
@ -0,0 +1,14 @@
output "vm_id" {
  value       = proxmox_virtual_environment_vm.vm.vm_id
  description = "The Proxmox VM ID"
}

output "ip_address" {
  value       = var.bridge_ip
  description = "The bridge IP address"
}

output "name" {
  value       = proxmox_virtual_environment_vm.vm.name
  description = "The VM name"
}
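These outputs live on the module, not at the root, so they do not show up in `terraform output`; they can still be read ad hoc through `terraform console` once state exists, for example:

```bash
cd terraform
echo 'module.gitea.ip_address' | terraform console
# => "192.168.100.23"
```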
107
terraform/modules/vm/variables.tf
Normal file
@ -0,0 +1,107 @@
variable "name" {
  type        = string
  description = "VM name"
}

variable "vmid" {
  type        = number
  description = "Proxmox VM ID"
}

variable "node_name" {
  type        = string
  description = "Proxmox node name"
}

variable "bridge_ip" {
  type        = string
  description = "IP address on bridge network (without CIDR)"
}

variable "gateway" {
  type        = string
  default     = "192.168.100.1"
  description = "Gateway IP address"
}

variable "network_bridge" {
  type        = string
  default     = "vmbr1"
  description = "Network bridge name"
}

variable "datastore_id" {
  type        = string
  default     = "local-zfs"
  description = "Proxmox datastore for disks"
}

variable "cores" {
  type        = number
  default     = 1
  description = "Number of CPU cores"
}

variable "memory" {
  type        = number
  default     = 2048
  description = "Memory in MB"
}

variable "memory_floating" {
  type        = number
  default     = null
  description = "Floating memory (ballooning) in MB"
}

variable "disk_size" {
  type        = number
  default     = 50
  description = "Disk size in GB"
}

variable "clone_vmid" {
  type        = number
  default     = 9000
  description = "Template VM ID to clone from"
}

variable "username" {
  type        = string
  description = "VM user account name"
}

variable "password" {
  type        = string
  sensitive   = true
  description = "VM user account password"
}

variable "ssh_key_path" {
  type        = string
  description = "Path to SSH public key file"
}

variable "firewall_enabled" {
  type        = bool
  default     = true
  description = "Enable Proxmox firewall for this VM"
}

variable "firewall_security_group" {
  type        = string
  default     = "base-egress"
  description = "Security group to assign (base-egress, infrastructure, restricted)"
}

variable "firewall_input_policy" {
  type        = string
  default     = "DROP"
  description = "Default policy for inbound traffic"
}

variable "firewall_output_policy" {
  type        = string
  default     = "DROP"
  description = "Default policy for outbound traffic"
}
14
terraform/providers.tf
Normal file
@ -0,0 +1,14 @@
terraform {
  required_providers {
    proxmox = {
      source  = "bpg/proxmox"
      version = "0.66.1"
    }
  }
}

provider "proxmox" {
  endpoint  = var.proxmox_endpoint
  api_token = "${var.proxmox_api_token_id}=${var.proxmox_api_token_secret}"
  insecure  = var.proxmox_insecure
}
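Because the token is assembled from two variables, the secret never has to be written to terraform.tfvars; Terraform's standard TF_VAR_* environment variables work as a drop-in alternative (sketch, with a placeholder secret):

```bash
export TF_VAR_proxmox_api_token_id='terraform@pve!terraform'
export TF_VAR_proxmox_api_token_secret='<your-api-token-secret>'
terraform plan
```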
78
terraform/services.tf
Normal file
@ -0,0 +1,78 @@
# Trusted Services (2000-2999)
#
# Infrastructure services that support development and operations.
# All VMs in this tier use the "infrastructure" Nebula group.
#
# VMs:
#   2000  vault-01  192.168.100.20 - Vault cluster node 1
#   2001  vault-02  192.168.100.21 - Vault cluster node 2
#   2002  vault-03  192.168.100.22 - Vault cluster node 3
#   2003  gitea     192.168.100.23 - Git hosting

module "vault-01" {
  source       = "./modules/vm"
  name         = "vault-01"
  vmid         = 2000
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.20"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 2048
  disk_size    = 50
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

module "vault-02" {
  source       = "./modules/vm"
  name         = "vault-02"
  vmid         = 2001
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.21"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 2048
  disk_size    = 50
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

module "vault-03" {
  source       = "./modules/vm"
  name         = "vault-03"
  vmid         = 2002
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.22"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 2048
  disk_size    = 50
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}

module "gitea" {
  source       = "./modules/vm"
  name         = "gitea"
  vmid         = 2003
  node_name    = var.proxmox_node
  bridge_ip    = "192.168.100.23"
  gateway      = var.gateway
  datastore_id = var.datastore_id
  clone_vmid   = var.template_vmid
  cores        = 2
  memory       = 2048
  disk_size    = 100
  username     = var.username
  password     = var.password
  ssh_key_path = var.ssh_key_path
}
13
terraform/terraform.tfvars.example
Normal file
@ -0,0 +1,13 @@
# Proxmox Connection
proxmox_endpoint         = "https://proxmox.example:8006/"
proxmox_api_token_id     = "terraform@pve!terraform"
proxmox_api_token_secret = "your-api-token-secret-here"
proxmox_insecure         = true
proxmox_node             = "pve"

# VM Defaults
username      = "admin"
password      = "changeme"
ssh_key_path  = "~/.ssh/id_ed25519.pub"
datastore_id  = "local-zfs"
template_vmid = 9000
74
terraform/vars.tf
Normal file
@ -0,0 +1,74 @@
# =============================================================================
# Proxmox Connection
# =============================================================================

variable "proxmox_endpoint" {
  type        = string
  description = "Proxmox API endpoint (e.g., https://proxmox.example:8006/)"
}

variable "proxmox_api_token_id" {
  type        = string
  description = "Proxmox API token ID (e.g., terraform@pve!terraform)"
}

variable "proxmox_api_token_secret" {
  type        = string
  sensitive   = true
  description = "Proxmox API token secret"
}

variable "proxmox_insecure" {
  type        = bool
  default     = true
  description = "Skip TLS verification for self-signed certificates"
}

variable "proxmox_node" {
  type        = string
  description = "Proxmox node name to deploy VMs on"
}

# =============================================================================
# VM Defaults
# =============================================================================

variable "username" {
  type        = string
  description = "Default VM user account name"
}

variable "password" {
  type        = string
  sensitive   = true
  description = "Default VM user account password"
}

variable "ssh_key_path" {
  type        = string
  description = "Path to SSH public key file"
}

variable "datastore_id" {
  type        = string
  default     = "local-zfs"
  description = "Default Proxmox datastore for VM disks"
}

variable "network_bridge" {
  type        = string
  default     = "vmbr1"
  description = "Default network bridge for VMs"
}

variable "gateway" {
  type        = string
  default     = "192.168.100.1"
  description = "Default gateway for VMs"
}

variable "template_vmid" {
  type        = number
  default     = 9000
  description = "Template VM ID to clone from"
}