chore: Migrate stingray to NFS storage backend

Tony Du 2025-02-06 21:25:24 -08:00
parent ed0a889cf3
commit 4581ae91f8
21 changed files with 72 additions and 281 deletions

View File

@ -151,4 +151,3 @@ flux bootstrap gitea \
- Some inspiration and guidance was taken from [Andreas Marqvardsen's blog post](https://blog.andreasm.io/2024/01/15/proxmox-with-opentofu-kubespray-and-kubernetes)
- An automated setup of a k3s cluster from [Techno Tim's Ansible roles](https://github.com/techno-tim/k3s-ansible)
- Inspiration for a minimal docker swarm from [nmarus](https://github.com/nmarus/docker-swarm-ansible/tree/master)

View File

@ -3,12 +3,6 @@ strict: true
groups:
swarm_managers: group_names | intersect(['stingray_manager', 'stingray']) | length == 2
# haproxy only on the first manager.
# Using the special variable "groups" doesn't work here (probably because
# we're constructing it), so we can't do something like
# `inventory_hostname == groups['stingray_manager'][0]`
haproxy: group_names | intersect(['stingray_manager', 'stingray']) | length == 2 and
"-01" in inventory_hostname
swarm_workers: group_names | intersect(['stingray_worker', 'stingray']) | length == 2
gluster_nodes: group_names | intersect(['stingray']) | length == 1
gluster_nodes: group_names | intersect(['stingray_worker', 'stingray']) | length == 2
swarm: group_names | intersect(['stingray']) | length == 1
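
For context, a minimal sketch of the source inventory these constructed groups expect. Hostnames and nesting here are assumptions based on the Terraform naming further down, not part of this commit:

all:
  children:
    stingray:
      children:
        stingray_manager:
          hosts:
            stingray-manager-01: {}  # group_names includes stingray + stingray_manager,
                                     # so it lands in swarm_managers, swarm, and haproxy ("-01" in the name)
        stingray_worker:
          hosts:
            stingray-worker-01: {}   # lands in swarm_workers and swarm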

View File

@ -2,10 +2,32 @@ ansible_user: ubuntu
app_domain_name: stingray.mnke.org
gluster_volume_path: /glusterfs/bricks
gluster_volume_name: gfs
gluster_mount_path: /mnt/gfs
device2_hdd_dev: /dev/sda
# After messing around with storage solutions, I'm going full circle and
# landing back on using a single NFS share provided externally.
#
# GlusterFS seemed promising, especially with [this plugin](https://github.com/trajano/docker-volume-plugins/tree/master/glusterfs-volume-plugin)
# that's supposed to automatically create subdirectories of the Gluster share.
# However, I couldn't get it working. I'm fairly sure the underlying gluster
# command that runs when a container is created with a Gluster volume expects
# the host directory to already exist, but neither the Docker daemon nor the
# plugin creates it, so volume creation fails.
# Without that, the appeal of GlusterFS over a simple, single NFS share comes
# down to redundancy and better handling of high IOPS. Neither is worth it to
# me: GlusterFS is harder to manage, especially with each Gluster node built
# directly into my Swarm topology rather than sitting outside of it, and I
# don't expect this Swarm to be IO-intensive.
#
# Ceph was another option, but to be frank, it looks like way too much work to
# set up.
#
# Lastly, there are some cutting-edge [CSI plugins](https://github.com/olljanat/csi-plugins-for-docker-swarm)
# for newer versions of Docker Swarm, which I think would provide the best
# volume management of the bunch, but I can't get those working either!
#
# So I'm ending up back where I started :)
nfs_export_path: /mnt/emc14t9/managed/stingray
nfs_server: truenas.local
nfs_mount_path: /mnt/stingray/
portainer_app_name: portainer
portainer_admin_password: "{{ secrets.portainer_admin_password }}"
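
Worth noting: Docker's built-in `local` volume driver can also mount an NFS export directly, with no plugin and no host-level mount. A sketch using the server and export above (the volume name and subdirectory are illustrative):

volumes:
  gitea:
    driver: local
    driver_opts:
      type: nfs
      o: "addr=truenas.local,rw,hard"
      device: ":/mnt/emc14t9/managed/stingray/swarm-bootstrap/gitea"

This commit instead mounts the share once per host and bind-mounts subdirectories into volumes, which keeps the stack files identical in shape to the old Gluster setup and leaves a single mount to manage per node.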

View File

@ -47,7 +47,7 @@
state: started
enabled: true
- name: (swarm_manager) Init swarm
- name: Init swarm
when: inventory_hostname == groups.swarm_managers[0]
become: true
docker_swarm:
@ -58,13 +58,13 @@
- 172.18.0.0/16
- 172.19.0.0/16
register: manager_swarm_facts
failed_when: manager_swarm_facts.failed
- when: inventory_hostname == groups.swarm_managers[0]
set_fact:
worker_join_token: "{{manager_swarm_facts.swarm_facts.JoinTokens.Worker}}"
manager_join_token: "{{manager_swarm_facts.swarm_facts.JoinTokens.Manager}}"
- name: (swarm_manager) Create ingress network
- name: Create ingress network
# The ingress network conflicts with my subnet and ends up causing problems,
# so we have to set a different subnet first
when: inventory_hostname == groups.swarm_managers[0]
@ -83,7 +83,16 @@
# and things don't seem to work without it.
com.docker.network.driver.overlay.vxlanid_list: 4096
- name: (swarm_workers) Join swarm
- name: Join swarm as managers
when: inventory_hostname in groups.swarm_managers and inventory_hostname != groups.swarm_managers[0]
become: true
docker_swarm:
state: join
join_token: "{{hostvars[groups.swarm_managers[0]].manager_join_token}}"
advertise_addr: "{{ansible_default_ipv4.address}}"
remote_addrs: ["{{hostvars[groups.swarm_managers[0]].ansible_default_ipv4.address}}"]
- name: Join swarm as workers
when: inventory_hostname in groups.swarm_workers
become: true
docker_swarm:
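
Not part of this commit, but a sketch of a follow-up check that could assert every expected node actually joined, assuming the community.docker collection (which provides the `docker_swarm` module used above):

- name: Gather swarm node facts
  when: inventory_hostname == groups.swarm_managers[0]
  become: true
  community.docker.docker_node_info:
  register: node_facts

- name: Assert all managers and workers joined
  when: inventory_hostname == groups.swarm_managers[0]
  assert:
    that:
      - (node_facts.nodes | length) == (groups.swarm_managers | length) + (groups.swarm_workers | length)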

View File

@ -1,46 +0,0 @@
---
- name: Check if Gluster volume is initialized
become: true
stat:
path: "{{gluster_volume_path}}/{{inventory_hostname}}/brick"
register: glustervolume
- name: Verify Gluster volume path
become: true
file:
path: "{{gluster_volume_path}}/{{inventory_hostname}}/brick"
state: directory
- name: Initialize Gluster Cluster (on first node)
become: true
when: glustervolume.stat.exists == false and inventory_hostname == groups['gluster_nodes'][0]
loop: "{{ groups['gluster_nodes'] }}"
shell: gluster peer probe {{ item }}
- name: Create Gluster Volume (on first node)
# TODO Make this scalable
become: true
when: glustervolume.stat.exists == false and inventory_hostname == groups['gluster_nodes'][0]
shell: >
gluster volume create {{gluster_volume_name}} \
replica 3 \
{{groups['gluster_nodes'][0]}}:{{gluster_volume_path}}/{{groups['gluster_nodes'][0]}}/brick \
{{groups['gluster_nodes'][1]}}:{{gluster_volume_path}}/{{groups['gluster_nodes'][1]}}/brick \
{{groups['gluster_nodes'][2]}}:{{gluster_volume_path}}/{{groups['gluster_nodes'][2]}}/brick
- name: Secure Gluster Volume (on first node)
become: true
when: inventory_hostname == groups['gluster_nodes'][0]
shell: >
gluster volume set {{gluster_volume_name}} auth.allow \
{{groups['gluster_nodes'][0]}},{{groups['gluster_nodes'][1]}},{{groups['gluster_nodes'][2]}}
changed_when: false
- name: Start Gluster Volume (on first node)
become: true
when: glustervolume.stat.exists == false and inventory_hostname == groups['gluster_nodes'][0]
shell: gluster volume start {{gluster_volume_name}}
- name: Wait 60s for Gluster volume to be replicated
when: glustervolume.stat.exists == false and inventory_hostname == groups['gluster_nodes'][0]
shell: sleep 60

View File

@ -1,20 +0,0 @@
---
- name: Install Gluster and dependencies
become: true
apt:
pkg:
- xfsprogs
- attr
- glusterfs-server
- glusterfs-common
- glusterfs-client
state: present
force_apt_get: true
update_cache: yes
- name: Enable Gluster service
become: true
systemd:
name: glusterd
enabled: yes
state: started

View File

@ -1,5 +0,0 @@
---
- import_tasks: xfs.yml
- import_tasks: install.yml
- import_tasks: init.yml
- import_tasks: mount.yml

View File

@ -1,16 +0,0 @@
---
- name: Verify Gluster mount path
become: true
file:
path: "{{gluster_mount_path}}"
state: directory
- name: Mount Gluster volume
become: true
# TODO: Make this scalable in case different # of replicas
mount:
path: "{{gluster_mount_path}}"
src: "localhost:/{{gluster_volume_name}}"
fstype: glusterfs
opts: defaults,_netdev,backupvolfile-server=localhost
state: mounted

View File

@ -1,20 +0,0 @@
---
- name: Create XFS Path {{gluster_volume_path}}/{{inventory_hostname}}
become: true
file:
path: "{{gluster_volume_path}}/{{inventory_hostname}}"
state: directory
- name: Create a XFS filesystem on {{device2_hdd_dev}}
become: true
filesystem:
fstype: xfs
dev: "{{device2_hdd_dev}}"
- name: Mount XFS volume {{device2_hdd_dev}} to {{gluster_volume_path}}/{{inventory_hostname}}
become: true
mount:
path: "{{gluster_volume_path}}/{{inventory_hostname}}"
src: "{{device2_hdd_dev}}"
fstype: xfs
state: mounted

View File

@ -1,4 +1,13 @@
---
- name: Mount docker volume share
become: true
mount:
src: "{{nfs_server}}:{{nfs_export_path}}"
path: "{{nfs_mount_path}}"
opts: rw,sync,hard,intr
state: mounted
fstype: nfs
- name: Verify stacks directory exists (on first swarm node)
when: inventory_hostname == groups['swarm_managers'][0]
file:
@ -12,16 +21,16 @@
path: "{{ item }}"
state: directory
loop:
- "{{gluster_mount_path}}/swarm-bootstrap/traefik/letsencrypt"
- "{{gluster_mount_path}}/swarm-bootstrap/traefik/secrets"
- "{{gluster_mount_path}}/swarm-bootstrap/portainer"
- "{{gluster_mount_path}}/swarm-bootstrap/gitea"
- "{{nfs_mount_path}}/swarm-bootstrap/traefik/letsencrypt"
- "{{nfs_mount_path}}/swarm-bootstrap/traefik/secrets"
- "{{nfs_mount_path}}/swarm-bootstrap/portainer"
- "{{nfs_mount_path}}/swarm-bootstrap/gitea"
- name: Create CF secret
become: true
copy:
content: "{{ cf_dns_api_token }}"
dest: "{{gluster_mount_path}}/swarm-bootstrap/traefik/secrets/cf-dns-api-token.secret"
dest: "{{nfs_mount_path}}/swarm-bootstrap/traefik/secrets/cf-dns-api-token.secret"
mode: '0740'
- name: Set DNS servers
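
One side effect worth calling out: with `state: mounted`, Ansible's mount module also persists the entry to /etc/fstab, so given the group vars above each node ends up with roughly `truenas.local:/mnt/emc14t9/managed/stingray /mnt/stingray nfs rw,sync,hard,intr 0 0`. Hosts provisioned before this migration still carry the old Gluster mount; a cleanup sketch (not in this commit):

- name: Unmount old Gluster volume and drop its fstab entry
  become: true
  mount:
    path: /mnt/gfs   # the old gluster_mount_path
    state: absent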

View File

@ -18,26 +18,26 @@ volumes:
driver_opts:
o: bind
type: none
device: {{gluster_mount_path}}/swarm-bootstrap/gitea
device: {{nfs_mount_path}}/swarm-bootstrap/gitea
name: gitea
portainer_data:
driver: local
driver_opts:
o: bind
type: none
device: {{gluster_mount_path}}/swarm-bootstrap/portainer
device: {{nfs_mount_path}}/swarm-bootstrap/portainer
name: portainer_data
traefik:
driver: local
driver_opts:
o: bind
type: none
device: {{gluster_mount_path}}/swarm-bootstrap/traefik
device: {{nfs_mount_path}}/swarm-bootstrap/traefik
name: traefik
secrets:
cf_dns_api_token:
file: "{{gluster_mount_path}}/swarm-bootstrap/traefik/secrets/cf-dns-api-token.secret"
file: "{{nfs_mount_path}}/swarm-bootstrap/traefik/secrets/cf-dns-api-token.secret"
services:
traefik:

View File

@ -3,5 +3,4 @@
hosts: swarm
roles:
- docker-swarm
- gluster
- swarm-bootstrap

View File

@ -12,25 +12,25 @@ volumes:
driver_opts:
o: bind
type: none
device: ${AUTHENTIK_MEDIA_DIRECTORY:-/mnt/gfs/authentik/media}
device: ${AUTHENTIK_MEDIA_DIRECTORY:-/mnt/stingray/authentik/media}
authentik_certs:
driver: local
driver_opts:
o: bind
type: none
device: ${AUTHENTIK_CERTS_DIRECTORY:-/mnt/gfs/authentik/certs}
device: ${AUTHENTIK_CERTS_DIRECTORY:-/mnt/stingray/authentik/certs}
authentik_templates:
driver: local
driver_opts:
o: bind
type: none
device: ${AUTHENTIK_TEMPLATES_DIRECTORY:-/mnt/gfs/authentik/custom-templates}
device: ${AUTHENTIK_TEMPLATES_DIRECTORY:-/mnt/stingray/authentik/custom-templates}
redis_data:
driver: local
driver_opts:
o: bind
type: none
device: ${REDIS_DATA:-/mnt/gfs/authentik/redis/data}
device: ${REDIS_DATA:-/mnt/stingray/authentik/redis/data}
services:
redis:

View File

@ -10,13 +10,13 @@ volumes:
driver_opts:
o: bind
type: none
device: ${DATA_DIRECTORY:-/mnt/gfs/dockge/data}
device: ${DATA_DIRECTORY:-/mnt/stingray/dockge/data}
dockge_stacks:
driver: local
driver_opts:
o: bind
type: none
device: ${STACKS_DIRECTORY:-/mnt/gfs/dockge/stacks}
device: ${STACKS_DIRECTORY:-/mnt/stingray/dockge/stacks}
services:
dockge:

View File

@ -73,5 +73,5 @@ volumes:
driver_opts:
o: bind
type: none
device: ${REDIS_DATA_DIRECTORY:-/mnt/gfs/infisical/redis/data}
device: ${REDIS_DATA_DIRECTORY:-/mnt/stingray/infisical/redis/data}

View File

@ -1,11 +0,0 @@
storage "raft" {
path = "/vault/file"
node_id = "vault-1"
}
listener "tcp" {
address = "0.0.0.0:8200"
tls_disable = 1
}
ui = true

View File

@ -1,33 +0,0 @@
networks:
traefik:
external: true
services:
vault:
image: "hashicorp/vault"
cap_add:
- IPC_LOCK
volumes:
# Make sure to copy the config.hcl configuration into this mountpoint
# first.
- /mnt/gfs/vault/config:/vault/config
- /mnt/gfs/vault/file:/vault/file
- /mnt/gfs/vault/logs:/vault/logs
environment:
VAULT_ADDR: "https://0.0.0.0:8200"
VAULT_API_ADDR: "https://${VAULT_HOST:-vault.stingray.mnke.org}:8200"
VAULT_CLUSTER_ADDR: "https://${VAULT_HOST:-vault.stingray.mnke.org}:8201"
networks:
- traefik
entrypoint:
- vault
- server
- -config=/vault/config/config.hcl
deploy:
labels:
- "traefik.enable=true"
- "traefik.http.routers.vault.rule=Host(`${VAULT_HOST:-vault.stingray.mnke.org}`)"
- "traefik.http.routers.vault.entrypoints=websecure"
- "traefik.http.routers.vault.tls.certresolver=letsencrypt"
- "traefik.http.services.vault.loadbalancer.server.port=8200"
- "traefik.swarm.network=traefik"

View File

@ -49,5 +49,9 @@ spec:
existingSecret: ghost-db-creds
service:
type: LoadBalancer
type: ClusterIP
persistence:
accessModes:
- ReadWriteMany
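
`ReadWriteMany` only works if the claim lands on a storage class whose provisioner supports shared access (typically NFS-backed in this setup). A sketch of what the surrounding persistence block might look like; `storageClass` and `size` are assumptions, not taken from this repo:

persistence:
  enabled: true
  storageClass: nfs-client   # assumed: the cluster's NFS provisioner's storage class
  accessModes:
    - ReadWriteMany
  size: 8Gi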

View File

@ -5,9 +5,8 @@ module "docker_swarm_stingray" {
vm_id_prefix = "8"
subnet_cidr = "10.0.42.0/24"
gateway = "10.0.0.1"
manager_count = 1
worker_count = 2
storage_size = 32
manager_count = 3
worker_count = 3
dns_server_ip = local.dns_server_ip
proxmox_vm_storage = var.proxmox_vm_storage

View File

@ -4,21 +4,11 @@ locals {
name = "${var.swarm_name}-manager-${format("%02s", i + 1)}"
}
]
manager_storage_dummies = [
for i in range(var.manager_count) : {
name = "${var.swarm_name}-manager-${format("%02s", i + 1)}-dummy"
}
]
workers = [
for i in range(var.worker_count) : {
name = "${var.swarm_name}-worker-${format("%02s", i + 1)}"
}
]
worker_storage_dummies = [
for i in range(var.worker_count) : {
name = "${var.swarm_name}-worker-${format("%02s", i + 1)}-dummy"
}
]
}
resource "proxmox_virtual_environment_pool" "swarm_pool" {
@ -26,27 +16,6 @@ resource "proxmox_virtual_environment_pool" "swarm_pool" {
pool_id = var.swarm_name
}
resource "proxmox_virtual_environment_vm" "swarm_manager_dummy" {
count = var.manager_count
name = local.manager_storage_dummies[count.index].name
description = "Managed by Terraform"
tags = ["terraform", "disk-dummy", var.swarm_name]
node_name = "pve"
vm_id = "${var.vm_id_prefix}${count.index + 201}"
pool_id = proxmox_virtual_environment_pool.swarm_pool.id
started = false
on_boot = false
disk {
datastore_id = var.proxmox_vm_storage
file_format = "qcow2"
interface = "scsi0"
size = var.storage_size
}
}
resource "proxmox_virtual_environment_vm" "swarm_manager" {
count = var.manager_count
name = local.managers[count.index].name
@ -88,23 +57,7 @@ resource "proxmox_virtual_environment_vm" "swarm_manager" {
file_format = "qcow2"
}
dynamic "disk" {
for_each = { for idx, val in proxmox_virtual_environment_vm.swarm_manager_dummy[count.index].disk : idx => val }
iterator = data_disk
content {
datastore_id = data_disk.value["datastore_id"]
path_in_datastore = data_disk.value["path_in_datastore"]
file_format = data_disk.value["file_format"]
size = data_disk.value["size"]
# assign from scsi1 and up
interface = "scsi${data_disk.key + 1}"
}
}
initialization {
dns {
servers = [var.dns_server_ip]
}
ip_config {
ipv4 {
# x.x.x.32 - x.x.x.39
@ -163,31 +116,6 @@ EOF
}
}
# This is currently how we create "disks" that are independent of a VM: by
# creating a dummy VM with a disk and then attaching the disk. This way, we
# can destroy the real VM without destroying the disk.
# https://registry.terraform.io/providers/bpg/proxmox/latest/docs/resources/virtual_environment_vm#example-attached-disks
resource "proxmox_virtual_environment_vm" "swarm_worker_dummy" {
count = var.worker_count
name = local.worker_storage_dummies[count.index].name
description = "Managed by Terraform"
tags = ["terraform", "disk-dummy", var.swarm_name]
node_name = "pve"
vm_id = "${var.vm_id_prefix}${count.index + 401}"
pool_id = proxmox_virtual_environment_pool.swarm_pool.id
started = false
on_boot = false
disk {
datastore_id = var.proxmox_vm_storage
file_format = "qcow2"
interface = "scsi0"
size = var.storage_size
}
}
resource "proxmox_virtual_environment_vm" "swarm_worker" {
count = var.worker_count
name = local.workers[count.index].name
@ -228,24 +156,7 @@ resource "proxmox_virtual_environment_vm" "swarm_worker" {
file_format = "qcow2"
}
# scsi on these guys for hotplugging and resizing
dynamic "disk" {
for_each = { for idx, val in proxmox_virtual_environment_vm.swarm_worker_dummy[count.index].disk : idx => val }
iterator = data_disk
content {
datastore_id = data_disk.value["datastore_id"]
path_in_datastore = data_disk.value["path_in_datastore"]
file_format = data_disk.value["file_format"]
size = data_disk.value["size"]
# assign from scsi1 and up
interface = "scsi${data_disk.key + 1}"
}
}
initialization {
dns {
servers = [var.dns_server_ip]
}
ip_config {
ipv4 {
# x.x.x.40 - x.x.x.55

View File

@ -35,10 +35,6 @@ variable "worker_count" {
}
}
variable "storage_size" {
type = number
}
variable "common_cloud_init" {
type = string
description = "Base cloud-init template"