feat: initial commit

This commit is contained in:
Simon Cornet 2025-05-30 15:04:18 +02:00
commit 5f0f242ab9
14 changed files with 322 additions and 0 deletions

View file

@ -0,0 +1,29 @@
---
# enable ceph maintenance mode
#
# runs "ceph osd set" once for each of the flags noout, nobackfill and
# norebalance. every task reports "ok" (never "changed") and fails when its
# command exits with a non-zero return code.
- name: "enable ceph maintenance mode"
  block:
    # set ceph osd noout settings
    # (plain command without pipes or redirects, so the command module is
    # used rather than shell)
    - name: "set ceph noout"
      ansible.builtin.command:
        cmd: "sudo ceph osd set noout"
      changed_when: false
      failed_when: "ceph_noout_result.rc != 0"
      register: "ceph_noout_result"
    # set ceph osd nobackfill settings
    - name: "set ceph nobackfill"
      ansible.builtin.command:
        cmd: "sudo ceph osd set nobackfill"
      changed_when: false
      failed_when: "ceph_nobackfill_result.rc != 0"
      register: "ceph_nobackfill_result"
    # set ceph osd norebalance settings
    - name: "set ceph norebalance"
      ansible.builtin.command:
        cmd: "sudo ceph osd set norebalance"
      changed_when: false
      failed_when: "ceph_norebalance_result.rc != 0"
      register: "ceph_norebalance_result"

View file

@ -0,0 +1,43 @@
---
# exit ceph maintenance mode
#
# runs "ceph osd unset" once for each of the flags noout, nobackfill and
# norebalance, then polls "ceph -s" until its output contains HEALTH_OK.
# every task reports "ok" (never "changed") and carries the "cluster" tag.
- name: "exit ceph maintenance mode"
  block:
    # unset ceph osd noout settings
    # (plain command without pipes or redirects, so the command module is
    # used rather than shell)
    - name: "unset ceph noout"
      ansible.builtin.command:
        cmd: "sudo ceph osd unset noout"
      changed_when: false
      failed_when: "ceph_noout_result.rc != 0"
      register: "ceph_noout_result"
      tags: "cluster"
    # unset ceph osd nobackfill settings
    - name: "unset ceph nobackfill"
      ansible.builtin.command:
        cmd: "sudo ceph osd unset nobackfill"
      changed_when: false
      failed_when: "ceph_nobackfill_result.rc != 0"
      register: "ceph_nobackfill_result"
      tags: "cluster"
    # unset ceph osd norebalance settings
    - name: "unset ceph norebalance"
      ansible.builtin.command:
        cmd: "sudo ceph osd unset norebalance"
      changed_when: false
      failed_when: "ceph_norebalance_result.rc != 0"
      register: "ceph_norebalance_result"
      tags: "cluster"
    # wait for ceph to be healthy
    # (re-runs the status command up to 30 times, 10 seconds apart, and
    # stops as soon as the output mentions HEALTH_OK)
    - name: "wait for ceph to be healthy"
      ansible.builtin.command:
        cmd: "sudo ceph -s"
      changed_when: false
      delay: 10
      register: "ceph_status"
      retries: 30
      tags: "cluster"
      until: "'HEALTH_OK' in ceph_status.stdout"

25
tasks/main.yaml Normal file
View file

@ -0,0 +1,25 @@
---
# update cluster
#
# entry point of the role: picks a leader node, lets that node put ceph into
# maintenance mode, runs the per-node update tasks on every host, and finally
# lets the leader take ceph out of maintenance mode again.
#
# NOTE(review): whether nodes are updated one at a time, and whether the
# ceph flags stay set until the last node has finished, depends on the
# play's serial/strategy settings, which are not visible here -- confirm.
- name: "update cluster"
  tags: "update-cluster"
  block:
    # collect proxmox cluster nodes
    # (leader is the first host of the "proxmox" group, or an empty string
    # when that group is missing or empty)
    - name: "collect proxmox cluster nodes"
      ansible.builtin.set_fact:
        leader_node: "{{ groups['proxmox'] | default([]) | first | default('') }}"
    # enter ceph maintenance mode
    - name: "set ceph enter maintenance mode"
      ansible.builtin.include_tasks: "ceph/enter-maint.yaml"
      when: "inventory_hostname == leader_node"
    # update proxmox cluster
    - name: "update proxmox cluster nodes"
      ansible.builtin.include_tasks: "proxmox/update-node.yaml"
    # exit ceph maintenance mode
    - name: "exit ceph maintenance mode"
      ansible.builtin.include_tasks: "ceph/exit-maint.yaml"
      when: "inventory_hostname == leader_node"

View file

@ -0,0 +1,64 @@
---
# update a single proxmox node
#
# sequence: enable ha node-maintenance -> wait until no guests are left on
# the node -> apt dist-upgrade -> firmware upgrade (best effort) -> optional
# reboot -> disable ha node-maintenance -> fixed pause.
# every task reports "ok" (never "changed").

# enter maintenance mode
- name: "enter maintenance mode"
  ansible.builtin.command:
    cmd: "sudo /usr/sbin/ha-manager crm-command node-maintenance enable {{ inventory_hostname_short }}"
  changed_when: false
  failed_when: "maintenance_result.rc != 0"
  register: "maintenance_result"
# wait for host to be empty
# lists cluster resources that mention this host and filters out the
# node/storage/sdn/template entries; the task is retried (30 x 10s) until
# nothing is left. grep exits 1 when it selects no lines, so only return
# codes >= 2 are treated as failures.
- name: "wait for host to be empty"
  ansible.builtin.shell:
    cmd: >-
      set -o pipefail;
      sudo pvesh get /cluster/resources |
      grep {{ inventory_hostname_short }} |
      grep -Ev "node/pve|storage/pve|sdn/pve|template"
    # "set -o pipefail" is not available in every /bin/sh implementation,
    # so run the pipeline under bash explicitly
    executable: "/bin/bash"
  changed_when: false
  delay: 10
  failed_when: "running_guests.rc >= 2"
  register: "running_guests"
  retries: 30
  until: "running_guests.stdout_lines | length == 0"
# install node updates
# (refreshes the package cache and performs a dist-upgrade; the dpkg options
# are force-confdef and force-confold)
- name: "install node updates"
  ansible.builtin.apt:
    dpkg_options: "force-confdef,force-confold"
    force_apt_get: true
    update_cache: true
    upgrade: "dist"
  changed_when: false
# install firmware updates
# (best effort: failed_when is false, so a non-zero exit code from fwupdmgr
# never fails the play)
- name: "install firmware updates"
  ansible.builtin.command:
    cmd: "sudo fwupdmgr upgrade --no-reboot-check --assume-yes"
  changed_when: false
  failed_when: false
# initiate reboot
# NOTE(review): reboot_required is not defined anywhere in this file;
# presumably it comes from role defaults or the inventory -- confirm.
- name: "reboot host"
  ansible.builtin.reboot:
    reboot_timeout: 300
    pre_reboot_delay: 5
    post_reboot_delay: 30
    test_command: "uptime"
  changed_when: false
  when: "reboot_required"
# exit maintenance mode
# the explicit until makes the retries/delay below effective regardless of
# the ansible version (older releases ignore retries without until)
- name: "exit maintenance mode"
  ansible.builtin.command:
    cmd: "sudo /usr/sbin/ha-manager crm-command node-maintenance disable {{ inventory_hostname_short }}"
  changed_when: false
  delay: 10
  failed_when: "maintenance_exit_result.rc != 0"
  register: "maintenance_exit_result"
  retries: 3
  until: "maintenance_exit_result.rc == 0"
# calm down period
# (wait_for with only a timeout simply pauses for that many seconds)
- name: "calming down"
  ansible.builtin.wait_for:
    timeout: 120