commit f29f47ba64

@ -0,0 +1,3 @@
{
    "discovered_interpreter_python": "/opt/homebrew/bin/python3.12"
}
@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="TemplatesService">
    <option name="TEMPLATE_FOLDERS">
      <list>
        <option value="$MODULE_DIR$/templates" />
      </list>
    </option>
  </component>
</module>
@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Black">
    <option name="sdkName" value="Python 3.9 (masasana_cloud)" />
  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (masasana_cloud)" project-jdk-type="Python SDK" />
</project>
@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/kubernetes_ansible.iml" filepath="$PROJECT_DIR$/.idea/kubernetes_ansible.iml" />
    </modules>
  </component>
</project>
@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
@ -0,0 +1,19 @@
[defaults]
inventory = inventory.yml
# ToDo: If ACN is used, the ssh key must be specified here
# private_key_file = /root/.ssh/id_rsa_sc_admin
remote_user = root
ansible_ssh_common_args = '-o StrictHostKeyChecking=no'
host_key_checking = False
gathering = smart
fact_caching = jsonfile
fact_caching_connection = .facts
fact_caching_timeout = 0
stdout_callback = yaml
ansible_python_interpreter = '/usr/bin/python3'

[inventory]
enable_plugins=host_list, script, auto, yaml, ini, toml, kubernetes.core.k8s

[ssh_connection]
scp_if_ssh = True
@ -0,0 +1,41 @@
###########################################
# Masasana Cloud Platform Inventory
###########################################
all:
  children:
    kubernetes:
      children:
        controller:
          children:
            controller_init:
              hosts:
                k8s-master-1:
                  ansible_host: 10.0.0.2
                  ansible_python_interpreter: /usr/bin/python3.11
                  ansible_ssh_common_args: '-o ProxyCommand="ssh -p 22 -W %h:%p -q root@65.109.4.220"'

        worker:
          hosts:
            k8s-node-1:
              ansible_host: 10.0.0.3
              ansible_python_interpreter: /usr/bin/python3.11
              ansible_ssh_common_args: '-o ProxyCommand="ssh -p 22 -W %h:%p -q root@65.109.4.220"'

    nat:
      hosts:
        nat-gateway:
          ansible_host: 65.109.4.220
          ansible_python_interpreter: /usr/bin/python3.11
          ansible_ssh_common_args: '-o ProxyCommand="ssh -p 22 -W %h:%p -q root@65.109.4.220"'

    kubernetes_api:
      hosts:
        k8s-api:
          ansible_host: 65.109.222.158
          ansible_python_interpreter: /usr/bin/python3.11

    local:
      hosts:
        localhost:
          ansible_host: 127.0.0.1
          ansible_connection: local
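Additional nodes follow the same pattern. As an illustration only (the host name and private IP below are placeholders, not part of this commit), a second worker behind the NAT gateway would be appended under the existing worker.hosts block; connectivity through the jump host can then be checked with an ad-hoc run such as ansible -i inventory.yml kubernetes -m ping.

        worker:
          hosts:
            k8s-node-2:
              ansible_host: 10.0.0.4
              ansible_python_interpreter: /usr/bin/python3.11
              ansible_ssh_common_args: '-o ProxyCommand="ssh -p 22 -W %h:%p -q root@65.109.4.220"'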
@ -0,0 +1,11 @@
######################################
# Main Playbook #
######################################
---
- hosts: localhost
  gather_facts: false
  connection: local

- import_playbook: playbooks/setup_hetzner_nat.yml
- import_playbook: playbooks/setup_k8s_cluster.yml
@ -0,0 +1,19 @@
######################################
# Prepare Hetzner NAT-Gateway #
######################################
---
- hosts: controller, worker, nat, local
  gather_facts: false

  tasks:
    - name: Generate ssh key pair local
      when: inventory_hostname in groups['local']
      import_tasks: ../tasks/hetzner_nat/generate_ssh_key.yml

    - name: Configuration of NAT-Server
      when: inventory_hostname in groups['nat']
      import_tasks: ../tasks/hetzner_nat/configuration_nat.yml

    - name: Configuration of Client-Server (controller and worker)
      when: inventory_hostname in groups['controller'] or inventory_hostname in groups['worker']
      import_tasks: ../tasks/hetzner_nat/configuration_client.yml
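The per-task when: inventory_hostname in groups[...] guards above work, but each task file only ever applies to a single group, so an equivalent structure is to give every import its own play. A sketch of that alternative, not part of this commit and functionally equivalent, keeping the original order (key generation, then the NAT server, then the clients):

---
- hosts: local
  gather_facts: false
  tasks:
    - import_tasks: ../tasks/hetzner_nat/generate_ssh_key.yml

- hosts: nat
  gather_facts: false
  tasks:
    - import_tasks: ../tasks/hetzner_nat/configuration_nat.yml

- hosts: controller, worker
  gather_facts: false
  tasks:
    - import_tasks: ../tasks/hetzner_nat/configuration_client.yml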
@ -0,0 +1,48 @@
|
|||
######################################
|
||||
# Prepare Linux and install k8s #
|
||||
######################################
|
||||
---
|
||||
- hosts: controller, worker, proxmox
|
||||
gather_facts: false
|
||||
|
||||
tasks:
|
||||
- name: Read global default values
|
||||
include_vars:
|
||||
file: ../vars/default.yml
|
||||
|
||||
- name: Read cluster default values
|
||||
include_vars:
|
||||
file: ../vars/k8s_cluster/cluster_vars.yml
|
||||
|
||||
- name: base install block
|
||||
when: inventory_hostname in (groups['controller'] + groups['worker'])
|
||||
block:
|
||||
- name: Prepare Debian System
|
||||
import_tasks: ../tasks/k8s_cluster/system/prepare_debian_system.yml
|
||||
|
||||
- name: Setup k8s Cluster
|
||||
import_tasks: ../tasks/k8s_cluster/kubernetes/setupK8sCluster.yml
|
||||
|
||||
- name: Install linkerd Service Mesh
|
||||
import_tasks: ../tasks/k8s_cluster/serviceMesh/installLinkerd.yml
|
||||
|
||||
- name: Setup Storage
|
||||
import_tasks: ../tasks/k8s_cluster/storage/setupStorage.yml
|
||||
|
||||
- name: controller install block
|
||||
when: inventory_hostname in groups['controller']
|
||||
block:
|
||||
- name: install Helm3
|
||||
import_tasks: ../tasks/k8s_cluster/helm/install_helm3.yml
|
||||
|
||||
- name: base install block controller_init
|
||||
when: inventory_hostname in groups['controller_init']
|
||||
block:
|
||||
- name: Install MetalLB
|
||||
import_tasks: ../tasks/k8s_cluster/loadbalancer/install_metallb.yml
|
||||
|
||||
- name: install nginx ingress
|
||||
import_tasks: ../tasks/k8s_cluster/ingress/install_nginx_ingress.yml
|
||||
|
||||
- name: Install cert-manager
|
||||
import_tasks: ../tasks/k8s_cluster/cert_manager/install_cert_manager.yml
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
######################################
|
||||
# Configuration Client-Server #
|
||||
######################################
|
||||
---
|
||||
- name: Edit Network interface file on Client Server
|
||||
blockinfile:
|
||||
path: /etc/network/interfaces
|
||||
block: |
|
||||
auto enp7s0
|
||||
iface enp7s0 inet dhcp
|
||||
post-up ip route add default via 10.0.0.1
|
||||
|
||||
- name: Add Hetzner Nameserver
|
||||
blockinfile:
|
||||
path: /etc/resolvconf/resolv.conf.d/head
|
||||
block: |
|
||||
nameserver 8.8.8.8
|
||||
nameserver 8.8.4.4
|
||||
|
||||
- name: Enable Updates for resolvconf
|
||||
raw: "resolvconf --enable-updates"
|
||||
|
||||
- name: Update resolvconf
|
||||
raw: "resolvconf -u"
|
||||
|
||||
- name: Reboot Clients
|
||||
reboot:
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
######################################
|
||||
# Configuration NAT-Server #
|
||||
######################################
|
||||
---
|
||||
- name: Copy Public ssh-key and paste to NAT-Server
|
||||
copy:
|
||||
src: /tmp/id_rsa.pub
|
||||
dest: ~/.ssh/
|
||||
|
||||
- name: Change Permission of the Private ssh-key only to read for the User
|
||||
raw: "chmod 0400 ~/.ssh/id_rsa"
|
||||
|
||||
- name: Edit Network interface file on NAT-Server
|
||||
blockinfile:
|
||||
path: /etc/network/interfaces
|
||||
block: |
|
||||
auto enp7s0
|
||||
iface enp7s0 inet dhcp
|
||||
post-up echo 1 > /proc/sys/net/ipv4/ip_forward
|
||||
post-up iptables -t nat -A POSTROUTING -s '10.0.0.0/16' -o enp7s0 -j MASQUERADE
|
||||
|
||||
|
|
@ -0,0 +1,8 @@
######################################
# Generate SSH-Key #
######################################
---
- name: Generate an OpenSSH keypair local
  community.crypto.openssh_keypair:
    path: /tmp/id_rsa
    type: rsa
@ -0,0 +1,5 @@
---
- name: Deploy example-app from manifest
  kubernetes.core.k8s:
    state: present
    definition: "{{ lookup('file', './manifests/example-app/deploy.yml') | from_yaml_all }}"
@ -0,0 +1,52 @@
|
|||
######################################
|
||||
# Install cert-manager in cluster #
|
||||
######################################
|
||||
---
|
||||
- name: Read cert-manager values
|
||||
include_vars:
|
||||
file: ../vars/k8s_cluster/cert_manager/certManager.yml
|
||||
|
||||
- name: Create cert-manager namespace
|
||||
k8s:
|
||||
state: present
|
||||
definition:
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: "{{ namespace }}"
|
||||
annotations:
|
||||
linkerd.io/inject: 'enabled'
|
||||
|
||||
- name: Add cert-manager repo
|
||||
kubernetes.core.helm_repository:
|
||||
name: "{{ helm.releaseName }}"
|
||||
repo_url: "{{ helm.repoUrl }}"
|
||||
|
||||
- name: Install CRDs for cert-manager
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/cert_manager/cert_manager_crds.yaml') | from_yaml_all }}"
|
||||
|
||||
|
||||
- name: Deploy cert-manager from helm chart
|
||||
kubernetes.core.helm:
|
||||
name: "{{ helm.releaseName }}"
|
||||
state: present
|
||||
chart_ref: "{{ helm.chart }}"
|
||||
release_namespace: "{{ namespace }}"
|
||||
chart_version: "{{ helm.chartVersion }}"
|
||||
update_repo_cache: "true"
|
||||
|
||||
- name: Pause for 1.5 minutes and wait for cert-manager webhook
|
||||
ansible.builtin.pause:
|
||||
seconds: 90
|
||||
|
||||
- name: Deploy cert-manager lets-encrypt staging config file
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/cert_manager/lets_encrypt_staging.yml.j2') | from_yaml_all }}"
|
||||
|
||||
- name: Deploy cert-manager lets-encrypt production config file
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/cert_manager/lets_encrypt_production.yml.j2') | from_yaml_all }}"
|
||||
|
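Once the letsencrypt-staging and letsencrypt-production ClusterIssuers rendered by the templates referenced above are in place, workloads can request certificates from them. A minimal, purely illustrative Certificate (name, namespace and DNS name are placeholders, not part of this commit):

apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: example-app-tls        # placeholder name
  namespace: default           # placeholder namespace
spec:
  secretName: example-app-tls
  dnsNames:
    - app.example.com          # placeholder domain
  issuerRef:
    kind: ClusterIssuer
    name: letsencrypt-staging  # switch to letsencrypt-production after testing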
|
@ -0,0 +1,17 @@
|
|||
######################################
|
||||
# add controller to existing cluster #
|
||||
######################################
|
||||
---
|
||||
- name: copy clusterConfig to remote location
|
||||
template:
|
||||
src: '../templates/k8s_cluster/cluster/joinController.yml.j2'
|
||||
dest: /tmp/joinController.yml
|
||||
|
||||
- name: Join the controller node to cluster
|
||||
command: kubeadm join --config=/tmp/joinController.yml
|
||||
|
||||
- name: Setup kubeconfig for local usage
|
||||
command: "{{ item }}"
|
||||
loop:
|
||||
- mkdir -p ~/.kube
|
||||
- cp -i /etc/kubernetes/admin.conf ~/.kube/config
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
######################################
|
||||
# add worker to existing cluster #
|
||||
######################################
|
||||
---
|
||||
- name: Copy the worker join command to server location
|
||||
copy: src=join_command_worker.sh dest=/tmp/join_command_worker.sh mode=0777
|
||||
|
||||
- name: Join the worker node to cluster
|
||||
command: sh /tmp/join_command_worker.sh
|
||||
|
||||
- name: Delete local copy of join worker
|
||||
local_action: file path=./join_command_worker.sh state=absent
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
######################################
|
||||
# Tasks for init k8s cluster #
|
||||
######################################
|
||||
---
|
||||
- name: Generate join command
|
||||
command: kubeadm token create --print-join-command
|
||||
register: join_command
|
||||
|
||||
- name: Copy join command to local file
|
||||
local_action: copy content="{{ join_command.stdout_lines[0] }}" dest="./join_command_worker.sh"
|
||||
|
||||
- name: Generate join command controller token
|
||||
command: kubeadm token create
|
||||
register: join_command_token
|
||||
|
||||
- name: Generate join command controller certsKey
|
||||
command: kubeadm init phase upload-certs --upload-certs
|
||||
register: join_command_controller_certskey
|
||||
|
||||
- name: Generate join command controller certssh256
|
||||
shell: openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
|
||||
register: join_command_controller_certsha256
|
||||
|
||||
- name: save facts for controller join
|
||||
set_fact:
|
||||
token: '{{ join_command_token.stdout }}'
|
||||
certskey: '{{ join_command_controller_certskey.stdout_lines[-1] }}'
|
||||
certsha256: '{{ join_command_controller_certsha256.stdout }}'
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
######################################
|
||||
# Tasks for init k8s cluster #
|
||||
######################################
|
||||
---
|
||||
- name: Get hostname
|
||||
command: hostname
|
||||
register: old_hostname
|
||||
changed_when: false
|
||||
|
||||
- set_fact: hostname={{ old_hostname.stdout | lower }}
|
||||
|
||||
- name: Pull k8s images
|
||||
command: kubeadm config images pull --kubernetes-version=v{{ kubernetesVersion }}
|
||||
|
||||
- name: copy clusterConfig to remote location
|
||||
template:
|
||||
src: '../templates/k8s_cluster/cluster/clusterConfiguration.yml.j2'
|
||||
dest: /tmp/clusterConfiguration.yml
|
||||
|
||||
- name: Initialize the Kubernetes cluster using kubeadm
|
||||
command:
|
||||
argv:
|
||||
- kubeadm
|
||||
- init
|
||||
- --config=/tmp/clusterConfiguration.yml
|
||||
- --node-name={{ hostname }}
|
||||
- --ignore-preflight-errors
|
||||
- Swap
|
||||
- --upload-certs
|
||||
|
||||
- name: Remove clusterConfig on remote location
|
||||
ansible.builtin.file:
|
||||
path: /tmp/clusterConfiguration.yml
|
||||
state: absent
|
||||
|
||||
- name: Setup kubeconfig for local usage
|
||||
command: "{{ item }}"
|
||||
loop:
|
||||
- mkdir -p ~/.kube
|
||||
- cp -i /etc/kubernetes/admin.conf ~/.kube/config
|
||||
|
||||
- name: Wait for all k8s nodes to be ready
|
||||
shell: kubectl wait --for=condition=Ready nodes --all --timeout=600s
|
||||
register: nodes_ready
|
||||
|
||||
- name: create Calico NetworkManager directory
|
||||
file:
|
||||
path: '/etc/NetworkManager/conf.d/'
|
||||
state: directory
|
||||
mode: 0755
|
||||
|
||||
- name: Configure Calico NetworkManager
|
||||
template:
|
||||
src: ../templates/k8s_cluster/cluster/calico.conf.j2
|
||||
dest: /etc/NetworkManager/conf.d/calico.conf
|
||||
owner: root
|
||||
mode: '0644'
|
||||
|
||||
- name: Install calico pod network
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/cluster/calico.yml.j2') | from_yaml_all }}"
|
||||
|
||||
- name: Wait for calico daemonset to become ready
|
||||
command: "kubectl rollout status daemonset calico-node -n kube-system --timeout 60s"
|
||||
|
||||
- name: Generate join command
|
||||
command: kubeadm token create --print-join-command
|
||||
register: join_command
|
||||
|
|
@ -0,0 +1,9 @@
########################################
#Restart DNS after DaemonSet Deployment#
########################################
---
- name: Wait for calico pods to become ready
  command: "kubectl rollout status daemonset calico-node -n kube-system --timeout 120s"

- name: Restart CoreDNS deployment
  command: "kubectl rollout restart deployments/coredns -n kube-system"
@ -0,0 +1,21 @@
|
|||
######################################
|
||||
# Install Helm3 in cluster #
|
||||
######################################
|
||||
---
|
||||
- name: Read helm3 values
|
||||
include_vars:
|
||||
file: ../vars/k8s_cluster/helm/helm3.yml
|
||||
|
||||
- name: Download Helm install script
|
||||
get_url:
|
||||
url: "{{ helm_install_script }}"
|
||||
dest: "~/get_helm.sh"
|
||||
mode: 0700
|
||||
|
||||
- name: Install Helm
|
||||
command: "~/get_helm.sh"
|
||||
|
||||
- name: Delete Helm install script
|
||||
file:
|
||||
state: absent
|
||||
path: "~/get_helm.sh"
|
||||
|
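The vars file ../vars/k8s_cluster/helm/helm3.yml referenced above is not part of this diff; for these tasks it only has to define helm_install_script. A hedged sketch of what it presumably contains (the URL points at the upstream get-helm-3 script and is an assumption, not taken from the commit):

# vars/k8s_cluster/helm/helm3.yml (assumed content)
helm_install_script: https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3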
|
@ -0,0 +1,34 @@
|
|||
######################################
|
||||
# Deploy nginx ingress controller #
|
||||
######################################
|
||||
---
|
||||
- name: Read ingress nginx values
|
||||
include_vars:
|
||||
file: ../vars/k8s_cluster/ingress/ingressNginx.yml
|
||||
|
||||
- name: "Create namespace '{{ namespace }}'"
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition:
|
||||
api_version: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: '{{ namespace }}'
|
||||
labels:
|
||||
name: '{{ namespace }}'
|
||||
|
||||
- name: Add nginx ingress controller chart repo
|
||||
kubernetes.core.helm_repository:
|
||||
name: "{{ helm.releaseName }}"
|
||||
repo_url: "{{ helm.repoUrl }}"
|
||||
|
||||
- name: Deploy nginx ingress controller from helm chart
|
||||
kubernetes.core.helm:
|
||||
name: '{{ helm.releaseName }}'
|
||||
state: present
|
||||
chart_ref: '{{ helm.chart }}'
|
||||
release_namespace: '{{ namespace }}'
|
||||
chart_version: '{{ helm.chartVersion }}'
|
||||
update_repo_cache: 'true'
|
||||
## ToDo: Deploy the nginx controller with custom values
|
||||
# values: "{{ lookup('template', '../templates/k8s_cluster/ingress/ingressNginxValues.yml') | from_yaml }}"
|
||||
|
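For the commented-out values override above, a hypothetical ingressNginxValues.yml for the upstream ingress-nginx chart could look like the sketch below (both settings are assumptions, not taken from this commit); the LoadBalancer service would then get its address from the MetalLB pool installed earlier.

# templates/k8s_cluster/ingress/ingressNginxValues.yml (hypothetical sketch)
controller:
  ingressClassResource:
    default: true        # make "nginx" the default IngressClass
  service:
    type: LoadBalancer   # address assigned by MetalLB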
|
@ -0,0 +1,11 @@
|
|||
######################################
|
||||
# Deploy kube-vip virtualIP #
|
||||
######################################
|
||||
---
|
||||
- name: Deploy kube-vip as static pod
|
||||
template:
|
||||
src: ../templates/k8s_cluster/kube_vip/kube_vip.yml.j2
|
||||
dest: /etc/kubernetes/manifests/kube-vip.yml
|
||||
owner: root
|
||||
mode: '0600'
|
||||
when: installKubeVip
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
######################################
|
||||
#tasks for vanilla kubernetes install#
|
||||
######################################
|
||||
---
|
||||
- name: Get OS version name
|
||||
command: lsb_release -cs
|
||||
register: os_codename
|
||||
changed_when: false
|
||||
|
||||
- name: Get OS release number
|
||||
command: lsb_release -rs
|
||||
register: os_release
|
||||
changed_when: false
|
||||
|
||||
- name: Add an apt signing key for CRI-O
|
||||
apt_key:
|
||||
url: "{{ item }}"
|
||||
state: present
|
||||
loop:
|
||||
- 'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/{{ crio_version }}/Debian_{{ os_release.stdout }}/Release.key'
|
||||
- 'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/Debian_{{ os_release.stdout }}/Release.key'
|
||||
|
||||
- name: Add CRI-O apt repository for stable version
|
||||
apt_repository:
|
||||
repo: deb http://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/Debian_{{ os_release.stdout }}/ /
|
||||
filename: devel:kubic:libcontainers:stable.list
|
||||
state: present
|
||||
update_cache: yes
|
||||
- apt_repository:
|
||||
repo: deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/{{ crio_version }}/Debian_{{ os_release.stdout }}/ /
|
||||
filename: devel:kubic:libcontainers:stable:cri-o:{{ crio_version }}.list
|
||||
state: present
|
||||
update_cache: yes
|
||||
|
||||
- name: Install CRI-O packages
|
||||
apt:
|
||||
name: "{{ packages }}"
|
||||
state: present
|
||||
update_cache: yes
|
||||
allow_unauthenticated: true
|
||||
vars:
|
||||
packages:
|
||||
- cri-o
|
||||
- cri-o-runc
|
||||
|
||||
- name: Enable and start CRI-O service
|
||||
ansible.builtin.systemd:
|
||||
name: crio.service
|
||||
state: started
|
||||
enabled: yes
|
||||
|
||||
- name: CRI-O use systemd cgroup driver
|
||||
copy:
|
||||
dest: "/etc/crio/crio.conf.d/02-cgroup-manager.conf"
|
||||
content: |
|
||||
[crio.runtime]
|
||||
conmon_cgroup = "pod"
|
||||
cgroup_manager = "systemd"
|
||||
|
||||
- name: Overriding the CRI-O sandbox (pause) image
|
||||
lineinfile:
|
||||
path: /etc/crio/crio.conf
|
||||
regexp: '#? ?pause_image ?= ?"registry\.k8s\.io/pause:(.+)"'
|
||||
backrefs: True
|
||||
line: pause_image = "registry.k8s.io/pause:\1"
|
||||
|
||||
- name: Forwarding IPv4 and letting iptables see bridged traffic
|
||||
copy:
|
||||
dest: "/etc/modules-load.d/k8s.conf"
|
||||
content: |
|
||||
overlay
|
||||
br_netfilter
|
||||
|
||||
- name: modprobe overlay & br-netfilter
|
||||
command: "{{ item }}"
|
||||
loop:
|
||||
- modprobe overlay
|
||||
- modprobe br_netfilter
|
||||
|
||||
#sysctl params required by setup, params persist across reboots
|
||||
- name: ipv4 bridge forward
|
||||
copy:
|
||||
dest: "/etc/sysctl.d/k8s.conf"
|
||||
content: |
|
||||
net.bridge.bridge-nf-call-iptables = 1
|
||||
net.bridge.bridge-nf-call-ip6tables = 1
|
||||
net.ipv4.ip_forward = 1
|
||||
|
||||
- name: Apply sysctl params without reboot
|
||||
command: sysctl --system
|
||||
|
||||
|
||||
- name: Import Kubernetes GPG key
|
||||
raw: "curl -fsSL https://pkgs.k8s.io/core:/stable:/v{{ kubernetesVersion.split('.')[:2] | join('.') }}/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg"
|
||||
|
||||
- name: Add Kubernetes apt repository
|
||||
raw: "echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v{{ kubernetesVersion.split('.')[:2] | join('.') }}/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list"
|
||||
|
||||
- name: Remove swapfile from /etc/fstab
|
||||
mount:
|
||||
name: "{{ item }}"
|
||||
fstype: swap
|
||||
state: absent
|
||||
with_items:
|
||||
- swap
|
||||
- none
|
||||
|
||||
- name: Disable swap
|
||||
command: swapoff -a
|
||||
|
||||
- name: Update apt cache
|
||||
raw: apt-get -y update
|
||||
changed_when: False
|
||||
|
||||
- name: Install Kubernetes binaries
|
||||
apt:
|
||||
name: "{{ packages }}"
|
||||
state: present
|
||||
update_cache: yes
|
||||
vars:
|
||||
packages:
|
||||
- "kubelet={{ kubernetesVersion }}-1.1"
|
||||
- "kubeadm={{ kubernetesVersion }}-1.1"
|
||||
- "kubectl={{ kubernetesVersion }}-1.1"
|
||||
|
||||
- name: Add kubectl completion bash
|
||||
lineinfile:
|
||||
path: ~/.bashrc
|
||||
line: source <(kubectl completion bash)
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
######################################
|
||||
# Setup k8s Cluster #
|
||||
######################################
|
||||
---
|
||||
- name: kubernetes installation
|
||||
block:
|
||||
- name: vanilla kubernetes install block
|
||||
when: kubernetesClusterType == 'vanilla'
|
||||
block:
|
||||
- name: Read vanilla kubernetes values
|
||||
include_vars:
|
||||
file: ../vars/k8s_cluster/kubernetes/vanilla_kubernetes.yml
|
||||
|
||||
- import_tasks: ../tasks/k8s_cluster/kubernetes/install_vanilla_kubernetes.yml
|
||||
|
||||
# ToDo: find solution for VIP
|
||||
# - name: Read kube_vip values for virtual IP
|
||||
# include_vars:
|
||||
# file: ../vars/k8s_cluster/kube_vip/kube_vip.yml
|
||||
# - import_tasks: ../tasks/k8s_cluster/kube_vip/install_kube_vip.yml
|
||||
# when: inventory_hostname in groups['controller']
|
||||
|
||||
- import_tasks: ../tasks/k8s_cluster/cluster/vanilla_kubernetes/init_kubernetes_cluster.yml
|
||||
when: inventory_hostname in groups['controller_init']
|
||||
- import_tasks: ../tasks/k8s_cluster/cluster/vanilla_kubernetes/generate_join_command.yml
|
||||
when: inventory_hostname in groups['controller_init']
|
||||
#ToDo: when controller replica exists
|
||||
## - import_tasks: ../tasks/k8s_cluster/cluster/vanilla_kubernetes/add_controller_to_cluster.yml
|
||||
## when: inventory_hostname in groups['controller_replica']
|
||||
- import_tasks: ../tasks/k8s_cluster/cluster/vanilla_kubernetes/add_worker_to_cluster.yml
|
||||
when: inventory_hostname in groups['worker']
|
||||
- import_tasks: ../tasks/k8s_cluster/cluster/vanilla_kubernetes/restart_coredns.yml
|
||||
when: inventory_hostname in groups['controller_init']
|
||||
#
|
||||
# - name: install microk8s block
|
||||
# when: kubernetesClusterType == 'microk8s'
|
||||
# block:
|
||||
# - debug: msg='ToDo install microk8s'
|
||||
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
######################################
|
||||
# Install MetalLB in cluster #
|
||||
######################################
|
||||
---
|
||||
- name: Read metallb values
|
||||
include_vars:
|
||||
file: ../vars/k8s_cluster/loadbalancer/metallb.yml
|
||||
|
||||
- name: Create metallb namespace
|
||||
k8s:
|
||||
state: present
|
||||
definition:
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: "{{ namespace }}"
|
||||
annotations:
|
||||
linkerd.io/inject: 'enabled'
|
||||
|
||||
- name: Add metallb repo
|
||||
kubernetes.core.helm_repository:
|
||||
name: "{{ helm.releaseName }}"
|
||||
repo_url: "{{ helm.repoUrl }}"
|
||||
|
||||
- name: Deploy metallb from helm chart
|
||||
kubernetes.core.helm:
|
||||
name: "{{ helm.releaseName }}"
|
||||
state: present
|
||||
chart_ref: "{{ helm.chart }}"
|
||||
release_namespace: "{{ namespace }}"
|
||||
chart_version: "{{ helm.chartVersion }}"
|
||||
update_repo_cache: "true"
|
||||
values: "{{ lookup('template', '../templates/k8s_cluster/loadbalancer/metallb.yml') | from_yaml }}"
|
||||
|
||||
- name: Pause for 25 seconds and wait for metallb
|
||||
ansible.builtin.pause:
|
||||
seconds: 25
|
||||
|
||||
- name: Pause for 15 seconds and wait for metallb webhook
|
||||
ansible.builtin.pause:
|
||||
seconds: 15
|
||||
|
||||
- name: Deploy metallb IPAddressPool
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/loadbalancer/metal_lb_configmap.yml.j2') | from_yaml_all }}"
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
######################################
|
||||
# Install linkerd service mesh #
|
||||
######################################
|
||||
---
|
||||
- name: install linkerd service mesh
|
||||
when: inventory_hostname in groups['controller']
|
||||
block:
|
||||
- name: Download linkerd install script
|
||||
get_url:
|
||||
url: https://run.linkerd.io/install
|
||||
dest: /tmp/linkerd.sh
|
||||
|
||||
- name: Install linkerd CLI
|
||||
shell:
|
||||
cmd: cat /tmp/linkerd.sh | sh
|
||||
|
||||
- name: Set linkerd .bashrc
|
||||
lineinfile:
|
||||
path: ~/.bashrc
|
||||
line: 'PATH=$PATH:/root/.linkerd2/bin'
|
||||
|
||||
- name: init linkerd on controller1
|
||||
when: inventory_hostname in groups['controller_init']
|
||||
block:
|
||||
- name: Install linkerd CRD in Cluster
|
||||
shell: "linkerd install --crds | kubectl apply -f -"
|
||||
|
||||
- name: Install linkerd in Cluster
|
||||
shell: "linkerd install | kubectl apply -f -"
|
||||
|
||||
- name: Wait for linkerd destination deployment to become ready
|
||||
command: "kubectl rollout status deployment linkerd-destination -n linkerd --timeout 150s"
|
||||
|
||||
- name: Wait for linkerd proxy-injector deployment to become ready
|
||||
command: "kubectl rollout status deployment linkerd-proxy-injector -n linkerd --timeout 150s"
|
||||
|
||||
- name: Install linkerd Dashboard
|
||||
shell: "linkerd viz install | kubectl apply -f -"
|
||||
|
||||
- name: Pause for 15 seconds and wait for linkerd installation
|
||||
pause:
|
||||
seconds: 15
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
######################################
|
||||
# Setup ceph prometheus Monitoring #
|
||||
######################################
|
||||
---
|
||||
- name: Storage monitoring block
|
||||
when: inventory_hostname in groups['kubernetes_api']
|
||||
block:
|
||||
- name: Read rook-ceph storage values
|
||||
include_vars:
|
||||
file: ../vars/k8s_cluster/storage/rook_ceph.yml
|
||||
|
||||
- name: Deploy rook CRDs, common resources and operator from manifest
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/storage/rook/monitoring/{{ item }}') | from_yaml_all }}"
|
||||
loop:
|
||||
- 'csi-metrics-service-monitor.yaml'
|
||||
- 'service-monitor.yaml'
|
||||
- 'rbac.yaml'
|
||||
|
||||
- name: Setting monitoring fact rook-ceph
|
||||
set_fact: cephMonitoring=true
|
||||
|
||||
- name: Deploy rook cluster from manifest
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/storage/rook/cluster' + ('-test' if rook_cluster_type == 'dev' else '') + '.yaml') | from_yaml_all }}"
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
######################################
|
||||
# Setup rook-ceph storage #
|
||||
######################################
|
||||
---
|
||||
- name: "Create namespace '{{ namespace }}'"
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition:
|
||||
api_version: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: '{{ namespace }}'
|
||||
labels:
|
||||
name: '{{ namespace }}'
|
||||
annotations:
|
||||
linkerd.io/inject: 'enabled'
|
||||
|
||||
- name: Deploy rook CRDs, common resources and operator from manifest
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/storage/rook/{{ item }}') | from_yaml_all }}"
|
||||
loop:
|
||||
- 'crds.yaml'
|
||||
- 'common.yaml'
|
||||
- 'operator.yaml'
|
||||
|
||||
- name: Verify if the rook operator is up and running
|
||||
k8s:
|
||||
kind: Deployment
|
||||
name: rook-ceph-operator
|
||||
namespace: "rook-ceph"
|
||||
register: ret
|
||||
until: "ret.get('result', {}).get('status', {}).get('conditions', []) | length and ret.get('result', {}).get('status', {}).get('conditions', [])[0].get('status') == 'True'"
|
||||
retries: 10
|
||||
delay: 20
|
||||
|
||||
# ToDo: Tobi, please check whether this approach is okay. Monitoring is set again in k8scluster/storage/cephAddPrometheus
|
||||
- name: Setting monitoring fact rook-ceph
|
||||
set_fact: cephMonitoring=false
|
||||
|
||||
- name: Deploy rook cluster from manifest
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/storage/rook/cluster' + ('-test' if rook_cluster_type == 'dev' else '') + '.yaml') | from_yaml_all }}"
|
||||
|
||||
- name: Verify the cluster deploy is complete
|
||||
k8s:
|
||||
kind: CephCluster
|
||||
name: '{{ rook_cluster_config["name"] }}'
|
||||
namespace: "rook-ceph"
|
||||
register: cluster_data
|
||||
until: "cluster_data.get('result', {}).get('status', {}).get('state') == 'Created'"
|
||||
retries: 20
|
||||
delay: 30
|
||||
ignore_errors: yes
|
||||
|
||||
- name: Safety buffer for OSD enrollment
|
||||
pause:
|
||||
seconds: 60
|
||||
|
||||
- name: Deploy rook block storage class
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/storage/rook/csi/rbd/storageclass' + ('-test' if rook_cluster_type == 'dev' else '') + '.yaml') | from_yaml_all }}"
|
||||
|
||||
- name: Create rook filesystem
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/storage/rook/filesystem' + ('-test' if rook_cluster_type == 'dev' else '') + '.yaml') | from_yaml_all }}"
|
||||
|
||||
- name: Safety buffer for filesystem enrollment
|
||||
pause:
|
||||
seconds: 25
|
||||
|
||||
- name: Wait for ceph fs pods to become ready
|
||||
shell: kubectl wait --namespace=rook-ceph --for=condition=Ready pods --selector app=rook-ceph-mds --timeout=600s
|
||||
register: ceph_pods_ready
|
||||
|
||||
- name: Deploy rook file storage class
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition: "{{ lookup('template', '../templates/k8s_cluster/storage/rook/csi/cephfs/storageclass.yaml') | from_yaml_all }}"
|
||||
|
|
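The storage class manifests themselves are rendered from templates not shown in this diff. Assuming they keep the upstream Rook names (rook-ceph-block for RBD, rook-cephfs for CephFS), a claim against the block class would look like this purely illustrative PVC:

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: example-data                 # placeholder name
  namespace: default                 # placeholder namespace
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
  storageClassName: rook-ceph-block  # assumed upstream default class name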
@ -0,0 +1,15 @@
|
|||
######################################
|
||||
# Setup Storage #
|
||||
######################################
|
||||
---
|
||||
- name: Storage block
|
||||
when: inventory_hostname in (groups['controller_init'])
|
||||
block:
|
||||
- name: Read rook-ceph storage values
|
||||
include_vars:
|
||||
file: ../vars/k8s_cluster/storage/rook_ceph.yml
|
||||
|
||||
- name: rook internal ceph
|
||||
when: inventory_hostname in groups['controller_init']
|
||||
block:
|
||||
- import_tasks: ../tasks/k8s_cluster/storage/install_rook_ceph_storage.yml
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
#######################################
|
||||
## Tasks to prepare a Debian System #
|
||||
#######################################
|
||||
---
|
||||
- name: Read debian values and prepare system
|
||||
include_vars:
|
||||
file: ../vars/k8s_cluster/system/debian.yml
|
||||
|
||||
- name: Update apt cache
|
||||
raw: apt-get -y update
|
||||
changed_when: False
|
||||
|
||||
- name: Install required system packages
|
||||
apt: name={{ sys_packages }} state=present update_cache=yes cache_valid_time=3600
|
||||
|
||||
- name: Install required kubernetes system packages
|
||||
apt: name={{ k8s_sys_packages }} state=present update_cache=yes cache_valid_time=3600
|
||||
when: inventory_hostname in groups['kubernetes']
|
||||
|
||||
- name: Delete EXTERNALLY-MANAGED python venv
|
||||
ansible.builtin.file:
|
||||
state: absent
|
||||
path: /usr/lib/python3.11/EXTERNALLY-MANAGED
|
||||
|
||||
- name: Install required Python modules
|
||||
pip: name={{ pip_packages }} state=present
|
||||
when: inventory_hostname in groups['kubernetes']
|
||||
|
||||
- name: Get hostname
|
||||
command: hostname
|
||||
register: old_hostname
|
||||
changed_when: false
|
||||
|
||||
- set_fact: hostname={{ old_hostname.stdout | lower }}
|
||||
|
||||
# No capital letters in the hostname
|
||||
- name: Change the hostname
|
||||
command: hostnamectl set-hostname {{ hostname }}
|
||||
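The package lists come from ../vars/k8s_cluster/system/debian.yml, which is not included in this diff. A hypothetical sketch of the shape it needs for these tasks (every package choice here is an assumption, not taken from the commit):

# vars/k8s_cluster/system/debian.yml (assumed shape)
sys_packages:
  - curl
  - gnupg
  - apt-transport-https
k8s_sys_packages:
  - python3-pip
pip_packages:
  - kubernetes   # needed by the kubernetes.core.k8s modules used later
  - pyyaml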
File diff suppressed because it is too large
File diff suppressed because it is too large
|
|
@ -0,0 +1,14 @@
|
|||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: letsencrypt-production
|
||||
spec:
|
||||
acme:
|
||||
server: https://acme-v02.api.letsencrypt.org/directory
|
||||
email: {{ email }}
|
||||
privateKeySecretRef:
|
||||
name: letsencrypt-cluster-issuer-key
|
||||
solvers:
|
||||
- http01:
|
||||
ingress:
|
||||
class: nginx
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: letsencrypt-staging
|
||||
spec:
|
||||
acme:
|
||||
server: https://acme-staging-v02.api.letsencrypt.org/directory
|
||||
email: {{ email }}
|
||||
privateKeySecretRef:
|
||||
name: letsencrypt-staging
|
||||
solvers:
|
||||
- http01:
|
||||
ingress:
|
||||
class: nginx
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
[keyfile]
|
||||
unmanaged-devices=interface-name:cali*;interface-name:tunl*;interface-name:vxlan.calico;interface-name:wireguard.cali
|
||||
File diff suppressed because it is too large
|
|
@ -0,0 +1,35 @@
|
|||
apiVersion: kubeadm.k8s.io/v1beta3
|
||||
kind: InitConfiguration
|
||||
localAPIEndpoint:
|
||||
advertiseAddress: '{{ hostvars['k8s-master-1'].ansible_host }}'
|
||||
bindPort: 6443
|
||||
---
|
||||
apiVersion: kubeadm.k8s.io/v1beta3
|
||||
kind: ClusterConfiguration
|
||||
kubernetesVersion: 'v{{ kubernetesVersion.split("-")[0] }}'
|
||||
apiServer:
|
||||
extraArgs:
|
||||
authorization-mode: Node,RBAC
|
||||
timeoutForControlPlane: 4m0s
|
||||
certificatesDir: /etc/kubernetes/pki
|
||||
clusterName: '{{ kubernetesClusterName }}'
|
||||
controlPlaneEndpoint: '{{ kubernetesApi }}:6443'
|
||||
controllerManager:
|
||||
extraArgs:
|
||||
bind-address: 0.0.0.0
|
||||
dns: {}
|
||||
etcd:
|
||||
local:
|
||||
dataDir: /var/lib/etcd
|
||||
extraArgs:
|
||||
listen-metrics-urls: http://0.0.0.0:2381
|
||||
networking:
|
||||
dnsDomain: cluster.local
|
||||
serviceSubnet: 10.96.0.0/12
|
||||
scheduler:
|
||||
extraArgs:
|
||||
bind-address: 0.0.0.0
|
||||
---
|
||||
apiVersion: kubeproxy.config.k8s.io/v1alpha1
|
||||
kind: KubeProxyConfiguration
|
||||
metricsBindAddress: 0.0.0.0
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
apiVersion: kubeadm.k8s.io/v1beta3
|
||||
kind: JoinConfiguration
|
||||
discovery:
|
||||
bootstrapToken:
|
||||
apiServerEndpoint: {{ kubernetesApi }}:6443
|
||||
caCertHashes:
|
||||
- sha256:{{ hostvars['k8s-master-1']['certsha256'] }}
|
||||
token: {{ hostvars['k8s-master-1']['token'] }}
|
||||
nodeRegistration:
|
||||
kubeletExtraArgs:
|
||||
node-ip: {{ ansible_host }}
|
||||
controlPlane:
|
||||
localAPIEndpoint:
|
||||
advertiseAddress: {{ ansible_host }}
|
||||
certificateKey: {{ hostvars['k8s-master-1']['certskey'] }}
|
||||
File diff suppressed because it is too large
|
|
@ -0,0 +1,61 @@
|
|||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: kube-vip
|
||||
namespace: kube-system
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- manager
|
||||
env:
|
||||
- name: vip_arp
|
||||
value: "true"
|
||||
- name: port
|
||||
value: "6443"
|
||||
- name: vip_interface
|
||||
value: {{ interface }}
|
||||
- name: vip_cidr
|
||||
value: "32"
|
||||
- name: cp_enable
|
||||
value: "true"
|
||||
- name: cp_namespace
|
||||
value: kube-system
|
||||
- name: vip_ddns
|
||||
value: "false"
|
||||
- name: svc_enable
|
||||
value: "true"
|
||||
- name: vip_leaderelection
|
||||
value: "true"
|
||||
- name: vip_leaseduration
|
||||
value: "5"
|
||||
- name: vip_renewdeadline
|
||||
value: "3"
|
||||
- name: vip_retryperiod
|
||||
value: "1"
|
||||
- name: address
|
||||
value: {{ virtual_ip }}
|
||||
- name: prometheus_server
|
||||
value: :2112
|
||||
image: ghcr.io/kube-vip/kube-vip:{{ kube_vip_version }}
|
||||
imagePullPolicy: Always
|
||||
name: kube-vip
|
||||
resources: {}
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- NET_ADMIN
|
||||
- NET_RAW
|
||||
volumeMounts:
|
||||
- mountPath: /etc/kubernetes/admin.conf
|
||||
name: kubeconfig
|
||||
hostAliases:
|
||||
- hostnames:
|
||||
- kubernetes
|
||||
ip: 127.0.0.1
|
||||
hostNetwork: true
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /etc/kubernetes/admin.conf
|
||||
name: kubeconfig
|
||||
status: {}
|
||||
|
|
@ -0,0 +1,8 @@
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: test
  namespace: metallb
spec:
  addresses:
    - 192.168.10.0/24
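An IPAddressPool only defines which addresses MetalLB may hand out; in layer-2 mode the speakers additionally need an L2Advertisement referencing the pool before services actually become reachable. No such manifest appears in the templates shown here, so a minimal companion (assuming L2 mode) would be:

apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  name: test
  namespace: metallb
spec:
  ipAddressPools:
    - test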
@ -0,0 +1,363 @@
|
|||
# Default values for metallb.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
imagePullSecrets: []
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
loadBalancerClass: ""
|
||||
|
||||
# To configure MetalLB, you must specify ONE of the following two
|
||||
# options.
|
||||
|
||||
rbac:
|
||||
# create specifies whether to install and use RBAC rules.
|
||||
create: true
|
||||
|
||||
prometheus:
|
||||
# scrape annotations specifies whether to add Prometheus metric
|
||||
# auto-collection annotations to pods. See
|
||||
# https://github.com/prometheus/prometheus/blob/release-2.1/documentation/examples/prometheus-kubernetes.yml
|
||||
# for a corresponding Prometheus configuration. Alternatively, you
|
||||
# may want to use the Prometheus Operator
|
||||
# (https://github.com/coreos/prometheus-operator) for more powerful
|
||||
# monitoring configuration. If you use the Prometheus operator, this
|
||||
# can be left at false.
|
||||
scrapeAnnotations: false
|
||||
|
||||
# port both controller and speaker will listen on for metrics
|
||||
metricsPort: 7472
|
||||
|
||||
# if set, enables rbac proxy on the controller and speaker to expose
|
||||
# the metrics via tls.
|
||||
# secureMetricsPort: 9120
|
||||
|
||||
# the name of the secret to be mounted in the speaker pod
|
||||
# to expose the metrics securely. If not present, a self signed
|
||||
# certificate to be used.
|
||||
speakerMetricsTLSSecret: ""
|
||||
|
||||
# the name of the secret to be mounted in the controller pod
|
||||
# to expose the metrics securely. If not present, a self signed
|
||||
# certificate to be used.
|
||||
controllerMetricsTLSSecret: ""
|
||||
|
||||
# prometheus doesn't have the permission to scrape all namespaces so we give it permission to scrape metallb's one
|
||||
rbacPrometheus: true
|
||||
|
||||
# the service account used by prometheus
|
||||
# required when " .Values.prometheus.rbacPrometheus == true " and " .Values.prometheus.podMonitor.enabled=true or prometheus.serviceMonitor.enabled=true "
|
||||
serviceAccount: ""
|
||||
|
||||
# the namespace where prometheus is deployed
|
||||
# required when " .Values.prometheus.rbacPrometheus == true " and " .Values.prometheus.podMonitor.enabled=true or prometheus.serviceMonitor.enabled=true "
|
||||
namespace: ""
|
||||
|
||||
# the image to be used for the kuberbacproxy container
|
||||
rbacProxy:
|
||||
repository: gcr.io/kubebuilder/kube-rbac-proxy
|
||||
tag: v0.12.0
|
||||
pullPolicy:
|
||||
|
||||
# Prometheus Operator PodMonitors
|
||||
podMonitor:
|
||||
# enable support for Prometheus Operator
|
||||
enabled: false
|
||||
|
||||
# optional additional labels for podMonitors
|
||||
additionalLabels: {}
|
||||
|
||||
# optional annotations for podMonitors
|
||||
annotations: {}
|
||||
|
||||
# Job label for scrape target
|
||||
jobLabel: "app.kubernetes.io/name"
|
||||
|
||||
# Scrape interval. If not set, the Prometheus default scrape interval is used.
|
||||
interval:
|
||||
|
||||
# metric relabel configs to apply to samples before ingestion.
|
||||
metricRelabelings: []
|
||||
# - action: keep
|
||||
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
|
||||
# sourceLabels: [__name__]
|
||||
|
||||
# relabel configs to apply to samples before ingestion.
|
||||
relabelings: []
|
||||
# - sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
# separator: ;
|
||||
# regex: ^(.*)$
|
||||
# target_label: nodename
|
||||
# replacement: $1
|
||||
# action: replace
|
||||
|
||||
# Prometheus Operator ServiceMonitors. To be used as an alternative
|
||||
# to podMonitor, supports secure metrics.
|
||||
serviceMonitor:
|
||||
# enable support for Prometheus Operator
|
||||
enabled: false
|
||||
|
||||
speaker:
|
||||
# optional additional labels for the speaker serviceMonitor
|
||||
additionalLabels: {}
|
||||
# optional additional annotations for the speaker serviceMonitor
|
||||
annotations: {}
|
||||
# optional tls configuration for the speaker serviceMonitor, in case
|
||||
# secure metrics are enabled.
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
|
||||
controller:
|
||||
# optional additional labels for the controller serviceMonitor
|
||||
additionalLabels: {}
|
||||
# optional additional annotations for the controller serviceMonitor
|
||||
annotations: {}
|
||||
# optional tls configuration for the controller serviceMonitor, in case
|
||||
# secure metrics are enabled.
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
|
||||
# Job label for scrape target
|
||||
jobLabel: "app.kubernetes.io/name"
|
||||
|
||||
# Scrape interval. If not set, the Prometheus default scrape interval is used.
|
||||
interval:
|
||||
|
||||
# metric relabel configs to apply to samples before ingestion.
|
||||
metricRelabelings: []
|
||||
# - action: keep
|
||||
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
|
||||
# sourceLabels: [__name__]
|
||||
|
||||
# relabel configs to apply to samples before ingestion.
|
||||
relabelings: []
|
||||
# - sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
# separator: ;
|
||||
# regex: ^(.*)$
|
||||
# target_label: nodename
|
||||
# replacement: $1
|
||||
# action: replace
|
||||
|
||||
# Prometheus Operator alertmanager alerts
|
||||
prometheusRule:
|
||||
# enable alertmanager alerts
|
||||
enabled: false
|
||||
|
||||
# optional additional labels for prometheusRules
|
||||
additionalLabels: {}
|
||||
|
||||
# optional annotations for prometheusRules
|
||||
annotations: {}
|
||||
|
||||
# MetalLBStaleConfig
|
||||
staleConfig:
|
||||
enabled: true
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
# MetalLBConfigNotLoaded
|
||||
configNotLoaded:
|
||||
enabled: true
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
# MetalLBAddressPoolExhausted
|
||||
addressPoolExhausted:
|
||||
enabled: true
|
||||
labels:
|
||||
severity: alert
|
||||
|
||||
addressPoolUsage:
|
||||
enabled: true
|
||||
thresholds:
|
||||
- percent: 75
|
||||
labels:
|
||||
severity: warning
|
||||
- percent: 85
|
||||
labels:
|
||||
severity: warning
|
||||
- percent: 95
|
||||
labels:
|
||||
severity: alert
|
||||
|
||||
# MetalLBBGPSessionDown
|
||||
bgpSessionDown:
|
||||
enabled: true
|
||||
labels:
|
||||
severity: alert
|
||||
|
||||
extraAlerts: []
|
||||
|
||||
# controller contains configuration specific to the MetalLB cluster
|
||||
# controller.
|
||||
controller:
|
||||
enabled: true
|
||||
# -- Controller log level. Must be one of: `all`, `debug`, `info`, `warn`, `error` or `none`
|
||||
logLevel: info
|
||||
# command: /controller
|
||||
# webhookMode: enabled
|
||||
image:
|
||||
repository: quay.io/metallb/controller
|
||||
tag:
|
||||
pullPolicy:
|
||||
## @param controller.updateStrategy.type Metallb controller deployment strategy type.
|
||||
## ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy
|
||||
## e.g:
|
||||
## strategy:
|
||||
## type: RollingUpdate
|
||||
## rollingUpdate:
|
||||
## maxSurge: 25%
|
||||
## maxUnavailable: 25%
|
||||
##
|
||||
strategy:
|
||||
type: RollingUpdate
|
||||
serviceAccount:
|
||||
# Specifies whether a ServiceAccount should be created
|
||||
create: true
|
||||
# The name of the ServiceAccount to use. If not set and create is
|
||||
# true, a name is generated using the fullname template
|
||||
name: ""
|
||||
annotations: {}
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
# nobody
|
||||
runAsUser: 65534
|
||||
fsGroup: 65534
|
||||
resources: {}
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 100Mi
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
priorityClassName: ""
|
||||
runtimeClassName: ""
|
||||
affinity: {}
|
||||
podAnnotations: {}
|
||||
labels: {}
|
||||
livenessProbe:
|
||||
enabled: true
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
enabled: true
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
tlsMinVersion: "VersionTLS12"
|
||||
tlsCipherSuites: ""
|
||||
|
||||
extraContainers: []
|
||||
|
||||
# speaker contains configuration specific to the MetalLB speaker
|
||||
# daemonset.
|
||||
speaker:
|
||||
enabled: true
|
||||
# command: /speaker
|
||||
# -- Speaker log level. Must be one of: `all`, `debug`, `info`, `warn`, `error` or `none`
|
||||
logLevel: info
|
||||
tolerateMaster: true
|
||||
memberlist:
|
||||
enabled: true
|
||||
mlBindPort: 7946
|
||||
mlBindAddrOverride: ""
|
||||
mlSecretKeyPath: "/etc/ml_secret_key"
|
||||
excludeInterfaces:
|
||||
enabled: true
|
||||
# ignore the exclude-from-external-loadbalancer label
|
||||
ignoreExcludeLB: false
|
||||
|
||||
image:
|
||||
repository: quay.io/metallb/speaker
|
||||
tag:
|
||||
pullPolicy:
|
||||
## @param speaker.updateStrategy.type Speaker daemonset strategy type
|
||||
## ref: https://kubernetes.io/docs/tasks/manage-daemon/update-daemon-set/
|
||||
##
|
||||
updateStrategy:
|
||||
## StrategyType
|
||||
## Can be set to RollingUpdate or OnDelete
|
||||
##
|
||||
type: RollingUpdate
|
||||
serviceAccount:
|
||||
# Specifies whether a ServiceAccount should be created
|
||||
create: true
|
||||
# The name of the ServiceAccount to use. If not set and create is
|
||||
# true, a name is generated using the fullname template
|
||||
name: ""
|
||||
annotations: {}
|
||||
securityContext: {}
|
||||
## Defines a secret name for the controller to generate a memberlist encryption secret
|
||||
## By default secretName: {{ "metallb.fullname" }}-memberlist
|
||||
##
|
||||
# secretName:
|
||||
resources: {}
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 100Mi
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
priorityClassName: ""
|
||||
affinity: {}
|
||||
## Selects which runtime class will be used by the pod.
|
||||
runtimeClassName: ""
|
||||
podAnnotations: {}
|
||||
labels: {}
|
||||
livenessProbe:
|
||||
enabled: true
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
enabled: true
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
enabled: true
|
||||
failureThreshold: 30
|
||||
periodSeconds: 5
|
||||
# frr contains configuration specific to the MetalLB FRR container,
|
||||
# for speaker running alongside FRR.
|
||||
frr:
|
||||
enabled: true
|
||||
image:
|
||||
repository: quay.io/frrouting/frr
|
||||
tag: 9.0.2
|
||||
pullPolicy:
|
||||
metricsPort: 7473
|
||||
resources: {}
|
||||
|
||||
# if set, enables a rbac proxy sidecar container on the speaker to
|
||||
# expose the frr metrics via tls.
|
||||
# secureMetricsPort: 9121
|
||||
|
||||
|
||||
reloader:
|
||||
resources: {}
|
||||
|
||||
frrMetrics:
|
||||
resources: {}
|
||||
|
||||
extraContainers: []
|
||||
|
||||
crds:
|
||||
enabled: true
|
||||
validationFailurePolicy: Fail
|
||||
|
||||
# frrk8s contains the configuration related to using an frrk8s instance
|
||||
# (github.com/metallb/frr-k8s) as the backend for the BGP implementation.
|
||||
# This allows configuring additional frr parameters in combination to those
|
||||
# applied by MetalLB.
|
||||
frrk8s:
|
||||
# if set, enables frrk8s as a backend. This is mutually exclusive to frr
|
||||
# mode.
|
||||
enabled: false
|
||||
|
|
@ -0,0 +1,2 @@
rook version v1.11.4
For documentation on running Rook in your Kubernetes cluster see the [Kubernetes Quickstart Guide](/Documentation/Getting-Started/quickstart.md)
@ -0,0 +1,22 @@
|
|||
#################################################################################################################
|
||||
# Define the settings for the rook-ceph-external cluster with common settings for a production cluster.
|
||||
|
||||
# For example, if Rook is not managing any existing cluster in the 'rook-ceph' namespace do:
|
||||
# kubectl create -f crds.yaml -f common.yaml -f operator.yaml
|
||||
# kubectl create -f cluster-external.yaml
|
||||
|
||||
# If there is already a cluster managed by Rook in 'rook-ceph' then run:
|
||||
# kubectl create -f common-external.yaml -f cluster-external-management.yaml
|
||||
#################################################################################################################
|
||||
apiVersion: ceph.rook.io/v1
|
||||
kind: CephCluster
|
||||
metadata:
|
||||
name: rook-ceph-external
|
||||
namespace: rook-ceph-external # namespace:cluster
|
||||
spec:
|
||||
external:
|
||||
enable: true
|
||||
dataDirHostPath: /var/lib/rook
|
||||
# providing an image is required, if you want to create other CRs (rgw, mds, nfs)
|
||||
cephVersion:
|
||||
image: quay.io/ceph/ceph:v17.2.6 # Should match external cluster version
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
#################################################################################################################
|
||||
# Define the settings for the rook-ceph-external cluster with common settings for a production cluster.
|
||||
|
||||
# For example, if Rook is not managing any existing cluster in the 'rook-ceph' namespace do:
|
||||
# kubectl create -f crds.yaml -f common.yaml -f operator.yaml
|
||||
# kubectl create -f cluster-external.yaml
|
||||
|
||||
# If there is already a cluster managed by Rook in 'rook-ceph' then do:
|
||||
# kubectl create -f common-external.yaml
|
||||
# kubectl create -f cluster-external.yaml
|
||||
#################################################################################################################
|
||||
apiVersion: ceph.rook.io/v1
|
||||
kind: CephCluster
|
||||
metadata:
|
||||
name: rook-ceph-external
|
||||
namespace: rook-ceph-external # namespace:cluster
|
||||
spec:
|
||||
external:
|
||||
enable: true
|
||||
crashCollector:
|
||||
disable: true
|
||||
network:
|
||||
connections:
|
||||
encryption:
|
||||
enabled: false
|
||||
compression:
|
||||
enabled: false
|
||||
healthCheck:
|
||||
daemonHealth:
|
||||
mon:
|
||||
disabled: false
|
||||
interval: 45s
|
||||
# optionally, the ceph-mgr IP address can be passed to gather metric from the prometheus exporter
|
||||
# monitoring:
|
||||
# enabled: true
|
||||
# rulesNamespace: rook-ceph
|
||||
# externalMgrEndpoints:
|
||||
#- ip: ip
|
||||
# externalMgrPrometheusPort: 9283
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
#################################################################################################################
|
||||
# Define the settings for the rook-ceph cluster with common settings for a small test cluster.
|
||||
# All nodes with available raw devices will be used for the Ceph cluster. One node is sufficient
|
||||
# in this example.
|
||||
|
||||
# For example, to create the cluster:
|
||||
# kubectl create -f crds.yaml -f common.yaml -f operator.yaml
|
||||
# kubectl create -f cluster-test.yaml
|
||||
#################################################################################################################
|
||||
apiVersion: ceph.rook.io/v1
|
||||
kind: CephCluster
|
||||
metadata:
|
||||
name: {{ rook_cluster_configs.dev.name }}
|
||||
namespace: rook-ceph # namespace:cluster
|
||||
spec:
|
||||
dataDirHostPath: /var/lib/rook
|
||||
cephVersion:
|
||||
image: quay.io/ceph/ceph:v18
|
||||
allowUnsupported: true
|
||||
mon:
|
||||
count: {{ rook_cluster_configs.dev.mons }}
|
||||
allowMultiplePerNode: true
|
||||
mgr:
|
||||
count: {{ rook_cluster_configs.dev.mgrs }}
|
||||
allowMultiplePerNode: true
|
||||
dashboard:
|
||||
enabled: true
|
||||
crashCollector:
|
||||
disable: true
|
||||
storage:
|
||||
useAllNodes: true
|
||||
useAllDevices: true
|
||||
#deviceFilter:
|
||||
monitoring:
|
||||
enabled: false
|
||||
healthCheck:
|
||||
daemonHealth:
|
||||
mon:
|
||||
interval: 45s
|
||||
timeout: 600s
|
||||
priorityClassNames:
|
||||
all: system-node-critical
|
||||
mgr: system-cluster-critical
|
||||
disruptionManagement:
|
||||
managePodBudgets: true
|
||||
cephConfig:
|
||||
global:
|
||||
osd_pool_default_size: "1"
|
||||
mon_warn_on_pool_no_redundancy: "false"
|
||||
bdev_flock_retry: "20"
|
||||
bluefs_buffered_io: "false"
|
||||
mon_data_avail_warn: "10"
|
||||
---
|
||||
apiVersion: ceph.rook.io/v1
|
||||
kind: CephBlockPool
|
||||
metadata:
|
||||
name: builtin-mgr
|
||||
namespace: rook-ceph # namespace:cluster
|
||||
spec:
|
||||
name: .mgr
|
||||
replicated:
|
||||
size: 1
|
||||
requireSafeReplicaSize: false
|
||||
|
|
@ -0,0 +1,312 @@
|
|||
#################################################################################################################
|
||||
# Define the settings for the rook-ceph cluster with common settings for a production cluster.
|
||||
# All nodes with available raw devices will be used for the Ceph cluster. At least three nodes are required
|
||||
# in this example. See the documentation for more details on storage settings available.
|
||||
|
||||
# For example, to create the cluster:
|
||||
# kubectl create -f crds.yaml -f common.yaml -f operator.yaml
|
||||
# kubectl create -f cluster.yaml
|
||||
#################################################################################################################
|
||||
|
||||
apiVersion: ceph.rook.io/v1
|
||||
kind: CephCluster
|
||||
metadata:
|
||||
name: rook-ceph
|
||||
namespace: rook-ceph # namespace:cluster
|
||||
spec:
|
||||
cephVersion:
|
||||
# The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw).
|
||||
# v16 is Pacific, and v17 is Quincy.
|
||||
# RECOMMENDATION: In production, use a specific version tag instead of the general v17 flag, which pulls the latest release and could result in different
|
||||
# versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
|
||||
# If you want to be more precise, you can always use a timestamp tag such quay.io/ceph/ceph:v17.2.6-20230410
|
||||
# This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities
|
||||
image: quay.io/ceph/ceph:v17.2.6
|
||||
# Whether to allow unsupported versions of Ceph. Currently `pacific`, `quincy`, and `reef` are supported.
|
||||
# Future versions such as `squid` (v19) would require this to be set to `true`.
|
||||
# Do not set to true in production.
|
||||
allowUnsupported: false
|
||||
# The path on the host where configuration files will be persisted. Must be specified.
|
||||
# Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster.
|
||||
# In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in Minikube environment.
|
||||
dataDirHostPath: /var/lib/rook
|
||||
# Whether or not upgrade should continue even if a check fails
|
||||
# This means Ceph's status could be degraded and we don't recommend upgrading but you might decide otherwise
|
||||
# Use at your OWN risk
|
||||
# To understand Rook's upgrade process of Ceph, read https://rook.io/docs/rook/latest/ceph-upgrade.html#ceph-version-upgrades
|
||||
skipUpgradeChecks: false
|
||||
# Whether or not continue if PGs are not clean during an upgrade
|
||||
continueUpgradeAfterChecksEvenIfNotHealthy: false
|
||||
# WaitTimeoutForHealthyOSDInMinutes defines the time (in minutes) the operator would wait before an OSD can be stopped for upgrade or restart.
|
||||
# If the timeout is exceeded and the OSD is not ok to stop, the operator will skip the upgrade for the current OSD and proceed with the next one
|
||||
# if `continueUpgradeAfterChecksEvenIfNotHealthy` is `false`. If `continueUpgradeAfterChecksEvenIfNotHealthy` is `true`, then operator would
|
||||
# continue with the upgrade of an OSD even if it's not ok to stop after the timeout. This timeout won't be applied if `skipUpgradeChecks` is `true`.
|
||||
# The default wait timeout is 10 minutes.
|
||||
waitTimeoutForHealthyOSDInMinutes: 10
|
||||
mon:
|
||||
# Set the number of mons to be started. Generally recommended to be 3.
|
||||
# For highest availability, an odd number of mons should be specified.
|
||||
count: 3
|
||||
# The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason.
|
||||
# Mons should only be allowed on the same node for test environments where data loss is acceptable.
|
||||
allowMultiplePerNode: false
|
||||
mgr:
|
||||
# When higher availability of the mgr is needed, increase the count to 2.
|
||||
# In that case, one mgr will be active and one in standby. When Ceph updates which
|
||||
# mgr is active, Rook will update the mgr services to match the active mgr.
|
||||
count: 2
|
||||
allowMultiplePerNode: false
|
||||
modules:
|
||||
# Several modules should not need to be included in this list. The "dashboard" and "monitoring" modules
|
||||
# are already enabled by other settings in the cluster CR.
|
||||
- name: pg_autoscaler
|
||||
enabled: true
|
||||
# enable the ceph dashboard for viewing cluster status
|
||||
dashboard:
|
||||
enabled: true
|
||||
# serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy)
|
||||
# urlPrefix: /ceph-dashboard
|
||||
# serve the dashboard at the given port.
|
||||
# port: 8443
|
||||
# serve the dashboard using SSL
|
||||
ssl: true
|
||||
# The url of the Prometheus instance
|
||||
# prometheusEndpoint: <protocol>://<prometheus-host>:<port>
|
||||
# Whether SSL should be verified if the Prometheus server is using https
|
||||
# prometheusEndpointSSLVerify: false
|
||||
# enable prometheus alerting for cluster
|
||||
monitoring:
|
||||
# requires Prometheus to be pre-installed
|
||||
enabled: false
|
||||
# Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled.
|
||||
# If true, the prometheus mgr module and Ceph exporter are both disabled. Default is false.
|
||||
metricsDisabled: false
|
||||
network:
|
||||
connections:
|
||||
# Whether to encrypt the data in transit across the wire to prevent eavesdropping the data on the network.
|
||||
# The default is false. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons will be encrypted.
|
||||
# When encryption is not enabled, clients still establish a strong initial authentication and data integrity is still validated with a crc check.
|
||||
# IMPORTANT: Encryption requires the 5.11 kernel for the latest nbd and cephfs drivers. Alternatively for testing only,
|
||||
# you can set the "mounter: rbd-nbd" in the rbd storage class, or "mounter: fuse" in the cephfs storage class.
|
||||
# The nbd and fuse drivers are *not* recommended in production since restarting the csi driver pod will disconnect the volumes.
|
||||
encryption:
|
||||
enabled: false
|
||||
# Whether to compress the data in transit across the wire. The default is false.
|
||||
# Requires Ceph Quincy (v17) or newer. Also see the kernel requirements above for encryption.
|
||||
compression:
|
||||
enabled: false
|
||||
# Whether to require communication over msgr2. If true, the msgr v1 port (6789) will be disabled
|
||||
# and clients will be required to connect to the Ceph cluster with the v2 port (3300).
|
||||
# Requires a kernel that supports msgr v2 (kernel 5.11 or CentOS 8.4 or newer).
|
||||
requireMsgr2: false
|
||||
# enable host networking
|
||||
#provider: host
|
||||
# enable the Multus network provider
|
||||
#provider: multus
|
||||
#selectors:
|
||||
# The selector keys are required to be `public` and `cluster`.
|
||||
# Based on the configuration, the operator will do the following:
|
||||
# 1. if only the `public` selector key is specified, both the public_network and cluster_network Ceph settings will listen on that interface
|
||||
# 2. if both `public` and `cluster` selector keys are specified the first one will point to 'public_network' flag and the second one to 'cluster_network'
|
||||
#
|
||||
# In order to work, each selector value must match a NetworkAttachmentDefinition object in Multus
|
||||
#
|
||||
# public: public-conf --> NetworkAttachmentDefinition object name in Multus
|
||||
# cluster: cluster-conf --> NetworkAttachmentDefinition object name in Multus
|
||||
# Provide internet protocol version. IPv6, IPv4 or empty string are valid options. Empty string would mean IPv4
|
||||
#ipFamily: "IPv6"
|
||||
# Ceph daemons to listen on both IPv4 and Ipv6 networks
|
||||
#dualStack: false
|
||||
# Enable multiClusterService to export the mon and OSD services to the peer cluster.
|
||||
# This is useful to support RBD mirroring between two clusters having overlapping CIDRs.
|
||||
# Ensure that peer clusters are connected using an MCS API compatible application, like Globalnet Submariner.
|
||||
#multiClusterService:
|
||||
# enabled: false
|
||||
|
||||
# enable the crash collector for ceph daemon crash collection
|
||||
crashCollector:
|
||||
disable: false
|
||||
# Uncomment daysToRetain to prune ceph crash entries older than the
|
||||
# specified number of days.
|
||||
#daysToRetain: 30
|
||||
# enable log collector, daemons will log on files and rotate
|
||||
logCollector:
|
||||
enabled: true
|
||||
periodicity: daily # one of: hourly, daily, weekly, monthly
|
||||
maxLogSize: 500M # SUFFIX may be 'M' or 'G'. Must be at least 1M.
|
||||
# automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction.
|
||||
cleanupPolicy:
|
||||
# Since cluster cleanup is destructive to data, confirmation is required.
|
||||
# To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data".
|
||||
# This value should only be set when the cluster is about to be deleted. After the confirmation is set,
|
||||
# Rook will immediately stop configuring the cluster and only wait for the delete command.
|
||||
# If the empty string is set, Rook will not destroy any data on hosts during uninstall.
|
||||
confirmation: ""
|
||||
# sanitizeDisks represents settings for sanitizing OSD disks on cluster deletion
|
||||
sanitizeDisks:
|
||||
# method indicates if the entire disk should be sanitized or simply ceph's metadata
|
||||
# in both cases, re-installation is possible
|
||||
# possible choices are 'complete' or 'quick' (default)
|
||||
method: quick
|
||||
# dataSource indicates where to get the random bytes to write on the disk
|
||||
# possible choices are 'zero' (default) or 'random'
|
||||
# using random sources will consume entropy from the system and will take much more time than the zero source
|
||||
dataSource: zero
|
||||
# iteration overwrites the disk N times instead of the default (1)
|
||||
# takes an integer value
|
||||
iteration: 1
|
||||
# allowUninstallWithVolumes defines how the uninstall should be performed
|
||||
# If set to true, cephCluster deletion does not wait for the PVs to be deleted.
|
||||
allowUninstallWithVolumes: false
|
||||
# To control where various services will be scheduled by kubernetes, use the placement configuration sections below.
|
||||
# The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and
|
||||
# tolerate taints with a key of 'storage-node'.
|
||||
# placement:
|
||||
# all:
|
||||
# nodeAffinity:
|
||||
# requiredDuringSchedulingIgnoredDuringExecution:
|
||||
# nodeSelectorTerms:
|
||||
# - matchExpressions:
|
||||
# - key: role
|
||||
# operator: In
|
||||
# values:
|
||||
# - storage-node
|
||||
# podAffinity:
|
||||
# podAntiAffinity:
|
||||
# topologySpreadConstraints:
|
||||
# tolerations:
|
||||
# - key: storage-node
|
||||
# operator: Exists
|
||||
# The above placement information can also be specified for mon, osd, and mgr components
|
||||
# mon:
|
||||
# Monitor deployments may contain an anti-affinity rule for avoiding monitor
|
||||
# collocation on the same node. This is a required rule when host network is used
|
||||
# or when AllowMultiplePerNode is false. Otherwise this anti-affinity rule is a
|
||||
# preferred rule with weight: 50.
|
||||
# osd:
|
||||
# prepareosd:
|
||||
# mgr:
|
||||
# cleanup:
|
||||
annotations:
|
||||
# all:
|
||||
# mon:
|
||||
# osd:
|
||||
# cleanup:
|
||||
prepareosd: {linkerd.io/inject: disabled}
|
||||
# clusterMetadata annotations will be applied to only `rook-ceph-mon-endpoints` configmap and the `rook-ceph-mon` and `rook-ceph-admin-keyring` secrets.
|
||||
# And clusterMetadata annotations will not be merged with `all` annotations.
|
||||
# clusterMetadata:
|
||||
# kubed.appscode.com/sync: "true"
|
||||
# If no mgr annotations are set, prometheus scrape annotations will be set by default.
|
||||
# mgr:
|
||||
labels:
|
||||
# all:
|
||||
# mon:
|
||||
# osd:
|
||||
# cleanup:
|
||||
# mgr:
|
||||
# prepareosd:
|
||||
# monitoring is a list of key-value pairs. It is injected into all the monitoring resources created by operator.
|
||||
# These labels can be passed as LabelSelector to Prometheus
|
||||
# monitoring:
|
||||
# crashcollector:
|
||||
resources:
|
||||
# The requests and limits set here allow the mgr pod to use half of one CPU core and 1 gigabyte of memory
|
||||
# mgr:
|
||||
# limits:
|
||||
# cpu: "500m"
|
||||
# memory: "1024Mi"
|
||||
# requests:
|
||||
# cpu: "500m"
|
||||
# memory: "1024Mi"
|
||||
# The above example requests/limits can also be added to the other components
|
||||
# mon:
|
||||
# osd:
|
||||
# For OSDs it is also possible to specify requests/limits based on device class
|
||||
# osd-hdd:
|
||||
# osd-ssd:
|
||||
# osd-nvme:
|
||||
# prepareosd:
|
||||
# mgr-sidecar:
|
||||
# crashcollector:
|
||||
# logcollector:
|
||||
# cleanup:
|
||||
# exporter:
|
||||
# The option to automatically remove OSDs that are out and are safe to destroy.
|
||||
removeOSDsIfOutAndSafeToRemove: false
|
||||
priorityClassNames:
|
||||
#all: rook-ceph-default-priority-class
|
||||
mon: system-node-critical
|
||||
osd: system-node-critical
|
||||
mgr: system-cluster-critical
|
||||
#crashcollector: rook-ceph-crashcollector-priority-class
|
||||
storage: # cluster level storage configuration and selection
|
||||
useAllNodes: true
|
||||
useAllDevices: true
|
||||
#deviceFilter:
|
||||
config:
|
||||
# crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
|
||||
# metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore.
|
||||
# databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB
|
||||
# osdsPerDevice: "1" # this value can be overridden at the node or device level
|
||||
# encryptedDevice: "true" # the default value for this option is "false"
|
||||
# Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
|
||||
# nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label.
|
||||
# nodes:
|
||||
# - name: "172.17.4.201"
|
||||
# devices: # specific devices to use for storage can be specified for each node
|
||||
# - name: "sdb"
|
||||
# - name: "nvme01" # multiple osds can be created on high performance devices
|
||||
# config:
|
||||
# osdsPerDevice: "5"
|
||||
# - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths
|
||||
# config: # configuration can be specified at the node level which overrides the cluster level config
|
||||
# - name: "172.17.4.301"
|
||||
# deviceFilter: "^sd."
|
||||
# when onlyApplyOSDPlacement is false, will merge both placement.All() and placement.osd
|
||||
onlyApplyOSDPlacement: false
|
||||
# Time for which an OSD pod will sleep before restarting, if it stopped due to flapping
|
||||
# flappingRestartIntervalHours: 24
|
||||
# The section for configuring management of daemon disruptions during upgrade or fencing.
|
||||
disruptionManagement:
|
||||
# If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically
|
||||
# via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will
|
||||
# block eviction of OSDs by default and unblock them safely when drains are detected.
|
||||
managePodBudgets: true
|
||||
# A duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the
|
||||
# default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true`. The default value is `30` minutes.
|
||||
osdMaintenanceTimeout: 30
|
||||
# A duration in minutes that the operator will wait for the placement groups to become healthy (active+clean) after a drain was completed and OSDs came back up.
|
||||
# The operator will continue with the next drain if the timeout is exceeded. This only works if `managePodBudgets` is `true`.
|
||||
# No values or 0 means that the operator will wait until the placement groups are healthy before unblocking the next drain.
|
||||
pgHealthCheckTimeout: 0
|
||||
|
||||
# healthChecks
|
||||
# Valid values for daemons are 'mon', 'osd', 'status'
|
||||
healthCheck:
|
||||
daemonHealth:
|
||||
mon:
|
||||
disabled: false
|
||||
interval: 45s
|
||||
osd:
|
||||
disabled: false
|
||||
interval: 60s
|
||||
status:
|
||||
disabled: false
|
||||
interval: 60s
|
||||
# Change pod liveness probe timing or threshold values. Works for all mon,mgr,osd daemons.
|
||||
livenessProbe:
|
||||
mon:
|
||||
disabled: false
|
||||
mgr:
|
||||
disabled: false
|
||||
osd:
|
||||
disabled: false
|
||||
# Change pod startup probe timing or threshold values. Works for all mon,mgr,osd daemons.
|
||||
startupProbe:
|
||||
mon:
|
||||
disabled: false
|
||||
mgr:
|
||||
disabled: false
|
||||
osd:
|
||||
disabled: false
|
||||
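The comments above apply the manifests with kubectl. In an Ansible-driven setup, a hedged sketch of the equivalent flow is to render the file with ansible.builtin.template and apply it with kubernetes.core.k8s; the task names and paths below are illustrative assumptions, not the repository's actual tasks:

- name: Render rook-ceph cluster manifest          # hypothetical task; src/dest paths are assumptions
  ansible.builtin.template:
    src: cluster.yaml
    dest: /tmp/rook-cluster.yaml

- name: Apply rook-ceph cluster manifest           # hypothetical task
  kubernetes.core.k8s:
    state: present
    src: /tmp/rook-cluster.yaml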
|
|
@ -0,0 +1,77 @@
|
|||
###################################################################################################################
|
||||
# Create the common resources that are necessary to start an external Ceph cluster in a different namespace
|
||||
# These resources can be created after the operator is already running, but they assume that common.yaml has already been applied
|
||||
# The samples all assume that your existing operator running in the "rook-ceph" namespace will also watch and have permissions
|
||||
# to interact with an external cluster configured in the "rook-ceph-external" namespace.
|
||||
#
|
||||
# kubectl create -f crds.yaml -f common.yaml -f operator.yaml -f common-external.yaml
|
||||
#
|
||||
# If there is no cluster managed by the current Rook Operator
|
||||
# you can simply replace all occurrences of rook-ceph-external with rook-ceph
|
||||
#
|
||||
# And remove the following code:
|
||||
#
|
||||
# apiVersion: v1
|
||||
# kind: Namespace
|
||||
# metadata:
|
||||
# name: rook-ceph-external
|
||||
#
|
||||
# Then kubectl create -f cluster-external.yaml
|
||||
###################################################################################################################
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: rook-ceph-external # namespace:cluster
|
||||
---
|
||||
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: rook-ceph-cluster-mgmt
|
||||
namespace: rook-ceph-external # namespace:cluster
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: rook-ceph-cluster-mgmt
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: rook-ceph-system
|
||||
namespace: rook-ceph # namespace:operator
|
||||
---
|
||||
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: rook-ceph-cmd-reporter
|
||||
namespace: rook-ceph-external # namespace:cluster
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: rook-ceph-cmd-reporter
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: rook-ceph-cmd-reporter
|
||||
namespace: rook-ceph-external # namespace:cluster
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: rook-ceph-cmd-reporter
|
||||
namespace: rook-ceph-external # namespace:cluster
|
||||
---
|
||||
kind: Role
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: rook-ceph-cmd-reporter
|
||||
namespace: rook-ceph-external # namespace:cluster
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods
|
||||
- configmaps
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- delete
|
||||
Three file diffs suppressed because they are too large to display.
|
|
@ -0,0 +1,36 @@
|
|||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: rook-cephfs
|
||||
# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
|
||||
provisioner: rook-ceph.cephfs.csi.ceph.com # driver:namespace:operator
|
||||
parameters:
|
||||
# clusterID is the namespace where the rook cluster is running
|
||||
# If you change this namespace, also change the namespace below where the secret namespaces are defined
|
||||
clusterID: rook-ceph-external # namespace:cluster
|
||||
|
||||
# CephFS filesystem name into which the volume shall be created
|
||||
fsName: sharedStoreK8s
|
||||
|
||||
# Ceph pool into which the volume shall be created
|
||||
# Required for provisionVolume: "true"
|
||||
pool: sharedStoreK8s_data
|
||||
|
||||
# The secrets contain Ceph admin credentials. These are generated automatically by the operator
|
||||
# in the same namespace as the cluster.
|
||||
csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
|
||||
csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph-external # namespace:cluster
|
||||
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
|
||||
csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph-external # namespace:cluster
|
||||
csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
|
||||
csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph-external # namespace:cluster
|
||||
|
||||
# (optional) The driver can use either ceph-fuse (fuse) or ceph kernel client (kernel)
|
||||
# If omitted, the default volume mounter will be used - this is determined by probing for ceph-fuse
|
||||
# or by setting the default mounter explicitly via --volumemounter command-line argument.
|
||||
# mounter: kernel
|
||||
reclaimPolicy: Delete
|
||||
allowVolumeExpansion: true
|
||||
mountOptions:
|
||||
# uncomment the following line for debugging
|
||||
#- debug
|
||||
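A minimal sketch of a PersistentVolumeClaim that consumes this class; CephFS supports ReadWriteMany, and the claim name and size below are placeholders:

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: shared-data              # placeholder name
spec:
  accessModes:
    - ReadWriteMany              # CephFS volumes can be mounted by many pods at once
  resources:
    requests:
      storage: 5Gi               # placeholder size
  storageClassName: rook-cephfs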
|
|
@ -0,0 +1,28 @@
|
|||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: rook-cephfs
|
||||
# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
|
||||
provisioner: rook-ceph.cephfs.csi.ceph.com
|
||||
parameters:
|
||||
# clusterID is the namespace where the rook cluster is running
|
||||
# If you change this namespace, also change the namespace below where the secret namespaces are defined
|
||||
clusterID: rook-ceph
|
||||
|
||||
# CephFS filesystem name into which the volume shall be created
|
||||
fsName: myfs
|
||||
|
||||
# Ceph pool into which the volume shall be created
|
||||
# Required for provisionVolume: "true"
|
||||
pool: myfs-replicated
|
||||
|
||||
# The secrets contain Ceph admin credentials. These are generated automatically by the operator
|
||||
# in the same namespace as the cluster.
|
||||
csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
|
||||
csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
|
||||
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
|
||||
csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
|
||||
csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
|
||||
csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
|
||||
|
||||
reclaimPolicy: Delete
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
apiVersion: ceph.rook.io/v1
|
||||
kind: CephBlockPool
|
||||
metadata:
|
||||
name: replicapool
|
||||
namespace: rook-ceph # namespace:cluster
|
||||
spec:
|
||||
failureDomain: host
|
||||
replicated:
|
||||
size: 3
|
||||
# Disallow setting pool with replica 1, this could lead to data loss without recovery.
|
||||
# Make sure you're *ABSOLUTELY CERTAIN* that is what you want
|
||||
requireSafeReplicaSize: true
|
||||
# gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool
|
||||
# for more info: https://docs.ceph.com/docs/master/rados/operations/placement-groups/#specifying-expected-pool-size
|
||||
#targetSizeRatio: .5
|
||||
---
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: rook-ceph-block
|
||||
annotations:
|
||||
storageclass.kubernetes.io/is-default-class: 'true'
|
||||
provisioner: rook-ceph.rbd.csi.ceph.com
|
||||
parameters:
|
||||
# clusterID is the namespace where the rook cluster is running
|
||||
# If you change this namespace, also change the namespace below where the secret namespaces are defined
|
||||
clusterID: rook-ceph-external # namespace:cluster
|
||||
|
||||
# If you want to use erasure coded pool with RBD, you need to create
|
||||
# two pools. one erasure coded and one replicated.
|
||||
# You need to specify the replicated pool here in the `pool` parameter, it is
|
||||
# used for the metadata of the images.
|
||||
# The erasure coded pool must be set as the `dataPool` parameter below.
|
||||
#dataPool: ec-data-pool
|
||||
pool: k8sBlockStorage
|
||||
|
||||
# (optional) mapOptions is a comma-separated list of map options.
|
||||
# For krbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options
|
||||
# For nbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options
|
||||
# mapOptions: lock_on_read,queue_depth=1024
|
||||
|
||||
# (optional) unmapOptions is a comma-separated list of unmap options.
|
||||
# For krbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options
|
||||
# For nbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options
|
||||
# unmapOptions: force
|
||||
|
||||
# (optional) Set it to true to encrypt each volume with encryption keys
|
||||
# from a key management system (KMS)
|
||||
# encrypted: "true"
|
||||
|
||||
# (optional) Use external key management system (KMS) for encryption key by
|
||||
# specifying a unique ID matching a KMS ConfigMap. The ID is only used for
|
||||
# correlation to configmap entry.
|
||||
# encryptionKMSID: <kms-config-id>
|
||||
|
||||
# RBD image format. Defaults to "2".
|
||||
imageFormat: "2"
|
||||
|
||||
# RBD image features
|
||||
# Available for imageFormat: "2". Older releases of CSI RBD
|
||||
# support only the `layering` feature. The Linux kernel (KRBD) supports the
|
||||
# full complement of features as of 5.4
|
||||
# `layering` alone corresponds to Ceph's bitfield value of "2" ;
|
||||
# `layering` + `fast-diff` + `object-map` + `deep-flatten` + `exclusive-lock` together
|
||||
# correspond to Ceph's OR'd bitfield value of "63". Here we use
|
||||
# a symbolic, comma-separated format:
|
||||
# For 5.4 or later kernels:
|
||||
#imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock
|
||||
# For 5.3 or earlier kernels:
|
||||
imageFeatures: layering
|
||||
|
||||
# The secrets contain Ceph admin credentials. These are generated automatically by the operator
|
||||
# in the same namespace as the cluster.
|
||||
csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
|
||||
csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph-external # namespace:cluster
|
||||
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
|
||||
csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph-external # namespace:cluster
|
||||
csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
|
||||
csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph-external # namespace:cluster
|
||||
# Specify the filesystem type of the volume. If not specified, csi-provisioner
|
||||
# will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
|
||||
# in hyperconverged settings where the volume is mounted on the same node as the osds.
|
||||
csi.storage.k8s.io/fstype: ext4
|
||||
# uncomment the following to use rbd-nbd as mounter on supported nodes
|
||||
# **IMPORTANT**: CephCSI v3.4.0 onwards a volume healer functionality is added to reattach
|
||||
# the PVC to the application pod if the nodeplugin pod restarts.
|
||||
# It's still in Alpha support. Therefore, this option is not recommended for production use.
|
||||
#mounter: rbd-nbd
|
||||
allowVolumeExpansion: true
|
||||
reclaimPolicy: Delete
|
||||
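The commented `dataPool` above refers to a separate erasure coded pool used for RBD data, with the replicated pool kept in the `pool` parameter for image metadata. A hedged sketch of such a pool follows; the chunk counts are placeholders and require at least dataChunks + codingChunks failure domains:

apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: ec-data-pool             # matches the commented dataPool parameter above
  namespace: rook-ceph           # namespace:cluster
spec:
  failureDomain: host
  erasureCoded:
    dataChunks: 2                # placeholder chunk counts
    codingChunks: 1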
|
|
@ -0,0 +1,71 @@
|
|||
apiVersion: ceph.rook.io/v1
|
||||
kind: CephBlockPool
|
||||
metadata:
|
||||
name: replicapool
|
||||
namespace: rook-ceph
|
||||
spec:
|
||||
failureDomain: host
|
||||
replicated:
|
||||
size: 1
|
||||
---
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: rook-ceph-block
|
||||
# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
|
||||
provisioner: rook-ceph.rbd.csi.ceph.com
|
||||
parameters:
|
||||
# clusterID is the namespace where the rook cluster is running
|
||||
clusterID: rook-ceph
|
||||
# Ceph pool into which the RBD image shall be created
|
||||
pool: replicapool
|
||||
|
||||
# (optional) mapOptions is a comma-separated list of map options.
|
||||
# For krbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options
|
||||
# For nbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options
|
||||
# mapOptions: lock_on_read,queue_depth=1024
|
||||
|
||||
# (optional) unmapOptions is a comma-separated list of unmap options.
|
||||
# For krbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options
|
||||
# For nbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options
|
||||
# unmapOptions: force
|
||||
|
||||
# RBD image format. Defaults to "2".
|
||||
imageFormat: "2"
|
||||
|
||||
# RBD image features
|
||||
# Available for imageFormat: "2". Older releases of CSI RBD
|
||||
# support only the `layering` feature. The Linux kernel (KRBD) supports the
|
||||
# full complement of features as of 5.4
|
||||
# `layering` alone corresponds to Ceph's bitfield value of "2" ;
|
||||
# `layering` + `fast-diff` + `object-map` + `deep-flatten` + `exclusive-lock` together
|
||||
# correspond to Ceph's OR'd bitfield value of "63". Here we use
|
||||
# a symbolic, comma-separated format:
|
||||
# For 5.4 or later kernels:
|
||||
#imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock
|
||||
# For 5.3 or earlier kernels:
|
||||
imageFeatures: layering
|
||||
|
||||
# The secrets contain Ceph admin credentials.
|
||||
csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
|
||||
csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
|
||||
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
|
||||
csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
|
||||
csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
|
||||
csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
|
||||
|
||||
# Specify the filesystem type of the volume. If not specified, csi-provisioner
|
||||
# will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
|
||||
# in hyperconverged settings where the volume is mounted on the same node as the osds.
|
||||
csi.storage.k8s.io/fstype: ext4
|
||||
|
||||
# Delete the rbd volume when a PVC is deleted
|
||||
reclaimPolicy: Delete
|
||||
|
||||
# Optional, if you want to add dynamic resize for PVC.
|
||||
# For now only ext3, ext4, xfs resize support provided, like in Kubernetes itself.
|
||||
allowVolumeExpansion: true
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
apiVersion: ceph.rook.io/v1
|
||||
kind: CephBlockPool
|
||||
metadata:
|
||||
name: replicapool
|
||||
namespace: rook-ceph # namespace:cluster
|
||||
spec:
|
||||
failureDomain: host
|
||||
replicated:
|
||||
size: 3
|
||||
# Disallow setting pool with replica 1, this could lead to data loss without recovery.
|
||||
# Make sure you're *ABSOLUTELY CERTAIN* that is what you want
|
||||
requireSafeReplicaSize: true
|
||||
# gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool
|
||||
# for more info: https://docs.ceph.com/docs/master/rados/operations/placement-groups/#specifying-expected-pool-size
|
||||
#targetSizeRatio: .5
|
||||
---
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: rook-ceph-block
|
||||
# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
|
||||
provisioner: rook-ceph.rbd.csi.ceph.com
|
||||
parameters:
|
||||
# clusterID is the namespace where the rook cluster is running
|
||||
# If you change this namespace, also change the namespace below where the secret namespaces are defined
|
||||
clusterID: rook-ceph # namespace:cluster
|
||||
|
||||
# If you want to use erasure coded pool with RBD, you need to create
|
||||
# two pools. one erasure coded and one replicated.
|
||||
# You need to specify the replicated pool here in the `pool` parameter, it is
|
||||
# used for the metadata of the images.
|
||||
# The erasure coded pool must be set as the `dataPool` parameter below.
|
||||
#dataPool: ec-data-pool
|
||||
pool: replicapool
|
||||
|
||||
# (optional) mapOptions is a comma-separated list of map options.
|
||||
# For krbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options
|
||||
# For nbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options
|
||||
# mapOptions: lock_on_read,queue_depth=1024
|
||||
|
||||
# (optional) unmapOptions is a comma-separated list of unmap options.
|
||||
# For krbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options
|
||||
# For nbd options refer
|
||||
# https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options
|
||||
# unmapOptions: force
|
||||
|
||||
# (optional) Set it to true to encrypt each volume with encryption keys
|
||||
# from a key management system (KMS)
|
||||
# encrypted: "true"
|
||||
|
||||
# (optional) Use external key management system (KMS) for encryption key by
|
||||
# specifying a unique ID matching a KMS ConfigMap. The ID is only used for
|
||||
# correlation to configmap entry.
|
||||
# encryptionKMSID: <kms-config-id>
|
||||
|
||||
# RBD image format. Defaults to "2".
|
||||
imageFormat: "2"
|
||||
|
||||
# RBD image features
|
||||
# Available for imageFormat: "2". Older releases of CSI RBD
|
||||
# support only the `layering` feature. The Linux kernel (KRBD) supports the
|
||||
# full complement of features as of 5.4
|
||||
# `layering` alone corresponds to Ceph's bitfield value of "2" ;
|
||||
# `layering` + `fast-diff` + `object-map` + `deep-flatten` + `exclusive-lock` together
|
||||
# correspond to Ceph's OR'd bitfield value of "63". Here we use
|
||||
# a symbolic, comma-separated format:
|
||||
# For 5.4 or later kernels:
|
||||
#imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock
|
||||
# For 5.3 or earlier kernels:
|
||||
imageFeatures: layering
|
||||
|
||||
# The secrets contain Ceph admin credentials. These are generated automatically by the operator
|
||||
# in the same namespace as the cluster.
|
||||
csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
|
||||
csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph # namespace:cluster
|
||||
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
|
||||
csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph # namespace:cluster
|
||||
csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
|
||||
csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph # namespace:cluster
|
||||
# Specify the filesystem type of the volume. If not specified, csi-provisioner
|
||||
# will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
|
||||
# in hyperconverged settings where the volume is mounted on the same node as the osds.
|
||||
csi.storage.k8s.io/fstype: ext4
|
||||
# uncomment the following to use rbd-nbd as mounter on supported nodes
|
||||
# **IMPORTANT**: CephCSI v3.4.0 onwards a volume healer functionality is added to reattach
|
||||
# the PVC to the application pod if the nodeplugin pod restarts.
|
||||
# It's still in Alpha support. Therefore, this option is not recommended for production use.
|
||||
#mounter: rbd-nbd
|
||||
allowVolumeExpansion: true
|
||||
reclaimPolicy: Delete
|
||||
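A minimal sketch of a claim against the block class; RBD volumes are ReadWriteOnce, and the claim name and size below are placeholders:

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: app-data                 # placeholder name
spec:
  accessModes:
    - ReadWriteOnce              # an RBD image is attached to a single node at a time
  resources:
    requests:
      storage: 10Gi              # placeholder size
  storageClassName: rook-ceph-block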
|
|
@ -0,0 +1,22 @@
|
|||
#################################################################################################################
|
||||
# Create a filesystem with settings for a test environment where only a single OSD is required.
|
||||
# kubectl create -f filesystem-test.yaml
|
||||
#################################################################################################################
|
||||
|
||||
apiVersion: ceph.rook.io/v1
|
||||
kind: CephFilesystem
|
||||
metadata:
|
||||
name: myfs
|
||||
namespace: rook-ceph
|
||||
spec:
|
||||
metadataPool:
|
||||
replicated:
|
||||
size: 1
|
||||
dataPools:
|
||||
- name: replicated
|
||||
replicated:
|
||||
size: 1
|
||||
preserveFilesystemOnDelete: true
|
||||
metadataServer:
|
||||
activeCount: 1
|
||||
activeStandby: true
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
#################################################################################################################
|
||||
# Create a filesystem with replication enabled, suitable for a production environment.
|
||||
# A minimum of 3 OSDs on different nodes are required in this example.
|
||||
# If one mds daemon per node is too restrictive, see the podAntiAffinity below.
|
||||
# kubectl create -f filesystem.yaml
|
||||
#################################################################################################################
|
||||
|
||||
apiVersion: ceph.rook.io/v1
|
||||
kind: CephFilesystem
|
||||
metadata:
|
||||
name: myfs
|
||||
namespace: rook-ceph # namespace:cluster
|
||||
spec:
|
||||
# The metadata pool spec. Must use replication.
|
||||
metadataPool:
|
||||
replicated:
|
||||
size: 3
|
||||
requireSafeReplicaSize: true
|
||||
parameters:
|
||||
# Inline compression mode for the data pool
|
||||
# Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression
|
||||
compression_mode:
|
||||
none
|
||||
# gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool
|
||||
# for more info: https://docs.ceph.com/docs/master/rados/operations/placement-groups/#specifying-expected-pool-size
|
||||
#target_size_ratio: ".5"
|
||||
# The list of data pool specs. Can use replication or erasure coding.
|
||||
dataPools:
|
||||
- name: replicated
|
||||
failureDomain: host
|
||||
replicated:
|
||||
size: 3
|
||||
# Disallow setting pool with replica 1, this could lead to data loss without recovery.
|
||||
# Make sure you're *ABSOLUTELY CERTAIN* that is what you want
|
||||
requireSafeReplicaSize: true
|
||||
parameters:
|
||||
# Inline compression mode for the data pool
|
||||
# Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression
|
||||
compression_mode:
|
||||
none
|
||||
# gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool
|
||||
# for more info: https://docs.ceph.com/docs/master/rados/operations/placement-groups/#specifying-expected-pool-size
|
||||
#target_size_ratio: ".5"
|
||||
# Whether to preserve filesystem after CephFilesystem CRD deletion
|
||||
preserveFilesystemOnDelete: true
|
||||
# The metadata service (mds) configuration
|
||||
metadataServer:
|
||||
# The number of active MDS instances
|
||||
activeCount: 1
|
||||
# Whether each active MDS instance will have an active standby with a warm metadata cache for faster failover.
|
||||
# If false, standbys will be available, but will not have a warm cache.
|
||||
activeStandby: true
|
||||
# The affinity rules to apply to the mds deployment
|
||||
placement:
|
||||
# nodeAffinity:
|
||||
# requiredDuringSchedulingIgnoredDuringExecution:
|
||||
# nodeSelectorTerms:
|
||||
# - matchExpressions:
|
||||
# - key: role
|
||||
# operator: In
|
||||
# values:
|
||||
# - mds-node
|
||||
# topologySpreadConstraints:
|
||||
# tolerations:
|
||||
# - key: mds-node
|
||||
# operator: Exists
|
||||
# podAffinity:
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
- labelSelector:
|
||||
matchExpressions:
|
||||
- key: app
|
||||
operator: In
|
||||
values:
|
||||
- rook-ceph-mds
|
||||
## Add this if you want to allow mds daemons for different filesystems to run on one
|
||||
## node. The value in "values" must match .metadata.name.
|
||||
# - key: rook_file_system
|
||||
# operator: In
|
||||
# values:
|
||||
# - myfs
|
||||
# topologyKey: kubernetes.io/hostname will place MDS across different hosts
|
||||
topologyKey: kubernetes.io/hostname
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app
|
||||
operator: In
|
||||
values:
|
||||
- rook-ceph-mds
|
||||
# topologyKey: */zone can be used to spread MDS across different AZ
|
||||
# Use <topologyKey: failure-domain.beta.kubernetes.io/zone> if your k8s cluster is v1.16 or lower
|
||||
# Use <topologyKey: topology.kubernetes.io/zone> if your k8s cluster is v1.17 or higher
|
||||
topologyKey: topology.kubernetes.io/zone
|
||||
# A key/value list of annotations
|
||||
# annotations:
|
||||
# key: value
|
||||
# A key/value list of labels
|
||||
# labels:
|
||||
# key: value
|
||||
# resources:
|
||||
# The requests and limits set here, allow the filesystem MDS Pod(s) to use half of one CPU core and 1 gigabyte of memory
|
||||
# limits:
|
||||
# cpu: "500m"
|
||||
# memory: "1024Mi"
|
||||
# requests:
|
||||
# cpu: "500m"
|
||||
# memory: "1024Mi"
|
||||
priorityClassName: system-cluster-critical
|
||||
livenessProbe:
|
||||
disabled: false
|
||||
startupProbe:
|
||||
disabled: false
|
||||
# Filesystem mirroring settings
|
||||
# mirroring:
|
||||
# enabled: true
|
||||
# list of Kubernetes Secrets containing the peer token
|
||||
# for more details see: https://docs.ceph.com/en/latest/dev/cephfs-mirroring/#bootstrap-peers
|
||||
# Add the secret name if it already exists; otherwise specify an empty list here.
|
||||
# peers:
|
||||
#secretNames:
|
||||
#- secondary-cluster-peer
|
||||
# specify the schedule(s) on which snapshots should be taken
|
||||
# see the official syntax here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-schedules
|
||||
# snapshotSchedules:
|
||||
# - path: /
|
||||
# interval: 24h # daily snapshots
|
||||
# The startTime should be mentioned in the format YYYY-MM-DDTHH:MM:SS
|
||||
# If startTime is not specified, then by default the start time is considered as midnight UTC.
|
||||
# see usage here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#usage
|
||||
# startTime: 2022-07-15T11:55:00
|
||||
# manage retention policies
|
||||
# see syntax duration here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-retention-policies
|
||||
# snapshotRetention:
|
||||
# - path: /
|
||||
# duration: "h 24"
|
||||
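If this filesystem is applied from a playbook, a hedged sketch of waiting for the CR to report Ready before creating the matching StorageClass is shown below; it assumes the CephFilesystem follows the usual Rook status.phase convention, and the retry counts are placeholders:

- name: Wait for CephFilesystem myfs to become Ready     # hypothetical task
  kubernetes.core.k8s_info:
    api_version: ceph.rook.io/v1
    kind: CephFilesystem
    name: myfs
    namespace: rook-ceph
  register: myfs_info
  until: >
    myfs_info.resources | length > 0 and
    myfs_info.resources[0].status.phase | default('') == 'Ready'
  retries: 30                                            # placeholder retry/delay values
  delay: 20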
|
|
@ -0,0 +1,23 @@
|
|||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: csi-metrics
|
||||
namespace: rook-ceph
|
||||
labels:
|
||||
team: rook
|
||||
spec:
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- rook-ceph
|
||||
selector:
|
||||
matchLabels:
|
||||
app: csi-metrics
|
||||
endpoints:
|
||||
- port: csi-http-metrics
|
||||
path: /metrics
|
||||
interval: 5s
|
||||
# comment out the csi-grpc-metrics endpoint below if CSI gRPC metrics are not enabled
|
||||
- port: csi-grpc-metrics
|
||||
path: /metrics
|
||||
interval: 5s
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
# Copied from /deploy/charts/rook-ceph-cluster/prometheus/, CR header added, and indentation increased on the groups
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
prometheus: rook-prometheus
|
||||
role: alert-rules
|
||||
name: prometheus-ceph-rules
|
||||
namespace: rook-ceph
|
||||
spec:
|
||||
groups:
|
||||
- name: persistent-volume-alert.rules
|
||||
rules:
|
||||
- alert: PersistentVolumeUsageNearFull
|
||||
annotations:
|
||||
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 75%. Free up some space or expand the PVC.
|
||||
message: PVC {{ $labels.persistentvolumeclaim }} is nearing full. Data deletion or PVC expansion is required.
|
||||
severity_level: warning
|
||||
storage_type: ceph
|
||||
expr: |
|
||||
(kubelet_volume_stats_used_bytes * on (namespace,persistentvolumeclaim) group_left(storageclass, provisioner) (kube_persistentvolumeclaim_info * on (storageclass) group_left(provisioner) kube_storageclass_info {provisioner=~"(.*rbd.csi.ceph.com)|(.*cephfs.csi.ceph.com)"})) / (kubelet_volume_stats_capacity_bytes * on (namespace,persistentvolumeclaim) group_left(storageclass, provisioner) (kube_persistentvolumeclaim_info * on (storageclass) group_left(provisioner) kube_storageclass_info {provisioner=~"(.*rbd.csi.ceph.com)|(.*cephfs.csi.ceph.com)"})) > 0.75
|
||||
for: 5s
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PersistentVolumeUsageCritical
|
||||
annotations:
|
||||
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 85%. Free up some space or expand the PVC immediately.
|
||||
message: PVC {{ $labels.persistentvolumeclaim }} is critically full. Data deletion or PVC expansion is required.
|
||||
severity_level: error
|
||||
storage_type: ceph
|
||||
expr: |
|
||||
(kubelet_volume_stats_used_bytes * on (namespace,persistentvolumeclaim) group_left(storageclass, provisioner) (kube_persistentvolumeclaim_info * on (storageclass) group_left(provisioner) kube_storageclass_info {provisioner=~"(.*rbd.csi.ceph.com)|(.*cephfs.csi.ceph.com)"})) / (kubelet_volume_stats_capacity_bytes * on (namespace,persistentvolumeclaim) group_left(storageclass, provisioner) (kube_persistentvolumeclaim_info * on (storageclass) group_left(provisioner) kube_storageclass_info {provisioner=~"(.*rbd.csi.ceph.com)|(.*cephfs.csi.ceph.com)"})) > 0.85
|
||||
for: 5s
|
||||
labels:
|
||||
severity: critical
|
||||
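For readability: the long expressions above compute the per-PVC usage ratio, with the joins only attaching storageclass/provisioner labels and restricting the check to PVCs provisioned by the Ceph CSI drivers. A simplified, illustrative form of the same check (without the provisioner filter) is:

# kubelet_volume_stats_used_bytes
#   / kubelet_volume_stats_capacity_bytes > 0.75      # warning threshold; 0.85 for the critical alert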
|
|
@ -0,0 +1,19 @@
|
|||
apiVersion: keda.sh/v1alpha1
|
||||
kind: ScaledObject
|
||||
metadata:
|
||||
name: rgw-scale
|
||||
namespace: rook-ceph
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
kind: Deployment
|
||||
name: rook-ceph-rgw-my-store-a
|
||||
minReplicaCount: 1
|
||||
maxReplicaCount: 5
|
||||
triggers:
|
||||
- type: prometheus
|
||||
metadata:
|
||||
serverAddress: http://rook-prometheus.rook-ceph.svc:9090
|
||||
metricName: ceph_rgw_put_collector
|
||||
query: |
|
||||
sum(rate(ceph_rgw_put[2m]))
|
||||
threshold: "90"
|
||||
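With KEDA's prometheus scaler the threshold acts as the target value per replica, so the desired replica count is roughly ceil(query result / threshold), clamped between minReplicaCount and maxReplicaCount. A hedged worked example with made-up metric values:

# sum(rate(ceph_rgw_put[2m])) = 270 ops/s  ->  ceil(270 / 90) = 3 RGW replicas
# sum(rate(ceph_rgw_put[2m])) = 500 ops/s  ->  ceil(500 / 90) = 6, clamped to maxReplicaCount = 5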
|
|
@ -0,0 +1,846 @@
|
|||
# Copied from /deploy/charts/rook-ceph-cluster/prometheus/, CR header added, and indentation increased on the groups
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
prometheus: rook-prometheus
|
||||
role: alert-rules
|
||||
name: prometheus-ceph-rules
|
||||
namespace: rook-ceph
|
||||
spec:
|
||||
groups:
|
||||
- name: cluster health
|
||||
rules:
|
||||
- alert: CephHealthError
|
||||
expr: ceph_health_status == 2
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.2.1
|
||||
annotations:
|
||||
summary: Cluster is in the ERROR state
|
||||
description: >
|
||||
The cluster state has been HEALTH_ERROR for more than 5 minutes.
|
||||
Please check "ceph health detail" for more information.
|
||||
|
||||
- alert: CephHealthWarning
|
||||
expr: ceph_health_status == 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
summary: Cluster is in the WARNING state
|
||||
description: >
|
||||
The cluster state has been HEALTH_WARN for more than 15 minutes.
|
||||
Please check "ceph health detail" for more information.
|
||||
|
||||
- name: mon
|
||||
rules:
|
||||
- alert: CephMonDownQuorumAtRisk
|
||||
expr: ((ceph_health_detail{name="MON_DOWN"} == 1) * on() (count(ceph_mon_quorum_status == 1) == bool (floor(count(ceph_mon_metadata) / 2) + 1))) == 1
|
||||
for: 30s
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.3.1
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down
|
||||
summary: Monitor quorum is at risk
|
||||
description: |
|
||||
{{ $min := query "floor(count(ceph_mon_metadata) / 2) +1" | first | value }}Quorum requires a majority of monitors (x {{ $min }}) to be active
|
||||
Without quorum the cluster will become inoperable, affecting all services and connected clients.
|
||||
|
||||
The following monitors are down:
|
||||
{{- range query "(ceph_mon_quorum_status == 0) + on(ceph_daemon) group_left(hostname) (ceph_mon_metadata * 0)" }}
|
||||
- {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }}
|
||||
{{- end }}
|
||||
- alert: CephMonDown
|
||||
expr: (count(ceph_mon_quorum_status == 0) <= (count(ceph_mon_metadata) - floor(count(ceph_mon_metadata) / 2) + 1))
|
||||
for: 30s
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down
|
||||
summary: One or more monitors down
|
||||
description: |
|
||||
{{ $down := query "count(ceph_mon_quorum_status == 0)" | first | value }}{{ $s := "" }}{{ if gt $down 1.0 }}{{ $s = "s" }}{{ end }}There are {{ $down }} monitor{{ $s }} down.
|
||||
Quorum is still intact, but the loss of an additional monitor will make your cluster inoperable.
|
||||
|
||||
The following monitors are down:
|
||||
{{- range query "(ceph_mon_quorum_status == 0) + on(ceph_daemon) group_left(hostname) (ceph_mon_metadata * 0)" }}
|
||||
- {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }}
|
||||
{{- end }}
|
||||
- alert: CephMonDiskspaceCritical
|
||||
expr: ceph_health_detail{name="MON_DISK_CRIT"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.3.2
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-crit
|
||||
summary: Filesystem space on at least one monitor is critically low
|
||||
description: |
|
||||
The free space available to a monitor's store is critically low.
|
||||
You should increase the space available to the monitor(s). The default directory
|
||||
is /var/lib/ceph/mon-*/data/store.db on traditional deployments, and under
|
||||
/var/lib/rook/mon-*/data/store.db on the mon pod's worker node for Rook.
|
||||
Look for old, rotated versions of *.log and MANIFEST*. Do NOT touch any *.sst files.
|
||||
Also check any other directories under /var/lib/rook and other directories on the
|
||||
same filesystem, often /var/log and /var/tmp are culprits. Your monitor hosts are:
|
||||
{{- range query "ceph_mon_metadata"}}
|
||||
- {{ .Labels.hostname }}
|
||||
{{- end }}
|
||||
|
||||
- alert: CephMonDiskspaceLow
|
||||
expr: ceph_health_detail{name="MON_DISK_LOW"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-low
|
||||
summary: Disk space on at least one monitor is approaching full
|
||||
description: |
|
||||
The space available to a monitor's store is approaching full (>70% is the default).
|
||||
You should increase the space available to the monitor(s). The default directory
|
||||
is /var/lib/ceph/mon-*/data/store.db on traditional deployments, and under
|
||||
/var/lib/rook/mon-*/data/store.db on the mon pod's worker node for Rook.
|
||||
Look for old, rotated versions of *.log and MANIFEST*. Do NOT touch any *.sst files.
|
||||
Also check any other directories under /var/lib/rook and other directories on the
|
||||
same filesystem, often /var/log and /var/tmp are culprits. Your monitor hosts are:
|
||||
{{- range query "ceph_mon_metadata"}}
|
||||
- {{ .Labels.hostname }}
|
||||
{{- end }}
|
||||
|
||||
- alert: CephMonClockSkew
|
||||
expr: ceph_health_detail{name="MON_CLOCK_SKEW"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-clock-skew
|
||||
summary: Clock skew detected among monitors
|
||||
description: |
|
||||
Ceph monitors rely on closely synchronized time to maintain
|
||||
quorum and cluster consistency. This event indicates that time on at least
|
||||
one mon has drifted too far from the lead mon.
|
||||
|
||||
Review cluster status with ceph -s. This will show which monitors
|
||||
are affected. Check the time sync status on each monitor host with
|
||||
"ceph time-sync-status" and the state and peers of your ntpd or chrony daemon.
|
||||
|
||||
- name: osd
|
||||
rules:
|
||||
- alert: CephOSDDownHigh
|
||||
expr: count(ceph_osd_up == 0) / count(ceph_osd_up) * 100 >= 10
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.4.1
|
||||
annotations:
|
||||
summary: More than 10% of OSDs are down
|
||||
description: |
|
||||
{{ $value | humanize }}% or {{ with query "count(ceph_osd_up == 0)" }}{{ . | first | value }}{{ end }} of {{ with query "count(ceph_osd_up)" }}{{ . | first | value }}{{ end }} OSDs are down (>= 10%).
|
||||
|
||||
The following OSDs are down:
|
||||
{{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0" }}
|
||||
- {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }}
|
||||
{{- end }}
|
||||
- alert: CephOSDHostDown
|
||||
expr: ceph_health_detail{name="OSD_HOST_DOWN"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.4.8
|
||||
annotations:
|
||||
summary: An OSD host is offline
|
||||
description: |
|
||||
The following OSDs are down:
|
||||
{{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0" }}
|
||||
- {{ .Labels.hostname }} : {{ .Labels.ceph_daemon }}
|
||||
{{- end }}
|
||||
- alert: CephOSDDown
|
||||
expr: ceph_health_detail{name="OSD_DOWN"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.4.2
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-down
|
||||
summary: An OSD has been marked down
|
||||
description: |
|
||||
{{ $num := query "count(ceph_osd_up == 0)" | first | value }}{{ $s := "" }}{{ if gt $num 1.0 }}{{ $s = "s" }}{{ end }}{{ $num }} OSD{{ $s }} down for over 5mins.
|
||||
|
||||
The following OSD{{ $s }} {{ if eq $s "" }}is{{ else }}are{{ end }} down:
|
||||
{{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0"}}
|
||||
- {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }}
|
||||
{{- end }}
|
||||
|
||||
- alert: CephOSDNearFull
|
||||
expr: ceph_health_detail{name="OSD_NEARFULL"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.4.3
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-nearfull
|
||||
summary: OSD(s) running low on free space (NEARFULL)
|
||||
description: |
|
||||
One or more OSDs have reached the NEARFULL threshold
|
||||
|
||||
Use 'ceph health detail' and 'ceph osd df' to identify the problem.
|
||||
To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data.
|
||||
- alert: CephOSDFull
|
||||
expr: ceph_health_detail{name="OSD_FULL"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.4.6
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-full
|
||||
summary: OSD full, writes blocked
|
||||
description: |
|
||||
An OSD has reached the FULL threshold. Writes to pools that share the
|
||||
affected OSD will be blocked.
|
||||
|
||||
Use 'ceph health detail' and 'ceph osd df' to identify the problem.
|
||||
To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data.
|
||||
- alert: CephOSDBackfillFull
|
||||
expr: ceph_health_detail{name="OSD_BACKFILLFULL"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-backfillfull
|
||||
summary: OSD(s) too full for backfill operations
|
||||
description: |
|
||||
An OSD has reached the BACKFILL FULL threshold. This will prevent rebalance operations
|
||||
from completing.
|
||||
Use 'ceph health detail' and 'ceph osd df' to identify the problem.
|
||||
|
||||
To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data.
|
||||
- alert: CephOSDTooManyRepairs
|
||||
expr: ceph_health_detail{name="OSD_TOO_MANY_REPAIRS"} == 1
|
||||
for: 30s
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-too-many-repairs
|
||||
summary: OSD reports a high number of read errors
|
||||
description: |
|
||||
Reads from an OSD have used a secondary PG to return data to the client, indicating
|
||||
a potential failing disk.
|
||||
- alert: CephOSDTimeoutsPublicNetwork
|
||||
expr: ceph_health_detail{name="OSD_SLOW_PING_TIME_FRONT"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
summary: Network issues delaying OSD heartbeats (public network)
|
||||
description: |
|
||||
OSD heartbeats on the cluster's 'public' network (frontend) are running slow. Investigate the network
|
||||
for latency or loss issues. Use 'ceph health detail' to show the affected OSDs.
|
||||
- alert: CephOSDTimeoutsClusterNetwork
|
||||
expr: ceph_health_detail{name="OSD_SLOW_PING_TIME_BACK"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
summary: Network issues delaying OSD heartbeats (cluster network)
|
||||
description: |
|
||||
OSD heartbeats on the cluster's 'cluster' network (backend) are running slow. Investigate the network
|
||||
for latency or loss issues. Use 'ceph health detail' to show the affected OSDs.
|
||||
- alert: CephOSDInternalDiskSizeMismatch
|
||||
expr: ceph_health_detail{name="BLUESTORE_DISK_SIZE_MISMATCH"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-disk-size-mismatch
|
||||
summary: OSD size inconsistency error
|
||||
description: |
|
||||
One or more OSDs have an internal inconsistency between metadata and the size of the device.
|
||||
This could lead to the OSD(s) crashing in the future. You should redeploy the affected OSDs.
|
||||
- alert: CephDeviceFailurePredicted
|
||||
expr: ceph_health_detail{name="DEVICE_HEALTH"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#id2
|
||||
summary: Device(s) predicted to fail soon
|
||||
description: |
|
||||
The device health module has determined that one or more devices will fail
|
||||
soon. To review device status use 'ceph device ls'. To show a specific
|
||||
device use 'ceph device info <dev id>'.
|
||||
|
||||
Mark the OSD out so that data may migrate to other OSDs. Once
|
||||
the OSD has drained, destroy the OSD, replace the device, and redeploy the OSD.
|
||||
- alert: CephDeviceFailurePredictionTooHigh
|
||||
expr: ceph_health_detail{name="DEVICE_HEALTH_TOOMANY"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.4.7
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-toomany
|
||||
summary: Too many devices are predicted to fail, unable to resolve
|
||||
description: |
|
||||
The device health module has determined that devices predicted to
|
||||
fail cannot be remediated automatically, since too many OSDs would be removed from the
|
||||
cluster to ensure performance and availability. Prevent data
|
||||
integrity issues by adding new OSDs so that data may be relocated.
|
||||
- alert: CephDeviceFailureRelocationIncomplete
|
||||
expr: ceph_health_detail{name="DEVICE_HEALTH_IN_USE"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-in-use
|
||||
summary: Device failure is predicted, but unable to relocate data
|
||||
description: |
|
||||
The device health module has determined that one or more devices will fail
|
||||
soon, but the normal process of relocating the data on the device to other
|
||||
OSDs in the cluster is blocked.
|
||||
|
||||
Ensure that the cluster has available free space. It may be necessary to add
|
||||
capacity to the cluster to allow the data from the failing device to
|
||||
successfully migrate, or to enable the balancer.
|
||||
|
||||
- alert: CephOSDFlapping
|
||||
expr: |
|
||||
(
|
||||
rate(ceph_osd_up[5m])
|
||||
* on(ceph_daemon) group_left(hostname) ceph_osd_metadata
|
||||
) * 60 > 1
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.4.4
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/troubleshooting/troubleshooting-osd#flapping-osds
|
||||
summary: Network issues are causing OSDs to flap (mark each other down)
|
||||
description: >
|
||||
OSD {{ $labels.ceph_daemon }} on {{ $labels.hostname }} was
|
||||
marked down and back up {{ $value | humanize }} times per
|
||||
minute over the last 5 minutes. This may indicate a network issue (latency,
|
||||
packet loss, MTU mismatch) on the cluster network, or the public network if no cluster network
|
||||
is deployed. Check network stats on the listed host(s).
|
||||
|
||||
- alert: CephOSDReadErrors
|
||||
expr: ceph_health_detail{name="BLUESTORE_SPURIOUS_READ_ERRORS"} == 1
|
||||
for: 30s
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-spurious-read-errors
|
||||
summary: Device read errors detected
|
||||
description: >
|
||||
An OSD has encountered read errors, but the OSD has recovered by retrying
|
||||
the reads. This may indicate an issue with hardware or the kernel.
|
||||
# alert on high deviation from average PG count
|
||||
- alert: CephPGImbalance
|
||||
expr: |
|
||||
abs(
|
||||
(
|
||||
(ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0) by (job)
|
||||
) / on (job) group_left avg(ceph_osd_numpg > 0) by (job)
|
||||
) * on (ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.4.5
|
||||
annotations:
|
||||
summary: PGs are not balanced across OSDs
|
||||
description: >
|
||||
OSD {{ $labels.ceph_daemon }} on {{ $labels.hostname }} deviates
|
||||
by more than 30% from average PG count.
|
||||
# alert on high commit latency...but how high is too high
|
||||
|
||||
- name: mds
|
||||
rules:
|
||||
- alert: CephFilesystemDamaged
|
||||
expr: ceph_health_detail{name="MDS_DAMAGE"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.5.1
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages
|
||||
summary: CephFS filesystem is damaged.
|
||||
description: >
|
||||
Filesystem metadata has been corrupted. Data may be inaccessible.
|
||||
Analyze metrics from the MDS daemon admin socket, or
|
||||
escalate to support.
|
||||
- alert: CephFilesystemOffline
|
||||
expr: ceph_health_detail{name="MDS_ALL_DOWN"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.5.3
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-all-down
|
||||
summary: CephFS filesystem is offline
|
||||
description: >
|
||||
All MDS ranks are unavailable. The MDS daemons managing metadata
|
||||
are down, rendering the filesystem offline.
|
||||
- alert: CephFilesystemDegraded
|
||||
expr: ceph_health_detail{name="FS_DEGRADED"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.5.4
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-degraded
|
||||
summary: CephFS filesystem is degraded
|
||||
description: >
|
||||
One or more metadata daemons (MDS ranks) are failed or in a
|
||||
damaged state. At best the filesystem is partially available,
|
||||
at worst the filesystem is completely unusable.
|
||||
- alert: CephFilesystemMDSRanksLow
|
||||
expr: ceph_health_detail{name="MDS_UP_LESS_THAN_MAX"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-up-less-than-max
|
||||
summary: MDS daemon count is lower than configured
|
||||
description: >
|
||||
The filesystem's "max_mds" setting defines the number of MDS ranks in
|
||||
the filesystem. The current number of active MDS daemons is less than
|
||||
this value.
|
||||
- alert: CephFilesystemInsufficientStandby
|
||||
expr: ceph_health_detail{name="MDS_INSUFFICIENT_STANDBY"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-insufficient-standby
|
||||
summary: Ceph filesystem standby daemons too few
|
||||
description: >
|
||||
The minimum number of standby daemons required by standby_count_wanted
|
||||
is less than the current number of standby daemons. Adjust the standby count
|
||||
or increase the number of MDS daemons.
|
||||
- alert: CephFilesystemFailureNoStandby
|
||||
expr: ceph_health_detail{name="FS_WITH_FAILED_MDS"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.5.5
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-with-failed-mds
|
||||
summary: MDS daemon failed, no further standby available
|
||||
description: >
|
||||
An MDS daemon has failed, leaving only one active rank and no
|
||||
available standby. Investigate the cause of the failure or add a
|
||||
standby MDS.
|
||||
- alert: CephFilesystemReadOnly
|
||||
expr: ceph_health_detail{name="MDS_HEALTH_READ_ONLY"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.5.2
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages
|
||||
summary: CephFS filesystem in read only mode due to write error(s)
|
||||
description: >
|
||||
The filesystem has switched to READ ONLY due to an unexpected
|
||||
error when writing to the metadata pool.
|
||||
|
||||
Analyze the output from the MDS daemon admin socket, or
|
||||
escalate to support.
|
||||
|
||||
- name: mgr
|
||||
rules:
|
||||
- alert: CephMgrModuleCrash
|
||||
expr: ceph_health_detail{name="RECENT_MGR_MODULE_CRASH"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.6.1
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#recent-mgr-module-crash
|
||||
summary: A manager module has recently crashed
|
||||
description: >
|
||||
One or more mgr modules have crashed and have yet to be acknowledged by an administrator. A
|
||||
crashed module may impact functionality within the cluster. Use the 'ceph crash' command to
|
||||
determine which module has failed, and archive it to acknowledge the failure.
|
||||
- alert: CephMgrPrometheusModuleInactive
|
||||
expr: up{job="ceph"} == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.6.2
|
||||
annotations:
|
||||
summary: The mgr/prometheus module is not available
|
||||
description: >
|
||||
The mgr/prometheus module at {{ $labels.instance }} is unreachable. This
|
||||
could mean that the module has been disabled or the mgr daemon itself is down.
|
||||
|
||||
Without the mgr/prometheus module metrics and alerts will no longer
|
||||
function. Open a shell to an admin node or toolbox pod and use 'ceph -s' to determine whether the
|
||||
mgr is active. If the mgr is not active, restart it; otherwise, you can determine
|
||||
the mgr/prometheus module status with 'ceph mgr module ls'. If it is
|
||||
not listed as enabled, enable it with 'ceph mgr module enable prometheus'.
|
||||
|
||||
- name: pgs
|
||||
rules:
|
||||
- alert: CephPGsInactive
|
||||
expr: ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_active) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.7.1
|
||||
annotations:
|
||||
summary: One or more placement groups are inactive
|
||||
description: >
|
||||
{{ $value }} PGs have been inactive for more than 5 minutes in pool {{ $labels.name }}.
|
||||
Inactive placement groups are not able to serve read/write requests.
|
||||
- alert: CephPGsUnclean
|
||||
expr: ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_clean) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.7.2
|
||||
annotations:
|
||||
summary: One or more placement groups are marked unclean
|
||||
description: >
|
||||
{{ $value }} PGs have been unclean for more than 15 minutes in pool {{ $labels.name }}.
|
||||
Unclean PGs have not recovered from a previous failure.
|
||||
- alert: CephPGsDamaged
|
||||
expr: ceph_health_detail{name=~"PG_DAMAGED|OSD_SCRUB_ERRORS"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.7.4
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-damaged
|
||||
summary: Placement group damaged; manual intervention needed
|
||||
description: >
|
||||
Scrubs have flagged at least one PG as damaged or inconsistent.
|
||||
|
||||
Check to see which PG is affected, and attempt a manual repair if necessary. To list
|
||||
problematic placement groups, use 'ceph health detail' or 'rados list-inconsistent-pg <pool>'. To repair PGs use
|
||||
the 'ceph pg repair <pg_num>' command.
|
||||
- alert: CephPGRecoveryAtRisk
|
||||
expr: ceph_health_detail{name="PG_RECOVERY_FULL"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.7.5
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-recovery-full
|
||||
summary: OSDs are too full for recovery
|
||||
description: >
|
||||
Data redundancy is at risk since one or more OSDs are at or above the
|
||||
'full' threshold. Add capacity to the cluster, restore down/out OSDs, or delete unwanted data.
|
||||
- alert: CephPGUnavailableBlockingIO
|
||||
# PG_AVAILABILITY, but an OSD is not in a DOWN state
|
||||
expr: ((ceph_health_detail{name="PG_AVAILABILITY"} == 1) - scalar(ceph_health_detail{name="OSD_DOWN"})) == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.7.3
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-availability
|
||||
summary: PG is unavailable, blocking I/O
|
||||
description: >
|
||||
Data availability is reduced, impacting the cluster's ability to service I/O. One or
|
||||
more placement groups (PGs) are in a state that blocks I/O.
|
||||
- alert: CephPGBackfillAtRisk
|
||||
expr: ceph_health_detail{name="PG_BACKFILL_FULL"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.7.6
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-backfill-full
|
||||
summary: Backfill operations are blocked due to lack of free space
|
||||
description: >
|
||||
Data redundancy may be at risk due to lack of free space within the cluster. One or more OSDs
|
||||
have breached their 'backfillfull' threshold. Add more capacity, or delete unwanted data.
|
||||
- alert: CephPGNotScrubbed
|
||||
expr: ceph_health_detail{name="PG_NOT_SCRUBBED"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-scrubbed
|
||||
summary: Placement group(s) have not been scrubbed
|
||||
description: |
|
||||
One or more PGs have not been scrubbed recently. Scrubs check metadata integrity,
|
||||
protecting against bit-rot. They check that metadata
|
||||
is consistent across data replicas. When PGs miss their scrub interval, it may
|
||||
indicate that the scrub window is too small, or PGs were not in a 'clean' state during the
|
||||
scrub window.
|
||||
|
||||
You can manually initiate a scrub with: ceph pg scrub <pgid>
|
||||
- alert: CephPGsHighPerOSD
|
||||
expr: ceph_health_detail{name="TOO_MANY_PGS"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks/#too-many-pgs
|
||||
summary: Placement groups per OSD is too high
|
||||
description: |
|
||||
The number of placement groups per OSD is too high (exceeds the mon_max_pg_per_osd setting).
|
||||
|
||||
Check that the pg_autoscaler has not been disabled for any pools with 'ceph osd pool autoscale-status',
|
||||
and that the profile selected is appropriate. You may also adjust the target_size_ratio of a pool to guide
|
||||
the autoscaler based on the expected relative size of the pool
|
||||
('ceph osd pool set cephfs.cephfs.meta target_size_ratio .1') or set the pg_autoscaler
|
||||
mode to "warn" and adjust pg_num appropriately for one or more pools.
|
||||
- alert: CephPGNotDeepScrubbed
|
||||
expr: ceph_health_detail{name="PG_NOT_DEEP_SCRUBBED"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-deep-scrubbed
|
||||
summary: Placement group(s) have not been deep scrubbed
|
||||
description: |
|
||||
One or more PGs have not been deep scrubbed recently. Deep scrubs
|
||||
protect against bit-rot. They compare data
|
||||
replicas to ensure consistency. When PGs miss their deep scrub interval, it may indicate
|
||||
that the window is too small or PGs were not in a 'clean' state during the deep-scrub
|
||||
window.
|
||||
|
||||
You can manually initiate a deep scrub with: ceph pg deep-scrub <pgid>
|
||||
|
||||
- name: nodes
|
||||
rules:
|
||||
- alert: CephNodeRootFilesystemFull
|
||||
expr: node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100 < 5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.8.1
|
||||
annotations:
|
||||
summary: Root filesystem is dangerously full
|
||||
description: >
|
||||
Root volume is dangerously full: {{ $value | humanize }}% free.
|
||||
|
||||
# alert on packet errors and drop rate
|
||||
- alert: CephNodeNetworkPacketDrops
|
||||
expr: |
|
||||
(
|
||||
increase(node_network_receive_drop_total{device!="lo"}[1m]) +
|
||||
increase(node_network_transmit_drop_total{device!="lo"}[1m])
|
||||
) / (
|
||||
increase(node_network_receive_packets_total{device!="lo"}[1m]) +
|
||||
increase(node_network_transmit_packets_total{device!="lo"}[1m])
|
||||
) >= 0.0001 or (
|
||||
increase(node_network_receive_drop_total{device!="lo"}[1m]) +
|
||||
increase(node_network_transmit_drop_total{device!="lo"}[1m])
|
||||
) >= 10
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.8.2
|
||||
annotations:
|
||||
summary: One or more NICs reports packet drops
|
||||
description: >
|
||||
Node {{ $labels.instance }} experiences packet drop > 0.01% or >
|
||||
10 packets/s on interface {{ $labels.device }}.
|
||||
|
||||
- alert: CephNodeNetworkPacketErrors
|
||||
expr: |
|
||||
(
|
||||
increase(node_network_receive_errs_total{device!="lo"}[1m]) +
|
||||
increase(node_network_transmit_errs_total{device!="lo"}[1m])
|
||||
) / (
|
||||
increase(node_network_receive_packets_total{device!="lo"}[1m]) +
|
||||
increase(node_network_transmit_packets_total{device!="lo"}[1m])
|
||||
) >= 0.0001 or (
|
||||
increase(node_network_receive_errs_total{device!="lo"}[1m]) +
|
||||
increase(node_network_transmit_errs_total{device!="lo"}[1m])
|
||||
) >= 10
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.8.3
|
||||
annotations:
|
||||
summary: One or more NICs reports packet errors
|
||||
description: >
|
||||
Node {{ $labels.instance }} experiences packet errors > 0.01% or
|
||||
> 10 packets/s on interface {{ $labels.device }}.
|
||||
|
||||
# Restrict to device names beginning with '/' to skip false alarms from
|
||||
# tmpfs, overlay type filesystems
|
||||
- alert: CephNodeDiskspaceWarning
|
||||
expr: |
|
||||
predict_linear(node_filesystem_free_bytes{device=~"/.*"}[2d], 3600 * 24 * 5) *
|
||||
on(instance) group_left(nodename) node_uname_info < 0
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.8.4
|
||||
annotations:
|
||||
summary: Host filesystem free space is low
|
||||
description: >
|
||||
Mountpoint {{ $labels.mountpoint }} on {{ $labels.nodename }}
|
||||
will be full in less than 5 days based on the 48 hour trailing
|
||||
fill rate.
|
||||
- alert: CephNodeInconsistentMTU
|
||||
expr: node_network_mtu_bytes{device!="lo"} * (node_network_up{device!="lo"} > 0) != on() group_left() (quantile(0.5, node_network_mtu_bytes{device!="lo"}))
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
summary: MTU settings across hosts are inconsistent
|
||||
description: >
|
||||
Node {{ $labels.instance }} has a different MTU size ({{ $value }})
|
||||
than the median value on device {{ $labels.device }}.
|
||||
|
||||
- name: pools
|
||||
rules:
|
||||
- alert: CephPoolGrowthWarning
|
||||
expr: |
|
||||
(predict_linear((max(ceph_pool_percent_used) without (pod, instance))[2d:1h], 3600 * 24 * 5) * on(pool_id)
|
||||
group_right ceph_pool_metadata) >= 95
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.9.2
|
||||
annotations:
|
||||
summary: Pool growth rate may soon exceed capacity
|
||||
description: >
|
||||
Pool '{{ $labels.name }}' will be full in less than 5 days
|
||||
assuming the average fill-up rate of the past 48 hours.
|
||||
- alert: CephPoolBackfillFull
|
||||
expr: ceph_health_detail{name="POOL_BACKFILLFULL"} > 0
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
summary: Free space in a pool is too low for recovery/backfill
|
||||
description: >
|
||||
A pool is approaching the near full threshold, which will
|
||||
prevent recovery/backfill from completing.
|
||||
Consider adding more capacity.
|
||||
- alert: CephPoolFull
|
||||
expr: ceph_health_detail{name="POOL_FULL"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.9.1
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pool-full
|
||||
summary: Pool is full - writes are blocked
|
||||
description: |
|
||||
A pool has reached its MAX quota, or OSDs supporting the pool
|
||||
have reached the FULL threshold. Until this is resolved, writes to
|
||||
the pool will be blocked.
|
||||
Pool Breakdown (top 5)
|
||||
{{- range query "topk(5, sort_desc(ceph_pool_percent_used * on(pool_id) group_right ceph_pool_metadata))" }}
|
||||
- {{ .Labels.name }} at {{ .Value }}%
|
||||
{{- end }}
|
||||
Increase the pool's quota, or add capacity to the cluster
|
||||
then increase the pool's quota (e.g. ceph osd pool set-quota <pool_name> max_bytes <bytes>).
|
||||
- alert: CephPoolNearFull
|
||||
expr: ceph_health_detail{name="POOL_NEAR_FULL"} > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
summary: One or more Ceph pools are nearly full
|
||||
description: |
|
||||
A pool has exceeded the warning (percent full) threshold, or OSDs
|
||||
supporting the pool have reached the NEARFULL threshold. Writes may
|
||||
continue, but you are at risk of the pool going read-only if more capacity
|
||||
isn't made available.
|
||||
|
||||
Determine the affected pool with 'ceph df detail', looking
|
||||
at QUOTA BYTES and STORED. Increase the pool's quota, or add
|
||||
capacity to the cluster then increase the pool's quota
|
||||
(e.g. ceph osd pool set-quota <pool_name> max_bytes <bytes>).
|
||||
Also ensure that the balancer is active.
|
||||
- name: healthchecks
|
||||
rules:
|
||||
- alert: CephSlowOps
|
||||
expr: ceph_healthcheck_slow_ops > 0
|
||||
for: 30s
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#slow-ops
|
||||
summary: OSD operations are slow to complete
|
||||
description: >
|
||||
{{ $value }} OSD requests are taking too long to process (osd_op_complaint_time exceeded)
|
||||
|
||||
# Object related events
|
||||
- name: rados
|
||||
rules:
|
||||
- alert: CephObjectMissing
|
||||
expr: (ceph_health_detail{name="OBJECT_UNFOUND"} == 1) * on() (count(ceph_osd_up == 1) == bool count(ceph_osd_metadata)) == 1
|
||||
for: 30s
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.10.1
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#object-unfound
|
||||
summary: Object(s) marked UNFOUND
|
||||
description: |
|
||||
The latest version of a RADOS object cannot be found, even though all OSDs are up. I/O
|
||||
requests for this object from clients will block (hang). Resolving this issue may
|
||||
require the object to be rolled back to a prior version manually and then verified.
|
||||
# Generic
|
||||
- name: generic
|
||||
rules:
|
||||
- alert: CephDaemonCrash
|
||||
expr: ceph_health_detail{name="RECENT_CRASH"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
type: ceph_default
|
||||
oid: 1.3.6.1.4.1.50495.1.2.1.1.2
|
||||
annotations:
|
||||
documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks/#recent-crash
|
||||
summary: One or more Ceph daemons have crashed, and are pending acknowledgement
|
||||
description: |
|
||||
One or more daemons have crashed recently, and need to be acknowledged. This notification
|
||||
ensures that software crashes do not go unseen. To acknowledge a crash, use the
|
||||
'ceph crash archive <id>' command.
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: rook-prometheus
|
||||
namespace: rook-ceph
|
||||
spec:
|
||||
type: NodePort
|
||||
ports:
|
||||
- name: web
|
||||
nodePort: 30900
|
||||
port: 9090
|
||||
protocol: TCP
|
||||
targetPort: web
|
||||
selector:
|
||||
prometheus: rook-prometheus
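# Usage note (not part of the upstream manifest): with the NodePort above, the Prometheus UI
# should be reachable from outside the cluster at http://<node-ip>:30900, or in-cluster at
# http://rook-prometheus.rook-ceph.svc:9090. A minimal sketch for ad-hoc local access without
# the NodePort (assumes kubectl access to the rook-ceph namespace):
#   kubectl -n rook-ceph port-forward svc/rook-prometheus 9090:9090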
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: rook-ceph
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: prometheus
|
||||
aggregationRule:
|
||||
clusterRoleSelectors:
|
||||
- matchLabels:
|
||||
rbac.ceph.rook.io/aggregate-to-prometheus: "true"
|
||||
rules: []
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: prometheus-rules
|
||||
labels:
|
||||
rbac.ceph.rook.io/aggregate-to-prometheus: "true"
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- nodes
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- configmaps
|
||||
verbs: ["get"]
|
||||
- nonResourceURLs: ["/metrics"]
|
||||
verbs: ["get"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: prometheus
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: prometheus
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus
|
||||
namespace: rook-ceph
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: Prometheus
|
||||
metadata:
|
||||
name: rook-prometheus
|
||||
namespace: rook-ceph
|
||||
labels:
|
||||
prometheus: rook-prometheus
|
||||
spec:
|
||||
serviceAccountName: prometheus
|
||||
serviceMonitorSelector:
|
||||
matchLabels:
|
||||
team: rook
|
||||
ruleSelector:
|
||||
matchLabels:
|
||||
role: alert-rules
|
||||
prometheus: rook-prometheus
|
||||
resources:
|
||||
requests:
|
||||
memory: 400Mi
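# Note (illustrative, not part of the upstream manifest): the ruleSelector above only loads
# rules from PrometheusRule objects carrying both labels. A minimal sketch of a wrapper for
# the ceph alert groups defined earlier in this commit (metadata.name is an assumption):
#   apiVersion: monitoring.coreos.com/v1
#   kind: PrometheusRule
#   metadata:
#     name: prometheus-ceph-rules      # illustrative name
#     namespace: rook-ceph
#     labels:
#       role: alert-rules
#       prometheus: rook-prometheus
#   spec:
#     groups:
#       - name: healthchecks
#         rules:
#           - alert: CephSlowOps
#             expr: ceph_healthcheck_slow_ops > 0
#             for: 30s
#             labels:
#               severity: warning
#               type: ceph_default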
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
---
|
||||
# OLM: BEGIN ROLE
|
||||
# Aspects for creation of monitoring resources
|
||||
kind: Role
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: rook-ceph-monitor
|
||||
namespace: rook-ceph
|
||||
rules:
|
||||
- apiGroups:
|
||||
- monitoring.coreos.com
|
||||
resources:
|
||||
- servicemonitors
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- delete
|
||||
# OLM: END ROLE
|
||||
---
|
||||
# OLM: BEGIN ROLE BINDING
|
||||
# Allow creation of monitoring resources
|
||||
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: rook-ceph-monitor
|
||||
namespace: rook-ceph
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: rook-ceph-monitor
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: rook-ceph-system
|
||||
namespace: rook-ceph
|
||||
# OLM: END ROLE BINDING
|
||||
---
|
||||
# OLM: BEGIN ROLE
|
||||
# Aspects for metrics collection
|
||||
kind: Role
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: rook-ceph-metrics
|
||||
namespace: rook-ceph
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
# OLM: END ROLE
|
||||
---
|
||||
# OLM: BEGIN ROLE BINDING
|
||||
# Allow collection of metrics
|
||||
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: rook-ceph-metrics
|
||||
namespace: rook-ceph
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: rook-ceph-metrics
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
# change to the serviceaccount and namespace to use for monitoring
|
||||
name: prometheus-k8s
|
||||
namespace: rook-ceph
|
||||
# OLM: END ROLE BINDING
|
||||
---
|
||||
# OLM: BEGIN ROLE
|
||||
# Allow management of monitoring resources in the mgr
|
||||
kind: Role
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: rook-ceph-monitor-mgr
|
||||
namespace: rook-ceph
|
||||
rules:
|
||||
- apiGroups:
|
||||
- monitoring.coreos.com
|
||||
resources:
|
||||
- servicemonitors
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- create
|
||||
- update
|
||||
# OLM: END ROLE
|
||||
---
|
||||
# OLM: BEGIN ROLE BINDING
|
||||
# Allow creation of monitoring resources in the mgr
|
||||
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: rook-ceph-monitor-mgr
|
||||
namespace: rook-ceph
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: rook-ceph-monitor-mgr
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: rook-ceph-mgr
|
||||
namespace: rook-ceph
|
||||
# OLM: END ROLE BINDING
|
||||
---
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: rook-ceph-mgr
|
||||
namespace: rook-ceph
|
||||
labels:
|
||||
team: rook
|
||||
spec:
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- rook-ceph
|
||||
selector:
|
||||
matchLabels:
|
||||
app: rook-ceph-mgr
|
||||
rook_cluster: rook-ceph
|
||||
ceph_daemon_id: a
|
||||
endpoints:
|
||||
- port: http-metrics
|
||||
path: /metrics
|
||||
interval: 5s
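# For context (a sketch, not part of this commit): the matchLabels above select the metrics
# Service that the Rook operator creates for the active mgr. With defaults it looks roughly
# like the following; port 9283 is the ceph-mgr prometheus module default, and exact labels
# can differ between Rook versions:
#   apiVersion: v1
#   kind: Service
#   metadata:
#     name: rook-ceph-mgr
#     namespace: rook-ceph
#     labels:
#       app: rook-ceph-mgr
#       rook_cluster: rook-ceph
#       ceph_daemon_id: a
#   spec:
#     ports:
#       - name: http-metrics
#         port: 9283
#         protocol: TCP
#         targetPort: 9283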
|
||||
|
|
@ -0,0 +1,694 @@
|
|||
#################################################################################################################
|
||||
# The deployment for the rook operator
|
||||
# Contains the common settings for most Kubernetes deployments.
|
||||
# For example, to create the rook-ceph cluster:
|
||||
# kubectl create -f crds.yaml -f common.yaml -f operator.yaml
|
||||
# kubectl create -f cluster.yaml
|
||||
#
|
||||
# Also see other operator sample files for variations of operator.yaml:
|
||||
# - operator-openshift.yaml: Common settings for running in OpenShift
|
||||
###############################################################################################################
|
||||
|
||||
# Rook Ceph Operator Config ConfigMap
|
||||
# Use this ConfigMap to override Rook-Ceph Operator configurations.
|
||||
# NOTE! Precedence will be given to this config if the same Env Var config also exists in the
|
||||
# Operator Deployment.
|
||||
# To move a configuration from the Operator Deployment to this ConfigMap, add the config
|
||||
# here. It is recommended to then remove it from the Deployment to eliminate any future confusion.
|
||||
kind: ConfigMap
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: rook-ceph-operator-config
|
||||
# should be in the namespace of the operator
|
||||
namespace: rook-ceph # namespace:operator
|
||||
data:
|
||||
# The logging level for the operator: ERROR | WARNING | INFO | DEBUG
|
||||
ROOK_LOG_LEVEL: "INFO"
|
||||
|
||||
# Allow using loop devices for osds in test clusters.
|
||||
ROOK_CEPH_ALLOW_LOOP_DEVICES: "false"
|
||||
|
||||
# Enable the CSI driver.
|
||||
# To run the non-default version of the CSI driver, see the override-able image properties in operator.yaml
|
||||
ROOK_CSI_ENABLE_CEPHFS: "true"
|
||||
# Enable the default version of the CSI RBD driver. To start another version of the CSI driver, see image properties below.
|
||||
ROOK_CSI_ENABLE_RBD: "true"
|
||||
# Enable the CSI NFS driver. To start another version of the CSI driver, see image properties below.
|
||||
ROOK_CSI_ENABLE_NFS: "false"
|
||||
ROOK_CSI_ENABLE_GRPC_METRICS: "false"
|
||||
|
||||
# Set to true to enable Ceph CSI pvc encryption support.
|
||||
CSI_ENABLE_ENCRYPTION: "false"
|
||||
|
||||
# Set to true to enable host networking for CSI CephFS and RBD nodeplugins. This may be necessary
|
||||
# in some network configurations where the SDN does not provide access to an external cluster or
|
||||
# there is significant drop in read/write performance.
|
||||
# CSI_ENABLE_HOST_NETWORK: "true"
|
||||
|
||||
# Set to true to enable adding volume metadata on the CephFS subvolume and RBD images.
|
||||
# Not all users might be interested in getting volume/snapshot details as metadata on CephFS subvolume and RBD images.
|
||||
# Hence, metadata is disabled by default.
|
||||
# CSI_ENABLE_METADATA: "true"
|
||||
|
||||
# cluster name identifier to set as metadata on the CephFS subvolume and RBD images. This will be useful in cases
|
||||
# like for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single ceph cluster.
|
||||
# CSI_CLUSTER_NAME: "my-prod-cluster"
|
||||
|
||||
# Set logging level for cephCSI containers maintained by the cephCSI.
|
||||
# Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity.
|
||||
# CSI_LOG_LEVEL: "0"
|
||||
|
||||
# Set logging level for Kubernetes-csi sidecar containers.
|
||||
# Supported values from 0 to 5. 0 for general useful logs (the default), 5 for trace level verbosity.
|
||||
# CSI_SIDECAR_LOG_LEVEL: "0"
|
||||
|
||||
# Set replicas for csi provisioner deployment.
|
||||
CSI_PROVISIONER_REPLICAS: "2"
|
||||
|
||||
# OMAP generator will generate the omap mapping between the PV name and the RBD image.
|
||||
# CSI_ENABLE_OMAP_GENERATOR needs to be enabled when the RBD mirroring feature is used.
|
||||
# By default the OMAP generator sidecar is deployed with the CSI provisioner pod; to disable
|
||||
# it, set this to false.
|
||||
# CSI_ENABLE_OMAP_GENERATOR: "false"
|
||||
|
||||
# set to false to disable deployment of snapshotter container in CephFS provisioner pod.
|
||||
CSI_ENABLE_CEPHFS_SNAPSHOTTER: "true"
|
||||
|
||||
# set to false to disable deployment of snapshotter container in NFS provisioner pod.
|
||||
CSI_ENABLE_NFS_SNAPSHOTTER: "true"
|
||||
|
||||
# set to false to disable deployment of snapshotter container in RBD provisioner pod.
|
||||
CSI_ENABLE_RBD_SNAPSHOTTER: "true"
|
||||
|
||||
# Enable cephfs kernel driver instead of ceph-fuse.
|
||||
# If you disable the kernel client, your application may be disrupted during upgrade.
|
||||
# See the upgrade guide: https://rook.io/docs/rook/latest/ceph-upgrade.html
|
||||
# NOTE! cephfs quota is not supported in kernel version < 4.17
|
||||
CSI_FORCE_CEPHFS_KERNEL_CLIENT: "true"
|
||||
|
||||
# (Optional) policy for modifying a volume's ownership or permissions when the RBD PVC is being mounted.
|
||||
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
|
||||
CSI_RBD_FSGROUPPOLICY: "File"
|
||||
|
||||
# (Optional) policy for modifying a volume's ownership or permissions when the CephFS PVC is being mounted.
|
||||
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
|
||||
CSI_CEPHFS_FSGROUPPOLICY: "File"
|
||||
|
||||
# (Optional) policy for modifying a volume's ownership or permissions when the NFS PVC is being mounted.
|
||||
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
|
||||
CSI_NFS_FSGROUPPOLICY: "File"
|
||||
|
||||
# (Optional) Allow starting unsupported ceph-csi image
|
||||
ROOK_CSI_ALLOW_UNSUPPORTED_VERSION: "false"
|
||||
|
||||
# (Optional) control the host mount of /etc/selinux for csi plugin pods.
|
||||
CSI_PLUGIN_ENABLE_SELINUX_HOST_MOUNT: "false"
|
||||
|
||||
# The default version of CSI supported by Rook will be started. To change the version
|
||||
# of the CSI driver to something other than what is officially supported, change
|
||||
# these images to the desired release of the CSI driver.
|
||||
# ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.8.0"
|
||||
# ROOK_CSI_REGISTRAR_IMAGE: "registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.7.0"
|
||||
# ROOK_CSI_RESIZER_IMAGE: "registry.k8s.io/sig-storage/csi-resizer:v1.7.0"
|
||||
# ROOK_CSI_PROVISIONER_IMAGE: "registry.k8s.io/sig-storage/csi-provisioner:v3.4.0"
|
||||
# ROOK_CSI_SNAPSHOTTER_IMAGE: "registry.k8s.io/sig-storage/csi-snapshotter:v6.2.1"
|
||||
# ROOK_CSI_ATTACHER_IMAGE: "registry.k8s.io/sig-storage/csi-attacher:v4.1.0"
|
||||
|
||||
# To indicate the image pull policy to be applied to all the containers in the csi driver pods.
|
||||
# ROOK_CSI_IMAGE_PULL_POLICY: "IfNotPresent"
|
||||
|
||||
# (Optional) set user created priorityclassName for csi plugin pods.
|
||||
CSI_PLUGIN_PRIORITY_CLASSNAME: "system-node-critical"
|
||||
|
||||
# (Optional) set user created priorityclassName for csi provisioner pods.
|
||||
CSI_PROVISIONER_PRIORITY_CLASSNAME: "system-cluster-critical"
|
||||
|
||||
# CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate.
|
||||
# Default value is RollingUpdate.
|
||||
# CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY: "OnDelete"
|
||||
# CSI RBD plugin daemonset update strategy, supported values are OnDelete and RollingUpdate.
|
||||
# Default value is RollingUpdate.
|
||||
# CSI_RBD_PLUGIN_UPDATE_STRATEGY: "OnDelete"
|
||||
# The maxUnavailable parameter of the CSI RBD plugin daemonset update strategy.
|
||||
# Default value is 1.
|
||||
# CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE: "1"
|
||||
|
||||
# CSI NFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate.
|
||||
# Default value is RollingUpdate.
|
||||
# CSI_NFS_PLUGIN_UPDATE_STRATEGY: "OnDelete"
|
||||
|
||||
# kubelet directory path, if kubelet configured to use other than /var/lib/kubelet path.
|
||||
# ROOK_CSI_KUBELET_DIR_PATH: "/var/lib/kubelet"
|
||||
|
||||
# Labels to add to the CSI CephFS Deployments and DaemonSets Pods.
|
||||
# ROOK_CSI_CEPHFS_POD_LABELS: "key1=value1,key2=value2"
|
||||
# Labels to add to the CSI RBD Deployments and DaemonSets Pods.
|
||||
# ROOK_CSI_RBD_POD_LABELS: "key1=value1,key2=value2"
|
||||
# Labels to add to the CSI NFS Deployments and DaemonSets Pods.
|
||||
# ROOK_CSI_NFS_POD_LABELS: "key1=value1,key2=value2"
|
||||
|
||||
# (Optional) CephCSI CephFS plugin Volumes
|
||||
# CSI_CEPHFS_PLUGIN_VOLUME: |
|
||||
# - name: lib-modules
|
||||
# hostPath:
|
||||
# path: /run/current-system/kernel-modules/lib/modules/
|
||||
# - name: host-nix
|
||||
# hostPath:
|
||||
# path: /nix
|
||||
|
||||
# (Optional) CephCSI CephFS plugin Volume mounts
|
||||
# CSI_CEPHFS_PLUGIN_VOLUME_MOUNT: |
|
||||
# - name: host-nix
|
||||
# mountPath: /nix
|
||||
# readOnly: true
|
||||
|
||||
# (Optional) CephCSI RBD plugin Volumes
|
||||
# CSI_RBD_PLUGIN_VOLUME: |
|
||||
# - name: lib-modules
|
||||
# hostPath:
|
||||
# path: /run/current-system/kernel-modules/lib/modules/
|
||||
# - name: host-nix
|
||||
# hostPath:
|
||||
# path: /nix
|
||||
|
||||
# (Optional) CephCSI RBD plugin Volume mounts
|
||||
# CSI_RBD_PLUGIN_VOLUME_MOUNT: |
|
||||
# - name: host-nix
|
||||
# mountPath: /nix
|
||||
# readOnly: true
|
||||
|
||||
# (Optional) CephCSI provisioner NodeAffinity (applied to both CephFS and RBD provisioner).
|
||||
# CSI_PROVISIONER_NODE_AFFINITY: "role=storage-node; storage=rook, ceph"
|
||||
# (Optional) CephCSI provisioner tolerations list (applied to both CephFS and RBD provisioner).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# The CSI provisioner is best started on the same nodes as the other ceph daemons.
|
||||
# CSI_PROVISIONER_TOLERATIONS: |
|
||||
# - effect: NoSchedule
|
||||
# key: node-role.kubernetes.io/control-plane
|
||||
# operator: Exists
|
||||
# - effect: NoExecute
|
||||
# key: node-role.kubernetes.io/etcd
|
||||
# operator: Exists
|
||||
# (Optional) CephCSI plugin NodeAffinity (applied to both CephFS and RBD plugin).
|
||||
# CSI_PLUGIN_NODE_AFFINITY: "role=storage-node; storage=rook, ceph"
|
||||
# (Optional) CephCSI plugin tolerations list (applied to both CephFS and RBD plugin).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI plugins need to be started on all the nodes where the clients need to mount the storage.
|
||||
# CSI_PLUGIN_TOLERATIONS: |
|
||||
# - effect: NoSchedule
|
||||
# key: node-role.kubernetes.io/control-plane
|
||||
# operator: Exists
|
||||
# - effect: NoExecute
|
||||
# key: node-role.kubernetes.io/etcd
|
||||
# operator: Exists
|
||||
|
||||
# (Optional) CephCSI RBD provisioner NodeAffinity (if specified, overrides CSI_PROVISIONER_NODE_AFFINITY).
|
||||
# CSI_RBD_PROVISIONER_NODE_AFFINITY: "role=rbd-node"
|
||||
# (Optional) CephCSI RBD provisioner tolerations list (if specified, overrides CSI_PROVISIONER_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# The CSI provisioner is best started on the same nodes as the other ceph daemons.
|
||||
# CSI_RBD_PROVISIONER_TOLERATIONS: |
|
||||
# - key: node.rook.io/rbd
|
||||
# operator: Exists
|
||||
# (Optional) CephCSI RBD plugin NodeAffinity (if specified, overrides CSI_PLUGIN_NODE_AFFINITY).
|
||||
# CSI_RBD_PLUGIN_NODE_AFFINITY: "role=rbd-node"
|
||||
# (Optional) CephCSI RBD plugin tolerations list (if specified, overrides CSI_PLUGIN_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI plugins need to be started on all the nodes where the clients need to mount the storage.
|
||||
# CSI_RBD_PLUGIN_TOLERATIONS: |
|
||||
# - key: node.rook.io/rbd
|
||||
# operator: Exists
|
||||
|
||||
# (Optional) CephCSI CephFS provisioner NodeAffinity (if specified, overrides CSI_PROVISIONER_NODE_AFFINITY).
|
||||
# CSI_CEPHFS_PROVISIONER_NODE_AFFINITY: "role=cephfs-node"
|
||||
# (Optional) CephCSI CephFS provisioner tolerations list (if specified, overrides CSI_PROVISIONER_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# The CSI provisioner is best started on the same nodes as the other ceph daemons.
|
||||
# CSI_CEPHFS_PROVISIONER_TOLERATIONS: |
|
||||
# - key: node.rook.io/cephfs
|
||||
# operator: Exists
|
||||
# (Optional) CephCSI CephFS plugin NodeAffinity (if specified, overrides CSI_PLUGIN_NODE_AFFINITY).
|
||||
# CSI_CEPHFS_PLUGIN_NODE_AFFINITY: "role=cephfs-node"
|
||||
# NOTE: Support for defining NodeAffinity for operators other than "In" and "Exists" requires the user to input a
|
||||
# valid v1.NodeAffinity JSON or YAML string. For example, the following is valid YAML v1.NodeAffinity:
|
||||
# CSI_CEPHFS_PLUGIN_NODE_AFFINITY: |
|
||||
# requiredDuringSchedulingIgnoredDuringExecution:
|
||||
# nodeSelectorTerms:
|
||||
# - matchExpressions:
|
||||
# - key: myKey
|
||||
# operator: DoesNotExist
|
||||
# (Optional) CephCSI CephFS plugin tolerations list (if specified, overrides CSI_PLUGIN_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI plugins need to be started on all the nodes where the clients need to mount the storage.
|
||||
# CSI_CEPHFS_PLUGIN_TOLERATIONS: |
|
||||
# - key: node.rook.io/cephfs
|
||||
# operator: Exists
|
||||
|
||||
# (Optional) CephCSI NFS provisioner NodeAffinity (overrides CSI_PROVISIONER_NODE_AFFINITY).
|
||||
# CSI_NFS_PROVISIONER_NODE_AFFINITY: "role=nfs-node"
|
||||
# (Optional) CephCSI NFS provisioner tolerations list (overrides CSI_PROVISIONER_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# The CSI provisioner is best started on the same nodes as the other ceph daemons.
|
||||
# CSI_NFS_PROVISIONER_TOLERATIONS: |
|
||||
# - key: node.rook.io/nfs
|
||||
# operator: Exists
|
||||
# (Optional) CephCSI NFS plugin NodeAffinity (overrides CSI_PLUGIN_NODE_AFFINITY).
|
||||
# CSI_NFS_PLUGIN_NODE_AFFINITY: "role=nfs-node"
|
||||
# (Optional) CephCSI NFS plugin tolerations list (overrides CSI_PLUGIN_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI plugins need to be started on all the nodes where the clients need to mount the storage.
|
||||
# CSI_NFS_PLUGIN_TOLERATIONS: |
|
||||
# - key: node.rook.io/nfs
|
||||
# operator: Exists
|
||||
|
||||
# (Optional) CEPH CSI RBD provisioner resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for provisioner pod
|
||||
#CSI_RBD_PROVISIONER_RESOURCE: |
|
||||
# - name : csi-provisioner
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-resizer
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-attacher
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-snapshotter
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-rbdplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : csi-omap-generator
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : liveness-prometheus
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# (Optional) CEPH CSI RBD plugin resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for plugin pod
|
||||
#CSI_RBD_PLUGIN_RESOURCE: |
|
||||
# - name : driver-registrar
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# - name : csi-rbdplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : liveness-prometheus
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# (Optional) CEPH CSI CephFS provisioner resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for provisioner pod
|
||||
#CSI_CEPHFS_PROVISIONER_RESOURCE: |
|
||||
# - name : csi-provisioner
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-resizer
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-attacher
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-snapshotter
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-cephfsplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : liveness-prometheus
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# (Optional) CEPH CSI CephFS plugin resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for plugin pod
|
||||
#CSI_CEPHFS_PLUGIN_RESOURCE: |
|
||||
# - name : driver-registrar
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# - name : csi-cephfsplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : liveness-prometheus
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
|
||||
# (Optional) CEPH CSI NFS provisioner resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for provisioner pod
|
||||
# CSI_NFS_PROVISIONER_RESOURCE: |
|
||||
# - name : csi-provisioner
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-nfsplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : csi-attacher
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# (Optional) CEPH CSI NFS plugin resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for plugin pod
|
||||
# CSI_NFS_PLUGIN_RESOURCE: |
|
||||
# - name : driver-registrar
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# - name : csi-nfsplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
|
||||
# Configure CSI Ceph FS grpc and liveness metrics port
|
||||
# Set to true to enable Ceph CSI liveness container.
|
||||
CSI_ENABLE_LIVENESS: "false"
|
||||
# CSI_CEPHFS_GRPC_METRICS_PORT: "9091"
|
||||
# CSI_CEPHFS_LIVENESS_METRICS_PORT: "9081"
|
||||
# Configure CSI RBD grpc and liveness metrics port
|
||||
# CSI_RBD_GRPC_METRICS_PORT: "9090"
|
||||
# CSI_RBD_LIVENESS_METRICS_PORT: "9080"
|
||||
# CSIADDONS_PORT: "9070"
|
||||
|
||||
# Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options
|
||||
# Set to "ms_mode=secure" when connections.encrypted is enabled in CephCluster CR
|
||||
# CSI_CEPHFS_KERNEL_MOUNT_OPTIONS: "ms_mode=secure"
|
||||
|
||||
# Whether the OBC provisioner should watch the operator namespace or not; if not, the namespace of the cluster will be used
|
||||
ROOK_OBC_WATCH_OPERATOR_NAMESPACE: "true"
|
||||
|
||||
# Whether to start the discovery daemon to watch for raw storage devices on nodes in the cluster.
|
||||
# This daemon does not need to run if you are only going to create your OSDs based on StorageClassDeviceSets with PVCs.
|
||||
ROOK_ENABLE_DISCOVERY_DAEMON: "false"
|
||||
# The timeout value (in seconds) of Ceph commands. It should be >= 1. If this variable is not set or is an invalid value, it defaults to 15.
|
||||
ROOK_CEPH_COMMANDS_TIMEOUT_SECONDS: "15"
|
||||
# Enable the csi addons sidecar.
|
||||
CSI_ENABLE_CSIADDONS: "false"
|
||||
# ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.5.0"
|
||||
# The CSI GRPC timeout value (in seconds). It should be >= 120. If this variable is not set or is an invalid value, it defaults to 150.
|
||||
CSI_GRPC_TIMEOUT_SECONDS: "150"
|
||||
|
||||
ROOK_DISABLE_ADMISSION_CONTROLLER: "true"
|
||||
|
||||
# Enable topology based provisioning.
|
||||
CSI_ENABLE_TOPOLOGY: "false"
|
||||
# Domain labels define which node labels to use as domains
|
||||
# for CSI nodeplugins to advertise their domains
|
||||
# NOTE: the value here serves as an example and needs to be
|
||||
# updated with node labels that define domains of interest
|
||||
# CSI_TOPOLOGY_DOMAIN_LABELS: "kubernetes.io/hostname,topology.kubernetes.io/zone,topology.rook.io/rack"
|
||||
|
||||
# Enable read affinity for RBD volumes. Recommended to
|
||||
# set to true if running kernel 5.8 or newer.
|
||||
CSI_ENABLE_READ_AFFINITY: "false"
|
||||
# CRUSH location labels define which node labels to use
|
||||
# as CRUSH location. This should correspond to the values set in
|
||||
# the CRUSH map.
|
||||
# Defaults to all the labels mentioned in
|
||||
# https://rook.io/docs/rook/latest/CRDs/Cluster/ceph-cluster-crd/#osd-topology
|
||||
# CSI_CRUSH_LOCATION_LABELS: "kubernetes.io/hostname,topology.kubernetes.io/zone,topology.rook.io/rack"
|
||||
|
||||
# Whether to skip any attach operation altogether for CephCSI PVCs.
|
||||
# See more details [here](https://kubernetes-csi.github.io/docs/skip-attach.html#skip-attach-with-csi-driver-object).
|
||||
# If set to false it skips the volume attachments and makes the creation of pods using the CephCSI PVC fast.
|
||||
# **WARNING** It's highly discouraged to use this for RWO volumes. For RBD PVCs it can cause data corruption,
|
||||
# csi-addons operations like Reclaimspace and PVC Keyrotation will also not be supported if set to false
|
||||
# since we'll have no VolumeAttachments to determine which node the PVC is mounted on.
|
||||
# Refer to this [issue](https://github.com/kubernetes/kubernetes/issues/103305) for more details.
|
||||
CSI_CEPHFS_ATTACH_REQUIRED: "true"
|
||||
CSI_RBD_ATTACH_REQUIRED: "true"
|
||||
CSI_NFS_ATTACH_REQUIRED: "true"
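# Usage note (a sketch, not part of the upstream manifest): per the NOTE at the top of this
# file, values set here take precedence over the matching env vars on the operator Deployment.
# A single option can be changed in place after deployment, e.g. raising the log level
# (the DEBUG value is just an example):
#   kubectl -n rook-ceph patch configmap rook-ceph-operator-config \
#     --type merge -p '{"data":{"ROOK_LOG_LEVEL":"DEBUG"}}'
# The operator watches this ConfigMap, so most settings should be picked up without a restart.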
|
||||
---
|
||||
# OLM: BEGIN OPERATOR DEPLOYMENT
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: rook-ceph-operator
|
||||
namespace: rook-ceph # namespace:operator
|
||||
labels:
|
||||
operator: rook
|
||||
storage-backend: ceph
|
||||
app.kubernetes.io/name: rook-ceph
|
||||
app.kubernetes.io/instance: rook-ceph
|
||||
app.kubernetes.io/component: rook-ceph-operator
|
||||
app.kubernetes.io/part-of: rook-ceph-operator
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: rook-ceph-operator
|
||||
strategy:
|
||||
type: Recreate
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: rook-ceph-operator
|
||||
spec:
|
||||
serviceAccountName: rook-ceph-system
|
||||
containers:
|
||||
- name: rook-ceph-operator
|
||||
image: rook/ceph:master
|
||||
args: ["ceph", "operator"]
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 2016
|
||||
runAsGroup: 2016
|
||||
volumeMounts:
|
||||
- mountPath: /var/lib/rook
|
||||
name: rook-config
|
||||
- mountPath: /etc/ceph
|
||||
name: default-config-dir
|
||||
- mountPath: /etc/webhook
|
||||
name: webhook-cert
|
||||
ports:
|
||||
- containerPort: 9443
|
||||
name: https-webhook
|
||||
protocol: TCP
|
||||
env:
|
||||
# If the operator should only watch for cluster CRDs in the same namespace, set this to "true".
|
||||
# If this is not set to true, the operator will watch for cluster CRDs in all namespaces.
|
||||
- name: ROOK_CURRENT_NAMESPACE_ONLY
|
||||
value: "false"
|
||||
# Rook Discover toleration. Will tolerate all taints with all keys.
|
||||
# Choose between NoSchedule, PreferNoSchedule and NoExecute:
|
||||
# - name: DISCOVER_TOLERATION
|
||||
# value: "NoSchedule"
|
||||
# (Optional) Rook Discover toleration key. Set this to the key of the taint you want to tolerate
|
||||
# - name: DISCOVER_TOLERATION_KEY
|
||||
# value: "<KeyOfTheTaintToTolerate>"
|
||||
# (Optional) Rook Discover tolerations list. Put here list of taints you want to tolerate in YAML format.
|
||||
# - name: DISCOVER_TOLERATIONS
|
||||
# value: |
|
||||
# - effect: NoSchedule
|
||||
# key: node-role.kubernetes.io/control-plane
|
||||
# operator: Exists
|
||||
# - effect: NoExecute
|
||||
# key: node-role.kubernetes.io/etcd
|
||||
# operator: Exists
|
||||
# (Optional) Rook Discover priority class name to set on the pod(s)
|
||||
# - name: DISCOVER_PRIORITY_CLASS_NAME
|
||||
# value: "<PriorityClassName>"
|
||||
# (Optional) Discover Agent NodeAffinity.
|
||||
# - name: DISCOVER_AGENT_NODE_AFFINITY
|
||||
# value: "role=storage-node; storage=rook, ceph"
|
||||
# (Optional) Discover Agent Pod Labels.
|
||||
# - name: DISCOVER_AGENT_POD_LABELS
|
||||
# value: "key1=value1,key2=value2"
|
||||
|
||||
# The duration between discovering devices in the rook-discover daemonset.
|
||||
- name: ROOK_DISCOVER_DEVICES_INTERVAL
|
||||
value: "60m"
|
||||
|
||||
# Whether to start pods as privileged that mount a host path, which includes the Ceph mon and osd pods.
|
||||
# Set this to true if SELinux is enabled (e.g. OpenShift) to workaround the anyuid issues.
|
||||
# For more details see https://github.com/rook/rook/issues/1314#issuecomment-355799641
|
||||
- name: ROOK_HOSTPATH_REQUIRES_PRIVILEGED
|
||||
value: "false"
|
||||
|
||||
# Disable automatic orchestration when new devices are discovered
|
||||
- name: ROOK_DISABLE_DEVICE_HOTPLUG
|
||||
value: "false"
|
||||
|
||||
# Provide customised regexes as comma-separated values. For example, the regex for an rbd-based volume would be "(?i)rbd[0-9]+".
|
||||
# In case of more than one regex, use comma to separate between them.
|
||||
# Default regex will be "(?i)dm-[0-9]+,(?i)rbd[0-9]+,(?i)nbd[0-9]+"
|
||||
# To blacklist an additional disk, append its regex after a comma.
|
||||
# If value is empty, the default regex will be used.
|
||||
- name: DISCOVER_DAEMON_UDEV_BLACKLIST
|
||||
value: "(?i)dm-[0-9]+,(?i)rbd[0-9]+,(?i)nbd[0-9]+"
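# For example (illustrative only), appending ",(?i)loop[0-9]+" to the value above
# would additionally blacklist loop devices from discovery.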
|
||||
|
||||
# - name: DISCOVER_DAEMON_RESOURCES
|
||||
# value: |
|
||||
# resources:
|
||||
# limits:
|
||||
# cpu: 500m
|
||||
# memory: 512Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
# Time to wait until the node controller will move Rook pods to other
|
||||
# nodes after detecting an unreachable node.
|
||||
# Pods affected by this setting are:
|
||||
# mgr, rbd, mds, rgw, nfs, PVC based mons and osds, and ceph toolbox
|
||||
# The value used in this variable replaces the default value of 300 secs
|
||||
# added automatically by k8s as Toleration for
|
||||
# <node.kubernetes.io/unreachable>
|
||||
# The total amount of time to reschedule Rook pods in healthy nodes
|
||||
# before detecting a <not ready node> condition will be the sum of:
|
||||
# --> node-monitor-grace-period: 40 seconds (k8s kube-controller-manager flag)
|
||||
# --> ROOK_UNREACHABLE_NODE_TOLERATION_SECONDS: 5 seconds
|
||||
- name: ROOK_UNREACHABLE_NODE_TOLERATION_SECONDS
|
||||
value: "5"
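# With the defaults above, affected pods therefore start being rescheduled roughly
# 40 + 5 = 45 seconds after a node becomes unreachable.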
|
||||
|
||||
# The name of the node to pass with the downward API
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
# The pod name to pass with the downward API
|
||||
- name: POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
# The pod namespace to pass with the downward API
|
||||
- name: POD_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
# Recommended resource requests and limits, if desired
|
||||
#resources:
|
||||
# limits:
|
||||
# cpu: 500m
|
||||
# memory: 512Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
# Uncomment it to run lib bucket provisioner in multithreaded mode
|
||||
#- name: LIB_BUCKET_PROVISIONER_THREADS
|
||||
# value: "5"
|
||||
|
||||
# Uncomment it to run rook operator on the host network
|
||||
#hostNetwork: true
|
||||
volumes:
|
||||
- name: rook-config
|
||||
emptyDir: {}
|
||||
- name: default-config-dir
|
||||
emptyDir: {}
|
||||
- name: webhook-cert
|
||||
emptyDir: {}
|
||||
# OLM: END OPERATOR DEPLOYMENT
|
||||
|
|
@ -0,0 +1,690 @@
|
|||
#################################################################################################################
|
||||
# The deployment for the rook operator
|
||||
# Contains the common settings for most Kubernetes deployments.
|
||||
# For example, to create the rook-ceph cluster:
|
||||
# kubectl create -f crds.yaml -f common.yaml -f operator.yaml
|
||||
# kubectl create -f cluster.yaml
|
||||
#
|
||||
# Also see other operator sample files for variations of operator.yaml:
|
||||
# - operator-openshift.yaml: Common settings for running in OpenShift
|
||||
###############################################################################################################
|
||||
|
||||
# Rook Ceph Operator Config ConfigMap
|
||||
# Use this ConfigMap to override Rook-Ceph Operator configurations.
|
||||
# NOTE! Precedence will be given to this config if the same Env Var config also exists in the
|
||||
# Operator Deployment.
|
||||
# To move a configuration(s) from the Operator Deployment to this ConfigMap, add the config
|
||||
# here. It is recommended to then remove it from the Deployment to eliminate any future confusion.
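# For example (illustrative only), a running operator can be switched to debug logging
# by patching this ConfigMap instead of editing the Deployment:
#   kubectl -n rook-ceph patch configmap rook-ceph-operator-config \
#     --type merge -p '{"data":{"ROOK_LOG_LEVEL":"DEBUG"}}'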
|
||||
kind: ConfigMap
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: rook-ceph-operator-config
|
||||
# should be in the namespace of the operator
|
||||
namespace: rook-ceph # namespace:operator
|
||||
data:
|
||||
# The logging level for the operator: ERROR | WARNING | INFO | DEBUG
|
||||
ROOK_LOG_LEVEL: "INFO"
|
||||
|
||||
# Allow using loop devices for osds in test clusters.
|
||||
ROOK_CEPH_ALLOW_LOOP_DEVICES: "false"
|
||||
|
||||
# Enable the CSI driver.
|
||||
# To run the non-default version of the CSI driver, see the override-able image properties in operator.yaml
|
||||
ROOK_CSI_ENABLE_CEPHFS: "true"
|
||||
# Enable the default version of the CSI RBD driver. To start another version of the CSI driver, see image properties below.
|
||||
ROOK_CSI_ENABLE_RBD: "true"
|
||||
# Enable the CSI NFS driver. To start another version of the CSI driver, see image properties below.
|
||||
ROOK_CSI_ENABLE_NFS: "false"
|
||||
ROOK_CSI_ENABLE_GRPC_METRICS: "false"
|
||||
|
||||
# Set to true to enable Ceph CSI pvc encryption support.
|
||||
CSI_ENABLE_ENCRYPTION: "false"
|
||||
|
||||
# Set to true to enable host networking for CSI CephFS and RBD nodeplugins. This may be necessary
|
||||
# in some network configurations where the SDN does not provide access to an external cluster or
|
||||
# there is significant drop in read/write performance.
|
||||
# CSI_ENABLE_HOST_NETWORK: "true"
|
||||
|
||||
# Set to true to enable adding volume metadata on the CephFS subvolume and RBD images.
|
||||
# Not all users might be interested in getting volume/snapshot details as metadata on CephFS subvolume and RBD images.
|
||||
# Hence metadata is disabled by default.
|
||||
# CSI_ENABLE_METADATA: "true"
|
||||
|
||||
# cluster name identifier to set as metadata on the CephFS subvolume and RBD images. This will be useful in cases
|
||||
# like for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single ceph cluster.
|
||||
# CSI_CLUSTER_NAME: "my-prod-cluster"
|
||||
|
||||
# Set logging level for cephCSI containers maintained by the cephCSI.
|
||||
# Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity.
|
||||
# CSI_LOG_LEVEL: "0"
|
||||
|
||||
# Set logging level for Kubernetes-csi sidecar containers.
|
||||
# Supported values from 0 to 5. 0 for general useful logs (the default), 5 for trace level verbosity.
|
||||
# CSI_SIDECAR_LOG_LEVEL: "0"
|
||||
|
||||
# Set replicas for csi provisioner deployment.
|
||||
CSI_PROVISIONER_REPLICAS: "1"
|
||||
|
||||
# OMAP generator will generate the omap mapping between the PV name and the RBD image.
|
||||
# CSI_ENABLE_OMAP_GENERATOR needs to be enabled when the RBD mirroring feature is used.
|
||||
# By default OMAP generator sidecar is deployed with CSI provisioner pod, to disable
|
||||
# it set it to false.
|
||||
# CSI_ENABLE_OMAP_GENERATOR: "false"
|
||||
|
||||
# set to false to disable deployment of snapshotter container in CephFS provisioner pod.
|
||||
CSI_ENABLE_CEPHFS_SNAPSHOTTER: "true"
|
||||
|
||||
# set to false to disable deployment of snapshotter container in NFS provisioner pod.
|
||||
CSI_ENABLE_NFS_SNAPSHOTTER: "true"
|
||||
|
||||
# set to false to disable deployment of snapshotter container in RBD provisioner pod.
|
||||
CSI_ENABLE_RBD_SNAPSHOTTER: "true"
|
||||
|
||||
# Enable cephfs kernel driver instead of ceph-fuse.
|
||||
# If you disable the kernel client, your application may be disrupted during upgrade.
|
||||
# See the upgrade guide: https://rook.io/docs/rook/latest/ceph-upgrade.html
|
||||
# NOTE! cephfs quota is not supported in kernel version < 4.17
|
||||
CSI_FORCE_CEPHFS_KERNEL_CLIENT: "true"
|
||||
|
||||
# (Optional) policy for modifying a volume's ownership or permissions when the RBD PVC is being mounted.
|
||||
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
|
||||
CSI_RBD_FSGROUPPOLICY: "File"
|
||||
|
||||
# (Optional) policy for modifying a volume's ownership or permissions when the CephFS PVC is being mounted.
|
||||
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
|
||||
CSI_CEPHFS_FSGROUPPOLICY: "File"
|
||||
|
||||
# (Optional) policy for modifying a volume's ownership or permissions when the NFS PVC is being mounted.
|
||||
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
|
||||
CSI_NFS_FSGROUPPOLICY: "File"
|
||||
|
||||
# (Optional) Allow starting unsupported ceph-csi image
|
||||
ROOK_CSI_ALLOW_UNSUPPORTED_VERSION: "false"
|
||||
|
||||
# (Optional) control the host mount of /etc/selinux for csi plugin pods.
|
||||
CSI_PLUGIN_ENABLE_SELINUX_HOST_MOUNT: "false"
|
||||
|
||||
# The default version of CSI supported by Rook will be started. To change the version
|
||||
# of the CSI driver to something other than what is officially supported, change
|
||||
# these images to the desired release of the CSI driver.
|
||||
# ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.9.0"
|
||||
# ROOK_CSI_REGISTRAR_IMAGE: "registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.8.0"
|
||||
# ROOK_CSI_RESIZER_IMAGE: "registry.k8s.io/sig-storage/csi-resizer:v1.8.0"
|
||||
# ROOK_CSI_PROVISIONER_IMAGE: "registry.k8s.io/sig-storage/csi-provisioner:v3.5.0"
|
||||
# ROOK_CSI_SNAPSHOTTER_IMAGE: "registry.k8s.io/sig-storage/csi-snapshotter:v6.2.2"
|
||||
# ROOK_CSI_ATTACHER_IMAGE: "registry.k8s.io/sig-storage/csi-attacher:v4.3.0"
|
||||
|
||||
# To indicate the image pull policy to be applied to all the containers in the csi driver pods.
|
||||
# ROOK_CSI_IMAGE_PULL_POLICY: "IfNotPresent"
|
||||
|
||||
# (Optional) set user created priorityclassName for csi plugin pods.
|
||||
CSI_PLUGIN_PRIORITY_CLASSNAME: "system-node-critical"
|
||||
|
||||
# (Optional) set user created priorityclassName for csi provisioner pods.
|
||||
CSI_PROVISIONER_PRIORITY_CLASSNAME: "system-cluster-critical"
|
||||
|
||||
# CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate.
|
||||
# Default value is RollingUpdate.
|
||||
# CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY: "OnDelete"
|
||||
# A maxUnavailable parameter of CSI cephFS plugin daemonset update strategy.
|
||||
# Default value is 1.
|
||||
# CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE: "1"
|
||||
# CSI RBD plugin daemonset update strategy, supported values are OnDelete and RollingUpdate.
|
||||
# Default value is RollingUpdate.
|
||||
# CSI_RBD_PLUGIN_UPDATE_STRATEGY: "OnDelete"
|
||||
# A maxUnavailable parameter of CSI RBD plugin daemonset update strategy.
|
||||
# Default value is 1.
|
||||
# CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE: "1"
|
||||
|
||||
# CSI NFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate.
|
||||
# Default value is RollingUpdate.
|
||||
# CSI_NFS_PLUGIN_UPDATE_STRATEGY: "OnDelete"
|
||||
|
||||
# kubelet directory path, if kubelet configured to use other than /var/lib/kubelet path.
|
||||
# ROOK_CSI_KUBELET_DIR_PATH: "/var/lib/kubelet"
|
||||
|
||||
# Labels to add to the CSI CephFS Deployments and DaemonSets Pods.
|
||||
# ROOK_CSI_CEPHFS_POD_LABELS: "key1=value1,key2=value2"
|
||||
# Labels to add to the CSI RBD Deployments and DaemonSets Pods.
|
||||
# ROOK_CSI_RBD_POD_LABELS: "key1=value1,key2=value2"
|
||||
# Labels to add to the CSI NFS Deployments and DaemonSets Pods.
|
||||
# ROOK_CSI_NFS_POD_LABELS: "key1=value1,key2=value2"
|
||||
|
||||
# (Optional) CephCSI CephFS plugin Volumes
|
||||
# CSI_CEPHFS_PLUGIN_VOLUME: |
|
||||
# - name: lib-modules
|
||||
# hostPath:
|
||||
# path: /run/current-system/kernel-modules/lib/modules/
|
||||
# - name: host-nix
|
||||
# hostPath:
|
||||
# path: /nix
|
||||
|
||||
# (Optional) CephCSI CephFS plugin Volume mounts
|
||||
# CSI_CEPHFS_PLUGIN_VOLUME_MOUNT: |
|
||||
# - name: host-nix
|
||||
# mountPath: /nix
|
||||
# readOnly: true
|
||||
|
||||
# (Optional) CephCSI RBD plugin Volumes
|
||||
# CSI_RBD_PLUGIN_VOLUME: |
|
||||
# - name: lib-modules
|
||||
# hostPath:
|
||||
# path: /run/current-system/kernel-modules/lib/modules/
|
||||
# - name: host-nix
|
||||
# hostPath:
|
||||
# path: /nix
|
||||
|
||||
# (Optional) CephCSI RBD plugin Volume mounts
|
||||
# CSI_RBD_PLUGIN_VOLUME_MOUNT: |
|
||||
# - name: host-nix
|
||||
# mountPath: /nix
|
||||
# readOnly: true
|
||||
|
||||
# (Optional) CephCSI provisioner NodeAffinity (applied to both CephFS and RBD provisioner).
|
||||
# CSI_PROVISIONER_NODE_AFFINITY: "role=storage-node; storage=rook, ceph"
|
||||
# (Optional) CephCSI provisioner tolerations list(applied to both CephFS and RBD provisioner).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI provisioner would be best to start on the same nodes as other ceph daemons.
|
||||
# CSI_PROVISIONER_TOLERATIONS: |
|
||||
# - effect: NoSchedule
|
||||
# key: node-role.kubernetes.io/control-plane
|
||||
# operator: Exists
|
||||
# - effect: NoExecute
|
||||
# key: node-role.kubernetes.io/etcd
|
||||
# operator: Exists
|
||||
# (Optional) CephCSI plugin NodeAffinity (applied to both CephFS and RBD plugin).
|
||||
# CSI_PLUGIN_NODE_AFFINITY: "role=storage-node; storage=rook, ceph"
|
||||
# (Optional) CephCSI plugin tolerations list(applied to both CephFS and RBD plugin).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI plugins need to be started on all the nodes where the clients need to mount the storage.
|
||||
# CSI_PLUGIN_TOLERATIONS: |
|
||||
# - effect: NoSchedule
|
||||
# key: node-role.kubernetes.io/control-plane
|
||||
# operator: Exists
|
||||
# - effect: NoExecute
|
||||
# key: node-role.kubernetes.io/etcd
|
||||
# operator: Exists
|
||||
|
||||
# (Optional) CephCSI RBD provisioner NodeAffinity (if specified, overrides CSI_PROVISIONER_NODE_AFFINITY).
|
||||
# CSI_RBD_PROVISIONER_NODE_AFFINITY: "role=rbd-node"
|
||||
# (Optional) CephCSI RBD provisioner tolerations list(if specified, overrides CSI_PROVISIONER_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI provisioner would be best to start on the same nodes as other ceph daemons.
|
||||
# CSI_RBD_PROVISIONER_TOLERATIONS: |
|
||||
# - key: node.rook.io/rbd
|
||||
# operator: Exists
|
||||
# (Optional) CephCSI RBD plugin NodeAffinity (if specified, overrides CSI_PLUGIN_NODE_AFFINITY).
|
||||
# CSI_RBD_PLUGIN_NODE_AFFINITY: "role=rbd-node"
|
||||
# (Optional) CephCSI RBD plugin tolerations list(if specified, overrides CSI_PLUGIN_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI plugins need to be started on all the nodes where the clients need to mount the storage.
|
||||
# CSI_RBD_PLUGIN_TOLERATIONS: |
|
||||
# - key: node.rook.io/rbd
|
||||
# operator: Exists
|
||||
|
||||
# (Optional) CephCSI CephFS provisioner NodeAffinity (if specified, overrides CSI_PROVISIONER_NODE_AFFINITY).
|
||||
# CSI_CEPHFS_PROVISIONER_NODE_AFFINITY: "role=cephfs-node"
|
||||
# (Optional) CephCSI CephFS provisioner tolerations list(if specified, overrides CSI_PROVISIONER_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI provisioner would be best to start on the same nodes as other ceph daemons.
|
||||
# CSI_CEPHFS_PROVISIONER_TOLERATIONS: |
|
||||
# - key: node.rook.io/cephfs
|
||||
# operator: Exists
|
||||
# (Optional) CephCSI CephFS plugin NodeAffinity (if specified, overrides CSI_PLUGIN_NODE_AFFINITY).
|
||||
# CSI_CEPHFS_PLUGIN_NODE_AFFINITY: "role=cephfs-node"
|
||||
# NOTE: Support for defining NodeAffinity for operators other than "In" and "Exists" requires the user to input a
|
||||
# valid v1.NodeAffinity JSON or YAML string. For example, the following is valid YAML v1.NodeAffinity:
|
||||
# CSI_CEPHFS_PLUGIN_NODE_AFFINITY: |
|
||||
# requiredDuringSchedulingIgnoredDuringExecution:
|
||||
# nodeSelectorTerms:
|
||||
# - matchExpressions:
|
||||
# - key: myKey
|
||||
# operator: DoesNotExist
|
||||
# (Optional) CephCSI CephFS plugin tolerations list(if specified, overrides CSI_PLUGIN_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI plugins need to be started on all the nodes where the clients need to mount the storage.
|
||||
# CSI_CEPHFS_PLUGIN_TOLERATIONS: |
|
||||
# - key: node.rook.io/cephfs
|
||||
# operator: Exists
|
||||
|
||||
# (Optional) CephCSI NFS provisioner NodeAffinity (overrides CSI_PROVISIONER_NODE_AFFINITY).
|
||||
# CSI_NFS_PROVISIONER_NODE_AFFINITY: "role=nfs-node"
|
||||
# (Optional) CephCSI NFS provisioner tolerations list (overrides CSI_PROVISIONER_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI provisioner would be best to start on the same nodes as other ceph daemons.
|
||||
# CSI_NFS_PROVISIONER_TOLERATIONS: |
|
||||
# - key: node.rook.io/nfs
|
||||
# operator: Exists
|
||||
# (Optional) CephCSI NFS plugin NodeAffinity (overrides CSI_PLUGIN_NODE_AFFINITY).
|
||||
# CSI_NFS_PLUGIN_NODE_AFFINITY: "role=nfs-node"
|
||||
# (Optional) CephCSI NFS plugin tolerations list (overrides CSI_PLUGIN_TOLERATIONS).
|
||||
# Put here list of taints you want to tolerate in YAML format.
|
||||
# CSI plugins need to be started on all the nodes where the clients need to mount the storage.
|
||||
# CSI_NFS_PLUGIN_TOLERATIONS: |
|
||||
# - key: node.rook.io/nfs
|
||||
# operator: Exists
|
||||
|
||||
# (Optional) CEPH CSI RBD provisioner resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for provisioner pod
|
||||
#CSI_RBD_PROVISIONER_RESOURCE: |
|
||||
# - name : csi-provisioner
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-resizer
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-attacher
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-snapshotter
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-rbdplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : csi-omap-generator
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : liveness-prometheus
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# (Optional) CEPH CSI RBD plugin resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for plugin pod
|
||||
#CSI_RBD_PLUGIN_RESOURCE: |
|
||||
# - name : driver-registrar
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# - name : csi-rbdplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : liveness-prometheus
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# (Optional) CEPH CSI CephFS provisioner resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for provisioner pod
|
||||
#CSI_CEPHFS_PROVISIONER_RESOURCE: |
|
||||
# - name : csi-provisioner
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-resizer
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-attacher
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-snapshotter
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-cephfsplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : liveness-prometheus
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# (Optional) CEPH CSI CephFS plugin resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for plugin pod
|
||||
#CSI_CEPHFS_PLUGIN_RESOURCE: |
|
||||
# - name : driver-registrar
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# - name : csi-cephfsplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : liveness-prometheus
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
|
||||
# (Optional) CEPH CSI NFS provisioner resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for provisioner pod
|
||||
# CSI_NFS_PROVISIONER_RESOURCE: |
|
||||
# - name : csi-provisioner
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# - name : csi-nfsplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
# - name : csi-attacher
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 100m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 200m
|
||||
# (Optional) CEPH CSI NFS plugin resource requirement list, Put here list of resource
|
||||
# requests and limits you want to apply for plugin pod
|
||||
# CSI_NFS_PLUGIN_RESOURCE: |
|
||||
# - name : driver-registrar
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 128Mi
|
||||
# cpu: 50m
|
||||
# limits:
|
||||
# memory: 256Mi
|
||||
# cpu: 100m
|
||||
# - name : csi-nfsplugin
|
||||
# resource:
|
||||
# requests:
|
||||
# memory: 512Mi
|
||||
# cpu: 250m
|
||||
# limits:
|
||||
# memory: 1Gi
|
||||
# cpu: 500m
|
||||
|
||||
# Configure CSI Ceph FS grpc and liveness metrics port
|
||||
# Set to true to enable Ceph CSI liveness container.
|
||||
CSI_ENABLE_LIVENESS: "false"
|
||||
# CSI_CEPHFS_GRPC_METRICS_PORT: "9091"
|
||||
# CSI_CEPHFS_LIVENESS_METRICS_PORT: "9081"
|
||||
# Configure CSI RBD grpc and liveness metrics port
|
||||
# CSI_RBD_GRPC_METRICS_PORT: "9090"
|
||||
# CSI_RBD_LIVENESS_METRICS_PORT: "9080"
|
||||
# CSIADDONS_PORT: "9070"
|
||||
|
||||
# Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options
|
||||
# Set to "ms_mode=secure" when connections.encrypted is enabled in CephCluster CR
|
||||
# CSI_CEPHFS_KERNEL_MOUNT_OPTIONS: "ms_mode=secure"
|
||||
|
||||
# Whether the OBC provisioner should watch on the operator namespace or not, if not the namespace of the cluster will be used
|
||||
ROOK_OBC_WATCH_OPERATOR_NAMESPACE: "true"
|
||||
|
||||
# Whether to start the discovery daemon to watch for raw storage devices on nodes in the cluster.
|
||||
# This daemon does not need to run if you are only going to create your OSDs based on StorageClassDeviceSets with PVCs.
|
||||
ROOK_ENABLE_DISCOVERY_DAEMON: "false"
|
||||
# The timeout value (in seconds) of Ceph commands. It should be >= 1. If this variable is not set or is an invalid value, it defaults to 15.
|
||||
ROOK_CEPH_COMMANDS_TIMEOUT_SECONDS: "15"
|
||||
# Enable the csi addons sidecar.
|
||||
CSI_ENABLE_CSIADDONS: "false"
|
||||
# Enable watch for faster recovery from rbd rwo node loss
|
||||
ROOK_WATCH_FOR_NODE_FAILURE: "true"
|
||||
# ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.7.0"
|
||||
# The CSI GRPC timeout value (in seconds). It should be >= 120. If this variable is not set or is an invalid value, it defaults to 150.
|
||||
CSI_GRPC_TIMEOUT_SECONDS: "150"
|
||||
|
||||
ROOK_DISABLE_ADMISSION_CONTROLLER: "true"
|
||||
|
||||
# Enable topology based provisioning.
|
||||
CSI_ENABLE_TOPOLOGY: "false"
|
||||
# Domain labels define which node labels to use as domains
|
||||
# for CSI nodeplugins to advertise their domains
|
||||
# NOTE: the value here serves as an example and needs to be
|
||||
# updated with node labels that define domains of interest
|
||||
# CSI_TOPOLOGY_DOMAIN_LABELS: "kubernetes.io/hostname,topology.kubernetes.io/zone,topology.rook.io/rack"
|
||||
|
||||
# Enable read affinity for RBD volumes. Recommended to
|
||||
# set to true if running kernel 5.8 or newer.
|
||||
CSI_ENABLE_READ_AFFINITY: "false"
|
||||
# CRUSH location labels define which node labels to use
|
||||
# as CRUSH location. This should correspond to the values set in
|
||||
# the CRUSH map.
|
||||
# Defaults to all the labels mentioned in
|
||||
# https://rook.io/docs/rook/latest/CRDs/Cluster/ceph-cluster-crd/#osd-topology
|
||||
# CSI_CRUSH_LOCATION_LABELS: "kubernetes.io/hostname,topology.kubernetes.io/zone,topology.rook.io/rack"
|
||||
|
||||
# Whether to skip any attach operation altogether for CephCSI PVCs.
|
||||
# See more details [here](https://kubernetes-csi.github.io/docs/skip-attach.html#skip-attach-with-csi-driver-object).
|
||||
# If set to false it skips the volume attachments and makes the creation of pods using the CephCSI PVC fast.
|
||||
# **WARNING** It's highly discouraged to use this for RWO volumes. For RBD PVCs it can cause data corruption,
|
||||
# csi-addons operations like Reclaimspace and PVC Keyrotation will also not be supported if set to false
|
||||
# since we'll have no VolumeAttachments to determine which node the PVC is mounted on.
|
||||
# Refer to this [issue](https://github.com/kubernetes/kubernetes/issues/103305) for more details.
|
||||
CSI_CEPHFS_ATTACH_REQUIRED: "true"
|
||||
CSI_RBD_ATTACH_REQUIRED: "true"
|
||||
CSI_NFS_ATTACH_REQUIRED: "true"
|
||||
# Rook Discover toleration. Will tolerate all taints with all keys.
|
||||
# (Optional) Rook Discover tolerations list. Put here list of taints you want to tolerate in YAML format.
|
||||
# DISCOVER_TOLERATIONS: |
|
||||
# - effect: NoSchedule
|
||||
# key: node-role.kubernetes.io/control-plane
|
||||
# operator: Exists
|
||||
# - effect: NoExecute
|
||||
# key: node-role.kubernetes.io/etcd
|
||||
# operator: Exists
|
||||
# (Optional) Rook Discover priority class name to set on the pod(s)
|
||||
# DISCOVER_PRIORITY_CLASS_NAME: "<PriorityClassName>"
|
||||
# (Optional) Discover Agent NodeAffinity.
|
||||
# DISCOVER_AGENT_NODE_AFFINITY: |
|
||||
# requiredDuringSchedulingIgnoredDuringExecution:
|
||||
# nodeSelectorTerms:
|
||||
# - matchExpressions:
|
||||
# - key: myKey
|
||||
# operator: DoesNotExist
|
||||
# (Optional) Discover Agent Pod Labels.
|
||||
# DISCOVER_AGENT_POD_LABELS: "key1=value1,key2=value2"
|
||||
# Disable automatic orchestration when new devices are discovered
|
||||
ROOK_DISABLE_DEVICE_HOTPLUG: "false"
|
||||
# The duration between discovering devices in the rook-discover daemonset.
|
||||
ROOK_DISCOVER_DEVICES_INTERVAL: "60m"
|
||||
# DISCOVER_DAEMON_RESOURCES: |
|
||||
# - name: DISCOVER_DAEMON_RESOURCES
|
||||
# resources:
|
||||
# limits:
|
||||
# cpu: 500m
|
||||
# memory: 512Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
---
|
||||
# OLM: BEGIN OPERATOR DEPLOYMENT
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: rook-ceph-operator
|
||||
namespace: rook-ceph # namespace:operator
|
||||
labels:
|
||||
operator: rook
|
||||
storage-backend: ceph
|
||||
app.kubernetes.io/name: rook-ceph
|
||||
app.kubernetes.io/instance: rook-ceph
|
||||
app.kubernetes.io/component: rook-ceph-operator
|
||||
app.kubernetes.io/part-of: rook-ceph-operator
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: rook-ceph-operator
|
||||
strategy:
|
||||
type: Recreate
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: rook-ceph-operator
|
||||
spec:
|
||||
serviceAccountName: rook-ceph-system
|
||||
containers:
|
||||
- name: rook-ceph-operator
|
||||
image: rook/ceph:master
|
||||
args: ["ceph", "operator"]
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 2016
|
||||
runAsGroup: 2016
|
||||
capabilities:
|
||||
drop: ["ALL"]
|
||||
volumeMounts:
|
||||
- mountPath: /var/lib/rook
|
||||
name: rook-config
|
||||
- mountPath: /etc/ceph
|
||||
name: default-config-dir
|
||||
- mountPath: /etc/webhook
|
||||
name: webhook-cert
|
||||
ports:
|
||||
- containerPort: 9443
|
||||
name: https-webhook
|
||||
protocol: TCP
|
||||
env:
|
||||
# If the operator should only watch for cluster CRDs in the same namespace, set this to "true".
|
||||
# If this is not set to true, the operator will watch for cluster CRDs in all namespaces.
|
||||
- name: ROOK_CURRENT_NAMESPACE_ONLY
|
||||
value: "false"
|
||||
|
||||
# Whether to start pods as privileged that mount a host path, which includes the Ceph mon and osd pods.
|
||||
# Set this to true if SELinux is enabled (e.g. OpenShift) to workaround the anyuid issues.
|
||||
# For more details see https://github.com/rook/rook/issues/1314#issuecomment-355799641
|
||||
- name: ROOK_HOSTPATH_REQUIRES_PRIVILEGED
|
||||
value: "false"
|
||||
# Provide customised regexes as comma-separated values. For example, the regex for an rbd-based volume would be "(?i)rbd[0-9]+".
|
||||
# In case of more than one regex, use comma to separate between them.
|
||||
# Default regex will be "(?i)dm-[0-9]+,(?i)rbd[0-9]+,(?i)nbd[0-9]+"
|
||||
# To blacklist an additional disk, append its regex after a comma.
|
||||
# If value is empty, the default regex will be used.
|
||||
- name: DISCOVER_DAEMON_UDEV_BLACKLIST
|
||||
value: "(?i)dm-[0-9]+,(?i)rbd[0-9]+,(?i)nbd[0-9]+"
|
||||
|
||||
# Time to wait until the node controller will move Rook pods to other
|
||||
# nodes after detecting an unreachable node.
|
||||
# Pods affected by this setting are:
|
||||
# mgr, rbd, mds, rgw, nfs, PVC based mons and osds, and ceph toolbox
|
||||
# The value used in this variable replaces the default value of 300 secs
|
||||
# added automatically by k8s as Toleration for
|
||||
# <node.kubernetes.io/unreachable>
|
||||
# The total amount of time to reschedule Rook pods in healthy nodes
|
||||
# before detecting a <not ready node> condition will be the sum of:
|
||||
# --> node-monitor-grace-period: 40 seconds (k8s kube-controller-manager flag)
|
||||
# --> ROOK_UNREACHABLE_NODE_TOLERATION_SECONDS: 5 seconds
|
||||
- name: ROOK_UNREACHABLE_NODE_TOLERATION_SECONDS
|
||||
value: "5"
|
||||
|
||||
# The name of the node to pass with the downward API
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
# The pod name to pass with the downward API
|
||||
- name: POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
# The pod namespace to pass with the downward API
|
||||
- name: POD_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
# Recommended resource requests and limits, if desired
|
||||
#resources:
|
||||
# limits:
|
||||
# cpu: 500m
|
||||
# memory: 512Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
# Uncomment it to run lib bucket provisioner in multithreaded mode
|
||||
#- name: LIB_BUCKET_PROVISIONER_THREADS
|
||||
# value: "5"
|
||||
|
||||
# Uncomment it to run rook operator on the host network
|
||||
#hostNetwork: true
|
||||
volumes:
|
||||
- name: rook-config
|
||||
emptyDir: {}
|
||||
- name: default-config-dir
|
||||
emptyDir: {}
|
||||
- name: webhook-cert
|
||||
emptyDir: {}
|
||||
# OLM: END OPERATOR DEPLOYMENT
|
||||
|
|
@ -0,0 +1,2 @@
nameserver {{ ns1 }}
nameserver {{ ns2 }}
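# A minimal sketch of how this template might be rendered (the template file name
# resolv.conf.j2 and task wording are assumptions, not the role's actual task):
#
# - name: Render /etc/resolv.conf from template
#   ansible.builtin.template:
#     src: resolv.conf.j2
#     dest: /etc/resolv.conf
#     owner: root
#     group: root
#     mode: '0644'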
@ -0,0 +1,30 @@
######################################
# Global vars for Cloud-Cluster #
######################################

# Name of the environment to deploy to
# - 'production' (for production environment)
# - 'staging' (for staging environment)
# - 'development' (for development environment)
# Users can extend the sub-domains as they wish
# masterDomain: 'example.com'

masterDomain: masasana.ai
deployEnvironment: production
subdomains:
  staging: stg
  development: dev

# ToDo: Check if microk8s uses different classes
# TODO: find usages
k8s_ingress_class: nginx

# Identity Management
idm_domain: auth.{{ domain }}
idmDomain: "{{ idm_domain }}"

############################################################################
# computed variables -> please don't modify by hand! #
############################################################################
domain: '{% if deployEnvironment != "production" %}{{ subdomains[deployEnvironment] }}.{% endif %}{{ masterDomain }}'
kubernetesApi: '{{ hostvars[groups["kubernetes_api"][0]].ansible_host }}'
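# Illustration (derived from the defaults above): with deployEnvironment: staging the
# computed values resolve to
#   domain: stg.masasana.ai
#   idm_domain: auth.stg.masasana.ai
# while deployEnvironment: production yields domain: masasana.ai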
@ -0,0 +1,11 @@
######################################
# cert-manager vars for cluster #
######################################
namespace: "cert-manager"
email: wefers@masasana.ai

helm:
  repoUrl: "https://charts.jetstack.io"
  chart: "cert-manager/cert-manager"
  releaseName: "cert-manager"
  chartVersion: "1.14.4"
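# A minimal sketch of how these vars might be consumed (the modules are real,
# the task layout is an assumption, not necessarily the role's implementation):
#
# - name: Add the cert-manager chart repository
#   kubernetes.core.helm_repository:
#     name: cert-manager
#     repo_url: "{{ helm.repoUrl }}"
#
# - name: Install cert-manager via Helm
#   kubernetes.core.helm:
#     name: "{{ helm.releaseName }}"
#     chart_ref: "{{ helm.chart }}"
#     chart_version: "{{ helm.chartVersion }}"
#     release_namespace: "{{ namespace }}"
#     create_namespace: true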
@ -0,0 +1,25 @@
######################################
# Global vars for Cloud-Cluster #
######################################

# Debug mode for test_task.yml
debug: False

# Cluster name
kubernetesClusterName: 'marcel-stg'

# Kubernetes version
kubernetesVersion: '1.29.3'

# Cluster Type. Possible types:
# - vanilla # plain kubernetes (HA)
kubernetesClusterType: 'vanilla'

# Decide whether kube-vip should be installed
installKubeVip: False
@ -0,0 +1,6 @@
######################################
# helm3 vars for cluster #
######################################

# Helm install script path
helm_install_script: 'https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3'
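# A minimal sketch of how the install script might be used (paths and task names are
# assumptions, not the role's actual tasks):
#
# - name: Download the Helm 3 install script
#   ansible.builtin.get_url:
#     url: "{{ helm_install_script }}"
#     dest: /tmp/get-helm-3.sh
#     mode: '0755'
#
# - name: Run the Helm 3 install script
#   ansible.builtin.command: /tmp/get-helm-3.sh
#   args:
#     creates: /usr/local/bin/helm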
@ -0,0 +1,11 @@
######################################
# Kubernetes ingress controller vars #
######################################
namespace: 'ingress-nginx'

helm:
  repoUrl: 'https://kubernetes.github.io/ingress-nginx'
  chart: 'ingress-nginx/ingress-nginx'
  releaseName: 'ingress-nginx'
  chartVersion: '4.10.0'
@ -0,0 +1,9 @@
######################################
# kube-vip vars for virtual IP #
######################################

# KubeVIP virtual IP settings
virtual_ip: '10.42.0.100'
interface: 'enp7s0'
kube_vip_version: 'v0.5.0'
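# Illustration (assumed usage, not necessarily how the role consumes these vars):
# kube-vip's documented static-pod generator would be invoked roughly as
#   ctr run --rm --net-host ghcr.io/kube-vip/kube-vip:{{ kube_vip_version }} vip \
#     /kube-vip manifest pod --interface {{ interface }} --address {{ virtual_ip }} \
#     --controlplane --arp --leaderElection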
@ -0,0 +1,8 @@
######################################
# System vars for Linux Systems #
######################################

# CRI-O container engine version
crio_version: '1.28'

calico_main_interface: 'interface=enp7s0'
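# Note (assumption about intended use): a value of the form 'interface=<name>' matches
# Calico's IP_AUTODETECTION_METHOD setting, e.g.
#   IP_AUTODETECTION_METHOD: "interface=enp7s0"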
@ -0,0 +1,12 @@
######################################
# MetalLB Configuration #
######################################
namespace: "metallb"

helm:
  repoUrl: "https://metallb.github.io/metallb"
  chart: "metallb/metallb"
  releaseName: "metallb"
  chartVersion: "v0.14.4"
@ -0,0 +1,34 @@
######################################
# rook-ceph vars for storage #
######################################
namespace: 'rook-ceph'

CLIENT_CHECKER_NAME: 'client.healthchecker'
RGW_POOL_PREFIX: 'default'
rook_external:
  csi_rbd_provisioner_secret: ""
  csi_rbd_node_secret: ""


# possible types:
# - dev (2 mons / 4 osds - could be changed in the rook cluster-test.yml) # ToDo: set variables
# - prod (requires a minimum set of 3 mons)
rook_cluster_type: 'dev'

# prod and dev variables are fixed. Please do not change the names
rook_cluster_configs:
  dev:
    name: 'my-cluster'
    mons: 1
    osds: 3
    mgrs: 1
  prod:
    name: 'rook-ceph'
    mons: 3
    mgrs: 2


############################################################################
# computed variables -> please don't modify by hand! #
############################################################################
rook_cluster_config: "{{ rook_cluster_configs[rook_cluster_type] }}"
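# Illustration (derived from the values above): with rook_cluster_type: 'dev' the
# computed rook_cluster_config resolves to
#   { name: 'my-cluster', mons: 1, osds: 3, mgrs: 1 }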
@ -0,0 +1,19 @@
######################################
# System vars for Linux Systems #
######################################

# ToDo: Linux base hardening
# Nameserver IPv4 Addresses
# /etc/resolv.conf
#ns1: '8.8.8.8'
#ns2: '4.4.4.4'

# package versions
k8s_pip_version: '25.3.0'

# Necessary system packages we need to install
# ToDo: Pin packages to fixed versions; test split string
# ToDo: describe version split
sys_packages: [ 'curl', 'nano', 'python3', 'python3-pip', 'htop', 'lsb-release', 'git' ]
k8s_sys_packages: [ 'open-iscsi', 'apt-transport-https', 'ca-certificates', 'gnupg' ]
pip_packages: ['PyYAML', 'jmespath', 'kubernetes>={{ k8s_pip_version }},<{{ (k8s_pip_version | string).split(".")[0] | int + 1 }}']
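# Illustration (derived from the values above): with k8s_pip_version: '25.3.0' the last
# pip_packages entry renders as
#   kubernetes>=25.3.0,<26
# i.e. the split expression caps the kubernetes client below the next major release.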