# SPDX-License-Identifier: Apache-2.0
# ============LICENSE_END=========================================================
+#################################################################################################################
+# Define the settings for the rook-ceph cluster with common settings for a production cluster.
+# All nodes with available raw devices will be used for the Ceph cluster. At least three nodes are required
+# in this example. See the documentation for more details on storage settings available.
+
+# For example, to create the cluster:
+# kubectl create -f crds.yaml -f common.yaml -f operator.yaml
+# kubectl create -f cluster.yaml
+#################################################################################################################
+
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
name: rook-ceph
- namespace: "{{ rook_namespace }}"
+ namespace: "{{ rook_namespace }}" # namespace:cluster
spec:
cephVersion:
# The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw).
- # v12 is luminous, v13 is mimic, and v14 is nautilus.
- # RECOMMENDATION: In production, use a specific version tag instead of the general v13 flag, which pulls the latest release and could result in different
+ # v13 is mimic, v14 is nautilus, and v15 is octopus.
+ # RECOMMENDATION: In production, use a specific version tag instead of the general v14 flag, which pulls the latest release and could result in different
# versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
+ # If you want to be more precise, you can always use a timestamp tag such as ceph/ceph:v15.2.9-20201217
+ # This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities
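+ # Illustrative example: with the Ansible variables used below, the image could be pinned by setting
+ # ceph_repository: "ceph/ceph" and ceph_version: "v15.2.9-20201217" so the rendered tag matches one exact release.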
image: "{{ ceph_repository }}:{{ ceph_version }}"
- # Whether to allow unsupported versions of Ceph. Currently only luminous and mimic are supported.
- # After nautilus is released, Rook will be updated to support nautilus.
+ # Whether to allow unsupported versions of Ceph. Currently `nautilus` and `octopus` are supported.
+ # Future versions such as `pacific` would require this to be set to `true`.
# Do not set to true in production.
allowUnsupported: false
- # The path on the host where configuration files will be persisted. If not specified, a kubernetes emptyDir will be created (not recommended).
+ # The path on the host where configuration files will be persisted. Must be specified.
# Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster.
# In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in Minikube environment.
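+ # Illustrative example: when reinstalling, wipe this directory on every storage node first, e.g.
+ #   rm -rf {{ rook_data_dir_path }}   # the exact path depends on how the variable is set in your environment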
dataDirHostPath: "{{ rook_data_dir_path }}"
# Use at your OWN risk
# To understand Rook's upgrade process of Ceph, read https://rook.io/docs/rook/master/ceph-upgrade.html#ceph-version-upgrades
skipUpgradeChecks: false
- # set the amount of mons to be started
+ # Whether or not to continue if PGs are not clean during an upgrade
+ continueUpgradeAfterChecksEvenIfNotHealthy: false
+ # WaitTimeoutForHealthyOSDInMinutes defines the time (in minutes) the operator would wait before an OSD can be stopped for upgrade or restart.
+ # If the timeout is exceeded and the OSD is not ok to stop, then the operator would skip the upgrade for the current OSD and proceed with the next one
+ # if `continueUpgradeAfterChecksEvenIfNotHealthy` is `false`. If `continueUpgradeAfterChecksEvenIfNotHealthy` is `true`, then the operator would
+ # continue with the upgrade of an OSD even if it is not ok to stop after the timeout. This timeout won't be applied if `skipUpgradeChecks` is `true`.
+ # The default wait timeout is 10 minutes.
+ waitTimeoutForHealthyOSDInMinutes: 10
mon:
- count: 3
+ # Set the number of mons to be started. Must be an odd number, and is generally recommended to be 3.
+ count: {{ rook_ceph_mon_count }}
+ # The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason.
+ # Mons should only be allowed on the same node for test environments where data loss is acceptable.
allowMultiplePerNode: true
mgr:
- modules:
+ modules: []
# Several modules should not need to be included in this list. The "dashboard" and "monitoring" modules
- # are already enabled by other settings in the cluster CR and the "rook" module is always enabled.
+ # are already enabled by other settings in the cluster CR.
# - name: pg_autoscaler
# enabled: true
# enable the ceph dashboard for viewing cluster status
# port: 8443
# serve the dashboard using SSL
ssl: true
+ # enable prometheus alerting for cluster
monitoring:
# requires Prometheus to be pre-installed
enabled: false
# If you have a single rook-ceph cluster, set the rulesNamespace to the same namespace as the cluster or keep it empty.
# If you have multiple rook-ceph clusters in the same k8s cluster, choose the same namespace (ideally, namespace with prometheus
# deployed) to set rulesNamespace for all the clusters. Otherwise, you will get duplicate alerts with multiple alert definitions.
- rulesNamespace: {{ rook_namespace }}
+ rulesNamespace: "{{ rook_namespace }}"
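+ # Illustrative note: enabling this assumes the prometheus-operator CRDs (ServiceMonitor, PrometheusRule) already exist;
+ # with `enabled: true` the operator is expected to create a ServiceMonitor for the ceph-mgr metrics endpoint.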
network:
- # toggle to use hostNetwork
hostNetwork: {{ rook_use_host_network }}
- rbdMirroring:
- # The number of daemons that will perform the rbd mirroring.
- # rbd mirroring must be configured with "rbd mirror" from the rook toolbox.
- workers: 0
+ # enable the crash collector for ceph daemon crash collection
+ crashCollector:
+ disable: {{ rook_ceph_crashcollector_disable }}
+ # automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction.
+ cleanupPolicy:
+ # Since cluster cleanup is destructive to data, confirmation is required.
+ # To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data".
+ # This value should only be set when the cluster is about to be deleted. After the confirmation is set,
+ # Rook will immediately stop configuring the cluster and only wait for the delete command.
+ # If the empty string is set, Rook will not destroy any data on hosts during uninstall.
+ confirmation: ""
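+ # Illustrative example, following the teardown documentation linked above: the confirmation is typically set
+ # just before deletion with something like:
+ #   kubectl -n {{ rook_namespace }} patch cephcluster rook-ceph --type merge -p '{"spec":{"cleanupPolicy":{"confirmation":"yes-really-destroy-data"}}}'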
+ # sanitizeDisks represents settings for sanitizing OSD disks on cluster deletion
+ sanitizeDisks:
+ # method indicates if the entire disk should be sanitized or simply ceph's metadata
+ # in both cases, re-installation is possible
+ # possible choices are 'complete' or 'quick' (default)
+ method: quick
+ # dataSource indicates where to get random bytes to write on the disk
+ # possible choices are 'zero' (default) or 'random'
+ # using random sources will consume entropy from the system and will take much more time than the zero source
+ dataSource: zero
+ # iteration overwrites the disk N times instead of the default (1)
+ # takes an integer value
+ iteration: 1
+ # allowUninstallWithVolumes defines how the uninstall should be performed
+ # If set to true, cephCluster deletion does not wait for the PVs to be deleted.
+ allowUninstallWithVolumes: false
# To control where various services will be scheduled by kubernetes, use the placement configuration sections below.
# The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and
# tolerate taints with a key of 'storage-node'.
-# placement:
-# all:
-# nodeAffinity:
-# requiredDuringSchedulingIgnoredDuringExecution:
-# nodeSelectorTerms:
-# - matchExpressions:
-# - key: role
-# operator: In
-# values:
-# - storage-node
+ placement:
+ all:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: "{{ rook_storage_label }}"
+ operator: In
+ values:
+ - "true"
# podAffinity:
# podAntiAffinity:
-# tolerations:
-# - key: storage-node
-# operator: Exists
+# topologySpreadConstraints:
+ tolerations:
+ - key: "{{ rook_storage_label }}"
+ operator: Exists
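+ # Illustrative note: the affinity and toleration above assume storage nodes carry the "{{ rook_storage_label }}" label
+ # (and optionally a matching taint), e.g.:
+ #   kubectl label node <node-name> {{ rook_storage_label }}=true
+ #   kubectl taint node <node-name> {{ rook_storage_label }}=true:NoSchedule   # only if a taint is desired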
# The above placement information can also be specified for mon, osd, and mgr components
# mon:
# Monitor deployments may contain an anti-affinity rule for avoiding monitor
# preferred rule with weight: 50.
# osd:
# mgr:
+# cleanup:
annotations:
# all:
# mon:
# osd:
+# cleanup:
+# prepareosd:
# If no mgr annotations are set, prometheus scrape annotations will be set by default.
-# mgr:
+# mgr:
+ labels:
+# all:
+# mon:
+# osd:
+# cleanup:
+# mgr:
+# prepareosd:
resources:
# The requests and limits set here, allow the mgr pod to use half of one CPU core and 1 gigabyte of memory
# mgr:
# The above example requests/limits can also be added to the mon and osd components
# mon:
# osd:
+# prepareosd:
+# crashcollector:
+# logcollector:
+# cleanup:
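+ # Illustrative, commented-out example matching the description above (half a CPU core and 1 GiB of memory for the mgr):
+# mgr:
+#   limits:
+#     cpu: "500m"
+#     memory: "1024Mi"
+#   requests:
+#     cpu: "500m"
+#     memory: "1024Mi"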
+ # The option to automatically remove OSDs that are out and are safe to destroy.
+ removeOSDsIfOutAndSafeToRemove: false
storage: # cluster level storage configuration and selection
useAllNodes: true
useAllDevices: false
+ deviceFilter: "{{ rook_node_device_filter }}"
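+ # Illustrative examples for the filter value: "^sd[b-d]" matches /dev/sdb through /dev/sdd, "^vd." matches virtio disks;
+ # the filter is a regular expression applied to device names, so set the variable to match your raw devices.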
location:
config:
- # The default and recommended storeType is dynamically set to bluestore for devices and filestore for directories.
- # Set the storeType explicitly only if it is required not to use the default.
- # storeType: bluestore
- databaseSizeMB: "1024" # this value can be removed for environments with normal sized disks (100 GB or larger)
- journalSizeMB: "1024" # this value can be removed for environments with normal sized disks (20 GB or larger)
+ # crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
+ # metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore.
+ databaseSizeMB: "1024" # only needed if the disks are smaller than 100 GB
+ journalSizeMB: "1024" # only needed if the disks are 20 GB or smaller
osdsPerDevice: "1" # this value can be overridden at the node or device level
-# Cluster level list of directories to use for storage. These values will be set for all nodes that have no `directories` set.
- directories:
- - path: "{{ rook_storage_dir_path }}"
+ # encryptedDevice: "true" # the default value for this option is "false"
+ # directories:
+ # - path: "{{ rook_storage_dir_path }}"
# Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
# nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label.
# nodes:
-# - name: "172.17.4.101"
-# directories: # specific directories to use for storage can be specified for each node
-# - path: "/rook/storage-dir"
-# resources:
-# limits:
-# cpu: "500m"
-# memory: "1024Mi"
-# requests:
-# cpu: "500m"
-# memory: "1024Mi"
# - name: "172.17.4.201"
# devices: # specific devices to use for storage can be specified for each node
# - name: "sdb"
# - name: "nvme01" # multiple osds can be created on high performance devices
# config:
# osdsPerDevice: "5"
+# - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths
# config: # configuration can be specified at the node level which overrides the cluster level config
-# storeType: filestore
+# storeType: filestore # this option is obsolete and only provided as an example
# - name: "172.17.4.301"
-# deviceFilter: ^vdb
+# deviceFilter: "^vdb"
# The section for configuring management of daemon disruptions during upgrade or fencing.
disruptionManagement:
# If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically
- # via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph-managed-disruptionbudgets.md). The operator will
+ # via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will
# block eviction of OSDs by default and unblock them safely when drains are detected.
managePodBudgets: false
# A duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the
# default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true`. The default value is `30` minutes.
osdMaintenanceTimeout: 30
+ # A duration in minutes that the operator will wait for the placement groups to become healthy (active+clean) after a drain was completed and OSDs came back up.
+ # The operator will continue with the next drain if the timeout is exceeded. It only works if `managePodBudgets` is `true`.
+ # No values or 0 means that the operator will wait until the placement groups are healthy before unblocking the next drain.
+ pgHealthCheckTimeout: 0
# If true, the operator will create and manage MachineDisruptionBudgets to ensure OSDs are only fenced when the cluster is healthy.
# Only available on OpenShift.
manageMachineDisruptionBudgets: false
# Namespace in which to watch for the MachineDisruptionBudgets.
machineDisruptionBudgetNamespace: openshift-machine-api
+
+ # healthChecks
+ # Valid values for daemons are 'mon', 'osd', 'status'
+ healthCheck:
+ daemonHealth:
+ mon:
+ disabled: false
+ interval: 45s
+ osd:
+ disabled: false
+ interval: 60s
+ status:
+ disabled: false
+ interval: 60s
+ # Change pod liveness probe settings; this works for all mon, mgr and osd daemons
+ livenessProbe:
+ mon:
+ disabled: false
+ mgr:
+ disabled: false
+ osd:
+ disabled: false