apps/ceph/kubespray/playbooks/roles/install/templates/cluster.yaml.j2

   1 # ============LICENSE_START=======================================================
   2 #  Copyright (C) 2019 The Nordix Foundation. All rights reserved.
   3 # ================================================================================
   4 # Licensed under the Apache License, Version 2.0 (the "License");
   5 # you may not use this file except in compliance with the License.
   6 # You may obtain a copy of the License at
   7 #
   8 #      http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 # Unless required by applicable law or agreed to in writing, software
  11 # distributed under the License is distributed on an "AS IS" BASIS,
  12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 # See the License for the specific language governing permissions and
  14 # limitations under the License.
  15 #
  16 # SPDX-License-Identifier: Apache-2.0
  17 # ============LICENSE_END=========================================================
  18
  19 #################################################################################################################
  20 # Define the settings for the rook-ceph cluster with common settings for a production cluster.
  21 # All nodes with available raw devices will be used for the Ceph cluster. At least three nodes are required
  22 # in this example. See the documentation for more details on storage settings available.
  23
  24 # For example, to create the cluster:
  25 #   kubectl create -f crds.yaml -f common.yaml -f operator.yaml
  26 #   kubectl create -f cluster.yaml
  27 #################################################################################################################
  28
  29 apiVersion: ceph.rook.io/v1
  30 kind: CephCluster
  31 metadata:
  32   name: rook-ceph
  33   namespace: "{{ rook_namespace }}" # namespace:cluster
  34 spec:
  35   cephVersion:
  36     # The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw).
  37     # v13 is mimic, v14 is nautilus, and v15 is octopus.
  38     # RECOMMENDATION: In production, use a specific version tag instead of the general v14 flag, which pulls the latest release and could result in different
  39     # versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
  40     # If you want to be more precise, you can always use a timestamp tag such as ceph/ceph:v15.2.9-20201217
  41     # This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities
  42     image: "{{ ceph_repository }}:{{ ceph_version }}"
  43     # Whether to allow unsupported versions of Ceph. Currently `nautilus` and `octopus` are supported.
  44     # Future versions such as `pacific` would require this to be set to `true`.
  45     # Do not set to true in production.
  46     allowUnsupported: false
  47   # The path on the host where configuration files will be persisted. Must be specified.
  48   # Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster.
  49   # In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in Minikube environment.
  50   dataDirHostPath: "{{ rook_data_dir_path }}"
  51   # Whether or not upgrade should continue even if a check fails
  52   # This means Ceph's status could be degraded and we don't recommend upgrading but you might decide otherwise
  53   # Use at your OWN risk
  54   # To understand Rook's upgrade process of Ceph, read https://rook.io/docs/rook/master/ceph-upgrade.html#ceph-version-upgrades
  55   skipUpgradeChecks: false
  56   # Whether or not continue if PGs are not clean during an upgrade
  57   continueUpgradeAfterChecksEvenIfNotHealthy: false
  58   # WaitTimeoutForHealthyOSDInMinutes defines the time (in minutes) the operator would wait before an OSD can be stopped for upgrade or restart.
  59   # If the timeout is exceeded and the OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one
  60   # if `continueUpgradeAfterChecksEvenIfNotHealthy` is `false`. If `continueUpgradeAfterChecksEvenIfNotHealthy` is `true`, then the operator would
  61   # continue with the upgrade of an OSD even if it's not ok to stop after the timeout. This timeout won't be applied if `skipUpgradeChecks` is `true`.
  62   # The default wait timeout is 10 minutes.
  63   waitTimeoutForHealthyOSDInMinutes: 10
  64   mon:
  65     # Set the number of mons to be started (rendered from the `rook_ceph_mon_count` template variable). Must be an odd number, and is generally recommended to be 3.
  66     count: {{ rook_ceph_mon_count }}
  67     # The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason.
  68     # Mons should only be allowed on the same node for test environments where data loss is acceptable. NOTE(review): this is hard-coded to true below - confirm this template is not used for production clusters.
  69     allowMultiplePerNode: true
  70   mgr:
  71     modules: []
  72     # Several modules should not need to be included in this list. The "dashboard" and "monitoring" modules
  73     # are already enabled by other settings in the cluster CR.
  74     # - name: pg_autoscaler
  75     #   enabled: true
  76   # enable the ceph dashboard for viewing cluster status
  77   dashboard:
  78     enabled: {{ rook_ceph_dashboard_enabled }}
  79     # serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy)
  80     # urlPrefix: /ceph-dashboard
  81     # serve the dashboard at the given port.
  82     # port: 8443
  83     # serve the dashboard using SSL
  84     ssl: true
  85   # enable prometheus alerting for cluster
  86   monitoring:
  87     # requires Prometheus to be pre-installed
  88     enabled: false
  89     # namespace to deploy prometheusRule in. If empty, namespace of the cluster will be used.
  90     # Recommended:
  91     # If you have a single rook-ceph cluster, set the rulesNamespace to the same namespace as the cluster or keep it empty.
  92     # If you have multiple rook-ceph clusters in the same k8s cluster, choose the same namespace (ideally, namespace with prometheus
  93     # deployed) to set rulesNamespace for all the clusters. Otherwise, you will get duplicate alerts with multiple alert definitions.
  94     rulesNamespace: "{{ rook_namespace }}"
  95   network:
  96     hostNetwork: {{ rook_use_host_network }}
  97   # enable the crash collector for ceph daemon crash collection
  98   crashCollector:
  99     disable: {{ rook_ceph_crashcollector_disable }}
 100   # automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction.
 101   cleanupPolicy:
 102     # Since cluster cleanup is destructive to data, confirmation is required.
 103     # To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data".
 104     # This value should only be set when the cluster is about to be deleted. After the confirmation is set,
 105     # Rook will immediately stop configuring the cluster and only wait for the delete command.
 106     # If the empty string is set, Rook will not destroy any data on hosts during uninstall.
 107     confirmation: ""
 108     # sanitizeDisks represents settings for sanitizing OSD disks on cluster deletion
 109     sanitizeDisks:
 110       # method indicates if the entire disk should be sanitized or simply ceph's metadata
 111       # in both cases, re-install is possible
 112       # possible choices are 'complete' or 'quick' (default)
 113       method: quick
 114       # dataSource indicate where to get random bytes from to write on the disk
 115       # possible choices are 'zero' (default) or 'random'
 116       # using random sources will consume entropy from the system and will take much more time than the zero source
 117       dataSource: zero
 118       # iteration overwrite N times instead of the default (1)
 119       # takes an integer value
 120       iteration: 1
 121     # allowUninstallWithVolumes defines how the uninstall should be performed
 122     # If set to true, cephCluster deletion does not wait for the PVs to be deleted.
 123     allowUninstallWithVolumes: false
 124   # To control where various services will be scheduled by kubernetes, use the placement configuration sections below.
 125   # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and
 126   # tolerate taints with a key of 'storage-node'.
 127   placement:
 128     all:
 129       nodeAffinity:
 130         requiredDuringSchedulingIgnoredDuringExecution:
 131           nodeSelectorTerms:
 132           - matchExpressions:
 133             - key: "{{ rook_storage_label }}"
 134               operator: In
 135               values:
 136               - "true"
 137 #      podAffinity:
 138 #      podAntiAffinity:
 139 #      topologySpreadConstraints:
 140       tolerations:
 141       - key: "{{ rook_storage_label }}"
 142         operator: Exists
 143 # The above placement information can also be specified for mon, osd, and mgr components
 144 #    mon:
 145 # Monitor deployments may contain an anti-affinity rule for avoiding monitor
 146 # collocation on the same node. This is a required rule when host network is used
 147 # or when AllowMultiplePerNode is false. Otherwise this anti-affinity rule is a
 148 # preferred rule with weight: 50.
 149 #    osd:
 150 #    mgr:
 151 #    cleanup:
 152   annotations:
 153 #    all:
 154 #    mon:
 155 #    osd:
 156 #    cleanup:
 157 #    prepareosd:
 158 # If no mgr annotations are set, prometheus scrape annotations will be set by default.
 159 #    mgr:
 160   labels:
 161 #    all:
 162 #    mon:
 163 #    osd:
 164 #    cleanup:
 165 #    mgr:
 166 #    prepareosd:
 167   resources:
 168 # The requests and limits set here, allow the mgr pod to use half of one CPU core and 1 gigabyte of memory
 169 #    mgr:
 170 #      limits:
 171 #        cpu: "500m"
 172 #        memory: "1024Mi"
 173 #      requests:
 174 #        cpu: "500m"
 175 #        memory: "1024Mi"
 176 # The above example requests/limits can also be added to the mon and osd components
 177 #    mon:
 178 #    osd:
 179 #    prepareosd:
 180 #    crashcollector:
 181 #    logcollector:
 182 #    cleanup:
 183   # The option to automatically remove OSDs that are out and are safe to destroy.
 184   removeOSDsIfOutAndSafeToRemove: false
 185   storage: # cluster level storage configuration and selection
 186     useAllNodes: true
 187     useAllDevices: false
 188     deviceFilter: "{{ rook_node_device_filter }}"
 189     location:
 190     config:
 191       # crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
 192       # metadataDevice: "md0" # specify a non-rotational storage device so ceph-volume will use it as block db device of bluestore.
 193       databaseSizeMB: "1024" # active in this template; intended for disks smaller than 100 GB
 194       journalSizeMB: "1024"  # active in this template; intended for disks of 20 GB or smaller
 195       osdsPerDevice: "1" # this value can be overridden at the node or device level
 196       # encryptedDevice: "true" # the default value for this option is "false"
 197     #directories:
 198     #- path: "{{ rook_storage_dir_path }}"
 199 # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
 200 # nodes below will be used as storage resources.  Each node's 'name' field should match their 'kubernetes.io/hostname' label.
 201 #    nodes:
 202 #    - name: "172.17.4.201"
 203 #      devices: # specific devices to use for storage can be specified for each node
 204 #      - name: "sdb"
 205 #      - name: "nvme01" # multiple osds can be created on high performance devices
 206 #        config:
 207 #          osdsPerDevice: "5"
 208 #      - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths
 209 #      config: # configuration can be specified at the node level which overrides the cluster level config
 210 #        storeType: filestore # this option is obsolete and only provided as an example
 211 #    - name: "172.17.4.301"
 212 #      deviceFilter: "^vdb"
 213   # The section for configuring management of daemon disruptions during upgrade or fencing.
 214   disruptionManagement:
 215     # If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically
 216     # via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will
 217     # block eviction of OSDs by default and unblock them safely when drains are detected.
 218     managePodBudgets: false
 219     # A duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the
 220     # default DOWN/OUT interval) when it is draining. This is only relevant when  `managePodBudgets` is `true`. The default value is `30` minutes.
 221     osdMaintenanceTimeout: 30
 222     # A duration in minutes that the operator will wait for the placement groups to become healthy (active+clean) after a drain was completed and OSDs came back up.
 223     # Operator will continue with the next drain if the timeout exceeds. It only works if `managePodBudgets` is `true`.
 224     # No values or 0 means that the operator will wait until the placement groups are healthy before unblocking the next drain.
 225     pgHealthCheckTimeout: 0
 226     # If true, the operator will create and manage MachineDisruptionBudgets to ensure OSDs are only fenced when the cluster is healthy.
 227     # Only available on OpenShift.
 228     manageMachineDisruptionBudgets: false
 229     # Namespace in which to watch for the MachineDisruptionBudgets.
 230     machineDisruptionBudgetNamespace: openshift-machine-api
 231
 232   # healthChecks
 233   # Valid values for daemons are 'mon', 'osd', 'status'
 234   healthCheck:
 235     daemonHealth:
 236       mon:
 237         disabled: false
 238         interval: 45s
 239       osd:
 240         disabled: false
 241         interval: 60s
 242       status:
 243         disabled: false
 244         interval: 60s
 245     # Change the pod liveness probe; this applies to all mon, mgr, and osd daemons
 246     livenessProbe:
 247       mon:
 248         disabled: false
 249       mgr:
 250         disabled: false
 251       osd:
 252         disabled: false