diff --git a/apps/ceph/kubespray/playbooks/roles/install/templates/cluster.yaml.j2 b/apps/ceph/kubespray/playbooks/roles/install/templates/cluster.yaml.j2
index 60c6665..36d29ba 100644
--- a/apps/ceph/kubespray/playbooks/roles/install/templates/cluster.yaml.j2
+++ b/apps/ceph/kubespray/playbooks/roles/install/templates/cluster.yaml.j2
@@ -16,23 +16,35 @@
 # SPDX-License-Identifier: Apache-2.0
 # ============LICENSE_END=========================================================
 
+#################################################################################################################
+# Define the settings for the rook-ceph cluster with common settings for a production cluster.
+# All nodes with available raw devices will be used for the Ceph cluster. At least three nodes are required
+# in this example. See the documentation for more details on storage settings available.
+
+# For example, to create the cluster:
+#   kubectl create -f crds.yaml -f common.yaml -f operator.yaml
+#   kubectl create -f cluster.yaml
+#################################################################################################################
+
 apiVersion: ceph.rook.io/v1
 kind: CephCluster
 metadata:
   name: rook-ceph
-  namespace: "{{ rook_namespace }}"
+  namespace: "{{ rook_namespace }}" # namespace:cluster
 spec:
   cephVersion:
     # The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw).
-    # v12 is luminous, v13 is mimic, and v14 is nautilus.
-    # RECOMMENDATION: In production, use a specific version tag instead of the general v13 flag, which pulls the latest release and could result in different
+    # v13 is mimic, v14 is nautilus, and v15 is octopus.
+    # RECOMMENDATION: In production, use a specific version tag instead of the general v14 flag, which pulls the latest release and could result in different
     # versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
+    # If you want to be more precise, you can always use a timestamp tag such as ceph/ceph:v15.2.9-20201217.
+    # This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities.
     image: "{{ ceph_repository }}:{{ ceph_version }}"
-    # Whether to allow unsupported versions of Ceph. Currently only luminous and mimic are supported.
-    # After nautilus is released, Rook will be updated to support nautilus.
+    # Whether to allow unsupported versions of Ceph. Currently `nautilus` and `octopus` are supported.
+    # Future versions such as `pacific` would require this to be set to `true`.
     # Do not set to true in production.
     allowUnsupported: false
-  # The path on the host where configuration files will be persisted. If not specified, a kubernetes emptyDir will be created (not recommended).
+  # The path on the host where configuration files will be persisted. Must be specified.
   # Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster.
   # In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in Minikube environment.
Use "/data/rook" in Minikube environment. dataDirHostPath: "{{ rook_data_dir_path }}" @@ -41,14 +53,24 @@ spec: # Use at your OWN risk # To understand Rook's upgrade process of Ceph, read https://rook.io/docs/rook/master/ceph-upgrade.html#ceph-version-upgrades skipUpgradeChecks: false - # set the amount of mons to be started + # Whether or not continue if PGs are not clean during an upgrade + continueUpgradeAfterChecksEvenIfNotHealthy: false + # WaitTimeoutForHealthyOSDInMinutes defines the time (in minutes) the operator would wait before an OSD can be stopped for upgrade or restart. + # If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one + # if `continueUpgradeAfterChecksEvenIfNotHealthy` is `false`. If `continueUpgradeAfterChecksEvenIfNotHealthy` is `true`, then opertor would + # continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won't be applied if `skipUpgradeChecks` is `true`. + # The default wait timeout is 10 minutes. + waitTimeoutForHealthyOSDInMinutes: 10 mon: - count: 3 + # Set the number of mons to be started. Must be an odd number, and is generally recommended to be 3. + count: {{ rook_ceph_mon_count }} + # The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason. + # Mons should only be allowed on the same node for test environments where data loss is acceptable. allowMultiplePerNode: true mgr: - modules: + modules: [] # Several modules should not need to be included in this list. The "dashboard" and "monitoring" modules - # are already enabled by other settings in the cluster CR and the "rook" module is always enabled. + # are already enabled by other settings in the cluster CR. # - name: pg_autoscaler # enabled: true # enable the ceph dashboard for viewing cluster status @@ -60,6 +82,7 @@ spec: # port: 8443 # serve the dashboard using SSL ssl: true + # enable prometheus alerting for cluster monitoring: # requires Prometheus to be pre-installed enabled: false @@ -68,32 +91,55 @@ spec: # If you have a single rook-ceph cluster, set the rulesNamespace to the same namespace as the cluster or keep it empty. # If you have multiple rook-ceph clusters in the same k8s cluster, choose the same namespace (ideally, namespace with prometheus # deployed) to set rulesNamespace for all the clusters. Otherwise, you will get duplicate alerts with multiple alert definitions. - rulesNamespace: {{ rook_namespace }} + rulesNamespace: "{{ rook_namespace }}" network: - # toggle to use hostNetwork hostNetwork: {{ rook_use_host_network }} - rbdMirroring: - # The number of daemons that will perform the rbd mirroring. - # rbd mirroring must be configured with "rbd mirror" from the rook toolbox. - workers: 0 + # enable the crash collector for ceph daemon crash collection + crashCollector: + disable: {{ rook_ceph_crashcollector_disable }} + # automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction. + cleanupPolicy: + # Since cluster cleanup is destructive to data, confirmation is required. + # To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data". + # This value should only be set when the cluster is about to be deleted. After the confirmation is set, + # Rook will immediately stop configuring the cluster and only wait for the delete command. 
+    # If the empty string is set, Rook will not destroy any data on hosts during uninstall.
+    confirmation: ""
+    # sanitizeDisks represents settings for sanitizing OSD disks on cluster deletion
+    sanitizeDisks:
+      # method indicates if the entire disk should be sanitized or simply ceph's metadata
+      # in both cases, re-install is possible
+      # possible choices are 'complete' or 'quick' (default)
+      method: quick
+      # dataSource indicates where to get random bytes from to write on the disk
+      # possible choices are 'zero' (default) or 'random'
+      # using random sources will consume entropy from the system and will take much more time than the zero source
+      dataSource: zero
+      # iteration overwrites N times instead of the default (1)
+      # takes an integer value
+      iteration: 1
+    # allowUninstallWithVolumes defines how the uninstall should be performed
+    # If set to true, cephCluster deletion does not wait for the PVs to be deleted.
+    allowUninstallWithVolumes: false
 # To control where various services will be scheduled by kubernetes, use the placement configuration sections below.
 # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and
 # tolerate taints with a key of 'storage-node'.
-#  placement:
-#    all:
-#      nodeAffinity:
-#        requiredDuringSchedulingIgnoredDuringExecution:
-#          nodeSelectorTerms:
-#          - matchExpressions:
-#            - key: role
-#              operator: In
-#              values:
-#              - storage-node
+  placement:
+    all:
+      nodeAffinity:
+        requiredDuringSchedulingIgnoredDuringExecution:
+          nodeSelectorTerms:
+          - matchExpressions:
+            - key: "{{ rook_storage_label }}"
+              operator: In
+              values:
+              - "true"
 #      podAffinity:
 #      podAntiAffinity:
-#      tolerations:
-#      - key: storage-node
-#        operator: Exists
+#      topologySpreadConstraints:
+      tolerations:
+      - key: "{{ rook_storage_label }}"
+        operator: Exists
 # The above placement information can also be specified for mon, osd, and mgr components
 #  mon:
 #    Monitor deployments may contain an anti-affinity rule for avoiding monitor
@@ -102,12 +148,22 @@ spec:
 #    preferred rule with weight: 50.
 #  osd:
 #  mgr:
+#  cleanup:
   annotations:
 #    all:
 #    mon:
 #    osd:
+#    cleanup:
+#    prepareosd:
 #    If no mgr annotations are set, prometheus scrape annotations will be set by default.
-#    mgr:
+#    mgr:
+  labels:
+#    all:
+#    mon:
+#    osd:
+#    cleanup:
+#    mgr:
+#    prepareosd:
   resources:
 #  The requests and limits set here, allow the mgr pod to use half of one CPU core and 1 gigabyte of memory
 #  mgr:
@@ -120,54 +176,77 @@ spec:
 # The above example requests/limits can also be added to the mon and osd components
 #  mon:
 #  osd:
+#  prepareosd:
+#  crashcollector:
+#  logcollector:
+#  cleanup:
+  # The option to automatically remove OSDs that are out and are safe to destroy.
+  removeOSDsIfOutAndSafeToRemove: false
   storage: # cluster level storage configuration and selection
     useAllNodes: true
     useAllDevices: false
+    deviceFilter: "^vdb$"
    location:
     config:
-      # The default and recommended storeType is dynamically set to bluestore for devices and filestore for directories.
-      # Set the storeType explicitly only if it is required not to use the default.
-      # storeType: bluestore
-      databaseSizeMB: "1024" # this value can be removed for environments with normal sized disks (100 GB or larger)
-      journalSizeMB: "1024"  # this value can be removed for environments with normal sized disks (20 GB or larger)
+      # crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
+      # metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore.
+      databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB
+      journalSizeMB: "1024"  # uncomment if the disks are 20 GB or smaller
       osdsPerDevice: "1" # this value can be overridden at the node or device level
-#  Cluster level list of directories to use for storage. These values will be set for all nodes that have no `directories` set.
-    directories:
-    - path: "{{ rook_storage_dir_path }}"
+    # encryptedDevice: "true" # the default value for this option is "false"
+    #directories:
+    #- path: "{{ rook_storage_dir_path }}"
 # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
 # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label.
 #    nodes:
-#    - name: "172.17.4.101"
-#      directories: # specific directories to use for storage can be specified for each node
-#      - path: "/rook/storage-dir"
-#      resources:
-#        limits:
-#          cpu: "500m"
-#          memory: "1024Mi"
-#        requests:
-#          cpu: "500m"
-#          memory: "1024Mi"
 #    - name: "172.17.4.201"
 #      devices: # specific devices to use for storage can be specified for each node
 #      - name: "sdb"
 #      - name: "nvme01" # multiple osds can be created on high performance devices
 #        config:
 #          osdsPerDevice: "5"
+#    - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths
 #      config: # configuration can be specified at the node level which overrides the cluster level config
-#        storeType: filestore
+#        storeType: filestore # this option is obsolete and only provided as an example
 #    - name: "172.17.4.301"
-#      deviceFilter: ^vdb
+#      deviceFilter: "^vdb"
   # The section for configuring management of daemon disruptions during upgrade or fencing.
   disruptionManagement:
     # If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically
-    # via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph-managed-disruptionbudgets.md). The operator will
+    # via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will
     # block eviction of OSDs by default and unblock them safely when drains are detected.
     managePodBudgets: false
     # A duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the
     # default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true`. The default value is `30` minutes.
     osdMaintenanceTimeout: 30
+    # A duration in minutes that the operator will wait for the placement groups to become healthy (active+clean) after a drain was completed and OSDs came back up.
+    # The operator will continue with the next drain if the timeout is exceeded. It only works if `managePodBudgets` is `true`.
+    # No values or 0 means that the operator will wait until the placement groups are healthy before unblocking the next drain.
+    pgHealthCheckTimeout: 0
     # If true, the operator will create and manage MachineDisruptionBudgets to ensure OSDs are only fenced when the cluster is healthy.
     # Only available on OpenShift.
     manageMachineDisruptionBudgets: false
     # Namespace in which to watch for the MachineDisruptionBudgets.
     machineDisruptionBudgetNamespace: openshift-machine-api
+
+  # healthChecks
+  # Valid values for daemons are 'mon', 'osd', 'status'
+  healthCheck:
+    daemonHealth:
+      mon:
+        disabled: false
+        interval: 45s
+      osd:
+        disabled: false
+        interval: 60s
+      status:
+        disabled: false
+        interval: 60s
+    # Change pod liveness probe; it works for all mon, mgr, and osd daemons
+    livenessProbe:
+      mon:
+        disabled: false
+      mgr:
+        disabled: false
+      osd:
+        disabled: false
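For reference, this template expects several Jinja2 variables to be supplied by the install role (rook_namespace, ceph_repository, ceph_version, rook_data_dir_path, rook_ceph_mon_count, rook_use_host_network, rook_ceph_crashcollector_disable, rook_storage_label, rook_storage_dir_path). The variable names below are taken from the template itself; the values are only an illustrative sketch, not the defaults shipped with this repository:

# Illustrative variable values for rendering cluster.yaml.j2 (assumed, not from this repo)
rook_namespace: rook-ceph                  # namespace the CephCluster object is created in
ceph_repository: ceph/ceph                 # Ceph container image repository
ceph_version: v15.2.9                      # Ceph image tag; prefer a specific tag in production
rook_data_dir_path: /var/lib/rook          # host path rendered into dataDirHostPath
rook_ceph_mon_count: 3                     # number of mons; must be an odd number
rook_use_host_network: false               # rendered into spec.network.hostNetwork
rook_ceph_crashcollector_disable: false    # rendered into spec.crashCollector.disable
rook_storage_label: storage-node           # node label key used for placement affinity and tolerations
rook_storage_dir_path: /rook/storage-dir   # only relevant if the commented-out directories entry is re-enabled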