fluxcd : adding repo example with some apps

This commit is contained in:
2025-05-28 16:00:46 +02:00
parent 00a5e56c27
commit e5738c5c3f
111 changed files with 18051 additions and 0 deletions

View File

@@ -0,0 +1,18 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
release: prometheus
name: alerts-flux.rules
namespace: monitoring
spec:
groups:
- name: Flux
rules:
- alert: ReconciliationFailure
expr: max(gotk_reconcile_condition{status="False",type="Ready"}) by (exported_namespace, name, kind) + on(exported_namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"}) by (exported_namespace, name, kind)) * 2 == 1
for: 10m
labels:
severity: warning
annotations:
summary: '{{ $labels.kind }} {{ $labels.exported_namespace }}/{{ $labels.name }} reconciliation has been failing for more than ten minutes.'

View File

@@ -0,0 +1,397 @@
---
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: prometheus
spec:
upgrade:
remediation:
retries: -1
chart:
spec:
chart: kube-prometheus-stack
version: "54.x.x"
sourceRef:
kind: HelmRepository
name: prometheus-community
namespace: infrastructure
interval: 30m0s
values:
fullnameOverride: "prometheus"
## Create default rules for monitoring the cluster
##
defaultRules:
create: true
rules:
etcd: false
kubernetesSystem: false
kubeScheduler: false
kubeProxy: false
configReloaders: true
general: true
k8s: true
kubeApiserverAvailability: true
kubeApiserverBurnrate: true
kubeApiserverHistogram: true
kubeApiserverSlos: true
kubeControllerManager: true
kubelet: true
kubePrometheusGeneral: true
kubePrometheusNodeRecording: true
kubernetesApps: true
kubernetesResources: true
kubernetesStorage: true
kubeSchedulerAlerting: true
kubeSchedulerRecording: true
kubeStateMetrics: true
network: true
node: true
nodeExporterAlerting: true
nodeExporterRecording: true
prometheus: true
prometheusOperator: true
global:
rbac:
create: true
alertmanager:
enabled: true
config:
global:
resolve_timeout: 5m
route:
group_by:
- job
- alertname
receiver: 'null'
routes:
- receiver: 'null'
matchers:
- alertname =~ "InfoInhibitor|Watchdog"
receivers:
- name: 'null'
templates:
- '/etc/alertmanager/config/*.tmpl'
serviceMonitor:
interval: ""
selfMonitor: true
alertmanagerSpec:
replicas: 1
retention: 120h
storage: {}
# volumeClaimTemplate:
# spec:
# storageClassName: gluster
# accessModes: ["ReadWriteOnce"]
# resources:
# requests:
# storage: 50Gi
# selector: {}
resources:
requests:
cpu: 10m
memory: 64Mi
limits:
cpu: 501m
memory: 64Mi
podAntiAffinity: "soft"
podAntiAffinityTopologyKey: kubernetes.io/hostname
grafana:
enabled: false
kubeApiServer:
enabled: true
tlsConfig:
# serverName: kubernetes
insecureSkipVerify: true
serviceMonitor:
interval: ""
proxyUrl: ""
kubelet:
enabled: true
namespace: kube-system
serviceMonitor:
additionalLabels: {}
cAdvisor: true
cAdvisorMetricRelabelings:
- action: drop
regex: >-
container_cpu_(load_average_10s|system_seconds_total|user_seconds_total)
sourceLabels:
- __name__
- action: drop
regex: >-
container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)
sourceLabels:
- __name__
- action: drop
regex: container_memory_(mapped_file|swap)
sourceLabels:
- __name__
- action: drop
regex: container_(file_descriptors|tasks_state|threads_max)
sourceLabels:
- __name__
- action: drop
regex: container_spec.*
sourceLabels:
- __name__
- action: drop
regex: .+;
sourceLabels:
- id
- pod
cAdvisorRelabelings:
- sourceLabels:
- __metrics_path__
targetLabel: metrics_path
https: true
interval: ''
metricRelabelings: []
probes: true
probesMetricRelabelings: []
probesRelabelings:
- sourceLabels:
- __metrics_path__
targetLabel: metrics_path
proxyUrl: ''
relabelings:
- sourceLabels:
- __metrics_path__
targetLabel: metrics_path
resource: false
kubeControllerManager:
enabled: false
serviceMonitor:
enabled: false
interval: ""
proxyUrl: ""
https: null
insecureSkipVerify: null
serverName: null
coreDns:
enabled: true
service:
port: 9153
targetPort: 9153
# selector:
# k8s-app: kube-dns
serviceMonitor:
interval: ""
kubeDns:
enabled: false
kubeEtcd:
enabled: false
serviceMonitor:
enabled: false
interval: ""
proxyUrl: ""
scheme: http
insecureSkipVerify: false
kubeScheduler:
enabled: false
serviceMonitor:
enabled: false
interval: ""
https: null
insecureSkipVerify: null
kubeProxy:
enabled: false
serviceMonitor:
enabled: false
interval: ""
https: false
kubeStateMetrics:
enabled: true
kube-state-metrics:
prometheus:
monitor:
enabled: true
interval: ""
selfMonitor:
enabled: true
nodeExporter:
enabled: true
prometheus-node-exporter:
extraArgs:
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
- --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
prometheus:
monitor:
enabled: true
interval: ""
prometheusOperator:
enabled: true
serviceMonitor:
interval: ""
scrapeTimeout: ""
selfMonitor: true
resources:
requests:
cpu: 125m
memory: 128Mi
limits:
cpu: 500m
memory: 128Mi
# Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
# because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
##
hostNetwork: false
dnsConfig: {}
# nameservers:
# - 1.2.3.4
# searches:
# - ns1.svc.cluster-domain.example
# - my.dns.search.suffix
# options:
# - name: ndots
# value: "2"
# - name: edns0
# Enable vertical pod autoscaler support for prometheus-operator
verticalPodAutoscaler:
enabled: false
# List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory
controlledResources: []
# Define the max allowed resources for the pod
maxAllowed: {}
# cpu: 200m
# memory: 100Mi
# Define the min allowed resources for the pod
minAllowed: {}
# cpu: 200m
# memory: 100Mi
updatePolicy:
# Specifies whether recommended updates are applied when a Pod is started and whether recommended updates
# are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto".
updateMode: Auto
prometheusConfigReloader:
resources:
requests:
cpu: 10m
memory: 35Mi
limits:
cpu: 500m
memory: 1024Mi
prometheus:
enabled: true
thanosService:
enabled: false
thanosServiceMonitor:
enabled: false
interval: ""
thanosServiceExternal:
enabled: false
podDisruptionBudget:
enabled: true
minAvailable: 1
maxUnavailable: ""
ingress:
enabled: false
ingressClassName: nginx
# hosts:
# - prometheus.domain.com
hosts: []
## Paths to use for ingress rules - one path should match the prometheusSpec.routePrefix
##
paths: []
# - /
pathType: Prefix
serviceMonitor:
interval: ""
selfMonitor: true
prometheusSpec:
## If true, pass --storage.tsdb.max-block-duration=2h to prometheus. This is already done if using Thanos
##
disableCompaction: false
scrapeInterval: "15s"
scrapeTimeout: "10s"
evaluationInterval: "1m"
enableAdminAPI: false
priorityClassName: "system-cluster-critical"
remoteWrite:
- url: https://mimir.example.com/api/v1/push
ruleNamespaceSelector: {}
ruleSelectorNilUsesHelmValues: false
ruleSelector: {}
serviceMonitorSelectorNilUsesHelmValues: false
serviceMonitorSelector: {}
serviceMonitorNamespaceSelector: {}
podMonitorSelectorNilUsesHelmValues: false
podMonitorSelector: {}
podMonitorNamespaceSelector: {}
probeSelectorNilUsesHelmValues: true
probeSelector: {}
probeNamespaceSelector: {}
retention: 1d
retentionSize: "25GiB"
## Enable compression of the write-ahead log using Snappy.
##
walCompression: false
replicas: 1
podAntiAffinity: "soft"
podAntiAffinityTopologyKey: kubernetes.io/hostname
resources:
limits:
cpu: 2000m
memory: 6000Mi
requests:
cpu: 900m
memory: 6000Mi
thanos: {}
# objectStorageConfig:
# key: objstore.yml
# name: thanos-objstore-config

View File

@@ -0,0 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: infrastructure
resources:
- hr-kube-prometheus-stack.yaml

View File

@@ -0,0 +1,34 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: inf-prometheus-core
namespace: infrastructure
spec:
interval: 5m
path: ./infrastructure/base/prometheus/core
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: prometheus-operator
namespace: infrastructure
---
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: inf-prometheus-resources
namespace: infrastructure
spec:
dependsOn:
- name: inf-prometheus-core
interval: 5m
path: ./infrastructure/base/prometheus/resources
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system

View File

@@ -0,0 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: infrastructure
resources:
- dependencies.yaml

View File

@@ -0,0 +1,24 @@
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: flux-system
labels:
app.kubernetes.io/part-of: flux
release: prometheus
spec:
namespaceSelector:
matchNames:
- flux-system
selector:
matchExpressions:
- key: app
operator: In
values:
- helm-controller
- source-controller
- kustomize-controller
- notification-controller
- image-automation-controller
- image-reflector-controller
podMetricsEndpoints:
- port: http-prom

View File

@@ -0,0 +1,15 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: http-broadcaster
spec:
endpoints:
- interval: 30s
port: web
scheme: http
scrapeTimeout: 29s
namespaceSelector:
any: true
selector:
matchLabels:
app.kubernetes.io/name: "http-broadcaster"

View File

@@ -0,0 +1,10 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: infrastructure
resources:
- flux-podmonitor.yaml
- php-fpm-exporters-servicemonitor.yaml
- rabbitmq-servicemonitor.yaml
- python-metrics-servicemonitor.yaml
- php-metrics-servicemonitor.yaml
- http-broadcaster-servicemonitor.yaml

View File

@@ -0,0 +1,15 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: php-fpm-exporters
spec:
endpoints:
- interval: 15s
port: php-fpm-exporter
scheme: http
scrapeTimeout: 14s
namespaceSelector:
any: true
selector:
matchLabels:
k8s.example.com/php-fpm-exporter-scrape: 'true'

View File

@@ -0,0 +1,16 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: php-metrics
spec:
endpoints:
- interval: 15s
port: php-metrics
path: /metrics
scheme: http
scrapeTimeout: 14s
namespaceSelector:
any: true
selector:
matchLabels:
k8s.example.com/php-metrics-scrape: 'true'

View File

@@ -0,0 +1,16 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: python-metrics
spec:
endpoints:
- interval: 15s
port: metrics
path: /metrics
scheme: http
scrapeTimeout: 14s
namespaceSelector:
any: true
selector:
matchLabels:
k8s.example.com/python-metrics-scrape: 'true'

View File

@@ -0,0 +1,42 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: rabbitmq
namespace: rabbitmq-operator
spec:
endpoints:
- port: prometheus
scheme: http
interval: 15s
scrapeTimeout: 14s
- port: prometheus-tls
scheme: https
interval: 15s
scrapeTimeout: 14s
tlsConfig:
insecureSkipVerify: true
- port: prometheus
scheme: http
path: /metrics/detailed
params:
family:
- queue_coarse_metrics
- queue_metrics
interval: 15s
scrapeTimeout: 14s
- port: prometheus-tls
scheme: https
path: /metrics/detailed
params:
family:
- queue_coarse_metrics
- queue_metrics
interval: 15s
scrapeTimeout: 14s
tlsConfig:
insecureSkipVerify: true
selector:
matchLabels:
app.kubernetes.io/component: rabbitmq
namespaceSelector:
any: true