k8s + zfs on ubuntu

Server install log

Prerequisites:

  • Ubuntu 22.04 installed, with root SSH access
  • a system partition, plus free space left for the ZFS pool

Remarks:

  • The system partition stores the containers, so a generous size helps avoid running out of space.
  • Swap must be disabled.

Preconfig

## PRECONFIG
apt update
DEBIAN_FRONTEND=noninteractive apt -yq dist-upgrade
ln -fs /usr/share/zoneinfo/Europe/Paris /etc/localtime
dpkg-reconfigure -f noninteractive tzdata

## MISC DEPS
# Install zfs utils and some useful tools
DEBIAN_FRONTEND=noninteractive apt install -yq zfsutils-linux apt-transport-https curl vim aptitude

# Install directory
mkdir /root/install
cd /root/install

Disable swap, remove swap partitions.

swapoff -a
vi /etc/fstab
# remove all swap partitions
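
Quick check that swap is really gone and what space is left for the pool (generic commands, nothing specific to this setup):

swapon --show   # should print nothing
free -h         # the Swap line should read 0B
lsblk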

k8s deps install

Install k8s deps:

  • containerd
  • runc

Containerd

Install containerd; get the latest version from https://github.com/containerd/containerd/releases

export CONTAINERD_VERSION=1.7.1
wget https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}-linux-amd64.tar.gz
wget https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}-linux-amd64.tar.gz.sha256sum
sha256sum -c containerd-${CONTAINERD_VERSION}-linux-amd64.tar.gz.sha256sum

tar Cxzvf /usr/local containerd-${CONTAINERD_VERSION}-linux-amd64.tar.gz
mkdir -p /usr/local/lib/systemd/system/

wget -O /usr/local/lib/systemd/system/containerd.service https://raw.githubusercontent.com/containerd/containerd/main/containerd.service
systemctl daemon-reload
systemctl enable --now containerd

# set systemd cgroup
mkdir /etc/containerd
containerd config default > /etc/containerd/config.toml
sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml
systemctl restart containerd
systemctl status containerd # should show "active (running)"
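
Optional sanity check; ctr ships in the containerd release tarball, so it can confirm the client reaches the daemon:

ctr version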

runc

Get latest runc from https://github.com/opencontainers/runc/releases

export RUNC_VERSION=1.1.7
wget https://github.com/opencontainers/runc/releases/download/v${RUNC_VERSION}/runc.amd64
install -m 755 runc.amd64 /usr/local/sbin/runc
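
# Quick check that the binary is usable
runc --version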

cni plugins

Get latest cni plugins from https://github.com/containernetworking/plugins/releases

export CNI_PLUGINS_VERSION=1.3.0
wget https://github.com/containernetworking/plugins/releases/download/v${CNI_PLUGINS_VERSION}/cni-plugins-linux-amd64-v${CNI_PLUGINS_VERSION}.tgz
wget https://github.com/containernetworking/plugins/releases/download/v${CNI_PLUGINS_VERSION}/cni-plugins-linux-amd64-v${CNI_PLUGINS_VERSION}.tgz.sha256
sha256sum -c cni-plugins-linux-amd64-v${CNI_PLUGINS_VERSION}.tgz.sha256
mkdir -p /opt/cni/bin
tar Cxzvf /opt/cni/bin cni-plugins-linux-amd64-v${CNI_PLUGINS_VERSION}.tgz

nerdctl

Get latest nerdctl from https://github.com/containerd/nerdctl/releases

# Install nerdctl
export NERDCTL_VERSION=1.4.0
wget https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-${NERDCTL_VERSION}-linux-amd64.tar.gz
wget https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/SHA256SUMS
sha256sum -c SHA256SUMS 2>&1 | grep nerdctl-${NERDCTL_VERSION}-linux-amd64.tar.gz
tar Cxzvf /usr/local/bin nerdctl-${NERDCTL_VERSION}-linux-amd64.tar.gz
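
Optional smoke test, assuming outbound registry access (pulls the public hello-world image; the CNI plugins installed above provide the default bridge network):

nerdctl run --rm hello-world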

Kubelet prepare and install

Network check

# Checkup network
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter


# sysctl params required by setup, params persist across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF

# Apply sysctl params without reboot
sudo sysctl --system
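
Verify that the modules are loaded and the sysctl values are applied (same checks as the upstream docs):

lsmod | grep -e overlay -e br_netfilter
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward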

Install kubelet


# Install and pin kubelet/kubeadm/kubectl
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
cat <<EOF | tee /etc/apt/sources.list.d/kubernetes.list
deb https://apt.kubernetes.io/ kubernetes-xenial main
EOF
apt update
DEBIAN_FRONTEND=noninteractive  apt install -yq kubelet kubeadm kubectl
apt-mark hold kubelet kubeadm kubectl

# Set the node name and fully qualified hostname
export NODE_NAME=xxxx
export NODE_FQDN=xxxx.fq.dn
hostnamectl hostname $NODE_FQDN


# Init cluster
## Use 10.244.0.0/16 for flannel, 192.168.0.0/16 for calico
export NETWORK_CIDR=192.168.0.0/16

kubeadm init --pod-network-cidr=$NETWORK_CIDR --node-name $NODE_NAME --upload-certs --control-plane-endpoint $NODE_FQDN --dry-run
kubeadm init --pod-network-cidr=$NETWORK_CIDR --node-name $NODE_NAME --upload-certs --control-plane-endpoint $NODE_FQDN > k8s-init.log 2>&1
# install messages are kept in k8s-init.log

# Check if cluster started
export KUBECONFIG=/etc/kubernetes/admin.conf
kubectl get pods --all-namespaces
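
To keep kubectl usable in later shells (the export above only lasts for the current session), the kubeadm init output suggests copying the admin kubeconfig:

mkdir -p ~/.kube
cp -i /etc/kubernetes/admin.conf ~/.kube/config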

Set up network addon

Flannel / Calico

Flannel

kubectl apply -f https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml
watch kubectl get pods --all-namespaces # wait for all pods running smoothly

Calico

Temporarily stick to the 3.25.1 Tigera operator until https://github.com/projectcalico/calico/issues/7715 is solved.

export CALICO_VERSION=3.25.1
kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v$CALICO_VERSION/manifests/tigera-operator.yaml

curl https://raw.githubusercontent.com/projectcalico/calico/v$CALICO_VERSION/manifests/custom-resources.yaml -O
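
# The custom resources still have to be created; the default pod CIDR in the
# manifest (192.168.0.0/16) matches NETWORK_CIDR above, adjust it first if
# another CIDR was used at kubeadm init time.
kubectl create -f custom-resources.yaml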


# Install calicoctl
curl -L https://github.com/projectcalico/calico/releases/latest/download/calicoctl-linux-amd64 -o calicoctl
mv calicoctl /usr/local/bin/
chmod +x /usr/local/bin/calicoctl

# Check that all pods end up running
watch kubectl get pods --all-namespaces # wait for all pods running smoothly

Start node

This is a single-node cluster, so allow workloads to be scheduled on the control-plane node.

# Remove the NoSchedule taint from the control-plane node
kubectl taint nodes --all node-role.kubernetes.io/control-plane-
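
# The node should report Ready and no control-plane taint should remain
kubectl get nodes -o wide
kubectl describe node $NODE_NAME | grep -i taints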

Install utils

  • get the latest k9s from https://github.com/derailed/k9s/releases
  • get the latest helm from https://github.com/helm/helm/releases

# Install helm and k9s
## k9s

export K9S_VERSION=0.27.4
wget https://github.com/derailed/k9s/releases/download/v${K9S_VERSION}/k9s_Linux_amd64.tar.gz
tar xzf k9s_Linux_amd64.tar.gz
install -m 755 k9s /usr/local/bin/k9s

## helm
## https://github.com/helm/helm/releases to get version
export HELM_VERSION=3.12.0
wget https://get.helm.sh/helm-v${HELM_VERSION}-linux-amd64.tar.gz
wget https://get.helm.sh/helm-v${HELM_VERSION}-linux-amd64.tar.gz.sha256sum
sha256sum -c helm-v${HELM_VERSION}-linux-amd64.tar.gz.sha256sum
tar zxvf helm-v${HELM_VERSION}-linux-amd64.tar.gz
install -m 755 linux-amd64/helm /usr/local/bin/helm

Configure cluster

Storage class

Partition the disks: ZFS cache + ZFS data

Device          Start        End    Sectors  Size Type
/dev/sda1        2048    1048575    1046528  511M EFI System
/dev/sda2     1048576    3145727    2097152    1G Linux RAID
/dev/sda3     3145728 1051721727 1048576000  500G Linux RAID
/dev/sda4  1051721728 1156579327  104857600   50G Linux filesystem
/dev/sda5  1156579328 7501476494 6344897167    3T Linux filesystem

Device          Start        End    Sectors  Size Type
/dev/sdb1        2048    1048575    1046528  511M EFI System
/dev/sdb2     1048576    3145727    2097152    1G Linux RAID
/dev/sdb3     3145728 1051721727 1048576000  500G Linux RAID
/dev/sdb4  1051721728 1156579327  104857600   50G Linux filesystem
/dev/sdb5  1156579328 7501476494 6344897167    3T Linux filesystem
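
For reference, one possible way to add the 50G cache and remaining-space data partitions with sgdisk (hypothetical example; the first three partitions come from the Ubuntu installer, adjust devices and sizes to the actual layout):

# sgdisk is in the gdisk package
sgdisk -n 4:0:+50G -t 4:8300 /dev/sda
sgdisk -n 5:0:0    -t 5:8300 /dev/sda
sgdisk -n 4:0:+50G -t 4:8300 /dev/sdb
sgdisk -n 5:0:0    -t 5:8300 /dev/sdb
partprobe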

Create zfs pool

zpool create tank mirror /dev/sda5 /dev/sdb5 cache /dev/sda4 /dev/sdb4
zpool status
zfs list
zfs set compression=lz4 tank
zfs create tank/k8s
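
# compression set on the pool root is inherited by the dataset
zfs get compression tank/k8s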

Install zfs-localpv

kubectl apply -f https://openebs.github.io/charts/zfs-operator.yaml
kubectl get pods -n kube-system -l role=openebs-zfs

cat <<EOF | sudo tee storageclass.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: zfs
parameters:
  recordsize: "4k"
  compression: "on"
  dedup: "off"
  fstype: "zfs"
  poolname: "tank/k8s"
allowVolumeExpansion: true
provisioner: zfs.csi.openebs.io
EOF
kubectl apply -f storageclass.yaml
kubectl patch storageclass zfs -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
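
# The class should now be listed as (default)
kubectl get storageclass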

Test that the storage class is working

# Test pvc
cat <<EOF | sudo tee pvc.yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: csi-zfspv
spec:
  storageClassName: zfs
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 4Gi
EOF
kubectl apply -f pvc.yaml
kubectl get pvc
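# With fstype "zfs" the provisioner creates a dataset for the PV under the pool
zfs list -r tank/k8s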
kubectl delete pvc csi-zfspv

Ingress

Use the server's IP address for the LB range.


# Install metallb
export LB_RANGE="5.196.79.155/32"
helm repo add metallb https://metallb.github.io/metallb
helm install --namespace metallb-system --create-namespace  metallb  metallb/metallb
cat <<EOF | sudo tee metallb.yaml
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: base
  namespace: metallb-system
spec:
  addresses:
  - ${LB_RANGE}
EOF
# Wait for metallb to run before applying config
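# for example:
kubectl -n metallb-system wait --for=condition=Ready pods --all --timeout=180s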
kubectl apply -f metallb.yaml
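
Depending on how the address should be announced, an L2Advertisement referencing the pool may also be needed (the range here is the node's own IP, so traffic may already reach it without one); a minimal sketch:

cat <<EOF | sudo tee l2advertisement.yaml
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  name: base
  namespace: metallb-system
spec:
  ipAddressPools:
    - base
EOF
kubectl apply -f l2advertisement.yaml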

# Install haproxy ingress
helm repo add haproxy-ingress https://haproxy-ingress.github.io/charts
helm install haproxy-ingress haproxy-ingress/haproxy-ingress \
  --create-namespace --namespace ingress-controller

# Check that the load balancer has been given an IP
kubectl --namespace ingress-controller get services haproxy-ingress -o wide


# Install cert-manager
helm repo add jetstack https://charts.jetstack.io
helm install \
  cert-manager jetstack/cert-manager \
  --namespace cert-manager \
  --create-namespace \
  --set installCRDs=true

# Add cluster issuers
cat <<EOF | tee clusterissuers.yaml
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-staging
spec:
  acme:
    # The ACME server URL
    server: https://acme-staging-v02.api.letsencrypt.org/directory
    # Email address used for ACME registration
    email: jm.barbier+acme@solidev.net
    # Name of a secret used to store the ACME account private key
    privateKeySecretRef:
      name: letsencrypt-staging
    # Enable HTTP01 validations
    solvers:
      - http01:
          ingress:
            ingressTemplate:
              metadata:
                annotations:
                  kubernetes.io/ingress.class: haproxy
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    # The ACME server URL
    server: https://acme-v02.api.letsencrypt.org/directory
    # Email address used for ACME registration
    email: jm.barbier+acme@solidev.net
    # Name of a secret used to store the ACME account private key
    privateKeySecretRef:
      name: letsencrypt-prod
    # Enable HTTP01 validations
    solvers:
      - http01:
          ingress:
            ingressTemplate:
              metadata:
                annotations:
                  kubernetes.io/ingress.class: haproxy
EOF
kubectl apply -f clusterissuers.yaml
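
# Both issuers should report Ready once registered against the ACME endpoints
kubectl get clusterissuers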


# Test HTTPS with a basic nginx service
helm create test
cd test
cat <<EOF | sudo tee ingress.yaml
ingress:
  enabled: true
  annotations:
    kubernetes.io/ingress.class: haproxy
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
  hosts:
    - host: ${NODE_FQDN}
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls:
    - hosts:
        -  ${NODE_FQDN}
      secretName: "test-tls-secret"
EOF
helm install test . -f values.yaml -f ingress.yaml
# test https://${NODE_FQDN}/ in a browser
helm uninstall test

Monitoring


# Install metrics server

# Add 'serverTLSBootstrap: true' to the kubeadm-config ConfigMap in kube-system (data.ClusterConfiguration.serverTLSBootstrap)
# Add 'serverTLSBootstrap: true' to /var/lib/kubelet/config.yaml
# Restart the kubelet
systemctl restart kubelet
kubectl get csr
kubectl certificate approve xxxxxx
# Install metrics server
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
# Check for metrics-server running
kubectl get pods -n kube-system  | grep metrics-server
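
# Once metrics start flowing (can take a minute or two), top should return data
kubectl top nodes
kubectl top pods -A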