Installing a Kubernetes v1.15 HA (Highly Available) Cluster with kubeadm

Posted by Sunday on 2019-10-10

Environment

OS: CentOS 7.5
Kubernetes: 1.15.4
Docker: 18.09

Hostname      IP / VIP                        Components
k8s-master1   192.168.10.81 / 192.168.10.80   haproxy, keepalived, etcd, kube-apiserver, kube-controller-manager, kube-scheduler, node
k8s-master2   192.168.10.82 / 192.168.10.80   haproxy, keepalived, etcd, kube-apiserver, kube-controller-manager, kube-scheduler, node
k8s-master3   192.168.10.83 / 192.168.10.80   haproxy, keepalived, etcd, kube-apiserver, kube-controller-manager, kube-scheduler, node
k8s-node1     192.168.10.84                   kubelet, kube-proxy, docker, calico, core-dns
k8s-node2     192.168.10.85                   kubelet, kube-proxy, docker, calico, core-dns

Environment Preparation

Configure /etc/hosts

cat >> /etc/hosts <<EOF
192.168.10.81 k8s-master1
192.168.10.82 k8s-master2
192.168.10.83 k8s-master3
192.168.10.84 k8s-node1
192.168.10.85 k8s-node2
EOF

Passwordless SSH

yum install -y expect
ssh-keygen -t rsa -P "" -f /root/.ssh/id_rsa
export mypass=123456
name=(k8s-master1 k8s-master2 k8s-master3 k8s-node1 k8s-node2)

for i in ${name[@]};do expect -c "
spawn ssh-copy-id -i /root/.ssh/id_rsa.pub root@$i
expect {
\"*yes/no*\" {send \"yes\r\"; exp_continue}
\"*password*\" {send \"$mypass\r\"; exp_continue}
\"*Password*\" {send \"$mypass\r\";}
}";done
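
To confirm the keys were copied correctly, a quick check such as the following can be run (it reuses the name array defined above; -o BatchMode=yes makes ssh fail instead of prompting for a password):

for i in ${name[@]}; do ssh -o BatchMode=yes root@$i hostname; done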

hostname

name=(k8s-master1 k8s-master2 k8s-master3 k8s-node1 k8s-node2)
for i in ${name[@]};do scp /etc/hosts root@$i:/etc/;done

name=(k8s-master1 k8s-master2 k8s-master3 k8s-node1 k8s-node2)
for i in ${name[@]};do ssh root@$i hostnamectl set-hostname $i;done

Disable firewall, SELinux, and swap

systemctl stop firewalld
systemctl disable firewalld
setenforce 0
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat
iptables -P FORWARD ACCEPT
swapoff -a
sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab

Upgrade the kernel

# Docker overlay2 requires a 4.x kernel
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm
yum --enablerepo=elrepo-kernel install -y kernel-lt
grub2-set-default 0
grub2-mkconfig -o /etc/grub2.cfg
grubby --default-kernel
reboot

Kernel parameters

cat >/etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
EOF
sysctl --system

ipvs

yum install -y conntrack ipvsadm ipset 
cat >/etc/modules-load.d/ipvs.conf << EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF

modprobe -a ip_vs ip_vs_rr ip_vs_wrr ip_vs_sh nf_conntrack
lsmod | egrep "ip_vs_rr|nf_conntrack"

Time synchronization

yum install -y ntpdate
echo "*/30 * * * * ntpdate time7.aliyun.com >/dev/null 2>&1" >> /var/spool/cron/root
ntpdate time7.aliyun.com

cfssl

curl -L -o /usr/local/bin/cfssl https://pkg.cfssl.org/R1.2/cfssl_linux-amd64
curl -L -o /usr/local/bin/cfssljson https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64
chmod +x /usr/local/bin/{cfssl,cfssljson}

https://coreos.com/os/docs/latest/generate-self-signed-certificates.html

mkdir ~/cfssl && cd ~/cfssl
#cfssl print-defaults config > ca-config.json
cat > ca-config.json <<EOF
{
  "signing": {
    "default": {
      "expiry": "87600h"
    },
    "profiles": {
      "kubernetes": {
        "usages": [
          "signing",
          "key encipherment",
          "server auth",
          "client auth"
        ],
        "expiry": "87600h"
      }
    }
  }
}
EOF

#cfssl print-defaults csr > ca-csr.json
cat > ca-csr.json <<EOF
{
  "CN": "kubernetes",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "GuangZhou",
      "L": "GuangZhou",
      "O": "k8s",
      "OU": "System"
    }
  ],
  "ca": {
    "expiry": "87600h"
  }
}
EOF

cfssl gencert -initca ca-csr.json | cfssljson -bare ca

cat > etcd-csr.json <<EOF
{
  "CN": "etcd",
  "hosts": [
    "127.0.0.1",
    "192.168.10.81",
    "192.168.10.82",
    "192.168.10.83"
  ],
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "GuangZhou",
      "L": "GuangZhou",
      "O": "k8s",
      "OU": "System"
    }
  ]
}
EOF

cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes etcd-csr.json | cfssljson -bare etcd

# Distribute the etcd certificates to all three etcd nodes (run from ~/cfssl)
ETCDHOSTS=(192.168.10.81 192.168.10.82 192.168.10.83)
for i in ${ETCDHOSTS[@]}; do
  ssh root@$i "mkdir -p /etc/etcd/ssl"
  scp ca.pem etcd.pem etcd-key.pem root@$i:/etc/etcd/ssl/
done

etcd

https://github.com/coreos/etcd/releases

mkdir -p /data/etcd
mkdir -p /etc/etcd/ssl
cp ~/cfssl/{ca.pem,etcd.pem,etcd-key.pem} /etc/etcd/ssl/

cd /usr/local/src
wget https://github.com/etcd-io/etcd/releases/download/v3.4.1/etcd-v3.4.1-linux-amd64.tar.gz
tar xf etcd-v3.4.1-linux-amd64.tar.gz
cp etcd-v3.4.1-linux-amd64/{etcd,etcdctl} /usr/local/bin/

ETCDHOSTS=(192.168.10.81 192.168.10.82 192.168.10.83)
for i in ${ETCDHOSTS[@]}; do scp /usr/local/bin/etcd* root@$i:/usr/local/bin/; done

etcd.service

ETCDHOSTS=(192.168.10.81 192.168.10.82 192.168.10.83)
NAMES=("etcd-0" "etcd-1" "etcd-2")

for i in "${!ETCDHOSTS[@]}"; do
HOST=${ETCDHOSTS[$i]}
NAME=${NAMES[$i]}

cat << EOF > /tmp/etcd.service.${HOST}
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
ExecStart=/usr/local/bin/etcd \\
--data-dir=/data/etcd \\
--name=${NAME} \\
--trusted-ca-file=/etc/etcd/ssl/ca.pem \\
--cert-file=/etc/etcd/ssl/etcd.pem \\
--key-file=/etc/etcd/ssl/etcd-key.pem \\
--peer-trusted-ca-file=/etc/etcd/ssl/ca.pem \\
--peer-cert-file=/etc/etcd/ssl/etcd.pem \\
--peer-key-file=/etc/etcd/ssl/etcd-key.pem \\
--peer-client-cert-auth \\
--client-cert-auth \\
--listen-peer-urls=https://${HOST}:2380 \\
--initial-advertise-peer-urls=https://${HOST}:2380 \\
--listen-client-urls=https://${HOST}:2379,http://127.0.0.1:2379 \\
--advertise-client-urls=https://${HOST}:2379 \\
--initial-cluster-token=etcd-cluster-0 \\
--initial-cluster=${NAMES[0]}=https://${ETCDHOSTS[0]}:2380,${NAMES[1]}=https://${ETCDHOSTS[1]}:2380,${NAMES[2]}=https://${ETCDHOSTS[2]}:2380 \\
--initial-cluster-state=new
Restart=on-failure
RestartSec=5
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
done

ETCDHOSTS=(192.168.10.81 192.168.10.82 192.168.10.83)
for i in ${ETCDHOSTS[@]};do
scp /tmp/etcd.service.$i root@$i:/etc/systemd/system/etcd.service;
done

# Run the following on every etcd node (the cluster only reports healthy once all three members are up):
systemctl daemon-reload
systemctl enable etcd
systemctl start etcd

ETCDCTL_API=3 etcdctl \
--cacert=/etc/etcd/ssl/ca.pem \
--cert=/etc/etcd/ssl/etcd.pem \
--key=/etc/etcd/ssl/etcd-key.pem \
--endpoints=https://${ETCDHOSTS[0]}:2379,https://${ETCDHOSTS[1]}:2379,https://${ETCDHOSTS[2]}:2379 endpoint health
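
Beyond endpoint health, it can be worth confirming that all three members actually joined the cluster; a small sketch using the same certificates and endpoints as above:

ETCDCTL_API=3 etcdctl \
--cacert=/etc/etcd/ssl/ca.pem \
--cert=/etc/etcd/ssl/etcd.pem \
--key=/etc/etcd/ssl/etcd-key.pem \
--endpoints=https://${ETCDHOSTS[0]}:2379,https://${ETCDHOSTS[1]}:2379,https://${ETCDHOSTS[2]}:2379 \
member list -w table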

If the nodes' clocks are out of sync, etcd rejects peer connections with errors like the ones below; make sure time is synchronized:

Oct 10 03:36:05 node1 etcd: rejected connection from "192.168.10.83:52514" (error "remote error: tls: bad certificate", ServerName "")
Oct 10 03:36:05 node1 etcd: rejected connection from "192.168.10.83:52518" (error "remote error: tls: bad certificate", ServerName "")
Oct 10 03:36:05 node1 etcd: rejected connection from "192.168.10.82:57950" (error "remote error: tls: bad certificate", ServerName "")
Oct 10 03:36:05 node1 etcd: rejected connection from "192.168.10.82:57952" (error "remote error: tls: bad certificate", ServerName "")

haproxy

yum install -y haproxy
cat > /etc/haproxy/haproxy.cfg <<EOF
global
    log 127.0.0.1 local2
    chroot /var/lib/haproxy
    pidfile /var/run/haproxy.pid
    maxconn 4000
    user haproxy
    group haproxy
    daemon
    stats socket /var/lib/haproxy/stats

defaults
    mode http
    log global
    option httplog
    option dontlognull
    option http-server-close
    option forwardfor except 127.0.0.0/8
    option redispatch
    retries 3
    timeout http-request 10s
    timeout queue 1m
    timeout connect 10s
    timeout client 1m
    timeout server 1m
    timeout http-keep-alive 10s
    timeout check 10s
    maxconn 3000

frontend kubernetes-apiserver
    mode tcp
    bind *:8443
    option tcplog
    default_backend kubernetes-apiserver

backend kubernetes-apiserver
    mode tcp
    balance roundrobin
    server k8s01 192.168.10.81:6443 check
    server k8s02 192.168.10.82:6443 check
    server k8s03 192.168.10.83:6443 check

listen stats
    bind *:1080
    stats auth admin:sunday
    stats refresh 5s
    stats realm HAProxy\ Statistics
    stats uri /haproxy-stats
EOF

systemctl enable haproxy
systemctl start haproxy

# Distribute the config to the other masters and enable/start haproxy there as well
ETCDHOSTS=(192.168.10.81 192.168.10.82 192.168.10.83)
for i in ${ETCDHOSTS[@]}; do
  scp /etc/haproxy/haproxy.cfg root@$i:/etc/haproxy/haproxy.cfg
  ssh root@$i "systemctl enable haproxy && systemctl restart haproxy"
done
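
A quick way to confirm haproxy is up on a master is to check that the frontend and stats ports are listening and that the stats page answers with the credentials configured above; a sketch:

ss -lntp | grep -E ':8443|:1080'
curl -u admin:sunday http://127.0.0.1:1080/haproxy-stats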

keepalived

yum install -y keepalived psmisc
cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived
global_defs {
    router_id keepalived
}

vrrp_script check_haproxy {
    script "killall -0 haproxy"
    interval 2
    weight -3
    fall 2
    rise 2
}

vrrp_instance VI_1 {
    state MASTER
    interface eth0
    virtual_router_id 51
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass Sunday
    }
    virtual_ipaddress {
        192.168.10.80
    }
    track_script {
        check_haproxy
    }
}
EOF


ETCDHOSTS=(192.168.10.81 192.168.10.82 192.168.10.83)
for i in ${ETCDHOSTS[@]}; do
  scp /etc/keepalived/keepalived.conf root@$i:/etc/keepalived/keepalived.conf
done
ssh root@${ETCDHOSTS[1]} "sed -i -e 's#MASTER#BACKUP#g' -e 's#priority 100#priority 99#g' /etc/keepalived/keepalived.conf"
ssh root@${ETCDHOSTS[2]} "sed -i -e 's#MASTER#BACKUP#g' -e 's#priority 100#priority 98#g' /etc/keepalived/keepalived.conf"

# Enable and start keepalived on all three masters
for i in ${ETCDHOSTS[@]}; do ssh root@$i "systemctl enable keepalived && systemctl start keepalived"; done
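
To verify the VIP and the failover behaviour, a rough sketch like the following can be used (it assumes eth0 as in the config above; stopping haproxy makes check_haproxy fail, lowers the priority and moves the VIP to a BACKUP node):

# On k8s-master1: the VIP should be bound here while it is MASTER
ip addr show eth0 | grep 192.168.10.80

# Simulate a failure, then confirm the VIP moved to k8s-master2
systemctl stop haproxy
ssh root@192.168.10.82 "ip addr show eth0 | grep 192.168.10.80"
systemctl start haproxy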

docker

https://v1-15.docs.kubernetes.io/docs/setup/release/notes/ (search the page for "docker version" to see the validated Docker versions)

yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
#yum list docker-ce --showduplicates | sort -r
yum install -y docker-ce-18.09.8

mkdir -p /etc/docker
cat > /etc/docker/daemon.json <<EOF
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "registry-mirrors": ["https://hub-mirror.c.163.com"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2"
}
EOF

systemctl enable docker --now
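
Before moving on to kubeadm, it is worth checking that Docker picked up the daemon.json settings, for example:

docker info | grep -iE 'cgroup driver|storage driver'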

kubeadm install

https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/install-kubeadm/
https://v1-15.docs.kubernetes.io/docs/setup/production-environment/tools/kubeadm/high-availability/

cat << EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
yum install -y kubeadm-1.15.4 kubelet-1.15.4  kubectl-1.15.4
systemctl enable --now kubelet

kubeadm master

Add the external etcd configuration

#kubeadm config print init-defaults > kubeadm-config.yaml
cat << EOF > kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
controlPlaneEndpoint: "192.168.10.80:8443"
etcd:
  external:
    endpoints:
    - https://192.168.10.81:2379
    - https://192.168.10.82:2379
    - https://192.168.10.83:2379
    caFile: /etc/etcd/ssl/ca.pem
    certFile: /etc/etcd/ssl/etcd.pem
    keyFile: /etc/etcd/ssl/etcd-key.pem
kubernetesVersion: v1.15.4
imageRepository: gcr.azk8s.cn/google_containers
networking:
  #podSubnet: 192.168.0.0/16  # calico's default subnet overlaps with our internal network
  podSubnet: 10.244.0.0/16
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
EOF
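
Optionally, the images required by this configuration can be listed and pre-pulled on each master before running kubeadm init, which speeds up the init step and surfaces registry problems early:

kubeadm config images list --config kubeadm-config.yaml
kubeadm config images pull --config kubeadm-config.yaml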

The first master

kubeadm init --config kubeadm-config.yaml --upload-certs
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of the control-plane node running the following command on each as root:

kubeadm join 192.168.10.80:8443 --token ytexwo.j1364ochwtv1t758 \
--discovery-token-ca-cert-hash sha256:1ebbb929926604f3126c875277d6b3f20a9bf35f13bfd01245b6babf74f7dd28 \
--control-plane --certificate-key 8d1f59c025b6b03a9f1585efadfb44edecf330c67c6a89ef5d83cf8bf38005df

Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.10.80:8443 --token ytexwo.j1364ochwtv1t758 \
--discovery-token-ca-cert-hash sha256:1ebbb929926604f3126c875277d6b3f20a9bf35f13bfd01245b6babf74f7dd28

Add the second and third masters

kubeadm join 192.168.10.80:8443 --token ytexwo.j1364ochwtv1t758 \
--discovery-token-ca-cert-hash sha256:1ebbb929926604f3126c875277d6b3f20a9bf35f13bfd01245b6babf74f7dd28 \
--control-plane --certificate-key 8d1f59c025b6b03a9f1585efadfb44edecf330c67c6a89ef5d83cf8bf38005df

kubeadm node

kubeadm join 192.168.10.80:8443 --token ytexwo.j1364ochwtv1t758 \
--discovery-token-ca-cert-hash sha256:1ebbb929926604f3126c875277d6b3f20a9bf35f13bfd01245b6babf74f7dd28

calico

https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/#pod-network

# calico's default pod subnet is 192.168.0.0/16

wget https://docs.projectcalico.org/v3.8/manifests/calico.yaml
POD_CIDR="10.244.0.0/16"
sed -i -e "s?192.168.0.0/16?$POD_CIDR?g" calico.yaml
kubectl apply -f calico.yaml

The rollout takes a while; it may need a few minutes.
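
To follow the rollout, the kube-system pods can be watched until the calico and coredns pods are all Running:

kubectl get pods -n kube-system -o wide -w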

Specify the NIC on multi-homed hosts

vim calico.yaml
    spec:
      containers:
      - env:
        - name: DATASTORE_TYPE
          value: kubernetes
        - name: IP_AUTODETECTION_METHOD   # add this environment variable to the calico-node DaemonSet
          value: interface=eth0           # the internal NIC to use
        - name: WAIT_FOR_DATASTORE
          value: "true"
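
An equivalent change without editing the manifest by hand is to set the environment variable on the calico-node DaemonSet directly; a sketch (eth0 is the assumed internal interface):

kubectl -n kube-system set env daemonset/calico-node IP_AUTODETECTION_METHOD=interface=eth0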

# Until a pod network is installed, node STATUS shows NotReady; after calico is applied all nodes become Ready
# kubectl get node
NAME STATUS ROLES AGE VERSION
k8s-master1 Ready master 13h v1.15.4
k8s-master2 Ready master 13h v1.15.4
k8s-master3 Ready master 13h v1.15.4
k8s-node1 Ready <none> 13h v1.15.4
k8s-node2 Ready <none> 13h v1.15.4
Quick smoke test: create a small nginx Deployment and check that the pods run.

cat << EOF > ~/nginx-deployment.yaml
apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  selector:
    matchLabels:
      app: nginx
  replicas: 2 # tells deployment to run 2 pods matching the template
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx:1.7.9
        ports:
        - containerPort: 80
EOF

kubectl apply -f ~/nginx-deployment.yaml

# kubectl get pod
NAME READY STATUS RESTARTS AGE
nginx-deployment-7bfb85948d-j9ll7 1/1 Running 0 30s
nginx-deployment-7bfb85948d-zs8qn 1/1 Running 0 30s

coredns

https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/

# kubectl get pod -nkube-system | grep coredns
coredns-5c98db65d4-mnlrr 1/1 Running 0 13h
coredns-5c98db65d4-xfjlb 1/1 Running 4 13h

cat << EOF > busybox.yaml
apiVersion: v1
kind: Pod
metadata:
  name: busybox
  namespace: default
spec:
  containers:
  - name: busybox
    image: busybox:1.28
    command:
    - sleep
    - "3600"
    imagePullPolicy: IfNotPresent
  restartPolicy: Always
EOF

kubectl apply -f busybox.yaml
kubectl get pods busybox
kubectl exec -ti busybox -- nslookup kubernetes.default

Server: 10.96.0.10
Address 1: 10.96.0.10 kube-dns.kube-system.svc.cluster.local

Name: kubernetes.default
Address 1: 10.96.0.1 kubernetes.default.svc.cluster.local

ingress-nginx

https://github.com/kubernetes/ingress-nginx/blob/master/docs/deploy/index.md

kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/static/mandatory.yaml
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/static/provider/baremetal/service-nodeport.yaml
#kubectl get pods --all-namespaces -l app.kubernetes.io/name=ingress-nginx --watch
NAMESPACE NAME READY STATUS RESTARTS AGE
ingress-nginx nginx-ingress-controller-69969b98db-shr6j 1/1 Running 0 58m

# kubectl get svc -ningress-nginx
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
ingress-nginx NodePort 10.98.91.11 <none> 80:32634/TCP,443:30871/TCP 59m

POD_NAMESPACE=ingress-nginx
POD_NAME=$(kubectl get pods -n $POD_NAMESPACE -l app.kubernetes.io/name=ingress-nginx -o jsonpath='{.items[0].metadata.name}')
kubectl exec -it $POD_NAME -n $POD_NAMESPACE -- /nginx-ingress-controller --version

#curl 10.98.91.11:443
<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>openresty/1.15.8.2</center>
</body>
</html>
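
To see the controller route real traffic, a minimal Ingress in front of the nginx-deployment created earlier can be used; a sketch (the Service name, Ingress name and host nginx.example.com are illustrative):

kubectl expose deployment nginx-deployment --name=nginx --port=80

cat << EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
  name: nginx-test
spec:
  rules:
  - host: nginx.example.com
    http:
      paths:
      - path: /
        backend:
          serviceName: nginx
          servicePort: 80
EOF

# Test through the ingress-nginx NodePort (32634 in the output above), passing the Host header
curl -H "Host: nginx.example.com" http://192.168.10.81:32634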

dashboard

https://github.com/kubernetes/dashboard

wget https://raw.githubusercontent.com/kubernetes/dashboard/v1.10.1/src/deploy/recommended/kubernetes-dashboard.yaml
vim kubernetes-dashboard.yaml
.....
        image: gcr.azk8s.cn/google_containers/kubernetes-dashboard-amd64:v1.10.1
......
  selector:
    k8s-app: kubernetes-dashboard
  type: NodePort    # added so the Service is reachable via a NodePort
......

kubectl create -f kubernetes-dashboard.yaml

# kubectl get pods -n kube-system -l k8s-app=kubernetes-dashboard
NAME READY STATUS RESTARTS AGE
kubernetes-dashboard-fcfb4cbc-f84jz 1/1 Running 0 2m9s

# kubectl get svc -n kube-system -l k8s-app=kubernetes-dashboard
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes-dashboard NodePort 10.99.252.114 <none> 443:30202/TCP 106s

Access the Dashboard through the NodePort 30202 shown above. Note that it must be accessed over https; if Chrome rejects the self-signed certificate, use Firefox instead:

Open in Firefox: https://192.168.10.80:30202
Create a user with cluster-wide admin permissions to log in to the Dashboard:

cat << EOF > admin.yaml
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: admin
  annotations:
    rbac.authorization.kubernetes.io/autoupdate: "true"
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: rbac.authorization.k8s.io
subjects:
- kind: ServiceAccount
  name: admin
  namespace: kube-system

---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
EOF

kubectl apply -f admin.yaml
admin_token=$(kubectl get secret -n kube-system|grep admin-token|awk '{print $1}')
kubectl get secret $admin_token -o jsonpath={.data.token} -n kube-system | base64 -d  # print the base64-decoded token
eyJhbGciOiJSUzI1NiIsImtpZCI6IiJ9.eyJpc3MiOiJrdWJlcm5ldGVzL3NlcnZpY2VhY2NvdW50Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9uYW1lc3BhY2UiOiJrdWJlLXN5c3RlbSIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VjcmV0Lm5hbWUiOiJhZG1pbi10b2tlbi1scGRjZyIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VydmljZS1hY2NvdW50Lm5hbWUiOiJhZG1pbiIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VydmljZS1hY2NvdW50LnVpZCI6IjE0MWU2NTRmLWZhNWUtNGRkZS1iZGUxLTg3NDg0NTBkNTMyNCIsInN1YiI6InN5c3RlbTpzZXJ2aWNlYWNjb3VudDprdWJlLXN5c3RlbTphZG1pbiJ9.SdKvGpq38TCYJOvQ_O7gax0nouhSeZEuuiA6MqFcxcM_8PwyiUzWq0fSzP5FF0gZdPuujYEh7CsJZYY1QQ22WT-UT5Ds6IHDBhvo0BmF5wPk_mlHUHTlUtdtFF_gZWVTZWsibQg2w2Vipzbza0nWsacRG5DYhEwxNBmUHgLJ53w0TpmZ6nonqD-Sva-BrGaim2mgPwe7trkpqSsUGYuxV7Ncwz8ZlG8S_vmwoO2r_3yc9S-62hSR0GMyrTx1JsESevv9tZzQvT1pW3eLdURa-WmhtM9WDOATQk2hk-JKl6KHVaCdgexKB6rWrelu_8uRmojbSf7SDSOJJqD1HwmwXw

Use the decoded token above to log in to the Dashboard.

(Screenshots: Dashboard login page and Dashboard overview.)

metrics-server

git clone https://github.com/kubernetes-incubator/metrics-server.git

vim metrics-server/deploy/1.8+/metrics-server-deployment.yaml
      containers:
      - name: metrics-server
        command:
        - /metrics-server
        - --metric-resolution=30s  # how often metrics are scraped from the kubelets
        - --kubelet-preferred-address-types=InternalIP,Hostname,InternalDNS,ExternalDNS,ExternalIP  # prefer InternalIP to reach the kubelet, in case node names are not resolvable in DNS
        - --kubelet-insecure-tls
        image: gcr.azk8s.cn/google_containers/metrics-server-amd64:v0.3.5

kubectl create -f metrics-server/deploy/1.8+/

# kubectl -n kube-system get pods -l k8s-app=metrics-server
NAME READY STATUS RESTARTS AGE
metrics-server-557db69876-g28m6 1/1 Running 0 55s

# kubectl top nodes
error: metrics not available yet  # no data right after deployment; metrics show up after a few minutes

# kubectl top nodes
NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%
k8s-master1 230m 11% 1343Mi 71%
k8s-master2 242m 12% 1280Mi 68%
k8s-master3 316m 15% 1317Mi 70%
k8s-node1 112m 5% 571Mi 41%
k8s-node2 114m 5% 769Mi 55%

# Note: the Dashboard does not yet integrate with metrics-server, so Pod CPU/memory graphs are not shown in the Dashboard

Enable IPVS in kube-proxy

IPVS was already enabled here through the KubeProxyConfiguration passed to kubeadm init:

kubectl describe cm -nkube-system kube-proxy | grep mode
mode: ipvs

Enabling IPVS after installation

kubectl edit cm kube-proxy -n kube-system    # change mode: "" to mode: ipvs
mode: ipvs

# Recreate the kube-proxy pods on every node so they pick up the new mode
kubectl get pod -n kube-system | grep kube-proxy | awk '{system("kubectl delete pod "$1" -n kube-system")}'

kubectl -nkube-system logs kube-proxy-d6rtv | grep ipvs
I1010 09:53:41.124950 1 server_others.go:170] Using ipvs Proxier.
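
The IPVS virtual servers that kube-proxy programs can also be inspected directly with ipvsadm (installed during the preparation step):

ipvsadm -Ln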

Removing a node

kubectl drain node5 --delete-local-data --force --ignore-daemonsets
kubectl delete node node5
ssh root@node5 kubeadm reset --force
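
If the node is to be re-added later, a fresh join command (with a new token) can be printed on any master, for example:

kubeadm token create --print-join-command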

Cleaning up calico

Run the following on the node in question

kubeadm reset
#kubeadm reset --cri-socket unix:///run/containerd/containerd.sock -v 5   # use this variant instead on containerd-based nodes
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
ipvsadm -C
modprobe -r ipip    # removes the tunl0 interface
rm -rf /etc/cni/net.d/

Node maintenance

https://kubernetes.io/docs/tasks/administer-cluster/cluster-management/#maintenance-on-a-node

If a node is taken offline for more than 5 minutes, for example for a kernel upgrade or hardware repair (5 minutes is the default eviction timeout, set by the controller-manager flag --pod-eviction-timeout), its pods are marked Terminating. Pods that belong to a replica set are then recreated on other nodes.

Evict all pods from the node and mark it unschedulable:

kubectl drain $NODENAME

Make the node schedulable again:

kubectl uncordon $NODENAME

Logs flooded with systemd messages: Created slice libcontainer_70542_systemd_test_default.slice

echo 'if $programname == "systemd" and ($msg contains "Starting Session" or $msg contains "Started Session" or $msg contains "Created slice" or $msg contains "Starting user-" or $msg contains "Starting User Slice of" or $msg contains "Removed session" or $msg contains "Removed slice User Slice of" or $msg contains "Stopping User Slice of") then stop' >/etc/rsyslog.d/ignore-systemd-session-slice.conf 
systemctl restart rsyslog

https://access.redhat.com/solutions/1564823

Resource unit reference:
cpu: 100m      # 0.1 core
memory: 100Mi  # ~100 MB (1000M ≈ 1 GB)

Image re-tagging scripts

The steps above pull images directly from gcr.azk8s.cn/google_containers.
To use the original k8s.gcr.io image names instead, pull the images from the mirror and then re-tag them:

Pull the master node images

cat << 'EOF' > ~/pull_master_images.sh
k8s_repo="gcr.azk8s.cn/google_containers"
kubeadm config images pull --image-repository=$k8s_repo --kubernetes-version="v1.15.4"

# Re-tag the images back to k8s.gcr.io and remove the mirror tags
docker images | grep $k8s_repo | awk '{print $1":"$2}' | sed 's#\('"$k8s_repo"'\)\(.*\)#docker tag \1\2 k8s.gcr.io\2#g' > ~/k8s_repo.sh && bash ~/k8s_repo.sh && rm -f ~/k8s_repo.sh
docker images | grep $k8s_repo | awk '{print $1":"$2}' | sed 's#'"\($k8s_repo\)"'#docker rmi \1#g' > ~/k8s_image_rm.sh && bash ~/k8s_image_rm.sh && rm -f ~/k8s_image_rm.sh
EOF
bash ~/pull_master_images.sh

Pull the worker node images

cat << 'EOF' > ~/pull_worker_images.sh
k8s_repo="gcr.azk8s.cn/google_containers"
docker pull $k8s_repo/coredns:1.3.1
docker pull $k8s_repo/pause:3.1

# Re-tag the images back to k8s.gcr.io and remove the mirror tags
docker images | grep $k8s_repo | awk '{print $1":"$2}' | sed 's#\('"$k8s_repo"'\)\(.*\)#docker tag \1\2 k8s.gcr.io\2#g' > ~/k8s_repo.sh && bash ~/k8s_repo.sh && rm -f ~/k8s_repo.sh
docker images | grep $k8s_repo | awk '{print $1":"$2}' | sed 's#'"\($k8s_repo\)"'#docker rmi \1#g' > ~/k8s_image_rm.sh && bash ~/k8s_image_rm.sh && rm -f ~/k8s_image_rm.sh
EOF
bash ~/pull_worker_images.sh

efk

mkdir elk && cd elk
wget https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/fluentd-elasticsearch/es-service.yaml
wget https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/fluentd-elasticsearch/es-statefulset.yaml
wget https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/fluentd-elasticsearch/fluentd-es-configmap.yaml
wget https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/fluentd-elasticsearch/fluentd-es-ds.yaml
wget https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/fluentd-elasticsearch/kibana-deployment.yaml
wget https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/fluentd-elasticsearch/kibana-service.yaml
kubectl apply -f .
# kubectl get pods -n kube-system -o wide|grep -E 'elasticsearch|fluentd|kibana'
elasticsearch-logging-0 1/1 Running 0 101m 10.244.169.157 k8s-node2 <none> <none>
elasticsearch-logging-1 1/1 Running 0 100m 10.244.36.108 k8s-node1 <none> <none>
fluentd-es-v2.7.0-8z6w7 1/1 Running 1 19h 10.244.36.107 k8s-node1 <none> <none>
fluentd-es-v2.7.0-hgj2d 1/1 Running 1 19h 10.244.169.156 k8s-node2 <none> <none>
kibana-logging-7b97c764f6-5v8nl 1/1 Running 2 18h 10.244.36.106 k8s-node1 <none> <none>
# kubectl get service  -n kube-system|grep -E 'elasticsearch|kibana'
elasticsearch-logging ClusterIP 10.106.132.112 <none> 9200/TCP 19h
kibana-logging ClusterIP 10.106.77.83 <none> 5601/TCP 19h
kubectl logs -l  k8s-app=kibana-logging -nkube-system
# kubectl cluster-info
Kubernetes master is running at https://192.168.10.80:8443
Elasticsearch is running at https://192.168.10.80:8443/api/v1/namespaces/kube-system/services/elasticsearch-logging/proxy
Kibana is running at https://192.168.10.80:8443/api/v1/namespaces/kube-system/services/kibana-logging/proxy
KubeDNS is running at https://192.168.10.80:8443/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy
Metrics-server is running at https://192.168.10.80:8443/api/v1/namespaces/kube-system/services/https:metrics-server:/proxy

kubectl proxy --address='192.168.10.80' --port=8086 --accept-hosts='^*$' &

Access Kibana at http://192.168.10.80:8086/api/v1/namespaces/kube-system/services/kibana-logging/proxy
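
With kubectl proxy running as above, the Elasticsearch cluster health can also be checked through the same apiserver proxy path (a sketch using the standard _cluster/health API):

curl http://192.168.10.80:8086/api/v1/namespaces/kube-system/services/elasticsearch-logging/proxy/_cluster/health?pretty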