Kubernetes 1.15.4 Binary High-Availability Installation

Posted by Sunday on 2019-08-15

Environment preparation

OS: CentOS 7.5
Kubernetes: 1.15.4
Docker: 18.09
Etcd: 3.4.1

Hostname | IP / VIP | Components
k8s-master01 | 192.168.10.81 / 192.168.10.80 | haproxy, keepalived, etcd, kube-apiserver, kube-controller-manager, kube-scheduler, node
k8s-master02 | 192.168.10.82 / 192.168.10.80 | haproxy, keepalived, etcd, kube-apiserver, kube-controller-manager, kube-scheduler, node
k8s-master03 | 192.168.10.83 / 192.168.10.80 | haproxy, keepalived, etcd, kube-apiserver, kube-controller-manager, kube-scheduler, node
k8s-node01 | 192.168.10.84 | kubelet, kube-proxy, docker, calico, core-dns
k8s-node02 | 192.168.10.85 | kubelet, kube-proxy, docker, calico, core-dns
Service | Network

service network | 10.96.0.0/12
service dns | 10.96.0.10
pod network | 10.244.0.0/16

Host resolution

cat >> /etc/hosts <<EOF
192.168.10.81 k8s-master01
192.168.10.82 k8s-master02
192.168.10.83 k8s-master03
192.168.10.84 k8s-node01
192.168.10.85 k8s-node02
EOF

Configure passwordless SSH login

yum install -y expect
ssh-keygen -t rsa -P "" -f /root/.ssh/id_rsa
export mypass=123456
name=(k8s-master01 k8s-master02 k8s-master03 k8s-node01 k8s-node02)

for i in ${name[@]};do
expect -c "
spawn ssh-copy-id -i /root/.ssh/id_rsa.pub root@$i
expect {
\"*yes/no*\" {send \"yes\r\"; exp_continue}
\"*password*\" {send \"$mypass\r\"; exp_continue}
\"*Password*\" {send \"$mypass\r\";}
}"
done

Set hostnames

name=(k8s-master01 k8s-master02 k8s-master03 k8s-node01 k8s-node02)
for i in ${name[@]};do ssh root@$i hostnamectl set-hostname $i;done

Disable firewalld, SELinux, and swap

systemctl stop firewalld
systemctl disable firewalld
iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat
iptables -P FORWARD ACCEPT
swapoff -a
sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
setenforce 0
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config

Install dependencies

yum install -y ntpdate curl wget

Time synchronization

echo "*/30 * * * * ntpdate time7.aliyun.com >/dev/null 2>&1" >> /var/spool/cron/root

Upgrade the kernel

# Docker overlay2 requires a 4.x kernel
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm
yum --enablerepo=elrepo-kernel install -y kernel-lt
grub2-set-default 0
grub2-mkconfig -o /etc/grub2.cfg
grubby --default-kernel
reboot

Tune kernel parameters

cat >/etc/sysctl.d/kubernetes.conf <<EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
net.ipv4.tcp_tw_recycle=0
# do not use swap unless the system is about to OOM
vm.swappiness=0
# do not check whether enough physical memory is available
vm.overcommit_memory=1
# let the OOM killer handle OOM instead of panicking
vm.panic_on_oom=0
fs.inotify.max_user_instances=8192
fs.inotify.max_user_watches=1048576
fs.file-max=52706963
fs.nr_open=52706963
net.ipv6.conf.all.disable_ipv6=1
net.netfilter.nf_conntrack_max=2310720
EOF

modprobe br_netfilter
sysctl -p /etc/sysctl.d/kubernetes.conf

IPVS

yum install -y conntrack ipvsadm ipset 
cat >/etc/modules-load.d/ipvs.conf << EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF

for mod in ip_vs ip_vs_rr ip_vs_wrr ip_vs_sh nf_conntrack; do modprobe $mod; done
lsmod | egrep "ip_vs_rr|nf_conntrack"

Environment variables file

cd /etc/kubernetes
cat << "EOF" > env.sh
export MASTER_IPS=(192.168.10.81 192.168.10.82 192.168.10.83)
export NODE_IPS=(192.168.10.81 192.168.10.82 192.168.10.83 192.168.10.84 192.168.10.85)
NODE_NAMES=(k8s-master01 k8s-master02 k8s-master03 k8s-node01 k8s-node02)

export ETCD_IPS=(192.168.10.81 192.168.10.82 192.168.10.83)
export ETCD_NAMES=("etcd-0" "etcd-1" "etcd-2")
export ETCD_ENDPOINTS="https://192.168.10.81:2379,https://192.168.10.82:2379,https://192.168.10.83:2379"

export KUBE_SERVICE_IP="10.96.0.1" # first IP of SERVICE_CIDR 10.96.0.0/12
export KUBE_APISERVER_IP="192.168.10.80" # HA VIP
export KUBE_APISERVER_URL="https://192.168.10.80:8443" # HA URL
export ELB_APISERVER_DOMAIN="kube-apiserver.elb.sundayle.com" # ELB domain name
export SERVICE_CIDR="10.96.0.0/12"
export POD_CIDR="10.244.0.0/16"
export NODE_PORT_RANGE="30000-32767"
export DNS_SERVICE_IP="10.96.0.10"
export GCR_MIRROR="gcr.azk8s.cn/google_containers"
export QUAY_MIRROR="quay.azk8s.cn"
export DNS_DOMAIN="cluster.local"
export ENCRYPTION_KEY=$(head -c 32 /dev/urandom | base64)
EOF

Deploy cfssl

curl -s -L -o /usr/local/bin/cfssl https://pkg.cfssl.org/R1.2/cfssl_linux-amd64
curl -s -L -o /usr/local/bin/cfssljson https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64
chmod +x /usr/local/bin/{cfssl,cfssljson}

Configure certificates

Only one CA certificate needs to be created; every certificate created later (Kubernetes and etcd) is signed by it.

mkdir -p /etc/kubernetes/ssl
cd /etc/kubernetes/ssl
cat > ca-config.json <<EOF
{
"signing": {
"default": {
"expiry": "876000h"
},
"profiles": {
"kubernetes": {
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
],
"expiry": "876000h"
}
}
}
}
EOF

# Create the CA certificate signing request
cat > ca-csr.json <<EOF
{
"CN": "kubernetes",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Guangzhou",
"L": "Guangzhou",
"O": "k8s",
"OU": "System"
}
],
"ca": {
"expiry": "876000h"
}
}
EOF

# Generate the CA certificate and private key
cfssl gencert -initca ca-csr.json | cfssljson -bare ca

source /etc/kubernetes/env.sh
for NODE in ${NODE_IPS[@]};do
ssh root@$NODE "mkdir -p /etc/kubernetes/ssl"
rsync /etc/kubernetes/ssl/ca.pem root@${NODE}:/etc/kubernetes/ssl/
done

etcd certificate

cd /etc/kubernetes/ssl
cat > etcd-csr.json <<EOF
{
"CN": "etcd",
"hosts": [
"127.0.0.1",
"192.168.10.81",
"192.168.10.82",
"192.168.10.83"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Guangzhou",
"L": "Guangzhou",
"O": "k8s",
"OU": "System"
}
]
}
EOF

# Generate the certificate and private key
cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes etcd-csr.json | cfssljson -bare etcd

Distribute to the etcd nodes

source /etc/kubernetes/env.sh
for NODE in ${ETCD_IPS[@]};do
ssh root@$NODE "mkdir -p /etc/etcd/ssl"
rsync /etc/kubernetes/ssl/ca*.pem root@${NODE}:/etc/etcd/ssl/
rsync /etc/kubernetes/ssl/etcd*.pem root@${NODE}:/etc/etcd/ssl/
done

Deploy the etcd cluster

etcd is the most critical component of the cluster: it stores all Kubernetes state, and if etcd goes down, the cluster goes down with it. Here etcd is deployed on the three master nodes for high availability. The etcd cluster elects a leader with the Raft algorithm, and since Raft needs a majority of members to vote on every decision, an odd number of members is recommended, usually 3, 5, or 7 nodes per cluster.

https://github.com/coreos/etcd/releases

# Download and distribute
cd /usr/local/src
wget https://github.com/etcd-io/etcd/releases/download/v3.4.1/etcd-v3.4.1-linux-amd64.tar.gz
tar xf etcd-v3.4.1-linux-amd64.tar.gz

source /etc/kubernetes/env.sh
for NODE in ${ETCD_IPS[@]};do
ssh root@${NODE} "mkdir -p /usr/local/k8s/bin"
rsync etcd-v3.4.1-linux-amd64/etc* root@${NODE}:/usr/local/k8s/bin/
done

etcd.service

source /etc/kubernetes/env.sh
#ETCD_IPS=(192.168.10.81 192.168.10.82 192.168.10.83)
#ETCD_NAMES=("etcd-0" "etcd-1" "etcd-2")
for i in "${!ETCD_IPS[@]}"; do
HOST=${ETCD_IPS[$i]}
NAME=${ETCD_NAMES[$i]}

cat << EOF > ~/etcd.service.${HOST}
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
ExecStart=/usr/local/k8s/bin/etcd \\
--data-dir=/data/etcd \\
--name=${NAME} \\
--trusted-ca-file=/etc/etcd/ssl/ca.pem \\
--cert-file=/etc/etcd/ssl/etcd.pem \\
--key-file=/etc/etcd/ssl/etcd-key.pem \\
--peer-trusted-ca-file=/etc/etcd/ssl/ca.pem \\
--peer-cert-file=/etc/etcd/ssl/etcd.pem \\
--peer-key-file=/etc/etcd/ssl/etcd-key.pem \\
--peer-client-cert-auth \\
--client-cert-auth \\
--listen-peer-urls=https://${HOST}:2380 \\
--initial-advertise-peer-urls=https://${HOST}:2380 \\
--listen-client-urls=https://${HOST}:2379,http://127.0.0.1:2379 \\
--advertise-client-urls=https://${HOST}:2379 \\
--initial-cluster-token=etcd-cluster-0 \\
--initial-cluster=${ETCD_NAMES[0]}=https://${ETCD_IPS[0]}:2380,${ETCD_NAMES[1]}=https://${ETCD_IPS[1]}:2380,${ETCD_NAMES[2]}=https://${ETCD_IPS[2]}:2380 \\
--initial-cluster-state=new \\
--auto-compaction-mode=periodic \\
--auto-compaction-retention=1 \\
--max-request-bytes=33554432 \\
--quota-backend-bytes=6442450944 \\
--heartbeat-interval=250 \\
--election-timeout=2000
Restart=on-failure
RestartSec=5
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
done

# Distribute
source /etc/kubernetes/env.sh
for NODE in ${ETCD_IPS[@]};do
rsync ~/etcd.service.${NODE} root@${NODE}:/etc/systemd/system/etcd.service
ssh root@${NODE} "mkdir -p /data/etcd"
done

for NODE in ${ETCD_IPS[@]};do
ssh root@${NODE} "systemctl enable etcd --now"
done

# The first etcd process to start waits for the other members to join the cluster, so the start command may appear to hang for a while; this is normal
# Use journalctl -fu etcd to inspect errors

# Verify etcd cluster health
source /etc/kubernetes/env.sh
ETCDCTL_API=3 /usr/local/k8s/bin/etcdctl \
--endpoints=${ETCD_ENDPOINTS} \
--cacert=/etc/etcd/ssl/ca.pem \
--cert=/etc/etcd/ssl/etcd.pem \
--key=/etc/etcd/ssl/etcd-key.pem endpoint health

# Check the etcd cluster leader
source /etc/kubernetes/env.sh
ETCDCTL_API=3 /usr/local/k8s/bin/etcdctl \
-w table --cacert=/etc/etcd/ssl/ca.pem \
--cert=/etc/etcd/ssl/etcd.pem \
--key=/etc/etcd/ssl/etcd-key.pem \
--endpoints=${ETCD_ENDPOINTS} endpoint status
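
You can also list the members to confirm that all three nodes joined the cluster; a small sketch reusing the certificate paths above:

# List the etcd cluster members
source /etc/kubernetes/env.sh
ETCDCTL_API=3 /usr/local/k8s/bin/etcdctl \
-w table --cacert=/etc/etcd/ssl/ca.pem \
--cert=/etc/etcd/ssl/etcd.pem \
--key=/etc/etcd/ssl/etcd-key.pem \
--endpoints=${ETCD_ENDPOINTS} member list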

Back up etcd

#!/bin/bash
BACKUP_DIR=/data/bak/etcd
DATE=$(date +%Y%m%d%H%M)
[[ ! -d $BACKUP_DIR ]] && mkdir -p $BACKUP_DIR

ETCD_ENDPOINTS="https://192.168.10.81:2379"
ETCDCTL_API=3 /usr/local/k8s/bin/etcdctl \
--endpoints=${ETCD_ENDPOINTS} \
--cacert=/etc/etcd/ssl/ca.pem \
--cert=/etc/etcd/ssl/etcd.pem \
--key=/etc/etcd/ssl/etcd-key.pem snapshot save $BACKUP_DIR/snap-$DATE.db

result=$?
[ "$result" == 0 ] && find $BACKUP_DIR -mtime +7 | xargs rm -rf
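
For completeness, restoring from one of these snapshots is roughly the reverse operation. A hedged sketch for member etcd-0 on 192.168.10.81 (the snapshot filename and the restore data directory are illustrative; each member has to be restored with its own --name and peer URL before etcd is started again):

# Stop etcd, restore the snapshot into a fresh data directory, then point etcd's --data-dir at it
systemctl stop etcd
ETCDCTL_API=3 /usr/local/k8s/bin/etcdctl snapshot restore /data/bak/etcd/snap-201908150000.db \
--name=etcd-0 \
--initial-cluster=etcd-0=https://192.168.10.81:2380,etcd-1=https://192.168.10.82:2380,etcd-2=https://192.168.10.83:2380 \
--initial-cluster-token=etcd-cluster-0 \
--initial-advertise-peer-urls=https://192.168.10.81:2380 \
--data-dir=/data/etcd-restore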

Deploy the HA layer

Deploy haproxy

  • haproxy gives the Kubernetes nodes (masters and workers) a single highly available address for reaching kube-apiserver
  • kube-controller-manager and kube-scheduler run as multiple instances on the control-plane nodes, so the control plane stays available as long as one instance is healthy
  • Pods inside the cluster reach kube-apiserver through the kubernetes service name, which kube-dns resolves automatically, so that path is highly available as well

Configure haproxy

yum install -y haproxy
cat > /etc/haproxy/haproxy.cfg <<EOF
global
log 127.0.0.1 local2
chroot /var/lib/haproxy
pidfile /var/run/haproxy.pid
maxconn 4000
user haproxy
group haproxy
daemon

# turn on stats unix socket
stats socket /var/lib/haproxy/stats

defaults
mode http
log global
option httplog
option dontlognull
option http-server-close
option forwardfor except 127.0.0.0/8
option redispatch
retries 3
timeout http-request 10s
timeout queue 1m
timeout connect 10s
timeout client 1m
timeout server 1m
timeout http-keep-alive 10s
timeout check 10s
maxconn 3000

frontend kubernetes-apiserver
mode tcp
bind *:8443
option tcplog
default_backend kubernetes-apiserver

backend kubernetes-apiserver
mode tcp
balance roundrobin
server k8s01 192.168.10.81:6443 check
server k8s02 192.168.10.82:6443 check
server k8s03 192.168.10.83:6443 check

listen stats
bind *:1080
stats auth admin:sunday
stats refresh 5s
stats realm HAProxy\ Statistics
stats uri /haproxy-stats
EOF

# Distribute
source /etc/kubernetes/env.sh
for NODE in ${MASTER_IPS[@]};do
ssh root@${NODE} "yum install -y haproxy"
rsync /etc/haproxy/haproxy.cfg ${NODE}:/etc/haproxy/
ssh root@${NODE} "systemctl enable --now haproxy"
done

ss -tunlp |grep 8443
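
The stats listener configured above also gives a quick way to confirm that haproxy sees all three apiserver backends; a small sketch using the credentials and URI from haproxy.cfg (the ;csv suffix asks haproxy for its CSV stats export):

# Check backend status on the haproxy stats endpoint
curl -su admin:sunday 'http://127.0.0.1:1080/haproxy-stats;csv' | grep kubernetes-apiserver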

Deploy keepalived
The HA setup needs a VIP that is reachable from inside the cluster.

Configure the keepalived service

yum install -y keepalived psmisc
cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived
global_defs {
router_id keepalived
}

vrrp_script check_haproxy {
script "killall -0 haproxy"
interval 2
weight -3
fall 2
rise 2
}

vrrp_instance VI_1 {
state MASTER
interface eth0
virtual_router_id 247
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass Sunday
}
virtual_ipaddress {
192.168.10.80
}
track_script {
check_haproxy
}
}
EOF

# Distribute
source /etc/kubernetes/env.sh
for NODE in ${MASTER_IPS[@]};do
ssh root@${NODE} "yum install -y keepalived psmisc"
rsync /etc/keepalived/keepalived.conf $NODE:/etc/keepalived/keepalived.conf
done

# Switch master02 and master03 to BACKUP with lower priorities
ssh root@192.168.10.82 "sed -i -e 's#MASTER#BACKUP#g' -e 's#priority 100#priority 99#g' /etc/keepalived/keepalived.conf"
ssh root@192.168.10.83 "sed -i -e 's#MASTER#BACKUP#g' -e 's#priority 100#priority 98#g' /etc/keepalived/keepalived.conf"

# Start keepalived
source /etc/kubernetes/env.sh
for NODE in ${MASTER_IPS[@]}; do
ssh $NODE 'systemctl enable --now keepalived'
done

# After startup, ping the VIP 192.168.10.80
# ping -c 2 192.168.10.80
PING 192.168.10.80 (192.168.10.80) 56(84) bytes of data.
64 bytes from 192.168.10.80: icmp_seq=1 ttl=64 time=0.045 ms
64 bytes from 192.168.10.80: icmp_seq=2 ttl=64 time=0.052 ms
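
To see which master currently holds the VIP (and to watch it move when haproxy or keepalived is stopped on that node), check the interface directly; a sketch assuming the eth0 interface from keepalived.conf:

# Show which master currently carries the VIP
source /etc/kubernetes/env.sh
for NODE in ${MASTER_IPS[@]}; do
ssh root@${NODE} "ip addr show eth0 | grep -q 192.168.10.80 && echo ${NODE} holds the VIP"
done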

Deploy the master binaries

cat << EOF > /etc/profile.d/k8s.sh 
export PATH=/usr/local/k8s/bin:\$PATH
EOF

Distribute the master binaries

cd /usr/local/src/
wget https://storage.googleapis.com/kubernetes-release/release/v1.15.4/kubernetes-server-linux-amd64.tar.gz
tar -xf kubernetes-server-linux-amd64.tar.gz

source /etc/kubernetes/env.sh
for NODE in ${MASTER_IPS[@]};do
rsync /etc/profile.d/k8s.sh root@${NODE}:/etc/profile.d/
rsync kubernetes/server/bin/{kube-apiserver,kube-controller-manager,kube-scheduler,kubeadm,kubectl} root@${NODE}:/usr/local/k8s/bin/
done

Deploy kubectl

Create the admin certificate

cd /etc/kubernetes/ssl/
cat > admin-csr.json <<EOF
{
"CN": "admin",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Guangzhou",
"L": "Guangzhou",
"O": "system:masters",
"OU": "System"
}
]
}
EOF

# Generate the certificate and private key
cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes admin-csr.json | cfssljson -bare admin

Generate the kubeconfig file

cd /etc/kubernetes/ssl
source /etc/kubernetes/env.sh

# Set cluster parameters
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=${KUBE_APISERVER_URL} \
--kubeconfig=/etc/kubernetes/kubectl.config

# Set client credentials
kubectl config set-credentials admin \
--client-certificate=admin.pem \
--client-key=admin-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/kubectl.config

# Set the context
kubectl config set-context kubernetes \
--cluster=kubernetes \
--user=admin \
--kubeconfig=/etc/kubernetes/kubectl.config

# Use this context by default
kubectl config use-context kubernetes --kubeconfig=/etc/kubernetes/kubectl.config

# Distribute
source /etc/kubernetes/env.sh
for NODE in ${MASTER_IPS[@]};do
ssh root@${NODE} "mkdir -p ~/.kube"
rsync /etc/kubernetes/kubectl.config root@${NODE}:~/.kube/config
rsync /etc/kubernetes/ssl/admin*.pem root@${NODE}:/etc/kubernetes/ssl/
done
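
The apiserver is not running yet, but the generated kubeconfig can already be inspected offline to confirm the server URL and the embedded credentials; a quick sketch:

# Inspect the generated kubeconfig (certificate data is redacted by default)
kubectl config view --kubeconfig=/etc/kubernetes/kubectl.config
kubectl config current-context --kubeconfig=/etc/kubernetes/kubectl.config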

Deploy kube-apiserver

Create the kube-apiserver certificate and private key

source /etc/kubernetes/env.sh
cd /etc/kubernetes/ssl/
cat > kube-apiserver-csr.json <<EOF
{
"CN": "kubernetes",
"hosts": [
"127.0.0.1",
"192.168.10.81",
"192.168.10.82",
"192.168.10.83",
"192.168.10.84",
"192.168.10.85",
"192.168.10.86",
"192.168.10.87",
"192.168.10.89",
"192.168.10.90",
"${ELB_APISERVER_DOMAIN}",
"${KUBE_APISERVER_IP}",
"${KUBE_SERVICE_IP}",
"kubernetes",
"kubernetes.default",
"kubernetes.default.svc",
"kubernetes.default.svc.cluster",
"kubernetes.default.svc.cluster.local."
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Guangzhou",
"L": "Guangzhou",
"O": "k8s",
"OU": "System"
}
]
}
EOF

# Generate the certificate and private key
cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes kube-apiserver-csr.json | cfssljson -bare kube-apiserver

Create the certificate for accessing metrics-server

cd /etc/kubernetes/ssl/
cat > proxy-client-csr.json <<EOF
{
"CN": "aggregator",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Guangzhou",
"L": "Guangzhou",
"O": "k8s",
"OU": "System"
}
]
}
EOF

# Generate the certificate and private key
cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes proxy-client-csr.json | cfssljson -bare proxy-client

Create the encryption config file

source /etc/kubernetes/env.sh
cat > /etc/kubernetes/encryption-config.yaml <<EOF
kind: EncryptionConfig
apiVersion: v1
resources:
  - resources:
      - secrets
    providers:
      - aescbc:
          keys:
            - name: key1
              secret: ${ENCRYPTION_KEY}
      - identity: {}
EOF

Create the audit policy file

cat > /etc/kubernetes/audit-policy.yaml <<EOF
apiVersion: audit.k8s.io/v1beta1
kind: Policy
rules:
  # The following requests were manually identified as high-volume and low-risk, so drop them.
  - level: None
    resources:
      - group: ""
        resources:
          - endpoints
          - services
          - services/status
    users:
      - 'system:kube-proxy'
    verbs:
      - watch
  - level: None
    resources:
      - group: ""
        resources:
          - nodes
          - nodes/status
    userGroups:
      - 'system:nodes'
    verbs:
      - get
  - level: None
    namespaces:
      - kube-system
    resources:
      - group: ""
        resources:
          - endpoints
    users:
      - 'system:kube-controller-manager'
      - 'system:kube-scheduler'
      - 'system:serviceaccount:kube-system:endpoint-controller'
    verbs:
      - get
      - update
  - level: None
    resources:
      - group: ""
        resources:
          - namespaces
          - namespaces/status
          - namespaces/finalize
    users:
      - 'system:apiserver'
    verbs:
      - get
  # Don't log HPA fetching metrics.
  - level: None
    resources:
      - group: metrics.k8s.io
    users:
      - 'system:kube-controller-manager'
    verbs:
      - get
      - list
  # Don't log these read-only URLs.
  - level: None
    nonResourceURLs:
      - '/healthz*'
      - /version
      - '/swagger*'
  # Don't log events requests.
  - level: None
    resources:
      - group: ""
        resources:
          - events
  # node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes
  - level: Request
    omitStages:
      - RequestReceived
    resources:
      - group: ""
        resources:
          - nodes/status
          - pods/status
    users:
      - kubelet
      - 'system:node-problem-detector'
      - 'system:serviceaccount:kube-system:node-problem-detector'
    verbs:
      - update
      - patch
  - level: Request
    omitStages:
      - RequestReceived
    resources:
      - group: ""
        resources:
          - nodes/status
          - pods/status
    userGroups:
      - 'system:nodes'
    verbs:
      - update
      - patch
  # deletecollection calls can be large, don't log responses for expected namespace deletions
  - level: Request
    omitStages:
      - RequestReceived
    users:
      - 'system:serviceaccount:kube-system:namespace-controller'
    verbs:
      - deletecollection
  # Secrets, ConfigMaps, and TokenReviews can contain sensitive & binary data,
  # so only log at the Metadata level.
  - level: Metadata
    omitStages:
      - RequestReceived
    resources:
      - group: ""
        resources:
          - secrets
          - configmaps
      - group: authentication.k8s.io
        resources:
          - tokenreviews
  # Get responses can be large; skip them.
  - level: Request
    omitStages:
      - RequestReceived
    resources:
      - group: ""
      - group: admissionregistration.k8s.io
      - group: apiextensions.k8s.io
      - group: apiregistration.k8s.io
      - group: apps
      - group: authentication.k8s.io
      - group: authorization.k8s.io
      - group: autoscaling
      - group: batch
      - group: certificates.k8s.io
      - group: extensions
      - group: metrics.k8s.io
      - group: networking.k8s.io
      - group: policy
      - group: rbac.authorization.k8s.io
      - group: scheduling.k8s.io
      - group: settings.k8s.io
      - group: storage.k8s.io
    verbs:
      - get
      - list
      - watch
  # Default level for known APIs
  - level: RequestResponse
    omitStages:
      - RequestReceived
    resources:
      - group: ""
      - group: admissionregistration.k8s.io
      - group: apiextensions.k8s.io
      - group: apiregistration.k8s.io
      - group: apps
      - group: authentication.k8s.io
      - group: authorization.k8s.io
      - group: autoscaling
      - group: batch
      - group: certificates.k8s.io
      - group: extensions
      - group: metrics.k8s.io
      - group: networking.k8s.io
      - group: policy
      - group: rbac.authorization.k8s.io
      - group: scheduling.k8s.io
      - group: settings.k8s.io
      - group: storage.k8s.io
  # Default level for all other requests.
  - level: Metadata
    omitStages:
      - RequestReceived
EOF

kube-apiserver.service

source /etc/kubernetes/env.sh
for i in "${!MASTER_IPS[@]}"; do
NODE=${MASTER_IPS[$i]}
cat > ~/kube-apiserver.service.$NODE <<EOF
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/k8s/bin/kube-apiserver \\
--advertise-address=${NODE} \\
--default-not-ready-toleration-seconds=300 \\
--default-unreachable-toleration-seconds=300 \\
--feature-gates=DynamicAuditing=true \\
--max-mutating-requests-inflight=2000 \\
--max-requests-inflight=4000 \\
--default-watch-cache-size=200 \\
--delete-collection-workers=2 \\
--encryption-provider-config=/etc/kubernetes/encryption-config.yaml \\
--etcd-cafile=/etc/kubernetes/ssl/ca.pem \\
--etcd-certfile=/etc/kubernetes/ssl/kube-apiserver.pem \\
--etcd-keyfile=/etc/kubernetes/ssl/kube-apiserver-key.pem \\
--etcd-servers=${ETCD_ENDPOINTS} \\
--bind-address=${NODE} \\
--secure-port=6443 \\
--tls-cert-file=/etc/kubernetes/ssl/kube-apiserver.pem \\
--tls-private-key-file=/etc/kubernetes/ssl/kube-apiserver-key.pem \\
--insecure-port=0 \\
--audit-dynamic-configuration \\
--audit-log-maxage=15 \\
--audit-log-maxbackup=3 \\
--audit-log-maxsize=100 \\
--audit-log-truncate-enabled \\
--audit-log-path=/data/logs/kube-apiserver/audit.log \\
--audit-policy-file=/etc/kubernetes/audit-policy.yaml \\
--profiling \\
--anonymous-auth=false \\
--client-ca-file=/etc/kubernetes/ssl/ca.pem \\
--enable-bootstrap-token-auth \\
--requestheader-allowed-names="aggregator" \\
--requestheader-client-ca-file=/etc/kubernetes/ssl/ca.pem \\
--requestheader-extra-headers-prefix="X-Remote-Extra-" \\
--requestheader-group-headers=X-Remote-Group \\
--requestheader-username-headers=X-Remote-User \\
--service-account-key-file=/etc/kubernetes/ssl/ca.pem \\
--authorization-mode=Node,RBAC \\
--runtime-config=api/all=true \\
--enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,ResourceQuota,NodeRestriction \\
--allow-privileged=true \\
--apiserver-count=3 \\
--event-ttl=168h \\
--kubelet-certificate-authority=/etc/kubernetes/ssl/ca.pem \\
--kubelet-client-certificate=/etc/kubernetes/ssl/kube-apiserver.pem \\
--kubelet-client-key=/etc/kubernetes/ssl/kube-apiserver-key.pem \\
--kubelet-https=true \\
--kubelet-timeout=10s \\
--proxy-client-cert-file=/etc/kubernetes/ssl/proxy-client.pem \\
--proxy-client-key-file=/etc/kubernetes/ssl/proxy-client-key.pem \\
--service-cluster-ip-range=${SERVICE_CIDR} \\
--service-node-port-range=${NODE_PORT_RANGE} \\
--logtostderr=true \\
--v=2
Restart=on-failure
RestartSec=10
Type=notify
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
done

# Distribute
source /etc/kubernetes/env.sh
cd /etc/kubernetes/ssl
for NODE in ${MASTER_IPS[@]};do
rsync /etc/kubernetes/ssl/ca*.pem root@${NODE}:/etc/kubernetes/ssl/
rsync proxy-client*.pem root@${NODE}:/etc/kubernetes/ssl/
rsync /etc/kubernetes/ssl/kube-apiserver*.pem root@${NODE}:/etc/kubernetes/ssl/
rsync /etc/kubernetes/encryption-config.yaml /etc/kubernetes/audit-policy.yaml root@${NODE}:/etc/kubernetes/
rsync ~/kube-apiserver.service.${NODE} root@${NODE}:/etc/systemd/system/kube-apiserver.service
done

# Start kube-apiserver
for NODE in ${MASTER_IPS[@]};do
ssh root@${NODE} "mkdir -p /data/logs/kube-apiserver"
ssh root@${NODE} "systemctl daemon-reload && systemctl enable kube-apiserver --now"
done

# Check whether the service is healthy
for NODE in ${MASTER_IPS[@]};do
ssh root@${NODE} "systemctl status kube-apiserver |grep 'Active:'"
done

# Make sure the status is active (running); otherwise inspect the logs to find the cause
journalctl -u kube-apiserver

# Print the data kube-apiserver has written into etcd
source /etc/kubernetes/env.sh
ETCDCTL_API=3 etcdctl \
--endpoints=${ETCD_ENDPOINTS} \
--cacert=/etc/etcd/ssl/ca.pem \
--cert=/etc/etcd/ssl/etcd.pem \
--key=/etc/etcd/ssl/etcd-key.pem \
get /registry/ --prefix --keys-only


# Check the ports kube-apiserver listens on
# netstat -lntup|grep kube
tcp 0 0 192.168.10.81:6443 0.0.0.0:* LISTEN 2069/kube-apiserver

# Check cluster info
# kubectl cluster-info
Kubernetes master is running at https://192.168.10.80:8443

To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'.

# kubectl get all --all-namespaces
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
default service/kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 57m

# kubectl get cs
NAME STATUS MESSAGE ERROR
controller-manager Unhealthy Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused
scheduler Unhealthy Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused
etcd-0 Healthy {"health":"true"}
etcd-2 Healthy {"health":"true"}
etcd-1 Healthy {"health":"true"}

# If errors are reported, check ~/.kube/config and the configured certificates
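
You can also confirm that the EncryptionConfig created earlier is in effect: a freshly written Secret should be stored in etcd with an aescbc prefix rather than in plain text. A hedged sketch, where enc-test is a made-up Secret name:

# The raw etcd value should start with k8s:enc:aescbc:v1:key1
kubectl create secret generic enc-test -n default --from-literal=foo=bar
source /etc/kubernetes/env.sh
ETCDCTL_API=3 /usr/local/k8s/bin/etcdctl \
--endpoints=${ETCD_ENDPOINTS} \
--cacert=/etc/etcd/ssl/ca.pem \
--cert=/etc/etcd/ssl/etcd.pem \
--key=/etc/etcd/ssl/etcd-key.pem \
get /registry/secrets/default/enc-test --print-value-only | head -c 64; echo
kubectl delete secret enc-test -n default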

Grant kube-apiserver access to the kubelet API

kubectl create clusterrolebinding kube-apiserver:kubelet-apis --clusterrole=system:kubelet-api-admin --user kubernetes

Deploy kube-controller-manager

The three kube-controller-manager instances form a cluster. After startup they elect a leader through the leader-election mechanism and the other instances block. When the leader becomes unavailable, the blocked instances elect a new leader, which keeps the service highly available.

Create the kube-controller-manager certificate

cd /etc/kubernetes/ssl/
cat > kube-controller-manager-csr.json <<EOF
{
"CN": "system:kube-controller-manager",
"key": {
"algo": "rsa",
"size": 2048
},
"hosts": [
"127.0.0.1",
"192.168.10.81",
"192.168.10.82",
"192.168.10.83"
],
"names": [
{
"C": "CN",
"ST": "Guangzhou",
"L": "Guangzhou",
"O": "system:kube-controller-manager",
"OU": "System"
}
]
}
EOF

# Generate the certificate and private key
cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes kube-controller-manager-csr.json | cfssljson -bare kube-controller-manager

kube-controller-manager.kubeconfig

cd /etc/kubernetes/ssl/
source /etc/kubernetes/env.sh

kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=${KUBE_APISERVER_URL} \
--kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig

kubectl config set-credentials system:kube-controller-manager \
--client-certificate=kube-controller-manager.pem \
--client-key=kube-controller-manager-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig

kubectl config set-context system:kube-controller-manager \
--cluster=kubernetes \
--user=system:kube-controller-manager \
--kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig

kubectl config use-context system:kube-controller-manager --kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig

# kube-controller-manager uses this kubeconfig file to access the apiserver
# The file provides the apiserver address, the embedded CA certificate, and the kube-controller-manager client certificate

kube-controller-manager.service

source /etc/kubernetes/env.sh
cat > ~/kube-controller-manager.service <<EOF
[Unit]
Description=Kubernetes Controller Manager
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
[Service]
ExecStart=/usr/local/k8s/bin/kube-controller-manager \\
--profiling \\
--cluster-name=kubernetes \\
--controllers=*,bootstrapsigner,tokencleaner \\
--kube-api-qps=1000 \\
--kube-api-burst=2000 \\
--leader-elect \\
--use-service-account-credentials=true \\
--concurrent-service-syncs=2 \\
--bind-address=0.0.0.0 \\
--tls-cert-file=/etc/kubernetes/ssl/kube-controller-manager.pem \\
--tls-private-key-file=/etc/kubernetes/ssl/kube-controller-manager-key.pem \\
--authentication-kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig \\
--client-ca-file=/etc/kubernetes/ssl/ca.pem \\
--requestheader-allowed-names="" \\
--requestheader-client-ca-file=/etc/kubernetes/ssl/ca.pem \\
--requestheader-extra-headers-prefix="X-Remote-Extra-" \\
--requestheader-group-headers=X-Remote-Group \\
--requestheader-username-headers=X-Remote-User \\
--authorization-kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig \\
--cluster-signing-cert-file=/etc/kubernetes/ssl/ca.pem \\
--cluster-signing-key-file=/etc/kubernetes/ssl/ca-key.pem \\
--experimental-cluster-signing-duration=87600h \\
--horizontal-pod-autoscaler-sync-period=10s \\
--concurrent-deployment-syncs=10 \\
--concurrent-gc-syncs=30 \\
--node-cidr-mask-size=24 \\
--service-cluster-ip-range=${SERVICE_CIDR} \\
--pod-eviction-timeout=6m \\
--terminated-pod-gc-threshold=10000 \\
--root-ca-file=/etc/kubernetes/ssl/ca.pem \\
--service-account-private-key-file=/etc/kubernetes/ssl/ca-key.pem \\
--kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig \\
--logtostderr=true \\
--v=2
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
EOF

# Distribute
source /etc/kubernetes/env.sh
for NODE in ${MASTER_IPS[@]};do
rsync /etc/kubernetes/ssl/kube-controller-manager*.pem root@${NODE}:/etc/kubernetes/ssl/
rsync /etc/kubernetes/kube-controller-manager.kubeconfig root@${NODE}:/etc/kubernetes/
rsync ~/kube-controller-manager.service root@${NODE}:/etc/systemd/system/kube-controller-manager.service
done

# Start the service
for NODE in ${MASTER_IPS[@]};do
ssh root@${NODE} "systemctl daemon-reload && systemctl enable kube-controller-manager --now"
done

# Check the running status
for NODE in ${MASTER_IPS[@]};do
ssh root@${NODE} "systemctl status kube-controller-manager|grep Active"
done

# Check the listening ports
netstat -lnpt | grep kube-cont
tcp6 0 0 :::10252 :::* LISTEN 110807/kube-control
tcp6 0 0 :::10257 :::* LISTEN 110807/kube-control

kube-controller-manager permissions

The ClusterRole system:kube-controller-manager itself grants very little (it can only create secrets, serviceaccounts, and similar resources); the controller permissions are split out into the per-controller ClusterRoles system:controller:xxx.

# kubectl describe clusterrole system:kube-controller-manager
Name: system:kube-controller-manager
Labels: kubernetes.io/bootstrapping=rbac-defaults
Annotations: rbac.authorization.kubernetes.io/autoupdate: true
PolicyRule:
Resources Non-Resource URLs Resource Names Verbs
--------- ----------------- -------------- -----
secrets [] [] [create delete get update]
endpoints [] [] [create get update]
serviceaccounts [] [] [create get update]
events [] [] [create patch update]
tokenreviews.authentication.k8s.io [] [] [create]
subjectaccessreviews.authorization.k8s.io [] [] [create]
configmaps [] [] [get]
namespaces [] [] [get]
*.* [] [] [list watch]

Add --use-service-account-credentials=true to the kube-controller-manager startup parameters. The main controller then creates a ServiceAccount named XXX-controller for each controller, and the built-in ClusterRoleBinding system:controller:XXX grants that ServiceAccount the corresponding ClusterRole system:controller:XXX.

# kubectl get clusterrole|grep controller
system:controller:attachdetach-controller 22m
system:controller:certificate-controller 22m
system:controller:clusterrole-aggregation-controller 22m
system:controller:cronjob-controller 22m
system:controller:daemon-set-controller 22m
system:controller:deployment-controller 22m
system:controller:disruption-controller 22m
system:controller:endpoint-controller 22m
system:controller:expand-controller 22m
system:controller:generic-garbage-collector 22m
system:controller:horizontal-pod-autoscaler 22m
system:controller:job-controller 22m
system:controller:namespace-controller 22m
system:controller:node-controller 22m
system:controller:persistent-volume-binder 22m
system:controller:pod-garbage-collector 22m
system:controller:pv-protection-controller 22m
system:controller:pvc-protection-controller 22m
system:controller:replicaset-controller 22m
system:controller:replication-controller 22m
system:controller:resourcequota-controller 22m
system:controller:route-controller 22m
system:controller:service-account-controller 22m
system:controller:service-controller 22m
system:controller:statefulset-controller 22m
system:controller:ttl-controller 22m
system:kube-controller-manager 22m

Take the deployment controller as an example:

# kubectl describe clusterrole system:controller:deployment-controller
Name: system:controller:deployment-controller
Labels: kubernetes.io/bootstrapping=rbac-defaults
Annotations: rbac.authorization.kubernetes.io/autoupdate: true
PolicyRule:
Resources Non-Resource URLs Resource Names Verbs
--------- ----------------- -------------- -----
replicasets.apps [] [] [create delete get list patch update watch]
replicasets.extensions [] [] [create delete get list patch update watch]
events [] [] [create patch update]
pods [] [] [get list update watch]
deployments.apps [] [] [get list update watch]
deployments.extensions [] [] [get list update watch]
deployments.apps/finalizers [] [] [update]
deployments.apps/status [] [] [update]
deployments.extensions/finalizers [] [] [update]
deployments.extensions/status [] [] [update]
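
Because --use-service-account-credentials is enabled, each controller runs with its own ServiceAccount in kube-system, which can be spot-checked (a sketch; the grep pattern just picks a few of them):

# Each controller should have a dedicated ServiceAccount in kube-system
kubectl -n kube-system get serviceaccounts | grep -E 'deployment-controller|node-controller|namespace-controller'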

Check the current leader

# kubectl get endpoints kube-controller-manager --namespace=kube-system -o yaml
apiVersion: v1
kind: Endpoints
metadata:
annotations:
control-plane.alpha.kubernetes.io/leader: '{"holderIdentity":"k8s-master01_e225d712-c25f-11e9-831f-000c29b1069b","leaseDurationSeconds":15,"acquireTime":"2019-08-19T09:01:08Z","renewTime":"2019-08-19T09:02:05Z","leaderTransitions":0}'
creationTimestamp: "2019-08-19T09:01:08Z"
name: kube-controller-manager
namespace: kube-system
resourceVersion: "1925"
selfLink: /api/v1/namespaces/kube-system/endpoints/kube-controller-manager
uid: e2299e08-c25f-11e9-b490-000c29b1069b

Deploy kube-scheduler

Create the kube-scheduler certificate

cd  /etc/kubernetes/ssl/
cat > kube-scheduler-csr.json <<EOF
{
"CN": "system:kube-scheduler",
"hosts": [
"127.0.0.1",
"192.168.10.81",
"192.168.10.82",
"192.168.10.83"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Guangzhou",
"L": "Guangzhou",
"O": "system:kube-scheduler",
"OU": "System"
}
]
}
EOF

# Generate the certificate and private key
cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes kube-scheduler-csr.json | cfssljson -bare kube-scheduler

kube-scheduler.kubeconfig

cd /etc/kubernetes/ssl
source /etc/kubernetes/env.sh
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=${KUBE_APISERVER_URL} \
--kubeconfig=/etc/kubernetes/kube-scheduler.kubeconfig

kubectl config set-credentials system:kube-scheduler \
--client-certificate=kube-scheduler.pem \
--client-key=kube-scheduler-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/kube-scheduler.kubeconfig

kubectl config set-context system:kube-scheduler \
--cluster=kubernetes \
--user=system:kube-scheduler \
--kubeconfig=/etc/kubernetes/kube-scheduler.kubeconfig

kubectl config use-context system:kube-scheduler --kubeconfig=/etc/kubernetes/kube-scheduler.kubeconfig

kube-scheduler.yaml

cat > /etc/kubernetes/kube-scheduler.yaml <<EOF
apiVersion: kubescheduler.config.k8s.io/v1alpha1
kind: KubeSchedulerConfiguration
bindTimeoutSeconds: 600
clientConnection:
  burst: 200
  kubeconfig: "/etc/kubernetes/kube-scheduler.kubeconfig"
  qps: 100
enableContentionProfiling: false
enableProfiling: true
hardPodAffinitySymmetricWeight: 1
healthzBindAddress: 0.0.0.0:10251
leaderElection:
  leaderElect: true
metricsBindAddress: 0.0.0.0:10251
EOF

kube-scheduler.service

source /etc/kubernetes/env.sh
for i in "${!MASTER_IPS[@]}"; do
NODE=${MASTER_IPS[$i]}
cat > ~/kube-scheduler.service.${NODE} <<EOF
[Unit]
Description=Kubernetes Scheduler
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
[Service]
ExecStart=/usr/local/k8s/bin/kube-scheduler \\
--config=/etc/kubernetes/kube-scheduler.yaml \\
--bind-address=${NODE} \\
--secure-port=10259 \\
--port=0 \\
--tls-cert-file=/etc/kubernetes/ssl/kube-scheduler.pem \\
--tls-private-key-file=/etc/kubernetes/ssl/kube-scheduler-key.pem \\
--authentication-kubeconfig=/etc/kubernetes/kube-scheduler.kubeconfig \\
--client-ca-file=/etc/kubernetes/ssl/ca.pem \\
--requestheader-allowed-names="" \\
--requestheader-client-ca-file=/etc/kubernetes/ssl/ca.pem \\
--requestheader-extra-headers-prefix="X-Remote-Extra-" \\
--requestheader-group-headers=X-Remote-Group \\
--requestheader-username-headers=X-Remote-User \\
--authorization-kubeconfig=/etc/kubernetes/kube-scheduler.kubeconfig \\
--logtostderr=true \\
--v=2
Restart=always
RestartSec=5
StartLimitInterval=0
[Install]
WantedBy=multi-user.target
EOF
done

# Distribute
source /etc/kubernetes/env.sh
for NODE in ${MASTER_IPS[@]};do
rsync /etc/kubernetes/ssl/kube-scheduler*.pem root@${NODE}:/etc/kubernetes/ssl/
rsync /etc/kubernetes/kube-scheduler.kubeconfig root@${NODE}:/etc/kubernetes/
rsync /etc/kubernetes/kube-scheduler.yaml root@${NODE}:/etc/kubernetes/kube-scheduler.yaml
rsync ~/kube-scheduler.service.${NODE} root@${NODE}:/etc/systemd/system/kube-scheduler.service
done

# Start kube-scheduler
for NODE in ${MASTER_IPS[@]};do
ssh root@${NODE} "systemctl daemon-reload && systemctl enable kube-scheduler --now"
done

# Check the running status
for NODE in ${MASTER_IPS[@]};do
ssh root@${NODE} "systemctl status kube-scheduler|grep Active"
done
Check the exported metrics
Note: run the following commands on a kube-scheduler node.
kube-scheduler listens on ports 10251 and 10259:
10251: HTTP, insecure port, no authentication or authorization required;
10259: HTTPS, secure port, authentication and authorization required;
Both ports expose /metrics and /healthz.

curl -s http://192.168.10.81:10251/metrics|head
# HELP apiserver_audit_event_total Counter of audit events generated and sent to the audit backend.
# TYPE apiserver_audit_event_total counter
apiserver_audit_event_total 0
# HELP apiserver_audit_requests_rejected_total Counter of apiserver requests rejected due to an error in audit logging backend.
# TYPE apiserver_audit_requests_rejected_total counter
apiserver_audit_requests_rejected_total 0
# HELP apiserver_client_certificate_expiration_seconds Distribution of the remaining lifetime on the certificate used to authenticate a request.
# TYPE apiserver_client_certificate_expiration_seconds histogram
apiserver_client_certificate_expiration_seconds_bucket{le="0"} 0
apiserver_client_certificate_expiration_seconds_bucket{le="1800"} 0

Check the current leader

# kubectl get endpoints kube-scheduler --namespace=kube-system -o yaml
apiVersion: v1
kind: Endpoints
metadata:
annotations:
control-plane.alpha.kubernetes.io/leader: '{"holderIdentity":"k8s-master02_6360c5c0-c262-11e9-a2b9-000c29e02350","leaseDurationSeconds":15,"acquireTime":"2019-08-19T09:19:21Z","renewTime":"2019-08-19T09:19:51Z","leaderTransitions":1}'
creationTimestamp: "2019-08-19T09:18:54Z"
name: kube-scheduler
namespace: kube-system
resourceVersion: "2799"
selfLink: /api/v1/namespaces/kube-system/endpoints/kube-scheduler
uid: 5da1ecf2-c262-11e9-b490-000c29b1069b

Deploy the nodes

Distribute the node binaries

cd /usr/local/src
source /etc/kubernetes/env.sh
for NODE in ${NODE_IPS[@]};do
ssh root@$NODE "mkdir -p /usr/local/k8s/bin"
rsync kubernetes/server/bin/{kube-proxy,kubelet,kubectl} root@${NODE}:/usr/local/k8s/bin/
done

The Docker steps must be performed on every node.

If the masters should also join the cluster as nodes, deploy docker, kubelet, and kube-proxy on the master nodes as well.

yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum -y install docker-ce

Create the Docker config file

cat > /etc/docker/daemon.json <<EOF
{
"exec-opts": ["native.cgroupdriver=cgroupfs"],
"registry-mirrors": ["https://hub-mirror.c.163.com"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
},
"storage-driver": "overlay2"
}
EOF

Start the Docker service

source /etc/kubernetes/env.sh
for NODE in ${NODE_IPS[@]};do
rsync /etc/docker/daemon.json root@${NODE}:/etc/docker/daemon.json
ssh root@${NODE} "systemctl daemon-reload && systemctl enable docker && systemctl restart docker"
done
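
After the restart it is worth confirming that Docker picked up the options from daemon.json (cgroupfs cgroup driver and overlay2 storage driver); a quick check on any node:

# Verify the cgroup driver and storage driver configured in daemon.json
docker info 2>/dev/null | grep -E 'Cgroup Driver|Storage Driver'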

Deploy kubelet

kubelet-config.yaml

source /etc/kubernetes/env.sh
for i in "${!NODE_IPS[@]}"; do
NODE=${NODE_IPS[$i]}
cat > ~/kubelet-config.yaml.${NODE} <<EOF
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
address: "${NODE}"
staticPodPath: ""
syncFrequency: 1m
fileCheckFrequency: 20s
httpCheckFrequency: 20s
staticPodURL: ""
port: 10250
readOnlyPort: 0
rotateCertificates: true
serverTLSBootstrap: true
authentication:
  anonymous:
    enabled: false
  webhook:
    enabled: true
  x509:
    clientCAFile: "/etc/kubernetes/ssl/ca.pem"
authorization:
  mode: Webhook
registryPullQPS: 0
registryBurst: 20
eventRecordQPS: 0
eventBurst: 20
enableDebuggingHandlers: true
enableContentionProfiling: true
healthzPort: 10248
healthzBindAddress: "${NODE}"
clusterDomain: "cluster.local"
clusterDNS:
  - "${DNS_SERVICE_IP}"
nodeStatusUpdateFrequency: 10s
nodeStatusReportFrequency: 1m
imageMinimumGCAge: 2m
imageGCHighThresholdPercent: 85
imageGCLowThresholdPercent: 80
volumeStatsAggPeriod: 1m
kubeletCgroups: ""
systemCgroups: ""
cgroupRoot: ""
cgroupsPerQOS: true
cgroupDriver: cgroupfs
runtimeRequestTimeout: 10m
hairpinMode: promiscuous-bridge
maxPods: 220
podCIDR: "${POD_CIDR}"
podPidsLimit: -1
resolvConf: /etc/resolv.conf
maxOpenFiles: 1000000
kubeAPIQPS: 1000
kubeAPIBurst: 2000
serializeImagePulls: false
evictionHard:
  memory.available: "100Mi"
  nodefs.available: "10%"
  nodefs.inodesFree: "5%"
  imagefs.available: "15%"
evictionSoft: {}
enableControllerAttachDetach: true
failSwapOn: true
containerLogMaxSize: 20Mi
containerLogMaxFiles: 10
systemReserved: {}
kubeReserved: {}
systemReservedCgroup: ""
kubeReservedCgroup: ""
enforceNodeAllocatable: ["pods"]
EOF
done

kubelet.service

source /etc/kubernetes/env.sh
for i in "${!NODE_IPS[@]}"; do
NODE=${NODE_IPS[$i]}
NODE_NAME=${NODE_NAMES[$i]}
cat > ~/kubelet.service.${NODE} <<EOF
[Unit]
Description=Kubernetes Kubelet
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
After=docker.service
Requires=docker.service
[Service]
ExecStart=/usr/local/k8s/bin/kubelet \\
--kubeconfig=/etc/kubernetes/kubelet.kubeconfig \\
--bootstrap-kubeconfig=/etc/kubernetes/kubelet-bootstrap.kubeconfig \\
--config=/etc/kubernetes/kubelet-config.yaml \\
--cert-dir=/etc/kubernetes/ssl \\
--network-plugin=cni \\
--cni-conf-dir=/etc/cni/net.d \\
--hostname-override=${NODE_NAME} \\
--pod-infra-container-image=gcr.azk8s.cn/google_containers/pause-amd64:3.1 \\
--image-pull-progress-deadline=15m \\
--logtostderr=true \\
--cgroup-driver=cgroupfs \\
--v=2
Restart=always
RestartSec=5
StartLimitInterval=0
[Install]
WantedBy=multi-user.target
EOF
done

kubelet-bootstrap-kubeconfig
Check whether the token has expired; if it has, create a new one.

cd /etc/kubernetes/ssl
source /etc/kubernetes/env.sh
# Create a bootstrap token
export BOOTSTRAP_TOKEN=$(kubeadm token create \
--description kubelet-bootstrap-token \
--groups system:bootstrappers:kubernetes-clientgroup \
--kubeconfig ~/.kube/config)

# Set cluster parameters
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=${KUBE_APISERVER_URL} \
--kubeconfig=/etc/kubernetes/kubelet-bootstrap.kubeconfig

# Set client credentials
kubectl config set-credentials kubelet-bootstrap \
--token=${BOOTSTRAP_TOKEN} \
--kubeconfig=/etc/kubernetes/kubelet-bootstrap.kubeconfig

# Set the context
kubectl config set-context default \
--cluster=kubernetes \
--user=kubelet-bootstrap \
--kubeconfig=/etc/kubernetes/kubelet-bootstrap.kubeconfig

# Use this context by default
kubectl config use-context default --kubeconfig=/etc/kubernetes/kubelet-bootstrap.kubeconfig

# grep token /etc/kubernetes/kubelet-bootstrap.kubeconfig
token: 2wpre5.7w2lu07fys9m70ix

List the tokens created by kubeadm

# kubeadm token list --kubeconfig ~/.kube/config
TOKEN TTL EXPIRES USAGES DESCRIPTION EXTRA GROUPS
2wpre5.7w2lu07fys9m70ix 23h 2019-08-24T07:20:04-04:00 authentication,signing kubelet-bootstrap-token system:bootstrappers:kubernetes-clientgroup

The token is valid for one day. Once it expires it can no longer be used to bootstrap a kubelet and will be cleaned up by kube-controller-manager's token cleaner.
When kube-apiserver receives a kubelet bootstrap token, it sets the request user to system:bootstrap:<token-id> and the group to system:bootstrappers; a ClusterRoleBinding for this group is created below.
Check the Secret associated with each token:

# kubectl get secrets -n kube-system|grep bootstrap-token
bootstrap-token-2wpre5 bootstrap.kubernetes.io/token 7 51s

Grant the bootstrap user and group permission to create CSRs; without this binding, kubelet will fail to start

kubectl create clusterrolebinding kubelet-bootstrap --clusterrole=system:node-bootstrapper --group=system:bootstrappers

# Distribute
source /etc/kubernetes/env.sh
for NODE in ${NODE_IPS[@]};do
ssh root@${NODE} "mkdir -p /etc/kubernetes"
# kubelet.kubeconfig is generated automatically on each node once its bootstrap CSR is approved
rsync ~/kubelet-config.yaml.${NODE} root@${NODE}:/etc/kubernetes/kubelet-config.yaml
rsync ~/kubelet.service.${NODE} root@${NODE}:/etc/systemd/system/kubelet.service
rsync /etc/kubernetes/kubelet-bootstrap.kubeconfig root@${NODE}:/etc/kubernetes/kubelet-bootstrap.kubeconfig
done

Start the kubelet service

source /etc/kubernetes/env.sh
for NODE in ${NODE_IPS[@]};do
ssh root@${NODE} "systemctl daemon-reload && systemctl enable kubelet --now"
done

csr-crb.yaml

cd /etc/kubernetes/
cat > csr-crb.yaml <<EOF
# Approve all CSRs for the group "system:bootstrappers"
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: auto-approve-csrs-for-group
subjects:
  - kind: Group
    name: system:bootstrappers
    apiGroup: rbac.authorization.k8s.io
roleRef:
  kind: ClusterRole
  name: system:certificates.k8s.io:certificatesigningrequests:nodeclient
  apiGroup: rbac.authorization.k8s.io
---
# To let a node of the group "system:nodes" renew its own credentials
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: node-client-cert-renewal
subjects:
  - kind: Group
    name: system:nodes
    apiGroup: rbac.authorization.k8s.io
roleRef:
  kind: ClusterRole
  name: system:certificates.k8s.io:certificatesigningrequests:selfnodeclient
  apiGroup: rbac.authorization.k8s.io
---
# A ClusterRole which instructs the CSR approver to approve a node requesting a
# serving cert matching its client cert.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: approve-node-server-renewal-csr
rules:
  - apiGroups: ["certificates.k8s.io"]
    resources: ["certificatesigningrequests/selfnodeserver"]
    verbs: ["create"]
---
# To let a node of the group "system:nodes" renew its own server credentials
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: node-server-cert-renewal
subjects:
  - kind: Group
    name: system:nodes
    apiGroup: rbac.authorization.k8s.io
roleRef:
  kind: ClusterRole
  name: approve-node-server-renewal-csr
  apiGroup: rbac.authorization.k8s.io
EOF
kubectl apply -f csr-crb.yaml

# kubectl get csr
NAME AGE REQUESTOR CONDITION
csr-4hwzt 3m20s system:node:k8s-node01 Pending
csr-ft87k 12m system:bootstrap:2wpre5 Approved,Issued
csr-mn4mb 2s system:node:k8s-node02 Pending
csr-wknjd 12m system:bootstrap:c6h4x1 Approved,Issued
...

# kubectl get csr
No resources found.
# If this returns "No resources found.", check that the token shown by `kubeadm token list` matches the token in /etc/kubernetes/kubelet-bootstrap.kubeconfig.
# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-node01 Ready <none> 3m41s v1.15.4
k8s-node02 Ready <none> 3m41s v1.15.4
k8s-master01 Ready <none> 3m41s v1.15.4
k8s-master02 Ready <none> 3m41s v1.15.4
k8s-master03 Ready <none> 3m41s v1.15.4

Manually approve the server cert CSRs
For security reasons, the CSR approving controllers do not automatically approve kubelet server certificate signing requests; approve them manually:

kubectl get csr | grep Pending | awk '{print $1}' | xargs kubectl certificate approve

Bearer token authentication and authorization
Create a ServiceAccount and bind it to the ClusterRole system:kubelet-api-admin so that it has permission to call the kubelet API (a usage sketch follows the commands below):

kubectl create sa kubelet-api-test
kubectl create clusterrolebinding kubelet-api-test --clusterrole=system:kubelet-api-admin --serviceaccount=default:kubelet-api-test
SECRET=$(kubectl get secrets | grep kubelet-api-test | awk '{print $1}')
TOKEN=$(kubectl describe secret ${SECRET} | grep -E '^token' | awk '{print $2}')
echo ${TOKEN}
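
With that token the kubelet API can be called directly, which also verifies the webhook authentication and authorization configured in kubelet-config.yaml; a sketch against k8s-node01 (kubelet serves on port 10250 as configured above; use -k instead of --cacert if its serving certificate has not been approved yet):

# Call the kubelet API with the kubelet-api-test bearer token
curl -s --cacert /etc/kubernetes/ssl/ca.pem \
-H "Authorization: Bearer ${TOKEN}" \
https://192.168.10.84:10250/metrics | head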

Deploy kube-proxy

Make sure the ipvs and nf_conntrack kernel modules are loaded.

kube-proxy runs on all worker nodes. It watches the apiserver for changes to services and endpoints and maintains routing rules that provide the service IPs and load balancing. It is deployed here in ipvs mode.

Create the kube-proxy certificate

cat > /etc/kubernetes/ssl/kube-proxy-csr.json <<EOF
{
"CN": "system:kube-proxy",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Guangzhou",
"L": "Guangzhou",
"O": "k8s",
"OU": "System"
}
]
}
EOF

# Generate the certificate and private key
cd /etc/kubernetes/ssl
cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes kube-proxy-csr.json | cfssljson -bare kube-proxy

kube-proxy.kubeconfig

source /etc/kubernetes/env.sh
cd /etc/kubernetes/ssl
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=${KUBE_APISERVER_URL} \
--kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig

kubectl config set-credentials kube-proxy \
--client-certificate=kube-proxy.pem \
--client-key=kube-proxy-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig

kubectl config set-context default \
--cluster=kubernetes \
--user=kube-proxy \
--kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig

kubectl config use-context default --kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig

kube-proxy-config.yaml

source /etc/kubernetes/env.sh
for i in "${!NODE_IPS[@]}"; do
NODE=${NODE_IPS[$i]}
NODE_NAME=${NODE_NAMES[$i]}

cat > ~/kube-proxy-config.yaml.${NODE}<<EOF
kind: KubeProxyConfiguration
apiVersion: kubeproxy.config.k8s.io/v1alpha1
clientConnection:
  burst: 200
  kubeconfig: "/etc/kubernetes/kube-proxy.kubeconfig"
  qps: 100
bindAddress: ${NODE}
healthzBindAddress: ${NODE}:10256
metricsBindAddress: ${NODE}:10249
enableProfiling: true
clusterCIDR: ${POD_CIDR}
hostnameOverride: ${NODE_NAME}
mode: "ipvs"
portRange: ""
iptables:
  masqueradeAll: false
ipvs:
  scheduler: rr
  excludeCIDRs: []
EOF
done

# Since v1.10 most kube-proxy parameters can be set in a config file; --write-config-to can generate a template of this file (see the sketch below)
bindAddress: listen address;
clientConnection.kubeconfig: kubeconfig file used to connect to the apiserver;
clusterCIDR: kube-proxy uses this to distinguish traffic inside the cluster from traffic outside it; only when clusterCIDR or masqueradeAll is set does kube-proxy SNAT requests that access a Service IP;
hostnameOverride: must match the value used by kubelet, otherwise kube-proxy will not find its Node after starting and will not create any ipvs rules;
mode: use ipvs mode
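
As mentioned above, kube-proxy can dump a fully populated configuration itself, which is handy for discovering the available fields; a minimal sketch (the output path is arbitrary):

# Dump kube-proxy's default configuration for reference
/usr/local/k8s/bin/kube-proxy --write-config-to=/tmp/kube-proxy-defaults.yaml
head -n 20 /tmp/kube-proxy-defaults.yaml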

kube-proxy.service

cat > ~/kube-proxy.service <<EOF
[Unit]
Description=Kubernetes Kube-Proxy Server
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/k8s/bin/kube-proxy \\
--config=/etc/kubernetes/kube-proxy-config.yaml \\
--logtostderr=true \\
--v=2
Restart=on-failure
RestartSec=5
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF

# Distribute
source /etc/kubernetes/env.sh
for NODE in ${NODE_IPS[@]};do
rsync /etc/kubernetes/ssl/ca.pem root@${NODE}:/etc/kubernetes/ssl/
rsync /etc/kubernetes/kube-proxy.kubeconfig root@${NODE}:/etc/kubernetes/kube-proxy.kubeconfig
rsync ~/kube-proxy-config.yaml.${NODE} root@${NODE}:/etc/kubernetes/kube-proxy-config.yaml
rsync ~/kube-proxy.service root@${NODE}:/etc/systemd/system/
done

# Start the kube-proxy service
source /etc/kubernetes/env.sh
for NODE in ${NODE_IPS[@]};do
ssh root@${NODE} "modprobe ip_vs_rr"
ssh root@${NODE} "systemctl daemon-reload && systemctl enable kube-proxy --now"
done

# Check the result
for NODE in ${NODE_IPS[@]};do
echo ">>> ${NODE}"
ssh root@${NODE} "systemctl status kube-proxy|grep Active"
done

# Check the listening ports
# netstat -lnpt|grep kube-proxy
tcp 0 0 192.168.10.81:10256 0.0.0.0:* LISTEN 1633/kube-proxy
tcp 0 0 192.168.10.81:10249 0.0.0.0:* LISTEN 1633/kube-proxy
#10249:http prometheus metrics port
#10256:http healthz port

# Check the ipvs rules

# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 10.96.0.1:443 rr
-> 192.168.10.81:6443 Masq 1 0 0
-> 192.168.10.82:6443 Masq 1 0 0
-> 192.168.10.83:6443 Masq 1 0 0
...

Recreate a token

#kubeadm token create --print-join-command
kubeadm token create
kubeadm token list

TOKEN TTL EXPIRES USAGES DESCRIPTION EXTRA GROUPS
j5p01w.ksoigbibr5t4ufte <invalid> 2019-11-16T14:32:11+08:00 authentication,signing kubelet-bootstrap-token system:bootstrappers:kubernetes-clientgroup
th32rs.bspeftc5qko8ntrn 23h 2019-11-21T18:37:15+08:00 authentication,signing <none> system:bootstrappers:kubeadm:default-node-token

Deploy the Calico network

curl https://docs.projectcalico.org/v3.9/manifests/calico-etcd.yaml -o calico.yaml
source /etc/kubernetes/env.sh
ETCD_KEY_BASE64=$(cat /etc/etcd/ssl/etcd-key.pem | base64 -w 0)
ETCD_CERT_BASE64=$(cat /etc/etcd/ssl/etcd.pem | base64 -w 0)
ETCD_CA_BASE64=$(cat /etc/etcd/ssl/ca.pem | base64 -w 0)

sed -i -e "s?192.168.0.0/16?$POD_CIDR?g" calico.yaml
sed -i -e "s?etcd_endpoints:.*?etcd_endpoints: \"$ETCD_ENDPOINTS\"?g" calico.yaml

sed -i -e "s?# etcd-key:.*?etcd-key: $ETCD_KEY_BASE64?g" calico.yaml
sed -i -e "s?# etcd-cert:.*?etcd-cert: $ETCD_CERT_BASE64?g" calico.yaml
sed -i -e "s?# etcd-ca:.*?etcd-ca: $ETCD_CA_BASE64?g" calico.yaml

sed -i "s?etcd_ca: \"?etcd_ca: \"/calico-secrets/etcd-ca?" calico.yaml
sed -i "s?etcd_cert: \"?etcd_cert: \"/calico-secrets/etcd-cert?" calico.yaml
sed -i "s?etcd_key: \"?etcd_key: \"/calico-secrets/etcd-key?" calico.yaml

kubectl apply -f calico.yaml
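
After applying the manifest, wait for the calico-node pods to come up on every node and check that the nodes stay Ready:

# Calico pods should be Running on every node
kubectl -n kube-system get pods -o wide | grep calico
kubectl get nodes -o wide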

With multiple NICs, the interface can be specified

# https://docs.projectcalico.org/v3.9/reference/node/configuration#ip-autodetection-methods
# By default the first-found method is used, i.e. the IP of the first valid NIC becomes the node IP, which can be wrong on multi-NIC machines. Setting the value to can-reach=192.168.10.81 uses the address of the first NIC that can reach master 192.168.10.81 as the node IP
- name: IP_AUTODETECTION_METHOD
value: can-reach=192.168.10.81

cat calico.yaml 
---
# Source: calico/templates/calico-etcd-secrets.yaml
# The following contains k8s Secrets for use with a TLS enabled etcd cluster.
# For information on populating Secrets, see http://kubernetes.io/docs/user-guide/secrets/
apiVersion: v1
kind: Secret
type: Opaque
metadata:
name: calico-etcd-secrets
namespace: kube-system
data:
# Populate the following with etcd TLS configuration if desired, but leave blank if
# not using TLS for etcd.
# The keys below should be uncommented and the values populated with the base64
# encoded contents of each file that would be associated with the TLS data.
# Example command for encoding a file contents: cat <file> | base64 -w 0
etcd-key: LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFcEFJQkFBS0NBUUVBOXpXODcxcGpKTSsydzd4NlR5TkIyS0YwcDNvYVFaUTE1RDNSTTF2V0R2cXNkU3VPCktuYUMxMHVFVnpFbllSeDVKWHRWMUJmQjFVRlFUTmExUlhNOStlSFY1cXo3Q3A0WnpHOCswVjB0dkdXRkwySksKT0RjWkFaTW9vakYyMHJnY3d2WVhKNzNVRm9JbzlDY3VRN0J5UElqVkdMdzBjNjJKbWdzUERReXFVaU4zaE1ycQozM1pQN2lPWWRVa3Z5RDhiOHpSekFvK0gxMVV1SWkzaEVsVkJxTjFjS2pJVTJ0MDFJWXllOFE3WDJscmtIL2RKCk92QlNqRnJmTmFpRjVhblFJbkFwRkRsbzZoMVZGVE96MitMZU1lZ1BZNDZnaWQ1Mi82clJrZHJ3WWNkM0JUeXMKYkJkZithNnNYSHc1L012WUZJWFhIaVRURG9wanB4RTJjcmxmT3dJREFRQUJBb0lCQVFEa1ZwaWJKUXZxbG9tawpKbFY2LzMvMGpyMW5iRnNRTTh2TGRHWGxZSEJLQ2JSOXphdFZBWDQ5U3AyRXcvd3hzTkVkazl4bE9VUnFyQmQxCndlUlJyKzZRL3M1S2NZQzMvK3lvYmlEQjFhVkhIU1oxNi8zT0N1Wm9FL3MyUlNZUjRNbUFNenFVRkx6dnhXMzYKaHE0dkM2Mll5SEs4bENxR3BzWXdxUUpQOVdEazE5amx4SFpNcmp1NXhvN2ZiekpyYjZjZ0FsVW5aZitYK3F0VApRcUptTTJ3QWtVRllSVVdCdWM1QVJiY2RWVHlWcFIrUDFvTHNEWWNsQTVxaklyL2s2Y2RCaGVJclJ3NDREWUlwClc5N0VLNVhYNkdORDhSUEZjZnFUcU1ZUDFMakVBd2dML1pjTWZIeVNkdHNVRno2SXJJMVVPVFFsUkNmK0VEMWQKTkJEQUlCd1JBb0dCQVBlcFRBTVgydXVyamV4dXdJdllTcDk2TEJYcWVxS1pib1BGZWdNM29mT0pHRFY4ZWFXOQpIY1k4Z0V2UURuU0dzSVZtNlRrZ1VWcno1aTczQ2NaMC8yZHliN2lUWlVhekhSR2tqWTZrcTZ0UktIVFBOL3FLCm8zZzVqNDFzZE5CNXdCS2hydGtxRmNDeGlueU5ocHk1MHJMR1dodUVDa25xckQ5K2tVMVA0ek5QQW9HQkFQK0kKak42T085YlpqU0tTM0c5WjFTbmw4TjdtcUFUb3F3ZlpiTjhDYitidGFmdG93RGRtRllDaGhPZkdXdU81ajVxRwpsZHk4WHZYQ0FkQUc0Q3FoWndHSERTdU9oRjRyV05PcjdtK1VLT005TDRIbHZ1M1VQVHlMYVF3enNydzFTUjZVCmlVRGtEbU91ckI0OHJ6Y2JCVzl0c1JibGNETzRwTnQwZ1pPK0k4cFZBb0dCQUtBSjZMSy9BWnIxT3pzanZvMGYKOGVqb1hBQzFpeFh6ckRTK3NiL09mWHNRaG5KWnc0cFVrUjcvRGJEdis4ZFZHbEM2VmMreWtLQURxQ1dGUWUzYQp3UWlxTElQTHk5QktBbWphT3hlSVZkN1pEUm1lcG4vaWd3dzN0Z29mb2NUdVNDNUNDT2tRaHdvRU1JZVlOUHFtCklmK2dxYTNLUDVQUWNBcC8wWWx5TEZHRkFvR0FGMnIyTGZjdStMT2JRWTNmOGJ4ejBwRW1EK2RuZ251b0lPSUcKSGpIaysyN0toYVhGNTgzTkxxcWV1WGJ1SVd5Q0FwRk83cG04d2h6emVaTUh2Q0JxaGpOUU82dWw2ZE5qSXBMaApHUXlrNnA1UzJqNnB6clhpbnNmUDZyUmFMdkZiTUJoQXM5ZXlrZVFTRmZ0TnUya1lpR3V2RFFBZDFqK1hyQnp6Ck93enp6cmtDZ1lBZlFYRGlXdjl0YlZHbWc1TkZJSHdLbzBTSEZHZThINERheCtUNTJOT3lDN1k4NmV5NWExVlgKK2V6ZU1pdHlFMkhtdXhtVFhkcVE4cGRhOUVqTmcraHRrTFRORXE0YXRSemwyc3hNdGZXcDRyejhUcjRxRjYvbgpydHFpSUxNdkovV0lWcWFqQW15TEZKQytwNVdzbmV6UG5YbkxxZ3pRNlVZTSthcTFUenIzeGc9PQotLS0tLUVORCBSU0EgUFJJVkFURSBLRVktLS0tLQo=
etcd-cert: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUVBRENDQXVpZ0F3SUJBZ0lVQVNVbDF1Qys1TmN0YmR5c1U5eldvejBvbHRrd0RRWUpLb1pJaHZjTkFRRUwKQlFBd2FURUxNQWtHQTFVRUJoTUNRMDR4RWpBUUJnTlZCQWdUQ1VkMVlXNW5XbWh2ZFRFU01CQUdBMVVFQnhNSgpSM1ZoYm1kYWFHOTFNUXd3Q2dZRFZRUUtFd05yT0hNeER6QU5CZ05WQkFzVEJsTjVjM1JsYlRFVE1CRUdBMVVFCkF4TUthM1ZpWlhKdVpYUmxjekFnRncweE9URXdNVFV4TURFMU1EQmFHQTh5TVRFNU1Ea3lNVEV3TVRVd01Gb3cKWXpFTE1Ba0dBMVVFQmhNQ1EwNHhFakFRQmdOVkJBZ1RDVWQxWVc1bldtaHZkVEVTTUJBR0ExVUVCeE1KUjNWaApibWRhYUc5MU1Rd3dDZ1lEVlFRS0V3TnJPSE14RHpBTkJnTlZCQXNUQmxONWMzUmxiVEVOTUFzR0ExVUVBeE1FClpYUmpaRENDQVNJd0RRWUpLb1pJaHZjTkFRRUJCUUFEZ2dFUEFEQ0NBUW9DZ2dFQkFQYzF2TzlhWXlUUHRzTzgKZWs4alFkaWhkS2Q2R2tHVU5lUTkwVE5iMWc3NnJIVXJqaXAyZ3RkTGhGY3hKMkVjZVNWN1ZkUVh3ZFZCVUV6Vwp0VVZ6UGZuaDFlYXMrd3FlR2N4dlB0RmRMYnhsaFM5aVNqZzNHUUdUS0tJeGR0SzRITUwyRnllOTFCYUNLUFFuCkxrT3djanlJMVJpOE5IT3RpWm9MRHcwTXFsSWpkNFRLNnQ5MlQrNGptSFZKTDhnL0cvTTBjd0tQaDlkVkxpSXQKNFJKVlFhamRYQ295Rk5yZE5TR01udkVPMTlwYTVCLzNTVHJ3VW94YTN6V29oZVdwMENKd0tSUTVhT29kVlJVegpzOXZpM2pIb0QyT09vSW5lZHYrcTBaSGE4R0hIZHdVOHJHd1hYL211ckZ4OE9mekwyQlNGMXg0azB3NktZNmNSCk5uSzVYenNDQXdFQUFhT0JvekNCb0RBT0JnTlZIUThCQWY4RUJBTUNCYUF3SFFZRFZSMGxCQll3RkFZSUt3WUIKQlFVSEF3RUdDQ3NHQVFVRkJ3TUNNQXdHQTFVZEV3RUIvd1FDTUFBd0hRWURWUjBPQkJZRUZNZmtmdnFnNmxPSApoMGpBVmRhZUIrOVd3VDBGTUI4R0ExVWRJd1FZTUJhQUZIcTFudFlQV29oQW4wOUNidTdSQzFGRDZGN2tNQ0VHCkExVWRFUVFhTUJpSEJIOEFBQUdIQk1Db0NoS0hCTUNvQ2hTSEJNQ29DaFV3RFFZSktvWklodmNOQVFFTEJRQUQKZ2dFQkFJbzN6Yyt4MTY0aEZldG0zTTFka2NFcWdOK2FmTWg5TWN2UElEL3V2QTFoQzBubkJ3MmxneWlUTkpqVQpYdlVFY21nNmNXOUJod2lDcUEyRmtvZCtQNUF1QU5KTWxsM3FoVVpPcXJ0bXowdm5wZXVnUUNaSG9uUjBneG1jCkJDbDBZSHNlWlRQekJMWnhRVmdBVW91VEt3K0d1TmRtZHBvdTFlK25WZ1FlTGt4dkNKSjJwRHlXbGpwTENkQ3UKNHVFWmtJRnU5OTlueEpaeGdyeXpwd0lsbGFiME9JYmozbnZGcVRWd24yM0JyaDIxTEdFcGcvOCtxL243VkJDMAp4aENMWGVPbnQybWpjQjgvYzZkYW1zeWZ6dlkvVFRFamVoMlhPUFNuNW5wT0piV2FMNk5lNTVkeW5nbUNhczhoCkpZcEtrNldFZTU3NHpoWVQwMkpBY1BYcEk5ND0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
etcd-ca: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUR5RENDQXJDZ0F3SUJBZ0lVVzNMMFdEelp6UUJ3dm5id25CbG9GUUlHSWlzd0RRWUpLb1pJaHZjTkFRRUwKQlFBd2FURUxNQWtHQTFVRUJoTUNRMDR4RWpBUUJnTlZCQWdUQ1VkMVlXNW5XbWh2ZFRFU01CQUdBMVVFQnhNSgpSM1ZoYm1kYWFHOTFNUXd3Q2dZRFZRUUtFd05yT0hNeER6QU5CZ05WQkFzVEJsTjVjM1JsYlRFVE1CRUdBMVVFCkF4TUthM1ZpWlhKdVpYUmxjekFnRncweE9URXdNVFV4TURFeU1EQmFHQTh5TVRFNU1Ea3lNVEV3TVRJd01Gb3cKYVRFTE1Ba0dBMVVFQmhNQ1EwNHhFakFRQmdOVkJBZ1RDVWQxWVc1bldtaHZkVEVTTUJBR0ExVUVCeE1KUjNWaApibWRhYUc5MU1Rd3dDZ1lEVlFRS0V3TnJPSE14RHpBTkJnTlZCQXNUQmxONWMzUmxiVEVUTUJFR0ExVUVBeE1LCmEzVmlaWEp1WlhSbGN6Q0NBU0l3RFFZSktvWklodmNOQVFFQkJRQURnZ0VQQURDQ0FRb0NnZ0VCQU5iTzVqRjcKTVdPeDF4N0srNVBKUEt4c2VEeDh3eHgzWmdVdzhjbVFBbjRXcEowMDJVT1ZQZ1EzQjlFbjBXVFZLeUw4dDE0WQordHJlVVlQQW85akVhY1AwakxJY0JwRHFjYXpCUVVUQWg1bzY4bzFra1JuQ3QwcXNVL1NTTkJINHlGREk3ZEdzCjRZYzJxYkNVdm9XeU9JM1o2eXJhSmJ4Mm5POUI1am1IdXlsc1VqMlFSZ2pCUHRlT0dvSzZYc3VWYzd2R2ozOS8KZWV5RmlRVzZQb3ZlekhKYjdzU1g5WmFsMWs4TnBHQ2NuY0s5d0VycW5PcXlOMjFnaGdteCs3eGQ1ZENrNnBGRgo3L2FoU1I1czdVN3VDQktrL1NQVmJ3Y2w1STEzdzhCMFFSUVJOTmI5TW1Tc3RRNVFGNGlXODNtRURUR1piN09hCkVzU0FUUHlsZWdnTllWVUNBd0VBQWFObU1HUXdEZ1lEVlIwUEFRSC9CQVFEQWdFR01CSUdBMVVkRXdFQi93UUkKTUFZQkFmOENBUUl3SFFZRFZSME9CQllFRkhxMW50WVBXb2hBbjA5Q2J1N1JDMUZENkY3a01COEdBMVVkSXdRWQpNQmFBRkhxMW50WVBXb2hBbjA5Q2J1N1JDMUZENkY3a01BMEdDU3FHU0liM0RRRUJDd1VBQTRJQkFRQ094S1dxCkphWS9XeExjNGl3OHJRN05oNEYzTWpjbndEZ2E1MGM1cFg2ZXRZWGh2ZnFQbFQ4azZJWlZiSlBzL0s2bmVrTncKN2daUWRaenVnbXNIZ3lPdWVtOE1HcTlXREoyWVVnWkFGWm1INTl4K2t5VTBMSVZ1UHM5L01KKzlDZlN3YUVkNQpSbzVqUTBzTGtnekZkVFpFd1FoVWg4SEZvcVp6RXhNQ0ZKVVV5amZ3OW0xL0tRVjRFcE1IVEt1M1dLcjhJR2lWCncwcmFSY3JwdEkvdHNvb21HbWFPanE2cHR6dDVqUmZQYUpGQXM5UkU4S2tPRmFlWHNBS3R1V21rRFBrallWcloKakRGRkJCekpZdkV5NC9KL2dVU25raExFWjFDTmJJNkhsdjZiVUtyZHp5VGo3R1crQlFZd1Y3RWxvV1dpWDNOaApTalBNWEc2b01uTStTZDRUCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
---
# Source: calico/templates/calico-config.yaml
# This ConfigMap is used to configure a self-hosted Calico installation.
kind: ConfigMap
apiVersion: v1
metadata:
name: calico-config
namespace: kube-system
data:
# Configure this with the location of your etcd cluster.
etcd_endpoints: "https://192.168.10.81:2379,https://192.168.10.82:2379,https://192.168.10.83:2379"
# If you're using TLS enabled etcd uncomment the following.
# You must also populate the Secret below with these files.
etcd_ca: "/calico-secrets/etcd-ca" # "/calico-secrets/etcd-ca"
etcd_cert: "/calico-secrets/etcd-cert" # "/calico-secrets/etcd-cert"
etcd_key: "/calico-secrets/etcd-key" # "/calico-secrets/etcd-key"
# Typha is disabled.
typha_service_name: "none"
# Configure the backend to use.
calico_backend: "bird"

# Configure the MTU to use
veth_mtu: "1440"

# The CNI network configuration to install on each node. The special
# values in this config will be automatically populated.
cni_network_config: |-
{
"name": "k8s-pod-network",
"cniVersion": "0.3.1",
"plugins": [
{
"type": "calico",
"log_level": "info",
"etcd_endpoints": "__ETCD_ENDPOINTS__",
"etcd_key_file": "__ETCD_KEY_FILE__",
"etcd_cert_file": "__ETCD_CERT_FILE__",
"etcd_ca_cert_file": "__ETCD_CA_CERT_FILE__",
"mtu": __CNI_MTU__,
"ipam": {
"type": "calico-ipam"
},
"policy": {
"type": "k8s"
},
"kubernetes": {
"kubeconfig": "__KUBECONFIG_FILEPATH__"
}
},
{
"type": "portmap",
"snat": true,
"capabilities": {"portMappings": true}
}
]
}

---
# Source: calico/templates/rbac.yaml

# Include a clusterrole for the kube-controllers component,
# and bind it to the calico-kube-controllers serviceaccount.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: calico-kube-controllers
rules:
# Pods are monitored for changing labels.
# The node controller monitors Kubernetes nodes.
# Namespace and serviceaccount labels are used for policy.
- apiGroups: [""]
resources:
- pods
- nodes
- namespaces
- serviceaccounts
verbs:
- watch
- list
# Watch for changes to Kubernetes NetworkPolicies.
- apiGroups: ["networking.k8s.io"]
resources:
- networkpolicies
verbs:
- watch
- list
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: calico-kube-controllers
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: calico-kube-controllers
subjects:
- kind: ServiceAccount
name: calico-kube-controllers
namespace: kube-system
---
# Include a clusterrole for the calico-node DaemonSet,
# and bind it to the calico-node serviceaccount.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: calico-node
rules:
# The CNI plugin needs to get pods, nodes, and namespaces.
- apiGroups: [""]
resources:
- pods
- nodes
- namespaces
verbs:
- get
- apiGroups: [""]
resources:
- endpoints
- services
verbs:
# Used to discover service IPs for advertisement.
- watch
- list
- apiGroups: [""]
resources:
- nodes/status
verbs:
# Needed for clearing NodeNetworkUnavailable flag.
- patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: calico-node
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: calico-node
subjects:
- kind: ServiceAccount
name: calico-node
namespace: kube-system

---
# Source: calico/templates/calico-node.yaml
# This manifest installs the calico-node container, as well
# as the CNI plugins and network config on
# each master and worker node in a Kubernetes cluster.
kind: DaemonSet
apiVersion: apps/v1
metadata:
name: calico-node
namespace: kube-system
labels:
k8s-app: calico-node
spec:
selector:
matchLabels:
k8s-app: calico-node
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 1
template:
metadata:
labels:
k8s-app: calico-node
annotations:
# This, along with the CriticalAddonsOnly toleration below,
# marks the pod as a critical add-on, ensuring it gets
# priority scheduling and that its resources are reserved
# if it ever gets evicted.
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
nodeSelector:
beta.kubernetes.io/os: linux
hostNetwork: true
tolerations:
# Make sure calico-node gets scheduled on all nodes.
- effect: NoSchedule
operator: Exists
# Mark the pod as a critical add-on for rescheduling.
- key: CriticalAddonsOnly
operator: Exists
- effect: NoExecute
operator: Exists
serviceAccountName: calico-node
# Minimize downtime during a rolling upgrade or deletion; tell Kubernetes to do a "force
# deletion": https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods.
terminationGracePeriodSeconds: 0
priorityClassName: system-node-critical
initContainers:
# This container installs the CNI binaries
# and CNI network config file on each node.
- name: install-cni
image: calico/cni:v3.9.2
command: ["/install-cni.sh"]
env:
# Name of the CNI config file to create.
- name: CNI_CONF_NAME
value: "10-calico.conflist"
# The CNI network config to install on each node.
- name: CNI_NETWORK_CONFIG
valueFrom:
configMapKeyRef:
name: calico-config
key: cni_network_config
# The location of the etcd cluster.
- name: ETCD_ENDPOINTS
valueFrom:
configMapKeyRef:
name: calico-config
key: etcd_endpoints
# CNI MTU Config variable
- name: CNI_MTU
valueFrom:
configMapKeyRef:
name: calico-config
key: veth_mtu
# Prevents the container from sleeping forever.
- name: SLEEP
value: "false"
volumeMounts:
- mountPath: /host/opt/cni/bin
name: cni-bin-dir
- mountPath: /host/etc/cni/net.d
name: cni-net-dir
- mountPath: /calico-secrets
name: etcd-certs
# Adds a Flex Volume Driver that creates a per-pod Unix Domain Socket to allow Dikastes
# to communicate with Felix over the Policy Sync API.
- name: flexvol-driver
image: calico/pod2daemon-flexvol:v3.9.2
volumeMounts:
- name: flexvol-driver-host
mountPath: /host/driver
containers:
# Runs calico-node container on each Kubernetes node. This
# container programs network policy and routes on each
# host.
- name: calico-node
image: calico/node:v3.9.2
env:
# The location of the etcd cluster.
- name: ETCD_ENDPOINTS
valueFrom:
configMapKeyRef:
name: calico-config
key: etcd_endpoints
# Location of the CA certificate for etcd.
- name: ETCD_CA_CERT_FILE
valueFrom:
configMapKeyRef:
name: calico-config
key: etcd_ca
# Location of the client key for etcd.
- name: ETCD_KEY_FILE
valueFrom:
configMapKeyRef:
name: calico-config
key: etcd_key
# Location of the client certificate for etcd.
- name: ETCD_CERT_FILE
valueFrom:
configMapKeyRef:
name: calico-config
key: etcd_cert
# Set noderef for node controller.
- name: CALICO_K8S_NODE_REF
valueFrom:
fieldRef:
fieldPath: spec.nodeName
# Choose the backend to use.
- name: CALICO_NETWORKING_BACKEND
valueFrom:
configMapKeyRef:
name: calico-config
key: calico_backend
# Cluster type to identify the deployment type
- name: CLUSTER_TYPE
value: "k8s,bgp"
# Auto-detect the BGP IP address.
- name: IP
value: "autodetect"
# Enable IPIP
- name: CALICO_IPV4POOL_IPIP
value: "Always"
# Set MTU for tunnel device used if ipip is enabled
- name: FELIX_IPINIPMTU
valueFrom:
configMapKeyRef:
name: calico-config
key: veth_mtu
# The default IPv4 pool to create on startup if none exists. Pod IPs will be
# chosen from this range. Changing this value after installation will have
# no effect. This should fall within `--cluster-cidr`.
- name: CALICO_IPV4POOL_CIDR
value: "10.244.0.0/16"
# Disable file logging so `kubectl logs` works.
- name: CALICO_DISABLE_FILE_LOGGING
value: "true"
# Set Felix endpoint to host default action to ACCEPT.
- name: FELIX_DEFAULTENDPOINTTOHOSTACTION
value: "ACCEPT"
# Disable IPv6 on Kubernetes.
- name: FELIX_IPV6SUPPORT
value: "false"
# Set Felix logging to "info"
- name: FELIX_LOGSEVERITYSCREEN
value: "info"
- name: FELIX_HEALTHENABLED
value: "true"
- name: IP_AUTODETECTION_METHOD
value: can-reach=192.168.10.81
securityContext:
privileged: true
resources:
requests:
cpu: 250m
livenessProbe:
exec:
command:
- /bin/calico-node
- -felix-live
periodSeconds: 10
initialDelaySeconds: 10
failureThreshold: 6
readinessProbe:
exec:
command:
- /bin/calico-node
- -felix-ready
- -bird-ready
periodSeconds: 10
volumeMounts:
- mountPath: /lib/modules
name: lib-modules
readOnly: true
- mountPath: /run/xtables.lock
name: xtables-lock
readOnly: false
- mountPath: /var/run/calico
name: var-run-calico
readOnly: false
- mountPath: /var/lib/calico
name: var-lib-calico
readOnly: false
- mountPath: /calico-secrets
name: etcd-certs
- name: policysync
mountPath: /var/run/nodeagent
volumes:
# Used by calico-node.
- name: lib-modules
hostPath:
path: /lib/modules
- name: var-run-calico
hostPath:
path: /var/run/calico
- name: var-lib-calico
hostPath:
path: /var/lib/calico
- name: xtables-lock
hostPath:
path: /run/xtables.lock
type: FileOrCreate
# Used to install CNI.
- name: cni-bin-dir
hostPath:
path: /opt/cni/bin
- name: cni-net-dir
hostPath:
path: /etc/cni/net.d
# Mount in the etcd TLS secrets with mode 400.
# See https://kubernetes.io/docs/concepts/configuration/secret/
- name: etcd-certs
secret:
secretName: calico-etcd-secrets
defaultMode: 0400
# Used to create per-pod Unix Domain Sockets
- name: policysync
hostPath:
type: DirectoryOrCreate
path: /var/run/nodeagent
# Used to install Flex Volume Driver
- name: flexvol-driver-host
hostPath:
type: DirectoryOrCreate
path: /usr/libexec/kubernetes/kubelet-plugins/volume/exec/nodeagent~uds
---

apiVersion: v1
kind: ServiceAccount
metadata:
name: calico-node
namespace: kube-system

---
# Source: calico/templates/calico-kube-controllers.yaml

# See https://github.com/projectcalico/kube-controllers
apiVersion: apps/v1
kind: Deployment
metadata:
name: calico-kube-controllers
namespace: kube-system
labels:
k8s-app: calico-kube-controllers
spec:
# The controllers can only have a single active instance.
replicas: 1
selector:
matchLabels:
k8s-app: calico-kube-controllers
strategy:
type: Recreate
template:
metadata:
name: calico-kube-controllers
namespace: kube-system
labels:
k8s-app: calico-kube-controllers
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
nodeSelector:
beta.kubernetes.io/os: linux
tolerations:
# Mark the pod as a critical add-on for rescheduling.
- key: CriticalAddonsOnly
operator: Exists
- key: node-role.kubernetes.io/master
effect: NoSchedule
serviceAccountName: calico-kube-controllers
priorityClassName: system-cluster-critical
# The controllers must run in the host network namespace so that
# it isn't governed by policy that would prevent it from working.
hostNetwork: true
containers:
- name: calico-kube-controllers
image: calico/kube-controllers:v3.9.2
env:
# The location of the etcd cluster.
- name: ETCD_ENDPOINTS
valueFrom:
configMapKeyRef:
name: calico-config
key: etcd_endpoints
# Location of the CA certificate for etcd.
- name: ETCD_CA_CERT_FILE
valueFrom:
configMapKeyRef:
name: calico-config
key: etcd_ca
# Location of the client key for etcd.
- name: ETCD_KEY_FILE
valueFrom:
configMapKeyRef:
name: calico-config
key: etcd_key
# Location of the client certificate for etcd.
- name: ETCD_CERT_FILE
valueFrom:
configMapKeyRef:
name: calico-config
key: etcd_cert
# Choose which controllers to run.
- name: ENABLED_CONTROLLERS
value: policy,namespace,serviceaccount,workloadendpoint,node
volumeMounts:
# Mount in the etcd TLS secrets.
- mountPath: /calico-secrets
name: etcd-certs
readinessProbe:
exec:
command:
- /usr/bin/check-status
- -r
volumes:
# Mount in the etcd TLS secrets with mode 400.
# See https://kubernetes.io/docs/concepts/configuration/secret/
- name: etcd-certs
secret:
secretName: calico-etcd-secrets
defaultMode: 0400

---

apiVersion: v1
kind: ServiceAccount
metadata:
name: calico-kube-controllers
namespace: kube-system
---
# Source: calico/templates/calico-typha.yaml

---
# Source: calico/templates/configure-canal.yaml

---
# Source: calico/templates/kdd-crds.yaml
#kubectl get deployment,pod -nkube-system
NAME READY UP-TO-DATE AVAILABLE AGE
deployment.extensions/calico-kube-controllers 1/1 1 1 17h

NAME READY STATUS RESTARTS AGE
pod/calico-kube-controllers-8576cc9448-dlhmp 1/1 Running 0 17h
pod/calico-node-kjp47 1/1 Running 0 17h
pod/calico-node-kngmg 1/1 Running 0 17h
pod/calico-node-nhkjt 1/1 Running 0 17h
...

# ifconfig tunl0
tunl0 Link encap:IPIP Tunnel HWaddr
inet addr:10.244.42.192 Mask:255.255.255.255
UP RUNNING NOARP MTU:1440 Metric:1
RX packets:20 errors:0 dropped:0 overruns:0 frame:0
TX packets:20 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1
RX bytes:3754 (3.7 KB) TX bytes:3530 (3.5 KB)

# route -nv
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.10.1 0.0.0.0 UG 0 0 0 eno1
10.244.40.64 192.168.10.82 255.255.255.192 UG 0 0 0 tunl0
10.244.42.192 0.0.0.0 255.255.255.192 U 0 0 0 *
10.244.42.198 0.0.0.0 255.255.255.255 UH 0 0 0 calic24aedbc670
10.244.134.128 192.168.10.83 255.255.255.192 UG 0 0 0 tunl0
...

Running calico-node as a Docker-based systemd service

#https://docs.projectcalico.org/v3.9/getting-started/as-service
source /etc/kubernetes/env.sh
mkdir -p /etc/calico
cat <<EOF> /etc/calico/calico.env
ETCD_ENDPOINTS=${ETCD_ENDPOINTS}
ETCD_CA_CERT_FILE="/etc/etcd/ssl/ca.pem"
ETCD_CERT_FILE="/etc/etcd/ssl/etcd.pem"
ETCD_KEY_FILE="/etc/etcd/ssl/etcd-key.pem"
CALICO_NODENAME=""
CALICO_NO_DEFAULT_POOLS=""
CALICO_IP=""
CALICO_IP6=""
CALICO_AS=""
CALICO_NETWORKING_BACKEND=bird
EOF

cat << "EOF" > calico-node.service
[Unit]
Description=calico-node
After=docker.service
Requires=docker.service

[Service]
EnvironmentFile=/etc/calico/calico.env
ExecStartPre=-/usr/bin/docker rm -f calico-node
ExecStart=/usr/bin/docker run --net=host --privileged \
 --name=calico-node \
 -e NODENAME=${CALICO_NODENAME} \
 -e IP=${CALICO_IP} \
 -e IP6=${CALICO_IP6} \
 -e CALICO_NETWORKING_BACKEND=${CALICO_NETWORKING_BACKEND} \
 -e AS=${CALICO_AS} \
 -e NO_DEFAULT_POOLS=${CALICO_NO_DEFAULT_POOLS} \
 -e ETCD_ENDPOINTS=${ETCD_ENDPOINTS} \
 -e ETCD_CA_CERT_FILE=${ETCD_CA_CERT_FILE} \
 -e ETCD_CERT_FILE=${ETCD_CERT_FILE} \
 -e ETCD_KEY_FILE=${ETCD_KEY_FILE} \
 -v /etc/etcd/ssl:/etc/etcd/ssl:ro \
 -v /var/log/calico:/var/log/calico \
 -v /run/docker/plugins:/run/docker/plugins \
 -v /lib/modules:/lib/modules \
 -v /var/run/calico:/var/run/calico \
 calico/node:v3.9.2

ExecStop=-/usr/bin/docker stop calico-node

Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s

[Install]
WantedBy=multi-user.target
EOF
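
After the unit file is written, install and start it with the usual systemd workflow (the unit path below is the standard CentOS 7 location):

cp calico-node.service /usr/lib/systemd/system/calico-node.service
systemctl daemon-reload
systemctl enable calico-node
systemctl start calico-node
systemctl status calico-node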

calicoctl

#https://docs.projectcalico.org/master/getting-started/calicoctl/install
#Download calicoctl from the GitHub releases page; the official install method hit a Segmentation fault on Ubuntu
cd /usr/local/k8s/bin
wget https://github.com/projectcalico/calicoctl/releases/download/v3.9.2/calicoctl-linux-amd64 -O calicoctl
chmod +x calicoctl

mkdir -p /etc/calico
cat <<EOF> /etc/calico/calicoctl.cfg
#https://docs.projectcalico.org/master/getting-started/calicoctl/configure/
apiVersion: projectcalico.org/v3
kind: CalicoAPIConfig
metadata:
spec:
  datastoreType: "etcdv3"
  etcdEndpoints: "https://192.168.10.81:2379,https://192.168.10.82:2379,https://192.168.10.83:2379"
  etcdKeyFile: "/etc/etcd/ssl/etcd-key.pem"
  etcdCertFile: "/etc/etcd/ssl/etcd.pem"
  etcdCACertFile: "/etc/etcd/ssl/ca.pem"
EOF

# calicoctl get node
NAME
k8s-master01
k8s-master02
k8s-master03
k8s-node01
k8s-node02

# calicoctl node status
Calico process is running.

IPv4 BGP status
+---------------+-------------------+-------+----------+-------------+
| PEER ADDRESS | PEER TYPE | STATE | SINCE | INFO |
+---------------+-------------------+-------+----------+-------------+
| 192.168.10.82 | node-to-node mesh | up | 09:45:55 | Established |
| 192.168.10.83 | node-to-node mesh | up | 09:45:54 | Established |
| 192.168.10.84 | node-to-node mesh | up | 09:45:54 | Established |
| 192.168.10.85 | node-to-node mesh | up | 09:45:54 | Established |
+---------------+-------------------+-------+----------+-------------+

IPv6 BGP status
No IPv6 peers found.

# calicoctl get ippool -o wide
NAME CIDR NAT IPIPMODE VXLANMODE DISABLED SELECTOR
default-ipv4-ippool 10.244.0.0/16 true Always Never false all()
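
calicoctl can also adjust the pool afterwards, e.g. to change ipipMode; a minimal export-edit-apply sketch (the CrossSubnet value is only an example):

calicoctl get ippool default-ipv4-ippool -o yaml > default-ipv4-ippool.yaml
#edit default-ipv4-ippool.yaml, e.g. change ipipMode: Always to CrossSubnet
calicoctl apply -f default-ipv4-ippool.yaml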

Testing with my-nginx

cat > my-nginx.yaml <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: my-nginx
spec:
  replicas: 2
  selector:
    matchLabels:
      app: my-nginx
  template:
    metadata:
      labels:
        app: my-nginx
    spec:
      containers:
      - name: my-nginx
        image: daocloud.io/library/nginx:1.13.0-alpine
        ports:
        - containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
  name: my-nginx
  labels:
    app: my-nginx
spec:
  type: NodePort
  selector:
    app: my-nginx
  ports:
  - name: http
    port: 80
    targetPort: 80
EOF

kubectl apply -f my-nginx.yaml
# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
my-nginx-6487974967-49tgj 1/1 Running 0 72s 10.244.40.70 k8s-node01 <none> <none>
my-nginx-6487974967-75pnt 1/1 Running 0 72s 10.244.42.198 k8s-node02 <none> <none>

Check service IP and port reachability

# kubectl get svc my-nginx
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
my-nginx NodePort 10.110.21.241 <none> 80:9046/TCP 2m13s

Access the service IP from any node

ping 10.110.21.241
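
ping only verifies that the ClusterIP is routable; to check the port as well, curl the ClusterIP and the NodePort shown in the service output above (the node IP is one of this cluster's nodes):

curl -I http://10.110.21.241:80
curl -I http://192.168.10.84:9046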

Deploying CoreDNS

cp /usr/local/src/kubernetes/cluster/addons/dns/coredns/coredns.yaml.base coredns.yaml

#Switch the image registry to the Azure mirror: http://mirror.azure.cn/help/gcr-proxy-cache.html
source /etc/kubernetes/env.sh
sed -i "s#k8s.gcr.io#"$GCR_MIRROR"#g" coredns.yaml
sed -i "s/__PILLAR__DNS__DOMAIN__/${DNS_DOMAIN}/" coredns.yaml
sed -i "s/__PILLAR__DNS__SERVER__/${DNS_SERVICE_IP}/" coredns.yaml
sed -i "s/__PILLAR__DNS__MEMORY__LIMIT__/150Mi/g" coredns.yaml

#Create CoreDNS
kubectl create -f coredns.yaml

#replicas defaults to 1; it can be increased, e.g.:
spec:
  replicas: 2

#Check CoreDNS status
# kubectl get svc -nkube-system
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kube-dns ClusterIP 10.96.0.10 <none> 53/UDP,53/TCP,9153/TCP 39s

Testing CoreDNS

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: busybox
  namespace: default
spec:
  containers:
  - name: busybox
    image: busybox:1.28.3
    command:
    - sleep
    - "3600"
    imagePullPolicy: IfNotPresent
  restartPolicy: Always
EOF

# kubectl get pod | grep busybox
busybox 1/1 Running 0 2m43s

# kubectl exec -ti busybox -- nslookup kubernetes
Server: 10.96.0.10
Address 1: 10.96.0.10 kube-dns.kube-system.svc.cluster.local

Name: kubernetes
Address 1: 10.96.0.1 kubernetes.default.svc.cluster.local
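
Service records resolve the same way; for example, the my-nginx Service created earlier should return its ClusterIP (10.110.21.241 above):

kubectl exec -ti busybox -- nslookup my-nginx.default.svc.cluster.local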

CoreDNS horizontal autoscaling

https://github.com/kubernetes-incubator/cluster-proportional-autoscaler/
https://github.com/kubernetes/kubernetes/tree/master/cluster/addons/dns-horizontal-autoscaler
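
A minimal sketch of the autoscaler's scaling policy, assuming the dns-horizontal-autoscaler addon from the second link is deployed with its default ConfigMap name kube-dns-autoscaler (the parameter values are only illustrative):

kubectl -n kube-system edit configmap kube-dns-autoscaler
#linear mode example: scale CoreDNS replicas with cluster size
#  linear: '{"coresPerReplica":256,"nodesPerReplica":16,"min":2,"preventSinglePointFailure":true}'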

Deploying ingress-nginx

https://github.com/kubernetes/ingress-nginx/blob/master/docs/deploy/index.md

mkdir /etc/kubernetes/yaml/ingress-nginx
cd /etc/kubernetes/yaml/ingress-nginx
wget https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/static/mandatory.yaml
wget https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/static/provider/baremetal/service-nodeport.yaml
sed -i 's/quay.io/quay.azk8s.cn/' mandatory.yaml
kubectl apply -f .

#kubectl get pods --all-namespaces -l app.kubernetes.io/name=ingress-nginx --watch
NAMESPACE NAME READY STATUS RESTARTS AGE
ingress-nginx nginx-ingress-controller-69969b98db-shr6j 1/1 Running 0 58m

# kubectl get svc -ningress-nginx
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
ingress-nginx NodePort 10.98.91.11 <none> 80:32634/TCP,443:30871/TCP 59m

POD_NAMESPACE=ingress-nginx
POD_NAME=$(kubectl get pods -n $POD_NAMESPACE -l app.kubernetes.io/name=ingress-nginx -o jsonpath='{.items[0].metadata.name}')
kubectl exec -it $POD_NAME -n $POD_NAMESPACE -- /nginx-ingress-controller --version

#curl 10.98.91.11
<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>openresty/1.15.8.2</center>
</body>
</html>
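
As a quick end-to-end test of the controller, an Ingress can be pointed at the my-nginx Service deployed earlier (my-nginx.example.com is a placeholder host; the NodePort 32634 comes from the service output above):

cat <<EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
  name: my-nginx
spec:
  rules:
  - host: my-nginx.example.com
    http:
      paths:
      - path: /
        backend:
          serviceName: my-nginx
          servicePort: 80
EOF

curl -H "Host: my-nginx.example.com" http://192.168.10.84:32634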

Deploying the dashboard

The manifests come from the kubernetes-server-linux-amd64.tar.gz extracted earlier.

mkdir -p /etc/kubernetes/dashboard
cd /etc/kubernetes/dashboard
cp -r /usr/local/src/kubernetes/cluster/addons/dashboard/* .

Add a NodePort to the Service so the dashboard can be reached externally at NodeIP:NodePort

apiVersion: v1
kind: Service
metadata:
  name: kubernetes-dashboard
  namespace: kube-system
  labels:
    k8s-app: kubernetes-dashboard
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  type: NodePort          #add this line
  selector:
    k8s-app: kubernetes-dashboard
  ports:
  - port: 443
    targetPort: 8443
    nodePort: 30001       #specify the port

Switch to the Microsoft mirror source and apply

# ls *.yaml
dashboard-configmap.yaml dashboard-controller.yaml dashboard-rbac.yaml dashboard-secret.yaml dashboard-service.yaml
# sed -i "s#k8s.gcr.io#$GCR_MIRROR#" dashboard-controller.yaml
# kubectl apply -f .

Check the pod and Service

# kubectl get pods -o wide -nkube-system | grep dashboard
kubernetes-dashboard-64ffdff795-vgwrg 1/1 Running 0 77s 10.244.40.74 k8s-node01 <none> <none>

# kubectl get services kubernetes-dashboard -n kube-system
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes-dashboard NodePort 10.101.91.150 <none> 443:30001/TCP 2m15s

cat << EOF > admin.yaml
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: admin
  annotations:
    rbac.authorization.kubernetes.io/autoupdate: "true"
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: rbac.authorization.k8s.io
subjects:
- kind: ServiceAccount
  name: admin
  namespace: kube-system

---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
EOF

kubectl apply -f admin.yaml
admin_token=$(kubectl get secret -n kube-system|grep admin-token|awk '{print $1}')
kubectl get secret $admin_token -o jsonpath={.data.token} -n kube-system |base64 -d
eyJhbGciOiJSUzI1NiIsImtpZCI6IiJ9.eyJpc3MiOiJrdWJlcm5ldGVzL3N.....

Accessing the dashboard through kube-apiserver in a browser

$ kubectl cluster-info
Kubernetes master is running at https://192.168.10.80:8443
CoreDNS is running at https://192.168.10.80:8443/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy
kubernetes-dashboard is running at https://192.168.10.80:8443/api/v1/namespaces/kube-system/services/https:kubernetes-dashboard:/proxy

cat > dashboard.json << EOF
{
  "CN": "dashboard",
  "hosts": [],
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "Guangzhou",
      "L": "Guangzhou",
      "O": "k8s",
      "OU": "System"
    }
  ]
}
EOF

cd /etc/kubernetes/ssl/
cfssl gencert -ca=ca.pem \
-ca-key=ca-key.pem \
-config=ca-config.json \
-profile=kubernetes dashboard.json | cfssljson -bare dashboard

cp dashboard.pem /etc/kubernetes/ssl/dashboard.crt
cp dashboard-key.pem /etc/kubernetes/ssl/dashboard.key

#create secret
cd /etc/kubernetes/ssl
kubectl create secret generic kubernetes-dashboard-certs --from-file="dashboard.crt,dashboard.key" -n kube-system
kubectl get secret -n kube-system | grep dashboard

#Export the certificate for use on Windows
cd /opt/k8s/ssl/
openssl pkcs12 -export -certfile ca.pem -in dashboard.pem -inkey dashboard-key.pem -out dashboard.pfx
openssl pkcs12 -export -in ca.pem -inkey ca-key.pem -out dashboard.pfx

openssl pkcs12 -export -out dashboard.pfx -inkey dashboard-key.pem -in dashboard.pem -certfile ca.pem
openssl pkcs12 -export -out admin.pfx -inkey admin-key.pem -in admin.pem -certfile ca.pem
  1. The kubernetes-dashboard Service exposes a NodePort, so the dashboard can be accessed at https://NodeIP:NodePort
  2. Access the dashboard through kube-apiserver
  3. Access the dashboard through kubectl proxy (see the sketch below)
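
A minimal sketch of method 3 (standard kubectl proxy flags; the local bind address and port are assumptions):

kubectl proxy --address=127.0.0.1 --port=8001
#then open:
#http://127.0.0.1:8001/api/v1/namespaces/kube-system/services/https:kubernetes-dashboard:/proxy/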

gcr / quay image mirror proxies

http://mirror.azure.cn/help/gcr-proxy-cache.html
http://mirror.azure.cn/help/quay-proxy-cache.html

gcr.azk8s.cn/google_containers/
registry.cn-hangzhou.aliyuncs.com/google_containers/
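
For example, pulling the pause image through the mirror instead of k8s.gcr.io (tag 3.1 is the one used by Kubernetes 1.15):

docker pull gcr.azk8s.cn/google_containers/pause:3.1
docker tag gcr.azk8s.cn/google_containers/pause:3.1 k8s.gcr.io/pause:3.1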

References

http://www.debugger.wiki/article/html/1559106039872285
https://github.com/opsnull/follow-me-install-kubernetes-cluster