Pod

检查 Pod 就绪探针

bash
1
# Print the status field of the pod's "Ready" condition.
kubectl get pods <pod-name> -n <namespace> -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}'

查看 Pod 事件

bash
1
# List the events in the namespace whose involvedObject.name equals the pod name.
kubectl get events -n <namespace> --field-selector involvedObject.name=<pod-name>

获取 Pod Affinity 和 Anti-Affinity

bash
1
# Print the pod's .spec.affinity block.
kubectl get pod <pod-name> -n <namespace> -o=jsonpath='{.spec.affinity}'

列出 Pod 的 anti-affinity 规则

bash
1
# Print only the podAntiAffinity part of the pod's .spec.affinity.
kubectl get pod <pod-name> -n <namespace> -o=jsonpath='{.spec.affinity.podAntiAffinity}'

Pod Network

运行 Debug Pod 进行调试

bash
1
# Start an interactive busybox pod named net-debug-pod running /bin/sh; --rm removes it when the session ends.
kubectl run -it --rm --restart=Never --image=busybox net-debug-pod -- /bin/sh

测试从 Pod 到 Endpoint 的连接性

bash
1
# Run curl inside the pod against the endpoint (the container image must provide curl).
kubectl exec -it <pod-name> -n <namespace> -- curl <endpoint-url>

trace 一个 Pod 到另一个 Pod 的网络

bash
1
# Run traceroute from the source pod to the destination pod IP (the container image must provide traceroute).
kubectl exec -it <source-pod-name> -n <namespace> -- traceroute <destination-pod-ip>

检查 Pod DNS配置

bash
1
# Show the resolver configuration file as seen from inside the pod.
kubectl exec -it <pod-name> -n <namespace> -- cat /etc/resolv.conf

Workload

Deployment

查看 rollout 状态

bash
1
# Report the status of the deployment's current rollout.
kubectl rollout status deployment/<deployment-name> -n <namespace>

查看 rollout 历史记录

bash
1
# List the recorded rollout revisions of the deployment.
kubectl rollout history deployment/<deployment-name> -n <namespace>

调整 deployment 数量

bash
1
# Set the deployment's replica count to 0.
kubectl scale deploy <deployment-name> --replicas=0 -n <namespace>

调整名称空间内的所有 Deployment 工作负载数量为 0

bash
1
2
3
# Scale every Deployment in namespace ${ns} down to 0 replicas.
# Set ns to the target namespace before running.
# The {"\n"} separator matters: without it jsonpath prints all names back to
# back as a single word, and the loop would run once with a non-existent name.
for d in $(kubectl get deploy -n "${ns}" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}'); do
	kubectl scale deploy "${d}" --replicas=0 -n "${ns}"
done

调整整个集群的 Deployment 工作负载数量为 0

bash
1
2
3
4
5
6
7
# Scale every Deployment in every namespace of the cluster down to 0 replicas.
for namespace in $(kubectl get ns -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}'); do
	# Deployment names must be newline-separated; without the {"\n"} they are
	# printed back to back and the inner loop would see one bogus name.
	for workload in $(kubectl get deploy -n "${namespace}" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}'); do
		kubectl scale deploy "${workload}" --replicas=0 -n "${namespace}"
	done
done

设置 deployment 的自动缩放

bash
1
2
3
4
5
# Create an autoscaler for the deployment with the given min/max pod counts and target CPU percentage.
kubectl autoscale deployment <deployment-name> \
	--min=<min-pods> \
	--max=<max-pods> \
	--cpu-percent=<cpu-percent> \
	-n <namespace>

DaemonSet

调整一个 Daemonset 资源为 0

bash
1
# Add the nodeSelector disable=true to the DaemonSet's pod template.
# Presumably no node carries that label, so no pod can be scheduled — confirm for your cluster.
kubectl -n <namespace> patch daemonset <name-of-daemon-set> -p '{"spec": {"template": {"spec": {"nodeSelector": {"disable": "true"}}}}}'

恢复

bash
1
# Remove the "disable" key from the pod template's nodeSelector with a JSON patch.
kubectl -n <namespace> patch daemonset <name-of-daemon-set> --type json -p='[{"op": "remove", "path": "/spec/template/spec/nodeSelector/disable"}]'

调整名称空间内的所有 Daemonset 工作负载数量为 0

bash
1
2
3
4
# Add the nodeSelector disable=true to every DaemonSet in ${NAMESPACE}.
NAMESPACE=default
# The {"\n"} separator matters: without it jsonpath prints all names back to
# back as a single word, and the loop would patch one non-existent DaemonSet.
for daemonset in $(kubectl get daemonset -n "${NAMESPACE}" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}'); do
	kubectl -n "${NAMESPACE}" patch daemonset "${daemonset}" -p '{"spec": {"template": {"spec": {"nodeSelector": {"disable": "true"}}}}}'
done

HPA

检查 HPA 状态

bash
1
# List the HorizontalPodAutoscalers in the namespace.
kubectl get hpa -n <namespace>

Networking

显示命名空间中 Pod 的 IP 地址:

bash
1
# Print "pod-name  pod-IP" for every pod in the namespace, without a header row.
kubectl get pods -n <namespace> -o custom-columns=POD:metadata.name,IP:status.podIP --no-headers

Node

获取特定 Node 上运行的 Pod 列表

方法1

bash
1
# List the pods in the namespace whose spec.nodeName equals the given node.
kubectl get pods --field-selector spec.nodeName=<node-name> -n <namespace>

方法2

bash
1
2
# Print the names of the pods in namespace {ns} that run on node "xxxx".
# The backslash must be the last character on the line (no trailing space),
# and every {range} needs a closing {end}.
kubectl get pod -n {ns} -o \
    jsonpath='{range .items[?(@.spec.nodeName=="xxxx")]}{.metadata.name}{"\n"}{end}'

方法3:检查多个 Node 上运行的 Pod 列表

bash
1
2
3
4
# List pods from all namespaces whose hostIP is one of the two listed addresses,
# printed as "namespace/name / hostIP".
kubectl get pod -A -ojson | jq -r '
  .items[] |
  select(.status.hostIP as $ip | $ip == "10.10.10.124" or $ip == "10.10.10.153" )|
  .metadata.namespace + "/" + .metadata.name + " / " + .status.hostIP'

获取 Node 的 IP

bash
1
2
# Print "node-name<TAB>first-listed-address" for every node.
# Nodes are cluster-scoped, so no namespace flag is needed.
# NOTE(review): addresses[0] is assumed to be the address you want; filter on
# ?(@.type=="InternalIP") if the order differs in your cluster.
kubectl get node \
    -o=jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.addresses[0].address}{"\n"}{end}'

获取 Node 上 指定标签

bash
1
2
# Print each node name followed by the value of one label (replace <xxx> with the label key).
kubectl get nodes \
    -o=jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.labels.<xxx>}{"\n"}{end}'

查找属于这个标签 Node 的数量

bash
1
# Count the nodes matching the label selector (one output line per node, header suppressed).
kubectl get node -l xxx=xxxx --no-headers | wc -l

查看node上所有的标签,json格式

bash
1
# Emit one {name, labels} JSON object per node.
# NOTE(review): the trailing "jq -r" has no filter; it presumably relies on a jq
# version that defaults to "." when piped, and it re-formats the compact output
# produced by the first jq — confirm which output format is intended.
kubectl get node -o json | jq '.items[] | {name: .metadata.name, labels: .metadata.labels }' --compact-output | jq -r

根据 NodeSelector 查询 application

bash
1
2
# For every Deployment in all namespaces, print its name and then the value read
# from key "xxxx" of its pod template's nodeSelector.
kubectl get deployment -A \
    -o=jsonpath='{range .items[*]}{@.metadata.name}{"\t"}{range @.spec.template.spec.nodeSelector}{.xxxx}{"\n"}{end}{end}'

根据条件列出 Node 1.17+

bash
1
2
# List the nodes matching the label selector with the status of their Ready condition.
# The custom-columns spec must be quoted: unquoted "(" and "?" are shell syntax.
kubectl get nodes -l 'node-role.kubernetes.io/worker=xxx' -o \
	'custom-columns=NODE:.metadata.name,READY:.status.conditions[?(@.type=="Ready")].status'

获取 Node 的操作系统信息

bash
1
# Print the osImage string from the node's status.nodeInfo.
kubectl get node <node-name> -o jsonpath='{.status.nodeInfo.osImage}'

查询应用 (Deployment) 使用的 Node

bash
1
2
# For every Deployment in namespace {ns}, print its name and the srv-grp value
# of its pod template's nodeSelector.
# The "get" verb is required, and the list field is "items".
kubectl get deployment -n {ns} -o \
    jsonpath='{range .items[*]}{"Deployment: "}{.metadata.name}{"\n"}{"\t"}{.spec.template.spec.nodeSelector.srv-grp}{"\n"}{end}'

仅获取 Node 的指定标签和 InternalIP

bash
1
2
# Print "<label value><TAB><InternalIP>" for every node (replace xxxxx with the label key).
# Labels are a flat key/value map under .metadata.labels, so the key is read directly from it.
# "k" is assumed to be an alias for kubectl.
k get nodes -o \
jsonpath='{range .items[*]}{.metadata.labels.xxxxx}{"\t"}{.status.addresses[?(@.type=="InternalIP")].address}{"\n"}{end}'

仅获取 Node IP

bash
1
# Print the InternalIP address of every node. "k" is assumed to be an alias for kubectl.
k get nodes -o jsonpath='{.items[*].status.addresses[?(@.type=="InternalIP")].address}'

获取指定标签的 Node,并输出他的标签和 IP

bash
1
2
# For nodes whose label xxxx equals "xxxx", print the srv-grp label value and the
# InternalIP, tab-separated. "k" is assumed to be an alias for kubectl.
k get nodes -o \
	jsonpath='{range .items[?(@.metadata.labels.xxxx=="xxxx")]}{.metadata.labels.srv-grp}{"\t"}{.status.addresses[?(@.type=="InternalIP")].address}{"\n"}{end}'

获取指定 Role 的 Node 列表,只输出他的主机名

bash
1
2
# Print the names of the nodes whose kubernetes.io/role label equals "xxxx".
# A label selector is used because the label key contains dots and a slash,
# which a plain jsonpath field path would split into separate fields.
# Nodes are cluster-scoped, so no namespace flag is needed.
# "k" is assumed to be an alias for kubectl.
k get node -l 'kubernetes.io/role=xxxx' \
	-o=jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}'

修改指定 Role 的 Node 的角色值 (jq)

bash
1
2
3
4
5
# Example: give every node whose kubernetes.io/role label is "aaa" the value role=bbb.
# The node names are selected with jq; "k" is assumed to be an alias for kubectl.
role_nodes=$(k get nodes -o json | jq -r '.items[] | select(.metadata.labels."kubernetes.io/role" == "aaa") | .metadata.name')
for node_name in ${role_nodes}; do
	k label node ${node_name} kubernetes.io/role=bbb --overwrite
done

修改指定 Role 的 Node 的角色值 (jsonpath)

bash
1
2
3
4
5
# Example: give every node whose label xxx is "aaa" the value xxx=bbb.
# The node names are selected with a jsonpath filter; "k" is assumed to be an alias for kubectl.
matching_nodes=$(k get nodes -o jsonpath='{range .items[?(@.metadata.labels.xxx=="aaa")]}{.metadata.name}{"\n"}{end}')
for node_name in ${matching_nodes}; do
	k label node ${node_name} xxx=bbb  --overwrite
done

打印 Pod 所在的 Node,不要其他垃圾字段

bash
1
2
3
# List NODE / NAMESPACE / NAME for pods outside the excluded namespaces and keep
# only the rows matching ${n} (set n to the node name before running).
kubectl get pod -A -owide --no-headers \
--field-selector='metadata.namespace!=infra,metadata.namespace!=debug,metadata.namespace!=kube-system,metadata.namespace!=logging,metadata.namespace!=monitoring' \
-o=custom-columns=NODE:.spec.nodeName,NAMESPACE:.metadata.namespace,NAME:.metadata.name|grep ${n}

打印所需 Node 上的 Pod,不要其他垃圾字段

bash
1
2
3
4
5
6
7
8
9
# For each node name in NODES, print the name and then the NODE / NAMESPACE / NAME
# rows of the pods (outside the excluded namespaces) that match it.
NODES="xxxx xxxx xxxx"

ns_filter='metadata.namespace!=infra,metadata.namespace!=debug,metadata.namespace!=kube-system,metadata.namespace!=logging,metadata.namespace!=monitoring'
pod_columns=NODE:.spec.nodeName,NAMESPACE:.metadata.namespace,NAME:.metadata.name

for node in ${NODES}; do
	echo ${node}
	kubectl get pod -A -owide --no-headers --field-selector="${ns_filter}" -o=custom-columns=${pod_columns} \
		| grep ${node}
done

Resource Quotas

列出命名空间中的资源配额

bash
1
# List the ResourceQuota objects in the namespace.
kubectl get resourcequotas -n <namespace>

查看资源配额详情

bash
1
# Show the details of one ResourceQuota.
kubectl describe resourcequota <resource-quota-name> -n <namespace>

Volumes

按容量排序的列出PV

bash
1
# List PersistentVolumes sorted by their .spec.capacity.storage field.
kubectl get pv --sort-by=.spec.capacity.storage

检查 PV reclaim policy

bash
1
# Print the PersistentVolume's persistentVolumeReclaimPolicy.
kubectl get pv <pv-name> -o=jsonpath='{.spec.persistentVolumeReclaimPolicy}'

Ephemeral Containers

1.18+

运行一个 ephemeral debugging 容器

bash
1
# Attach an interactive debugging container (running /bin/sh from the given image) to the pod.
kubectl debug -it <pod-name> -n <namespace> --image=<debug-image> -- /bin/sh

Pod Disruption Budget (PDB)

列出一个ns内的PDB

bash
1
# List the PodDisruptionBudgets in the namespace.
kubectl get pdb -n <namespace>

ConfigMap

批量备份所有 configmap

bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# Dump every ConfigMap of every namespace to ${BACKUP_PATH}/<namespace>.<name>.yaml,
# stripping server-populated metadata lines.
# Values containing spaces must be quoted; otherwise the shell treats the first
# word as a temporary assignment and runs the rest as a command.
KUBE_CMD="kubectl --kubeconfig="
BACKUP_PATH="{your backup path}"

# ${KUBE_CMD} is left unquoted on purpose so that it splits into command + flags.
for ns in $(${KUBE_CMD} get ns -o jsonpath='{range $.items[*]}{.metadata.name}{"\n"}{end}'); do
	for cn in $(${KUBE_CMD} get cm -n "${ns}" -ojsonpath='{range $.items[*]}{.metadata.name}{"\n"}{end}'); do
		# The file name uses the loop variable cn so that each ConfigMap gets its own file.
		${KUBE_CMD} get cm -n "${ns}" "${cn}" -o yaml \
			| sed -e '/resourceVersion:/d; /uid:/d; /selfLink:/d ; /creationTimestamp:/d; /lifecycle.cattle.io/d' \
			> "${BACKUP_PATH}/${ns}.${cn}.yaml"
	done
done

Secret

从一个 namespace 导出所有 secret 到另一个 namespace

bash
1
2
3
4
5
6
7
8
# Copy every secret from namespace OLD_NS into namespace NEW_NS.
OLD_NS=
NEW_NS=

for n in $(kubectl get secret -n "${OLD_NS}" -o jsonpath='{range $.items[*]}{.metadata.name}{"\n"}{end}'); do
    # Strip server-populated metadata and the source namespace, then pipe the
    # cleaned manifest into "kubectl create" (each stage is joined with "|").
    kubectl get secret -n "${OLD_NS}" "$n" -o yaml \
      | sed -e '/resourceVersion:/d;/uid:/d; /selfLink:/d; /creationTimestamp:/d; /namespace: /d;' \
      | kubectl create -n "${NEW_NS}" -f -
done

其他方式

bash
1
2
3
4
5
6
7
8
# Copy every secret from NAMESPACE into NEW_NAMESPACE, using jq to clean the manifest.
KUBE_CMD="kubectl --kubeconfig="
NAMESPACE=
NEW_NAMESPACE=
# ${KUBE_CMD} is left unquoted on purpose so that it splits into command + flags.
for n in $(${KUBE_CMD} get secret -n "${NAMESPACE}" -ojson | jq -r '.items[].metadata.name'); do
    # Every key passed to del() must be a quoted string, including "namespace".
    ${KUBE_CMD} get secret -n "${NAMESPACE}" "$n" -o json \
      | jq -r 'del(.metadata["creationTimestamp","resourceVersion","selfLink","uid","annotations","namespace"])' \
      | ${KUBE_CMD} create -n "${NEW_NAMESPACE}" -f -
done

复制集群中所有 namespace 的 secret 到另外一个新集群,并排除部分 namespace

bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
# Copy the secrets of every namespace (except those matching EXCLUDE_NS) from the
# cluster addressed by KUBE_CMD to the cluster addressed by KUBE_CMD_NEW.
# The assignments are quoted; unquoted, the shell would run "--kubeconfig=" as a command.
KUBE_CMD="kubectl --kubeconfig="
KUBE_CMD_NEW="kubectl --kubeconfig="
EXCLUDE_NS='default|debug|cattle|test|local|logging|skywalking|gke|ingress|kube|infra|opentelemetry'

# ${KUBE_CMD} / ${KUBE_CMD_NEW} are left unquoted on purpose so they split into command + flags.
for namespace in $(${KUBE_CMD} get ns -ojsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | grep -Ev "${EXCLUDE_NS}"); do
	for n in $(${KUBE_CMD} get secret -n "${namespace}" -o jsonpath='{range $.items[*]}{.metadata.name}{"\n"}{end}'); do
		# Drop server-populated fields so the manifest can be applied to the other cluster.
		${KUBE_CMD} get secret -n "${namespace}" "$n" -o json \
			| jq -r 'del(.metadata["creationTimestamp","resourceVersion","selfLink","uid","finalizers","generation"],.status,.spec.clusterIP,.spec.clusterIPs,.metadata.annotations["kubectl.kubernetes.io/last-applied-configuration"])' \
			| ${KUBE_CMD_NEW} apply -f -
	done
done

拷贝一个 secret 到 另外一个名称空间内

bash
1
2
3
4
5
6
7
# Copy the secret SECRET_NAME from namespace SRC_NS to namespace DST_NS:
# read it as JSON, delete the listed metadata keys (including the namespace),
# and create the result in DST_NS.
SRC_NS=
DST_NS=
SECRET_NAME=

kubectl get secret -n ${SRC_NS} ${SECRET_NAME} -o json \
| jq -r 'del(.metadata["creationTimestamp","resourceVersion","selfLink","uid","annotations","namespace"])' \
| kubectl create -n ${DST_NS} -f -

拷贝一个集群所有名称空间的 service 到另一个集群

bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# Copy the Service objects of every non-excluded namespace from the cluster
# addressed by KUBE_CMD to the cluster addressed by KUBE_CMD_NEW.
# The assignments are quoted; unquoted, the shell would run "--kubeconfig=" as a command.
KUBE_CMD="kubectl --kubeconfig="
KUBE_CMD_NEW="kubectl --kubeconfig="

# ${KUBE_CMD} / ${KUBE_CMD_NEW} are left unquoted on purpose so they split into command + flags.
for i in $(${KUBE_CMD} get ns -ojsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | grep -Ev 'default|debug|cattle|test|local|logging|skywalking|spinnaker|gke|ingress|kube|infra|cicd|opentelemetry'); do
	for n in $(${KUBE_CMD} get svc -n "${i}" -o jsonpath='{range $.items[*]}{.metadata.name}{"\n"}{end}'); do
		# Drop server-populated fields (including the allocated cluster IPs) before applying.
		${KUBE_CMD} get svc -n "${i}" "$n" -o json \
			| jq -r 'del(.metadata["creationTimestamp","resourceVersion","selfLink","uid","finalizers","generation"],.status,.spec.clusterIP,.spec.clusterIPs,.metadata.annotations["kubectl.kubernetes.io/last-applied-configuration"])' \
			| ${KUBE_CMD_NEW} apply -f -
	done
done

查看 secret 域名是多少

bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
# Print the Subject line of the certificate stored in one secret.
# The data key holding the certificate is the one whose name matches KEYWORDS.
NAMESPACE=bussniss
SECRET_NAME=chinamobile.com
KEYWORDS=crt
KUBE_CMD="kubectl --kubeconfig="

# ${KUBE_CMD} is left unquoted on purpose so that it splits into command + flags.
# The inner $(...) is quoted so that jq always receives exactly one value for
# --arg. NOTE(review): this assumes exactly one data key matches KEYWORDS.
openssl x509 -in <(
  ${KUBE_CMD} get secret -n "${NAMESPACE}" "${SECRET_NAME}" -ojson \
    | jq --arg secret_key "$(
        ${KUBE_CMD} get secret -n "${NAMESPACE}" "${SECRET_NAME}" -ojson \
          | jq -r '.data | keys[]' | awk -v kw="${KEYWORDS}" '$0 ~ kw { print }'
      )" -r '.data | .[$secret_key]' \
    | base64 -d
) -text -noout | grep -i "subject: "

查看证书是否过期

bash
1
# Print the "Not After" line of the tls.crt certificate stored in secret xxx of namespace bussniss.
openssl x509 -in <(kubectl get secret -n bussniss xxx -ojson |jq -r '.data."tls.crt"'|base64 -d) -text -noout|grep 'Not After'

查看所有名称空间内的证书是否过期(可按 secret 名称关键字过滤)

bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# For every kubernetes.io/tls secret whose name matches KEYWORDS (in every
# namespace), print the secret and the "Not After" line of its tls.crt.
# An empty KEYWORDS matches every secret name.
KUBE_CMD="kubectl --kubeconfig="
KEYWORDS=

# ${KUBE_CMD} is left unquoted on purpose so that it splits into command + flags.
for ns in $(${KUBE_CMD} get ns -o jsonpath='{range $.items[*]}{.metadata.name}{"\n"}{end}'); do
	# The keyword is handed to awk with -v: inside single quotes the shell does
	# not expand $KEYWORDS, and /$KEYWORDS/ would be a literal regex that never matches.
	for sn in $(${KUBE_CMD} get secret -n "$ns" -o json | jq -r '.items[] | select( .type == "kubernetes.io/tls") | .metadata.name' | awk -v kw="${KEYWORDS}" '$1 ~ kw { print }'); do
		echo "[$ns]/[$sn]"
		openssl x509 -in <(${KUBE_CMD} get secret -n "$ns" "$sn" -ojson | jq -r '.data."tls.crt"' | base64 -d) -text -noout | grep 'Not After'
	done
done

查询所有tls的secret的域名

bash
1
2
3
4
5
6
# Print the subject-related lines of the tls.crt certificate of every
# kubernetes.io/tls secret in the cluster.
KUBE_CMD="kubectl --kubeconfig="

# ${KUBE_CMD} is left unquoted on purpose so that it splits into command + flags.
# The certificate is looked up by key: jq's keys are sorted, so a secret that
# also carries ca.crt does not have tls.crt as its first key.
for n in $(${KUBE_CMD} get secret -A -o json | jq -r '.items[] | select( .type == "kubernetes.io/tls" ) | .data["tls.crt"] // empty'); do
	openssl x509 -in <(echo "$n" | base64 -d) -text -noout | grep -i 'subject'
done

批量备份集群内指定类型的 secret 命令

bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
# Back up every kubernetes.io/tls secret whose certificate Subject line contains
# KEYWORDS to ./primetive/<namespace>.<secret>.yaml.
KEYWORDS=
KUBE_CMD="kubectl --kubeconfig="

# The redirection below fails if the target directory does not exist.
mkdir -p primetive

# ${KUBE_CMD} is left unquoted on purpose so that it splits into command + flags.
for ns in $(${KUBE_CMD} get ns -o jsonpath='{range $.items[*]}{.metadata.name}{"\n"}{end}'); do
  for secret_name in $(${KUBE_CMD} get secret -n "$ns" -o json | jq -r '.items[] | select( .type == "kubernetes.io/tls") | .metadata.name'); do
    echo "[$ns]/[$secret_name]"
    # The inner $(...) finds the data key containing "crt"; the outer jq must
    # read exactly that key (.[$secret_key]) so that only one certificate is decoded.
    # NOTE(review): this assumes exactly one data key matches /crt/.
    openssl x509 -in <(
      ${KUBE_CMD} get secret -n "$ns" "$secret_name" -ojson \
        | jq --arg secret_key "$(
            ${KUBE_CMD} get secret -n "$ns" "$secret_name" -ojson \
              | jq -r '.data|keys[]' | awk '$0 ~ /crt/ { print }'
          )" -r '.data|.[$secret_key]' \
        | base64 -d
    ) -text -noout \
      | grep -i 'subject: ' \
      | awk -v ns="$ns" -v secret_name="$secret_name" -v kw="${KEYWORDS}" -v cmd="${KUBE_CMD}" 'index($0, kw) > 0 {
          system(cmd " get secret " secret_name " -n " ns " -oyaml > primetive/" ns "." secret_name ".yaml")
        }'
  done
done

查询 tls 签发域名,并进行替换

bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
# Replace every kubernetes.io/tls secret whose certificate Subject line matches
# KEYWORDS with a secret built from the local files ca.crt / key.key.
KEYWORDS=
KUBE_CMD="kubectl --kubeconfig="

if [[ -z "${KEYWORDS}" ]]; then
    # An empty pattern matches every line in awk, which would replace ALL TLS secrets.
    echo "KEYWORDS is empty; refusing to replace every TLS secret" >&2
else
    # ${KUBE_CMD} is left unquoted on purpose so that it splits into command + flags.
    for ns in $(${KUBE_CMD} get ns -o jsonpath='{range $.items[*]}{.metadata.name}{"\n"}{end}'); do
        for secret_name in $(${KUBE_CMD} get secret -n "${ns}" -o json | jq -r '.items[] | select( .type == "kubernetes.io/tls") | .metadata.name'); do
            # NOTE(review): this assumes exactly one data key matches /crt/.
            openssl x509 -in <(
                ${KUBE_CMD} get secret -n "${ns}" "${secret_name}" -ojson \
                    | jq --arg secret_key "$(
                        ${KUBE_CMD} get secret -n "${ns}" "${secret_name}" -ojson \
                            | jq -r '.data | keys[]' | awk '$0 ~ /crt/ { print }'
                      )" -r '.data|.[$secret_key]' \
                    | base64 -d
            ) -text -noout \
                | grep -i "subject: " \
                | awk -v ns="${ns}" -v secret_name="${secret_name}" -v cmd="${KUBE_CMD}" -v kw="${KEYWORDS}" '$0 ~ kw {
                    system(cmd " create secret tls -n " ns " " secret_name " --cert=ca.crt --key=key.key --dry-run=client -oyaml | " cmd " replace -f -")
                }'
        done
    done
fi

使用本地固定证书作为新证书,替换集群内符合条件的secret

本命令只是打印生成的执行命令(不会真正执行),主要使用 awk 的 print

bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
# Dry run: for every kubernetes.io/tls secret whose certificate Subject line
# matches KEYWORDS, PRINT the command that would replace it with the local
# xxx.crt / xxx.key pair. Nothing is changed in the cluster.
# The assignment is quoted; unquoted, the shell would run "--kubeconfig=" as a command.
KUBE_CMD="kubectl --kubeconfig="
KEYWORDS="chinamobile"

# ${KUBE_CMD} is left unquoted on purpose so that it splits into command + flags.
for ns in $(${KUBE_CMD} get ns -o jsonpath='{range $.items[*]}{.metadata.name}{"\n"}{end}'); do
  for secret_name in $(${KUBE_CMD} get secret -n "$ns" -o json | jq -r '.items[] | select( .type == "kubernetes.io/tls") | .metadata.name'); do
    # NOTE(review): this assumes exactly one data key matches /crt/.
    openssl x509 -in <(
      ${KUBE_CMD} get secret -n "$ns" "$secret_name" -ojson \
        | jq --arg secret_key "$(
            ${KUBE_CMD} get secret -n "$ns" "$secret_name" -ojson \
              | jq -r '.data|keys[]' | awk '$0 ~ /crt/ { print }'
          )" -r '.data|.[$secret_key]' \
        | base64 -d
    ) -text -noout \
      | grep -i 'subject: ' \
      | awk -v ns="$ns" -v secret_name="$secret_name" -v cmd="${KUBE_CMD}" -v kw="${KEYWORDS}" '$0 ~ kw {
          # Both halves of the printed pipeline use cmd so they address the same cluster.
          print cmd " create secret tls " secret_name " -n " ns " --cert=xxx.crt --key=xxx.key --dry-run=client -oyaml | " cmd " replace -f -"
        }'
  done
done
tip

该命令中用到的技巧

bash
1
2
3
4
5
6
kubectl get secret -n ms chinamobile.com -ojson | jq -r '.data|keys[]' 
# List the key names of every entry under .data
awk '$0 ~ /crt/ { print }'
# Keep the lines containing "crt"; /crt/ is a regex literal, so a variable cannot be written inside it
jq --arg secret_key $(kubectl get secret -n monitoring chinamobile.com -ojson | jq -r '.data|keys[]' | awk '$0 ~ /crt/ { print }') -r '.data|.[$secret_key]'
# Takes the JSON fetched earlier and defines the jq variable secret_key, whose value is the xxx.crt key found in that JSON; -r '.data|.[$secret_key]' then outputs the raw base64 value stored under xxx.crt

本命令是真实执行的命令

bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
# For every kubernetes.io/tls secret whose certificate Subject line matches
# KEYWORDS, REPLACE it with a secret built from the local ca.crt / chinamobile.key.
# This modifies the cluster.
# The assignment is quoted; unquoted, the shell would run "--kubeconfig=" as a command.
KUBE_CMD="kubectl --kubeconfig="
KEYWORDS="chinamobile"

# ${KUBE_CMD} is left unquoted on purpose so that it splits into command + flags.
for ns in $(${KUBE_CMD} get ns -o jsonpath='{range $.items[*]}{.metadata.name}{"\n"}{end}'); do
  for secret_name in $(${KUBE_CMD} get secret -n "$ns" -o json | jq -r '.items[] | select( .type == "kubernetes.io/tls") | .metadata.name'); do
    # NOTE(review): this assumes exactly one data key matches /crt/.
    openssl x509 -in <(
      ${KUBE_CMD} get secret -n "$ns" "$secret_name" -ojson \
        | jq --arg secret_key "$(
            ${KUBE_CMD} get secret -n "$ns" "$secret_name" -ojson \
              | jq -r '.data|keys[]' | awk '$0 ~ /crt/ { print }'
          )" -r '.data|.[$secret_key]' \
        | base64 -d
    ) -text -noout \
      | grep -i 'subject: ' \
      | awk -v ns="$ns" -v secret_name="$secret_name" -v cmd="${KUBE_CMD}" -v kw="${KEYWORDS}" '$0 ~ kw {
          # The command string is built from the awk variable cmd: a ${KUBE_CMD}
          # inside the awk program would be expanded by the sub-shell started by
          # system(), where that unexported variable is empty.
          system(cmd " create secret tls " secret_name " -n " ns " --cert=ca.crt --key=chinamobile.key --dry-run=client -oyaml | " cmd " replace -f -")
        }'
  done
done