apisix etcd 集群

发布于 2024-02-25  319 次阅读


apisix etcd 集群

1. etcd

i. 集群

a. 集群信息
机器 ip 端口 节点 数据目录
sg-charger001 172.21.0.6:2379; 172.21.0.6:2380 etcd01 /data/etcd/etcd01.etcd
sg-charger002 172.21.0.7:2379; 172.21.0.7:2380 etcd02 /data/etcd/etcd02.etcd
sg-charger-pre 172.21.0.5:2379; 172.21.0.5:2380 etcd03 /data/etcd/etcd03.etcd

nlb: nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379

服务器组: sg-etcd-group

b. 部署
# etcd-20231114.db 为此前单节点导出的快照

# sg-charger001  172.21.0.6

mkdir -p /data/etcd/
mkdir -p /etc/etcd/
etcdctl snapshot restore etcd-20231114.db --data-dir="/data/etcd/etcd01.etcd" --name etcd01 --initial-cluster etcd01=http://172.21.0.6:2380,etcd02=http://172.21.0.7:2380,etcd03=http://172.21.0.5:2380 --initial-advertise-peer-urls http://172.21.0.6:2380

cat <<eof> /etc/etcd/etcd.conf
ETCD_NAME="etcd01"
ETCD_DATA_DIR="/data/etcd/etcd01.etcd"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.21.0.6:2380"
ETCD_LISTEN_PEER_URLS="http://172.21.0.6:2380"
ETCD_LISTEN_CLIENT_URLS="http://172.21.0.6:2379"
ETCD_ADVERTISE_CLIENT_URLS="http://172.21.0.6:2379"
ETCD_INITIAL_CLUSTER="etcd01=http://172.21.0.6:2380,etcd02=http://172.21.0.7:2380,etcd03=http://172.21.0.5:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
eof

cat <<eof> /usr/lib/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target

[Service]
Type=notify
EnvironmentFile=/etc/etcd/etcd.conf
ExecStart=/usr/bin/etcd
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
eof

systemctl daemon-reload

# sg-charger002  172.21.0.7

mkdir -p /data/etcd/
mkdir -p /etc/etcd/
etcdctl snapshot restore etcd-20231114.db --data-dir="/data/etcd/etcd02.etcd" --name etcd02 --initial-cluster etcd01=http://172.21.0.6:2380,etcd02=http://172.21.0.7:2380,etcd03=http://172.21.0.5:2380 --initial-advertise-peer-urls http://172.21.0.7:2380

cat <<eof> /etc/etcd/etcd.conf
ETCD_NAME="etcd02"
ETCD_DATA_DIR="/data/etcd/etcd02.etcd"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.21.0.7:2380"
ETCD_LISTEN_PEER_URLS="http://172.21.0.7:2380"
ETCD_LISTEN_CLIENT_URLS="http://172.21.0.7:2379"
ETCD_ADVERTISE_CLIENT_URLS="http://172.21.0.7:2379"
ETCD_INITIAL_CLUSTER="etcd01=http://172.21.0.6:2380,etcd02=http://172.21.0.7:2380,etcd03=http://172.21.0.5:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
eof

cat <<eof> /usr/lib/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target

[Service]
Type=notify
EnvironmentFile=/etc/etcd/etcd.conf
ExecStart=/usr/bin/etcd
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
eof

systemctl daemon-reload

# sg-charger-pre  172.21.0.5

mkdir -p /data/etcd/
mkdir -p /etc/etcd/
etcdctl snapshot restore etcd-20231114.db --data-dir="/data/etcd/etcd03.etcd" --name etcd03 --initial-cluster etcd01=http://172.21.0.6:2380,etcd02=http://172.21.0.7:2380,etcd03=http://172.21.0.5:2380 --initial-advertise-peer-urls http://172.21.0.5:2380

cat <<eof> /etc/etcd/etcd.conf
ETCD_NAME="etcd03"
ETCD_DATA_DIR="/data/etcd/etcd03.etcd"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.21.0.5:2380"
ETCD_LISTEN_PEER_URLS="http://172.21.0.5:2380"
ETCD_LISTEN_CLIENT_URLS="http://172.21.0.5:2379"
ETCD_ADVERTISE_CLIENT_URLS="http://172.21.0.5:2379"
ETCD_INITIAL_CLUSTER="etcd01=http://172.21.0.6:2380,etcd02=http://172.21.0.7:2380,etcd03=http://172.21.0.5:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
eof

cat <<eof> /usr/lib/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target

[Service]
Type=notify
EnvironmentFile=/etc/etcd/etcd.conf
ExecStart=/usr/bin/etcd
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
eof

systemctl daemon-reload

# 所有节点同时启动 etcd
systemctl enable etcd --now

# 集群状态查看
etcdctl --endpoints=nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379 endpoint status --cluster -w table;etcdctl --endpoints=nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379 member list;etcdctl --endpoints=nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379 endpoint health

# 查看键值
etcdctl --endpoints nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379 get --prefix /apisix
c. 节点故障处理

若节点故障时间较长,不建议直接重启服务让节点重新上线(不一定能启动,且可能导致数据不一致)。应先在集群的可用节点上移除异常成员,再重新添加该成员;然后在该成员所在机器上删除旧数据目录,以全新节点的身份加入集群,数据会自动同步。

# 假设 etcd03 故障,剩余两个节点,集群尚且可用

# 查看节点列表
etcdctl --endpoints=nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379 member list

2def16940d32cf9a, started, etcd03, http://172.21.0.5:2380, http://172.21.0.5:2379, false
c03ab9dd4139d8d3, started, etcd02, http://172.21.0.7:2380, http://172.21.0.7:2379, false
c60fb2cf5f3686a2, started, etcd01, http://172.21.0.6:2380, http://172.21.0.6:2379, false

# 移除 etcd03
etcdctl --endpoints=nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379 member remove 2def16940d32cf9a

# 重新加入成员
etcdctl --endpoints=nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379 member add etcd03 --peer-urls="http://172.21.0.5:2380"

# sg-charger-pre  172.21.0.5
# 停止 etcd
systemctl stop etcd

# 删除数据目录
rm -rf /data/etcd/etcd03.etcd

# 修改 etcd.conf 集群初始状态为 existing
cat <<eof> /etc/etcd/etcd.conf
ETCD_NAME="etcd03"
ETCD_DATA_DIR="/data/etcd/etcd03.etcd"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.21.0.5:2380"
ETCD_LISTEN_PEER_URLS="http://172.21.0.5:2380"
ETCD_LISTEN_CLIENT_URLS="http://172.21.0.5:2379"
ETCD_ADVERTISE_CLIENT_URLS="http://172.21.0.5:2379"
ETCD_INITIAL_CLUSTER="etcd01=http://172.21.0.6:2380,etcd02=http://172.21.0.7:2380,etcd03=http://172.21.0.5:2380"
ETCD_INITIAL_CLUSTER_STATE="existing"
eof

# 启动
systemctl start etcd

# 集群状态查看
etcdctl --endpoints=nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379 endpoint status --cluster -w table;etcdctl --endpoints=nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379 member list;etcdctl --endpoints=nlb-xxxxxxxxxxxxxxx.ap-southeast-1.nlb.aliyuncs.com:2379 endpoint health

# 以上故障模拟已验证

ii. 应急单节点

机器 ip 端口 节点 数据目录
sg-xxxx001 172.21.0.4:2379;172.21.0.4:2380 etcd-single /data/etcd/etcd-single.etcd

应急节点用于 etcd 集群不可用时的临时应急:每天定时从备份快照恢复集群数据,并以单节点方式运行。apisix 及 apisix-dashboard 使用 nlb 地址接入 etcd,集群不可用时可临时在 nlb 上将后端替换为应急节点所在的服务器组(sg-etcd-group-backup)。

# crontab -l
# Restore today's snapshot into a staging directory first. etcd is stopped and
# its data directory replaced only when the restore succeeded, and etcd is
# started again in every case, so a missing or corrupt snapshot can no longer
# leave the emergency node stopped with its data deleted.
# Uses ">>file 2>&1" instead of "&>>" because cron runs commands with /bin/sh.
10 4 * * * rm -rf /data/etcd/etcd-single.etcd.new; /usr/bin/etcdctl snapshot restore /root/etcd-backup/"etcd-$(date +\%Y\%m\%d).db" --data-dir="/data/etcd/etcd-single.etcd.new" --name etcd-single --initial-cluster etcd-single=http://172.21.0.4:2380 --initial-advertise-peer-urls http://172.21.0.4:2380 >>/tmp/etcd-backup.log 2>&1 && systemctl stop etcd && rm -rf /data/etcd/etcd-single.etcd && mv /data/etcd/etcd-single.etcd.new /data/etcd/etcd-single.etcd; systemctl start etcd

2. apisix 高可用

i. 集群

a. 集群信息
机器 ip 备注
apisix001 172.21.0.1:9080
apisix002 172.21.0.2:9080

b. 部署

apisix 节点无状态,集群依赖 etcd, 使用原先的单节点镜像新增节点即可

c. 网关配置

upstream apisix {
    # apisix001
    server 172.21.0.1:9080;
    # apisix002
    server 172.21.0.2:9080;
}

upstream pre-apisix {
    # sg-apisix-pre
    server 172.21.0.3:9080;
}
d. 节点配置文件
# /usr/local/apisix/conf/config.yaml
e. dashboard
# /usr/local/apisix/dashboard/conf/conf.yaml

# 添加插件需更新配置文件:
curl 127.0.0.1:9090/v1/schema > /usr/local/apisix/dashboard/conf/schema.json
systemctl restart apisix-dashboard 

ii. 配置更新

a. 更新命令
# 备份
# bash /root/backup.sh
# 同步
bash /root/update.sh
b. 脚本
# 备份脚本
# /root/backup.sh
###############################################################
# 环境
env="pre"

# Known environments use config-<env>.yaml; anything else uses config.yaml.
case "${env}" in
    pre | prod | test | dev)
        configFileName="config-${env}.yaml"
        ;;
    *)
        configFileName="config.yaml"
        ;;
esac

# Fall back to config.yaml when the environment-specific file is not installed.
[ -e "/usr/local/apisix/conf/${configFileName}" ] || configFileName="config.yaml"

# Back up the plugins, the resty libraries and the active config file.
mkdir -p /root/apisix-config-backup/plugins/utils /root/apisix-config-backup/resty
/bin/bash /root/sync.sh /usr/local/apisix/apisix/plugins /root/apisix-config-backup/plugins
/bin/bash /root/sync.sh /usr/local/apisix/deps/share/lua/5.1/resty /root/apisix-config-backup/resty
cp "/usr/local/apisix/conf/${configFileName}" "/root/apisix-config-backup/${configFileName}"

# # 恢复
# /bin/bash /root/sync.sh /root/apisix-config-backup/plugins /usr/local/apisix/apisix/plugins
# /bin/bash /root/sync.sh /root/apisix-config-backup/resty /usr/local/apisix/deps/share/lua/5.1/resty
# cp "/root/apisix-config-backup/${configFileName}" "/usr/local/apisix/conf/${configFileName}"
###############################################################

# 更新脚本
# /root/update.sh
###############################################################
# 环境
env="pre"

# Known environments use config-<env>.yaml; anything else uses config.yaml.
case "${env}" in
    pre | prod | test | dev)
        configFileName="config-${env}.yaml"
        ;;
    *)
        configFileName="config.yaml"
        ;;
esac

# Fall back to config.yaml when the environment-specific file is not installed.
if [ ! -e "/usr/local/apisix/conf/${configFileName}" ]; then
    configFileName="config.yaml"
fi

# Pull the latest code. Stop on failure so that a stale checkout is never
# compared against, or deployed over, the running installation.
if ! git -C /opt/middleware/gateway pull; then
    echo "git pull failed in /opt/middleware/gateway; aborting update" >&2
    exit 1
fi

# Show the differences between the checkout and the installed files.
/bin/bash /root/diff.sh /opt/middleware/gateway/apisix/plugins /usr/local/apisix/apisix/plugins
/bin/bash /root/diff.sh /opt/middleware/gateway/apisix/plugins/utils /usr/local/apisix/apisix/plugins/utils
# /bin/bash /root/diff.sh /opt/middleware/gateway/apisix/resty /usr/local/apisix/deps/share/lua/5.1/resty
diff "/opt/middleware/gateway/apisix/conf/${configFileName}" "/usr/local/apisix/conf/${configFileName}"

# Sync only after confirmation; an empty answer (plain Enter) counts as yes.
# -r keeps backslashes in the answer from being interpreted.
read -r -p "请仔细检查以上内容,是否更新? [Y/n] : " yn
yn=${yn:-y}
if [[ ${yn} == [Yy] ]]; then
    /bin/bash /root/sync.sh /opt/middleware/gateway/apisix/plugins /usr/local/apisix/apisix/plugins
    # /bin/bash /root/sync.sh /opt/middleware/gateway/apisix/resty /usr/local/apisix/deps/share/lua/5.1/resty
    # NOTE(review): the diff above compares against conf/${configFileName}, but
    # this copy always overwrites conf/config.yaml - confirm this is intended.
    cp -v "/opt/middleware/gateway/apisix/conf/${configFileName}" "/usr/local/apisix/conf/config.yaml"
fi

###############################################################

# 对比脚本
# /root/diff.sh
###############################################################
#!/bin/bash

# Logging helpers. Every tip* function prints exactly one line to stdout via tip.

# Print $1; echo -e interprets backslash escapes such as \033 colour codes.
tip() {
    echo -e "${1}"
}

# Print an empty line.
blankLine() {
    echo ""
}

# Print $1 on a blue background (ANSI code 44), then reset the attributes.
block() {
    tip "\033[44m$1\033[0m"
}

# Print a timestamped NOTICE line on a blue background.
# NOTE(review): ${c} is not set anywhere in this script and expands to an
# empty string unless the caller's environment provides it - confirm.
tipBlock() {
    block "$(date "+%Y-%m-%d %H:%M:%S") ${c} [NOTICE] $1"
}

# Print a timestamped NOTICE line.
# The timestamp must be a command substitution: without $( ) the quotes split
# the text into two arguments and tip, which prints only $1, drops the message.
tipNotic() {
    tip "$(date "+%Y-%m-%d %H:%M:%S") [NOTICE] $1"
}

# Print a timestamped WARNING line in yellow (ANSI code 33).
tipWarning() {
    tip "\033[33m$(date "+%Y-%m-%d %H:%M:%S") [WARNING] $1\033[0m"
}

# Print a timestamped ERROR line in red (ANSI code 31).
tipError() {
    tip "\033[31m$(date "+%Y-%m-%d %H:%M:%S") [ERROR] $1\033[0m"
}

# Print $1 prefixed with ">> ".
tipCode() {
    tip ">> $1"
}

# Print the MD5 hex digest of a regular file.
# Arguments: $1 - path of the file to hash
# Outputs:   the digest on stdout, or an empty line when $1 is not a regular file
md5file() {
    local filename=$1
    local vhash=""

    if [ -f "${filename}" ]; then
        # Probe for the md5 command itself; [ -e "md5" ] would only test for a
        # file called "md5" in the current directory.
        if command -v md5 >/dev/null 2>&1; then
            # Hash from stdin so the file name never appears in the output.
            vhash=$(md5 < "${filename}" | awk '{print $NF}' | tr -d '[:space:]')
        else
            # md5sum prints "<hash>  -" for stdin; keep the first field only.
            vhash=$(md5sum < "${filename}" | awk '{print $1}' | tr -d '[:space:]')
        fi
    fi

    echo "${vhash}"
}

# Copy a file to its target when their MD5 digests differ, logging the digests
# before and after the copy.
# Arguments: $1 - source file, $2 - target file
# Returns:   1 when the digests still differ after copying, 0 otherwise
syncFile() {
    local sourceFile=$1
    local targetFile=$2
    local sourceHash targetHash

    # Capture md5file's output; a bare "var=md5file file" would try to execute
    # the file instead of hashing it.
    sourceHash=$(md5file "${sourceFile}")
    targetHash=$(md5file "${targetFile}")

    tipNotic " -> [source] ${sourceHash} ${sourceFile} (Before)"
    tipNotic " -> [target] ${targetHash} ${targetFile} (Before)"

    if [ "${sourceHash}" != "${targetHash}" ]; then
        tipNotic "Syncing ${sourceFile}..."
        cp -- "${sourceFile}" "${targetFile}"
        tipNotic "Done!"

        # Hash both files again to verify that the copy took effect.
        sourceHash=$(md5file "${sourceFile}")
        targetHash=$(md5file "${targetFile}")

        tipNotic " -> [source] ${sourceHash} ${sourceFile} (After)"
        tipNotic " -> [target] ${targetHash} ${targetFile} (After)"

        if [ "${sourceHash}" != "${targetHash}" ]; then
            tipError "Sync Exception!"
            return 1
        fi
    fi
}

# Recursively sync the files of a directory into a target directory, at most
# three directory levels deep. Hidden entries (dot files) are not visited.
# Arguments: $1 - source directory
#            $2 - target directory
#            $3 - depth of the calling level (empty on the initial call)
# Returns:   1 when the source directory does not exist, 0 otherwise
syncDir() {
    local sourcePath=$1
    local targetPath=$2
    local syncdeep=$3
    local sourceName sourceFileOrPath

    if [ -z "${syncdeep}" ]; then
        syncdeep=1
    else
        # Arithmetic expansion; a bare "syncdeep=expr ..." never changes the
        # value, which left the depth limit below ineffective.
        syncdeep=$((syncdeep + 1))
    fi

    if [ "${syncdeep}" -gt 3 ]; then
        tipWarning "Stoped: the synchronization directory exceeds 3 levels"
        return
    fi

    if [ ! -d "${sourcePath}" ]; then
        tipError "Sync Exception: ${sourcePath} not found!"
        return 1
    fi

    tipNotic "Syncing Dir: ${sourcePath}"

    # Glob instead of parsing ls output so names with whitespace stay intact.
    # An unmatched pattern stays literal and fails both tests below.
    for sourceName in "${sourcePath}"/*; do
        sourceFileOrPath=${sourceName##*/}

        if [ -f "${sourceName}" ]; then
            syncFile "${sourceName}" "${targetPath}/${sourceFileOrPath}"
            blankLine
            continue
        fi

        if [ -d "${sourceName}" ]; then
            mkdir -p "${targetPath}/${sourceFileOrPath}"
            syncDir "${sourceName}" "${targetPath}/${sourceFileOrPath}" "${syncdeep}"
            continue
        fi
    done

    tipNotic "Done!"
}

# Compare two files by MD5 digest and print a textual diff when they differ.
# Arguments: $1 - source file, $2 - target file
# Outputs:   both digests, plus the diff output on a mismatch
diffFile() {
    local sourceFile=$1
    local targetFile=$2
    local sourceHash targetHash

    # Capture md5file's output; a bare "var=md5file file" would try to execute
    # the file instead of hashing it.
    sourceHash=$(md5file "${sourceFile}")
    targetHash=$(md5file "${targetFile}")

    tipNotic " -> [source] ${sourceHash} ${sourceFile}"
    tipNotic " -> [target] ${targetHash} ${targetFile}"

    if [ "${sourceHash}" != "${targetHash}" ]; then
        tipError "Diff Exception!"

        blankLine
        tipNotic "+---------------------------"
        diff -- "${sourceFile}" "${targetFile}"
        tipNotic "+---------------------------"
        blankLine
    fi
}

# Diff every regular file directly inside a source directory against the file
# of the same name in a target directory. Sub-directories are not descended
# into, and hidden entries (dot files) are not visited.
# Arguments: $1 - source directory, $2 - target directory
# Returns:   1 when either directory is missing, 0 otherwise
diffFileWithPath() {
    local sourcePath=$1
    local targetPath=$2
    local sourceFile

    if [ ! -d "${sourcePath}" ]; then
        tipError "Diff Exception: ${sourcePath} not found!"
        return 1
    fi

    if [ ! -d "${targetPath}" ]; then
        tipError "Diff Exception: ${targetPath} not found!"
        return 1
    fi

    tipNotic "Diffing ${sourcePath}: "

    # Glob instead of parsing ls output so names with whitespace stay intact.
    # An unmatched pattern stays literal and is skipped by the -f test.
    for sourceFile in "${sourcePath}"/*; do
        if [ ! -f "${sourceFile}" ]; then
            continue
        fi

        diffFile "${sourceFile}" "${targetPath}/${sourceFile##*/}"
        blankLine
    done

    tipNotic "Done!"
    blankLine
}

diffFileWithPath "${1}" "${2}"

###############################################################

# 同步脚本
# /root/sync.sh
###############################################################
#!/bin/bash

# Logging helpers. Every tip* function prints exactly one line to stdout via tip.

# Print $1; echo -e interprets backslash escapes such as \033 colour codes.
tip() {
    echo -e "${1}"
}

# Print an empty line.
blankLine() {
    echo ""
}

# Print $1 on a blue background (ANSI code 44), then reset the attributes.
block() {
    tip "\033[44m$1\033[0m"
}

# Print a timestamped NOTICE line on a blue background.
# NOTE(review): ${c} is not set anywhere in this script and expands to an
# empty string unless the caller's environment provides it - confirm.
tipBlock() {
    block "$(date "+%Y-%m-%d %H:%M:%S") ${c} [NOTICE] $1"
}

# Print a timestamped NOTICE line.
# The timestamp must be a command substitution: without $( ) the quotes split
# the text into two arguments and tip, which prints only $1, drops the message.
tipNotic() {
    tip "$(date "+%Y-%m-%d %H:%M:%S") [NOTICE] $1"
}

# Print a timestamped WARNING line in yellow (ANSI code 33).
tipWarning() {
    tip "\033[33m$(date "+%Y-%m-%d %H:%M:%S") [WARNING] $1\033[0m"
}

# Print a timestamped ERROR line in red (ANSI code 31).
tipError() {
    tip "\033[31m$(date "+%Y-%m-%d %H:%M:%S") [ERROR] $1\033[0m"
}

# Print $1 prefixed with ">> ".
tipCode() {
    tip ">> $1"
}

# Print the MD5 hex digest of a regular file.
# Arguments: $1 - path of the file to hash
# Outputs:   the digest on stdout, or an empty line when $1 is not a regular file
md5file() {
    local filename=$1
    local vhash=""

    if [ -f "${filename}" ]; then
        # Probe for the md5 command itself; [ -e "md5" ] would only test for a
        # file called "md5" in the current directory.
        if command -v md5 >/dev/null 2>&1; then
            # Hash from stdin so the file name never appears in the output.
            vhash=$(md5 < "${filename}" | awk '{print $NF}' | tr -d '[:space:]')
        else
            # md5sum prints "<hash>  -" for stdin; keep the first field only.
            vhash=$(md5sum < "${filename}" | awk '{print $1}' | tr -d '[:space:]')
        fi
    fi

    echo "${vhash}"
}

# Copy a file to its target when their MD5 digests differ, logging the digests
# before and after the copy.
# Arguments: $1 - source file, $2 - target file
# Returns:   1 when the digests still differ after copying, 0 otherwise
syncFile() {
    local sourceFile=$1
    local targetFile=$2
    local sourceHash targetHash

    # Capture md5file's output; a bare "var=md5file file" would try to execute
    # the file instead of hashing it.
    sourceHash=$(md5file "${sourceFile}")
    targetHash=$(md5file "${targetFile}")

    tipNotic " -> [source] ${sourceHash} ${sourceFile} (Before)"
    tipNotic " -> [target] ${targetHash} ${targetFile} (Before)"

    if [ "${sourceHash}" != "${targetHash}" ]; then
        tipNotic "Syncing ${sourceFile}..."
        cp -- "${sourceFile}" "${targetFile}"
        tipNotic "Done!"

        # Hash both files again to verify that the copy took effect.
        sourceHash=$(md5file "${sourceFile}")
        targetHash=$(md5file "${targetFile}")

        tipNotic " -> [source] ${sourceHash} ${sourceFile} (After)"
        tipNotic " -> [target] ${targetHash} ${targetFile} (After)"

        if [ "${sourceHash}" != "${targetHash}" ]; then
            tipError "Sync Exception!"
            return 1
        fi
    fi
}

# Recursively sync the files of a directory into a target directory, at most
# three directory levels deep. Hidden entries (dot files) are not visited.
# Arguments: $1 - source directory
#            $2 - target directory
#            $3 - depth of the calling level (empty on the initial call)
# Returns:   1 when the source directory does not exist, 0 otherwise
syncDir() {
    local sourcePath=$1
    local targetPath=$2
    local syncdeep=$3
    local sourceName sourceFileOrPath

    if [ -z "${syncdeep}" ]; then
        syncdeep=1
    else
        # Arithmetic expansion; a bare "syncdeep=expr ..." never changes the
        # value, which left the depth limit below ineffective.
        syncdeep=$((syncdeep + 1))
    fi

    if [ "${syncdeep}" -gt 3 ]; then
        tipWarning "Stoped: the synchronization directory exceeds 3 levels"
        return
    fi

    if [ ! -d "${sourcePath}" ]; then
        tipError "Sync Exception: ${sourcePath} not found!"
        return 1
    fi

    tipNotic "Syncing Dir: ${sourcePath}"

    # Glob instead of parsing ls output so names with whitespace stay intact.
    # An unmatched pattern stays literal and fails both tests below.
    for sourceName in "${sourcePath}"/*; do
        sourceFileOrPath=${sourceName##*/}

        if [ -f "${sourceName}" ]; then
            syncFile "${sourceName}" "${targetPath}/${sourceFileOrPath}"
            blankLine
            continue
        fi

        if [ -d "${sourceName}" ]; then
            mkdir -p "${targetPath}/${sourceFileOrPath}"
            syncDir "${sourceName}" "${targetPath}/${sourceFileOrPath}" "${syncdeep}"
            continue
        fi
    done

    tipNotic "Done!"
}

# Compare two files by MD5 digest and print a textual diff when they differ.
# Arguments: $1 - source file, $2 - target file
# Outputs:   both digests, plus the diff output on a mismatch
diffFile() {
    local sourceFile=$1
    local targetFile=$2
    local sourceHash targetHash

    # Capture md5file's output; a bare "var=md5file file" would try to execute
    # the file instead of hashing it.
    sourceHash=$(md5file "${sourceFile}")
    targetHash=$(md5file "${targetFile}")

    tipNotic " -> [source] ${sourceHash} ${sourceFile}"
    tipNotic " -> [target] ${targetHash} ${targetFile}"

    if [ "${sourceHash}" != "${targetHash}" ]; then
        tipError "Diff Exception!"

        blankLine
        tipNotic "+---------------------------"
        diff -- "${sourceFile}" "${targetFile}"
        tipNotic "+---------------------------"
        blankLine
    fi
}

syncDir "${1}" "${2}"

###############################################################

iii. 网关切换 apisix 节点

a. 场景
  • 更新 apisix 节点的插件或配置时,采取逐台更新的方式,更新一台后观察一段时间,没有问题再更新下一台
  • 升级节点硬件配置时
b. 后续节点增多考虑使用 nlb 方便切换节点权重