diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..ec0fb1a --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,5 @@ +# Default ignored files +/workspace.xml +/misc.xml +/modules.xml +/vcs.xml \ No newline at end of file diff --git a/.idea/etcd.iml b/.idea/etcd.iml new file mode 100644 index 0000000..d6ebd48 --- /dev/null +++ b/.idea/etcd.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..73958d7 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,15 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..6649a8c --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..78e162f --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md index 8bb88a0..e43c50e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@

Logo

-

etcd on QingCloud

+

etcd on QingCloud

- [Product Page](https://appcenter.qingcloud.com/apps/app-fdyvu2wk) diff --git a/ansible/.gitignore b/ansible/.gitignore index a96e14c..270c282 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -1,2 +1,3 @@ /files/tmp /make.retry + diff --git a/ansible/files/etc/confd/conf.d/make.sh.toml b/ansible/files/etc/confd/conf.d/make.sh.toml index d107c17..a1f00fe 100644 --- a/ansible/files/etc/confd/conf.d/make.sh.toml +++ b/ansible/files/etc/confd/conf.d/make.sh.toml @@ -5,5 +5,5 @@ mode = "0700" keys = [ "/", ] -reload_cmd = "/opt/app/bin/make.sh; /opt/app/bin/ctl.sh update" +reload_cmd = "/opt/app/bin/make.sh; /opt/app/bin/ctl.sh configureDomainName; /opt/app/bin/ctl.sh update" diff --git a/ansible/files/etc/confd/conf.d/nodeexporter.sh.toml b/ansible/files/etc/confd/conf.d/nodeexporter.sh.toml new file mode 100644 index 0000000..ea6b810 --- /dev/null +++ b/ansible/files/etc/confd/conf.d/nodeexporter.sh.toml @@ -0,0 +1,8 @@ +[template] +src = "nodeexporter.sh.tmpl" +dest = "/opt/app/bin/nodeexporter.sh" +mode = "0700" +keys = [ + "/", +] +reload_cmd = "/opt/app/bin/ctl.sh updateNodeexporterServer" diff --git a/ansible/files/etc/confd/conf.d/tls.sh.toml b/ansible/files/etc/confd/conf.d/tls.sh.toml new file mode 100644 index 0000000..e3d0ea2 --- /dev/null +++ b/ansible/files/etc/confd/conf.d/tls.sh.toml @@ -0,0 +1,9 @@ +[template] +src = "tls.sh.tmpl" +dest = "/opt/app/bin/tls.sh" +mode = "0700" +keys = [ + "/", +] + + diff --git a/ansible/files/etc/confd/templates/01.header.sh.tmpl b/ansible/files/etc/confd/templates/01.header.sh.tmpl index 8946c27..d2c1784 100644 --- a/ansible/files/etc/confd/templates/01.header.sh.tmpl +++ b/ansible/files/etc/confd/templates/01.header.sh.tmpl @@ -6,10 +6,15 @@ set -e {{- $myRole := replace (getv "/host/role") "_" "-" -1 }} {{- $mySid := getv "/host/sid" }} {{- $myIp := getv "/host/ip" }} +{{- $clusterDNS := getv "/env/cluster_DNS" ".etcdsvc.common" }} + {{- $addedInstances := ls "/adding-hosts/etcd_node" }} {{- $deletedInstances := ls "/deleting-hosts/etcd_node" }} +{{- $addedProxyInstances := ls "/adding-hosts/etcd_proxy" }} +{{- $deletedProxyInstances := ls "/deleting-hosts/etcd_proxy" }} + {{- $isAdded := eq (len ($addedInstances | filter (getv "/host/instance_id"))) 1 }} {{- $isDeleted := eq (len ($deletedInstances | filter (getv "/host/instance_id"))) 1 }} @@ -28,8 +33,14 @@ map() { done } +{{- $enableTLS := getv "/env/enable_TLS" }} + buildEndpoint() { - echo -n http://${1#*=}:2379 + {{- if eq "true" $enableTLS }} + echo -n https://${1#*=}:2379 + {{- else }} + echo -n http://${1#*=}:2379 + {{- end }} } allNodes="$(sort -V - << ALL_NODES_EOF @@ -38,6 +49,14 @@ allNodes="$(sort -V - << ALL_NODES_EOF {{- end }} ALL_NODES_EOF )" +allNodesDomain="$(sort -V - << ALL_NODES_EOF +{{- range ls "/hosts/etcd_node" }} +{{ getv (printf "/hosts/etcd_node/%s/sid" .) }}=etcd{{ getv (printf "/hosts/etcd_node/%s/sid" .) }}{{ $clusterDNS }} +{{- end }} +ALL_NODES_EOF +)" + + stableNodes="$(sort -V - << STABLE_NODES_EOF {{- range ls "/hosts/etcd_node" }} @@ -51,3 +70,41 @@ STABLE_NODES_EOF )" stableEndpointLines=$(map buildEndpoint "$stableNodes") stableEndpoints=$(echo $stableEndpointLines | tr " " ",") + + +stableNodesDomainName="$(sort -V - << STABLE_NODES_EOF +{{- range ls "/hosts/etcd_node" }} + {{- if not (len ($addedInstances | filter .)) }} + {{- if not (len ($deletedInstances | filter .)) }} + {{ getv (printf "/hosts/etcd_node/%s/sid" .) }}=etcd{{ getv (printf "/hosts/etcd_node/%s/sid" .) }}{{ $clusterDNS }} + {{- end }} + {{- end }} +{{- end }} +STABLE_NODES_EOF +)" + + +hostsDomainName="$(sort -V - << STABLE_NODES_EOF +{{- range ls "/hosts/etcd_node" }} + {{- if not (len ($addedInstances | filter .)) }} + {{- if not (len ($deletedInstances | filter .)) }} + {{ getv (printf "/hosts/etcd_node/%s/ip" .) }} etcd{{ getv (printf "/hosts/etcd_node/%s/sid" .) }}{{ $clusterDNS }} + {{- end }} + {{- end }} +{{- end }} +STABLE_NODES_EOF +)" + + +proxyHostsDomainName="$(sort -V - << STABLE_NODES_EOF +{{- range ls "/hosts/etcd_proxy" }} + {{ getv (printf "/hosts/etcd_proxy/%s/ip" .) }} proxy{{ getv (printf "/hosts/etcd_proxy/%s/sid" .) }}{{ $clusterDNS }} +{{- end }} +STABLE_NODES_EOF +)" + + + + + + diff --git a/ansible/files/etc/confd/templates/02.app.env.tmpl b/ansible/files/etc/confd/templates/02.app.env.tmpl index 2a9e60c..82c368d 100644 --- a/ansible/files/etc/confd/templates/02.app.env.tmpl +++ b/ansible/files/etc/confd/templates/02.app.env.tmpl @@ -5,26 +5,65 @@ addedNodes="$(sort -V - << ADDED_NODES_ASC_EOF {{- end }} ADDED_NODES_ASC_EOF )" +addedNodesDomain="$(sort -V - << ADDED_NODES_ASC_EOF +{{- range $addedInstances }} +{{ getv (printf "/adding-hosts/etcd_node/%s/sid" .) }}=etcd{{ getv (printf "/hosts/etcd_node/%s/sid" .) }}{{ $clusterDNS }} +{{- end }} +ADDED_NODES_ASC_EOF +)" +addedNodesHostsDomainName="$(sort -V - << ADDED_NODES_ASC_EOF +{{- range $addedInstances }} +{{ getv (printf "/adding-hosts/etcd_node/%s/ip" .) }} etcd{{ getv (printf "/hosts/etcd_node/%s/sid" .) }}{{ $clusterDNS }} +{{- end }} +ADDED_NODES_ASC_EOF +)" deletedNodes="$(sort -Vr - << DELETED_NODES_DESC_EOF {{- range $deletedInstances }} {{ getv (printf "/deleting-hosts/etcd_node/%s/sid" .) }}={{ getv (printf "/deleting-hosts/etcd_node/%s/ip" .) }} {{- end }} DELETED_NODES_DESC_EOF )" +deletedNodesDomain="$(sort -Vr - << DELETED_NODES_DESC_EOF +{{- range $deletedInstances }} +{{ getv (printf "/deleting-hosts/etcd_node/%s/sid" .) }}=etcd{{ getv (printf "/hosts/etcd_node/%s/sid" .) }}{{ $clusterDNS }} +{{- end }} +DELETED_NODES_DESC_EOF +)" +deletedNodesHostsDomain="$(sort -Vr - << DELETED_NODES_DESC_EOF +{{- range $deletedInstances }} +{{ getv (printf "/deleting-hosts/etcd_node/%s/ip" .) }} etcd{{ getv (printf "/hosts/etcd_node/%s/sid" .) }}{{ $clusterDNS }} +{{- end }} +DELETED_NODES_DESC_EOF +)" {{- end }} changedVariables=$(updateAndCompareFile /opt/app/bin/.env << APP_ENV_FILE CLUSTER_ID={{ getv "/cluster/cluster_id" }} ETCD_COMPACT_INTERVAL={{ getv "/env/etcautocompact" "0" }} ETCD_QUOTA_BYTES={{ getv "/env/etcd.quota.backend.bytes" "2147483648" }} +ETCD_HEARTBEAT_INTERVAL={{ getv "/env/etcdheartbeatinterval" "100" }} +ETCD_ELECTION_TIMEOUT={{ getv "/env/etcdelectiontimeout" "1000" }} +ETCD_AUTO_COMPACTION_MODE="periodic" +# ETCD_MAX_REQUEST_BYTES=1572864 +ETCD_ENABLE_V2="true" +ETCD_ENABLE_TLS={{ $enableTLS }} MY_ROLE={{ $myRole }} MY_SID={{ $mySid }} MY_IP={{ getv "/host/ip" }} +ETCD_CLUSTER_DNS={{ $clusterDNS }} STABLE_NODES="$(echo $stableNodes)" +STABLE_NODES_DOMAIN_NAME="$(echo $stableNodesDomainName)" +HOSTS_DOMAIN_NAME="$(echo $hostsDomainName)" +PROXY_HOSTS_DOMAIN_NAME="$(echo $proxyHostsDomainName)" {{- if eq $myRole "etcd-node" }} ADDED_NODES="$(echo $addedNodes)" +ADDED_NODES_DOMAIN="$(echo $addedNodesDomain)" +ADDED_NODES_HOSTS_DOMAIN="$(echo $addedNodesHostsDomainName)" DELETED_NODES="$(echo $deletedNodes)" +DELETED_NODES_DOMAIN="$(echo $deletedNodesDomain)" +DELETED_NODES_HOSTS_DOMAIN="$(echo $deletedNodesHostsDomain)" ALL_NODES="$(echo $allNodes)" +ALL_NODES_DOMAIN="$(echo $allNodesDomain)" IS_ADDED={{ $isAdded }} IS_DELETED={{ $isDeleted }} {{- end }} diff --git a/ansible/files/etc/confd/templates/nodeexporter.sh.tmpl b/ansible/files/etc/confd/templates/nodeexporter.sh.tmpl new file mode 100644 index 0000000..b150ff3 --- /dev/null +++ b/ansible/files/etc/confd/templates/nodeexporter.sh.tmpl @@ -0,0 +1 @@ +NODE_EXPORTER={{ getv "/env/nodeexporter" "false" }} \ No newline at end of file diff --git a/ansible/files/etc/confd/templates/tls.sh.tmpl b/ansible/files/etc/confd/templates/tls.sh.tmpl new file mode 100644 index 0000000..40cf78f --- /dev/null +++ b/ansible/files/etc/confd/templates/tls.sh.tmpl @@ -0,0 +1 @@ +ENABLE_TLS={{ getv "/env/enable_TLS" "false" }} diff --git a/ansible/files/opt/app/bin/common.sh b/ansible/files/opt/app/bin/common.sh index 75a0a84..77c98a2 100644 --- a/ansible/files/opt/app/bin/common.sh +++ b/ansible/files/opt/app/bin/common.sh @@ -16,9 +16,12 @@ EC_MEMBER_EXISTS=15 # scale: member still exists EC_REPAIR_ILLEGAL_NODE=16 # repair: source node is outside cluster EC_UNHEALTHY=17 # check: cluster is unhealthy EC_NO_MEMBER_ID=18 # member: failed to find ID +EC_NO_CA=19 # ca: failed to CA +EC_REPAIR_FAILED=20 # repair: failed to repair +EC_REPAIR_IP_FAILED=21 # repair: Normal node input error or Abnormal node input error workingDir=/var/lib/etcd -appctlDir=$workingDir/appctl +appctlDir=$workingDir/appctl # Log Dir log() { logger -t $MY_ROLE.appctl --id=$$ [cmd=$command role=$MY_ROLE] "$@" || return $EC_LOGGING diff --git a/ansible/files/opt/app/bin/ctl.sh b/ansible/files/opt/app/bin/ctl.sh index af67630..4022ddc 100644 --- a/ansible/files/opt/app/bin/ctl.sh +++ b/ansible/files/opt/app/bin/ctl.sh @@ -6,25 +6,232 @@ set -e . /opt/app/bin/changes.env . /opt/app/bin/common.sh . /opt/app/bin/etcdutils.sh +. /opt/app/bin/nodeexporter.sh +. /opt/app/bin/tls.sh command=$1 args="${@:2}" - +etcdVersion=v3.4.16 check() { - if [ "$MY_ROLE" = "etcd-node" ]; then - [ "$(curl -s $(buildClientUrls)/health | jq -r '.health')" = "true" ] + if [ $ENABLE_TLS = "true" ]; then + if [ "$MY_ROLE" = "etcd-node" ]; then + [ "$(curl -s --cacert /var/lib/etcd/ssl/etcd/ca.pem --cert /var/lib/etcd/ssl/etcd/client.pem --key /var/lib/etcd/ssl/etcd/client-key.pem $(buildClientDomainUrls)/health | jq -r '.health')" = "true" ] + else + nc -z -w3 $MY_IP 2379 + fi else - nc -z -w3 $MY_IP 2379 + if [ "$MY_ROLE" = "etcd-node" ]; then + [ "$(curl -s $(buildClientUrls)/health | jq -r '.health')" = "true" ] + else + nc -z -w3 $MY_IP 2379 + fi fi } + +updateNodeexporterServer(){ + + if [ $NODE_EXPORTER = "true" ] ;then + log "NODE_EXPORTER service is preparing to start" + systemctl start node_exporter + if [ $? -eq 0 ] ;then + log "NODE_EXPORTER service start success" + fi + else + log "NODE_EXPORTER service is preparing to stop" + systemctl stop node_exporter + if [ $? -eq 0 ] ;then + log "NODE_EXPORTER service stop success" + fi + fi +} + +generateCertificate(){ + openTLSAndChangeDomain=$1 + if [ "$openTLSAndChangeDomain" = "true" ] || [ ! -f "/var/lib/etcd/ssl/etcd/ca.pem" ] ;then + local etcd_nodeAllIp='"'`curl -s -m 15 metadata/self/hosts/etcd_node/|grep /ip|grep -v eip|awk '{print $2}'|egrep '([0-9]{1,3}\.){3}[0-9]{1,3}'|sed ':label;N;s/\n/","/;b label'`'"' + echo '{"CN":"CA","key":{"algo":"rsa","size":2048},"ca":{"expiry": "876000h"}}' | /usr/local/bin/cfssl gencert -initca - | /usr/local/bin/cfssljson -bare ca - + echo '{"signing":{"default":{"expiry":"876000h"},"profiles":{"server":{"expiry":"876000h","usages":["signing","key encipherment","server auth","client auth"]},"client":{"expiry":"876000h","usages":["signing","key encipherment","client auth"]},"peer":{"expiry":"876000h","usages":["signing","key encipherment","server auth","client auth"]}}}}' > ca-config.json + export NAME=server + echo '{"CN":"'$NAME'","hosts":["*'${ETCD_CLUSTER_DNS}'"],"key":{"algo":"rsa","size":2048}}' | /usr/local/bin/cfssl gencert -config=ca-config.json -ca=ca.pem -ca-key=ca-key.pem -profile=server - | /usr/local/bin/cfssljson -bare $NAME + export NAME=client + echo '{"CN":"'$NAME'","key":{"algo":"rsa","size":2048}}' | /usr/local/bin/cfssl gencert -config=ca-config.json -profile=client -ca=ca.pem -ca-key=ca-key.pem - | /usr/local/bin/cfssljson -bare $NAME - + export NAME=peer + echo '{"CN":"'$NAME'","hosts":["*'${ETCD_CLUSTER_DNS}'"],"key":{"algo":"rsa","size":2048}}' | /usr/local/bin/cfssl gencert -config=ca-config.json -ca=ca.pem -ca-key=ca-key.pem -profile=peer - | /usr/local/bin/cfssljson -bare $NAME + + + mkdir -p /var/lib/etcd/ssl/etcd/ + cp ./*.pem /var/lib/etcd/ssl/etcd/ + chown -R etcd:etcd /var/lib/etcd/ssl/etcd/ + + allIps=`curl -s -m 15 metadata/self|grep '/ip'|awk '{print $2}'|sort|uniq|egrep '([0-9]{1,3}\.){3}[0-9]{1,3}'` + ipArr=(${allIps// / }) + for ip in ${ipArr[@]} + do + echo "scp cert to ${ip}" + if [ ${ip} != ${MY_IP} ];then + scp -P 16022 -rp /var/lib/etcd/ssl ${ip}:/var/lib/etcd + ssh -p 16022 ${ip} "chown -R etcd:etcd /var/lib/etcd/ssl" + fi + done + + /opt/etcd/current/etcdctl --endpoints=http://metadata:2379 put /clusters/${CLUSTER_ID}/env/etcd_node/caPem "$(base64 <<< cat /var/lib/etcd/ssl/etcd/ca.pem)" + /opt/etcd/current/etcdctl --endpoints=http://metadata:2379 put /clusters/${CLUSTER_ID}/env/etcd_node/clientPem "$(base64 <<< cat /var/lib/etcd/ssl/etcd/client.pem)" + /opt/etcd/current/etcdctl --endpoints=http://metadata:2379 put /clusters/${CLUSTER_ID}/env/etcd_node/clientKeyPem "$(base64 <<< cat /var/lib/etcd/ssl/etcd/client-key.pem)" + + + fi +} + + +configureDomainName(){ +# local isDomain=`cat /etc/hosts|grep "${MY_IP}"` +# if [ "${isDomain}" = "" ]; then +# echo "$HOSTS_DOMAIN_NAME" |awk '{len=split($0,a," ");for(i=1;i<=len;i=i+2) print a[i]"\t"a[i+1] }' >> /etc/hosts +# fi + NODES_HOSTS="$HOSTS_DOMAIN_NAME $PROXY_HOSTS_DOMAIN_NAME" + echo $NODES_HOSTS + if [ "${NODES_HOSTS}" != "" ]; then + count=0 + memberIP="" + for var in $NODES_HOSTS;do + if [ "$count" -eq "0" ] ;then + memberIP=$var + else + sed -i "/${var}$/d" /etc/hosts + echo "$memberIP" "$var" >> /etc/hosts + fi + count=`expr $count + 1` + if [ "$count" -eq "2" ]; then + count=0 + fi + + done + fi + ADDED_NODES_HOSTS="$ADDED_NODES_HOSTS_DOMAIN" + if [ "${ADDED_NODES_HOSTS}" != "" ]; then + count=0 + memberIP="" + for var in $ADDED_NODES_HOSTS;do + if [ "$count" -eq "0" ] ;then + memberIP=$var + else + sed -i "/${var}$/d" /etc/hosts + echo "$memberIP" "$var" >> /etc/hosts + fi + count=`expr $count + 1` + if [ "$count" -eq "2" ]; then + count=0 + fi + done + fi + echo "IS_DELETED value:$IS_DELETED " + if [ "$IS_DELETED" != "true" ];then + #未删除的节点,删除hosts文件中多余的域名与ip的对应关系 + hostsAllDomain=`cat /etc/hosts|grep -e ${ETCD_CLUSTER_DNS}|awk '{print $1 " " $2}'` + DELETED_NODES_HOSTS="$DELETED_NODES_HOSTS_DOMAIN" + if [ "${hostsAllDomain}" != "" ] && [ "${DELETED_NODES_HOSTS}" != "" ]; then + local count=0 + for var in $hostsAllDomain;do + count=`expr $count + 1` + if [ "$count" -eq "2" ]; then + local delNode=`echo ${DELETED_NODES_HOSTS} |grep ${var}` + if [ "${delNode}" != "" ]; then + sed -i "/${var}$/d" /etc/hosts + fi + count=0 + fi + done + fi + fi + + /opt/etcd/current/etcdctl --endpoints=http://metadata:2379 put /clusters/${CLUSTER_ID}/env/etcd_node/hostsDomain "$HOSTS_DOMAIN_NAME $ADDED_NODES_HOSTS_DOMAIN $PROXY_HOSTS_DOMAIN_NAME " +} + + + + init() { + if [ "$MY_ROLE" = "etcd-proxy" ]; then + rm -rf $workingDir/lost+found + mkdir -p $appctlDir + chown -R etcd.etcd $workingDir + fi + +# if [ "$MY_ROLE" = "etcd-node" ] && [ "$IS_ADDED" != "true" ]; then +# updateEtcdAuth +# fi +} + +initCustom(){ + #第一ip执行其他的都不执行,其它等待这个执行完了,再执行start + firstNode=`echo $STABLE_NODES|awk '{print $1}'` + firstNodeSID=`echo ${firstNode%=*}` + firstNodeIP=`echo ${firstNode#*=}` + if [ "$MY_ROLE" = "etcd-node" ] && [ $ENABLE_TLS = "true" ] && [ "$IS_ADDED" != "true" ] && [ $MY_SID = $firstNodeSID ]; then + generateCertificate + fi + + + sleepMaxTime=0 + eixtFlag=0 + num=0 + sleepTime=2 + while : + do + num=`expr ${num} + 1` + echo "This is the ${num} time to check whether there is a certificate" +# caPem=`curl -s metadata/self/env/caPem` +# clientPem=`curl -s metadata/self/env/clientPem` +# clientKeyPem=`curl -s metadata/self/env/clientKeyPem` + caKeyPem="/var/lib/etcd/ssl/etcd/ca-key.pem" + caPem="/var/lib/etcd/ssl/etcd/ca.pem" + clientKeyPem="/var/lib/etcd/ssl/etcd/client-key.pem" + clientPem="/var/lib/etcd/ssl/etcd/client.pem" + peerKeyPem="/var/lib/etcd/ssl/etcd/peer-key.pem" + peerPem="/var/lib/etcd/ssl/etcd/peer.pem" + serverKeyPem="/var/lib/etcd/ssl/etcd/server-key.pem" + serverPem="/var/lib/etcd/ssl/etcd/server.pem" + if [ -f $caKeyPem ] && [ -f $caPem ] && [ -f $clientKeyPem ] && [ -f $clientPem ] && [ -f $peerKeyPem ] && [ -f $peerPem ] && [ -f $serverKeyPem ] && [ -f $serverPem ]; then + echo "ca-key.pem ca.pem client-key.pem client.pem peer-key.pem peer.pem server-key.pem server.pem,their certificates exist" + break + fi + + if [ ${sleepMaxTime} -ge 120 ];then + echo "sleepMaxTime>=120s,exit check Certificate loop " + eixtFlag=19 + break + fi + + sleepMaxTime=`expr ${sleepMaxTime} + ${sleepTime}` + sleep ${sleepTime}s + done + + if [ ${eixtFlag} -ne 0 ];then + echo "Certificate check failed!" + log "Certificate check failed"; return $EC_NO_CA + fi + +} + + +initETCDenv() { + [ "$MY_ROLE" = "etcd-proxy" ] || { rm -rf $workingDir/lost+found mkdir -p $appctlDir chown -R etcd.etcd $workingDir } svc enable + updateNodeexporterServer + + if [ $ENABLE_TLS = "true" ]; then + configureDomainName + fi + + if [ "$MY_ROLE" = "etcd-node" ] && [ $ENABLE_TLS = "true" ] && [ "$IS_ADDED" != "true" ]; then + initCustom + fi } metricsKeys=" @@ -32,48 +239,281 @@ etcd_network_peer_sent_bytes_total etcd_server_has_leader etcd_server_is_leader etcd_server_leader_changes_seen_total -http_requests_total +etcd_server_proposals_failed_total process_resident_memory_bytes process_virtual_memory_bytes " metricsFilter="$(echo $metricsKeys | tr " " "|")" measure() { - local lines=$(curl -s -m 5 $(buildClientUrls)/metrics | grep -E "^($metricsFilter)" | awk '{gsub(/\{[^}]*\}/,""); a[$1]+=$2}; END{for(c in a) printf "^%s^:%s\n", c, a[c]}') + local lines + if [ $ENABLE_TLS = "true" ]; then + lines=$(curl -s -m 5 --cacert /var/lib/etcd/ssl/etcd/ca.pem --cert /var/lib/etcd/ssl/etcd/client.pem --key /var/lib/etcd/ssl/etcd/client-key.pem $(buildClientDomainUrls)/metrics | grep -E "^($metricsFilter)" | awk '{gsub(/\{[^}]*\}/,""); a[$1]+=$2}; END{for(c in a) printf "^%s^:%s\n", c, a[c]}') + else + lines=$(curl -s -m 5 $(buildClientUrls)/metrics | grep -E "^($metricsFilter)" | awk '{gsub(/\{[^}]*\}/,""); a[$1]+=$2}; END{for(c in a) printf "^%s^:%s\n", c, a[c]}') + fi cat << METRICS_EOF {$(echo $lines | tr " " "," | tr "^" '"')} METRICS_EOF } start() { + initETCDenv + log "Etcd service is preparing to start" if [ "$MY_ROLE" = "etcd-node" ] && [ "$IS_ADDED" = "true" ]; then - buildCluster "$ADDED_NODES" + if [ $ENABLE_TLS = "true" ]; then + echo "View current changes.env document content:" `cat /opt/app/bin/changes.env` + echo "View current hosts document content:" `cat /etc/hosts` + echo "View current MY_ROLE vlaue:" $MY_ROLE ",IS_ADDED vlaue:" $IS_ADDED ",ENABLE_TLS vlaue:"$ENABLE_TLS + local isDomain=`cat /etc/hosts|grep "${MY_IP}"` + echo "View current isDomain vlaue:" $isDomain + if [ "${isDomain}" = "" ]; then + allIps=`curl -s -m 15 metadata/self|grep '/ip'|awk '{print $2}'|sort|uniq|egrep '([0-9]{1,3}\.){3}[0-9]{1,3}'` + echo "View current allNodeIps vlaue:" $allIps + ipArr=(${allIps// / }) + for ip in ${ipArr[@]} + do + echo "View current Iterate ip:" $ip + local ret="ssh -p 16022 ${ip} `cat /etc/hosts |grep ${MY_IP}`" + local hasMY_IP=${ret}|awk '{print $7" "$8}' + if [ "${hasMY_IP}" = "" ];then + echo "add starting----" "$ADDED_NODES_HOSTS_DOMAIN" + ssh -p 16022 ${ip} `echo "$ADDED_NODES_HOSTS_DOMAIN" |awk '{len=split($0,a," ");for(i=1;i<=len;i=i+2) print a[i]"\t"a[i+1] }' >> /etc/hosts` + local catRet="ssh -p 16022 ${ip} `cat /etc/hosts`" + echo "View ip:" ${ip} "Node's /etc/hosts content:" ${catRet} + echo "add ending----" "$ADDED_NODES_HOSTS_DOMAIN" + fi + done + fi + echo "Need ADDED NODES HOSTS DOMAIN:" $ADDED_NODES_HOSTS_DOMAIN + echo "View current ip:"${MY_IP} "Node's /etc/hosts content:" `cat /etc/hosts` + #去复制以前节点的证书 + if [ ! -f "/var/lib/etcd/ssl/etcd/ca.pem" ];then + local ip=`echo "$HOSTS_DOMAIN_NAME"|awk '{print $1}'` + echo "Copy all certificate from " ${ip} + scp -P 16022 -rp ${ip}:/var/lib/etcd/ssl /var/lib/etcd + chown -R etcd:etcd /var/lib/etcd/ssl + fi + echo "Need ADDED NODES DOMAIN:" $ADDED_NODES_DOMAIN + buildCluster "$ADDED_NODES_DOMAIN" + else + buildCluster "$ADDED_NODES" + fi else - prepareEtcdConfig + prepareEtcdConfig + if [ "$MY_ROLE" = "etcd-node" ];then + chown -R etcd.etcd $workingDir #升级时不会调用initETCDenv所以在这里重新执行 + fi svc start fi } stop() { + log "Etcd service is asked to stop ." svc stop } -destroy() { - for node in $DELETED_NODES; do - local member="$(buildMember $node)" - if [ "${node%=*}" != "$MY_SID" ]; then - log "Waiting member $member to be removed ..." - retry 200 1 checkMemberRemoved $member - else - local memberId="$(retry 10 1 findMemberId $MY_IP)" - [ -n "$memberId" ] || return $EC_NO_MEMBER_ID - log "Removing myself [$member] with ID [$memberId] from cluster ..." - # This may fail some times until the cluster gets healthy again after removed some other members. - retry 200 1 removeMember $memberId - retry 10 1 checkStopped - stop - break +openTLS(){ + firstNode=`echo $STABLE_NODES|awk '{print $1}'` + firstNodeSID=`echo ${firstNode%=*}` + firstNodeIP=`echo ${firstNode#*=}` + echo "firstNodeSID:$firstNodeSID" ",firstNodeIP:$firstNodeIP" + etcdctlTemp="" +# if [ $ETCDAUTH = "false" ] ;then + etcdctlTemp="/opt/etcd/current/etcdctl" +# else +# etcdctlTemp="/opt/etcd/current/etcdctl --user="${ETCDDEFAULTUSER}:${ETCDDEFAULTPASSWD} +# fi + + if [ $ENABLE_TLS = "true" ]; then + configureDomainName + if [ $MY_SID = $firstNodeSID ]; then + generateCertificate $1 + allnodelist=`${etcdctlTemp} --endpoints=http://${firstNodeIP}:2379 member list |awk '{print $1,$3}'|sed s/,/""/g` + local count=0 + local memberId="" + set +e + for var in $allnodelist;do + if [ "$count" -eq "0" ] ;then + memberId=$var + else + `${etcdctlTemp} \ + --cert=/var/lib/etcd/ssl/etcd/client.pem \ + --key=/var/lib/etcd/ssl/etcd/client-key.pem \ + --cacert=/var/lib/etcd/ssl/etcd/ca.pem \ + --endpoints=http://${firstNodeIP}:2379 member update $memberId --peer-urls="https://${var}${ETCD_CLUSTER_DNS}:2380"` + fi + count=`expr $count + 1` + if [ "$count" -eq "2" ]; then + count=0 + fi + done + set -e + fi + sleep 20 + #去复制以前节点的证书 + if [ ! -f "/var/lib/etcd/ssl/etcd/ca.pem" ];then + local ip=`echo "$HOSTS_DOMAIN_NAME"|awk '{print $1}'` + echo "Copy all certificate from " ${ip} + scp -P 16022 -rp ${ip}:/var/lib/etcd/ssl /var/lib/etcd + chown -R etcd:etcd /var/lib/etcd/ssl + fi + prepareEtcdConfig + + local allPeerAddrDomain="true" + local sleepMaxTime=0 + while [ ${sleepMaxTime} -le 90 ]; do + set +e + sleepMaxTime=`expr ${sleepMaxTime} + 1` + allPeerNode=`${etcdctlTemp} --endpoints=http://${firstNodeIP}:2379 member list |awk '{print $4}'|sed s/,/""/g` + if [ $? -ne 0 ]; then + echo "openTLS member list view failed: ${sleepMaxTime} times" + sleep 1 + continue + fi + echo "sleepMaxTime: $sleepMaxTime ,current member list info: $allPeerNode" + for peerNode in $allPeerNode;do + changeDomain=`echo $peerNode |grep ${ETCD_CLUSTER_DNS}` + echo "member list PEER ADDRS value:$changeDomain" + if [[ "$changeDomain" = "" ]];then + echo "Not all member addresses have been changed to domain names" + allPeerAddrDomain="false" + break + fi + done + if [ "$allPeerAddrDomain" = "true" ]; then + break + fi + sleep 1 + set -e + done + + systemctl daemon-reload + systemctl stop etcd + systemctl start etcd fi - done + +} + +closeTLS(){ + firstNode=`echo $STABLE_NODES|awk '{print $1}'` + firstNodeSID=`echo ${firstNode%=*}` + firstNodeIP=`echo ${firstNode#*=}` + echo "firstNodeSID:$firstNodeSID" ",firstNodeIP:$firstNodeIP" + etcdctlTemp="" +# if [ $ETCDAUTH = "false" ] ;then + etcdctlTemp="/opt/etcd/current/etcdctl" +# else +# etcdctlTemp="/opt/etcd/current/etcdctl --user="${ETCDDEFAULTUSER}:${ETCDDEFAULTPASSWD} +# fi + + if [ $ENABLE_TLS = "false" ]; then + echo "close TLS starting!" + if [ $MY_SID = $firstNodeSID ]; then + allnodelist=`${etcdctlTemp} \ + --cert=/var/lib/etcd/ssl/etcd/client.pem \ + --key=/var/lib/etcd/ssl/etcd/client-key.pem \ + --cacert=/var/lib/etcd/ssl/etcd/ca.pem \ + --endpoints=https://etcd${firstNodeSID}${ETCD_CLUSTER_DNS}:2379 member list |awk '{print $1,$3}'|sed s/,/""/g` + local count=0 + local memberId="" + set +e + for var in $allnodelist;do + if [ "$count" -eq "0" ] ;then + memberId=$var + else + local ip=`cat /etc/hosts|grep $var|awk '{print $1}'` + `${etcdctlTemp} \ + --cert=/var/lib/etcd/ssl/etcd/client.pem \ + --key=/var/lib/etcd/ssl/etcd/client-key.pem \ + --cacert=/var/lib/etcd/ssl/etcd/ca.pem \ + --endpoints=https://${var}${ETCD_CLUSTER_DNS}:2379 member update $memberId --peer-urls="http://${ip}:2380"` + fi + count=`expr $count + 1` + if [ "$count" -eq "2" ]; then + count=0 + fi + done + set -e + fi + echo "close TLS end!" + sleep 20 + prepareEtcdConfig + + local allPeerAddrIP="true" + local sleepMaxTime=0 + while [ ${sleepMaxTime} -le 90 ]; do + set +e + sleepMaxTime=`expr ${sleepMaxTime} + 1` + allnodelist=`${etcdctlTemp} \ + --cert=/var/lib/etcd/ssl/etcd/client.pem \ + --key=/var/lib/etcd/ssl/etcd/client-key.pem \ + --cacert=/var/lib/etcd/ssl/etcd/ca.pem \ + --endpoints=https://etcd${firstNodeSID}${ETCD_CLUSTER_DNS}:2379 member list |awk '{print $4}'|sed s/,/""/g` + if [ $? -ne 0 ]; then + echo "closeTLS member list view failed: ${sleepMaxTime} times" + sleep 1 + continue + fi + echo "sleepMaxTime: $sleepMaxTime ,current member list info: $allnodelist" + for peerNode in $allnodelist;do + changeIP=`echo $peerNode |grep ${ETCD_CLUSTER_DNS}` + echo "member list PEER ADDRS value:$changeIP" + if [[ "$changeIP" != "" ]];then + echo "Not all member addresses have been changed to ip" + allPeerAddrIP="false" + break + fi + done + if [ "$allPeerAddrIP" = "true" ]; then + break + fi + sleep 1 + set -e + done + + systemctl daemon-reload + systemctl stop etcd + systemctl start etcd + fi + +} + +destroy() { + if [ $ENABLE_TLS = "true" ]; then + for node in $DELETED_NODES_DOMAIN; do + local member="$(buildMember $node)" + if [ "${node%=*}" != "$MY_SID" ]; then + log "Waiting member $member to be removed ..." + retry 200 1 checkMemberRemoved $member + else + local memberId="$(retry 10 1 findMemberId etcd${MY_SID}${ETCD_CLUSTER_DNS})" + [ -n "$memberId" ] || return $EC_NO_MEMBER_ID + log "Removing myself [$member] with ID [$memberId] from cluster ..." + # This may fail some times until the cluster gets healthy again after removed some other members. + retry 200 1 removeMember $memberId + retry 10 1 checkStopped + stop + break + fi + done + else + for node in $DELETED_NODES; do + local member="$(buildMember $node)" + if [ "${node%=*}" != "$MY_SID" ]; then + log "Waiting member $member to be removed ..." + retry 200 1 checkMemberRemoved $member + else + local memberId="$(retry 10 1 findMemberId $MY_IP)" + [ -n "$memberId" ] || return $EC_NO_MEMBER_ID + log "Removing myself [$member] with ID [$memberId] from cluster ..." + # This may fail some times until the cluster gets healthy again after removed some other members. + retry 200 1 removeMember $memberId + retry 10 1 checkStopped + stop + break + fi + done + fi } v2BackupDir=$workingDir/v2.backup @@ -95,42 +535,256 @@ backup() { } restore() { - rm -rf $etcdDataDir && sleep 1 && init + snapshot_id=$(echo "$@" | jq -r '."snapshot_id"') + echo "snapshot_id value:$snapshot_id" + if [ "$snapshot_id" != "" ];then + rm -rf /var/lib/etcd/ssl + fi + rm -rf $etcdDataDir && sleep 1 && initETCDenv prepareEtcdConfig if [ -f "$v3BackupFile" ]; then restoreSnap $v3BackupFile svc start else - local firstNode=${ALL_NODES%% *} - local firstNodeIp=${firstNode#*=} - if [ "$firstNodeIp" = "$MY_IP" ]; then - [ -d "$v2BackupDir" ] || return $EC_RESTORE_NO_DB - log "Restoring v2 on first node ..." - mv $v2BackupDir $etcdDataDir - log "Starting etcd restore service ..." - systemctl start etcd-standalone - log "Updating my peer url ..." - local myMemberId - myMemberId=$(findMemberId localhost $MY_IP) - retry 10 1 etcdctl --endpoints=$(buildClientUrls) member update $myMemberId --peer-urls=$(buildMemberUrls) || { - systemctl stop etcd-standalone - return $EC_RESTORE_ERROR - } - log "Stopping etcd restore service ..." - systemctl stop etcd-standalone + if [ $ENABLE_TLS = "true" ]; then + local firstNode=${ALL_NODES_DOMAIN%% *} + local firstNodeIp=${firstNode#*=} + if [ "$firstNodeIp" = "etcd${MY_SID}${ETCD_CLUSTER_DNS}" ]; then + [ -d "$v2BackupDir" ] || return $EC_RESTORE_NO_DB + log "Restoring v2 on first node ..." + mv $v2BackupDir $etcdDataDir + log "Starting etcd restore service ..." + systemctl start etcd-standalone + log "Updating my peer url ..." + sleep 3 #是为了等待etcd-standalone启动起来 + local myMemberId + myMemberId=$(findMemberId localhost etcd${MY_SID}${ETCD_CLUSTER_DNS}) + retry 10 1 etcdctl member update $myMemberId --peer-urls=$(buildMemberDomainUrls) || { + systemctl stop etcd-standalone + return $EC_RESTORE_ERROR + } + log "Stopping etcd restore service ..." + systemctl stop etcd-standalone + fi + buildCluster "$ALL_NODES_DOMAIN" + else + local firstNode=${ALL_NODES%% *} + local firstNodeIp=${firstNode#*=} + if [ "$firstNodeIp" = "$MY_IP" ]; then + [ -d "$v2BackupDir" ] || return $EC_RESTORE_NO_DB + log "Restoring v2 on first node ..." + mv $v2BackupDir $etcdDataDir + log "Starting etcd restore service ..." + systemctl start etcd-standalone + log "Updating my peer url ..." + sleep 3 #是为了等待etcd-standalone启动起来 + local myMemberId + myMemberId=$(findMemberId localhost $MY_IP) + retry 10 1 etcdctl member update $myMemberId --peer-urls=$(buildMemberUrls) || { + systemctl stop etcd-standalone + return $EC_RESTORE_ERROR + } + log "Stopping etcd restore service ..." + systemctl stop etcd-standalone + fi + buildCluster "$ALL_NODES" fi - buildCluster "$ALL_NODES" fi } restart() { + log "Etcd service is asked to restart ." stop && start } +upgrade() { + # 先升级至当前次版本号对应的最新修订版本号以规避升级bug,后升级至目标版本 + initETCDenv + + log "Etcd service is prepared to upgrade to $etcdVersion" + local sleepMaxTime=0 + while : + do +# curl -L $(buildClientUrls)/version >>/root/a.txt || echo + check && break || echo -n + sleepMaxTime=`expr ${sleepMaxTime} + 1` + if [ ${sleepMaxTime} -ge 60 ]; then + return -1 + fi + sleep 1s + done + #stop + #rm -rf /opt/etcd/current + #ln -s /opt/etcd/$etcdVersion /opt/etcd/current + + #initETCDenv + #curl -L $(buildClientUrls)/version >>/root/a.txt || echo + #start +} + +changesClusterDNS(){ + ETCD_CLUSTER_DNS_before=$1 + ETCD_CLUSTER_DNS_after=$2 + firstNode=`echo $STABLE_NODES|awk '{print $1}'` + firstNodeSID=`echo ${firstNode%=*}` + firstNodeIP=`echo ${firstNode#*=}` + echo "firstNodeSID:$firstNodeSID" ",firstNodeIP:$firstNodeIP" + etcdctlTemp="" +# if [ $ETCDAUTH = "false" ] ;then + etcdctlTemp="/opt/etcd/current/etcdctl" +# else +# etcdctlTemp="/opt/etcd/current/etcdctl --user="${ETCDDEFAULTUSER}:${ETCDDEFAULTPASSWD} +# fi + if [ "$MY_ROLE" = "etcd-node" ] && [ $ENABLE_TLS = "true" ] && [ "$IS_ADDED" != "true" ] ; then + if [ $MY_SID = $firstNodeSID ]; then + #----生成新证书---- + local etcd_nodeAllIp='"'`curl -s -m 15 metadata/self/hosts/etcd_node/|grep /ip|grep -v eip|awk '{print $2}'|egrep '([0-9]{1,3}\.){3}[0-9]{1,3}'|sed ':label;N;s/\n/","/;b label'`'"' + echo '{"CN":"CA","key":{"algo":"rsa","size":2048},"ca":{"expiry": "876000h"}}' | /usr/local/bin/cfssl gencert -initca - | /usr/local/bin/cfssljson -bare ca - + echo '{"signing":{"default":{"expiry":"876000h"},"profiles":{"server":{"expiry":"876000h","usages":["signing","key encipherment","server auth","client auth"]},"client":{"expiry":"876000h","usages":["signing","key encipherment","client auth"]},"peer":{"expiry":"876000h","usages":["signing","key encipherment","server auth","client auth"]}}}}' > ca-config.json + export NAME=server + echo '{"CN":"'$NAME'","hosts":["*'${ETCD_CLUSTER_DNS_after}'"],"key":{"algo":"rsa","size":2048}}' | /usr/local/bin/cfssl gencert -config=ca-config.json -ca=ca.pem -ca-key=ca-key.pem -profile=server - | /usr/local/bin/cfssljson -bare $NAME + export NAME=client + echo '{"CN":"'$NAME'","key":{"algo":"rsa","size":2048}}' | /usr/local/bin/cfssl gencert -config=ca-config.json -profile=client -ca=ca.pem -ca-key=ca-key.pem - | /usr/local/bin/cfssljson -bare $NAME - + export NAME=peer + echo '{"CN":"'$NAME'","hosts":["*'${ETCD_CLUSTER_DNS_after}'"],"key":{"algo":"rsa","size":2048}}' | /usr/local/bin/cfssl gencert -config=ca-config.json -ca=ca.pem -ca-key=ca-key.pem -profile=peer - | /usr/local/bin/cfssljson -bare $NAME + + mkdir -p /var/lib/etcd/ssl/etcdtemp/ + cp ./*.pem /var/lib/etcd/ssl/etcdtemp/ + chown -R etcd:etcd /var/lib/etcd/ssl/etcdtemp/ + + allIps=`curl -s -m 15 metadata/self|grep '/ip'|awk '{print $2}'|sort|uniq|egrep '([0-9]{1,3}\.){3}[0-9]{1,3}'` + ipArr=(${allIps// / }) + for ip in ${ipArr[@]} + do + echo "scp cert to ${ip}" + if [ ${ip} != ${MY_IP} ];then + scp -P 16022 -rp /var/lib/etcd/ssl/etcdtemp ${ip}:/var/lib/etcd/ssl + ssh -p 16022 ${ip} "chown -R etcd:etcd /var/lib/etcd/ssl" + fi + done + + /opt/etcd/current/etcdctl --endpoints=http://metadata:2379 put /clusters/${CLUSTER_ID}/env/etcd_node/caPem "$(base64 <<< cat /var/lib/etcd/ssl/etcdtemp/ca.pem)" + /opt/etcd/current/etcdctl --endpoints=http://metadata:2379 put /clusters/${CLUSTER_ID}/env/etcd_node/clientPem "$(base64 <<< cat /var/lib/etcd/ssl/etcdtemp/client.pem)" + /opt/etcd/current/etcdctl --endpoints=http://metadata:2379 put /clusters/${CLUSTER_ID}/env/etcd_node/clientKeyPem "$(base64 <<< cat /var/lib/etcd/ssl/etcdtemp/client-key.pem)" + + #----生成新证书结束---- + + #----修改etcd集群信息开始---- + allnodelist=`${etcdctlTemp} \ + --cert=/var/lib/etcd/ssl/etcd/client.pem \ + --key=/var/lib/etcd/ssl/etcd/client-key.pem \ + --cacert=/var/lib/etcd/ssl/etcd/ca.pem \ + --endpoints=https://etcd${firstNodeSID}${ETCD_CLUSTER_DNS_before}:2379 member list |awk '{print $1,$3}'|sed s/,/""/g` + local count=0 + local memberId="" + set +e + for var in $allnodelist;do + if [ "$count" -eq "0" ] ;then + memberId=$var + else + `${etcdctlTemp} \ + --cert=/var/lib/etcd/ssl/etcd/client.pem \ + --key=/var/lib/etcd/ssl/etcd/client-key.pem \ + --cacert=/var/lib/etcd/ssl/etcd/ca.pem \ + --endpoints=https://etcd${firstNodeSID}${ETCD_CLUSTER_DNS_before}:2379 member update $memberId --peer-urls="https://${var}${ETCD_CLUSTER_DNS_after}:2380"` + fi + count=`expr $count + 1` + if [ "$count" -eq "2" ]; then + count=0 + fi + done + set -e + #----修改etcd集群信息结束---- + fi + + sleep 20 + #去复制以前节点的证书 + if [ ! -f "/var/lib/etcd/ssl/etcdtemp/ca.pem" ];then + local ip=`echo "$HOSTS_DOMAIN_NAME"|awk '{print $1}'` + echo "Copy all certificate from " ${ip} + scp -P 16022 -rp ${ip}:/var/lib/etcd/ssl/ /var/lib/etcd/ + chown -R etcd:etcd /var/lib/etcd/ssl + fi + cp -r /var/lib/etcd/ssl/etcdtemp/* /var/lib/etcd/ssl/etcd + prepareEtcdConfig + + local allPeerAddrChanged="true" + local sleepMaxTime=0 + while [ ${sleepMaxTime} -le 90 ]; do + set +e + sleepMaxTime=`expr ${sleepMaxTime} + 1` + allnodelist=`${etcdctlTemp} \ + --cert=/var/lib/etcd/ssl/etcd/client.pem \ + --key=/var/lib/etcd/ssl/etcd/client-key.pem \ + --cacert=/var/lib/etcd/ssl/etcd/ca.pem \ + --endpoints=https://etcd${firstNodeSID}${ETCD_CLUSTER_DNS_before}:2379 member list |awk '{print $4}'|sed s/,/""/g` + if [ $? -ne 0 ]; then + echo "changesClusterDNS member list view failed: ${sleepMaxTime} times" + sleep 1 + continue + fi + echo "sleepMaxTime: $sleepMaxTime ,current member list info: $allnodelist" + for peerNode in $allnodelist;do + changeDomain=`echo $peerNode |grep ${ETCD_CLUSTER_DNS_after}` + echo "member list PEER ADDRS value:$changeDomain" + if [[ "$changeDomain" = "" ]];then + echo "Not all member addresses have been changed to new domain" + allPeerAddrChanged="false" + break + fi + done + if [ "$allPeerAddrChanged" = "true" ]; then + break + fi + set -e + done + + systemctl daemon-reload + systemctl stop etcd + #删除以前的hosts域名配置 + sed -i "/${ETCD_CLUSTER_DNS_before}$/d" /etc/hosts + rm -rf /var/lib/etcd/ssl/etcdtemp + systemctl start etcd + + fi +} + update() { svc is-enabled -q || return 0 - [ "$MY_ROLE" = "etcd-proxy" ] || [[ ,${CHANGED_VARS// /,} =~ ,ETCD_COMPACT_INTERVAL= ]] || [[ ,${CHANGED_VARS// /,} =~ ,ETCD_QUOTA_BYTES= ]] || return 0 - restart + [ "$MY_ROLE" = "etcd-proxy" ] || [[ ,${CHANGED_VARS// /,} =~ ,ETCD_ ]] || return 0 + local closeChangeOpenTLS=$(echo ${CHANGED_VARS} | grep "ETCD_ENABLE_TLS=false ETCD_ENABLE_TLS=true") + local openChangeCloseTLS=$(echo ${CHANGED_VARS} | grep "ETCD_ENABLE_TLS=true ETCD_ENABLE_TLS=false") + local changesClusterDNSTime=$(echo ${CHANGED_VARS}|grep -o "CLUSTER_DNS=*"|wc -l) + if [[ "$closeChangeOpenTLS" != "" ]] && [[ "$changesClusterDNSTime" -ne "2" ]];then + openTLS + elif [[ "$openChangeCloseTLS" != "" ]];then + closeTLS + elif [[ "$changesClusterDNSTime" -eq "2" ]] && [[ "$closeChangeOpenTLS" = "" ]];then + changesClusterDNS="" + for var in ${CHANGED_VARS};do + echo $var + if [ ${var%=*} = "ETCD_CLUSTER_DNS" ];then + changesClusterDNS="$changesClusterDNS ${var#*=}" + fi + done + echo $changesClusterDNS + changesClusterDNS $changesClusterDNS + elif [[ "$changesClusterDNSTime" -eq "2" ]] && [[ "$closeChangeOpenTLS" != "" ]];then + openTLSAndChangeDomain="true" + openTLS $openTLSAndChangeDomain + #删除以前hosts域名配置 + ClusterDNSBefore="" + for var in ${CHANGED_VARS};do + echo $var + if [ ${var%=*} = "ETCD_CLUSTER_DNS" ];then + ClusterDNSBefore="${var#*=}" + break + fi + done + sed -i "/${ClusterDNSBefore}$/d" /etc/hosts + else + restart + fi } compact() { @@ -152,6 +806,7 @@ ready2Start=$appctlDir/ready2Start repair() { local sourceIp=$(echo "$@" | jq -r '."node.ip"') echo "${ALL_NODES// /:}:" | grep -q "=$sourceIp:" || return $EC_REPAIR_ILLEGAL_NODE + sshPort=$(netstat -tunpl | grep $(ps -ef |grep `which sshd` | grep -v grep | awk '{print $2}') | grep -v tcp6 | awk '{print $4}' | awk -F ':' '{print $2}') if [ "$sourceIp" = "$MY_IP" ]; then backup @@ -159,7 +814,7 @@ repair() { for node in $ALL_NODES; do local ip=${node#*=} log "Notifying node on $ip ..." - [ "$ip" = "$MY_IP" ] || ssh $ip "touch $ready2Copy" + [ "$ip" = "$MY_IP" ] || ssh -p $sshPort $ip "touch $ready2Copy" done stop @@ -172,27 +827,80 @@ repair() { if [ -d "$v2BackupDir" ]; then local firstNode=${ALL_NODES%% *} local firstNodeIp=${firstNode#*=} - [ "$firstNodeIp" = "$MY_IP" ] || scp -r $v2BackupDir $firstNodeIp:$v2BackupDir + [ "$firstNodeIp" = "$MY_IP" ] || scp -P $sshPort -r $v2BackupDir $firstNodeIp:$v2BackupDir else for node in $ALL_NODES; do local ip=${node#*=} - [ "$ip" = "$MY_IP" ] || scp $v3BackupFile $ip:$v3BackupFile + [ "$ip" = "$MY_IP" ] || scp -P $sshPort $v3BackupFile $ip:$v3BackupFile done fi for node in $ALL_NODES; do local ip=${node#*=} - [ "$ip" = "$MY_IP" ] || ssh $ip "touch $ready2Start" + [ "$ip" = "$MY_IP" ] || ssh -p $sshPort $ip "touch $ready2Start" done else retry 20 1 checkFileReady $ready2Copy stop rm -rf $v2BackupDir $v3BackupFile* - ssh $sourceIp "touch $ready2Copy-$MY_IP" + ssh -p $sshPort $sourceIp "touch $ready2Copy-$MY_IP" retry 200 1 checkFileReady $ready2Start fi restore } +repairMinorityNode(){ + healthNodeIP=$(echo "$@" | jq -r '."healthnode.ip"') + unHealthNodeIP=$(echo "$@" | jq -r '."unhealthnode.ip"') + + local unHealthNodeStatus="" + local healthNodeStatus="" + healthNode="http://${healthNodeIP}:2379" + if [ $ENABLE_TLS = "true" ]; then + unHealthNodeStatus="$(curl -s --cacert /var/lib/etcd/ssl/etcd/ca.pem --cert /var/lib/etcd/ssl/etcd/client.pem --key /var/lib/etcd/ssl/etcd/client-key.pem $(buildClientDomainUrls)/health | jq -r '.health')" + IPkey=`curl -s metadata/self|grep $healthNodeIP|awk '{print $1}'|grep -v /env/hostsDomain |grep -v /host/ip |grep -v /cmd` + sid=`curl -s metadata/self|grep ${IPkey%/*}/sid|awk '{print $2}'` + clusterDNS=`curl -s metadata/self|grep /env/cluster_DNS|awk '{print $2}'` + healthNode="https://etcd${sid}${clusterDNS}:2379" + healthNodeStatus="$(curl -s --cacert /var/lib/etcd/ssl/etcd/ca.pem --cert /var/lib/etcd/ssl/etcd/client.pem --key /var/lib/etcd/ssl/etcd/client-key.pem ${healthNode}/health | jq -r '.health')" + else + unHealthNodeStatus="$(curl -s $(buildClientUrls)/health | jq -r '.health')" + healthNodeStatus="$(curl -s ${healthNode}/health | jq -r '.health')" + fi + + #输入的不健康节点是当前节点,当前节点是不健康状态,且输入的健康节点是健康才执行。 + if [ "$unHealthNodeIP" = "$MY_IP" ] && [ "$unHealthNodeStatus" != "true" ] && [ "$healthNodeStatus" = "true" ];then + stopNodeEtcdService + removeNodeAndaddNodeAgain $healthNode + modifyCfgAndRestart $healthNode + verifyClusterHealth + machineEnvRecovery + else + return 0 + fi +} + +updateEtcdAuthCtl(){ + updateEtcdAuth +} + +updateEtcdPasswdCtl(){ + updateEtcdPasswd +} + + +getAccessCertificate(){ + if [ $ENABLE_TLS = "true" ]; then + hostsDomain=`cat /etc/hosts|grep ${ETCD_CLUSTER_DNS}| jq -Rsc "."` + caPem=`cat /var/lib/etcd/ssl/etcd/ca.pem| jq -Rsc "."` + clientPem=`cat /var/lib/etcd/ssl/etcd/client.pem| jq -Rsc "."` + clientKeyPem=`cat /var/lib/etcd/ssl/etcd/client-key.pem| jq -Rsc "."` + echo '{"labels": ["hostsDomain","ca.pem","client.pem","client-key.pem"], "data": [['$hostsDomain','$caPem','$clientPem','$clientKeyPem']]}'|jq "." + else + echo '{"labels": ["hostsDomain","ca.pem","client.pem","client-key.pem"], "data": [["","","",""]]}'|jq "." + fi +} + + $command $args diff --git a/ansible/files/opt/app/bin/etcdutils.sh b/ansible/files/opt/app/bin/etcdutils.sh index d740655..75c44a1 100644 --- a/ansible/files/opt/app/bin/etcdutils.sh +++ b/ansible/files/opt/app/bin/etcdutils.sh @@ -2,50 +2,124 @@ set -e +#. /opt/app/bin/etcdauth.sh + etcdDataDir=$workingDir/default.etcd etcdEnvFile=/opt/app/conf/etcd.env etcdName=etcd$MY_SID etcdClusterToken=etcd-$CLUSTER_ID +#etcdAuthPasswdFile=$workingDir/etcdAuthPasswd.txt + buildMemberName() { echo etcd${1:-$MY_SID} } buildClientUrls() { - echo http://${1:-$MY_IP}:2379 + if [ $ENABLE_TLS = "true" ]; then + echo https://${1:-$MY_IP}:2379 + else + echo http://${1:-$MY_IP}:2379 + fi +} + +buildClientDomainUrls() { + if [ $ENABLE_TLS = "true" ]; then + echo https://${1:-etcd${MY_SID}${ETCD_CLUSTER_DNS}}:2379 + else + echo http://${1:-$MY_IP}:2379 + fi } buildMemberUrls() { - echo http://${1:-$MY_IP}:2380 + if [ $ENABLE_TLS = "true" ]; then + echo https://${1:-$MY_IP}:2380 + else + echo http://${1:-$MY_IP}:2380 + fi +} + +buildMemberDomainUrls() { + if [ $ENABLE_TLS = "true" ]; then + echo https://${1:-etcd${MY_SID}${ETCD_CLUSTER_DNS}}:2380 + else + echo http://${1:-$MY_IP}:2380 + fi } buildMember() { - echo "$(buildMemberName ${1%=*})=$(buildMemberUrls ${1#*=})" + if [ $ENABLE_TLS = "true" ]; then + echo "$(buildMemberName ${1%=*})=$(buildMemberDomainUrls ${1#*=})" + else + echo "$(buildMemberName ${1%=*})=$(buildMemberUrls ${1#*=})" + fi + } buildEndpoints() { - for node in $STABLE_NODES; do - echo "$(buildClientUrls ${node#*=})" - [ "$1" = "existing" ] && [ "$node" = "$MY_SID=$MY_IP" ] && break - done + if [ $ENABLE_TLS = "true" ]; then + for node in $STABLE_NODES_DOMAIN_NAME; do + echo "$(buildClientUrls ${node#*=})" + [ "$1" = "existing" ] && [ "$node" = "$MY_SID=etcd${MY_SID}${ETCD_CLUSTER_DNS}" ] && break + done + else + for node in $STABLE_NODES; do + echo "$(buildClientUrls ${node#*=})" + [ "$1" = "existing" ] && [ "$node" = "$MY_SID=$MY_IP" ] && break + done + fi } buildMembers() { local nodes - nodes="$(echo $STABLE_NODES) $(echo $ADDED_NODES)" + if [ $ENABLE_TLS = "true" ]; then + nodes="$(echo $STABLE_NODES_DOMAIN_NAME) $(echo $ADDED_NODES_DOMAIN)" + for node in ${2:-$nodes}; do + echo "$(buildMember $node)" + [ "$1" = "existing" ] && [ "$node" = "$MY_SID=etcd${MY_SID}${ETCD_CLUSTER_DNS}" ] && break + done + else + nodes="$(echo $STABLE_NODES) $(echo $ADDED_NODES)" + for node in ${2:-$nodes}; do + echo "$(buildMember $node)" + [ "$1" = "existing" ] && [ "$node" = "$MY_SID=$MY_IP" ] && break + done + fi +} + +buildMembersDomain() { + local nodes + nodes="$(echo $STABLE_NODES_DOMAIN_NAME) $(echo $ADDED_NODES_DOMAIN)" for node in ${2:-$nodes}; do echo "$(buildMember $node)" - [ "$1" = "existing" ] && [ "$node" = "$MY_SID=$MY_IP" ] && break + [ "$1" = "existing" ] && [ "$node" = "$MY_SID=etcd${MY_SID}${ETCD_CLUSTER_DNS}" ] && break done } -export ETCDCTL_API=3 +etcdctlInitFun() { + if [ $ENABLE_TLS = "true" ]; then + echo "/opt/etcd/current/etcdctl --cert=/var/lib/etcd/ssl/etcd/client.pem --key=/var/lib/etcd/ssl/etcd/client-key.pem --cacert=/var/lib/etcd/ssl/etcd/ca.pem" + else + echo "/opt/etcd/current/etcdctl" + fi +} + +export ETCDCTL_API=3 etcdctl() { - ETCDCTL_ENDPOINTS=$(joinArgs $(buildEndpoints)) runCmd /opt/etcd/current/etcdctl $@ + etcdctlInit=$(etcdctlInitFun) +# if [ $ETCDAUTH = "false" ] ;then + ETCDCTL_ENDPOINTS=$(joinArgs $(buildEndpoints)) runCmd $etcdctlInit $@ +# else +# ETCDCTL_ENDPOINTS=$(joinArgs $(buildEndpoints)) runCmd $etcdctlInit --user="${ETCDDEFAULTUSER}:${ETCDDEFAULTPASSWD}" $@ +# fi } takeBackup() { - ETCDCTL_API=2 runCmd /opt/etcd/v3.3.11/etcdctl backup --data-dir=$etcdDataDir --backup-dir=$1 --with-v3 + local etcdctlInit=$(etcdctlInitFun) + if [ $ENABLE_TLS = "true" ]; then + etcdctlInit=`echo ${etcdctlInit}|sed "s/--cert/--cert-file/g"|sed "s/--key/--key-file/g"|sed "s/--cacert/--ca-file/g"` + fi + ETCDCTL_API=2 runCmd $etcdctlInit backup --data-dir=$etcdDataDir --backup-dir=$1 --with-v3 } takeSnap() { @@ -70,17 +144,55 @@ svc() { prepareEtcdConfig() { if [ "$MY_ROLE" = "etcd-proxy" ]; then - cat > $etcdEnvFile << PROXY_ENV_FILE_EOF + if [ $ENABLE_TLS = "true" ]; then + cat > $etcdEnvFile << PROXY_ENV_FILE_EOF +etcdOpts="gateway start --listen-addr=proxy${MY_SID}${ETCD_CLUSTER_DNS}:2379 --endpoints=$(joinArgs $(buildEndpoints))" +PROXY_ENV_FILE_EOF + return 0 + else + cat > $etcdEnvFile << PROXY_ENV_FILE_EOF etcdOpts="gateway start --listen-addr=$MY_IP:2379 --endpoints=$(joinArgs $(buildEndpoints))" PROXY_ENV_FILE_EOF - return 0 + return 0 + fi fi local state=${1:-new} members - members="$(joinArgs $(buildMembers $state))" + if [ $ENABLE_TLS = "true" ]; then + members="$(joinArgs $(buildMembersDomain $state))" + else + members="$(joinArgs $(buildMembers $state))" + fi members="$(echo $members)" - cat > $etcdEnvFile << ETCD_ENV_FILE_EOF + if [ $ENABLE_TLS = "true" ]; then + cat > $etcdEnvFile << ETCD_ENV_FILE_EOF +ETCD_NAME=$etcdName +ETCD_DATA_DIR=$etcdDataDir +ETCD_LISTEN_PEER_URLS=$(buildMemberUrls) +ETCD_LISTEN_CLIENT_URLS=$(buildClientUrls) +ETCD_INITIAL_ADVERTISE_PEER_URLS=$(buildMemberDomainUrls) +ETCD_ADVERTISE_CLIENT_URLS=$(buildClientDomainUrls) +ETCD_INITIAL_CLUSTER=${members// /,} +ETCD_INITIAL_CLUSTER_TOKEN=$etcdClusterToken +ETCD_AUTO_COMPACTION_RETENTION=$ETCD_COMPACT_INTERVAL +ETCD_AUTO_COMPACTION_MODE=$ETCD_AUTO_COMPACTION_MODE +ETCD_QUOTA_BACKEND_BYTES=$ETCD_QUOTA_BYTES +ETCD_HEARTBEAT_INTERVAL=$ETCD_HEARTBEAT_INTERVAL +ETCD_ELECTION_TIMEOUT=$ETCD_ELECTION_TIMEOUT +ETCD_INITIAL_CLUSTER_STATE=$state +ETCD_ENABLE_V2=$ETCD_ENABLE_V2 +ETCD_CERT_FILE=/var/lib/etcd/ssl/etcd/server.pem +ETCD_KEY_FILE=/var/lib/etcd/ssl/etcd/server-key.pem +ETCD_PEER_CERT_FILE=/var/lib/etcd/ssl/etcd/peer.pem +ETCD_PEER_KEY_FILE=/var/lib/etcd/ssl/etcd/peer-key.pem +ETCD_TRUSTED_CA_FILE=/var/lib/etcd/ssl/etcd/ca.pem +ETCD_PEER_TRUSTED_CA_FILE=/var/lib/etcd/ssl/etcd/ca.pem +ETCD_PEER_CLIENT_CERT_AUTH=true +ETCD_CLIENT_CERT_AUTH=true +ETCD_ENV_FILE_EOF + else + cat > $etcdEnvFile << ETCD_ENV_FILE_EOF ETCD_NAME=$etcdName ETCD_DATA_DIR=$etcdDataDir ETCD_LISTEN_PEER_URLS=$(buildMemberUrls) @@ -90,20 +202,31 @@ ETCD_ADVERTISE_CLIENT_URLS=$(buildClientUrls) ETCD_INITIAL_CLUSTER=${members// /,} ETCD_INITIAL_CLUSTER_TOKEN=$etcdClusterToken ETCD_AUTO_COMPACTION_RETENTION=$ETCD_COMPACT_INTERVAL +ETCD_AUTO_COMPACTION_MODE=$ETCD_AUTO_COMPACTION_MODE ETCD_QUOTA_BACKEND_BYTES=$ETCD_QUOTA_BYTES +ETCD_HEARTBEAT_INTERVAL=$ETCD_HEARTBEAT_INTERVAL +ETCD_ELECTION_TIMEOUT=$ETCD_ELECTION_TIMEOUT ETCD_INITIAL_CLUSTER_STATE=$state +ETCD_ENABLE_V2=$ETCD_ENABLE_V2 ETCD_ENV_FILE_EOF + fi } hasOnlyV3Data() { local v2Keys - v2Keys=$(ETCDCTL_API=2 etcdctl --endpoints=$(buildClientUrls) ls) || return 1 + if [ $ENABLE_TLS = "true" ]; then + local etcdctlInit=$(etcdctlInitFun) + etcdctlInit=`echo ${etcdctlInit}|sed "s/--cert/--cert-file/g"|sed "s/--key/--key-file/g"|sed "s/--cacert/--ca-file/g"` + v2Keys=`$(ETCDCTL_API=2 $etcdctlInit --endpoints=$(buildClientDomainUrls) ls)` || return 1 + else + v2Keys=`$(ETCDCTL_API=2 etcdctl --endpoints=$(buildClientUrls) ls)` || return 1 + fi [ -z "$v2Keys" ] } # $ etctctl member list -# 8c2386146dd0f0ce, unstarted, , http://192.168.2.5:2380, -# b15d3498c7e3a169, started, etcd-1, http://192.168.2.3:2380, http://192.168.2.3:2379 +# 8c2386146dd0f0ce, unstarted, , https://192.168.2.5:2380, +# b15d3498c7e3a169, started, etcd-1, https://192.168.2.3:2380, https://192.168.2.3:2379 addMember() { etcdctl member list | grep -q " ${1#*=}" || etcdctl member add ${1/=/ --peer-urls=} @@ -128,7 +251,13 @@ checkMemberRemoved() { findMemberId() { local eps="$(joinArgs $(buildEndpoints))" member [ -z "$2" ] || eps=$(buildClientUrls $2) - member=$(etcdctl --endpoints=$eps member list | grep "http://$1:") + member="" + result=`echo "$1"|grep ${ETCD_CLUSTER_DNS}` + if [ "$result" != "" ];then + member=$(etcdctl member list | grep "https://$1:") + else + member=$(etcdctl member list | grep "http://$1:") + fi log "Found member '$member' of '$1' with endpoint $eps ..." echo -n ${member%%, *} } @@ -153,3 +282,165 @@ buildCluster() { checkStopped() { ! svc is-active -q } + +#-----用户认证功能开始------- +updateEtcdPasswdFile(){ + echo "EtcdRootOriginalPasswd=${ETCDDEFAULTPASSWD}">$etcdAuthPasswdFile +} + +updateEtcdPasswd(){ + EtcdRootOriginalPasswd=`cat $etcdAuthPasswdFile |tr '=' ' '|awk '{print $2}'` + etcdctlInit=$(etcdctlInitFun) + ETCDCTL_ENDPOINTS=$(joinArgs $(buildEndpoints)) runCmd $etcdctlInit --user="${ETCDDEFAULTUSER}:${EtcdRootOriginalPasswd}" user passwd root <> /opt/app/conf/etcd1.env + systemctl daemon-reload + systemctl start etcd1.service +} + + +verifyClusterHealth(){ + etcdctlInit=$(etcdctlInitFun) + sleepMaxTime=0 + eixtFlag=0 + num=0 + sleepTime=2 + while : + do + num=`expr ${num} + 1` + echo " loop check cluster ,Check every ${sleepTime} seconds!" + $etcdctlInit endpoint health --endpoints=$(buildClientDomainUrls) + if [ $? -eq 0 ]; then + break + fi + #循环验证多少秒,健康就返回正常,不健康就返回异常 + if [ ${sleepMaxTime} -ge 600 ];then + echo "sleepMaxTime>=600,exit check ${ip} ${checkPort} loop " + eixtFlag=20 + break + fi + + sleepMaxTime=`expr ${sleepMaxTime} + ${sleepTime}` + sleep ${sleepTime}s + done + + if [ ${eixtFlag} -ne 0 ];then + echo " ${ip} node repair fail!" + log " ${ip} node repair fail!"; return $EC_REPAIR_FAILED + fi +} + +machineEnvRecovery(){ + mv /etc/systemd/system/etcd1.service /etc/systemd/system/etcd.service + rm -rf /opt/app/conf/etcd1.env + sed -i 's/etcd1.env/etcd.env/g' /etc/systemd/system/etcd.service + systemctl daemon-reload + systemctl stop etcd1 + systemctl start etcd +} +#----单节点恢复功能结束------- \ No newline at end of file diff --git a/ansible/make.yml b/ansible/make.yml index 8f828b6..e1aebbb 100644 --- a/ansible/make.yml +++ b/ansible/make.yml @@ -4,8 +4,8 @@ no_log: False remote_user: root vars: - etcdVersion: v3.2.24 - etcdVersion2: v3.3.11 + etcdVersion: v3.4.0 + etcdVersion2: v3.4.16 target_env: "{{ lookup('env', 'target') }}" local_cache_path: "~/.ansible/cache" # ansible_ssh_pass: Zhu1241jie @@ -20,7 +20,8 @@ - disable-apt-jobs-1.0.0 - disable-motd-1.0.0 - app-agent-1.0.6 - - arping-1.0.0 + - arping-1.0.5 + - node-exporter-0.18.1 loop_control: loop_var: service_name @@ -39,22 +40,38 @@ comment: "etcd Service User" state: present - - name: Copy confd toml files + - name: Copy confd make.toml files copy: src: files/etc/confd/conf.d/make.sh.toml dest: /etc/confd/conf.d/make.sh.toml + - name: Copy confd nodeexporter.toml files + copy: + src: files/etc/confd/conf.d/nodeexporter.sh.toml + dest: /etc/confd/conf.d/nodeexporter.sh.toml + +# - name: Copy confd etcdauth.sh.toml files +# copy: +# src: files/etc/confd/conf.d/etcdauth.sh.toml +# dest: /etc/confd/conf.d/etcdauth.sh.toml + + + - name: Copy confd tls.sh.toml files + copy: + src: files/etc/confd/conf.d/tls.sh.toml + dest: /etc/confd/conf.d/tls.sh.toml + - name: Compile tmpl files raw: | - srcDir=files/etc/confd - destDir=files/tmp/confd - rm -rf $destDir - mkdir -p $destDir/conf.d $destDir/templates - tmplFile=$destDir/templates/make.sh.tmpl - for tmpl in $(ls $srcDir/templates/*); do - cat $tmpl >> $tmplFile - echo >> $tmplFile - done + srcDir=files/etc/confd + destDir=files/tmp/confd + rm -rf $destDir + mkdir -p $destDir/conf.d $destDir/templates + tmplFile=$destDir/templates/make.sh.tmpl + for tmpl in $(ls $srcDir/templates/01.header.sh.tmpl $srcDir/templates/02.app.env.tmpl); do + cat $tmpl >> $tmplFile + echo >> $tmplFile + done delegate_to: localhost run_once: true @@ -63,6 +80,21 @@ src: files/tmp/confd/templates/make.sh.tmpl dest: /etc/confd/templates/make.sh.tmpl + - name: Copy confd nodeexporter.tmpl files + copy: + src: files/etc/confd/templates/nodeexporter.sh.tmpl + dest: /etc/confd/templates/nodeexporter.sh.tmpl + +# - name: Copy confd etcdauth.sh.tmpl files +# copy: +# src: files/etc/confd/templates/etcdauth.sh.tmpl +# dest: /etc/confd/templates/etcdauth.sh.tmpl + + - name: Copy confd tls.sh.tmpl files + copy: + src: files/etc/confd/templates/tls.sh.tmpl + dest: /etc/confd/templates/tls.sh.tmpl + - name: Prepare directories file: path: "{{ item }}" @@ -94,10 +126,31 @@ - name: Create symbolic Link file: - src: "/opt/etcd/{{ etcdVersion }}" + src: "/opt/etcd/{{ etcdVersion2 }}" dest: "/opt/etcd/current" state: link + - name: Download install cfssl tools + get_url: + url: "https://pkg.cfssl.org/R1.2/{{ item }}" + dest: /usr/local/bin + mode: 0755 + owner: root + delegate_to: localhost + with_items: + - "cfssl_linux-amd64" + - "cfssljson_linux-amd64" + - "cfssl-certinfo_linux-amd64" + + - name: Move cfssl_linux-amd64 to cfssl + command: mv /usr/local/bin/cfssl_linux-amd64 /usr/local/bin/cfssl + + - name: Move cfssljson_linux-amd64 to cfssljson + command: mv /usr/local/bin/cfssljson_linux-amd64 /usr/local/bin/cfssljson + + - name: Move cfssl-certinfo_linux-amd64 to cfssl-certinfo + command: mv /usr/local/bin/cfssl-certinfo_linux-amd64 /usr/local/bin/cfssl-certinfo + - name: Copy systemd files copy: src: files/etc/systemd/ @@ -125,11 +178,11 @@ path: "{{ item }}" owner: root group: "{{ user }}" - mode: u=rwx,g=rx,o= + mode: u=rwx,g=rx,o=rx recurse: yes with_items: - /etc/confd - - /opt + - /opt/ - name: Prepare local SSH directory file: diff --git a/ansible/requirements.yml b/ansible/requirements.yml index f2a8e50..1c50ad4 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -2,8 +2,9 @@ - src: https://qingcloudappcenter.github.io/ansible-roles/disable-motd-1.0.0.tar.gz - src: https://qingcloudappcenter.github.io/ansible-roles/app-agent-1.0.6.tar.gz - src: https://qingcloudappcenter.github.io/ansible-roles/appctl-1.2.2.tar.gz -- src: https://qingcloudappcenter.github.io/ansible-roles/arping-1.0.0.tar.gz +- src: https://qingcloudappcenter.github.io/ansible-roles/arping-1.0.5.tar.gz - src: https://qingcloudappcenter.github.io/ansible-roles/confd-files-1.0.8.tar.gz - src: https://qingcloudappcenter.github.io/ansible-roles/install-1.0.5.tar.gz - src: https://qingcloudappcenter.github.io/ansible-roles/caddy-1.1.7.tar.gz - src: https://qingcloudappcenter.github.io/ansible-roles/create-service-user-1.0.0.tar.gz +- src: https://qingcloudappcenter.github.io/ansible-roles/node-exporter-0.18.1.tar.gz \ No newline at end of file diff --git a/app/cluster.json.mustache b/app/cluster.json.mustache index ede5aea..24bf88c 100644 --- a/app/cluster.json.mustache +++ b/app/cluster.json.mustache @@ -5,15 +5,15 @@ "incremental_backup_supported": false, "backup_policy": "device", "advanced_actions": [ "scale_horizontal" ], - "upgrade_policy": [ "appv-7izgt3gc" ], + "upgrade_policy": ["appv-u2me89o5","appv-fpqnuucx","appv-mclc6o8a","appv-q98yvbp3","appv-5taat5ql","appv-jzhr30i8","appv-h1n2681n"], "upgrading_policy": "sequential", "multi_zone_policy": "round_robin", "nodes": [{ "role": "etcd_node", "container": { "type": "kvm", - "zone": "sh1", - "image": "img-l1sksi5f" + "zone": "pek3", + "image": "img-oa06y5tj" }, "count": {{cluster.etcd_node.count}}, "instance_class": {{cluster.etcd_node.instance_class}}, @@ -26,8 +26,11 @@ "mount_options": "defaults,noatime" }, "server_id_upper_bound": 255, + "vertical_scaling_policy":"sequential", "services": { "init": { + "nodes_to_execute_on": 1, + "post_start_service": true, "cmd": "/opt/app/bin/ctl.sh init" }, "start": { @@ -47,7 +50,8 @@ "timeout": 100 }, "upgrade": { - "cmd": "/opt/app/bin/ctl.sh init" + "post_start_service": true, + "cmd": "/opt/app/bin/ctl.sh upgrade" }, "backup": { "nodes_to_execute_on": 1, @@ -56,13 +60,21 @@ "restore": { "cmd": "/opt/app/bin/ctl.sh restore" }, - "repair": { + "repairMajorityNode": { "type": "custom", "cmd": "/opt/app/bin/ctl.sh repair", "service_params": { "node.ip": {{service_params.etcd_node.node.ip}} } }, + "repairMinorityNode": { + "type": "custom", + "cmd": "/opt/app/bin/ctl.sh repairMinorityNode", + "service_params": { + "healthnode.ip": {{service_params.etcd_node.healthnode.ip}}, + "unhealthnode.ip": {{service_params.etcd_node.unhealthnode.ip}} + } + }, "compact": { "nodes_to_execute_on": 1, "type": "custom", @@ -72,7 +84,12 @@ }, "env": { "etcautocompact": {{env.etcd_node.autocompact}}, - "etcd.quota.backend.bytes": {{env.etcd_node.quotabytes}} + "etcd.quota.backend.bytes": {{env.etcd_node.quotabytes}}, + "nodeexporter": {{env.etcd_node.nodeexporter}}, + "enable_TLS": {{env.etcd_node.enable_TLS}}, + "cluster_DNS": {{env.etcd_node.cluster_DNS}}, + "etcdheartbeatinterval": {{env.etcd_node.etcd_heartbeat_interval}}, + "etcdelectiontimeout": {{env.etcd_node.etcd_election_timeout}} }, "monitor": { "enable": true, @@ -90,7 +107,7 @@ "etcd_network_peer_sent_bytes_total": { "statistics_type": "latest" }, - "http_requests_total": { + "etcd_server_proposals_failed_total": { "statistics_type": "latest" }, "process_resident_memory_bytes": { @@ -105,21 +122,22 @@ "data": ["etcd_network_peer_sent_bytes_total"], "memory": ["process_resident_memory_bytes", "process_virtual_memory_bytes"] }, - "display": ["state", "data", "memory", "http_requests_total"], + "display": ["state", "data", "memory", "etcd_server_proposals_failed_total"], "alarm": ["etcd_server_has_leader"] } }, { "role": "etcd_proxy", "container": { "type": "kvm", - "zone": "sh1", - "image": "img-l1sksi5f" + "zone": "pek3", + "image": "img-oa06y5tj" }, "count": {{cluster.etcd_proxy.count}}, "instance_class": {{cluster.etcd_proxy.instance_class}}, "cpu": {{cluster.etcd_proxy.cpu}}, "memory": {{cluster.etcd_proxy.memory}}, "server_id_upper_bound": 255, + "vertical_scaling_policy":"sequential", "services": { "init": { "cmd": "/opt/app/bin/ctl.sh init" @@ -133,8 +151,14 @@ "cmd": "/opt/app/bin/ctl.sh stop" }, "upgrade": { - "cmd": "/opt/app/bin/ctl.sh init" + "post_start_service": true, + "cmd": "/opt/app/bin/ctl.sh upgrade" } + }, + "env": { + "nodeexporter": {{env.etcd_node.nodeexporter}}, + "enable_TLS": {{env.etcd_node.enable_TLS}}, + "cluster_DNS": {{env.etcd_node.cluster_DNS}} } }], "health_check": { @@ -150,7 +174,15 @@ "endpoints": { "client": { "port": 2379, - "protocol": "http" + "protocol": "根据是否开启TLS选择http还是https" + } + }, + "display_tabs": { + "TLSConnectionGuide-column": { + "cmd": "/opt/app/bin/ctl.sh getAccessCertificate", + "roles_to_execute_on": ["etcd_node"], + "description": "TLSConnectionGuideDescrip", + "timeout": 10 + } } - } } diff --git a/app/config.json b/app/config.json index 38dbb2c..7be48ff 100644 --- a/app/config.json +++ b/app/config.json @@ -9,7 +9,7 @@ "label": "name", "description": "etcdAppName", "type": "string", - "default": "etcd 3.2.24", + "default": "etcd 3.4.16", "required": "no" }, { "key": "description", @@ -83,8 +83,8 @@ "label": "InstanceClassLabel", "description": "InstanceClassDescrip", "type": "integer", - "default": 1, - "range": [0, 1], + "default": 202, + "range": [101, 202, 0, 1], "required": "yes" }, { "key": "volume_size", @@ -145,8 +145,8 @@ "label": "InstanceClassLabel", "description": "InstanceClassDescrip", "type": "integer", - "default": 1, - "range": [0, 1], + "default": 202, + "range": [101, 202, 0, 1], "required": "yes" }] }] @@ -177,6 +177,54 @@ "min": 2147483648, "max": 8589934592, "required": "yes" + },{ + "key": "nodeexporter", + "label": "node.exporter", + "description": "nodeexporterdescrip", + "type": "string", + "default": "false", + "range": ["true", "false"], + "required": "no" + },{ + "key": "enable_TLS", + "label": "enable.TLS", + "description": "enableTLSdescrip", + "type": "string", + "default": "false", + "range": ["true", "false"], + "required": "no", + "expanded": false + },{ + "key": "cluster_DNS", + "label": "cluster.DNS", + "description": "clusterDNSdescrip", + "type": "string", + "default": ".etcdsvc.common", + "pattern": "^(?=^.{3,255}$)(\\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62}){2,10}$", + "dependencies":[ + {"refkey":"enable_TLS","values":["true"],"operator":"in"} + ], + "required": "yes" + },{ + "key": "etcd_heartbeat_interval", + "label": "etcdheartbeatinterval", + "description": "etcdheartbeatintervaldescrip", + "type": "integer", + "changeable": true, + "default": 100, + "min": 50, + "max": 3000, + "required": "yes" + },{ + "key": "etcd_election_timeout", + "label": "etcdelectiontimeout", + "description": "etcdelectiontimeoutdescrip", + "type": "integer", + "changeable": true, + "default": 1000, + "min": 500, + "max": 50000, + "required": "yes" }] }] },{ @@ -194,7 +242,22 @@ "type": "string", "pattern": "(\\d+\\.){3}\\d+", "required": "yes" - }] + }, { + "key": "healthnode.ip", + "label": "Health Node IP", + "description": "", + "type": "string", + "pattern": "(\\d+\\.){3}\\d+", + "required": "yes" + },{ + "key": "unhealthnode.ip", + "label": "unHealth Node IP", + "description": "repairMinorityNode descrip", + "type": "string", + "pattern": "(\\d+\\.){3}\\d+", + "required": "yes" + } + ] }] }] } diff --git a/app/locale/en.json b/app/locale/en.json index b7a97a5..3954f21 100644 --- a/app/locale/en.json +++ b/app/locale/en.json @@ -33,5 +33,10 @@ "etcdnodelb":"LB for etcd", "etcdnodelbdescript":"load balancer for etcd service", "corednslb":"LB for coredns", - "corednslbdescript":"Load balancer for coredns service" + "corednslbdescript":"Load balancer for coredns service", + "nodeexporterdescrip": "Enable node_exporter,Monitor server CPU, memory, disk, I / O and other information,The default crawl address is http://IP:9100/metrics", + "node.exporter": "node_exporter", + "clusterDNSdescrip": "Set the domain name. The default is. Etcdsvc.common. Finally, the domain name of the etcd node is etcd *. Etcdsvc.common (* is a number). If it is a proxy node, the domain name is proxy *. Etcdsvc.common; Please meet the format of. XXX when setting the domain name,. XXX at least twice and at most 10 times", + "TLSConnectionGuideDescrip": "When the etcd cluster starts TLS, obtain the following hosts domain name configuration and ca pem, client. pem, client-key. PEM certificate, configure the hostsdomain to the hosts of the host where your etcd client is located, and copy the three *.pem access certificates to the directory where the certificates are placed!", + "TLSConnectionGuide-column": "TLSConnectionGuide" } diff --git a/app/locale/zh-cn.json b/app/locale/zh-cn.json index 379bf3f..499c582 100644 --- a/app/locale/zh-cn.json +++ b/app/locale/zh-cn.json @@ -24,16 +24,20 @@ "VolumeSizelabel": "硬盘容量", "etcd_node": "etcd服务", "etcd_proxy": "etcd代理", - "InstanceClassDescrip": "虚拟机实例的类型,如性能型,超高性能型", + "InstanceClassDescrip": "虚拟机实例的类型,如基础型,企业型", "InstanceClassLabel": "实例类型", "compact": "清理历史数据", "etcautocompact": "etcd autocompact", "etcdautocompactdescrip": "自动清理历史数据的时间间隔,单位是小时,比如 1 表示每隔一小时清理一次数据;默认为 0 表示不自动清除", "etcd.quota.backend.bytes": "etcd quota-backend-bytes", "etcdquotabytesdescrip": "存储大小限制,单位是字节,默认大小为2GB,建议最大值为8GB", - "repair": "修复", - "Source Node IP": "源节点 IP 地址", - "The node IP of etcd from which to take data": "使用源节点上的数据对集群做修复;注意:其他节点原有的数据将被抹除,修复完成后所有节点拥有与此节点相同的数据,如有疑惑请先提交工单获取协助", + "repairMajorityNode": "多数节点不正常修复集群", + "repairMinorityNode": "少数节点不正常修复集群", + "Source Node IP": "数据正常的节点的IP地址", + "Health Node IP": "其中一个正常节点IP地址:", + "unHealth Node IP": "不正常节点IP地址:", + "The node IP of etcd from which to take data": "当集群多数节点(大于n/2)不健康,使用本集群中数据是正常的节点(通过看每个节点的etcd日志确定出数据正常的节点)对集群做修复,服务会中断;注意:其他节点原有的数据将被抹除,修复完成后所有节点拥有与此节点相同的数据,如有疑惑请先提交工单获取协助", + "repairMinorityNode descrip": "当集群少数节点(小于n/2)不健康,可以使用本集群的正常节点上的数据对本集群不正常节点做修复,服务不会中断;注意:不正常节点原有的数据将被抹除,修复完成后不正常节点拥有与正常节点相同的数据,如有疑惑请先提交工单获取协助", "VolumeClassDes": "硬盘类型,如性能型,超高性能型", "VolumeClasslabel": "硬盘类型", "etcdnodelb": "etcd负载均衡器", @@ -46,9 +50,28 @@ "state": "状态", "data": "数据", "memory": "内存", - "http_requests_total": "HTTP 请求数", + "etcd_server_proposals_failed_total": "失败提案总数", "process_resident_memory_bytes": "驻留内存(RSS)字节数", "process_virtual_memory_bytes": "虚拟内存字节数", "corednslb": "coredns负载均衡器", - "corednslbdescript": "给coredns服务的负载均衡器,端口为53" + "corednslbdescript": "给coredns服务的负载均衡器,端口为53", + "nodeexporterdescrip": "开启nodeexporter,可以监控服务器CPU、内存、磁盘、I/O等信息,Node Exporter默认的抓取地址为:http://IP:9100/metrics,每个节点有自己的单独的Node Exporter", + "node.exporter": "node_exporter", + "etcd.default.user": "etcd default-user", + "etcduserdescrip": "默认为etcd创建root用户,且不可修改", + "etcd.default.passwd": "etcd default-passwd", + "etcdpasswddescrip": "为etcd创建root用户的密码,可以修改,默认密码为root;集群创建好之后输入新的密码会覆盖原密码", + "etcd.auth": "etcd auth", + "etcdauthdescrip": "关闭、开启用户认证", + "enableTLSdescrip": "关闭、开启TLS认证", + "clusterDNSdescrip": "设置域名,默认是.etcdsvc.common,最终etcd节点的域名是etcd1.etcdsvc.common(这里的3级域名etcd1中etcd固定的,1是etcd节点唯一数字编号),如果是代理节点,域名是proxy1.etcdsvc.common;设置域名是请满足.xxx的格式,.xxx至少2次,最多10次", + "TLSConnectionGuide-column": "TLS连接指南", + "TLSConnectionGuideDescrip": "当ETCD集群开启TLS,获取如下的hosts域名配置以及ca.pem, client.pem, client-key.pem 证书,将hostsDomain配置到您的ETCD客户端所在的主机的hosts中,将3个*.pem访问证书分别复制到您使用证书的目录!", + "err_code20": "节点修复失败!", + "err_code21": "正常节点IP输入错误或者不正常节点IP输入错!", + "etcdheartbeatinterval":"etcd heartbeat interval", + "etcdheartbeatintervaldescrip": "默认值是100ms,建议心跳间隔的值应介于成员之间的平均平均往返时间(RTT)的最大值附近,通常约为往返时间的0.5-1.5倍。测量往返时间(RTT)的最简单方法是使用PING实用程序。", + "etcdelectiontimeout": "etcd election timeout", + "etcdelectiontimeoutdescrip": "应该根据心跳间隔和成员之间的平均往返时间来设置选举超时。选举超时时间必须至少是往返时间的10倍,这样才能解决网络中的差异。例如,如果成员之间的往返时间为10毫秒,则选举超时应至少为100毫秒。选举超时应设置为心跳间隔的至少5到10倍,以解决领导者复制中的差异。对于50ms的心跳间隔,将选举超时设置为至少250ms-500ms。选举超时上限为50000ms(50s),仅在部署全局分布的etcd集群时才应使用。" + }