/usr/share/glusterfs/scripts/stop-all-gluster-processes.sh is in glusterfs-common 3.13.2-1build1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/env bash
#
# Kill all the processes/services except glusterd
#
# Usage: ./extras/stop-all-gluster-processes.sh [-g] [-h]
# options:
# -g Terminate in graceful mode
# -h Show this message, then exit
#
# eg:
# 1. ./extras/stop-all-gluster-processes.sh
# 2. ./extras/stop-all-gluster-processes.sh -g
#
# By default, this script executes in force mode, i.e. all of brick, gsyncd
# and other glustershd services/processes are killed without checking for
# ongoing tasks such as geo-rep, self-heal, rebalance, etc., which may lead
# to inconsistency after the node is brought back.
#
# On specifying the '-g' option this script works in graceful mode: to
# maintain data consistency, the script fails with a valid exit code if any
# of the gluster processes are busy doing their jobs.
#
# The author of page [1] proposes restricting user-defined exit codes to the
# range 64 - 113; see the link for the explanation behind this choice.
#
# The exit code returned by stop-all-gluster-processes.sh:
# 0 No errors/Success
# 64 Rebalance is in progress
# 65 Self-Heal is in progress
# 66 Tier daemon running on this node
# 127 option not found
#
# [1] http://www.tldp.org/LDP/abs/html/exitcodes.html
# global: number of failed kill attempts; incremented by kill_georep_gsync
# and used by main as the exit status of the script
errors=0
# Find the glusterfs fuse mounts and print the pids of their mount processes.
#
# Arguments:
#   $1 - (optional) mount table to scan; defaults to /proc/mounts
# Outputs:
#   the matching pids on one line, separated by blanks (an empty line when
#   nothing matches); callers are expected to word-split the result
get_mount_pids()
{
    local mounts_file=${1:-/proc/mounts}
    local fsname mountpoint fstype
    local server volname
    local -a pids=()

    # the '|| [[ -n ... ]]' part keeps a last line without trailing newline
    while read -r fsname mountpoint fstype _ || [[ -n "${fsname}" ]];
    do
        [[ "${fstype}" == "fuse.glusterfs" ]] || continue
        # the mounted source is split at its first ":/" into server and
        # volume name; other forms cannot be matched against a process
        [[ "${fsname}" == *:/* ]] || continue
        server=${fsname%%:/*}
        volname=${fsname#*:/}

        # the mount table stores blanks in paths as octal escapes (e.g.
        # "\040" for a space); decode them so that the path can be compared
        # with the process arguments
        printf -v mountpoint '%b' "${mountpoint}"

        # -F compares literally, so dots in host names or paths are not
        # treated as regex wildcards. The filters are kept as separate greps
        # on purpose: none of them carries all three patterns in its own
        # argument list, so the pipeline never matches itself in 'ps'.
        # Word splitting of the result is intended (one pid per word).
        # shellcheck disable=SC2207
        pids+=( $(ps -Ao pid,args |
                  grep -F -w -- "volfile-server=${server}" |
                  grep -F -w -- "volfile-id=/${volname}" |
                  grep -F -w -- "${mountpoint}" |
                  awk '{print $1}') )
    done < "${mounts_file}"

    echo "${pids[*]}"
}
# Signal the mount processes ('glusterfs') reported by get_mount_pids.
#
# Arguments:
#   $1 - signal name without the SIG prefix, e.g. TERM or KILL
kill_mounts()
{
    local sig=${1}
    local -a targets
    local target

    # word splitting is intended: get_mount_pids prints blank-separated pids
    targets=( $(get_mount_pids) )

    for target in "${targets[@]}"
    do
        echo "sending SIG${sig} to mount process with pid: ${target}"
        kill "-${sig}" "${target}"
    done
}
# Signal the brick processes and node services whose pid is recorded in a
# '*.pid' file.
#
# Arguments:
#   $1 - signal name without the SIG prefix, e.g. TERM or KILL
#   $2 - (optional) directory searched recursively for '*.pid' files;
#        defaults to /var/run/gluster/
#
# A failing kill is reported by kill itself and is not added to 'errors'.
# Pid files without any usable pid are reported on stderr and skipped.
kill_bricks_and_services()
{
    local signal=${1}
    local piddir=${2:-/var/run/gluster/}
    local pidfile
    local pid
    local -a pids
    local sent

    # NUL-delimited so that paths containing blanks stay in one piece
    while IFS= read -r -d '' pidfile;
    do
        pids=()
        # split the whole file on blanks/newlines; read returns non-zero at
        # end of file even though the array has been filled
        read -r -d '' -a pids < "${pidfile}" || true

        sent=0
        for pid in "${pids[@]}";
        do
            # accept plain positive numbers only: an empty or garbled value
            # would make kill fail, and 0 or a negative number would signal
            # whole process groups instead of a single process
            [[ "${pid}" =~ ^[1-9][0-9]*$ ]] || continue
            echo "sending SIG${signal} to pid: ${pid}"
            kill "-${signal}" "${pid}"
            sent=1
        done

        if [[ ${sent} -eq 0 ]]
        then
            echo "skipping ${pidfile}: no valid pid found" >&2
        fi
    done < <(find "${piddir}" -name '*.pid' -print0)
}
# For geo-replication only the 'monitor' writes a pid file; the other
# processes have none, so they are looked up through 'ps' and signalled here.
#
# Arguments:
#   $1 - signal name without the SIG prefix, e.g. TERM or KILL
# Globals:
#   errors - incremented by one when kill reports a failure
kill_georep_gsync()
{
    local sig=${1}
    local targets

    # FIXME: add a stricter/better check
    targets=$(ps -Ao pid,args | grep gluster | grep gsync | awk '{print $1}')

    # nothing to do when no process matched
    if [ -z "${targets}" ]
    then
        return 0
    fi

    echo "sending SIG${sig} to geo-rep gsync process ${targets}"
    # ${targets} stays unquoted: it may hold several pids, one per line
    if ! kill "-${sig}" ${targets}
    then
        errors=$((errors + 1))
    fi
}
# Check whether all processes are ready to die, i.e. whether no volume has
# background work in progress.
#
# Outputs:
#   one number on stdout, decided by the first busy volume that is found:
#     0  - nothing in progress
#     64 - rebalance is in progress
#     65 - self-heal still has entries to process
#     66 - tiering is in progress on localhost
check_background_tasks()
{
    local volumes
    local volname
    local pending
    local quit=0

    volumes=$(gluster vol list)

    # word splitting is intended: one volume name per word
    for volname in ${volumes};
    do
        # tiering
        if [[ $(gluster volume tier "${volname}" status 2> /dev/null |
                grep "localhost" | grep -c "in progress") -gt 0 ]]
        then
            quit=66
            break
        fi

        # rebalance
        if [[ $(gluster volume rebalance "${volname}" status 2> /dev/null |
                grep -c "in progress") -gt 0 ]]
        then
            quit=64
            break
        fi

        # self heal: add up the "Number of entries" counters that are listed
        pending=$(gluster volume heal "${volname}" info |
                  awk '/Number of entries/ { sum += $4 } END { print sum + 0 }')
        if [[ "${pending}" -gt 0 ]]
        then
            quit=65
            break
        fi

        # geo-rep, snapshot and quota don't need grace checks,
        # as they ensure consistency on force kills
    done

    echo "${quit}"
}
# Print the command-line help on stdout; the script name is taken from $0.
usage()
{
    local self=$0

    printf '%s\n' \
        "Usage: ${self} [-g] [-h]" \
        "options:" \
        "-g Terminate in graceful mode" \
        "-h Show this message, then exit" \
        "eg:" \
        "1. ${self}" \
        "2. ${self} -g"
}
# Entry point: parse the options, optionally run the graceful-mode check,
# then signal all mount, geo-rep, brick and service processes, first with
# TERM and five seconds later with KILL.
#
# Arguments:
#   -g  graceful mode: exit with the code printed by check_background_tasks
#       when that code is non-zero, before anything is signalled
#   -h  print the usage text and exit 0
#   any other option prints the usage text and exits 127
# Exit status:
#   otherwise the value of the global 'errors'
main()
{
    local opt
    local graceful=0
    local busy
    local OPTIND=1

    # Only record the options here. The whole command line is validated
    # before any gluster command runs, so that an unknown option or -h that
    # follows -g is answered right away instead of after the graceful check.
    while getopts "gh" opt; do
        case "${opt}" in
            g)
                graceful=1
                ;;
            h)
                usage
                exit 0
                ;;
            *)
                usage
                exit 127
                ;;
        esac
    done
    # remove all the options that have been parsed by getopts
    shift $((OPTIND - 1))

    if [[ ${graceful} -eq 1 ]]
    then
        busy=$(check_background_tasks)
        if [[ "${busy}" -ne 0 ]]
        then
            exit "${busy}"
        fi
        # else safe to kill
    fi

    kill_mounts TERM
    kill_georep_gsync TERM
    kill_bricks_and_services TERM

    sleep 5
    echo ""

    # still not terminated? let's pass SIGKILL
    kill_mounts KILL
    kill_georep_gsync KILL
    kill_bricks_and_services KILL

    exit "${errors}"
}
# run the script: hand all command-line arguments over to main
main "$@"
|