/sbin/gfs2_lockcapture is in gfs2-utils 3.1.6-0ubuntu3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
"""
The script "gfs2_lockcapture" will capture locking information from GFS2 file
systems and DLM.
@author : Shane Bradley
@contact : sbradley@redhat.com
@version : 0.9
@copyright : GPLv2
"""
import sys
import os
import os.path
import logging
import logging.handlers
from optparse import OptionParser, Option
import time
import platform
import shutil
import subprocess
import tarfile
# #####################################################################
# Global vars:
# #####################################################################
"""
@cvar VERSION_NUMBER: The version number of this script.
@type VERSION_NUMBER: String
@cvar MAIN_LOGGER_NAME: The name of the logger.
@type MAIN_LOGGER_NAME: String
@cvar PATH_TO_DEBUG_DIR: The path to the debug directory for the linux kernel.
@type PATH_TO_DEBUG_DIR: String
@cvar PATH_TO_PID_FILENAME: The path to the pid file that will be used to make
sure only 1 instance of this script is running at any time.
@type PATH_TO_PID_FILENAME: String
"""
# The version number of this script.
VERSION_NUMBER = "0.9-7"
# The name of the logger; it is the basename of the path used to invoke the
# script.
MAIN_LOGGER_NAME = "%s" %(os.path.basename(sys.argv[0]))
# The path to the debug directory for the linux kernel.
PATH_TO_DEBUG_DIR="/sys/kernel/debug"
# The path to the pid file that is used to make sure only 1 instance of this
# script is running at any time.
PATH_TO_PID_FILENAME = "/var/run/%s.pid" %(os.path.basename(sys.argv[0]))
# #####################################################################
# Class to define what a clusternode is.
# #####################################################################
class ClusterNode:
    """
    This class represents a cluster node that is a current member in a cluster.
    """
    def __init__(self, clusternodeName, clusternodeID, clusterName, mapOfMountedFilesystemLabels):
        """
        @param clusternodeName: The name of the cluster node.
        @type clusternodeName: String
        @param clusternodeID: The id of the cluster node.
        @type clusternodeID: Int
        @param clusterName: The name of the cluster that this cluster node is a
        member of.
        @type clusterName: String
        @param mapOfMountedFilesystemLabels: A map of filesystem labels(key) for
        a mounted filesystem. The value is the line for the matching mounted
        filesystem from the mount -l command.
        @type mapOfMountedFilesystemLabels: Dict
        """
        self.__clusternodeName = clusternodeName
        self.__clusternodeID = clusternodeID
        self.__clusterName = clusterName
        self.__mapOfMountedFilesystemLabels = mapOfMountedFilesystemLabels

    def __str__(self):
        """
        This function will return a string representation of the object. The
        first line is "clustername:nodename(id:N)" and it is followed by one
        line for each filesystem label, sorted by label.

        @return: Returns a string representation of the object.
        @rtype: String
        """
        rString = "%s:%s(id:%d)" %(self.getClusterName(), self.getClusterNodeName(), self.getClusterNodeID())
        # sorted() works on a list as well as on a dictionary view.
        for fsLabel in sorted(self.__mapOfMountedFilesystemLabels.keys()):
            rString += "\n\t%s --> %s" %(fsLabel, self.__mapOfMountedFilesystemLabels.get(fsLabel))
        return rString.rstrip()

    def getClusterNodeName(self):
        """
        Returns the name of the cluster node.

        @return: Returns the name of the cluster node.
        @rtype: String
        """
        return self.__clusternodeName

    def getClusterNodeID(self):
        """
        Returns the id of the cluster node.

        @return: Returns the id of the cluster node.
        @rtype: Int
        """
        return self.__clusternodeID

    def getClusterName(self):
        """
        Returns the name of cluster that this cluster node is a member of.

        @return: Returns the name of cluster that this cluster node is a member
        of.
        @rtype: String
        """
        return self.__clusterName

    def getMountedGFS2FilesystemNames(self, includeClusterName=True):
        """
        Returns the names of all the mounted GFS2 filesystems. By default
        includeClusterName is True which will include the name of the cluster
        and the GFS2 filesystem name(ex. f18cluster:mygfs2vol1) in the list of
        mounted GFS2 filesystems. If includeClusterName is False it will only
        return a list of all the mounted GFS2 filesystem names(ex. mygfs2vol1).
        Labels without a ":" separator are left out when includeClusterName is
        False.

        @return: Returns a list of all the mounted GFS2 filesystem names.
        @rtype: Array
        @param includeClusterName: By default this option is True and will
        include the name of the cluster and the GFS2 filesystem name. If False
        then only the GFS2 filesystem name will be included.
        @type includeClusterName: Boolean
        """
        if (includeClusterName):
            # Always hand back a real list, never a dictionary view.
            return list(self.__mapOfMountedFilesystemLabels.keys())
        listOfGFS2MountedFilesystemLabels = []
        for fsLabel in self.__mapOfMountedFilesystemLabels.keys():
            fsLabelSplit = fsLabel.split(":", 1)
            if (len(fsLabelSplit) == 2):
                listOfGFS2MountedFilesystemLabels.append(fsLabelSplit[1])
        return listOfGFS2MountedFilesystemLabels

    def getMountedGFS2FilesystemPaths(self):
        """
        Returns a map of all the mounted GFS2 filesystem paths. The key is the
        GFS2 fs name(clustername:fs name) and value is the mountpoint. Entries
        whose mount line does not have the form
        "<device> on <mountpoint> type <fstype> ..." are left out.

        @return: Returns a map of all the mounted GFS2 filesystem paths. The key
        is the GFS2 fs name(clustername:fs name) and value is the mountpoint.
        @rtype: Map
        """
        mapOfGFS2MountedFilesystemPaths = {}
        for (fsLabel, mountLine) in self.__mapOfMountedFilesystemLabels.items():
            # Split on the space delimited keywords so that a device or mount
            # point that merely contains the letters "on" or "type" (for
            # example /dev/mapper/london-lv) is not cut in the wrong place.
            remainder = mountLine.partition(" on ")[2]
            mountPoint = remainder.rpartition(" type ")[0].strip()
            if (len(mountPoint) > 0):
                mapOfGFS2MountedFilesystemPaths[fsLabel] = mountPoint
        return mapOfGFS2MountedFilesystemPaths
# #####################################################################
# Helper functions.
# #####################################################################
def runCommand(command, listOfCommandOptions, standardOut=subprocess.PIPE, standardError=subprocess.PIPE):
    """
    This function will execute a command. It will return True if the return code
    was zero, otherwise False is returned.

    @return: Returns True if the return code was zero, otherwise False is
    returned. False is also returned if the command could not be started.
    @rtype: Boolean
    @param command: The command that will be executed.
    @type command: String
    @param listOfCommandOptions: The list of options for the command that will
    be executed.
    @type listOfCommandOptions: Array
    @param standardOut: The pipe that will be used to write standard output. By
    default the pipe that is used is subprocess.PIPE.
    @type standardOut: Pipe
    @param standardError: The pipe that will be used to write standard error. By
    default the pipe that is used is subprocess.PIPE.
    @type standardError: Pipe
    """
    try:
        commandList = [command]
        commandList += listOfCommandOptions
        task = subprocess.Popen(commandList, stdout=standardOut, stderr=standardError)
        # communicate() drains the pipes and then waits for the process.
        # Calling wait() first can deadlock when the child writes more output
        # than the pipe buffer can hold, because nobody is reading the pipe.
        task.communicate()
        return (task.returncode == 0)
    except OSError:
        # The command could not be started, so there is no output to report.
        commandOptionString = ""
        for option in listOfCommandOptions:
            commandOptionString += "%s " %(option)
        message = "An error occurred running the command: $ %s %s\n" %(command, commandOptionString)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
    return False
def runCommandOutput(command, listOfCommandOptions, standardOut=subprocess.PIPE, standardError=subprocess.PIPE):
    """
    This function will execute a command. Returns the output that was written
    to standard output with the surrounding whitespace removed. None is
    returned if there was an error.

    @return: Returns the output that was written to standard output. None is
    returned if the command could not be started. An empty string is returned
    if standard output was not captured through a pipe.
    @rtype: String
    @param command: The command that will be executed.
    @type command: String
    @param listOfCommandOptions: The list of options for the command that will
    be executed.
    @type listOfCommandOptions: Array
    @param standardOut: The pipe that will be used to write standard output. By
    default the pipe that is used is subprocess.PIPE.
    @type standardOut: Pipe
    @param standardError: The pipe that will be used to write standard error. By
    default the pipe that is used is subprocess.PIPE.
    @type standardError: Pipe
    """
    try:
        commandList = [command]
        commandList += listOfCommandOptions
        task = subprocess.Popen(commandList, stdout=standardOut, stderr=standardError)
        # communicate() drains the pipes and then waits for the process.
        # Calling wait() first can deadlock when the child writes more output
        # than the pipe buffer can hold, because nobody is reading the pipe.
        (stdout, stderr) = task.communicate()
    except OSError:
        # The command could not be started, so there is no output to report.
        commandOptionString = ""
        for option in listOfCommandOptions:
            commandOptionString += "%s " %(option)
        message = "An error occurred running the command: $ %s %s\n" %(command, commandOptionString)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
        return None
    if (stdout is None):
        # communicate() returns None for a stream that is not a pipe.
        return ""
    return stdout.strip()
def writeToFile(pathToFilename, data, appendToFile=True, createFile=False):
    """
    This function will write a string to a file. A newline is written after the
    string.

    @return: Returns True if the string was successfully written to the file,
    otherwise False is returned.
    @rtype: Boolean
    @param pathToFilename: The path to the file that will have a string written
    to it.
    @type pathToFilename: String
    @param data: The string that will be written to the file.
    @type data: String
    @param appendToFile: If True then the data will be appended to the file, if
    False then the data will overwrite the contents of the file.
    @type appendToFile: Boolean
    @param createFile: If True then the file will be created if it does not
    exist (its parent directory has to exist), if False then file will not be
    created if it does not exist resulting in no data being written to the file.
    @type createFile: Boolean
    """
    parentDir = os.path.dirname(pathToFilename)
    if (len(parentDir) == 0):
        # A bare filename lives in the current working directory.
        parentDir = os.curdir
    if (not (os.path.isfile(pathToFilename) or (createFile and os.path.isdir(parentDir)))):
        return False
    filemode = "w"
    if (appendToFile):
        filemode = "a"
    try:
        fout = open(pathToFilename, filemode)
        try:
            fout.write(data + "\n")
        finally:
            # Close the file even when the write fails so that the handle is
            # not leaked.
            fout.close()
        return True
    except UnicodeEncodeError:
        message = "There was a unicode encode error writing to the file: %s." %(pathToFilename)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
    except IOError:
        message = "There was an error writing to the file: %s." %(pathToFilename)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
    return False
def mkdirs(pathToDSTDir):
    """
    Create the directory pathToDSTDir, including any missing parent
    directories, unless something already exists at that path.

    @return: Returns True if the directory was created or already exists,
    otherwise False is returned.
    @rtype: Boolean
    @param pathToDSTDir: The path to the directory that will be created.
    @type pathToDSTDir: String
    """
    if (os.path.isdir(pathToDSTDir)):
        return True
    # Only try to create the directory when nothing exists at the path and the
    # path is not the empty string.
    nothingAtPath = not os.access(pathToDSTDir, os.F_OK)
    if (nothingAtPath and (len(pathToDSTDir) > 0)):
        try:
            os.makedirs(pathToDSTDir)
        except (OSError, os.error):
            logging.getLogger(MAIN_LOGGER_NAME).error("Could not create the directory: %s." %(pathToDSTDir))
            return False
        except (IOError, os.error):
            logging.getLogger(MAIN_LOGGER_NAME).error("Could not create the directory with the path: %s." %(pathToDSTDir))
            return False
    # The final answer is whether a directory is there now.
    return os.path.isdir(pathToDSTDir)
def removePIDFile():
    """
    This function will remove the pid file.

    @return: Returns True if the file was successfully removed or does not
    exist, otherwise False is returned.
    @rtype: Boolean
    """
    message = "Removing the pid file: %s" %(PATH_TO_PID_FILENAME)
    logging.getLogger(MAIN_LOGGER_NAME).debug(message)
    if (os.path.exists(PATH_TO_PID_FILENAME)):
        try:
            os.remove(PATH_TO_PID_FILENAME)
        except (IOError, OSError):
            # os.remove() reports failures as OSError.
            message = "There was an error removing the file: %s." %(PATH_TO_PID_FILENAME)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
    # Success means that the pid file is gone.
    return (not os.path.exists(PATH_TO_PID_FILENAME))
def archiveData(pathToSrcDir):
    """
    This function will archive a directory into a .tar.bz2 file and return the
    path to the tar.bz2 file that was created. The file is named after the
    source path and the hostname: "<pathToSrcDir>-<hostname>.tar.bz2". An
    archive that already exists at that path is removed first. If the tar.bz2
    file failed to be created then an empty string will be returned which would
    indicate an error occurred.

    @return: This function will return the path to the tar.bz2 file that was
    created. If the tar.bz2 file failed to be created then an empty string will
    be returned which would indicate an error occurred.
    @rtype: String
    @param pathToSrcDir: The path to the directory that will be archived into a
    .tar.bz2 file.
    @type pathToSrcDir: String
    """
    if (not os.path.exists(pathToSrcDir)):
        return ""
    pathToTarFilename = "%s-%s.tar.bz2" %(pathToSrcDir, platform.node())
    if (os.path.exists(pathToTarFilename)):
        message = "A compressed archvied file already exists and will be removed: %s" %(pathToTarFilename)
        logging.getLogger(MAIN_LOGGER_NAME).status(message)
        try:
            os.remove(pathToTarFilename)
        except (IOError, OSError):
            # os.remove() reports failures as OSError.
            message = "There was an error removing the file: %s." %(pathToTarFilename)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
            return ""
    message = "Creating a compressed archvied file: %s" %(pathToTarFilename)
    logging.getLogger(MAIN_LOGGER_NAME).status(message)
    try:
        tar = tarfile.open(pathToTarFilename, "w:bz2")
        try:
            tar.add(pathToSrcDir, arcname=os.path.basename(pathToSrcDir))
        finally:
            # Always release the archive handle, even if adding files failed.
            tar.close()
    except (tarfile.TarError, IOError, OSError):
        # Filesystem errors are reported the same way as tar format errors so
        # that the caller gets the documented empty string.
        message = "There was an error creating the tarfile: %s." %(pathToTarFilename)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
        return ""
    if (os.path.exists(pathToTarFilename)):
        return pathToTarFilename
    return ""
def getDataFromFile(pathToSrcFile):
    """
    This function will return the data in an array. Where each line in the file
    is a separate item in the array (the trailing newline is kept). This should
    really just be used on relatively small files.

    None is returned if the path is empty or the file could not be read.

    @return: Returns an array of Strings, where each line in file is an item
    in the array.
    @rtype: Array
    @param pathToSrcFile: The path to the file which will be read.
    @type pathToSrcFile: String
    """
    if (len(pathToSrcFile) > 0):
        try:
            fin = open(pathToSrcFile, "r")
            try:
                return fin.readlines()
            finally:
                # Close the file even when reading fails so that the handle is
                # not leaked.
                fin.close()
        except (IOError, os.error):
            message = "An error occured reading the file: %s." %(pathToSrcFile)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
    return None
def copyFile(pathToSrcFile, pathToDstFile):
    """
    Copy a regular file to a destination path. The parent directory of the
    destination is created first when it does not exist.

    @return: Returns True if the file was copied successfully, otherwise False
    is returned.
    @rtype: Boolean
    @param pathToSrcFile: The path to the source file that will be copied.
    @type pathToSrcFile: String
    @param pathToDstFile: The path to the destination of the file.
    @type pathToDstFile: String
    """
    logger = logging.getLogger(MAIN_LOGGER_NAME)
    # Reject sources that cannot be copied before touching the destination.
    if (not os.path.exists(pathToSrcFile)):
        logger.error("The file does not exist with the path: %s." %(pathToSrcFile))
        return False
    if (not os.path.isfile(pathToSrcFile)):
        logger.error("The path to the source file is not a regular file: %s." %(pathToSrcFile))
        return False
    if (pathToSrcFile == pathToDstFile):
        logger.error("The path to the source file and path to destination file cannot be the same: %s." %(pathToDstFile))
        return False
    # Without the destination directory the file cannot be copied.
    if (not mkdirs(os.path.dirname(pathToDstFile))):
        return False
    try:
        shutil.copy(pathToSrcFile, pathToDstFile)
    except (shutil.Error, OSError, IOError):
        logger.error("Cannot copy the file %s to %s." %(pathToSrcFile, pathToDstFile))
        return False
    return os.path.exists(pathToDstFile)
def copyDirectory(pathToSrcDir, pathToDstDir):
    """
    Copy a directory tree into a destination directory. The copy is placed in
    pathToDstDir under the basename of pathToSrcDir, and pathToDstDir is created
    first when it does not exist.

    @return: Returns True if the dir was copied successfully, otherwise False
    is returned.
    @rtype: Boolean
    @param pathToSrcDir: The path to the source dir that will be copied.
    @type pathToSrcDir: String
    @param pathToDstDir: The path to the destination of the dir.
    @type pathToDstDir: String
    """
    logger = logging.getLogger(MAIN_LOGGER_NAME)
    # Reject sources that cannot be copied before touching the destination.
    if (not os.path.exists(pathToSrcDir)):
        logger.error("The directory does not exist with the path: %s." %(pathToSrcDir))
        return False
    if (not os.path.isdir(pathToSrcDir)):
        logger.error("The path to the source directory is not a directory: %s." %(pathToSrcDir))
        return False
    if (pathToSrcDir == pathToDstDir):
        logger.error("The path to the source directory and path to destination directory cannot be the same: %s." %(pathToDstDir))
        return False
    # Without the destination directory the tree cannot be copied.
    if (not mkdirs(pathToDstDir)):
        return False
    dst = os.path.join(pathToDstDir, os.path.basename(pathToSrcDir))
    try:
        shutil.copytree(pathToSrcDir, dst)
    except (shutil.Error, OSError, IOError):
        logger.error("Cannot copy the directory %s to %s." %(pathToSrcDir, dst))
        return False
    return os.path.exists(dst)
def backupOutputDirectory(pathToOutputDir):
    """
    Move an existing output directory out of the way by renaming it to
    "<pathToOutputDir>.bk-<N>", where N is the first index starting at 1 for
    which no such path exists yet.

    @return: Returns True if the pathToOutputDir does not exist or the directory
    was successfully renamed. If pathToOutputDir exists and was not successfully
    renamed then False is returned.
    @rtype: Boolean
    @param pathToOutputDir: The path to the directory that will be backed up.
    @type pathToOutputDir: String
    """
    if (os.path.exists(pathToOutputDir)):
        logger = logging.getLogger(MAIN_LOGGER_NAME)
        logger.info("The path already exists and could contain previous lockdump data: %s" %(pathToOutputDir))
        # Find the first backup name that is not taken.
        backupIndex = 1
        pathToDST = "%s.bk-%d" %(pathToOutputDir, backupIndex)
        while (os.path.exists(pathToDST)):
            backupIndex += 1
            pathToDST = "%s.bk-%d" %(pathToOutputDir, backupIndex)
        try:
            logger.status("The existing output directory will be renamed: %s to %s." %(pathToOutputDir, pathToDST))
            shutil.move(pathToOutputDir, pathToDST)
        except (shutil.Error, OSError):
            logger.error("There was an error renaming the directory: %s to %s." %(pathToOutputDir, pathToDST))
    # If the path is still there then the backup did not work.
    return (not os.path.exists(pathToOutputDir))
def mountFilesystem(filesystemType, pathToDevice, pathToMountPoint):
    """
    This function will attempt to mount a filesystem. If the filesystem is
    already mounted or the filesystem was successfully mounted then True is
    returned, otherwise False is returned.

    @return: If pathToMountPoint is already a mount point or the filesystem was
    successfully mounted then True is returned, otherwise False is returned.
    @rtype: Boolean
    @param filesystemType: The type of filesystem that will be mounted.
    @type filesystemType: String
    @param pathToDevice: The path to the device that will be mounted.
    @type pathToDevice: String
    @param pathToMountPoint: The path to the directory that will be used as the
    mount point for the device.
    @type pathToMountPoint: String
    """
    # Check the mount point that was requested and not the global debug
    # directory, so that the result is right for every mount point.
    if (os.path.ismount(pathToMountPoint)):
        return True
    listOfCommandOptions = ["-t", filesystemType, pathToDevice, pathToMountPoint]
    if (not runCommand("mount", listOfCommandOptions)):
        message = "There was an error mounting the filesystem type %s for the device %s to the mount point %s." %(filesystemType, pathToDevice, pathToMountPoint)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
    return os.path.ismount(pathToMountPoint)
def exitScript(removePidFile=True, errorCode=0):
    """
    Exit the script with the given exit code, removing the pid file first
    unless told otherwise.

    @param removePidFile: If True(default) then the pid file will be removed
    before the script exits.
    @type removePidFile: Boolean
    @param errorCode: The exit code that will be returned. The default value is 0.
    @type errorCode: Int
    """
    if (removePidFile):
        removePIDFile()
    logging.getLogger(MAIN_LOGGER_NAME).info("The script will exit.")
    sys.exit(errorCode)
# #####################################################################
# Helper functions for gathering the lockdumps.
# #####################################################################
def getClusterNode(listOfGFS2Names):
    """
    This function return a ClusterNode object if the machine is a member of a
    cluster and has GFS2 filesystems mounted for that cluster. The
    listOfGFS2Names is a list of GFS2 filesystem that need to have their data
    capture. If the list is empty then that means that all the mounted GFS2
    filesystems will be captured, if list is not empty then only those GFS2
    filesystems in the list will have their data captured.

    The cluster name, node name and node id are read from "cman_tool status"
    when cman_tool is found, otherwise from corosync-cmapctl and
    corosync-quorumtool when corosync-cmapctl is found.

    @return: Returns a cluster node object if a cluster name and a cluster node
    name were found, otherwise None is returned.
    @rtype: ClusterNode
    @param listOfGFS2Names: A list of GFS2 filesystem names that will have their
    data captured. A name can be given with or without the cluster name
    prefix(ex. f18cluster:mygfs2vol1 or mygfs2vol1). If the list is empty then
    that means that all the mounted GFS2 filesystems will be captured, if list
    is not empty then only those GFS2 filesystems in the list will have their
    data captured.
    @type listOfGFS2Names: Array
    """
    clusterName = ""
    clusternodeName = ""
    clusternodeID = ""
    if (runCommand("which", ["cman_tool"])):
        stdout = runCommandOutput("cman_tool", ["status"])
        if (stdout is not None):
            for line in stdout.split("\n"):
                if (line.startswith("Cluster Name:")):
                    clusterName = line.split("Cluster Name:")[1].strip()
                if (line.startswith("Node name: ")):
                    clusternodeName = line.split("Node name:")[1].strip()
                if (line.startswith("Node ID: ")):
                    clusternodeID = line.split("Node ID: ")[1].strip()
    elif (runCommand("which", ["corosync-cmapctl"])):
        # Another way to get the local cluster node is: $ crm_node -i; crm_node -l
        # Get the name of the cluster.
        stdout = runCommandOutput("corosync-cmapctl", ["-g", "totem.cluster_name"])
        if (stdout is not None):
            stdoutSplit = stdout.split("=")
            if (len(stdoutSplit) == 2):
                clusterName = stdoutSplit[1].strip()
        # Get the id of the local cluster node so we can get the clusternode name
        stdout = runCommandOutput("corosync-cmapctl", ["-g", "runtime.votequorum.this_node_id"])
        if (stdout is not None):
            stdoutSplit = stdout.split("=")
            if (len(stdoutSplit) == 2):
                clusternodeID = stdoutSplit[1].strip()
        # Now that we have the nodeid we can get the clusternode name.
        if (len(clusternodeID) > 0):
            stdout = runCommandOutput("corosync-quorumtool", ["-l"])
            if (stdout is not None):
                for line in stdout.split("\n"):
                    if (line.find("local") >= 0):
                        # The node name is the third field of the line that is
                        # marked as local. Skip lines that are too short
                        # instead of failing with an IndexError.
                        fields = line.split(" (local)")[0].split()
                        if (len(fields) >= 3):
                            clusternodeName = fields[2]
                            break
    # If a clusternode name and cluster name was found then return a new object
    # since this means this machine is part of a cluster.
    if ((len(clusterName) == 0) or (len(clusternodeName) == 0)):
        return None
    mapOfMountedFilesystemLabels = getLabelMapForMountedFilesystems(clusterName, getMountedGFS2Filesystems())
    # These will be the GFS2 filesystems that will have their lockdump
    # information gathered.
    if (len(listOfGFS2Names) > 0):
        # A requested name matches a label either as given or when it is
        # prefixed with the cluster name.
        acceptedLabels = []
        for gfs2FSName in listOfGFS2Names:
            acceptedLabels.append(gfs2FSName)
            acceptedLabels.append("%s:%s" %(clusterName, gfs2FSName))
        # Iterate over a copy of the keys because entries are deleted from the
        # dictionary inside the loop.
        for label in list(mapOfMountedFilesystemLabels.keys()):
            if (not label in acceptedLabels):
                del mapOfMountedFilesystemLabels[label]
    # Cast the node id to an int, and default is 0 if node is not found or
    # not castable.
    clusternodeIDInt = 0
    if (clusternodeID.isalnum()):
        try:
            clusternodeIDInt = int(clusternodeID)
        except ValueError:
            pass
    return ClusterNode(clusternodeName, clusternodeIDInt, clusterName, mapOfMountedFilesystemLabels)
def getDLMToolDLMLockspaces():
    """
    Collect the names of the dlm lockspaces reported by the command:
    "dlm_tool ls". The output is read as sections separated by an empty line,
    and a section only counts when it has a line starting with "name" and a
    line starting with "id".

    @return: A list of all the dlm lockspace names.
    @rtype: Array
    """
    lockspaceNames = []
    output = runCommandOutput("dlm_tool", ["ls"])
    if (output is None):
        return lockspaceNames
    output = output.replace("dlm lockspaces\n", "")
    fieldNames = ("name", "id", "flags", "change", "members")
    for section in output.split("\n\n"):
        # Maps a field name to the rest of the last line starting with it.
        fields = {}
        for line in section.split("\n"):
            for fieldName in fieldNames:
                if (line.startswith(fieldName)):
                    fields[fieldName] = line.replace(fieldName, " ", 1).strip()
        if (("name" in fields) and ("id" in fields)):
            lockspaceNames.append(fields["name"])
    return lockspaceNames
def getGroupToolDLMLockspaces():
    """
    This function returns the names of all the dlm lockspace names found with the
    command: "group_tool ls". The name is the third whitespace separated field
    of each line that starts with "dlm".

    @return: A list of all the dlm lockspace names.
    @rtype: Array
    """
    dlmLockspaces = []
    stdout = runCommandOutput("group_tool", ["ls"])
    if (stdout is not None):
        for line in stdout.split("\n"):
            if (line.startswith("dlm")):
                fields = line.split()
                # Skip a line that is too short to hold a name instead of
                # failing with an IndexError.
                if (len(fields) >= 3):
                    dlmLockspaces.append(fields[2])
    return dlmLockspaces
def getDLMLockspaces():
    """
    Gather the dlm lockspace names. The names come from "dlm_tool ls", and
    "group_tool ls" is only used when the first command gave no names.

    @return: Returns a list of dlm lockspace names.
    @rtype: Array
    """
    logging.getLogger(MAIN_LOGGER_NAME).debug("Gathering the DLM Lockspace Names.")
    lockspaceNames = getDLMToolDLMLockspaces()
    if (len(lockspaceNames) == 0):
        lockspaceNames = getGroupToolDLMLockspaces()
    return lockspaceNames
def getVerifiedDLMLockspaceNames(lockspaceNames):
    """
    Filters the given lockspace names down to the ones that are currently
    reported by getDLMLockspaces(). Duplicates are dropped and the order of
    first appearance is kept.

    @return: Returns a list of DLM lockspaces that have been verified to exists
    in the list returned by getDLMLockspaces().
    @rtype: Array

    @param lockspaceNames: This is the list of DLM lockspaces that will have
    their debug directory copied.
    @type lockspaceNames: Array
    """
    # All the DLM lockspace names that exist on this host.
    existingLockspaces = getDLMLockspaces()
    verifiedNames = []
    for candidateName in lockspaceNames:
        if (candidateName not in existingLockspaces):
            # Unknown lockspace, skip it.
            continue
        if (candidateName in verifiedNames):
            # Already recorded, do not add it twice.
            continue
        verifiedNames.append(candidateName)
    return verifiedNames
def getMountedGFS2Filesystems():
    """
    Runs "mount -l" and keeps every output line whose fifth whitespace
    separated field is "gfs2".

    @return: Returns a list of all the mounted GFS2 filesystems (the raw
    lines of the mount output).
    @rtype: Array
    """
    mountOutput = runCommandOutput("mount", ["-l"])
    if (mountOutput is None):
        return []
    gfs2MountLines = []
    for mountLine in mountOutput.split("\n"):
        mountFields = mountLine.split()
        # Lines look like: <device> on <mount point> type <fstype> ...
        if ((len(mountFields) >= 5) and (mountFields[4] == "gfs2")):
            gfs2MountLines.append(mountLine)
    return gfs2MountLines
def getLabelMapForMountedFilesystems(clusterName, listOfMountedFilesystems):
    """
    Builds a dictionary that maps a filesystem label to the mount line it
    was taken from. The label is the last whitespace separated field of the
    mount line with the surrounding square brackets removed, and only labels
    that start with "<clusterName>:" are kept. For example:
    {'f18cluster:mygfs2vol1': '/dev/vdb1 on /mnt/gfs2vol1 type gfs2 (rw,relatime) [f18cluster:mygfs2vol1]'}

    @return: Returns a dictionary of the mounted GFS2 filesystems that contain a
    label that starts with the cluster name.
    @rtype: Dict

    @param clusterName: The name of the cluster.
    @type clusterName: String
    @param listOfMountedFilesystems: A list of all the mounted GFS2 filesystems.
    @type listOfMountedFilesystems: Array
    """
    requiredPrefix = "%s:" %(clusterName)
    labelToMountLine = {}
    for mountLine in listOfMountedFilesystems:
        lastField = mountLine.split()[-1]
        label = lastField.strip().strip("[").rstrip("]")
        # An empty label can never match the prefix, so one test covers both checks.
        if (label and label.startswith(requiredPrefix)):
            labelToMountLine[label] = mountLine
    return labelToMountLine
# #####################################################################
# Gather output from command functions
# #####################################################################
def _writeCommandOutputToFile(command, listOfCommandOptions, pathToCommandOutput):
    """
    Runs a command with its standard output redirected to a file. The file
    is always closed, even if running the command raises. An IOError is
    logged and not propagated.

    @param command: The name of the command that will be run.
    @type command: String
    @param listOfCommandOptions: The options passed to the command.
    @type listOfCommandOptions: Array
    @param pathToCommandOutput: The path to the file the output is written to.
    @type pathToCommandOutput: String
    """
    try:
        fout = open(pathToCommandOutput, "w")
        try:
            runCommand(command, listOfCommandOptions, standardOut=fout)
        finally:
            fout.close()
    except IOError:
        message = "There was an error writing the command output for %s to the file %s." %(command, pathToCommandOutput)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)

def gatherGeneralInformation(pathToDSTDir):
    """
    This function will gather general information about the host and write
    the results to files under pathToDSTDir. The following data will be
    captured: hostname, timestamp, uname -a, and uptime (written to
    hostinformation.txt), copies of /proc/mounts, /proc/slabinfo and the DLM
    hash table size files (when they exist), the output of
    "ps h -AL -o tid,s,cmd", "df -h" and "lsof", and the output of either
    "cman_tool status" or "corosync-quorumtool -l".

    @param pathToDSTDir: This is the path to directory where the files will be
    written to.
    @type pathToDSTDir: String
    """
    # Gather some general information and write to hostinformation.txt.
    systemString = "HOSTNAME=%s\nTIMESTAMP=%s\n" %(platform.node(), time.strftime("%Y-%m-%d %H:%M:%S"))
    # The None check has to happen before the output is stripped, otherwise
    # a missing result would raise an AttributeError.
    stdout = runCommandOutput("uname", ["-a"])
    if (stdout is not None):
        systemString += "UNAMEA=%s\n" %(stdout.strip())
    stdout = runCommandOutput("uptime", [])
    if (stdout is not None):
        systemString += "UPTIME=%s" %(stdout.strip())
    writeToFile(os.path.join(pathToDSTDir, "hostinformation.txt"), systemString, createFile=True)

    # Copy misc files, keeping their path relative to the destination directory.
    for pathToSrcFile in ["/proc/mounts", "/proc/slabinfo"]:
        copyFile(pathToSrcFile, os.path.join(pathToDSTDir, pathToSrcFile.strip("/")))
    # Copy the DLM hash table sizes:
    pathToHashTableFiles = ["/sys/kernel/config/dlm/cluster/lkbtbl_size", "/sys/kernel/config/dlm/cluster/dirtbl_size",
                            "/sys/kernel/config/dlm/cluster/rsbtbl_size"]
    for pathToSrcFile in pathToHashTableFiles:
        if (os.path.exists(pathToSrcFile)):
            copyFile(pathToSrcFile, os.path.join(pathToDSTDir, pathToSrcFile.strip("/")))

    # Get "ps h -AL -o tid,s,cmd" output.
    _writeCommandOutputToFile("ps", ["h", "-AL", "-o", "tid,s,cmd"],
                              os.path.join(pathToDSTDir, "ps_hALo-tid.s.cmd"))
    # Get "df -h" output.
    _writeCommandOutputToFile("df", ["-h"], os.path.join(pathToDSTDir, "df-h.cmd"))
    # Get "lsof" output.
    _writeCommandOutputToFile("lsof", [], os.path.join(pathToDSTDir, "lsof.cmd"))

    # Write the status of all the nodes in the cluster out. The tool that is
    # used depends on which cluster command "which" is able to find.
    if (runCommand("which", ["cman_tool"])):
        _writeCommandOutputToFile("cman_tool", ["status"],
                                  os.path.join(pathToDSTDir, "cman_tool_status"))
    elif (runCommand("which", ["corosync-cmapctl"])):
        _writeCommandOutputToFile("corosync-quorumtool", ["-l"],
                                  os.path.join(pathToDSTDir, "corosync-quorumtool_l"))
# #####################################################################
# Gather Process Information
# #####################################################################
def isProcPidStackEnabled(pathToPidData):
    """
    Checks whether the pid data directory of pid 1 contains a "stack" entry.

    @return: Returns true if the path "<pathToPidData>/1/stack" exists.
    @rtype: Boolean

    @param pathToPidData: The path to the directory where all the pid data
    directories are located.
    @type pathToPidData: String
    """
    pathToInitStackFile = os.path.join(pathToPidData, "1", "stack")
    return os.path.exists(pathToInitStackFile)
def gatherPidData(pathToPidData, pathToDSTDir):
    """
    Copies the files "cmdline", "stack" and "status" out of every all-digit
    directory entry found under pathToPidData into a directory with the same
    name under pathToDSTDir.

    @return: Returns a list of paths to the destination directories that
    exist after the copy was attempted.
    @rtype: Array

    @param pathToPidData: The path to the directory where all the pid data
    directories are located.
    @type pathToPidData: String
    @param pathToDSTDir: The path to the directory that the pid data will be
    copied under.
    @type pathToDSTDir: String
    """
    # Status has: command name, pid, ppid, state, possibly registers
    pidFilenames = ("cmdline", "stack", "status")
    copiedPidDirs = []
    if (not os.path.exists(pathToPidData)):
        return copiedPidDirs
    for entryName in os.listdir(pathToPidData):
        # Only the entries that are named after a pid are of interest.
        if (not entryName.isdigit()):
            continue
        srcPidDir = os.path.join(pathToPidData, entryName)
        dstPidDir = os.path.join(pathToDSTDir, entryName)
        for pidFilename in pidFilenames:
            copyFile(os.path.join(srcPidDir, pidFilename), os.path.join(dstPidDir, pidFilename))
        # Only report the directory if the copy actually produced it.
        if (os.path.exists(dstPidDir)):
            copiedPidDirs.append(dstPidDir)
    return copiedPidDirs
def triggerSysRQEvents():
    """
    This command will trigger sysrq events by running "echo <trigger>" with
    its standard output redirected to /proc/sysrq-trigger. Only the "t" event
    is currently triggered, which dumps all the threads state information.
    The "m" event (dump information about memory allocation) is disabled.

    The trigger file is always closed, even if running the command raises.
    An IOError is logged and not propagated.
    """
    command = "echo"
    pathToSysrqTriggerFile = "/proc/sysrq-trigger"
    # m - dump information about memory allocation (currently disabled)
    # t - dump thread state information
    triggers = ["t"]
    for trigger in triggers:
        try:
            fout = open(pathToSysrqTriggerFile, "w")
            try:
                runCommand(command, [trigger], standardOut=fout)
            finally:
                fout.close()
        except IOError:
            message = "There was an error writing the command output for %s to the file %s." %(command, pathToSysrqTriggerFile)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
# #####################################################################
# Gather lockdumps and logs
# #####################################################################
def gatherLogs(pathToDSTDir):
    """
    This function will copy the system log(/var/log/messages) and, if it
    exists, the cluster log directory(/var/log/cluster) to the directory
    given by pathToDSTDir.

    @param pathToDSTDir: This is the path to directory where the files will be
    copied to.
    @type pathToDSTDir: String
    """
    pathToLogFile = "/var/log/messages"
    copyFile(pathToLogFile, os.path.join(pathToDSTDir, os.path.basename(pathToLogFile)))

    pathToLogDir = "/var/log/cluster"
    if (os.path.exists(pathToLogDir)):
        # copyDirectory is handed the parent destination directory, the same
        # way it is called for the GFS2 lockdump directories.
        copyDirectory(pathToLogDir, pathToDSTDir)
def gatherDLMLockDumps(pathToDSTDir, lockspaceNames):
    """
    Copies the dlm debug files whose filename starts with one of the given
    lockspace names into "<pathToDSTDir>/dlm/<lockspace name>", then writes
    the output of "dlm_tool lockdebug -v -s -w <lockspace name>" into a file
    named "<lockspace name>_lockdebug" in that same directory.

    @param pathToDSTDir: This is the path to directory where the files will be
    copied to.
    @type pathToDSTDir: String
    @param lockspaceNames: This is the list of DLM lockspaces that will have
    their debug directory copied.
    @type lockspaceNames: Array
    """
    # The caller is expected to have verified that the lockspaces exist
    # (see getVerifiedDLMLockspaceNames).
    dumpType = "dlm"
    debugSrcDir = os.path.join(PATH_TO_DEBUG_DIR, dumpType)
    dumpOutputDir = os.path.join(pathToDSTDir, dumpType)
    logging.getLogger(MAIN_LOGGER_NAME).debug(
        "Copying the files in the %s lockdump data directory %s." %(dumpType.upper(), debugSrcDir))

    # Sort the debug files into a directory per lockspace.
    if (os.path.exists(debugSrcDir)):
        for debugFilename in os.listdir(debugSrcDir):
            for currentLockspace in lockspaceNames:
                if (not debugFilename.startswith(currentLockspace)):
                    continue
                copyFile(os.path.join(debugSrcDir, debugFilename),
                         os.path.join(dumpOutputDir, currentLockspace, debugFilename))

    # Run dlm_tool lockdebug against the lockspace names and write to file.
    for currentLockspace in lockspaceNames:
        lockspaceOutputDir = os.path.join(dumpOutputDir, currentLockspace)
        if (not mkdirs(lockspaceOutputDir)):
            continue
        pathToCommandOutput = os.path.join(lockspaceOutputDir, "%s_lockdebug" %(currentLockspace))
        try:
            fout = open(pathToCommandOutput, "w")
            runCommand("dlm_tool", ["lockdebug", "-v", "-s", "-w", currentLockspace], standardOut=fout)
            fout.close()
        except IOError:
            logging.getLogger(MAIN_LOGGER_NAME).error(
                "There was an error writing the command output to the file %s." %(pathToCommandOutput))
def gatherGFS2LockDumps(pathToDSTDir, listOfGFS2Filesystems):
    """
    This function copies the debug directory for a GFS2 filesystems in the list
    to a directory. The list of GFS2 filesystems will include the cluster name
    and filesystem name for each item in the list. For example:
    "f18cluster:mygfs2vol1"

    The entries of every copied directory are counted and summed over all
    the filesystems, so the result is True as soon as any one of them had
    data copied.

    @return: Returns True if at least one entry was copied to the
    destination directory for any of the filesystems.
    @rtype: Boolean

    @param pathToDSTDir: This is the path to directory where the files will be
    copied to.
    @type pathToDSTDir: String
    @param listOfGFS2Filesystems: This is the list of the GFS2 filesystems that
    will have their debug directory copied.
    @type listOfGFS2Filesystems: Array
    """
    lockDumpType = "gfs2"
    pathToSrcDir = os.path.join(PATH_TO_DEBUG_DIR, lockDumpType)
    pathToOutputDir = os.path.join(pathToDSTDir, lockDumpType)
    # The total number of entries that were copied for all the filesystems.
    fileCopiedCount = 0
    if (not os.path.exists(pathToSrcDir)):
        return False
    for dirName in os.listdir(pathToSrcDir):
        pathToCurrentDir = os.path.join(pathToSrcDir, dirName)
        if ((os.path.isdir(pathToCurrentDir)) and (dirName in listOfGFS2Filesystems)):
            message = "Copying the lockdump data for the %s filesystem: %s" %(lockDumpType.upper(), dirName)
            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
            copySuccessful = copyDirectory(pathToCurrentDir, pathToOutputDir)
            pathToCopiedDir = os.path.join(pathToOutputDir, dirName)
            if (copySuccessful and os.path.exists(pathToCopiedDir)):
                # Accumulate instead of overwrite, so that an empty directory
                # processed later cannot hide an earlier successful copy.
                fileCopiedCount += len(os.listdir(pathToCopiedDir))
    # If the number of entries copied was greater than zero then data was
    # copied successfully.
    return (fileCopiedCount > 0)
# ##############################################################################
# Get user selected options
# ##############################################################################
def __getOptions(version) :
    """
    Builds the OptionParserExtended for this script, registers all the
    commandline options, and parses the commandline.

    @return: A tuple of the selected commandline options and the commandline
    args that are not associated with an option.
    @rtype: Tuple

    @param version: The version of the this script.
    @type version: String
    """
    optionParser = OptionParserExtended(version)
    # The on/off switches only differ by their flags, destination and help
    # text: (short flag, long flag, destination, help text).
    listOfSwitches = [
        ("-d", "--debug", "enableDebugLogging",
         "enables debug logging"),
        ("-q", "--quiet", "disableLoggingToConsole",
         "disables logging to console"),
        ("-y", "--no_ask", "disableQuestions",
         "disables all questions and assumes yes"),
        ("-i", "--info", "enablePrintInfo",
         "prints information about the mounted GFS2 file-systems"),
        ("-P", "--disable_process_gather", "disableProcessGather",
         "the gathering of process information will be disabled"),
    ]
    for (shortFlag, longFlag, destination, helpText) in listOfSwitches:
        optionParser.add_option(shortFlag, longFlag,
                                action="store_true",
                                dest=destination,
                                help=helpText,
                                default=False)
    # The options that take a value.
    optionParser.add_option("-o", "--path_to_output_dir",
                            action="store",
                            dest="pathToOutputDir",
                            help="the directory where all the collect data will be stored",
                            type="string",
                            metavar="<output directory>",
                            default="")
    optionParser.add_option("-r", "--num_of_runs",
                            action="store",
                            dest="numberOfRuns",
                            help="number of runs capturing the lockdump data(default: 3 runs)",
                            type="int",
                            metavar="<number of runs>",
                            default=3)
    optionParser.add_option("-s", "--seconds_sleep",
                            action="store",
                            dest="secondsToSleep",
                            help="number of seconds to sleep between runs of capturing the lockdump data(default: 120 seconds)",
                            type="int",
                            metavar="<seconds to sleep>",
                            default=120)
    # A comma delimited list that can be given more than once.
    optionParser.add_option("-n", "--fs_name",
                            action="extend",
                            dest="listOfGFS2Names",
                            help="name of the GFS2 filesystem(s) that will have their lockdump data captured(default: all GFS2 file-systems will be captured)",
                            type="string",
                            metavar="<name of GFS2 filesystem>",
                            default=[])
    # parse_args() already hands back the (options, args) pair.
    return optionParser.parse_args()
# ##############################################################################
# OptParse classes for commandline options
# ##############################################################################
class OptionParserExtended(OptionParser):
    """
    This is the class that gets the command line options the end user
    selects. It uses ExtendOption as its option class and appends usage
    examples to the help message.
    """
    def __init__(self, version) :
        """
        @param version: The version of the this script.
        @type version: String
        """
        self.__commandName = os.path.basename(sys.argv[0])
        versionMessage = "%s %s\n" %(self.__commandName, version)
        # The command name is substituted once; it used to be repeated twice
        # in the description.
        commandDescription ="%s will capture locking information from GFS2 file systems and DLM.\n"%(self.__commandName)
        OptionParser.__init__(self, option_class=ExtendOption,
                              version=versionMessage,
                              description=commandDescription)

    def print_help(self, file=None):
        """
        Print the version, the regular help message, and then examples at
        the bottom of the help message.

        @param file: The stream that the help is written to. If None then
        sys.stdout is used, which matches OptionParser.print_help().
        @type file: File
        """
        if (file is None):
            file = sys.stdout
        self.print_version(file)
        examplesMessage = "\nPrints information about the available GFS2 filesystems that can have lockdump data captured."
        examplesMessage += "\n# %s -i\n" %(self.__commandName)

        examplesMessage += "\nIt will do 3 runs of gathering the lockdump information in 10 second intervals for only the"
        examplesMessage += "\nGFS2 filesystems with the names myGFS2vol2,myGFS2vol1. Then it will archive and compress"
        examplesMessage += "\nthe data collected in the output directory:"
        examplesMessage += "\n/tmp/cluster42-gfs2_lockcapture and all the questions will be answered with yes.\n"
        examplesMessage += "\n# %s -r 3 -s 10 -n myGFS2vol2,myGFS2vol1 -o /tmp/cluster42-gfs2_lockcapture -y\n" %(self.__commandName)

        examplesMessage += "\nIt will do 2 runs of gathering the lockdump information in 25 second intervals for all the"
        examplesMessage += "\nmounted GFS2 filesystems. The gathering process data will be disabled. Then it will archive and compress"
        examplesMessage += "\nthe data collected in the output directory:"
        examplesMessage += "\n/tmp/cluster42-gfs2_lockcapture and all the questions will be answered with yes.\n"
        examplesMessage += "\n# %s -r 2 -s 25 -P -o /tmp/cluster42-gfs2_lockcapture\n" %(self.__commandName)
        OptionParser.print_help(self, file)
        # Same output as printing the message: the text followed by a newline.
        file.write(examplesMessage + "\n")
class ExtendOption (Option):
    """
    Allow to specify comma delimited list of entries for arrays
    and dictionaries. This adds the "extend" action, which splits the option
    value on commas and appends every non-empty, whitespace-stripped entry to
    the list stored at the option's destination.
    """
    ACTIONS = Option.ACTIONS + ("extend",)
    STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
    TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)

    def take_action(self, action, dest, opt, value, values, parser):
        """
        This function is a wrapper to take certain options passed on command
        prompt and wrap them into an Array.

        @param action: The type of action that will be taken. For example:
        "store_true", "store_false", "extend".
        @type action: String
        @param dest: The name of the variable that will be used to store the
        option.
        @type dest: String/Boolean/Array
        @param opt: The option string that triggered the action.
        @type opt: String
        @param value: The value of opt(option) if it takes a
        value, if not then None.
        @type value:
        @param values: All the opt(options) in a dictionary.
        @type values: Dictionary
        @param parser: The option parser that was orginally called.
        @type parser: OptionParser
        """
        if (action == "extend") :
            try:
                listOfEntries = value.split(",")
            except AttributeError:
                # The value is not a string (for example None), so there is
                # nothing that can be added to the destination.
                return
            valueList = []
            for entry in listOfEntries:
                # Need to add code for dealing with paths if there is option for paths.
                # Each individual entry is stripped and added, not the whole
                # comma delimited value.
                newValue = entry.strip()
                if (len(newValue) > 0):
                    valueList.append(newValue)
            values.ensure_value(dest, []).extend(valueList)
        else:
            Option.take_action(self, action, dest, opt, value, values, parser)
# ###############################################################################
# Main Function
# ###############################################################################
if __name__ == "__main__":
"""
When the script is executed then this code is ran. If there was files(not
directories) created then 0 will be returned, else a 1 is returned.
"""
# Overall flow: parse the options, set up logging, create the pid file,
# find the cluster node and its mounted GFS2 filesystems, optionally print
# the info and exit, confirm with the user, prepare the output directory,
# mount debugfs, do the requested number of capture runs, then archive the
# output directory.
try:
# #######################################################################
# Get the options from the commandline.
# #######################################################################
(cmdLineOpts, cmdLineArgs) = __getOptions(VERSION_NUMBER)
# #######################################################################
# Setup the logger and create config directory
# #######################################################################
# Create the logger
logLevel = logging.INFO
logger = logging.getLogger(MAIN_LOGGER_NAME)
logger.setLevel(logLevel)
# Create a new status function and level.
logging.STATUS = logging.INFO + 2
logging.addLevelName(logging.STATUS, "STATUS")
# Log to main system logger that script has started then remove the
# handler before the other handlers are created.
sysLogHandler = logging.handlers.SysLogHandler(address = '/dev/log')
logger.addHandler(sysLogHandler)
logger.info("Capturing of the data to analyze GFS2 lockdumps.")
logger.removeHandler(sysLogHandler)
# Create a function for the STATUS_LEVEL since not defined by python. This
# means you can call it like the other predefined message
# functions. Example: logging.getLogger("loggerName").status(message)
setattr(logger, "status", lambda *args: logger.log(logging.STATUS, *args))
streamHandler = logging.StreamHandler()
streamHandler.setLevel(logLevel)
streamHandler.setFormatter(logging.Formatter("%(levelname)s %(message)s"))
logger.addHandler(streamHandler)
# Please note there will not be a global log file created. If a log file
# is needed then redirect the output. There will be a log file created
# for each run in the corresponding directory.
# #######################################################################
# Set the logging levels.
# #######################################################################
# Debug logging is only enabled when logging to the console is not disabled.
if ((cmdLineOpts.enableDebugLogging) and (not cmdLineOpts.disableLoggingToConsole)):
logging.getLogger(MAIN_LOGGER_NAME).setLevel(logging.DEBUG)
streamHandler.setLevel(logging.DEBUG)
message = "Debugging has been enabled."
logging.getLogger(MAIN_LOGGER_NAME).debug(message)
if (cmdLineOpts.disableLoggingToConsole):
# Disables every logging call of severity CRITICAL and below, for all
# loggers and handlers.
logging.disable(logging.CRITICAL)
# #######################################################################
# Check to see if pid file exists and error if it does.
# #######################################################################
if (os.path.exists(PATH_TO_PID_FILENAME)):
message = "The PID file %s already exists and this script cannot run till it does not exist." %(PATH_TO_PID_FILENAME)
logging.getLogger(MAIN_LOGGER_NAME).error(message)
message = "Verify that there are no other existing processes running. If there are running processes those need to be stopped first and the file removed."
logging.getLogger(MAIN_LOGGER_NAME).info(message)
# The pid file is left alone here (removePidFile=False) because this
# process did not create it.
exitScript(removePidFile=False, errorCode=1)
else:
message = "Creating the pid file: %s" %(PATH_TO_PID_FILENAME)
logging.getLogger(MAIN_LOGGER_NAME).debug(message)
# Create the pid file so we don't have more than 1 process of this
# script running.
writeToFile(PATH_TO_PID_FILENAME, str(os.getpid()), createFile=True)
# #######################################################################
# Get the clusternode name and verify that mounted GFS2 filesystems were
# found.
# #######################################################################
clusternode = getClusterNode(cmdLineOpts.listOfGFS2Names)
if (clusternode == None):
message = "The cluster or cluster node name could not be found."
logging.getLogger(MAIN_LOGGER_NAME).error(message)
exitScript(removePidFile=True, errorCode=1)
elif (not len(clusternode.getMountedGFS2FilesystemNames()) > 0):
message = "There were no mounted GFS2 filesystems found."
if (len(cmdLineOpts.listOfGFS2Names) > 0):
message = "There were no mounted GFS2 filesystems found with the name:"
for name in cmdLineOpts.listOfGFS2Names:
message += " %s" %(name)
message += "."
logging.getLogger(MAIN_LOGGER_NAME).error(message)
# NOTE(review): no exitScript() call follows the error above, so the
# script appears to continue with no filesystems - confirm this is intended.
if (cmdLineOpts.enablePrintInfo):
# Silence the logger so that only the printed information is shown.
logging.disable(logging.CRITICAL)
print "List of all the mounted GFS2 filesystems that can have their lockdump data captured:"
print clusternode
exitScript()
# #######################################################################
# Verify they want to continue because this script will trigger sysrq events.
# #######################################################################
if (not cmdLineOpts.disableQuestions):
valid = {"yes":True, "y":True, "no":False, "n":False}
question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?"
prompt = " [y/n] "
# Keep asking until one of the keys in "valid" is entered.
while True:
sys.stdout.write(question + prompt)
choice = raw_input().lower()
if (choice in valid):
if (valid.get(choice)):
# If yes, or y then exit loop and continue.
break
else:
message = "The script will not continue since you chose not to continue."
logging.getLogger(MAIN_LOGGER_NAME).error(message)
exitScript(removePidFile=True, errorCode=1)
else:
sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n")
# #######################################################################
# Create the output directory to verify it can be created before
# proceeding unless it is already created from a previous run data needs
# to be analyzed. Probably could add more debugging on if file or dir.
# #######################################################################
pathToOutputDir = cmdLineOpts.pathToOutputDir
# Default to /tmp/<timestamp>-<script name> when no -o option was given.
if (not len(pathToOutputDir) > 0):
pathToOutputDir = "%s" %(os.path.join("/tmp", "%s-%s" %(time.strftime("%Y-%m-%d_%H%M%S"), os.path.basename(sys.argv[0]))))
# #######################################################################
# Backup any existing directory with same name as current output
# directory.
# #######################################################################
if (backupOutputDirectory(pathToOutputDir)):
message = "This directory that will be used to capture all the data: %s" %(pathToOutputDir)
logging.getLogger(MAIN_LOGGER_NAME).info(message)
if (not mkdirs(pathToOutputDir)):
exitScript(errorCode=1)
else:
# There was an existing directory with same path as current output
# directory and it failed to back it up.
message = "Please change the output directory path (-o) or manual rename or remove the existing path: %s" %(pathToOutputDir)
logging.getLogger(MAIN_LOGGER_NAME).info(message)
exitScript(errorCode=1)
# #######################################################################
# Check to see if the debug directory is mounted. If not then
# log an error.
# #######################################################################
if(mountFilesystem("debugfs", "none", PATH_TO_DEBUG_DIR)):
message = "The debug filesystem %s is mounted." %(PATH_TO_DEBUG_DIR)
logging.getLogger(MAIN_LOGGER_NAME).info(message)
else:
message = "There was a problem mounting the debug filesystem: %s" %(PATH_TO_DEBUG_DIR)
logging.getLogger(MAIN_LOGGER_NAME).error(message)
message = "The debug filesystem is required to be mounted for this script to run."
logging.getLogger(MAIN_LOGGER_NAME).info(message)
exitScript(errorCode=1)
# #######################################################################
# Gather data and the lockdumps.
# #######################################################################
if (cmdLineOpts.numberOfRuns <= 0):
message = "The number of runs must be greater than zero."
logging.getLogger(MAIN_LOGGER_NAME).warning(message)
exitScript(errorCode=1)
# If GFS2 lockdump files were successfully copied to output directory
# in at least one run then the exit code will be set to 0, else the exit
# code will be 1.
exitCode = 1
for i in range(1,(cmdLineOpts.numberOfRuns + 1)):
# The current log count that will start at 1 and not zero to make it
# make sense in logs.
# Add clusternode name under each run dir to make combining multiple
# clusternode gfs2_lockgather data together and all data in each run directory.
pathToOutputRunDir = os.path.join(pathToOutputDir, "run%d/%s" %(i, clusternode.getClusterNodeName()))
# Create the directory that will be used to capture the data.
if (not mkdirs(pathToOutputRunDir)):
exitScript(errorCode=1)
# Set the handler for writing to log file for this run.
currentRunFileHandler = None
pathToLogFile = os.path.join(pathToOutputRunDir, "%s.log" %(MAIN_LOGGER_NAME))
# NOTE(review): the read access check is against "/tmp" no matter where
# the output directory is located - confirm this is intended.
if (((os.access(pathToLogFile, os.W_OK) and os.access("/tmp", os.R_OK))) or (not os.path.exists(pathToLogFile))):
currentRunFileHandler = logging.FileHandler(pathToLogFile)
currentRunFileHandler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s", "%Y-%m-%d %H:%M:%S"))
logging.getLogger(MAIN_LOGGER_NAME).addHandler(currentRunFileHandler)
message = "Pass (%d/%d): Gathering all the lockdump data." %(i, cmdLineOpts.numberOfRuns)
logging.getLogger(MAIN_LOGGER_NAME).status(message)
# Gather various bits of data from the clusternode.
message = "Pass (%d/%d): Gathering general information about the host." %(i, cmdLineOpts.numberOfRuns)
logging.getLogger(MAIN_LOGGER_NAME).debug(message)
gatherGeneralInformation(pathToOutputRunDir)
# Write the clusternode name and id to the general information file.
writeToFile(os.path.join(pathToOutputRunDir, "hostinformation.txt"),
"NODE_NAME=%s\nNODE_ID=%d" %(clusternode.getClusterNodeName(), clusternode.getClusterNodeID()),
appendToFile=True, createFile=True)
# Going to sleep for 2 seconds, so that TIMESTAMP should be in the
# past in the logs so that capturing sysrq data will be guaranteed.
time.sleep(2)
# If enabled then gather the process data.
if (not cmdLineOpts.disableProcessGather):
# Gather the backtraces for all the pids, by grabbing the /proc/<pid
# number> or triggering sysrq events to capture task back traces
# from log.
# Gather the data in the /proc/<pid> directory if the file
# </proc/<pid>/stack exists. If file exists we will not trigger
# sysrq events.
# Should I gather anyhow and only capture sysrq if needed.
pathToPidData = "/proc"
if (isProcPidStackEnabled(pathToPidData)):
message = "Pass (%d/%d): Triggering the capture of all pid directories in %s." %(i, cmdLineOpts.numberOfRuns, pathToPidData)
logging.getLogger(MAIN_LOGGER_NAME).debug(message)
gatherPidData(pathToPidData, os.path.join(pathToOutputRunDir, pathToPidData.strip("/")))
else:
message = "Pass (%d/%d): Triggering the sysrq events for the host since stack was not captured in pid directory." %(i, cmdLineOpts.numberOfRuns)
logging.getLogger(MAIN_LOGGER_NAME).debug(message)
triggerSysRQEvents()
# #######################################################################
# Gather the DLM data and lock-dumps
# #######################################################################
# Gather data for the DLM lockspaces that are found.
lockspaceNames = clusternode.getMountedGFS2FilesystemNames(includeClusterName=False)
# In addition always gather these lockspaces(if they exist).
# NOTE(review): this appends to the list that was returned by the
# clusternode object - verify that a new list is returned on each call.
lockspaceNames.append("clvmd")
lockspaceNames.append("rgmanager")
# Verify that these lockspace names exist.
lockspaceNames = getVerifiedDLMLockspaceNames(lockspaceNames)
# Gather the dlm locks.
message = "Pass (%d/%d): Gathering the DLM lock-dumps for the host." %(i, cmdLineOpts.numberOfRuns)
logging.getLogger(MAIN_LOGGER_NAME).debug(message)
# Copy the debug files and the lockdebug output for the verified lockspaces.
gatherDLMLockDumps(pathToOutputRunDir, lockspaceNames)
# #######################################################################
# Gather the GFS2 data and lock-dumps
# #######################################################################
# Gather the glock locks from gfs2.
message = "Pass (%d/%d): Gathering the GFS2 lock-dumps for the host." %(i, cmdLineOpts.numberOfRuns)
logging.getLogger(MAIN_LOGGER_NAME).debug(message)
# A single successful run is enough for the script to exit with 0.
if(gatherGFS2LockDumps(pathToOutputRunDir, clusternode.getMountedGFS2FilesystemNames())):
exitCode = 0
# Gather log files
message = "Pass (%d/%d): Gathering the log files for the host." %(i, cmdLineOpts.numberOfRuns)
logging.getLogger(MAIN_LOGGER_NAME).debug(message)
gatherLogs(os.path.join(pathToOutputRunDir, "logs"))
# Sleep between each run if secondsToSleep is greater than or equal
# to 0 and current run is not the last run.
if ((cmdLineOpts.secondsToSleep >= 0) and (i < (cmdLineOpts.numberOfRuns))):
message = "The script will sleep for %d seconds between each run of capturing the lockdump data." %(cmdLineOpts.secondsToSleep)
logging.getLogger(MAIN_LOGGER_NAME).info(message)
time.sleep(cmdLineOpts.secondsToSleep)
# Remove the handler that writes to the log file of this run:
logging.getLogger(MAIN_LOGGER_NAME).removeHandler(currentRunFileHandler)
# #######################################################################
# Archive the directory that contains all the data and archive it after
# all the information has been gathered.
# #######################################################################
message = "All the files have been gathered and this directory contains all the captured data: %s" %(pathToOutputDir)
logging.getLogger(MAIN_LOGGER_NAME).info(message)
message = "The lockdump data will now be archive. This could some time depending on the size of the data collected."
logging.getLogger(MAIN_LOGGER_NAME).info(message)
pathToTarFilename = archiveData(pathToOutputDir)
if (os.path.exists(pathToTarFilename)):
message = "The compressed archvied file was created: %s" %(pathToTarFilename)
logging.getLogger(MAIN_LOGGER_NAME).info(message)
# Do some cleanup by removing the directory of the data if the archive file was created.
try:
shutil.rmtree(pathToOutputDir)
except OSError:
message = "There was an error removing the directory: %s." %(pathToOutputDir)
logging.getLogger(MAIN_LOGGER_NAME).error(message)
else:
message = "The compressed archvied failed to be created: %s" %(pathToTarFilename)
logging.getLogger(MAIN_LOGGER_NAME).error(message)
# #######################################################################
except KeyboardInterrupt:
print ""
message = "This script will exit since control-c was executed by end user."
logging.getLogger(MAIN_LOGGER_NAME).error(message)
exitScript(errorCode=1)
# #######################################################################
# Exit the application with exitCode, which is 0 only if GFS2 lockdump
# files were copied during at least one run, else it is 1.
# #######################################################################
exitScript(errorCode=exitCode)
|