This file is indexed.

/usr/share/pyshared/CedarBackup2/filesystem.py is in cedar-backup2 2.27.0-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
# -*- coding: iso-8859-1 -*-
# vim: set ft=python ts=3 sw=3 expandtab:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
#              C E D A R
#          S O L U T I O N S       "Software done right."
#           S O F T W A R E
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Copyright (c) 2004-2008,2010 Kenneth J. Pronovici.
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# Version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Copies of the GNU General Public License are available from
# the Free Software Foundation website, http://www.gnu.org/.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Author   : Kenneth J. Pronovici <pronovic@ieee.org>
# Language : Python 2 (>= 2.7)
# Project  : Cedar Backup, release 2
# Purpose  : Provides filesystem-related objects.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

########################################################################
# Module documentation
########################################################################

"""
Provides filesystem-related objects.
@sort: FilesystemList, BackupFileList, PurgeItemList
@author: Kenneth J. Pronovici <pronovic@ieee.org>
"""


########################################################################
# Imported modules
########################################################################

# System modules
import os
import re
import math
import logging
import tarfile

# Cedar Backup modules
from CedarBackup2.knapsack import firstFit, bestFit, worstFit, alternateFit
from CedarBackup2.util import AbsolutePathList, UnorderedList, RegexList
from CedarBackup2.util import removeKeys, displayBytes, calculateFileAge, encodePath, dereferenceLink


########################################################################
# Module-wide variables
########################################################################

logger = logging.getLogger("CedarBackup2.log.filesystem")


########################################################################
# FilesystemList class definition
########################################################################

class FilesystemList(list):

   ######################
   # Class documentation
   ######################

   """
   Represents a list of filesystem items.

   This is a generic class that represents a list of filesystem items.  Callers
   can add individual files or directories to the list, or can recursively add
   the contents of a directory.  The class also allows for up-front exclusions
   in several forms (all files, all directories, all items matching a pattern,
   all items whose basename matches a pattern, or all directories containing a
   specific "ignore file").  Symbolic links are typically backed up
   non-recursively, i.e. the link to a directory is backed up, but not the
   contents of that link (we don't want to deal with recursive loops, etc.).

   The custom methods such as L{addFile} will only add items if they exist on
   the filesystem and do not match any exclusions that are already in place.
   However, since a FilesystemList is a subclass of Python's standard list
   class, callers can also add items to the list in the usual way, using
   methods like C{append()} or C{insert()}.  No validations apply to items
   added to the list in this way; however, many list-manipulation methods deal
   "gracefully" with items that don't exist in the filesystem, often by
   ignoring them.

   Once a list has been created, callers can remove individual items from the
   list using standard methods like C{pop()} or C{remove()} or they can use
   custom methods to remove specific types of entries or entries which match a
   particular pattern.

   @note: Regular expression patterns that apply to paths are assumed to be
   bounded at front and back by the beginning and end of the string, i.e. they
   are treated as if they begin with C{^} and end with C{$}.  This is true
   whether we are matching a complete path or a basename.

   @note: Some platforms, like Windows, do not support soft links.  On those
   platforms, the ignore-soft-links flag can be set, but it won't do any good
   because the operating system never reports a file as a soft link.

   @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs,
          removeLinks, removeMatch, removeInvalid, normalize,
          excludeFiles, excludeDirs, excludeLinks, excludePaths,
          excludePatterns, excludeBasenamePatterns, ignoreFile
   """


   ##############
   # Constructor
   ##############

   def __init__(self):
      """Initializes a list with no configured exclusions."""
      list.__init__(self)
      self._excludeFiles = False
      self._excludeDirs = False
      self._excludeLinks = False
      self._excludePaths = None
      self._excludePatterns = None
      self._excludeBasenamePatterns = None
      self._ignoreFile = None
      self.excludeFiles = False
      self.excludeLinks = False
      self.excludeDirs = False
      self.excludePaths = []
      self.excludePatterns = RegexList()
      self.excludeBasenamePatterns = RegexList()
      self.ignoreFile = None


   #############
   # Properties
   #############

   def _setExcludeFiles(self, value):
      """
      Property target used to set the exclude files flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      if value:
         self._excludeFiles = True
      else:
         self._excludeFiles = False

   def _getExcludeFiles(self):
      """
      Property target used to get the exclude files flag.
      """
      return self._excludeFiles

   def _setExcludeDirs(self, value):
      """
      Property target used to set the exclude directories flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      if value:
         self._excludeDirs = True
      else:
         self._excludeDirs = False

   def _getExcludeDirs(self):
      """
      Property target used to get the exclude directories flag.
      """
      return self._excludeDirs

   def _setExcludeLinks(self, value):
      """
      Property target used to set the exclude soft links flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      if value:
         self._excludeLinks = True
      else:
         self._excludeLinks = False

   def _getExcludeLinks(self):
      """
      Property target used to get the exclude soft links flag.
      """
      return self._excludeLinks

   def _setExcludePaths(self, value):
      """
      Property target used to set the exclude paths list.
      A C{None} value is converted to an empty list.
      Elements do not have to exist on disk at the time of assignment.
      @raise ValueError: If any list element is not an absolute path.
      """
      self._excludePaths = AbsolutePathList()
      if value is not None:
         self._excludePaths.extend(value)

   def _getExcludePaths(self):
      """
      Property target used to get the absolute exclude paths list.
      """
      return self._excludePaths

   def _setExcludePatterns(self, value):
      """
      Property target used to set the exclude patterns list.
      A C{None} value is converted to an empty list.
      """
      self._excludePatterns = RegexList()
      if value is not None:
         self._excludePatterns.extend(value)

   def _getExcludePatterns(self):
      """
      Property target used to get the exclude patterns list.
      """
      return self._excludePatterns

   def _setExcludeBasenamePatterns(self, value):
      """
      Property target used to set the exclude basename patterns list.
      A C{None} value is converted to an empty list.
      """
      self._excludeBasenamePatterns = RegexList()
      if value is not None:
         self._excludeBasenamePatterns.extend(value)

   def _getExcludeBasenamePatterns(self):
      """
      Property target used to get the exclude basename patterns list.
      """
      return self._excludeBasenamePatterns

   def _setIgnoreFile(self, value):
      """
      Property target used to set the ignore file.
      The value must be a non-empty string if it is not C{None}.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The ignore file must be a non-empty string.")
      self._ignoreFile = value

   def _getIgnoreFile(self):
      """
      Property target used to get the ignore file.
      """
      return self._ignoreFile

   excludeFiles = property(_getExcludeFiles, _setExcludeFiles, None, "Boolean indicating whether files should be excluded.")
   excludeDirs = property(_getExcludeDirs, _setExcludeDirs, None, "Boolean indicating whether directories should be excluded.")
   excludeLinks = property(_getExcludeLinks, _setExcludeLinks, None, "Boolean indicating whether soft links should be excluded.")
   excludePaths = property(_getExcludePaths, _setExcludePaths, None, "List of absolute paths to be excluded.")
   excludePatterns = property(_getExcludePatterns, _setExcludePatterns, None,
                              "List of regular expression patterns (matching complete path) to be excluded.")
   excludeBasenamePatterns = property(_getExcludeBasenamePatterns, _setExcludeBasenamePatterns,
                                      None, "List of regular expression patterns (matching basename) to be excluded.")
   ignoreFile = property(_getIgnoreFile, _setIgnoreFile, None, "Name of file which will cause directory contents to be ignored.")


   ##############
   # Add methods
   ##############

   def addFile(self, path):
      """
      Adds a file to the list.

      The path must exist and must be a file or a link to an existing file.  It
      will be added to the list subject to any exclusions that are in place.

      @param path: File path to be added to the list
      @type path: String representing a path on disk

      @return: Number of items added to the list.

      @raise ValueError: If path is not a file or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      path = encodePath(path)
      if not os.path.exists(path) or not os.path.isfile(path):
         logger.debug("Path [%s] is not a file or does not exist on disk.", path)
         raise ValueError("Path is not a file or does not exist on disk.")
      if self.excludeLinks and os.path.islink(path):
         logger.debug("Path [%s] is excluded based on excludeLinks.", path)
         return 0
      if self.excludeFiles:
         logger.debug("Path [%s] is excluded based on excludeFiles.", path)
         return 0
      if path in self.excludePaths:
         logger.debug("Path [%s] is excluded based on excludePaths.", path)
         return 0
      for pattern in self.excludePatterns:
         pattern = encodePath(pattern)  # use same encoding as filenames
         if re.compile(r"^%s$" % pattern).match(path): # safe to assume all are valid due to RegexList
            logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern)
            return 0
      for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList
         pattern = encodePath(pattern)  # use same encoding as filenames
         if re.compile(r"^%s$" % pattern).match(os.path.basename(path)):
            logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern)
            return 0
      self.append(path)
      logger.debug("Added file to list: [%s]", path)
      return 1

   def addDir(self, path):
      """
      Adds a directory to the list.

      The path must exist and must be a directory or a link to an existing
      directory.  It will be added to the list subject to any exclusions that
      are in place.  The L{ignoreFile} does not apply to this method, only to
      L{addDirContents}.

      @param path: Directory path to be added to the list
      @type path: String representing a path on disk

      @return: Number of items added to the list.

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      path = encodePath(path)
      path = normalizeDir(path)
      if not os.path.exists(path) or not os.path.isdir(path):
         logger.debug("Path [%s] is not a directory or does not exist on disk.", path)
         raise ValueError("Path is not a directory or does not exist on disk.")
      if self.excludeLinks and os.path.islink(path):
         logger.debug("Path [%s] is excluded based on excludeLinks.", path)
         return 0
      if self.excludeDirs:
         logger.debug("Path [%s] is excluded based on excludeDirs.", path)
         return 0
      if path in self.excludePaths:
         logger.debug("Path [%s] is excluded based on excludePaths.", path)
         return 0
      for pattern in self.excludePatterns: # safe to assume all are valid due to RegexList
         pattern = encodePath(pattern)  # use same encoding as filenames
         if re.compile(r"^%s$" % pattern).match(path):
            logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern)
            return 0
      for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList
         pattern = encodePath(pattern)  # use same encoding as filenames
         if re.compile(r"^%s$" % pattern).match(os.path.basename(path)):
            logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern)
            return 0
      self.append(path)
      logger.debug("Added directory to list: [%s]", path)
      return 1

   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
      """
      Adds the contents of a directory to the list.

      The path must exist and must be a directory or a link to a directory.
      The contents of the directory (as well as the directory path itself) will
      be recursively added to the list, subject to any exclusions that are in
      place.  If you only want the directory and its immediate contents to be
      added, then pass in C{recursive=False}.

      @note: If a directory's absolute path matches an exclude pattern or path,
      or if the directory contains the configured ignore file, then the
      directory and all of its contents will be recursively excluded from the
      list.

      @note: If the passed-in directory happens to be a soft link, it will be
      recursed.  However, the linkDepth parameter controls whether any soft
      links I{within} the directory will be recursed.  The link depth is
      maximum depth of the tree at which soft links should be followed.  So, a
      depth of 0 does not follow any soft links, a depth of 1 follows only
      links within the passed-in directory, a depth of 2 follows the links at
      the next level down, etc.

      @note: Any invalid soft links (i.e.  soft links that point to
      non-existent items) will be silently ignored.

      @note: The L{excludeDirs} flag only controls whether any given directory
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @note: If you call this method I{on a link to a directory} that link will
      never be dereferenced (it may, however, be followed).

      @param path: Directory path whose contents should be added to the list
      @type path: String representing a path on disk

      @param recursive: Indicates whether directory contents should be added recursively.
      @type recursive: Boolean value

      @param addSelf: Indicates whether the directory itself should be added to the list.
      @type addSelf: Boolean value

      @param linkDepth: Maximum depth of the tree at which soft links should be followed
      @type linkDepth: Integer value, where zero means not to follow any soft links

      @param dereference: Indicates whether soft links, if followed, should be dereferenced
      @type dereference: Boolean value

      @return: Number of items recursively added to the list

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      path = encodePath(path)
      path = normalizeDir(path)
      return self._addDirContentsInternal(path, addSelf, recursive, linkDepth, dereference)

   def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0, dereference=False):
      """
      Internal implementation of C{addDirContents}.

      This internal implementation exists due to some refactoring.  Basically,
      some subclasses have a need to add the contents of a directory, but not
      the directory itself.  This is different than the standard C{FilesystemList}
      behavior and actually ends up making a special case out of the first
      call in the recursive chain.  Since I don't want to expose the modified
      interface, C{addDirContents} ends up being wholly implemented in terms
      of this method.

      The linkDepth parameter controls whether soft links are followed when we
      are adding the contents recursively.  Any recursive calls reduce the
      value by one.  If the value zero or less, then soft links will just be
      added as directories, but will not be followed.  This means that links
      are followed to a I{constant depth} starting from the top-most directory.

      There is one difference between soft links and directories: soft links
      that are added recursively are not placed into the list explicitly.  This
      is because if we do add the links recursively, the resulting tar file
      gets a little confused (it has a link and a directory with the same
      name).

      @note: If you call this method I{on a link to a directory} that link will
      never be dereferenced (it may, however, be followed).

      @param path: Directory path whose contents should be added to the list.
      @param includePath: Indicates whether to include the path as well as contents.
      @param recursive: Indicates whether directory contents should be added recursively.
      @param linkDepth: Depth of soft links that should be followed
      @param dereference: Indicates whether soft links, if followed, should be dereferenced

      @return: Number of items recursively added to the list

      @raise ValueError: If path is not a directory or does not exist.
      """
      added = 0
      if not os.path.exists(path) or not os.path.isdir(path):
         logger.debug("Path [%s] is not a directory or does not exist on disk.", path)
         raise ValueError("Path is not a directory or does not exist on disk.")
      if path in self.excludePaths:
         logger.debug("Path [%s] is excluded based on excludePaths.", path)
         return added
      for pattern in self.excludePatterns: # safe to assume all are valid due to RegexList
         pattern = encodePath(pattern)  # use same encoding as filenames
         if re.compile(r"^%s$" % pattern).match(path):
            logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern)
            return added
      for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList
         pattern = encodePath(pattern)  # use same encoding as filenames
         if re.compile(r"^%s$" % pattern).match(os.path.basename(path)):
            logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern)
            return added
      if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)):
         logger.debug("Path [%s] is excluded based on ignore file.", path)
         return added
      if includePath:
         added += self.addDir(path)    # could actually be excluded by addDir, yet
      for entry in os.listdir(path):
         entrypath = os.path.join(path, entry)
         if os.path.isfile(entrypath):
            if linkDepth > 0 and dereference:
               derefpath = dereferenceLink(entrypath)
               if derefpath != entrypath:
                  added += self.addFile(derefpath)
            added += self.addFile(entrypath)
         elif os.path.isdir(entrypath):
            if os.path.islink(entrypath):
               if recursive:
                  if linkDepth > 0:
                     newDepth = linkDepth - 1
                     if dereference:
                        derefpath = dereferenceLink(entrypath)
                        if derefpath != entrypath:
                           added += self._addDirContentsInternal(derefpath, True, recursive, newDepth, dereference)
                        added += self.addDir(entrypath)
                     else:
                        added += self._addDirContentsInternal(entrypath, False, recursive, newDepth, dereference)
                  else:
                     added += self.addDir(entrypath)
               else:
                  added += self.addDir(entrypath)
            else:
               if recursive:
                  newDepth = linkDepth - 1
                  added += self._addDirContentsInternal(entrypath, True, recursive, newDepth, dereference)
               else:
                  added += self.addDir(entrypath)
      return added


   #################
   # Remove methods
   #################

   def removeFiles(self, pattern=None):
      """
      Removes file entries from the list.

      If C{pattern} is not passed in or is C{None}, then all file entries will
      be removed from the list.  Otherwise, only those file entries matching
      the pattern will be removed.  Any entry which does not exist on disk
      will be ignored (use L{removeInvalid} to purge those entries).

      This method might be fairly slow for large lists, since it must check the
      type of each item in the list.  If you know ahead of time that you want
      to exclude all files, then you will be better off setting L{excludeFiles}
      to C{True} before adding items to the list.

      @param pattern: Regular expression pattern representing entries to remove

      @return: Number of entries removed
      @raise ValueError: If the passed-in pattern is not a valid regular expression.
      """
      removed = 0
      if pattern is None:
         for entry in self[:]:
            if os.path.exists(entry) and os.path.isfile(entry):
               self.remove(entry)
               logger.debug("Removed path [%s] from list.", entry)
               removed += 1
      else:
         try:
            pattern = encodePath(pattern)  # use same encoding as filenames
            compiled = re.compile(pattern)
         except re.error:
            raise ValueError("Pattern is not a valid regular expression.")
         for entry in self[:]:
            if os.path.exists(entry) and os.path.isfile(entry):
               if compiled.match(entry):
                  self.remove(entry)
                  logger.debug("Removed path [%s] from list.", entry)
                  removed += 1
      logger.debug("Removed a total of %d entries.", removed)
      return removed

   def removeDirs(self, pattern=None):
      """
      Removes directory entries from the list.

      If C{pattern} is not passed in or is C{None}, then all directory entries
      will be removed from the list.  Otherwise, only those directory entries
      matching the pattern will be removed.  Any entry which does not exist on
      disk will be ignored (use L{removeInvalid} to purge those entries).

      This method might be fairly slow for large lists, since it must check the
      type of each item in the list.  If you know ahead of time that you want
      to exclude all directories, then you will be better off setting
      L{excludeDirs} to C{True} before adding items to the list (note that this
      will not prevent you from recursively adding the I{contents} of
      directories).

      @param pattern: Regular expression pattern representing entries to remove

      @return: Number of entries removed
      @raise ValueError: If the passed-in pattern is not a valid regular expression.
      """
      removed = 0
      if pattern is None:
         for entry in self[:]:
            if os.path.exists(entry) and os.path.isdir(entry):
               self.remove(entry)
               logger.debug("Removed path [%s] from list.", entry)
               removed += 1
      else:
         try:
            pattern = encodePath(pattern)  # use same encoding as filenames
            compiled = re.compile(pattern)
         except re.error:
            raise ValueError("Pattern is not a valid regular expression.")
         for entry in self[:]:
            if os.path.exists(entry) and os.path.isdir(entry):
               if compiled.match(entry):
                  self.remove(entry)
                  logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern)
                  removed += 1
      logger.debug("Removed a total of %d entries.", removed)
      return removed

   def removeLinks(self, pattern=None):
      """
      Removes soft link entries from the list.

      If C{pattern} is not passed in or is C{None}, then all soft link entries
      will be removed from the list.  Otherwise, only those soft link entries
      matching the pattern will be removed.  Any entry which does not exist on
      disk will be ignored (use L{removeInvalid} to purge those entries).

      This method might be fairly slow for large lists, since it must check the
      type of each item in the list.  If you know ahead of time that you want
      to exclude all soft links, then you will be better off setting
      L{excludeLinks} to C{True} before adding items to the list.

      @param pattern: Regular expression pattern representing entries to remove

      @return: Number of entries removed
      @raise ValueError: If the passed-in pattern is not a valid regular expression.
      """
      removed = 0
      if pattern is None:
         for entry in self[:]:
            if os.path.exists(entry) and os.path.islink(entry):
               self.remove(entry)
               logger.debug("Removed path [%s] from list.", entry)
               removed += 1
      else:
         try:
            pattern = encodePath(pattern)  # use same encoding as filenames
            compiled = re.compile(pattern)
         except re.error:
            raise ValueError("Pattern is not a valid regular expression.")
         for entry in self[:]:
            if os.path.exists(entry) and os.path.islink(entry):
               if compiled.match(entry):
                  self.remove(entry)
                  logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern)
                  removed += 1
      logger.debug("Removed a total of %d entries.", removed)
      return removed

   def removeMatch(self, pattern):
      """
      Removes from the list all entries matching a pattern.

      This method removes from the list all entries which match the passed in
      C{pattern}.  Since there is no need to check the type of each entry, it
      is faster to call this method than to call the L{removeFiles},
      L{removeDirs} or L{removeLinks} methods individually.  If you know which
      patterns you will want to remove ahead of time, you may be better off
      setting L{excludePatterns} or L{excludeBasenamePatterns} before adding
      items to the list.

      @note: Unlike when using the exclude lists, the pattern here is I{not}
      bounded at the front and the back of the string.  You can use any pattern
      you want.

      @param pattern: Regular expression pattern representing entries to remove

      @return: Number of entries removed.
      @raise ValueError: If the passed-in pattern is not a valid regular expression.
      """
      try:
         pattern = encodePath(pattern)  # use same encoding as filenames
         compiled = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")
      removed = 0
      for entry in self[:]:
         if compiled.match(entry):
            self.remove(entry)
            logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern)
            removed += 1
      logger.debug("Removed a total of %d entries.", removed)
      return removed

   def removeInvalid(self):
      """
      Removes from the list all entries that do not exist on disk.

      This method removes from the list all entries which do not currently
      exist on disk in some form.  No attention is paid to whether the entries
      are files or directories.

      @return: Number of entries removed.
      """
      removed = 0
      for entry in self[:]:
         if not os.path.exists(entry):
            self.remove(entry)
            logger.debug("Removed path [%s] from list.", entry)
            removed += 1
      logger.debug("Removed a total of %d entries.", removed)
      return removed


   ##################
   # Utility methods
   ##################

   def normalize(self):
      """Normalizes the list, ensuring that each entry is unique."""
      orig = len(self)
      self.sort()
      dups = filter(lambda x, self=self: self[x] == self[x+1], range(0, len(self) - 1)) # pylint: disable=W0110
      items = map(lambda x, self=self: self[x], dups)                                   # pylint: disable=W0110
      map(self.remove, items)
      new = len(self)
      logger.debug("Completed normalizing list; removed %d items (%d originally, %d now).", new-orig, orig, new)

   def verify(self):
      """
      Verifies that all entries in the list exist on disk.
      @return: C{True} if all entries exist, C{False} otherwise.
      """
      for entry in self:
         if not os.path.exists(entry):
            logger.debug("Path [%s] is invalid; list is not valid.", entry)
            return False
      logger.debug("All entries in list are valid.")
      return True


########################################################################
# SpanItem class definition
########################################################################

class SpanItem(object): # pylint: disable=R0903
   """
   Item returned by L{BackupFileList.generateSpan}.
   """
   def __init__(self, fileList, size, capacity, utilization):
      """
      Create object.
      @param fileList: List of files
      @param size: Size (in bytes) of files
      @param utilization: Utilization, as a percentage (0-100)
      """
      self.fileList = fileList
      self.size = size
      self.capacity = capacity
      self.utilization = utilization


########################################################################
# BackupFileList class definition
########################################################################

class BackupFileList(FilesystemList): # pylint: disable=R0904

   ######################
   # Class documentation
   ######################

   """
   List of files to be backed up.

   A BackupFileList is a L{FilesystemList} containing a list of files to be
   backed up.  It only contains files, not directories (soft links are treated
   like files).  On top of the generic functionality provided by
   L{FilesystemList}, this class adds functionality to keep a hash (checksum)
   for each file in the list, and it also provides a method to calculate the
   total size of the files in the list and a way to export the list into tar
   form.

   @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap,
          generateFitted, generateTarfile, removeUnchanged
   """

   ##############
   # Constructor
   ##############

   def __init__(self):
      """Initializes a list with no configured exclusions."""
      FilesystemList.__init__(self)


   ################################
   # Overridden superclass methods
   ################################

   def addDir(self, path):
      """
      Adds a directory to the list.

      Note that this class does not allow directories to be added by themselves
      (a backup list contains only files).  However, since links to directories
      are technically files, we allow them to be added.

      This method is implemented in terms of the superclass method, with one
      additional validation: the superclass method is only called if the
      passed-in path is both a directory and a link.  All of the superclass's
      existing validations and restrictions apply.

      @param path: Directory path to be added to the list
      @type path: String representing a path on disk

      @return: Number of items added to the list.

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      path = encodePath(path)
      path = normalizeDir(path)
      if os.path.isdir(path) and not os.path.islink(path):
         return 0
      else:
         return FilesystemList.addDir(self, path)


   ##################
   # Utility methods
   ##################

   def totalSize(self):
      """
      Returns the total size among all files in the list.
      Only files are counted.
      Soft links that point at files are ignored.
      Entries which do not exist on disk are ignored.
      @return: Total size, in bytes
      """
      total = 0.0
      for entry in self:
         if os.path.isfile(entry) and not os.path.islink(entry):
            total += float(os.stat(entry).st_size)
      return total

   def generateSizeMap(self):
      """
      Generates a mapping from file to file size in bytes.
      The mapping does include soft links, which are listed with size zero.
      Entries which do not exist on disk are ignored.
      @return: Dictionary mapping file to file size
      """
      table = { }
      for entry in self:
         if os.path.islink(entry):
            table[entry] = 0.0
         elif os.path.isfile(entry):
            table[entry] = float(os.stat(entry).st_size)
      return table

   def generateDigestMap(self, stripPrefix=None):
      """
      Generates a mapping from file to file digest.

      Currently, the digest is an SHA hash, which should be pretty secure.  In
      the future, this might be a different kind of hash, but we guarantee that
      the type of the hash will not change unless the library major version
      number is bumped.

      Entries which do not exist on disk are ignored.

      Soft links are ignored.  We would end up generating a digest for the file
      that the soft link points at, which doesn't make any sense.

      If C{stripPrefix} is passed in, then that prefix will be stripped from
      each key when the map is generated.  This can be useful in generating two
      "relative" digest maps to be compared to one another.

      @param stripPrefix: Common prefix to be stripped from paths
      @type stripPrefix: String with any contents

      @return: Dictionary mapping file to digest value
      @see: L{removeUnchanged}
      """
      table = { }
      if stripPrefix is not None:
         for entry in self:
            if os.path.isfile(entry) and not os.path.islink(entry):
               table[entry.replace(stripPrefix, "", 1)] = BackupFileList._generateDigest(entry)
      else:
         for entry in self:
            if os.path.isfile(entry) and not os.path.islink(entry):
               table[entry] = BackupFileList._generateDigest(entry)
      return table

   @staticmethod
   def _generateDigest(path):
      """
      Generates an SHA digest for a given file on disk.

      The original code for this function used this simplistic implementation,
      which requires reading the entire file into memory at once in order to
      generate a digest value::

         sha.new(open(path).read()).hexdigest()

      Not surprisingly, this isn't an optimal solution.  The U{Simple file
      hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>}
      Python Cookbook recipe describes how to incrementally generate a hash
      value by reading in chunks of data rather than reading the file all at
      once.  The recipe relies on the the C{update()} method of the various
      Python hashing algorithms.

      In my tests using a 110 MB file on CD, the original implementation
      requires 111 seconds.  This implementation requires only 40-45 seconds,
      which is a pretty substantial speed-up.

      Experience shows that reading in around 4kB (4096 bytes) at a time yields
      the best performance.  Smaller reads are quite a bit slower, and larger
      reads don't make much of a difference.  The 4kB number makes me a little
      suspicious, and I think it might be related to the size of a filesystem
      read at the hardware level.  However, I've decided to just hardcode 4096
      until I have evidence that shows it's worthwhile making the read size
      configurable.

      @param path: Path to generate digest for.

      @return: ASCII-safe SHA digest for the file.
      @raise OSError: If the file cannot be opened.
      """
      # pylint: disable=C0103,E1101
      try:
         import hashlib
         s = hashlib.sha1()
      except ImportError:
         import sha
         s = sha.new()
      f = open(path, mode="rb")  # in case platform cares about binary reads
      readBytes = 4096  # see notes above
      while readBytes > 0:
         readString = f.read(readBytes)
         s.update(readString)
         readBytes = len(readString)
      f.close()
      digest = s.hexdigest()
      logger.debug("Generated digest [%s] for file [%s].", digest, path)
      return digest

   def generateFitted(self, capacity, algorithm="worst_fit"):
      """
      Generates a list of items that fit in the indicated capacity.

      Sometimes, callers would like to include every item in a list, but are
      unable to because not all of the items fit in the space available.  This
      method returns a copy of the list, containing only the items that fit in
      a given capacity.  A copy is returned so that we don't lose any
      information if for some reason the fitted list is unsatisfactory.

      The fitting is done using the functions in the knapsack module.  By
      default, the first fit algorithm is used, but you can also choose
      from best fit, worst fit and alternate fit.

      @param capacity: Maximum capacity among the files in the new list
      @type capacity: Integer, in bytes

      @param algorithm: Knapsack (fit) algorithm to use
      @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

      @return: Copy of list with total size no larger than indicated capacity
      @raise ValueError: If the algorithm is invalid.
      """
      table = self._getKnapsackTable()
      function = BackupFileList._getKnapsackFunction(algorithm)
      return function(table, capacity)[0]

   def generateSpan(self, capacity, algorithm="worst_fit"):
      """
      Splits the list of items into sub-lists that fit in a given capacity.

      Sometimes, callers need split to a backup file list into a set of smaller
      lists.  For instance, you could use this to "span" the files across a set
      of discs.

      The fitting is done using the functions in the knapsack module.  By
      default, the first fit algorithm is used, but you can also choose
      from best fit, worst fit and alternate fit.

      @note: If any of your items are larger than the capacity, then it won't
      be possible to find a solution.  In this case, a value error will be
      raised.

      @param capacity: Maximum capacity among the files in the new list
      @type capacity: Integer, in bytes

      @param algorithm: Knapsack (fit) algorithm to use
      @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

      @return: List of L{SpanItem} objects.

      @raise ValueError: If the algorithm is invalid.
      @raise ValueError: If it's not possible to fit some items
      """
      spanItems = []
      function = BackupFileList._getKnapsackFunction(algorithm)
      table = self._getKnapsackTable(capacity)
      iteration = 0
      while len(table) > 0:
         iteration += 1
         fit = function(table, capacity)
         if len(fit[0]) == 0:
            # Should never happen due to validations in _convertToKnapsackForm(), but let's be safe
            raise ValueError("After iteration %d, unable to add any new items." % iteration)
         removeKeys(table, fit[0])
         utilization = (float(fit[1])/float(capacity))*100.0
         item = SpanItem(fit[0], fit[1], capacity, utilization)
         spanItems.append(item)
      return spanItems

   def _getKnapsackTable(self, capacity=None):
      """
      Converts the list into the form needed by the knapsack algorithms.
      @return: Dictionary mapping file name to tuple of (file path, file size).
      """
      table = { }
      for entry in self:
         if os.path.islink(entry):
            table[entry] = (entry, 0.0)
         elif os.path.isfile(entry):
            size = float(os.stat(entry).st_size)
            if capacity is not None:
               if size > capacity:
                  raise ValueError("File [%s] cannot fit in capacity %s." % (entry, displayBytes(capacity)))
            table[entry] = (entry, size)
      return table

   @staticmethod
   def _getKnapsackFunction(algorithm):
      """
      Returns a reference to the function associated with an algorithm name.
      Algorithm name must be one of "first_fit", "best_fit", "worst_fit", "alternate_fit"
      @param algorithm: Name of the algorithm
      @return: Reference to knapsack function
      @raise ValueError: If the algorithm name is unknown.
      """
      if algorithm == "first_fit":
         return firstFit
      elif algorithm == "best_fit":
         return bestFit
      elif algorithm == "worst_fit":
         return worstFit
      elif algorithm == "alternate_fit":
         return alternateFit
      else:
         raise ValueError("Algorithm [%s] is invalid." % algorithm)

   def generateTarfile(self, path, mode='tar', ignore=False, flat=False):
      """
      Creates a tar file containing the files in the list.

      By default, this method will create uncompressed tar files.  If you pass
      in mode C{'targz'}, then it will create gzipped tar files, and if you
      pass in mode C{'tarbz2'}, then it will create bzipped tar files.

      The tar file will be created as a GNU tar archive, which enables extended
      file name lengths, etc.  Since GNU tar is so prevalent, I've decided that
      the extra functionality out-weighs the disadvantage of not being
      "standard".

      If you pass in C{flat=True}, then a "flat" archive will be created, and
      all of the files will be added to the root of the archive.  So, the file
      C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}.

      By default, the whole method call fails if there are problems adding any
      of the files to the archive, resulting in an exception.  Under these
      circumstances, callers are advised that they might want to call
      L{removeInvalid()} and then attempt to extract the tar file a second
      time, since the most common cause of failures is a missing file (a file
      that existed when the list was built, but is gone again by the time the
      tar file is built).

      If you want to, you can pass in C{ignore=True}, and the method will
      ignore errors encountered when adding individual files to the archive
      (but not errors opening and closing the archive itself).

      We'll always attempt to remove the tarfile from disk if an exception will
      be thrown.

      @note: No validation is done as to whether the entries in the list are
      files, since only files or soft links should be in an object like this.
      However, to be safe, everything is explicitly added to the tar archive
      non-recursively so it's safe to include soft links to directories.

      @note: The Python C{tarfile} module, which is used internally here, is
      supposed to deal properly with long filenames and links.  In my testing,
      I have found that it appears to be able to add long really long filenames
      to archives, but doesn't do a good job reading them back out, even out of
      an archive it created.  Fortunately, all Cedar Backup does is add files
      to archives.

      @param path: Path of tar file to create on disk
      @type path: String representing a path on disk

      @param mode: Tar creation mode
      @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'}

      @param ignore: Indicates whether to ignore certain errors.
      @type ignore: Boolean

      @param flat: Creates "flat" archive by putting all items in root
      @type flat: Boolean

      @raise ValueError: If mode is not valid
      @raise ValueError: If list is empty
      @raise ValueError: If the path could not be encoded properly.
      @raise TarError: If there is a problem creating the tar file
      """
      # pylint: disable=E1101
      path = encodePath(path)
      if len(self) == 0: raise ValueError("Empty list cannot be used to generate tarfile.")
      if mode == 'tar': tarmode = "w:"
      elif mode == 'targz': tarmode = "w:gz"
      elif mode == 'tarbz2': tarmode = "w:bz2"
      else: raise ValueError("Mode [%s] is not valid." % mode)
      try:
         tar = tarfile.open(path, tarmode)
         try:
            tar.format = tarfile.GNU_FORMAT
         except AttributeError:
            tar.posix = False
         for entry in self:
            try:
               if flat:
                  tar.add(entry, arcname=os.path.basename(entry), recursive=False)
               else:
                  tar.add(entry, recursive=False)
            except tarfile.TarError, e:
               if not ignore:
                  raise e
               logger.info("Unable to add file [%s]; going on anyway.", entry)
            except OSError, e:
               if not ignore:
                  raise tarfile.TarError(e)
               logger.info("Unable to add file [%s]; going on anyway.", entry)
         tar.close()
      except tarfile.ReadError, e:
         try: tar.close()
         except: pass
         if os.path.exists(path):
            try: os.remove(path)
            except: pass
         raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path)
      except tarfile.TarError, e:
         try: tar.close()
         except: pass
         if os.path.exists(path):
            try: os.remove(path)
            except: pass
         raise e

   def removeUnchanged(self, digestMap, captureDigest=False):
      """
      Removes unchanged entries from the list.

      This method relies on a digest map as returned from L{generateDigestMap}.
      For each entry in C{digestMap}, if the entry also exists in the current
      list I{and} the entry in the current list has the same digest value as in
      the map, the entry in the current list will be removed.

      This method offers a convenient way for callers to filter unneeded
      entries from a list.  The idea is that a caller will capture a digest map
      from C{generateDigestMap} at some point in time (perhaps the beginning of
      the week), and will save off that map using C{pickle} or some other
      method.  Then, the caller could use this method sometime in the future to
      filter out any unchanged files based on the saved-off map.

      If C{captureDigest} is passed-in as C{True}, then digest information will
      be captured for the entire list before the removal step occurs using the
      same rules as in L{generateDigestMap}.  The check will involve a lookup
      into the complete digest map.

      If C{captureDigest} is passed in as C{False}, we will only generate a
      digest value for files we actually need to check, and we'll ignore any
      entry in the list which isn't a file that currently exists on disk.

      The return value varies depending on C{captureDigest}, as well.  To
      preserve backwards compatibility, if C{captureDigest} is C{False}, then
      we'll just return a single value representing the number of entries
      removed.  Otherwise, we'll return a tuple of C{(entries removed, digest
      map)}.  The returned digest map will be in exactly the form returned by
      L{generateDigestMap}.

      @note: For performance reasons, this method actually ends up rebuilding
      the list from scratch.  First, we build a temporary dictionary containing
      all of the items from the original list.  Then, we remove items as needed
      from the dictionary (which is faster than the equivalent operation on a
      list).  Finally, we replace the contents of the current list based on the
      keys left in the dictionary.  This should be transparent to the caller.

      @param digestMap: Dictionary mapping file name to digest value.
      @type digestMap: Map as returned from L{generateDigestMap}.

      @param captureDigest: Indicates that digest information should be captured.
      @type captureDigest: Boolean

      @return: Results as discussed above (format varies based on arguments)
      """
      if captureDigest:
         removed = 0
         table = {}
         captured = {}
         for entry in self:
            if os.path.isfile(entry) and not os.path.islink(entry):
               table[entry] = BackupFileList._generateDigest(entry)
               captured[entry] = table[entry]
            else:
               table[entry] = None
         for entry in digestMap.keys():
            if table.has_key(entry):
               if table[entry] is not None:  # equivalent to file/link check in other case
                  digest = table[entry]
                  if digest == digestMap[entry]:
                     removed += 1
                     del table[entry]
                     logger.debug("Discarded unchanged file [%s].", entry)
         self[:] = table.keys()
         return (removed, captured)
      else:
         removed = 0
         table = {}
         for entry in self:
            table[entry] = None
         for entry in digestMap.keys():
            if table.has_key(entry):
               if os.path.isfile(entry) and not os.path.islink(entry):
                  digest = BackupFileList._generateDigest(entry)
                  if digest == digestMap[entry]:
                     removed += 1
                     del table[entry]
                     logger.debug("Discarded unchanged file [%s].", entry)
         self[:] = table.keys()
         return removed


########################################################################
# PurgeItemList class definition
########################################################################

class PurgeItemList(FilesystemList): # pylint: disable=R0904

   ######################
   # Class documentation
   ######################

   """
   List of files and directories to be purged.

   A PurgeItemList is a L{FilesystemList} containing a list of files and
   directories to be purged.  On top of the generic functionality provided by
   L{FilesystemList}, this class adds functionality to remove items that are
   too young to be purged, and to actually remove each item in the list from
   the filesystem.

   The other main difference is that when you add a directory's contents to a
   purge item list, the directory itself is not added to the list.  This way,
   if someone asks to purge within in C{/opt/backup/collect}, that directory
   doesn't get removed once all of the files within it is gone.
   """

   ##############
   # Constructor
   ##############

   def __init__(self):
      """Initializes a list with no configured exclusions."""
      FilesystemList.__init__(self)


   ##############
   # Add methods
   ##############

   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
      """
      Adds the contents of a directory to the list.

      The path must exist and must be a directory or a link to a directory.
      The contents of the directory (but I{not} the directory path itself) will
      be recursively added to the list, subject to any exclusions that are in
      place.  If you only want the directory and its contents to be added, then
      pass in C{recursive=False}.

      @note: If a directory's absolute path matches an exclude pattern or path,
      or if the directory contains the configured ignore file, then the
      directory and all of its contents will be recursively excluded from the
      list.

      @note: If the passed-in directory happens to be a soft link, it will be
      recursed.  However, the linkDepth parameter controls whether any soft
      links I{within} the directory will be recursed.  The link depth is
      maximum depth of the tree at which soft links should be followed.  So, a
      depth of 0 does not follow any soft links, a depth of 1 follows only
      links within the passed-in directory, a depth of 2 follows the links at
      the next level down, etc.

      @note: Any invalid soft links (i.e.  soft links that point to
      non-existent items) will be silently ignored.

      @note: The L{excludeDirs} flag only controls whether any given soft link
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @note: The L{excludeDirs} flag only controls whether any given directory
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @note: If you call this method I{on a link to a directory} that link will
      never be dereferenced (it may, however, be followed).

      @param path: Directory path whose contents should be added to the list
      @type path: String representing a path on disk

      @param recursive: Indicates whether directory contents should be added recursively.
      @type recursive: Boolean value

      @param addSelf: Ignored in this subclass.

      @param linkDepth: Depth of soft links that should be followed
      @type linkDepth: Integer value, where zero means not to follow any soft links

      @param dereference: Indicates whether soft links, if followed, should be dereferenced
      @type dereference: Boolean value

      @return: Number of items recursively added to the list

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      path = encodePath(path)
      path = normalizeDir(path)
      return super(PurgeItemList, self)._addDirContentsInternal(path, False, recursive, linkDepth, dereference)


   ##################
   # Utility methods
   ##################

   def removeYoungFiles(self, daysOld):
      """
      Removes from the list files younger than a certain age (in days).

      Any file whose "age" in days is less than (C{<}) the value of the
      C{daysOld} parameter will be removed from the list so that it will not be
      purged later when L{purgeItems} is called.  Directories and soft links
      will be ignored.

      The "age" of a file is the amount of time since the file was last used,
      per the most recent of the file's C{st_atime} and C{st_mtime} values.

      @note: Some people find the "sense" of this method confusing or
      "backwards".  Keep in mind that this method is used to remove items
      I{from the list}, not from the filesystem!  It removes from the list
      those items that you would I{not} want to purge because they are too
      young.  As an example, passing in C{daysOld} of zero (0) would remove
      from the list no files, which would result in purging all of the files
      later.  I would be happy to make a synonym of this method with an
      easier-to-understand "sense", if someone can suggest one.

      @param daysOld: Minimum age of files that are to be kept in the list.
      @type daysOld: Integer value >= 0.

      @return: Number of entries removed
      """
      removed = 0
      daysOld = int(daysOld)
      if daysOld < 0:
         raise ValueError("Days old value must be an integer >= 0.")
      for entry in self[:]:
         if os.path.isfile(entry) and not os.path.islink(entry):
            try:
               ageInDays = calculateFileAge(entry)
               ageInWholeDays = math.floor(ageInDays)
               if ageInWholeDays < 0: ageInWholeDays = 0
               if ageInWholeDays < daysOld:
                  removed += 1
                  self.remove(entry)
            except OSError:
               pass
      return removed

   def purgeItems(self):
      """
      Purges all items in the list.

      Every item in the list will be purged.  Directories in the list will
      I{not} be purged recursively, and hence will only be removed if they are
      empty.  Errors will be ignored.

      To faciliate easy removal of directories that will end up being empty,
      the delete process happens in two passes: files first (including soft
      links), then directories.

      @return: Tuple containing count of (files, dirs) removed
      """
      files = 0
      dirs = 0
      for entry in self:
         if os.path.exists(entry) and (os.path.isfile(entry) or os.path.islink(entry)):
            try:
               os.remove(entry)
               files += 1
               logger.debug("Purged file [%s].", entry)
            except OSError:
               pass
      for entry in self:
         if os.path.exists(entry) and os.path.isdir(entry) and not os.path.islink(entry):
            try:
               os.rmdir(entry)
               dirs += 1
               logger.debug("Purged empty directory [%s].", entry)
            except OSError:
               pass
      return (files, dirs)


########################################################################
# Public functions
########################################################################

##########################
# normalizeDir() function
##########################

def normalizeDir(path):
   """
   Normalizes a directory name.

   For our purposes, a directory name is normalized by removing the trailing
   path separator, if any.  This is important because we want directories to
   appear within lists in a consistent way, although from the user's
   perspective passing in C{/path/to/dir/} and C{/path/to/dir} are equivalent.

   @param path: Path to be normalized.
   @type path: String representing a path on disk

   @return: Normalized path, which should be equivalent to the original.
   """
   if path != os.sep and path[-1:] == os.sep:
      return path[:-1]
   return path


#############################
# compareContents() function
#############################

def compareContents(path1, path2, verbose=False):
   """
   Compares the contents of two directories to see if they are equivalent.

   The two directories are recursively compared.  First, we check whether they
   contain exactly the same set of files.  Then, we check to see every given
   file has exactly the same contents in both directories.

   This is all relatively simple to implement through the magic of
   L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix
   off the front of each entry in the mapping it generates.  This makes our
   comparison as simple as creating a list for each path, then generating a
   digest map for each path and comparing the two.

   If no exception is thrown, the two directories are considered identical.

   If the C{verbose} flag is C{True}, then an alternate (but slower) method is
   used so that any thrown exception can indicate exactly which file caused the
   comparison to fail.  The thrown C{ValueError} exception distinguishes
   between the directories containing different files, and containing the same
   files with differing content.

   @note: Symlinks are I{not} followed for the purposes of this comparison.

   @param path1: First path to compare.
   @type path1: String representing a path on disk

   @param path2: First path to compare.
   @type path2: String representing a path on disk

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If a directory doesn't exist or can't be read.
   @raise ValueError: If the two directories are not equivalent.
   @raise IOError: If there is an unusual problem reading the directories.
   """
   try:
      path1List = BackupFileList()
      path1List.addDirContents(path1)
      path1Digest = path1List.generateDigestMap(stripPrefix=normalizeDir(path1))
      path2List = BackupFileList()
      path2List.addDirContents(path2)
      path2Digest = path2List.generateDigestMap(stripPrefix=normalizeDir(path2))
      compareDigestMaps(path1Digest, path2Digest, verbose)
   except IOError, e:
      logger.error("I/O error encountered during consistency check.")
      raise e

def compareDigestMaps(digest1, digest2, verbose=False):
   """
   Compares two digest maps and throws an exception if they differ.

   @param digest1: First digest to compare.
   @type digest1: Digest as returned from BackupFileList.generateDigestMap()

   @param digest2: Second digest to compare.
   @type digest2: Digest as returned from BackupFileList.generateDigestMap()

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If the two directories are not equivalent.
   """
   if not verbose:
      if digest1 != digest2:
         raise ValueError("Consistency check failed.")
   else:
      list1 = UnorderedList(digest1.keys())
      list2 = UnorderedList(digest2.keys())
      if list1 != list2:
         raise ValueError("Directories contain a different set of files.")
      for key in list1:
         if digest1[key] != digest2[key]:
            raise ValueError("File contents for [%s] vary between directories." % key)