This file is indexed.

/usr/share/pyshared/swift/common/db.py is in python-swift 1.4.8-0ubuntu2.5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

# Copyright (c) 2010-2012 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" Database code for Swift """

from __future__ import with_statement
from contextlib import contextmanager
import hashlib
import logging
import operator
import os
from uuid import uuid4
import sys
import time
import cPickle as pickle
import errno
from tempfile import mkstemp

from eventlet import sleep, Timeout
import simplejson as json
import sqlite3

from swift.common.utils import normalize_timestamp, renamer, \
        mkdirs, lock_parent_directory, fallocate
from swift.common.constraints import MAX_META_COUNT, MAX_META_OVERALL_SIZE
from swift.common.exceptions import LockTimeout
from webob.exc import HTTPBadRequest


#: Timeout for trying to connect to a DB
BROKER_TIMEOUT = 25
#: Pickle protocol to use
PICKLE_PROTOCOL = 2
#: Max number of pending entries
PENDING_CAP = 131072


class DatabaseConnectionError(sqlite3.DatabaseError):
    """More friendly error messages for DB Errors."""

    def __init__(self, path, msg, timeout=0):
        self.path = path
        self.timeout = timeout
        self.msg = msg

    def __str__(self):
        return 'DB connection error (%s, %s):\n%s' % (
                self.path, self.timeout, self.msg)


class GreenDBConnection(sqlite3.Connection):
    """SQLite DB Connection handler that plays well with eventlet."""

    def __init__(self, *args, **kwargs):
        self.timeout = kwargs.get('timeout', BROKER_TIMEOUT)
        kwargs['timeout'] = 0
        self.db_file = args and args[0] or '-'
        sqlite3.Connection.__init__(self, *args, **kwargs)

    def _timeout(self, call):
        with LockTimeout(self.timeout, self.db_file):
            while True:
                try:
                    return call()
                except sqlite3.OperationalError, e:
                    if 'locked' not in str(e):
                        raise
                sleep(0.05)

    def execute(self, *args, **kwargs):
        return self._timeout(lambda: sqlite3.Connection.execute(
                                        self, *args, **kwargs))

    def commit(self):
        return self._timeout(lambda: sqlite3.Connection.commit(self))


def dict_factory(crs, row):
    """
    This should only be used when you need a real dict,
    i.e. when you're going to serialize the results.
    """
    return dict(
        ((col[0], row[idx]) for idx, col in enumerate(crs.description)))

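# A minimal usage sketch for dict_factory (the connection and query are
# illustrative): attach it to a cursor so each row comes back as a dict,
# e.g. just before json.dumps()-ing the results.
#
#     curs = conn.execute('SELECT name, size FROM object')
#     curs.row_factory = dict_factory
#     rows = [row for row in curs]  # [{'name': ..., 'size': ...}, ...]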

def chexor(old, name, timestamp):
    """
    Each entry in the account and container databases is XORed by the 128-bit
    hash on insert or delete.  This serves as a rolling, order-independent hash
    of the contents. (check + XOR)

    :param old: hex representation of the current DB hash
    :param name: name of the object or container being inserted
    :param timestamp: timestamp of the new record
    :returns: a hex representation of the new hash value
    """
    if name is None:
        raise Exception('name is None!')
    old = old.decode('hex')
    new = hashlib.md5(('%s-%s' % (name, timestamp)).encode('utf_8')).digest()
    response = ''.join(
        map(chr, map(operator.xor, map(ord, old), map(ord, new))))
    return response.encode('hex')

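# A small sketch of chexor's order independence (names and timestamps are
# illustrative): since XOR is commutative, folding the same records into the
# hash in either order produces the same value.
#
#     start = '0' * 32  # matches the hash column default in the *_stat tables
#     a = chexor(chexor(start, 'obj1', '0000001234.00000'), 'obj2',
#                '0000005678.00000')
#     b = chexor(chexor(start, 'obj2', '0000005678.00000'), 'obj1',
#                '0000001234.00000')
#     assert a == b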

def get_db_connection(path, timeout=30, okay_to_create=False):
    """
    Returns a properly configured SQLite database connection.

    :param path: path to DB
    :param timeout: timeout for connection
    :param okay_to_create: if True, create the DB if it doesn't exist
    :returns: DB connection object
    """
    try:
        connect_time = time.time()
        conn = sqlite3.connect(path, check_same_thread=False,
                    factory=GreenDBConnection, timeout=timeout)
        if path != ':memory:' and not okay_to_create:
            # attempt to detect and fail when connect creates the db file
            stat = os.stat(path)
            if stat.st_size == 0 and stat.st_ctime >= connect_time:
                os.unlink(path)
                raise DatabaseConnectionError(path,
                    'DB file created by connect?')
        conn.row_factory = sqlite3.Row
        conn.text_factory = str
        conn.execute('PRAGMA synchronous = NORMAL')
        conn.execute('PRAGMA count_changes = OFF')
        conn.execute('PRAGMA temp_store = MEMORY')
        conn.execute('PRAGMA journal_mode = DELETE')
        conn.create_function('chexor', 3, chexor)
    except sqlite3.DatabaseError:
        import traceback
        raise DatabaseConnectionError(path, traceback.format_exc(),
                timeout=timeout)
    return conn

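# A minimal usage sketch (the path below is hypothetical):
#
#     conn = get_db_connection('/path/to/container.db')
#     row = conn.execute('SELECT id FROM container_stat').fetchone()
#
# Note that connecting to a missing path raises DatabaseConnectionError
# unless okay_to_create is True, since sqlite3.connect would otherwise
# silently create an empty DB file.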

class DatabaseBroker(object):
    """Encapsulates working with a database."""

    def __init__(self, db_file, timeout=BROKER_TIMEOUT, logger=None,
                 account=None, container=None, pending_timeout=10,
                 stale_reads_ok=False):
        """ Encapsulates working with a database. """
        self.conn = None
        self.db_file = db_file
        self.pending_file = self.db_file + '.pending'
        self.pending_timeout = pending_timeout
        self.stale_reads_ok = stale_reads_ok
        self.db_dir = os.path.dirname(db_file)
        self.timeout = timeout
        self.logger = logger or logging.getLogger()
        self.account = account
        self.container = container
        self._db_version = -1

    def initialize(self, put_timestamp=None):
        """
        Create the DB

        :param put_timestamp: timestamp of initial PUT request
        """
        if self.db_file == ':memory:':
            tmp_db_file = None
            conn = get_db_connection(self.db_file, self.timeout)
        else:
            mkdirs(self.db_dir)
            fd, tmp_db_file = mkstemp(suffix='.tmp', dir=self.db_dir)
            os.close(fd)
            conn = sqlite3.connect(tmp_db_file, check_same_thread=False,
                        factory=GreenDBConnection, timeout=0)
        # creating dbs implicitly does a lot of transactions, so we
        # pick fast, unsafe options here and do a big fsync at the end.
        conn.execute('PRAGMA synchronous = OFF')
        conn.execute('PRAGMA temp_store = MEMORY')
        conn.execute('PRAGMA journal_mode = MEMORY')
        conn.create_function('chexor', 3, chexor)
        conn.row_factory = sqlite3.Row
        conn.text_factory = str
        conn.executescript("""
            CREATE TABLE outgoing_sync (
                remote_id TEXT UNIQUE,
                sync_point INTEGER,
                updated_at TEXT DEFAULT 0
            );
            CREATE TABLE incoming_sync (
                remote_id TEXT UNIQUE,
                sync_point INTEGER,
                updated_at TEXT DEFAULT 0
            );
            CREATE TRIGGER outgoing_sync_insert AFTER INSERT ON outgoing_sync
            BEGIN
                UPDATE outgoing_sync
                SET updated_at = STRFTIME('%s', 'NOW')
                WHERE ROWID = new.ROWID;
            END;
            CREATE TRIGGER outgoing_sync_update AFTER UPDATE ON outgoing_sync
            BEGIN
                UPDATE outgoing_sync
                SET updated_at = STRFTIME('%s', 'NOW')
                WHERE ROWID = new.ROWID;
            END;
            CREATE TRIGGER incoming_sync_insert AFTER INSERT ON incoming_sync
            BEGIN
                UPDATE incoming_sync
                SET updated_at = STRFTIME('%s', 'NOW')
                WHERE ROWID = new.ROWID;
            END;
            CREATE TRIGGER incoming_sync_update AFTER UPDATE ON incoming_sync
            BEGIN
                UPDATE incoming_sync
                SET updated_at = STRFTIME('%s', 'NOW')
                WHERE ROWID = new.ROWID;
            END;
        """)
        if not put_timestamp:
            put_timestamp = normalize_timestamp(0)
        self._initialize(conn, put_timestamp)
        conn.commit()
        if tmp_db_file:
            conn.close()
            with open(tmp_db_file, 'r+b') as fp:
                os.fsync(fp.fileno())
            with lock_parent_directory(self.db_file, self.pending_timeout):
                if os.path.exists(self.db_file):
                    # A race condition: another process created this DB
                    # while we were still building ours.
                    raise DatabaseConnectionError(self.db_file,
                            'DB created by someone else while working?')
                renamer(tmp_db_file, self.db_file)
            self.conn = get_db_connection(self.db_file, self.timeout)
        else:
            self.conn = conn

    def delete_db(self, timestamp):
        """
        Mark the DB as deleted

        :param timestamp: delete timestamp
        """
        timestamp = normalize_timestamp(timestamp)
        # first, clear the metadata
        cleared_meta = {}
        for k in self.metadata.iterkeys():
            cleared_meta[k] = ('', timestamp)
        self.update_metadata(cleared_meta)
        # then mark the db as deleted
        with self.get() as conn:
            self._delete_db(conn, timestamp)
            conn.commit()

    def possibly_quarantine(self, exc_type, exc_value, exc_traceback):
        """
        Checks the exception info to see if it indicates a quarantine situation
        (malformed or corrupted database). If not, the original exception will
        be reraised. If so, the database will be quarantined and a new
        sqlite3.DatabaseError will be raised indicating the action taken.
        """
        if 'database disk image is malformed' in str(exc_value):
            exc_hint = 'malformed'
        elif 'file is encrypted or is not a database' in str(exc_value):
            exc_hint = 'corrupted'
        else:
            raise exc_type(*exc_value.args), None, exc_traceback
        prefix_path = os.path.dirname(self.db_dir)
        partition_path = os.path.dirname(prefix_path)
        dbs_path = os.path.dirname(partition_path)
        device_path = os.path.dirname(dbs_path)
        quar_path = os.path.join(device_path, 'quarantined',
            self.db_type + 's', os.path.basename(self.db_dir))
        try:
            renamer(self.db_dir, quar_path)
        except OSError, e:
            if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
                raise
            quar_path = "%s-%s" % (quar_path, uuid4().hex)
            renamer(self.db_dir, quar_path)
        detail = _('Quarantined %s to %s due to %s database') % \
                 (self.db_dir, quar_path, exc_hint)
        self.logger.error(detail)
        raise sqlite3.DatabaseError(detail)

    @contextmanager
    def get(self):
        """Use with the "with" statement; returns a database connection."""
        if not self.conn:
            if self.db_file != ':memory:' and os.path.exists(self.db_file):
                try:
                    self.conn = get_db_connection(self.db_file, self.timeout)
                except (sqlite3.DatabaseError, DatabaseConnectionError):
                    self.possibly_quarantine(*sys.exc_info())
            else:
                raise DatabaseConnectionError(self.db_file, "DB doesn't exist")
        conn = self.conn
        self.conn = None
        try:
            yield conn
            conn.rollback()
            self.conn = conn
        except sqlite3.DatabaseError:
            try:
                conn.close()
            except:
                pass
            self.possibly_quarantine(*sys.exc_info())
        except (Exception, Timeout):
            conn.close()
            raise
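
    # A minimal sketch of using get() (broker stands for any initialized
    # DatabaseBroker subclass):
    #
    #     with broker.get() as conn:
    #         row = conn.execute('SELECT hash FROM container_stat').fetchone()
    #
    # Uncommitted changes are rolled back when the block exits, so writers
    # must call conn.commit() before leaving the block.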

    @contextmanager
    def lock(self):
        """Use with the "with" statement; locks a database."""
        if not self.conn:
            if self.db_file != ':memory:' and os.path.exists(self.db_file):
                self.conn = get_db_connection(self.db_file, self.timeout)
            else:
                raise DatabaseConnectionError(self.db_file, "DB doesn't exist")
        conn = self.conn
        self.conn = None
        orig_isolation_level = conn.isolation_level
        conn.isolation_level = None
        conn.execute('BEGIN IMMEDIATE')
        try:
            yield True
        except (Exception, Timeout):
            pass
        try:
            conn.execute('ROLLBACK')
            conn.isolation_level = orig_isolation_level
            self.conn = conn
        except (Exception, Timeout):
            logging.exception(
                _('Broker error trying to rollback locked connection'))
            conn.close()

    def newid(self, remote_id):
        """
        Re-id the database.  This should be called after an rsync.

        :param remote_id: the ID of the remote database being rsynced in
        """
        with self.get() as conn:
            row = conn.execute('''
                UPDATE %s_stat SET id=?
            ''' % self.db_type, (str(uuid4()),))
            row = conn.execute('''
                SELECT ROWID FROM %s ORDER BY ROWID DESC LIMIT 1
            ''' % self.db_contains_type).fetchone()
            sync_point = row['ROWID'] if row else -1
            conn.execute('''
                INSERT OR REPLACE INTO incoming_sync (sync_point, remote_id)
                VALUES (?, ?)
            ''', (sync_point, remote_id))
            self._newid(conn)
            conn.commit()

    def _newid(self, conn):
        # Override for additional work when receiving an rsynced db.
        pass

    def merge_timestamps(self, created_at, put_timestamp, delete_timestamp):
        """
        Used in replication to handle updating timestamps.

        :param created_at: create timestamp
        :param put_timestamp: put timestamp
        :param delete_timestamp: delete timestamp
        """
        with self.get() as conn:
            conn.execute('''
                UPDATE %s_stat SET created_at=MIN(?, created_at),
                                   put_timestamp=MAX(?, put_timestamp),
                                   delete_timestamp=MAX(?, delete_timestamp)
            ''' % self.db_type, (created_at, put_timestamp, delete_timestamp))
            conn.commit()
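
    # A sketch of the merge_timestamps semantics (timestamps are illustrative
    # normalized values): the earliest create wins, the latest put and delete
    # win.
    #
    #     broker.merge_timestamps('0000000001.00000',  # created_at -> MIN
    #                             '0000000005.00000',  # put_timestamp -> MAX
    #                             '0000000003.00000')  # delete_timestamp -> MAX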

    def get_items_since(self, start, count):
        """
        Get a list of objects from the database whose ROWID is greater than
        start, up to count entries.

        :param start: start ROWID; only rows with a greater ROWID are returned
        :param count: maximum number of rows to return
        :returns: list of objects with ROWID greater than start
        """
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        with self.get() as conn:
            curs = conn.execute('''
                SELECT * FROM %s WHERE ROWID > ? ORDER BY ROWID ASC LIMIT ?
            ''' % self.db_contains_type, (start, count))
            curs.row_factory = dict_factory
            return [r for r in curs]

    def get_sync(self, id, incoming=True):
        """
        Gets the most recent sync point for a server from the sync table.

        :param id: remote ID to get the sync_point for
        :param incoming: if True, get the last incoming sync, otherwise get
                         the last outgoing sync
        :returns: the sync point, or -1 if the id doesn't exist.
        """
        with self.get() as conn:
            row = conn.execute(
                "SELECT sync_point FROM %s_sync WHERE remote_id=?"
                % ('incoming' if incoming else 'outgoing'), (id,)).fetchone()
            if not row:
                return -1
            return row['sync_point']

    def get_syncs(self, incoming=True):
        """
        Get a serialized copy of the sync table.

        :param incoming: if True, read from the incoming sync table, otherwise
                         read from the outgoing sync table
        :returns: list of {'remote_id', 'sync_point'}
        """
        with self.get() as conn:
            curs = conn.execute('''
                SELECT remote_id, sync_point FROM %s_sync
            ''' % ('incoming' if incoming else 'outgoing'))
            result = []
            for row in curs:
                result.append({'remote_id': row[0], 'sync_point': row[1]})
            return result

    def get_replication_info(self):
        """
        Get information about the DB required for replication.

        :returns: dict containing keys: hash, id, created_at, put_timestamp,
            delete_timestamp, count, max_row, and metadata
        """
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        query_part1 = '''
            SELECT hash, id, created_at, put_timestamp, delete_timestamp,
                %s_count AS count,
                CASE WHEN SQLITE_SEQUENCE.seq IS NOT NULL
                    THEN SQLITE_SEQUENCE.seq ELSE -1 END AS max_row, ''' % \
            self.db_contains_type
        query_part2 = '''
            FROM (%s_stat LEFT JOIN SQLITE_SEQUENCE
                  ON SQLITE_SEQUENCE.name == '%s') LIMIT 1
        ''' % (self.db_type, self.db_contains_type)
        with self.get() as conn:
            try:
                curs = conn.execute(query_part1 + 'metadata' + query_part2)
            except sqlite3.OperationalError, err:
                if 'no such column: metadata' not in str(err):
                    raise
                curs = conn.execute(query_part1 + "'' as metadata" +
                                    query_part2)
            curs.row_factory = dict_factory
            return curs.fetchone()

    def _commit_puts(self):
        pass    # stub to be overridden if need be

    def merge_syncs(self, sync_points, incoming=True):
        """
        Merge a list of sync points with the incoming or outgoing sync table.

        :param sync_points: list of sync points where a sync point is a dict of
                            {'sync_point', 'remote_id'}
        :param incoming: if True, merge into the incoming sync table, otherwise
                         merge into the outgoing sync table
        """
        with self.get() as conn:
            for rec in sync_points:
                try:
                    conn.execute('''
                        INSERT INTO %s_sync (sync_point, remote_id)
                        VALUES (?, ?)
                    ''' % ('incoming' if incoming else 'outgoing'),
                    (rec['sync_point'], rec['remote_id']))
                except sqlite3.IntegrityError:
                    conn.execute('''
                        UPDATE %s_sync SET sync_point=max(?, sync_point)
                        WHERE remote_id=?
                    ''' % ('incoming' if incoming else 'outgoing'),
                    (rec['sync_point'], rec['remote_id']))
            conn.commit()

    def _preallocate(self):
        """
        The idea is to allocate space in front of an expanding db.  If it gets
        within 512k of a boundary, it allocates to the next boundary.
        Boundaries are 1m, 2m, 5m, 10m, 25m, 50m, then every 50m after.
        """
        if self.db_file == ':memory:':
            return
        MB = (1024 * 1024)

        def prealloc_points():
            for pm in (1, 2, 5, 10, 25, 50):
                yield pm * MB
            while True:
                pm += 50
                yield pm * MB

        stat = os.stat(self.db_file)
        file_size = stat.st_size
        allocated_size = stat.st_blocks * 512
        for point in prealloc_points():
            if file_size <= point - MB / 2:
                prealloc_size = point
                break
        if allocated_size < prealloc_size:
            with open(self.db_file, 'rb+') as fp:
                fallocate(fp.fileno(), int(prealloc_size))
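
    # A worked example of the boundary logic above: for a 1.6m db file, the
    # first point satisfying file_size <= point - MB / 2 is 5m (1.6m is
    # within 512k of the 2m boundary), so the file would be fallocated out
    # to 5m once its allocated blocks fall short of that.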

    @property
    def metadata(self):
        """
        Returns the metadata dict for the database. The metadata dict values
        are tuples of (value, timestamp) where the timestamp indicates when
        that key was set to that value.
        """
        with self.get() as conn:
            try:
                metadata = conn.execute('SELECT metadata FROM %s_stat' %
                                        self.db_type).fetchone()[0]
            except sqlite3.OperationalError, err:
                if 'no such column: metadata' not in str(err):
                    raise
                metadata = ''
        if metadata:
            metadata = json.loads(metadata)
        else:
            metadata = {}
        return metadata

    @staticmethod
    def validate_metadata(metadata):
        """
        Validates that metadata falls within acceptable limits.

        :param metadata: to be validated
        :raises: HTTPBadRequest if MAX_META_COUNT or MAX_META_OVERALL_SIZE
                 is exceeded
        """
        meta_count = 0
        meta_size = 0
        for key, (value, timestamp) in metadata.iteritems():
            key = key.lower()
            if value != '' and (key.startswith('x-account-meta') or
                                key.startswith('x-container-meta')):
                prefix = 'x-account-meta-'
                if key.startswith('x-container-meta-'):
                    prefix = 'x-container-meta-'
                key = key[len(prefix):]
                meta_count = meta_count + 1
                meta_size = meta_size + len(key) + len(value)
        if meta_count > MAX_META_COUNT:
            raise HTTPBadRequest('Too many metadata items; max %d'
                                 % MAX_META_COUNT)
        if meta_size > MAX_META_OVERALL_SIZE:
            raise HTTPBadRequest('Total metadata too large; max %d'
                                 % MAX_META_OVERALL_SIZE)

    def update_metadata(self, metadata_updates, validate_metadata=False):
        """
        Updates the metadata dict for the database. The metadata dict values
        are tuples of (value, timestamp) where the timestamp indicates when
        that key was set to that value. Key/values will only be overwritten if
        the timestamp is newer. To delete a key, set its value to ('',
        timestamp). These empty keys will eventually be removed by
        :func:`reclaim`.
        """
        old_metadata = self.metadata
        if set(metadata_updates).issubset(set(old_metadata)):
            for key, (value, timestamp) in metadata_updates.iteritems():
                if timestamp > old_metadata[key][1]:
                    break
            else:
                return
        with self.get() as conn:
            try:
                md = conn.execute('SELECT metadata FROM %s_stat' %
                                  self.db_type).fetchone()[0]
                md = md and json.loads(md) or {}
            except sqlite3.OperationalError, err:
                if 'no such column: metadata' not in str(err):
                    raise
                conn.execute("""
                    ALTER TABLE %s_stat
                    ADD COLUMN metadata TEXT DEFAULT '' """ % self.db_type)
                md = {}
            for key, value_timestamp in metadata_updates.iteritems():
                value, timestamp = value_timestamp
                if key not in md or timestamp > md[key][1]:
                    md[key] = value_timestamp
            if validate_metadata:
                DatabaseBroker.validate_metadata(md)
            conn.execute('UPDATE %s_stat SET metadata = ?' % self.db_type,
                         (json.dumps(md),))
            conn.commit()
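
    # A minimal sketch of calling update_metadata (the key names are
    # illustrative):
    #
    #     now = normalize_timestamp(time.time())
    #     broker.update_metadata({
    #         'X-Container-Meta-Color': ('blue', now),
    #         'X-Container-Meta-Stale': ('', now),
    #     })
    #
    # The empty value marks X-Container-Meta-Stale for eventual removal by
    # reclaim().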

    def reclaim(self, timestamp):
        """Removes any empty metadata values older than the timestamp"""
        if not self.metadata:
            return
        with self.get() as conn:
            if self._reclaim(conn, timestamp):
                conn.commit()

    def _reclaim(self, conn, timestamp):
        """
        Removes any empty metadata values older than the timestamp using the
        given database connection. This function will not call commit on the
        conn, but will instead return True if the database needs committing.
        This function was created as a worker to limit transactions and commits
        from other related functions.

        :param conn: Database connection to reclaim metadata within.
        :param timestamp: Empty metadata items last updated before this
                          timestamp will be removed.
        :returns: True if conn.commit() should be called
        """
        try:
            md = conn.execute('SELECT metadata FROM %s_stat' %
                              self.db_type).fetchone()[0]
            if md:
                md = json.loads(md)
                keys_to_delete = []
                for key, (value, value_timestamp) in md.iteritems():
                    if value == '' and value_timestamp < timestamp:
                        keys_to_delete.append(key)
                if keys_to_delete:
                    for key in keys_to_delete:
                        del md[key]
                    conn.execute('UPDATE %s_stat SET metadata = ?' %
                                 self.db_type, (json.dumps(md),))
                    return True
        except sqlite3.OperationalError, err:
            if 'no such column: metadata' not in str(err):
                raise
        return False


class ContainerBroker(DatabaseBroker):
    """Encapsulates working with a container database."""
    db_type = 'container'
    db_contains_type = 'object'

    def _initialize(self, conn, put_timestamp):
        """Creates a brand new database (tables, indices, triggers, etc.)"""
        if not self.account:
            raise ValueError(
                'Attempting to create a new database with no account set')
        if not self.container:
            raise ValueError(
                'Attempting to create a new database with no container set')
        self.create_object_table(conn)
        self.create_container_stat_table(conn, put_timestamp)

    def create_object_table(self, conn):
        """
        Create the object table which is specific to the container DB.

        :param conn: DB connection object
        """
        conn.executescript("""
            CREATE TABLE object (
                ROWID INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                created_at TEXT,
                size INTEGER,
                content_type TEXT,
                etag TEXT,
                deleted INTEGER DEFAULT 0
            );

            CREATE INDEX ix_object_deleted_name ON object (deleted, name);

            CREATE TRIGGER object_insert AFTER INSERT ON object
            BEGIN
                UPDATE container_stat
                SET object_count = object_count + (1 - new.deleted),
                    bytes_used = bytes_used + new.size,
                    hash = chexor(hash, new.name, new.created_at);
            END;

            CREATE TRIGGER object_update BEFORE UPDATE ON object
            BEGIN
                SELECT RAISE(FAIL, 'UPDATE not allowed; DELETE and INSERT');
            END;

            CREATE TRIGGER object_delete AFTER DELETE ON object
            BEGIN
                UPDATE container_stat
                SET object_count = object_count - (1 - old.deleted),
                    bytes_used = bytes_used - old.size,
                    hash = chexor(hash, old.name, old.created_at);
            END;
        """)

    def create_container_stat_table(self, conn, put_timestamp=None):
        """
        Create the container_stat table which is specific to the container DB.

        :param conn: DB connection object
        :param put_timestamp: put timestamp
        """
        if put_timestamp is None:
            put_timestamp = normalize_timestamp(0)
        conn.executescript("""
            CREATE TABLE container_stat (
                account TEXT,
                container TEXT,
                created_at TEXT,
                put_timestamp TEXT DEFAULT '0',
                delete_timestamp TEXT DEFAULT '0',
                object_count INTEGER,
                bytes_used INTEGER,
                reported_put_timestamp TEXT DEFAULT '0',
                reported_delete_timestamp TEXT DEFAULT '0',
                reported_object_count INTEGER DEFAULT 0,
                reported_bytes_used INTEGER DEFAULT 0,
                hash TEXT default '00000000000000000000000000000000',
                id TEXT,
                status TEXT DEFAULT '',
                status_changed_at TEXT DEFAULT '0',
                metadata TEXT DEFAULT '',
                x_container_sync_point1 INTEGER DEFAULT -1,
                x_container_sync_point2 INTEGER DEFAULT -1
            );

            INSERT INTO container_stat (object_count, bytes_used)
                VALUES (0, 0);
        """)
        conn.execute('''
            UPDATE container_stat
            SET account = ?, container = ?, created_at = ?, id = ?,
                put_timestamp = ?
        ''', (self.account, self.container, normalize_timestamp(time.time()),
              str(uuid4()), put_timestamp))

    def get_db_version(self, conn):
        if self._db_version == -1:
            self._db_version = 0
            for row in conn.execute('''
                    SELECT name FROM sqlite_master
                    WHERE name = 'ix_object_deleted_name' '''):
                self._db_version = 1
        return self._db_version

    def _newid(self, conn):
        conn.execute('''
            UPDATE container_stat
            SET reported_put_timestamp = 0, reported_delete_timestamp = 0,
                reported_object_count = 0, reported_bytes_used = 0''')

    def update_put_timestamp(self, timestamp):
        """
        Update the put_timestamp.  Only modifies it if it is greater than
        the current timestamp.

        :param timestamp: put timestamp
        """
        with self.get() as conn:
            conn.execute('''
                UPDATE container_stat SET put_timestamp = ?
                WHERE put_timestamp < ? ''', (timestamp, timestamp))
            conn.commit()

    def _delete_db(self, conn, timestamp):
        """
        Mark the DB as deleted

        :param conn: DB connection object
        :param timestamp: timestamp to mark as deleted
        """
        conn.execute("""
            UPDATE container_stat
            SET delete_timestamp = ?,
                status = 'DELETED',
                status_changed_at = ?
            WHERE delete_timestamp < ? """, (timestamp, timestamp, timestamp))

    def empty(self):
        """
        Check if the DB is empty.

        :returns: True if the database has no active objects, False otherwise
        """
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        with self.get() as conn:
            row = conn.execute(
                    'SELECT object_count from container_stat').fetchone()
            return (row[0] == 0)

    def _commit_puts(self, item_list=None):
        """Handles commiting rows in .pending files."""
        if self.db_file == ':memory:' or not os.path.exists(self.pending_file):
            return
        if item_list is None:
            item_list = []
        with lock_parent_directory(self.pending_file, self.pending_timeout):
            self._preallocate()
            if not os.path.getsize(self.pending_file):
                if item_list:
                    self.merge_items(item_list)
                return
            with open(self.pending_file, 'r+b') as fp:
                for entry in fp.read().split(':'):
                    if entry:
                        try:
                            (name, timestamp, size, content_type, etag,
                                deleted) = pickle.loads(entry.decode('base64'))
                            item_list.append({'name': name, 'created_at':
                                timestamp, 'size': size, 'content_type':
                                content_type, 'etag': etag,
                                'deleted': deleted})
                        except Exception:
                            self.logger.exception(
                                _('Invalid pending entry %(file)s: %(entry)s'),
                                {'file': self.pending_file, 'entry': entry})
                if item_list:
                    self.merge_items(item_list)
                try:
                    os.ftruncate(fp.fileno(), 0)
                except OSError, err:
                    if err.errno != errno.ENOENT:
                        raise

    def reclaim(self, object_timestamp, sync_timestamp):
        """
        Delete rows from the object table that are marked deleted and
        whose created_at timestamp is < object_timestamp.  Also deletes rows
        from incoming_sync and outgoing_sync where the updated_at timestamp is
        < sync_timestamp.

        In addition, this calls the DatabaseBroker's :func:`_reclaim` method.

        :param object_timestamp: max created_at timestamp of object rows to
                                 delete
        :param sync_timestamp: max updated_at timestamp of sync rows to delete
        """
        self._commit_puts()
        with self.get() as conn:
            conn.execute("""
                    DELETE FROM object
                    WHERE deleted = 1
                    AND created_at < ?""", (object_timestamp,))
            try:
                conn.execute('''
                    DELETE FROM outgoing_sync WHERE updated_at < ?
                ''', (sync_timestamp,))
                conn.execute('''
                    DELETE FROM incoming_sync WHERE updated_at < ?
                ''', (sync_timestamp,))
            except sqlite3.OperationalError, err:
                # Old dbs didn't have updated_at in the _sync tables.
                if 'no such column: updated_at' not in str(err):
                    raise
            DatabaseBroker._reclaim(self, conn, object_timestamp)
            conn.commit()

    def delete_object(self, name, timestamp):
        """
        Mark an object deleted.

        :param name: object name to be deleted
        :param timestamp: timestamp when the object was marked as deleted
        """
        self.put_object(name, timestamp, 0, 'application/deleted', 'noetag', 1)

    def put_object(self, name, timestamp, size, content_type, etag, deleted=0):
        """
        Creates an object in the DB with its metadata.

        :param name: object name to be created
        :param timestamp: timestamp of when the object was created
        :param size: object size
        :param content_type: object content-type
        :param etag: object etag
        :param deleted: if True, marks the object as deleted and sets the
                        deleted_at timestamp to timestamp
        """
        record = {'name': name, 'created_at': timestamp, 'size': size,
                  'content_type': content_type, 'etag': etag,
                  'deleted': deleted}
        if self.db_file == ':memory:':
            self.merge_items([record])
            return
        if not os.path.exists(self.db_file):
            raise DatabaseConnectionError(self.db_file, "DB doesn't exist")
        pending_size = 0
        try:
            pending_size = os.path.getsize(self.pending_file)
        except OSError, err:
            if err.errno != errno.ENOENT:
                raise
        if pending_size > PENDING_CAP:
            self._commit_puts([record])
        else:
            with lock_parent_directory(
                    self.pending_file, self.pending_timeout):
                with open(self.pending_file, 'a+b') as fp:
                    # Colons aren't used in base64 encoding, so they make a
                    # safe delimiter
                    fp.write(':')
                    fp.write(pickle.dumps(
                        (name, timestamp, size, content_type, etag, deleted),
                        protocol=PICKLE_PROTOCOL).encode('base64'))
                    fp.flush()
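
    # A sketch of the resulting .pending file layout: colon-delimited,
    # base64-encoded pickles, each decoding to a 6-tuple, e.g.
    #
    #     with open(broker.pending_file, 'rb') as fp:
    #         for entry in fp.read().split(':'):
    #             if entry:
    #                 (name, timestamp, size, content_type, etag,
    #                     deleted) = pickle.loads(entry.decode('base64'))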

    def is_deleted(self, timestamp=None):
        """
        Check if the DB is considered to be deleted.

        :returns: True if the DB is considered to be deleted, False otherwise
        """
        if self.db_file != ':memory:' and not os.path.exists(self.db_file):
            return True
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        with self.get() as conn:
            row = conn.execute('''
                SELECT put_timestamp, delete_timestamp, object_count
                FROM container_stat''').fetchone()
            # leave this db as a tombstone for a consistency window
            if timestamp and row['delete_timestamp'] > timestamp:
                return False
            # The container is considered deleted if the delete_timestamp
            # value is greater than the put_timestamp, and there are no
            # objects in the container.
            return (row['object_count'] in (None, '', 0, '0')) and \
                (float(row['delete_timestamp']) > float(row['put_timestamp']))

    def get_info(self, include_metadata=False):
        """
        Get global data for the container.

        :returns: dict with keys: account, container, created_at,
                  put_timestamp, delete_timestamp, object_count, bytes_used,
                  reported_put_timestamp, reported_delete_timestamp,
                  reported_object_count, reported_bytes_used, hash, id,
                  x_container_sync_point1, and x_container_sync_point2.
                  If include_metadata is set, metadata is included as a key
                  pointing to a dict of tuples of the metadata
        """
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        with self.get() as conn:
            data = None
            trailing1 = 'metadata'
            trailing2 = 'x_container_sync_point1, x_container_sync_point2'
            while not data:
                try:
                    data = conn.execute('''
                        SELECT account, container, created_at, put_timestamp,
                            delete_timestamp, object_count, bytes_used,
                            reported_put_timestamp, reported_delete_timestamp,
                            reported_object_count, reported_bytes_used, hash,
                            id, %s, %s
                        FROM container_stat
                    ''' % (trailing1, trailing2)).fetchone()
                except sqlite3.OperationalError, err:
                    if 'no such column: metadata' in str(err):
                        trailing1 = "'' as metadata"
                    elif 'no such column: x_container_sync_point' in str(err):
                        trailing2 = '-1 AS x_container_sync_point1, ' \
                                    '-1 AS x_container_sync_point2'
                    else:
                        raise
            data = dict(data)
            if include_metadata:
                try:
                    data['metadata'] = json.loads(data.get('metadata', ''))
                except ValueError:
                    data['metadata'] = {}
            elif 'metadata' in data:
                del data['metadata']
            return data

    def set_x_container_sync_points(self, sync_point1, sync_point2):
        with self.get() as conn:
            orig_isolation_level = conn.isolation_level
            try:
                # We turn off auto-transactions to ensure the alter table
                # commands are part of the transaction.
                conn.isolation_level = None
                conn.execute('BEGIN')
                try:
                    self._set_x_container_sync_points(conn, sync_point1,
                                                      sync_point2)
                except sqlite3.OperationalError, err:
                    if 'no such column: x_container_sync_point' not in \
                            str(err):
                        raise
                    conn.execute('''
                        ALTER TABLE container_stat
                        ADD COLUMN x_container_sync_point1 INTEGER DEFAULT -1
                    ''')
                    conn.execute('''
                        ALTER TABLE container_stat
                        ADD COLUMN x_container_sync_point2 INTEGER DEFAULT -1
                    ''')
                    self._set_x_container_sync_points(conn, sync_point1,
                                                      sync_point2)
                conn.execute('COMMIT')
            finally:
                conn.isolation_level = orig_isolation_level

    def _set_x_container_sync_points(self, conn, sync_point1, sync_point2):
        if sync_point1 is not None and sync_point2 is not None:
            conn.execute('''
                UPDATE container_stat
                SET x_container_sync_point1 = ?,
                    x_container_sync_point2 = ?
            ''', (sync_point1, sync_point2))
        elif sync_point1 is not None:
            conn.execute('''
                UPDATE container_stat
                SET x_container_sync_point1 = ?
            ''', (sync_point1,))
        elif sync_point2 is not None:
            conn.execute('''
                UPDATE container_stat
                SET x_container_sync_point2 = ?
            ''', (sync_point2,))

    def reported(self, put_timestamp, delete_timestamp, object_count,
                 bytes_used):
        """
        Update reported stats.

        :param put_timestamp: put_timestamp to update
        :param delete_timestamp: delete_timestamp to update
        :param object_count: object_count to update
        :param bytes_used: bytes_used to update
        """
        with self.get() as conn:
            conn.execute('''
                UPDATE container_stat
                SET reported_put_timestamp = ?, reported_delete_timestamp = ?,
                    reported_object_count = ?, reported_bytes_used = ?
            ''', (put_timestamp, delete_timestamp, object_count, bytes_used))
            conn.commit()

    def list_objects_iter(self, limit, marker, end_marker, prefix, delimiter,
                          path=None, format=None):
        """
        Get a list of objects sorted by name, starting after marker, up
        to limit entries.  Entries will begin with the prefix and will not
        have the delimiter after the prefix.

        :param limit: maximum number of entries to get
        :param marker: marker query
        :param end_marker: end marker query
        :param prefix: prefix query
        :param delimiter: delimiter for query
        :param path: if defined, will set the prefix and delimiter based on
                     the path
        :param format: TODO: remove as it is no longer used

        :returns: list of tuples of (name, created_at, size, content_type,
                  etag)
        """
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        if path is not None:
            prefix = path
            if path:
                prefix = path = path.rstrip('/') + '/'
            delimiter = '/'
        elif delimiter and not prefix:
            prefix = ''
        orig_marker = marker
        with self.get() as conn:
            results = []
            while len(results) < limit:
                query = '''SELECT name, created_at, size, content_type, etag
                           FROM object WHERE'''
                query_args = []
                if end_marker:
                    query += ' name < ? AND'
                    query_args.append(end_marker)
                if marker and marker >= prefix:
                    query += ' name > ? AND'
                    query_args.append(marker)
                elif prefix:
                    query += ' name >= ? AND'
                    query_args.append(prefix)
                if self.get_db_version(conn) < 1:
                    query += ' +deleted = 0'
                else:
                    query += ' deleted = 0'
                query += ' ORDER BY name LIMIT ?'
                query_args.append(limit - len(results))
                curs = conn.execute(query, query_args)
                curs.row_factory = None

                if prefix is None:
                    return [r for r in curs]
                if not delimiter:
                    return [r for r in curs if r[0].startswith(prefix)]
                rowcount = 0
                for row in curs:
                    rowcount += 1
                    marker = name = row[0]
                    if len(results) >= limit or not name.startswith(prefix):
                        curs.close()
                        return results
                    end = name.find(delimiter, len(prefix))
                    if path is not None:
                        if name == path:
                            continue
                        if end >= 0 and len(name) > end + len(delimiter):
                            marker = name[:end] + chr(ord(delimiter) + 1)
                            curs.close()
                            break
                    elif end > 0:
                        marker = name[:end] + chr(ord(delimiter) + 1)
                        dir_name = name[:end + 1]
                        if dir_name != orig_marker:
                            results.append([dir_name, '0', 0, None, ''])
                        curs.close()
                        break
                    results.append(row)
                if not rowcount:
                    break
            return results
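
    # A minimal sketch of a delimiter listing (object names are illustrative):
    # with objects 'photos/a', 'photos/b' and 'top' in the container,
    #
    #     broker.list_objects_iter(100, '', None, '', '/')
    #
    # returns a synthesized directory row ['photos/', '0', 0, None, ''] in
    # place of the two 'photos/...' objects, followed by the row for 'top'.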

    def merge_items(self, item_list, source=None):
        """
        Merge items into the object table.

        :param item_list: list of dictionaries of {'name', 'created_at',
                          'size', 'content_type', 'etag', 'deleted'}
        :param source: if defined, update incoming_sync with the source
        """
        with self.get() as conn:
            max_rowid = -1
            for rec in item_list:
                query = '''
                    DELETE FROM object
                    WHERE name = ? AND (created_at < ?)
                '''
                if self.get_db_version(conn) >= 1:
                    query += ' AND deleted IN (0, 1)'
                conn.execute(query, (rec['name'], rec['created_at']))
                query = 'SELECT 1 FROM object WHERE name = ?'
                if self.get_db_version(conn) >= 1:
                    query += ' AND deleted IN (0, 1)'
                if not conn.execute(query, (rec['name'],)).fetchall():
                    conn.execute('''
                        INSERT INTO object (name, created_at, size,
                            content_type, etag, deleted)
                        VALUES (?, ?, ?, ?, ?, ?)
                    ''', ([rec['name'], rec['created_at'], rec['size'],
                          rec['content_type'], rec['etag'], rec['deleted']]))
                if source:
                    max_rowid = max(max_rowid, rec['ROWID'])
            if source:
                try:
                    conn.execute('''
                        INSERT INTO incoming_sync (sync_point, remote_id)
                        VALUES (?, ?)
                    ''', (max_rowid, source))
                except sqlite3.IntegrityError:
                    conn.execute('''
                        UPDATE incoming_sync SET sync_point=max(?, sync_point)
                        WHERE remote_id=?
                    ''', (max_rowid, source))
            conn.commit()


class AccountBroker(DatabaseBroker):
    """Encapsulates working with a account database."""
    db_type = 'account'
    db_contains_type = 'container'

    def _initialize(self, conn, put_timestamp):
        """
        Create a brand new database (tables, indices, triggers, etc.)

        :param conn: DB connection object
        :param put_timestamp: put timestamp
        """
        if not self.account:
            raise ValueError(
                'Attempting to create a new database with no account set')
        self.create_container_table(conn)
        self.create_account_stat_table(conn, put_timestamp)

    def create_container_table(self, conn):
        """
        Create container table which is specific to the account DB.

        :param conn: DB connection object
        """
        conn.executescript("""
            CREATE TABLE container (
                ROWID INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                put_timestamp TEXT,
                delete_timestamp TEXT,
                object_count INTEGER,
                bytes_used INTEGER,
                deleted INTEGER DEFAULT 0
            );
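            -- The triggers below keep the account_stat rollups
            -- (container_count, object_count, bytes_used) and the running
            -- chexor hash in step with every change to the container table.
            -- UPDATEs are forbidden, so each change arrives as a DELETE plus
            -- an INSERT and both sides pass through chexor symmetrically.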

            CREATE INDEX ix_container_deleted_name ON
                container (deleted, name);

            CREATE TRIGGER container_insert AFTER INSERT ON container
            BEGIN
                UPDATE account_stat
                SET container_count = container_count + (1 - new.deleted),
                    object_count = object_count + new.object_count,
                    bytes_used = bytes_used + new.bytes_used,
                    hash = chexor(hash, new.name,
                                  new.put_timestamp || '-' ||
                                    new.delete_timestamp || '-' ||
                                    new.object_count || '-' || new.bytes_used);
            END;

            CREATE TRIGGER container_update BEFORE UPDATE ON container
            BEGIN
                SELECT RAISE(FAIL, 'UPDATE not allowed; DELETE and INSERT');
            END;


            CREATE TRIGGER container_delete AFTER DELETE ON container
            BEGIN
                UPDATE account_stat
                SET container_count = container_count - (1 - old.deleted),
                    object_count = object_count - old.object_count,
                    bytes_used = bytes_used - old.bytes_used,
                    hash = chexor(hash, old.name,
                                  old.put_timestamp || '-' ||
                                    old.delete_timestamp || '-' ||
                                    old.object_count || '-' || old.bytes_used);
            END;
        """)

    def create_account_stat_table(self, conn, put_timestamp):
        """
        Create account_stat table which is specific to the account DB.

        :param conn: DB connection object
        :param put_timestamp: put timestamp
        """
        conn.executescript("""
            CREATE TABLE account_stat (
                account TEXT,
                created_at TEXT,
                put_timestamp TEXT DEFAULT '0',
                delete_timestamp TEXT DEFAULT '0',
                container_count INTEGER,
                object_count INTEGER DEFAULT 0,
                bytes_used INTEGER DEFAULT 0,
                hash TEXT DEFAULT '00000000000000000000000000000000',
                id TEXT,
                status TEXT DEFAULT '',
                status_changed_at TEXT DEFAULT '0',
                metadata TEXT DEFAULT ''
            );

            INSERT INTO account_stat (container_count) VALUES (0);
        """)

        conn.execute('''
            UPDATE account_stat SET account = ?, created_at = ?, id = ?,
                   put_timestamp = ?
            ''', (self.account, normalize_timestamp(time.time()), str(uuid4()),
            put_timestamp))

    def get_db_version(self, conn):
        if self._db_version == -1:
            self._db_version = 0
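            # The schema version is inferred from the indexes present:
            # version 1 databases carry the compound (deleted, name) index
            # created by create_container_table above.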
            for row in conn.execute('''
                    SELECT name FROM sqlite_master
                    WHERE name = 'ix_container_deleted_name' '''):
                self._db_version = 1
        return self._db_version

    def update_put_timestamp(self, timestamp):
        """
        Update the put_timestamp.  Only modifies it if the given timestamp
        is greater than the current one.

        :param timestamp: put timestamp
        """
        with self.get() as conn:
            conn.execute('''
                UPDATE account_stat SET put_timestamp = ?
                WHERE put_timestamp < ? ''', (timestamp, timestamp))
            conn.commit()

    def _delete_db(self, conn, timestamp, force=False):
        """
        Mark the DB as deleted.

        :param conn: DB connection object
        :param timestamp: timestamp to mark as deleted
        """
        conn.execute("""
            UPDATE account_stat
            SET delete_timestamp = ?,
                status = 'DELETED',
                status_changed_at = ?
            WHERE delete_timestamp < ? """, (timestamp, timestamp, timestamp))

    def _commit_puts(self, item_list=None):
        """Handles commiting rows in .pending files."""
        if self.db_file == ':memory:' or not os.path.exists(self.pending_file):
            return
        if item_list is None:
            item_list = []
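        # The .pending file is a sequence of colon-separated, base64-encoded
        # pickles appended by put_container().  Drain it into the database
        # while holding the parent-directory lock, then truncate it.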
        with lock_parent_directory(self.pending_file, self.pending_timeout):
            self._preallocate()
            if not os.path.getsize(self.pending_file):
                if item_list:
                    self.merge_items(item_list)
                return
            with open(self.pending_file, 'r+b') as fp:
                for entry in fp.read().split(':'):
                    if entry:
                        try:
                            (name, put_timestamp, delete_timestamp,
                                    object_count, bytes_used, deleted) = \
                                pickle.loads(entry.decode('base64'))
                            item_list.append({'name': name,
                                      'put_timestamp': put_timestamp,
                                      'delete_timestamp': delete_timestamp,
                                      'object_count': object_count,
                                      'bytes_used': bytes_used,
                                      'deleted': deleted})
                        except Exception:
                            self.logger.exception(
                                _('Invalid pending entry %(file)s: %(entry)s'),
                                {'file': self.pending_file, 'entry': entry})
                if item_list:
                    self.merge_items(item_list)
                try:
                    os.ftruncate(fp.fileno(), 0)
                except OSError, err:
                    if err.errno != errno.ENOENT:
                        raise

    def empty(self):
        """
        Check if the account DB is empty.

        :returns: True if the database has no active containers.
        """
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        with self.get() as conn:
            row = conn.execute(
                    'SELECT container_count from account_stat').fetchone()
            return (row[0] == 0)

    def reclaim(self, container_timestamp, sync_timestamp):
        """
        Delete rows from the container table that are marked deleted and
        whose delete_timestamp is < container_timestamp.  Also deletes rows
        from incoming_sync and outgoing_sync where the updated_at timestamp is
        < sync_timestamp.

        In addition, this calls the DatabaseBroker's :func:`_reclaim` method.

        :param container_timestamp: max delete_timestamp of container rows to
                                    delete
        :param sync_timestamp: max updated_at timestamp of sync rows to delete
        """

        self._commit_puts()
        with self.get() as conn:
            conn.execute('''
                DELETE FROM container WHERE
                deleted = 1 AND delete_timestamp < ?
            ''', (container_timestamp,))
            try:
                conn.execute('''
                    DELETE FROM outgoing_sync WHERE updated_at < ?
                ''', (sync_timestamp,))
                conn.execute('''
                    DELETE FROM incoming_sync WHERE updated_at < ?
                ''', (sync_timestamp,))
            except sqlite3.OperationalError, err:
                # Old dbs didn't have updated_at in the _sync tables.
                if 'no such column: updated_at' not in str(err):
                    raise
            DatabaseBroker._reclaim(self, conn, container_timestamp)
            conn.commit()

    def get_container_timestamp(self, container_name):
        """
        Get the put_timestamp of a container.

        :param container_name: container name

        :returns: put_timestamp of the container
        """
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        with self.get() as conn:
            ret = conn.execute('''
                SELECT put_timestamp FROM container
                WHERE name = ? AND deleted != 1''',
                (container_name,)).fetchone()
            if ret:
                ret = ret[0]
            return ret

    def put_container(self, name, put_timestamp, delete_timestamp,
                      object_count, bytes_used):
        """
        Create a container with the given attributes.

        :param name: name of the container to create
        :param put_timestamp: put_timestamp of the container to create
        :param delete_timestamp: delete_timestamp of the container to create
        :param object_count: number of objects in the container
        :param bytes_used: number of bytes used by the container
        """
        if delete_timestamp > put_timestamp and \
                object_count in (None, '', 0, '0'):
            deleted = 1
        else:
            deleted = 0
        record = {'name': name, 'put_timestamp': put_timestamp,
                  'delete_timestamp': delete_timestamp,
                  'object_count': object_count,
                  'bytes_used': bytes_used,
                  'deleted': deleted}
        if self.db_file == ':memory:':
            self.merge_items([record])
            return
        if not os.path.exists(self.db_file):
            raise DatabaseConnectionError(self.db_file, "DB doesn't exist")
        pending_size = 0
        try:
            pending_size = os.path.getsize(self.pending_file)
        except OSError, err:
            if err.errno != errno.ENOENT:
                raise
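        # Once the pending file outgrows PENDING_CAP, flush it (and this
        # record) into the database synchronously; otherwise just append the
        # record and let a later read commit it via _commit_puts.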
        if pending_size > PENDING_CAP:
            self._commit_puts([record])
        else:
            with lock_parent_directory(self.pending_file,
                                       self.pending_timeout):
                with open(self.pending_file, 'a+b') as fp:
                    # Colons aren't used in base64 encoding, so they make a
                    # safe delimiter between entries
                    fp.write(':')
                    fp.write(pickle.dumps(
                        (name, put_timestamp, delete_timestamp, object_count,
                         bytes_used, deleted),
                        protocol=PICKLE_PROTOCOL).encode('base64'))
                    fp.flush()

    def can_delete_db(self, cutoff):
        """
        Check if the account DB can be deleted.

        :param cutoff: timestamp; a status-DELETED account qualifies only if
                       its delete_timestamp is older than this
        :returns: True if the account can be deleted, False otherwise
        """
        self._commit_puts()
        with self.get() as conn:
            row = conn.execute('''
                SELECT status, put_timestamp, delete_timestamp, container_count
                FROM account_stat''').fetchone()
            # The account is considered deleted if its status is marked
            # as 'DELETED' and the delete_timestamp is older than the supplied
            # cutoff date; or if the delete_timestamp value is greater than
            # the put_timestamp and there are no containers for the account.
            status_del = (row['status'] == 'DELETED')
            deltime = float(row['delete_timestamp'])
            past_cutoff = (deltime < cutoff)
            time_later = (row['delete_timestamp'] > row['put_timestamp'])
            no_containers = (row['container_count'] in (None, '', 0, '0'))
            return (
                (status_del and past_cutoff) or (time_later and no_containers))

    def is_deleted(self):
        """
        Check if the account DB is considered to be deleted.

        :returns: True if the account DB is considered to be deleted, False
                  otherwise
        """
        if self.db_file != ':memory:' and not os.path.exists(self.db_file):
            return True
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        with self.get() as conn:
            row = conn.execute('''
                SELECT put_timestamp, delete_timestamp, container_count, status
                FROM account_stat''').fetchone()
            return row['status'] == 'DELETED' or (
                    row['container_count'] in (None, '', 0, '0') and
                    row['delete_timestamp'] > row['put_timestamp'])

    def is_status_deleted(self):
        """Only returns true if the status field is set to DELETED."""
        with self.get() as conn:
            row = conn.execute('''
                SELECT status
                FROM account_stat''').fetchone()
            return (row['status'] == "DELETED")

    def get_info(self):
        """
        Get global data for the account.

        :returns: dict with keys: account, created_at, put_timestamp,
                  delete_timestamp, container_count, object_count,
                  bytes_used, hash, id
        """
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        with self.get() as conn:
            return dict(conn.execute('''
                SELECT account, created_at, put_timestamp, delete_timestamp,
                       container_count, object_count, bytes_used, hash, id
                FROM account_stat
            ''').fetchone())

    def list_containers_iter(self, limit, marker, end_marker, prefix,
                             delimiter):
        """
        Get a list of containers sorted by name starting after the marker,
        up to limit entries.  Entries will begin with the prefix and will not
        have the delimiter after the prefix.

        :param limit: maximum number of entries to get
        :param marker: marker query
        :param end_marker: end marker query
        :param prefix: prefix query
        :param delimiter: delimiter for query

        :returns: list of tuples of (name, object_count, bytes_used, 0)
        """
        try:
            self._commit_puts()
        except LockTimeout:
            if not self.stale_reads_ok:
                raise
        if delimiter and not prefix:
            prefix = ''
        orig_marker = marker
        with self.get() as conn:
            results = []
            while len(results) < limit:
                query = """
                    SELECT name, object_count, bytes_used, 0
                    FROM container
                    WHERE deleted = 0 AND """
                query_args = []
                if end_marker:
                    query += ' name < ? AND'
                    query_args.append(end_marker)
                if marker and marker >= prefix:
                    query += ' name > ? AND'
                    query_args.append(marker)
                elif prefix:
                    query += ' name >= ? AND'
                    query_args.append(prefix)
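                # The unary '+' stops SQLite from driving the query off an
                # index on the deleted column, presumably because
                # pre-version-1 schemas carry a deleted-only index that
                # yields poor plans for name-ordered listings.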
                if self.get_db_version(conn) < 1:
                    query += ' +deleted = 0'
                else:
                    query += ' deleted = 0'
                query += ' ORDER BY name LIMIT ?'
                query_args.append(limit - len(results))
                curs = conn.execute(query, query_args)
                curs.row_factory = None

                if prefix is None:
                    return [r for r in curs]
                if not delimiter:
                    return [r for r in curs if r[0].startswith(prefix)]
                rowcount = 0
                for row in curs:
                    rowcount += 1
                    marker = name = row[0]
                    if len(results) >= limit or not name.startswith(prefix):
                        curs.close()
                        return results
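                    # Real container rows end in 0 (the literal column in the
                    # SELECT above); a rolled-up "subdirectory" placeholder
                    # ends in 1, and the marker is bumped past the delimiter
                    # so the next query skips the rest of that sub-prefix.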
                    end = name.find(delimiter, len(prefix))
                    if end > 0:
                        marker = name[:end] + chr(ord(delimiter) + 1)
                        dir_name = name[:end + 1]
                        if dir_name != orig_marker:
                            results.append([dir_name, 0, 0, 1])
                        curs.close()
                        break
                    results.append(row)
                if not rowcount:
                    break
            return results

    def merge_items(self, item_list, source=None):
        """
        Merge items into the container table.

        :param item_list: list of dictionaries of {'name', 'put_timestamp',
                          'delete_timestamp', 'object_count', 'bytes_used',
                          'deleted'}
        :param source: if defined, update incoming_sync with the source
        """
        with self.get() as conn:
            max_rowid = -1
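            # Merge each incoming record with any existing row of the same
            # name: keep the newest put/delete timestamps, recompute the
            # deleted flag, then DELETE and re-INSERT so the account_stat
            # triggers update the rollups and hash.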
            for rec in item_list:
                record = [rec['name'], rec['put_timestamp'],
                          rec['delete_timestamp'], rec['object_count'],
                          rec['bytes_used'], rec['deleted']]
                query = '''
                    SELECT name, put_timestamp, delete_timestamp,
                           object_count, bytes_used, deleted
                    FROM container WHERE name = ?
                '''
                if self.get_db_version(conn) >= 1:
                    query += ' AND deleted IN (0, 1)'
                curs = conn.execute(query, (rec['name'],))
                curs.row_factory = None
                row = curs.fetchone()
                if row:
                    row = list(row)
                    for i in xrange(5):
                        if record[i] is None and row[i] is not None:
                            record[i] = row[i]
                    if row[1] > record[1]:  # Keep newest put_timestamp
                        record[1] = row[1]
                    if row[2] > record[2]:  # Keep newest delete_timestamp
                        record[2] = row[2]
                    # If deleted, mark as such
                    if record[2] > record[1] and \
                            record[3] in (None, '', 0, '0'):
                        record[5] = 1
                    else:
                        record[5] = 0
                conn.execute('''
                    DELETE FROM container WHERE name = ? AND
                                                deleted IN (0, 1)
                ''', (record[0],))
                conn.execute('''
                    INSERT INTO container (name, put_timestamp,
                        delete_timestamp, object_count, bytes_used,
                        deleted)
                    VALUES (?, ?, ?, ?, ?, ?)
                ''', record)
                if source:
                    max_rowid = max(max_rowid, rec['ROWID'])
            if source:
                try:
                    conn.execute('''
                        INSERT INTO incoming_sync (sync_point, remote_id)
                        VALUES (?, ?)
                    ''', (max_rowid, source))
                except sqlite3.IntegrityError:
                    conn.execute('''
                        UPDATE incoming_sync SET sync_point=max(?, sync_point)
                        WHERE remote_id=?
                    ''', (max_rowid, source))
            conn.commit()