# /usr/share/pyshared/allmydata/mutable/layout.py is in tahoe-lafs 1.9.2-1.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
import struct
from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError, \
BadShareError
from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \
MDMF_VERSION, IMutableSlotWriter
from allmydata.util import mathutil
from twisted.python import failure
from twisted.internet import defer
from zope.interface import implements
# These strings describe the format of the packed structs they help process.
# Here's what they mean:
#
# PREFIX:
# >: Big-endian byte order; the most significant byte is first (leftmost).
# B: The container version information; stored as an unsigned 8-bit integer.
# This is currently either SDMF_VERSION or MDMF_VERSION.
# Q: The sequence number; this is sort of like a revision history for
# mutable files; they start at 1 and increase as they are changed after
# being uploaded. Stored as an unsigned 64-bit integer.
# 32s: The root hash of the share hash tree. We use sha-256d, so we use 32
# bytes to store the value.
# 16s: The salt for the readkey. This is a 16-byte random value.
#
# SIGNED_PREFIX additions, things that are covered by the signature:
# B: The "k" encoding parameter. We store this as an unsigned 8-bit
# integer, since our erasure coding scheme cannot encode to more than
# 255 pieces.
# B: The "N" encoding parameter. Stored as an unsigned 8-bit integer for
# the same reason as above.
# Q: The segment size of the uploaded file. This is an unsigned 64-bit
# integer, to allow handling large segments and files. For SDMF the
# segment size is the data length plus padding; for MDMF it can be
# smaller.
# Q: The data length of the uploaded file. Like the segment size field,
# it is an unsigned 64-bit integer.
#
# HEADER additions:
# L: The offset of the signature. An unsigned 32-bit integer.
# L: The offset of the share hash chain. An unsigned 32-bit integer.
# L: The offset of the block hash tree. An unsigned 32-bit integer.
# L: The offset of the share data. An unsigned 32-bit integer.
# Q: The offset of the encrypted private key. An unsigned 64-bit integer,
# to account for the possibility of a lot of share data.
# Q: The offset of the EOF. An unsigned 64-bit integer, to account for
# the possibility of a lot of share data.
#
# After all of these, we have the following:
# - The verification key: Occupies the space between the end of the header
# and the start of the signature (i.e.: data[HEADER_LENGTH:o['signature']]).
# - The signature, which goes from the signature offset to the share hash
# chain offset.
# - The share hash chain, which goes from the share hash chain offset to
# the block hash tree offset.
# - The block hash tree, which goes from the block hash tree offset to
# the share data offset.
# - The share data, which goes from the share data offset to the encrypted
# private key offset.
# - The encrypted private key, which goes from the encrypted private key
# offset until the end of the file (the EOF offset).
#
# The block hash tree in this encoding has only one share, so the offset of
# the share data will be 32 bytes more than the offset of the block hash tree.
# Given this, we may need to check to see how many bytes a reasonably sized
# block hash tree will take up.
# Struct format strings for the SDMF share header. Each longer format is
# spelled as an extension of the shorter one so the three cannot drift apart.
PREFIX = ">BQ32s16s"  # each version may have a different prefix
SIGNED_PREFIX = PREFIX + " BBQQ"  # this is covered by the signature
SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX)
OFFSETS = ">LLLLQQ"  # the offset table on its own
OFFSETS_LENGTH = struct.calcsize(OFFSETS)
# The full header is the signed prefix followed by the offset table; the
# byte-order marker is dropped from OFFSETS because the prefix supplies it.
HEADER = SIGNED_PREFIX + " " + OFFSETS[1:]  # includes offsets
HEADER_LENGTH = struct.calcsize(HEADER)
# 69105 TB (decimal terabytes), kind of arbitrary
MAX_MUTABLE_SHARE_SIZE = 69105 * 10 ** 12
# These are still used for some tests of SDMF files.
def unpack_header(data):
    """
    Decode the fixed-size SDMF header found at the front of a share.

    Only the first HEADER_LENGTH bytes of data are examined; struct.unpack
    raises struct.error if fewer bytes than that are supplied.

    Returns a 9-tuple (version, seqnum, root_hash, IV, k, N, segsize,
    datalen, o), where o is a dict mapping each offset name ('signature',
    'share_hash_chain', 'block_hash_tree', 'share_data', 'enc_privkey',
    'EOF') to the offset stored in the header.
    """
    fields = struct.unpack(HEADER, data[:HEADER_LENGTH])
    # The first eight fields are the signed prefix; the remaining six are
    # the offset table, in the order the names are listed below.
    (version, seqnum, root_hash, IV, k, N, segsize, datalen) = fields[:8]
    offset_names = ('signature',
                    'share_hash_chain',
                    'block_hash_tree',
                    'share_data',
                    'enc_privkey',
                    'EOF')
    o = dict(zip(offset_names, fields[8:]))
    return (version, seqnum, root_hash, IV, k, N, segsize, datalen, o)
def unpack_share(data):
    """
    Split a complete SDMF share into its constituent fields.

    data must hold at least HEADER_LENGTH bytes (checked with an assert).

    Returns a 13-tuple (seqnum, root_hash, IV, k, N, segsize, datalen,
    pubkey, signature, share_hash_chain, block_hash_tree, share_data,
    enc_privkey). share_hash_chain is a dict mapping the 16-bit hash
    number to its 32-byte hash; block_hash_tree is a list of 32-byte
    hashes.

    Raises UnknownVersionError if the version byte is not 0,
    NeedMoreDataError if data ends before the EOF offset recorded in the
    header, and BadShareError if either hash section has a length that is
    not a whole number of entries.
    """
    assert len(data) >= HEADER_LENGTH
    header = struct.unpack(HEADER, data[:HEADER_LENGTH])
    (version, seqnum, root_hash, IV, k, N, segsize, datalen) = header[:8]
    (sig_off, chain_off, tree_off, data_off, privkey_off, eof_off) = header[8:]

    if version != 0:
        raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)

    if len(data) < eof_off:
        # Tell the caller how much is needed in total, and where the
        # encrypted private key sits and how long it is.
        raise NeedMoreDataError(eof_off,
                                privkey_off, eof_off - privkey_off)

    # The verification key fills the gap between the header and the
    # signature.
    pubkey = data[HEADER_LENGTH:sig_off]
    signature = data[sig_off:chain_off]

    # The share hash chain is a sequence of (hash number, hash) records.
    chain_bytes = data[chain_off:tree_off]
    entry_format = ">H32s"
    entry_size = struct.calcsize(entry_format)
    if len(chain_bytes) % entry_size != 0:
        raise BadShareError("hash chain is %d bytes, not multiple of %d"
                            % (len(chain_bytes), entry_size))
    share_hash_chain = {}
    for start in range(0, len(chain_bytes), entry_size):
        (hid, h) = struct.unpack(entry_format,
                                 chain_bytes[start:start+entry_size])
        share_hash_chain[hid] = h

    # The block hash tree is a flat run of 32-byte hashes.
    tree_bytes = data[tree_off:data_off]
    if len(tree_bytes) % 32 != 0:
        raise BadShareError("block_hash_tree is %d bytes, not multiple of %d"
                            % (len(tree_bytes), 32))
    block_hash_tree = [tree_bytes[start:start+32]
                       for start in range(0, len(tree_bytes), 32)]

    share_data = data[data_off:privkey_off]
    enc_privkey = data[privkey_off:eof_off]

    return (seqnum, root_hash, IV, k, N, segsize, datalen,
            pubkey, signature, share_hash_chain, block_hash_tree,
            share_data, enc_privkey)
def get_version_from_checkstring(checkstring):
    """
    Return the version number held in the first byte of checkstring, as
    an unsigned integer. struct.unpack raises struct.error if checkstring
    is empty.
    """
    return struct.unpack(">B", checkstring[:1])[0]
def unpack_sdmf_checkstring(checkstring):
    """
    Decode an SDMF checkstring (the PREFIX-formatted start of a share).

    Bytes beyond the size of PREFIX are ignored. Asserts that the version
    byte equals SDMF_VERSION, then returns (seqnum, root_hash, IV).
    """
    prefix_size = struct.calcsize(PREFIX)
    fields = struct.unpack(PREFIX, checkstring[:prefix_size])
    (version, seqnum, root_hash, IV) = fields
    assert version == SDMF_VERSION, version
    return (seqnum, root_hash, IV)
def unpack_mdmf_checkstring(checkstring):
    """
    Decode an MDMF checkstring (the MDMFCHECKSTRING-formatted start of a
    share).

    Bytes beyond the size of MDMFCHECKSTRING are ignored. Asserts that the
    version byte equals MDMF_VERSION, then returns (seqnum, root_hash).
    """
    checkstring_size = struct.calcsize(MDMFCHECKSTRING)
    fields = struct.unpack(MDMFCHECKSTRING, checkstring[:checkstring_size])
    (version, seqnum, root_hash) = fields
    assert version == MDMF_VERSION, version
    return (seqnum, root_hash)
def pack_offsets(verification_key_length, signature_length,
                 share_hash_chain_length, block_hash_tree_length,
                 share_data_length, encprivkey_length):
    """
    Build the packed SDMF offset table from the lengths of the sections
    that follow the header.

    The sections are laid out back to back starting at HEADER_LENGTH, so
    each stored offset is the running total after one more section:
    signature, share hash chain, block hash tree, share data, encrypted
    private key, EOF. The first four are packed as unsigned 32-bit values
    and the last two as unsigned 64-bit values.
    """
    section_lengths = (verification_key_length,
                       signature_length,
                       share_hash_chain_length,
                       block_hash_tree_length,
                       share_data_length,
                       encprivkey_length)
    boundaries = []
    position = HEADER_LENGTH
    for length in section_lengths:
        # The end of this section is the start of the next one.
        position += length
        boundaries.append(position)
    return struct.pack(">LLLLQQ", *boundaries)
def pack_share(prefix, verification_key, signature,
               share_hash_chain, block_hash_tree,
               share_data, encprivkey):
    """
    Assemble a complete SDMF share string.

    prefix is emitted first, unchanged, followed by the offset table
    computed by pack_offsets and then each section in on-disk order:
    verification key, signature, share hash chain, block hash tree, share
    data and encrypted private key.

    share_hash_chain is a dict mapping hash number to 32-byte hash; it is
    serialized in ascending hash-number order. block_hash_tree is a
    sequence of hashes, each asserted to be exactly 32 bytes long.
    """
    chain_parts = []
    for hash_number in sorted(share_hash_chain):
        chain_parts.append(struct.pack(">H32s",
                                       hash_number,
                                       share_hash_chain[hash_number]))
    share_hash_chain_s = "".join(chain_parts)

    for block_hash in block_hash_tree:
        assert len(block_hash) == 32
    block_hash_tree_s = "".join(block_hash_tree)

    offsets = pack_offsets(len(verification_key),
                           len(signature),
                           len(share_hash_chain_s),
                           len(block_hash_tree_s),
                           len(share_data),
                           len(encprivkey))

    pieces = [prefix,
              offsets,
              verification_key,
              signature,
              share_hash_chain_s,
              block_hash_tree_s,
              share_data,
              encprivkey]
    return "".join(pieces)
def pack_prefix(seqnum, root_hash, IV,
                required_shares, total_shares,
                segment_size, data_length):
    """
    Pack the signed prefix of an SDMF share using the SIGNED_PREFIX
    format: version (always 0 here), sequence number, root hash, IV, k
    (required_shares), N (total_shares), segment size and data length.
    """
    version = 0
    return struct.pack(SIGNED_PREFIX,
                       version,
                       seqnum,
                       root_hash,
                       IV,
                       required_shares,
                       total_shares,
                       segment_size,
                       data_length)
class SDMFSlotWriteProxy:
    """
    I represent a remote write slot for an SDMF mutable file. I build a
    share in memory, and then write it in one piece to the remote
    server. This mimics how SDMF shares were built before MDMF (and the
    new MDMF uploader), but provides that functionality in a way that
    allows the MDMF uploader to be built without much special-casing for
    file format, which makes the uploader code more readable.
    """
    # The docstring has to be the first statement of the class body to
    # become __doc__, so the interface declaration comes after it.
    implements(IMutableSlotWriter)

    def __init__(self,
                 shnum,
                 rref, # a remote reference to a storage server
                 storage_index,
                 secrets, # (write_enabler, renew_secret, cancel_secret)
                 seqnum, # the sequence number of the mutable file
                 required_shares,
                 total_shares,
                 segment_size,
                 data_length): # the length of the original file
        """
        Record the share number, server reference, storage index, secrets
        and encoding parameters. Nothing is sent to the server here.

        Asserts that segment_size equals data_length rounded up by
        mathutil.next_multiple(data_length, required_shares).
        """
        self.shnum = shnum
        self._rref = rref
        self._storage_index = storage_index
        self._secrets = secrets

        self._seqnum = seqnum
        self._required_shares = required_shares
        self._total_shares = total_shares
        self._segment_size = segment_size
        self._data_length = data_length

        # This is an SDMF file, so it should have only one segment, so,
        # modulo padding of the data length, the segment size and the
        # data length should be the same.
        expected_segment_size = mathutil.next_multiple(data_length,
                                                       self._required_shares)
        assert expected_segment_size == segment_size

        # Floor division keeps the block size an integer even where "/"
        # means true division; put_block compares it against len(data).
        self._block_size = self._segment_size // self._required_shares

        # This is meant to mimic how SDMF files were built before MDMF
        # entered the picture: we generate each share in its entirety,
        # then push it off to the storage server in one write. When
        # callers call set_*, they are just populating this dict.
        # finish_publishing will stitch these pieces together into a
        # coherent share, and then write the coherent share to the
        # storage server.
        self._share_pieces = {}

        # This tells the write logic what checkstring to use when
        # writing remote shares.
        self._testvs = []

        # Read back the checkstring-sized prefix of the share alongside
        # the write.
        self._readvs = [(0, struct.calcsize(PREFIX))]

    def set_checkstring(self, checkstring_or_seqnum,
                        root_hash=None,
                        salt=None):
        """
        Set the checkstring that I will pass to the remote server when
        writing.

            @param checkstring_or_seqnum: A packed checkstring to use,
                   or a sequence number. If root_hash and salt are both
                   given (and non-empty), I treat this as a sequence
                   number and pack the checkstring myself; otherwise I
                   treat it as an already-packed checkstring.

        Note that implementations can differ in which semantics they
        wish to support for set_checkstring -- they can, for example,
        build the checkstring themselves from its constituents, or
        some other thing.
        """
        if root_hash and salt:
            checkstring = struct.pack(PREFIX,
                                      0,
                                      checkstring_or_seqnum,
                                      root_hash,
                                      salt)
        else:
            checkstring = checkstring_or_seqnum
        # One test vector: the bytes at offset 0 must equal the checkstring.
        self._testvs = [(0, len(checkstring), "eq", checkstring)]

    def get_checkstring(self):
        """
        Get the checkstring that I think currently exists on the remote
        server. Returns "" if no test vector has been set yet.
        """
        if self._testvs:
            return self._testvs[0][3]
        return ""

    def put_block(self, data, segnum, salt):
        """
        Add a block and salt to the share.

        Asserts that segnum is 0, that data is exactly one block long and
        that salt is SALT_SIZE bytes. Returns an already-fired Deferred.
        """
        # SDMF files have only one segment
        assert segnum == 0
        assert len(data) == self._block_size
        assert len(salt) == SALT_SIZE

        self._share_pieces['sharedata'] = data
        self._share_pieces['salt'] = salt

        # TODO: Figure out something intelligent to return.
        return defer.succeed(None)

    def put_encprivkey(self, encprivkey):
        """
        Add the encrypted private key to the share. Returns an
        already-fired Deferred.
        """
        self._share_pieces['encprivkey'] = encprivkey

        return defer.succeed(None)

    def put_blockhashes(self, blockhashes):
        """
        Add the block hash tree to the share.

        blockhashes must be a list of HASH_SIZE-byte hashes; they are
        stored concatenated. Returns an already-fired Deferred.
        """
        assert isinstance(blockhashes, list)
        for h in blockhashes:
            assert len(h) == HASH_SIZE

        # serialize the blockhashes, then set them.
        blockhashes_s = "".join(blockhashes)
        self._share_pieces['block_hash_tree'] = blockhashes_s

        return defer.succeed(None)

    def put_sharehashes(self, sharehashes):
        """
        Add the share hash chain to the share.

        sharehashes must be a dict mapping hash number to a
        HASH_SIZE-byte hash; entries are serialized in ascending
        hash-number order. Returns an already-fired Deferred.
        """
        assert isinstance(sharehashes, dict)
        for h in sharehashes.values():
            assert len(h) == HASH_SIZE

        # serialize the sharehashes, then set them.
        sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
                                 for i in sorted(sharehashes.keys())])
        self._share_pieces['share_hash_chain'] = sharehashes_s

        return defer.succeed(None)

    def put_root_hash(self, root_hash):
        """
        Add the root hash to the share. Asserts that it is HASH_SIZE
        bytes long. Returns an already-fired Deferred.
        """
        assert len(root_hash) == HASH_SIZE

        self._share_pieces['root_hash'] = root_hash

        return defer.succeed(None)

    def put_salt(self, salt):
        """
        Add a salt to an empty SDMF file. The share data is set to the
        empty string at the same time.

        Unlike the other put_* methods, this one returns None rather than
        a Deferred.
        """
        assert len(salt) == SALT_SIZE

        self._share_pieces['salt'] = salt
        self._share_pieces['sharedata'] = ""

    def get_signable(self):
        """
        Return the part of the share that needs to be signed.

        SDMF writers need to sign the packed representation of the
        first eight fields of the remote share, that is:
            - version number (0)
            - sequence number
            - root of the share hash tree
            - salt
            - k
            - n
            - segsize
            - datalen

        This method is responsible for returning that to callers. The
        root hash and salt must already have been put; otherwise a
        KeyError is raised.
        """
        return struct.pack(SIGNED_PREFIX,
                           0,
                           self._seqnum,
                           self._share_pieces['root_hash'],
                           self._share_pieces['salt'],
                           self._required_shares,
                           self._total_shares,
                           self._segment_size,
                           self._data_length)

    def put_signature(self, signature):
        """
        Add the signature to the share. Returns an already-fired
        Deferred.
        """
        self._share_pieces['signature'] = signature

        return defer.succeed(None)

    def put_verification_key(self, verification_key):
        """
        Add the verification key to the share. Returns an already-fired
        Deferred.
        """
        self._share_pieces['verification_key'] = verification_key

        return defer.succeed(None)

    def get_verinfo(self):
        """
        I return my verinfo tuple. This is used by the ServermapUpdater
        to keep track of versions of mutable files.

        The tuple I return contains, in order:
            - seqnum
            - root hash
            - salt (the IV)
            - segsize
            - datalen
            - k
            - n
            - prefix (the thing that you sign)
            - a tuple of (name, offset) pairs

        Every share piece must already have been put, since both the
        prefix and the offsets are computed from them.
        """
        return (self._seqnum,
                self._share_pieces['root_hash'],
                self._share_pieces['salt'],
                self._segment_size,
                self._data_length,
                self._required_shares,
                self._total_shares,
                self.get_signable(),
                self._get_offsets_tuple())

    def _get_offsets_dict(self):
        """
        Compute the offset table from the lengths of the pieces put so
        far, returning a dict mapping offset name to byte offset. Raises
        KeyError if a piece is missing.
        """
        # Each pair names an offset and the piece that ends at it; the
        # pieces are laid out back to back after the header.
        layout = [('signature', 'verification_key'),
                  ('share_hash_chain', 'signature'),
                  ('block_hash_tree', 'share_hash_chain'),
                  ('share_data', 'block_hash_tree'),
                  ('enc_privkey', 'sharedata'),
                  ('EOF', 'encprivkey')]
        offsets = {}
        position = HEADER_LENGTH
        for (offset_name, piece_name) in layout:
            position += len(self._share_pieces[piece_name])
            offsets[offset_name] = position
        return offsets

    def _get_offsets_tuple(self):
        """
        Return the offset table as a tuple of (name, offset) pairs.
        """
        return tuple(self._get_offsets_dict().items())

    def _pack_offsets(self):
        """
        Return the offset table packed in on-disk form: four unsigned
        32-bit offsets followed by two unsigned 64-bit offsets.
        """
        offsets = self._get_offsets_dict()
        return struct.pack(">LLLLQQ",
                           offsets['signature'],
                           offsets['share_hash_chain'],
                           offsets['block_hash_tree'],
                           offsets['share_data'],
                           offsets['enc_privkey'],
                           offsets['EOF'])

    def finish_publishing(self):
        """
        Do anything necessary to finish writing the share to a remote
        server. I require that no further publishing needs to take place
        after this method has been called.

        Returns whatever the remote slot_testv_and_readv_and_writev call
        returns.
        """
        # root_hash and salt are included because get_signable() below
        # needs them; checking here reports the share number and the
        # pieces present instead of a bare KeyError.
        for k in ["sharedata", "encprivkey", "signature", "verification_key",
                  "share_hash_chain", "block_hash_tree",
                  "root_hash", "salt"]:
            assert k in self._share_pieces, (self.shnum, k, self._share_pieces.keys())
        # This is the only method that actually writes something to the
        # remote server.
        # First, we need to pack the share into data that we can write
        # to the remote server in one write.
        offsets = self._pack_offsets()
        prefix = self.get_signable()
        final_share = "".join([prefix,
                               offsets,
                               self._share_pieces['verification_key'],
                               self._share_pieces['signature'],
                               self._share_pieces['share_hash_chain'],
                               self._share_pieces['block_hash_tree'],
                               self._share_pieces['sharedata'],
                               self._share_pieces['encprivkey']])

        # Our only data vector is going to be writing the final share,
        # in its entirety.
        datavs = [(0, final_share)]

        if not self._testvs:
            # Our caller has not provided us with another checkstring
            # yet, so we assume that we are writing a new share, and set
            # a test vector that will allow a new share to be written.
            self._testvs = [(0, 1, "eq", "")]

        tw_vectors = {self.shnum: (self._testvs, datavs, None)}
        return self._rref.callRemote("slot_testv_and_readv_and_writev",
                                     self._storage_index,
                                     self._secrets,
                                     tw_vectors,
                                     # TODO is it useful to read something?
                                     self._readvs)
# Struct format strings for the MDMF share header, built up from the
# shortest (the checkstring) to the longest (the full header).
MDMFCHECKSTRING = ">BQ32s"
MDMFHEADERWITHOUTOFFSETS = MDMFCHECKSTRING + "BBQQ"
MDMFSIGNABLEHEADER = MDMFHEADERWITHOUTOFFSETS
MDMFOFFSETS = ">QQQQQQQQ"
# The byte-order marker is dropped from MDMFOFFSETS because the leading
# part of the header already supplies it.
MDMFHEADER = MDMFHEADERWITHOUTOFFSETS + " " + MDMFOFFSETS[1:]

MDMFHEADERSIZE = struct.calcsize(MDMFHEADER)
MDMFHEADERWITHOUTOFFSETSSIZE = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
MDMFOFFSETS_LENGTH = struct.calcsize(MDMFOFFSETS)

# NOTE(review): presumably sizes in bytes of the encrypted private key,
# signature and verification key sections -- confirm against the code
# that uses them.
PRIVATE_KEY_SIZE = 1220
SIGNATURE_SIZE = 260
VERIFICATION_KEY_SIZE = 292
# Upper bound on the size of a serialized share hash chain. There are never
# more than 256 shares, so validating one takes at most log2(256)
# hash-chain nodes, and each node is stored as a 2-byte node number followed
# by a HASH_SIZE-byte hash.
SHARE_HASH_CHAIN_SIZE = mathutil.log_ceil(256, 2) * (2 + HASH_SIZE)
class MDMFSlotWriteProxy:
implements(IMutableSlotWriter)
"""
I represent a remote write slot for an MDMF mutable file.
I abstract away from my caller the details of block and salt
management, and the implementation of the on-disk format for MDMF
shares.
"""
# Expected layout, MDMF:
# offset: size: name:
#-- signed part --
# 0 1 version number (01)
# 1 8 sequence number
# 9 32 share tree root hash
# 41 1 The "k" encoding parameter
# 42 1 The "N" encoding parameter
# 43 8 The segment size of the uploaded file
# 51 8 The data length of the original plaintext
#-- end signed part --
# 59 8 The offset of the encrypted private key
# 67 8 The offset of the share hash chain
# 75 8 The offset of the signature
# 83 8 The offset of the verification key
# 91 8 The offset of the end of the v. key.
# 99 8 The offset of the share data
# 107 8 The offset of the block hash tree
# 115 8 The offset of EOF
# 123 var encrypted private key
# var var share hash chain
# var var signature
# var var verification key
# var large share data
# var var block hash tree
#
# We order the fields that way to make smart downloaders -- downloaders
# which preemptively read a big part of the share -- possible.
#
# The checkstring is the first three fields -- the version number,
# sequence number, and root hash. This is consistent
# in meaning to what we have with SDMF files, except now instead of
# using the literal salt, we use a value derived from all of the
# salts -- the share hash root.
#
# The salt is stored before the block for each segment. The block
# hash tree is computed over the combination of block and salt for
# each segment. In this way, we get integrity checking for both
# block and salt with the current block hash tree arrangement.
#
# The ordering of the offsets is different to reflect the dependencies
# that we'll run into with an MDMF file. The expected write flow is
# something like this:
#
# 0: Initialize with the sequence number, encoding parameters and
# data length. From this, we can deduce the number of segments,
# and where they should go. We can also figure out where the
# encrypted private key should go, because we can figure out how
# big the share data will be.
#
# 1: Encrypt, encode, and upload the file in chunks. Do something
# like
#
# put_block(data, segnum, salt)
#
# to write a block and a salt to the disk. We can do both of
# these operations now because we have enough of the offsets to
# know where to put them.
#
# 2: Put the encrypted private key. Use:
#
# put_encprivkey(encprivkey)
#
# Now that we know the length of the private key, we can fill
# in the offset for the block hash tree.
#
# 3: We're now in a position to upload the block hash tree for
# a share. Put that using something like:
#
# put_blockhashes(block_hash_tree)
#
# Note that block_hash_tree is a list of hashes -- we'll take
# care of the details of serializing that appropriately. When
# we get the block hash tree, we are also in a position to
# calculate the offset for the share hash chain, and fill that
# into the offsets table.
#
# 4: We're now in a position to upload the share hash chain for
# a share. Do that with something like:
#
# put_sharehashes(share_hash_chain)
#
# share_hash_chain should be a dictionary mapping shnums to
# 32-byte hashes -- the wrapper handles serialization.
# We'll know where to put the signature at this point, also.
# The root of this tree will be put explicitly in the next
# step.
#
# 5: Before putting the signature, we must first put the
# root_hash. Do this with:
#
# put_root_hash(root_hash).
#
# In terms of knowing where to put this value, it was always
# possible to place it, but it makes sense semantically to
# place it after the share hash tree, so that's why you do it
# in this order.
#
# 6: With the root hash put, we can now sign the header. Use:
#
# get_signable()
#
# to get the part of the header that you want to sign, and use:
#
# put_signature(signature)
#
# to write your signature to the remote server.
#
# 7: Add the verification key, and finish. Do:
#
# put_verification_key(key)
#
# and
#
# finish_publish()
#
# Checkstring management:
#
# To write to a mutable slot, we have to provide test vectors to ensure
# that we are writing to the same data that we think we are. These
# vectors allow us to detect uncoordinated writes; that is, writes
# where both we and some other shareholder are writing to the
# mutable slot, and to report those back to the parts of the program
# doing the writing.
#
# With SDMF, this was easy -- all of the share data was written in
# one go, so it was easy to detect uncoordinated writes, and we only
# had to do it once. With MDMF, not all of the file is written at
# once.
#
# If a share is new, we write out as much of the header as we can
# before writing out anything else. This gives other writers a
# canary that they can use to detect uncoordinated writes, and, if
# they do the same thing, gives us the same canary. We then update
# the share. We won't be able to write out two fields of the header
# -- the share tree hash and the salt hash -- until we finish
# writing out the share. We only require the writer to provide the
# initial checkstring, and keep track of what it should be after
# updates ourselves.
#
# If we haven't written anything yet, then on the first write (which
# will probably be a block + salt of a share), we'll also write out
# the header. On subsequent passes, we'll expect to see the header.
# This changes in two places:
#
# - When we write out the salt hash
# - When we write out the root of the share hash tree
#
# since these values will change the header. It is possible that we
# can just make those be written in one operation to minimize
# disruption.
def __init__(self,
             shnum,
             rref, # a remote reference to a storage server
             storage_index,
             secrets, # (write_enabler, renew_secret, cancel_secret)
             seqnum, # the sequence number of the mutable file
             required_shares,
             total_shares,
             segment_size,
             data_length): # the length of the original file
    """
    I prepare to write share number shnum of an MDMF mutable file.

    Nothing is sent to the storage server here; I only record my
    arguments and compute the offsets that can be known before any
    data has been queued ('enc_privkey', 'share_data' and
    'block_hash_tree').

    I assert that 0 <= shnum < total_shares and that segment_size is a
    multiple of required_shares.
    """
    self.shnum = shnum
    self._rref = rref
    self._storage_index = storage_index
    self._seqnum = seqnum
    self._required_shares = required_shares
    assert self.shnum >= 0 and self.shnum < total_shares
    self._total_shares = total_shares
    # We build up the offset table as we write things. It is queued
    # for writing by finish_publishing, once every offset is known.
    self._offsets = {}
    self._testvs = []
    # This is a list of write vectors that will be sent to our
    # remote server once we are directed to write things there.
    self._writevs = []
    self._secrets = secrets
    # The segment size needs to be a multiple of the k parameter --
    # any padding should have been carried out by the publisher
    # already.
    assert segment_size % required_shares == 0
    self._segment_size = segment_size
    self._data_length = data_length

    # These are set later -- we define them here so that we can
    # check for their existence easily

    # This is the root of the share hash tree -- the Merkle tree
    # over the roots of the block hash trees computed for shares in
    # this upload.
    self._root_hash = None

    # We haven't yet written anything to the remote bucket. By
    # setting this, we tell the _write method as much. The write
    # method will then know that it also needs to add a write vector
    # for the checkstring (or what we have of it) to the first write
    # request. We'll then record that value for future use. If
    # we're expecting something to be there already, we need to call
    # set_checkstring before we write anything to tell the first
    # write about that.
    self._written = False

    # When writing data to the storage servers, we get a read vector
    # for free. We'll read the checkstring, which will help us
    # figure out what's gone wrong if a write fails.
    self._readv = [(0, struct.calcsize(MDMFCHECKSTRING))]

    # We calculate the number of segments because it tells us
    # where the salt part of the file ends/share segment begins,
    # and also because it provides a useful amount of bounds checking.
    self._num_segments = mathutil.div_ceil(self._data_length,
                                           self._segment_size)
    # Explicit floor division: the sizes below are used as byte
    # offsets and lengths, so they must stay integers even where "/"
    # means true division. The assertion above makes this exact.
    self._block_size = self._segment_size // self._required_shares
    # We also calculate the share size, to help us with block
    # constraints later.
    tail_size = self._data_length % self._segment_size
    if not tail_size:
        self._tail_block_size = self._block_size
    else:
        self._tail_block_size = mathutil.next_multiple(tail_size,
                                                       self._required_shares)
        self._tail_block_size //= self._required_shares

    # We already know where the sharedata starts; right after the end
    # of the header (which is defined as the signable part + the offsets)
    # We can also calculate where the encrypted private key begins
    # from what we know now.
    self._actual_block_size = self._block_size + SALT_SIZE
    data_size = self._actual_block_size * (self._num_segments - 1)
    data_size += self._tail_block_size
    data_size += SALT_SIZE
    self._offsets['enc_privkey'] = MDMFHEADERSIZE

    # We don't define offsets for these because we want them to be
    # tightly packed -- this allows us to ignore the responsibility
    # of padding individual values, and of removing that padding
    # later. So nonconstant_start is where we start writing
    # nonconstant data.
    nonconstant_start = self._offsets['enc_privkey']
    nonconstant_start += PRIVATE_KEY_SIZE
    nonconstant_start += SIGNATURE_SIZE
    nonconstant_start += VERIFICATION_KEY_SIZE
    nonconstant_start += SHARE_HASH_CHAIN_SIZE

    self._offsets['share_data'] = nonconstant_start

    # Finally, we know how big the share data will be, so we can
    # figure out where the block hash tree needs to go.
    # XXX: But this will go away if Zooko wants to make it so that
    # you don't need to know the size of the file before you start
    # uploading it.
    self._offsets['block_hash_tree'] = self._offsets['share_data'] + \
                                       data_size

    # Done. We can now start writing.
def set_checkstring(self,
                    seqnum_or_checkstring,
                    root_hash=None,
                    salt=None):
    """
    Set the checkstring that my next write will be tested against.

    This can be invoked in one of two ways.

    With one argument, I assume that you are giving me a literal
    checkstring -- e.g., the output of get_checkstring. I will then
    set that checkstring as it is. This form is used by unit tests.

    With two arguments, I assume that you are giving me a sequence
    number and root hash to make a checkstring from. In that case, I
    will build a checkstring and set it for you. This form is used
    by the publisher.

    The salt argument is accepted but not used.

    By default, I assume that I am writing new shares to the grid.
    If you don't explicitly set your own checkstring, I will use
    one that requires that the remote share not exist. You will want
    to use this method if you are updating a share in-place;
    otherwise, writes will fail.
    """
    # Callers may overwrite a previously set checkstring; I assume
    # that they know what they are doing when they call me.
    if root_hash:
        checkstring = struct.pack(MDMFCHECKSTRING,
                                  1,
                                  seqnum_or_checkstring,
                                  root_hash)
    else:
        checkstring = seqnum_or_checkstring

    # Start from a clean slate in either case. An empty checkstring
    # means "the share must not exist yet"; since len("") is 0, that
    # case can't be expressed as a normal test vector here, so the
    # list is left empty and _write substitutes the right vector.
    self._testvs = []
    if checkstring != "":
        self._testvs.append((0, len(checkstring), "eq", checkstring))
def __repr__(self):
    """Return a short description naming the share number I write."""
    description = "MDMFSlotWriteProxy for share %d" % (self.shnum,)
    return description
def get_checkstring(self):
    """
    I return a representation of what the checkstring for my share
    on the server will look like: version 1, my sequence number, and
    my root hash.

    I am mostly used for tests.
    """
    # Until the root hash has been put, 32 zero bytes stand in for it.
    roothash = self._root_hash or "\x00" * 32
    return struct.pack(MDMFCHECKSTRING, 1, self._seqnum, roothash)
def put_block(self, data, segnum, salt):
    """
    I queue a write vector for the data, salt, and segment number
    provided to me. I return None, as I do not actually cause
    anything to be written yet.

    I raise LayoutInvalid if segnum is not less than my number of
    segments, if salt is not SALT_SIZE bytes long, or if data is not
    exactly the block size expected for that segment (the tail block
    size for the last segment, the regular block size otherwise).
    """
    if segnum >= self._num_segments:
        raise LayoutInvalid("I won't overwrite the block hash tree")
    if len(salt) != SALT_SIZE:
        # Interpolate the sizes; without the arguments the message
        # would contain literal "%d" placeholders.
        raise LayoutInvalid("I was given a salt of size %d, but "
                            "I wanted a salt of size %d" %
                            (len(salt), SALT_SIZE))

    if segnum + 1 == self._num_segments:
        if len(data) != self._tail_block_size:
            raise LayoutInvalid("I was given the wrong size block to write")
    elif len(data) != self._block_size:
        raise LayoutInvalid("I was given the wrong size block to write")

    # Each stored block is preceded by its salt, so segments are
    # _actual_block_size (block size + salt size) apart, starting at
    # the share data offset.
    offset = self._offsets['share_data'] + \
        (self._actual_block_size * segnum)
    data = salt + data

    self._writevs.append(tuple([offset, data]))
def put_encprivkey(self, encprivkey):
    """
    I queue a write vector for the encrypted private key provided to
    me, and record where the share hash chain will start (directly
    after the key).
    """
    offsets = self._offsets
    assert offsets
    assert offsets['enc_privkey']
    # The share hash chain is placed right after the encrypted
    # private key, so the key must not be (re)written once the share
    # hash chain has been queued. Queuing the share hash chain is what
    # defines the signature offset, so that is what we look for.
    if "signature" in offsets:
        raise LayoutInvalid("You can't put the encrypted private key "
                            "after putting the share hash chain")

    start = offsets['enc_privkey']
    offsets['share_hash_chain'] = start + len(encprivkey)
    self._writevs.append((start, encprivkey))
def put_blockhashes(self, blockhashes):
    """
    I queue a write vector to put the block hash tree in blockhashes
    onto the remote server.

    blockhashes must be a list of strings; they are concatenated and
    written at the block hash tree offset, which was computed when I
    was created. I also record the EOF offset, which is where the
    block hash tree ends.
    """
    offsets = self._offsets
    assert offsets
    assert "block_hash_tree" in offsets
    assert isinstance(blockhashes, list)

    packed_hashes = "".join(blockhashes)
    start = offsets['block_hash_tree']
    offsets['EOF'] = start + len(packed_hashes)
    self._writevs.append((start, packed_hashes))
def put_sharehashes(self, sharehashes):
    """
    I queue a write vector to put the share hash chain in my
    argument onto the remote server.

    sharehashes must be a dict mapping hash indices to 32-byte hashes.
    Each entry is packed as ">H32s", in order of increasing index.

    The encrypted private key must be queued before the share hash
    chain, since the share hash chain starts where the encrypted
    private key ends. The share hash chain must be put before the
    signature, since the length of the packed share hash chain
    determines the offset of the signature.

    I raise LayoutInvalid if either ordering rule is violated.
    """
    assert isinstance(sharehashes, dict)
    assert self._offsets
    # put_encprivkey is the method that defines the share hash chain
    # offset, so its absence means the key has not been queued yet.
    if "share_hash_chain" not in self._offsets:
        raise LayoutInvalid("You must put the encrypted private key "
                            "before putting the share hash chain")

    # The signature comes after the share hash chain. If the
    # signature has already been written, we must not write another
    # share hash chain. Queuing the signature defines the verification
    # key offset, so we look for that.
    if "verification_key" in self._offsets:
        raise LayoutInvalid("You must write the share hash chain "
                            "before you write the signature")
    sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
                             for i in sorted(sharehashes)])
    self._offsets['signature'] = self._offsets['share_hash_chain'] + \
        len(sharehashes_s)
    self._writevs.append(tuple([self._offsets['share_hash_chain'],
                                sharehashes_s]))
def put_root_hash(self, roothash):
    """
    Put the root hash (the root of the share hash tree) in the
    remote slot.

    I raise LayoutInvalid unless roothash is exactly HASH_SIZE bytes
    long.
    """
    if len(roothash) != HASH_SIZE:
        raise LayoutInvalid("hashes and salts must be exactly %d bytes"
                            % HASH_SIZE)
    self._root_hash = roothash
    # The root hash is part of the checkstring, so we store it by
    # queuing a rewrite of the whole checkstring at the start of the
    # share.
    updated_checkstring = self.get_checkstring()
    self._writevs.append((0, updated_checkstring))
def get_signable(self):
    """
    Get the first seven fields of the mutable file; the parts that
    are signed.

    I raise LayoutInvalid if the root hash has not been set yet.
    """
    root_hash = self._root_hash
    if not root_hash:
        raise LayoutInvalid("You need to set the root hash "
                            "before getting something to "
                            "sign")
    signed_fields = (1,
                     self._seqnum,
                     root_hash,
                     self._required_shares,
                     self._total_shares,
                     self._segment_size,
                     self._data_length)
    return struct.pack(MDMFSIGNABLEHEADER, *signed_fields)
def put_signature(self, signature):
    """
    I queue a write vector for the signature of the MDMF share.

    I require that the share hash chain has been queued and that the
    root hash has been set before I will queue the signature, and
    that the verification key has not been queued yet. Otherwise I
    raise LayoutInvalid.
    """
    offsets = self._offsets
    # Queuing the share hash chain is what defines the signature
    # offset.
    if "signature" not in offsets:
        raise LayoutInvalid("You must put the share hash chain "
                            "before putting the signature")
    # It does not make sense to put a signature without first
    # putting the root hash (since otherwise the signed prefix would
    # be incomplete), so we don't allow that.
    if not self._root_hash:
        raise LayoutInvalid("You must complete the signed prefix "
                            "before computing a signature")
    # If we put the signature after we put the verification key, we
    # could end up running into the verification key, and will
    # probably screw up the offsets as well. So we don't allow that.
    # Queuing the verification key defines verification_key_end, so
    # look for that.
    if "verification_key_end" in offsets:
        raise LayoutInvalid("You can't put the signature after the "
                            "verification key")

    start = offsets['signature']
    offsets['verification_key'] = start + len(signature)
    self._writevs.append((start, signature))
def put_verification_key(self, verification_key):
    """
    I queue a write vector for the verification key.

    I require that the signature has been queued before I allow the
    verification key to be queued; otherwise I raise LayoutInvalid.
    I assert that the key ends at or before the start of the share
    data.
    """
    offsets = self._offsets
    if "verification_key" not in offsets:
        raise LayoutInvalid("You must put the signature before you "
                            "can put the verification key")

    start = offsets['verification_key']
    offsets['verification_key_end'] = start + len(verification_key)
    assert offsets['verification_key_end'] <= offsets['share_data']
    self._writevs.append((start, verification_key))
def _get_offsets_tuple(self):
    """Return my offsets dictionary as a tuple of (name, offset) pairs."""
    return tuple(self._offsets.items())
def get_verinfo(self):
    """
    I return my verinfo tuple: (seqnum, root hash, None, segment
    size, data length, k, N, signable prefix, offsets tuple).

    Since the prefix comes from get_signable, the root hash must have
    been set before I am called.
    """
    prefix = self.get_signable()
    offsets_tuple = self._get_offsets_tuple()
    return (self._seqnum,
            self._root_hash,
            None,
            self._segment_size,
            self._data_length,
            self._required_shares,
            self._total_shares,
            prefix,
            offsets_tuple)
def finish_publishing(self):
    """
    I add write vectors for the offsets table and the encoding
    parameters, and then cause all of the write vectors that I've
    dealt with so far to be published to the remote server, ending
    the write process. I return the result of that write.

    I raise LayoutInvalid if the verification key has not been queued.
    """
    offsets = self._offsets
    if "verification_key_end" not in offsets:
        raise LayoutInvalid("You must put the verification key before "
                            "you can publish the offsets")

    # The offsets table sits right after the rest of the header.
    offsets_offset = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
    field_order = ('enc_privkey',
                   'share_hash_chain',
                   'signature',
                   'verification_key',
                   'verification_key_end',
                   'share_data',
                   'block_hash_tree',
                   'EOF')
    packed_offsets = struct.pack(MDMFOFFSETS,
                                 *[offsets[name] for name in field_order])
    self._writevs.append((offsets_offset, packed_offsets))

    # The encoding parameters follow the checkstring.
    encoding_parameters_offset = struct.calcsize(MDMFCHECKSTRING)
    packed_params = struct.pack(">BBQQ",
                                self._required_shares,
                                self._total_shares,
                                self._segment_size,
                                self._data_length)
    self._writevs.append((encoding_parameters_offset, packed_params))
    return self._write(self._writevs)
def _write(self, datavs, on_failure=None, on_success=None):
    """
    I write the data vectors in datavs to the remote slot.

    datavs is a list of (offset, data) write vectors; on the first
    write I append a write vector for my current checkstring to it.
    on_failure and on_success are optional zero-argument callables.
    on_failure is called when the remote call fails or when the
    server reports that the test vectors did not match; on_success is
    called when the write was applied.

    I return the Deferred for the remote call; it fires with whatever
    the remote call produced.
    """
    if not self._testvs:
        # No checkstring was set, so require that the share be empty.
        self._testvs = [(0, 1, "eq", "")]
    if not self._written:
        # Write a new checkstring to the share when we write it, so
        # that we have something to check later.
        new_checkstring = self.get_checkstring()
        datavs.append((0, new_checkstring))
        caller_on_success = on_success
        def _first_write():
            self._written = True
            self._testvs = [(0, len(new_checkstring), "eq", new_checkstring)]
            # Don't lose the caller's callback just because this
            # happened to be the first write.
            if caller_on_success:
                caller_on_success()
        on_success = _first_write
    tw_vectors = {self.shnum: (self._testvs, datavs, None)}
    d = self._rref.callRemote("slot_testv_and_readv_and_writev",
                              self._storage_index,
                              self._secrets,
                              tw_vectors,
                              self._readv)
    def _result(results):
        if isinstance(results, failure.Failure) or not results[0]:
            # Do nothing else; the write was unsuccessful.
            if on_failure:
                on_failure()
        else:
            if on_success:
                on_success()
        return results
    # addBoth, so that _result also sees a Failure from the remote
    # call. Returning the Failure keeps the error propagating.
    d.addBoth(_result)
    return d
def _handle_bad_struct(f):
    """
    Errback that converts a struct.error failure into BadShareError.

    Any other kind of failure is passed along by f.trap.
    """
    # struct.unpack errors mean the server didn't give us enough data, so
    # this share is bad
    f.trap(struct.error)
    message = f.value.args[0]
    raise BadShareError(message)
class MDMFSlotReadProxy:
    """
    I read from a mutable slot filled with data written in the MDMF data
    format (which is described above).

    I can be initialized with some amount of data, which I will use (if
    it is valid) to eliminate some of the need to fetch it from servers.
    """
    def __init__(self,
                 rref,
                 storage_index,
                 shnum,
                 data=""):
        """
        rref is used to issue remote slot_readv calls, storage_index
        and shnum identify the share to read, and data is optional
        already-fetched share data (None is treated as "").
        """
        # Start the initialization process.
        self._rref = rref
        self._storage_index = storage_index
        self.shnum = shnum

        # Before doing anything, the reader is probably going to want to
        # verify that the signature is correct. To do that, they'll need
        # the verification key, and the signature. To get those, we'll
        # need the offset table. So fetch the offset table on the
        # assumption that that will be the first thing that a reader is
        # going to do.

        # The fact that these encoding parameters are None tells us
        # that we haven't yet fetched them from the remote share, so we
        # should. We could just not set them, but the checks will be
        # easier to read if we don't have to use hasattr.
        self._version_number = None
        self._sequence_number = None
        self._root_hash = None
        # Filled in if we're dealing with an SDMF file. Unused
        # otherwise.
        self._salt = None
        self._required_shares = None
        self._total_shares = None
        self._segment_size = None
        self._data_length = None
        self._offsets = None

        # If the user has chosen to initialize us with some data, we'll
        # try to satisfy subsequent data requests with that data before
        # asking the storage server for it.
        self._data = data
        # The way callers interact with cache in the filenode returns
        # None if there isn't any cached data, but the way we index the
        # cached data requires a string, so convert None to "".
        if self._data is None:
            self._data = ""

    def _maybe_fetch_offsets_and_header(self, force_remote=False):
        """
        I fetch the offset table and the header from the remote slot if
        I don't already have them. If I do have them, I do nothing and
        return an already-fired Deferred.
        """
        if self._offsets:
            return defer.succeed(None)
        # At this point, we may be either SDMF or MDMF. We fetch 123
        # bytes in one read rather than paying for a second roundtrip.
        # NOTE(review): presumably 123 bytes covers the header plus
        # offset table of both formats -- confirm against the struct
        # formats defined elsewhere in this file.
        readvs = [(0, 123)]
        d = self._read(readvs, force_remote)
        d.addCallback(self._process_encoding_parameters)
        d.addCallback(self._process_offsets)
        d.addErrback(_handle_bad_struct)
        return d

    def _process_encoding_parameters(self, encoding_parameters):
        """
        I parse the signed header out of a read result (a dict mapping
        share numbers to lists of strings) and remember its fields.

        I return the raw header string so that _process_offsets can
        parse the offset table out of it. I raise BadShareError if the
        result has no data for my share, and UnknownVersionError if the
        version byte is neither SDMF nor MDMF.
        """
        if self.shnum not in encoding_parameters:
            raise BadShareError("no data for shnum %d" % self.shnum)
        encoding_parameters = encoding_parameters[self.shnum][0]
        # The first byte is the version number. It will tell us what
        # to do next.
        (verno,) = struct.unpack(">B", encoding_parameters[:1])
        if verno == MDMF_VERSION:
            read_size = MDMFHEADERWITHOUTOFFSETSSIZE
            (verno,
             seqnum,
             root_hash,
             k,
             n,
             segsize,
             datalen) = struct.unpack(MDMFHEADERWITHOUTOFFSETS,
                                      encoding_parameters[:read_size])
            if segsize == 0 and datalen == 0:
                # Empty file, no segments.
                self._num_segments = 0
            else:
                self._num_segments = mathutil.div_ceil(datalen, segsize)

        elif verno == SDMF_VERSION:
            read_size = SIGNED_PREFIX_LENGTH
            (verno,
             seqnum,
             root_hash,
             salt,
             k,
             n,
             segsize,
             datalen) = struct.unpack(">BQ32s16s BBQQ",
                                      encoding_parameters[:SIGNED_PREFIX_LENGTH])
            self._salt = salt
            if segsize == 0 and datalen == 0:
                # empty file
                self._num_segments = 0
            else:
                # non-empty SDMF files have one segment.
                self._num_segments = 1
        else:
            raise UnknownVersionError("You asked me to read mutable file "
                                      "version %d, but I only understand "
                                      "%d and %d" % (verno, SDMF_VERSION,
                                                     MDMF_VERSION))

        self._version_number = verno
        self._sequence_number = seqnum
        self._root_hash = root_hash
        self._required_shares = k
        self._total_shares = n
        self._segment_size = segsize
        self._data_length = datalen

        # Explicit floor division: block sizes are used as byte counts
        # in read vectors, so they must stay integers even where "/"
        # means true division.
        self._block_size = self._segment_size // self._required_shares
        # We can upload empty files, and need to account for this fact
        # so as to avoid zero-division and zero-modulo errors.
        if datalen > 0:
            tail_size = self._data_length % self._segment_size
        else:
            tail_size = 0
        if not tail_size:
            self._tail_block_size = self._block_size
        else:
            self._tail_block_size = mathutil.next_multiple(tail_size,
                                                           self._required_shares)
            self._tail_block_size //= self._required_shares

        return encoding_parameters

    def _process_offsets(self, offsets):
        """
        I parse the offset table out of the raw header string in
        offsets and store it in self._offsets, keyed by field name.
        The table layout depends on the version number recorded by
        _process_encoding_parameters.
        """
        if self._version_number == 0:
            read_size = OFFSETS_LENGTH
            read_offset = SIGNED_PREFIX_LENGTH
            end = read_size + read_offset
            (signature,
             share_hash_chain,
             block_hash_tree,
             share_data,
             enc_privkey,
             EOF) = struct.unpack(">LLLLQQ",
                                  offsets[read_offset:end])
            self._offsets = {}
            self._offsets['signature'] = signature
            self._offsets['share_data'] = share_data
            self._offsets['block_hash_tree'] = block_hash_tree
            self._offsets['share_hash_chain'] = share_hash_chain
            self._offsets['enc_privkey'] = enc_privkey
            self._offsets['EOF'] = EOF

        elif self._version_number == 1:
            read_offset = MDMFHEADERWITHOUTOFFSETSSIZE
            read_length = MDMFOFFSETS_LENGTH
            end = read_offset + read_length
            (encprivkey,
             sharehashes,
             signature,
             verification_key,
             verification_key_end,
             sharedata,
             blockhashes,
             eof) = struct.unpack(MDMFOFFSETS,
                                  offsets[read_offset:end])
            self._offsets = {}
            self._offsets['enc_privkey'] = encprivkey
            self._offsets['block_hash_tree'] = blockhashes
            self._offsets['share_hash_chain'] = sharehashes
            self._offsets['signature'] = signature
            self._offsets['verification_key'] = verification_key
            self._offsets['verification_key_end'] = \
                verification_key_end
            self._offsets['EOF'] = eof
            self._offsets['share_data'] = sharedata

    def get_block_and_salt(self, segnum):
        """
        I return (block, salt), where block is the block data and
        salt is the salt used to encrypt that segment.

        For version 0 shares the salt is the one read from the header;
        for version 1 shares it is stored in front of each block. I
        raise LayoutInvalid if segnum is past the last segment.
        """
        d = self._maybe_fetch_offsets_and_header()
        def _then(ignored):
            base_share_offset = self._offsets['share_data']

            if segnum + 1 > self._num_segments:
                raise LayoutInvalid("Not a valid segment number")

            if self._version_number == 0:
                share_offset = base_share_offset + self._block_size * segnum
            else:
                share_offset = base_share_offset + (self._block_size + \
                                                    SALT_SIZE) * segnum
            if segnum + 1 == self._num_segments:
                data = self._tail_block_size
            else:
                data = self._block_size

            if self._version_number == 1:
                data += SALT_SIZE

            readvs = [(share_offset, data)]
            return readvs
        d.addCallback(_then)
        d.addCallback(lambda readvs: self._read(readvs))
        def _process_results(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)
            if self._version_number == 0:
                # We only read the share data, but we know the salt from
                # when we fetched the header
                data = results[self.shnum]
                if not data:
                    data = ""
                else:
                    if len(data) != 1:
                        raise BadShareError("got %d vectors, not 1" % len(data))
                    data = data[0]
                salt = self._salt
            else:
                data = results[self.shnum]
                if not data:
                    salt = data = ""
                else:
                    salt_and_data = results[self.shnum][0]
                    salt = salt_and_data[:SALT_SIZE]
                    data = salt_and_data[SALT_SIZE:]
            return data, salt
        d.addCallback(_process_results)
        return d

    def get_blockhashes(self, needed=None, force_remote=False):
        """
        I return the block hash tree, as a list of HASH_SIZE-byte
        strings.

        I take an optional argument, needed, which is a set of indices
        correspond to hashes that I should fetch. If this argument is
        missing, I will fetch the entire block hash tree; otherwise, I
        may attempt to fetch fewer hashes, based on what needed says
        that I should do. Note that I may fetch as many hashes as I
        want, so long as the set of hashes that I do fetch is a superset
        of the ones that I am asked for, so callers should be prepared
        to tolerate additional hashes.

        At present I fetch the whole tree unless needed is the empty
        set, in which case I return an empty list without reading.
        """
        # TODO: Return only the parts of the block hash tree necessary
        # to validate the blocknum provided?
        # This is a good idea, but it is hard to implement correctly. It
        # is bad to fetch any one block hash more than once, so we
        # probably just want to fetch the whole thing at once and then
        # serve it.
        if needed == set([]):
            return defer.succeed([])
        d = self._maybe_fetch_offsets_and_header()
        def _then(ignored):
            blockhashes_offset = self._offsets['block_hash_tree']
            if self._version_number == 1:
                blockhashes_length = self._offsets['EOF'] - blockhashes_offset
            else:
                blockhashes_length = self._offsets['share_data'] - blockhashes_offset
            readvs = [(blockhashes_offset, blockhashes_length)]
            return readvs
        d.addCallback(_then)
        d.addCallback(lambda readvs:
            self._read(readvs, force_remote=force_remote))
        def _build_block_hash_tree(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)

            rawhashes = results[self.shnum][0]
            results = [rawhashes[i:i+HASH_SIZE]
                       for i in range(0, len(rawhashes), HASH_SIZE)]
            return results
        d.addCallback(_build_block_hash_tree)
        return d

    def get_sharehashes(self, needed=None, force_remote=False):
        """
        I return the part of the share hash chain placed to validate
        this share, as a dict mapping hash index to hash.

        I take an optional argument, needed. Needed is a set of indices
        that correspond to the hashes that I should fetch. If needed is
        not present, I will fetch and return the entire share hash
        chain. Otherwise, I may fetch and return any part of the share
        hash chain that is a superset of the part that I am asked to
        fetch. Callers should be prepared to deal with more hashes than
        they've asked for.

        Note that when needed is the empty set I return an empty list
        (not an empty dict) without reading anything.
        """
        if needed == set([]):
            return defer.succeed([])
        d = self._maybe_fetch_offsets_and_header()

        def _make_readvs(ignored):
            sharehashes_offset = self._offsets['share_hash_chain']
            if self._version_number == 0:
                sharehashes_length = self._offsets['block_hash_tree'] - sharehashes_offset
            else:
                sharehashes_length = self._offsets['signature'] - sharehashes_offset
            readvs = [(sharehashes_offset, sharehashes_length)]
            return readvs
        d.addCallback(_make_readvs)
        d.addCallback(lambda readvs:
            self._read(readvs, force_remote=force_remote))
        def _build_share_hash_chain(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)

            sharehashes = results[self.shnum][0]
            # Each entry is a 2-byte index followed by the hash.
            results = [sharehashes[i:i+(HASH_SIZE + 2)]
                       for i in range(0, len(sharehashes), HASH_SIZE + 2)]
            results = dict([struct.unpack(">H32s", data)
                            for data in results])
            return results
        d.addCallback(_build_share_hash_chain)
        d.addErrback(_handle_bad_struct)
        return d

    def get_encprivkey(self):
        """
        I return the encrypted private key.
        """
        d = self._maybe_fetch_offsets_and_header()
        def _make_readvs(ignored):
            privkey_offset = self._offsets['enc_privkey']
            if self._version_number == 0:
                privkey_length = self._offsets['EOF'] - privkey_offset
            else:
                privkey_length = self._offsets['share_hash_chain'] - privkey_offset
            readvs = [(privkey_offset, privkey_length)]
            return readvs
        d.addCallback(_make_readvs)
        d.addCallback(lambda readvs: self._read(readvs))
        def _process_results(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)
            privkey = results[self.shnum][0]
            return privkey
        d.addCallback(_process_results)
        return d

    def get_signature(self):
        """
        I return the signature of my share.
        """
        d = self._maybe_fetch_offsets_and_header()
        def _make_readvs(ignored):
            signature_offset = self._offsets['signature']
            if self._version_number == 1:
                signature_length = self._offsets['verification_key'] - signature_offset
            else:
                signature_length = self._offsets['share_hash_chain'] - signature_offset
            readvs = [(signature_offset, signature_length)]
            return readvs
        d.addCallback(_make_readvs)
        d.addCallback(lambda readvs: self._read(readvs))
        def _process_results(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)
            signature = results[self.shnum][0]
            return signature
        d.addCallback(_process_results)
        return d

    def get_verification_key(self):
        """
        I return the verification key.
        """
        d = self._maybe_fetch_offsets_and_header()
        def _make_readvs(ignored):
            if self._version_number == 1:
                vk_offset = self._offsets['verification_key']
                vk_length = self._offsets['verification_key_end'] - vk_offset
            else:
                # For version 0 the key is read from the end of the
                # fixed-size header up to the signature offset.
                vk_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ")
                vk_length = self._offsets['signature'] - vk_offset
            readvs = [(vk_offset, vk_length)]
            return readvs
        d.addCallback(_make_readvs)
        d.addCallback(lambda readvs: self._read(readvs))
        def _process_results(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)
            verification_key = results[self.shnum][0]
            return verification_key
        d.addCallback(_process_results)
        return d

    def get_encoding_parameters(self):
        """
        I return (k, n, segsize, datalen)
        """
        d = self._maybe_fetch_offsets_and_header()
        d.addCallback(lambda ignored:
            (self._required_shares,
             self._total_shares,
             self._segment_size,
             self._data_length))
        return d

    def get_seqnum(self):
        """
        I return the sequence number for this share.
        """
        d = self._maybe_fetch_offsets_and_header()
        d.addCallback(lambda ignored:
            self._sequence_number)
        return d

    def get_root_hash(self):
        """
        I return the root hash stored in the share's header.
        """
        d = self._maybe_fetch_offsets_and_header()
        d.addCallback(lambda ignored: self._root_hash)
        return d

    def get_checkstring(self):
        """
        I return the packed representation of the following:

            - version number
            - sequence number
            - root hash
            - salt (only when the header carried one)

        which my users use as a checkstring to detect other writers.
        """
        d = self._maybe_fetch_offsets_and_header()
        def _build_checkstring(ignored):
            if self._salt:
                checkstring = struct.pack(PREFIX,
                                          self._version_number,
                                          self._sequence_number,
                                          self._root_hash,
                                          self._salt)
            else:
                checkstring = struct.pack(MDMFCHECKSTRING,
                                          self._version_number,
                                          self._sequence_number,
                                          self._root_hash)

            return checkstring
        d.addCallback(_build_checkstring)
        return d

    def get_prefix(self, force_remote):
        """
        I return the signed prefix of the share, fetching the header
        first if necessary (from the server if force_remote is true).
        """
        d = self._maybe_fetch_offsets_and_header(force_remote)
        d.addCallback(lambda ignored:
            self._build_prefix())
        return d

    def _build_prefix(self):
        # The prefix is another name for the part of the remote share
        # that gets signed. It consists of everything up to and
        # including the datalength, packed by struct.
        if self._version_number == SDMF_VERSION:
            return struct.pack(SIGNED_PREFIX,
                               self._version_number,
                               self._sequence_number,
                               self._root_hash,
                               self._salt,
                               self._required_shares,
                               self._total_shares,
                               self._segment_size,
                               self._data_length)

        else:
            return struct.pack(MDMFSIGNABLEHEADER,
                               self._version_number,
                               self._sequence_number,
                               self._root_hash,
                               self._required_shares,
                               self._total_shares,
                               self._segment_size,
                               self._data_length)

    def _get_offsets_tuple(self):
        # The offsets are another component of the version information
        # tuple. Despite my name, I return a copy of the offsets
        # dictionary rather than a tuple.
        return self._offsets.copy()

    def get_verinfo(self):
        """
        I return my verinfo tuple. This is used by the ServermapUpdater
        to keep track of versions of mutable files.

        The verinfo tuple for MDMF files contains:
            - seqnum
            - root hash
            - a blank (None)
            - segsize
            - datalen
            - k
            - n
            - prefix (the thing that you sign)
            - a copy of the offsets dictionary

        The verinfo tuple for SDMF files is the same, but contains the
        salt read from the header in place of the blank.
        """
        d = self._maybe_fetch_offsets_and_header()
        def _build_verinfo(ignored):
            if self._version_number == SDMF_VERSION:
                salt_to_use = self._salt
            else:
                salt_to_use = None
            return (self._sequence_number,
                    self._root_hash,
                    salt_to_use,
                    self._segment_size,
                    self._data_length,
                    self._required_shares,
                    self._total_shares,
                    self._build_prefix(),
                    self._get_offsets_tuple())
        d.addCallback(_build_verinfo)
        return d

    def _read(self, readvs, force_remote=False):
        """
        I satisfy the (offset, length) read vectors in readvs, from my
        cached data when every one of them fits inside it and
        force_remote is false, and from the storage server otherwise.

        I return a Deferred that fires with a dict mapping share
        numbers to lists of strings.
        """
        # Build a real list: a lazy filter object is always true, which
        # would make the cached data unusable.
        unsatisfiable = [readv for readv in readvs
                         if readv[0] + readv[1] > len(self._data)]
        # TODO: It's entirely possible to tweak this so that it just
        # fulfills the requests that it can, and not demand that all
        # requests are satisfiable before running it.
        if not unsatisfiable and not force_remote:
            results = [self._data[offset:offset+length]
                       for (offset, length) in readvs]
            results = {self.shnum: results}
            return defer.succeed(results)
        else:
            return self._rref.callRemote("slot_readv",
                                         self._storage_index,
                                         [self.shnum],
                                         readvs)

    def is_sdmf(self):
        """I tell my caller whether or not my remote file is SDMF or MDMF
        """
        d = self._maybe_fetch_offsets_and_header()
        d.addCallback(lambda ignored:
            self._version_number == 0)
        return d
class LayoutInvalid(BadShareError):
    """
    I am raised when something is not laid out like a valid MDMF
    mutable file -- for example, when the pieces of a share are queued
    out of order or with the wrong size, or when a segment number that
    does not exist is requested.
    """
|