/usr/share/go-1.6/src/runtime/mbitmap.go is in golang-1.6-src 1.6.1-0ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 | // Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are described
// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
// to be visited during GC. The bits in each byte are consumed starting with
// the low bit: 1<<0, 1<<1, and so on.
//
// Heap bitmap
//
// The allocated heap comes from a subset of the memory in the range [start, used),
// where start == mheap_.arena_start and used == mheap_.arena_used.
// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
// stored in bytes indexed backward in memory from start.
// That is, the byte at address start-1 holds the 2-bit entries for the four words
// start through start+3*ptrSize, the byte at start-2 holds the entries for
// start+4*ptrSize through start+7*ptrSize, and so on.
//
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
// The meaning of the high bit depends on the position of the word being described
// in its allocated object. In the first word, the high bit is the GC ``marked'' bit.
// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
// In the third and later words, the high bit indicates that the object is still
// being described. In these words, if a bit pair with a high bit 0 is encountered,
// the low bit can also be assumed to be 0, and the object description is over.
// This 00 is called the ``dead'' encoding: it signals that the rest of the words
// in the object are uninteresting to the garbage collector.
//
// The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits.
// This form allows a copy from the 1-bit to the 4-bit form to keep the
// pointer bits contiguous, instead of having to space them out.
//
// The code makes use of the fact that the zero value for a heap bitmap
// has no live pointer bit set and is (depending on position), not marked,
// not checkmarked, and is the dead encoding.
// These properties must be preserved when modifying the encoding.
//
// Checkmarks
//
// In a concurrent garbage collector, one worries about failing to mark
// a live object due to mutations without write barriers or bugs in the
// collector implementation. As a sanity check, the GC has a 'checkmark'
// mode that retraverses the object graph with the world stopped, to make
// sure that everything that should be marked is marked.
// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
// for the second word of the object holds the checkmark bit.
// When not in checkmark mode, this bit is set to 1.
//
// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
// means every allocated object has two words, so there is room for the
// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
// just one word, so the second bit pair is not available for encoding the
// checkmark. However, because non-pointer allocations are combined
// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
// must be a pointer, so the type bit in the first word is not actually needed.
// It is still used in general, except in checkmark the type bit is repurposed
// as the checkmark bit and then reinitialized (to 1) as the type bit when
// finished.
package runtime
import (
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
const (
bitPointer = 1 << 0
bitMarked = 1 << 4
heapBitsShift = 1 // shift offset between successive bitPointer or bitMarked entries
heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte
// all mark/pointer bits in a byte
bitMarkedAll = bitMarked | bitMarked<<heapBitsShift | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
)
// addb returns the byte pointer p+n.
//go:nowritebarrier
func addb(p *byte, n uintptr) *byte {
// Note: wrote out full expression instead of calling add(p, n)
// to reduce the number of temporaries generated by the
// compiler for this trivial expression during inlining.
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}
// subtractb returns the byte pointer p-n.
//go:nowritebarrier
func subtractb(p *byte, n uintptr) *byte {
// Note: wrote out full expression instead of calling add(p, -n)
// to reduce the number of temporaries generated by the
// compiler for this trivial expression during inlining.
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}
// add1 returns the byte pointer p+1.
//go:nowritebarrier
func add1(p *byte) *byte {
// Note: wrote out full expression instead of calling addb(p, 1)
// to reduce the number of temporaries generated by the
// compiler for this trivial expression during inlining.
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}
// subtract1 returns the byte pointer p-1.
//go:nowritebarrier
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func subtract1(p *byte) *byte {
// Note: wrote out full expression instead of calling subtractb(p, 1)
// to reduce the number of temporaries generated by the
// compiler for this trivial expression during inlining.
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}
// mHeap_MapBits is called each time arena_used is extended.
// It maps any additional bitmap memory needed for the new arena memory.
// It must be called with the expected new value of arena_used,
// *before* h.arena_used has been updated.
// Waiting to update arena_used until after the memory has been mapped
// avoids faults when other threads try access the bitmap immediately
// after observing the change to arena_used.
//
//go:nowritebarrier
func (h *mheap) mapBits(arena_used uintptr) {
// Caller has added extra mappings to the arena.
// Add extra mappings of bitmap words as needed.
// We allocate extra bitmap pieces in chunks of bitmapChunk.
const bitmapChunk = 8192
n := (arena_used - mheap_.arena_start) / heapBitmapScale
n = round(n, bitmapChunk)
n = round(n, sys.PhysPageSize)
if h.bitmap_mapped >= n {
return
}
sysMap(unsafe.Pointer(h.arena_start-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
h.bitmap_mapped = n
}
// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
bitp *uint8
shift uint32
}
// heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func heapBitsForAddr(addr uintptr) heapBits {
// 2 bits per work, 4 pairs per byte, and a mask is hard coded.
off := (addr - mheap_.arena_start) / sys.PtrSize
return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/4 - 1)), uint32(off & 3)}
}
// heapBitsForSpan returns the heapBits for the span base address base.
func heapBitsForSpan(base uintptr) (hbits heapBits) {
if base < mheap_.arena_start || base >= mheap_.arena_used {
throw("heapBitsForSpan: base out of range")
}
hbits = heapBitsForAddr(base)
if hbits.shift != 0 {
throw("heapBitsForSpan: unaligned start")
}
return hbits
}
// heapBitsForObject returns the base address for the heap object
// containing the address p, along with the heapBits for base.
// If p does not point into a heap object,
// return base == 0
// otherwise return the base of the object.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan) {
arenaStart := mheap_.arena_start
if p < arenaStart || p >= mheap_.arena_used {
return
}
off := p - arenaStart
idx := off >> _PageShift
// p points into the heap, but possibly to the middle of an object.
// Consult the span table to find the block beginning.
k := p >> _PageShift
s = h_spans[idx]
if s == nil || pageID(k) < s.start || p >= s.limit || s.state != mSpanInUse {
if s == nil || s.state == _MSpanStack {
// If s is nil, the virtual address has never been part of the heap.
// This pointer may be to some mmap'd region, so we allow it.
// Pointers into stacks are also ok, the runtime manages these explicitly.
return
}
// The following ensures that we are rigorous about what data
// structures hold valid pointers.
if debug.invalidptr != 0 {
// Typically this indicates an incorrect use
// of unsafe or cgo to store a bad pointer in
// the Go heap. It may also indicate a runtime
// bug.
//
// TODO(austin): We could be more aggressive
// and detect pointers to unallocated objects
// in allocated spans.
printlock()
print("runtime: pointer ", hex(p))
if s.state != mSpanInUse {
print(" to unallocated span")
} else {
print(" to unused region of span")
}
print("idx=", hex(idx), " span.start=", hex(s.start<<_PageShift), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
if refBase != 0 {
print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
gcDumpObject("object", refBase, refOff)
}
throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
}
return
}
// If this span holds object of a power of 2 size, just mask off the bits to
// the interior of the object. Otherwise use the size to get the base.
if s.baseMask != 0 {
// optimize for power of 2 sized objects.
base = s.base()
base = base + (p-base)&s.baseMask
// base = p & s.baseMask is faster for small spans,
// but doesn't work for large spans.
// Overall, it's faster to use the more general computation above.
} else {
base = s.base()
if p-base >= s.elemsize {
// n := (p - base) / s.elemsize, using division by multiplication
n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
base += n * s.elemsize
}
}
// Now that we know the actual base, compute heapBits to return to caller.
hbits = heapBitsForAddr(base)
return
}
// prefetch the bits.
func (h heapBits) prefetch() {
prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
}
// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) next() heapBits {
if h.shift < 3*heapBitsShift {
return heapBits{h.bitp, h.shift + heapBitsShift}
}
return heapBits{subtract1(h.bitp), 0}
}
// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
// bits returns the heap bits for the current word.
func (h heapBits) forward(n uintptr) heapBits {
n += uintptr(h.shift) / heapBitsShift
return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
}
// The caller can test isMarked and isPointer by &-ing with bitMarked and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
func (h heapBits) bits() uint32 {
return uint32(*h.bitp) >> h.shift
}
// isMarked reports whether the heap bits have the marked bit set.
// h must describe the initial word of the object.
func (h heapBits) isMarked() bool {
return *h.bitp&(bitMarked<<h.shift) != 0
}
// setMarked sets the marked bit in the heap bits, atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarked() {
// Each byte of GC bitmap holds info for four words.
// Might be racing with other updates, so use atomic update always.
// We used to be clever here and use a non-atomic update in certain
// cases, but it's not worth the risk.
atomic.Or8(h.bitp, bitMarked<<h.shift)
}
// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarkedNonAtomic() {
*h.bitp |= bitMarked << h.shift
}
// isPointer reports whether the heap bits describe a pointer word.
// h must describe the initial word of the object.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) isPointer() bool {
return (*h.bitp>>h.shift)&bitPointer != 0
}
// hasPointers reports whether the given object has any pointers.
// It must be told how large the object at h is, so that it does not read too
// far into the bitmap.
// h must describe the initial word of the object.
func (h heapBits) hasPointers(size uintptr) bool {
if size == sys.PtrSize { // 1-word objects are always pointers
return true
}
// Otherwise, at least a 2-word object, and at least 2-word aligned,
// so h.shift is either 0 or 2, so we know we can get the bits for the
// first two words out of *h.bitp.
// If either of the first two words is a pointer, not pointer free.
b := uint32(*h.bitp >> h.shift)
if b&(bitPointer|bitPointer<<heapBitsShift) != 0 {
return true
}
if size == 2*sys.PtrSize {
return false
}
// At least a 4-word object. Check scan bit (aka marked bit) in third word.
if h.shift == 0 {
return b&(bitMarked<<(2*heapBitsShift)) != 0
}
return uint32(*subtract1(h.bitp))&bitMarked != 0
}
// isCheckmarked reports whether the heap bits have the checkmarked bit set.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) isCheckmarked(size uintptr) bool {
if size == sys.PtrSize {
return (*h.bitp>>h.shift)&bitPointer != 0
}
// All multiword objects are 2-word aligned,
// so we know that the initial word's 2-bit pair
// and the second word's 2-bit pair are in the
// same heap bitmap byte, *h.bitp.
return (*h.bitp>>(heapBitsShift+h.shift))&bitMarked != 0
}
// setCheckmarked sets the checkmarked bit.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) setCheckmarked(size uintptr) {
if size == sys.PtrSize {
atomic.Or8(h.bitp, bitPointer<<h.shift)
return
}
atomic.Or8(h.bitp, bitMarked<<(heapBitsShift+h.shift))
}
// heapBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the heap bitmap to locate those pointer slots.
// This executes the write barriers necessary after a memmove.
// Both p and size must be pointer-aligned.
// The range [p, p+size) must lie within a single allocation.
//
// Callers should call heapBitsBulkBarrier immediately after
// calling memmove(p, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
//
// The heap bitmap is not maintained for allocations containing
// no pointers at all; any caller of heapBitsBulkBarrier must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.kind&kindNoPointers.
//
//go:nosplit
func heapBitsBulkBarrier(p, size uintptr) {
if (p|size)&(sys.PtrSize-1) != 0 {
throw("heapBitsBulkBarrier: unaligned arguments")
}
if !writeBarrier.needed {
return
}
if !inheap(p) {
// If p is on the stack and in a higher frame than the
// caller, we either need to execute write barriers on
// it (which is what happens for normal stack writes
// through pointers to higher frames), or we need to
// force the mark termination stack scan to scan the
// frame containing p.
//
// Executing write barriers on p is complicated in the
// general case because we either need to unwind the
// stack to get the stack map, or we need the type's
// bitmap, which may be a GC program.
//
// Hence, we opt for forcing the re-scan to scan the
// frame containing p, which we can do by simply
// unwinding the stack barriers between the current SP
// and p's frame.
gp := getg().m.curg
if gp != nil && gp.stack.lo <= p && p < gp.stack.hi {
// Run on the system stack to give it more
// stack space.
systemstack(func() {
gcUnwindBarriers(gp, p)
})
}
return
}
h := heapBitsForAddr(p)
for i := uintptr(0); i < size; i += sys.PtrSize {
if h.isPointer() {
x := (*uintptr)(unsafe.Pointer(p + i))
writebarrierptr_nostore(x, *x)
}
h = h.next()
}
}
// typeBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the type bitmap to locate those pointer slots.
// The type typ must correspond exactly to [p, p+size).
// This executes the write barriers necessary after a copy.
// Both p and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right after memmove,
// and the GC must not complete between those two.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
if typ == nil {
throw("runtime: typeBitsBulkBarrier without type")
}
if typ.size != size {
println("runtime: typeBitsBulkBarrier with type ", *typ._string, " of size ", typ.size, " but memory size", size)
throw("runtime: invalid typeBitsBulkBarrier")
}
if typ.kind&kindGCProg != 0 {
println("runtime: typeBitsBulkBarrier with type ", *typ._string, " with GC prog")
throw("runtime: invalid typeBitsBulkBarrier")
}
if !writeBarrier.needed {
return
}
ptrmask := typ.gcdata
var bits uint32
for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
if i&(sys.PtrSize*8-1) == 0 {
bits = uint32(*ptrmask)
ptrmask = addb(ptrmask, 1)
} else {
bits = bits >> 1
}
if bits&1 != 0 {
x := (*uintptr)(unsafe.Pointer(p + i))
writebarrierptr_nostore(x, *x)
}
}
}
// The methods operating on spans all require that h has been returned
// by heapBitsForSpan and that size, n, total are the span layout description
// returned by the mspan's layout method.
// If total > size*n, it means that there is extra leftover memory in the span,
// usually due to rounding.
//
// TODO(rsc): Perhaps introduce a different heapBitsSpan type.
// initSpan initializes the heap bitmap for a span.
func (h heapBits) initSpan(size, n, total uintptr) {
if total%heapBitmapScale != 0 {
throw("initSpan: unaligned length")
}
nbyte := total / heapBitmapScale
if sys.PtrSize == 8 && size == sys.PtrSize {
end := h.bitp
bitp := subtractb(end, nbyte-1)
for {
*bitp = bitPointerAll
if bitp == end {
break
}
bitp = add1(bitp)
}
return
}
memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
}
// initCheckmarkSpan initializes a span for being checkmarked.
// It clears the checkmark bits, which are set to 1 in normal operation.
func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
// The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
if sys.PtrSize == 8 && size == sys.PtrSize {
// Checkmark bit is type bit, bottom bit of every 2-bit entry.
// Only possible on 64-bit system, since minimum size is 8.
// Must clear type bit (checkmark bit) of every word.
// The type bit is the lower of every two-bit pair.
bitp := h.bitp
for i := uintptr(0); i < n; i += 4 {
*bitp &^= bitPointerAll
bitp = subtract1(bitp)
}
return
}
for i := uintptr(0); i < n; i++ {
*h.bitp &^= bitMarked << (heapBitsShift + h.shift)
h = h.forward(size / sys.PtrSize)
}
}
// clearCheckmarkSpan undoes all the checkmarking in a span.
// The actual checkmark bits are ignored, so the only work to do
// is to fix the pointer bits. (Pointer bits are ignored by scanobject
// but consulted by typedmemmove.)
func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
// The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
if sys.PtrSize == 8 && size == sys.PtrSize {
// Checkmark bit is type bit, bottom bit of every 2-bit entry.
// Only possible on 64-bit system, since minimum size is 8.
// Must clear type bit (checkmark bit) of every word.
// The type bit is the lower of every two-bit pair.
bitp := h.bitp
for i := uintptr(0); i < n; i += 4 {
*bitp |= bitPointerAll
bitp = subtract1(bitp)
}
}
}
// heapBitsSweepSpan coordinates the sweeping of a span by reading
// and updating the corresponding heap bitmap entries.
// For each free object in the span, heapBitsSweepSpan sets the type
// bits for the first two words (or one for single-word objects) to typeDead
// and then calls f(p), where p is the object's base address.
// f is expected to add the object to a free list.
// For non-free objects, heapBitsSweepSpan turns off the marked bit.
func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
h := heapBitsForSpan(base)
switch {
default:
throw("heapBitsSweepSpan")
case sys.PtrSize == 8 && size == sys.PtrSize:
// Consider mark bits in all four 2-bit entries of each bitmap byte.
bitp := h.bitp
for i := uintptr(0); i < n; i += 4 {
x := uint32(*bitp)
// Note that unlike the other size cases, we leave the pointer bits set here.
// These are initialized during initSpan when the span is created and left
// in place the whole time the span is used for pointer-sized objects.
// That lets heapBitsSetType avoid an atomic update to set the pointer bit
// during allocation.
if x&bitMarked != 0 {
x &^= bitMarked
} else {
f(base + i*sys.PtrSize)
}
if x&(bitMarked<<heapBitsShift) != 0 {
x &^= bitMarked << heapBitsShift
} else {
f(base + (i+1)*sys.PtrSize)
}
if x&(bitMarked<<(2*heapBitsShift)) != 0 {
x &^= bitMarked << (2 * heapBitsShift)
} else {
f(base + (i+2)*sys.PtrSize)
}
if x&(bitMarked<<(3*heapBitsShift)) != 0 {
x &^= bitMarked << (3 * heapBitsShift)
} else {
f(base + (i+3)*sys.PtrSize)
}
*bitp = uint8(x)
bitp = subtract1(bitp)
}
case size%(4*sys.PtrSize) == 0:
// Mark bit is in first word of each object.
// Each object starts at bit 0 of a heap bitmap byte.
bitp := h.bitp
step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ {
x := uint32(*bitp)
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x = 0
f(base + i*size)
}
*bitp = uint8(x)
bitp = subtractb(bitp, step)
}
case size%(4*sys.PtrSize) == 2*sys.PtrSize:
// Mark bit is in first word of each object,
// but every other object starts halfway through a heap bitmap byte.
// Unroll loop 2x to handle alternating shift count and step size.
bitp := h.bitp
step := size / heapBitmapScale
var i uintptr
for i = uintptr(0); i < n; i += 2 {
x := uint32(*bitp)
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift
f(base + i*size)
if size > 2*sys.PtrSize {
x = 0
}
}
*bitp = uint8(x)
if i+1 >= n {
break
}
bitp = subtractb(bitp, step)
x = uint32(*bitp)
if x&(bitMarked<<(2*heapBitsShift)) != 0 {
x &^= bitMarked << (2 * heapBitsShift)
} else {
x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift)
f(base + (i+1)*size)
if size > 2*sys.PtrSize {
*subtract1(bitp) = 0
}
}
*bitp = uint8(x)
bitp = subtractb(bitp, step+1)
}
}
}
// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapBitsSetType when there are no pointers,
// because all free objects are marked as noscan during
// heapBitsSweepSpan.
// There can only be one allocation from a given span active at a time,
// so this code is not racing with other instances of itself,
// and we don't allocate from a span until it has been swept,
// so this code is not racing with heapBitsSweepSpan.
// It is, however, racing with the concurrent GC mark phase,
// which can be setting the mark bit in the leading 2-bit entry
// of an allocated block. The block we are modifying is not quite
// allocated yet, so the GC marker is not racing with updates to x's bits,
// but if the start or end of x shares a bitmap byte with an adjacent
// object, the GC marker is racing with updates to those object's mark bits.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
const doubleCheck = false // slow but helpful; enable to test modifications to this code
// dataSize is always size rounded up to the next malloc size class,
// except in the case of allocating a defer block, in which case
// size is sizeof(_defer{}) (at least 6 words) and dataSize may be
// arbitrarily larger.
//
// The checks for size == ptrSize and size == 2*ptrSize can therefore
// assume that dataSize == size without checking it explicitly.
if sys.PtrSize == 8 && size == sys.PtrSize {
// It's one word and it has pointers, it must be a pointer.
// In general we'd need an atomic update here if the
// concurrent GC were marking objects in this span,
// because each bitmap byte describes 3 other objects
// in addition to the one being allocated.
// However, since all allocated one-word objects are pointers
// (non-pointers are aggregated into tinySize allocations),
// initSpan sets the pointer bits for us. Nothing to do here.
if doubleCheck {
h := heapBitsForAddr(x)
if !h.isPointer() {
throw("heapBitsSetType: pointer bit missing")
}
}
return
}
h := heapBitsForAddr(x)
ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
// Heap bitmap bits for 2-word object are only 4 bits,
// so also shared with objects next to it; use atomic updates.
// This is called out as a special case primarily for 32-bit systems,
// so that on 32-bit systems the code below can assume all objects
// are 4-word aligned (because they're all 16-byte aligned).
if size == 2*sys.PtrSize {
if typ.size == sys.PtrSize {
// We're allocating a block big enough to hold two pointers.
// On 64-bit, that means the actual object must be two pointers,
// or else we'd have used the one-pointer-sized block.
// On 32-bit, however, this is the 8-byte block, the smallest one.
// So it could be that we're allocating one pointer and this was
// just the smallest block available. Distinguish by checking dataSize.
// (In general the number of instances of typ being allocated is
// dataSize/typ.size.)
if sys.PtrSize == 4 && dataSize == sys.PtrSize {
// 1 pointer.
if gcphase == _GCoff {
*h.bitp |= bitPointer << h.shift
} else {
atomic.Or8(h.bitp, bitPointer<<h.shift)
}
} else {
// 2-element slice of pointer.
if gcphase == _GCoff {
*h.bitp |= (bitPointer | bitPointer<<heapBitsShift) << h.shift
} else {
atomic.Or8(h.bitp, (bitPointer|bitPointer<<heapBitsShift)<<h.shift)
}
}
return
}
// Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0.
if doubleCheck {
if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
throw("heapBitsSetType")
}
}
b := uint32(*ptrmask)
hb := b & 3
if gcphase == _GCoff {
*h.bitp |= uint8(hb << h.shift)
} else {
atomic.Or8(h.bitp, uint8(hb<<h.shift))
}
return
}
// Copy from 1-bit ptrmask into 2-bit bitmap.
// The basic approach is to use a single uintptr as a bit buffer,
// alternating between reloading the buffer and writing bitmap bytes.
// In general, one load can supply two bitmap byte writes.
// This is a lot of lines of code, but it compiles into relatively few
// machine instructions.
var (
// Ptrmask input.
p *byte // last ptrmask byte read
b uintptr // ptrmask bits already loaded
nb uintptr // number of bits in b at next read
endp *byte // final ptrmask byte to read (then repeat)
endnb uintptr // number of valid bits in *endp
pbits uintptr // alternate source of bits
// Heap bitmap output.
w uintptr // words processed
nw uintptr // number of words to process
hbitp *byte // next heap bitmap byte to write
hb uintptr // bits being prepared for *hbitp
)
hbitp = h.bitp
// Handle GC program. Delayed until this part of the code
// so that we can use the same double-checking mechanism
// as the 1-bit case. Nothing above could have encountered
// GC programs: the cases were all too small.
if typ.kind&kindGCProg != 0 {
heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
if doubleCheck {
// Double-check the heap bits written by GC program
// by running the GC program to create a 1-bit pointer mask
// and then jumping to the double-check code below.
// This doesn't catch bugs shared between the 1-bit and 4-bit
// GC program execution, but it does catch mistakes specific
// to just one of those and bugs in heapBitsSetTypeGCProg's
// implementation of arrays.
lock(&debugPtrmask.lock)
if debugPtrmask.data == nil {
debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
}
ptrmask = debugPtrmask.data
runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
goto Phase4
}
return
}
// Note about sizes:
//
// typ.size is the number of words in the object,
// and typ.ptrdata is the number of words in the prefix
// of the object that contains pointers. That is, the final
// typ.size - typ.ptrdata words contain no pointers.
// This allows optimization of a common pattern where
// an object has a small header followed by a large scalar
// buffer. If we know the pointers are over, we don't have
// to scan the buffer's heap bitmap at all.
// The 1-bit ptrmasks are sized to contain only bits for
// the typ.ptrdata prefix, zero padded out to a full byte
// of bitmap. This code sets nw (below) so that heap bitmap
// bits are only written for the typ.ptrdata prefix; if there is
// more room in the allocated object, the next heap bitmap
// entry is a 00, indicating that there are no more pointers
// to scan. So only the ptrmask for the ptrdata bytes is needed.
//
// Replicated copies are not as nice: if there is an array of
// objects with scalar tails, all but the last tail does have to
// be initialized, because there is no way to say "skip forward".
// However, because of the possibility of a repeated type with
// size not a multiple of 4 pointers (one heap bitmap byte),
// the code already must handle the last ptrmask byte specially
// by treating it as containing only the bits for endnb pointers,
// where endnb <= 4. We represent large scalar tails that must
// be expanded in the replication by setting endnb larger than 4.
// This will have the effect of reading many bits out of b,
// but once the real bits are shifted out, b will supply as many
// zero bits as we try to read, which is exactly what we need.
p = ptrmask
if typ.size < dataSize {
// Filling in bits for an array of typ.
// Set up for repetition of ptrmask during main loop.
// Note that ptrmask describes only a prefix of
const maxBits = sys.PtrSize*8 - 7
if typ.ptrdata/sys.PtrSize <= maxBits {
// Entire ptrmask fits in uintptr with room for a byte fragment.
// Load into pbits and never read from ptrmask again.
// This is especially important when the ptrmask has
// fewer than 8 bits in it; otherwise the reload in the middle
// of the Phase 2 loop would itself need to loop to gather
// at least 8 bits.
// Accumulate ptrmask into b.
// ptrmask is sized to describe only typ.ptrdata, but we record
// it as describing typ.size bytes, since all the high bits are zero.
nb = typ.ptrdata / sys.PtrSize
for i := uintptr(0); i < nb; i += 8 {
b |= uintptr(*p) << i
p = add1(p)
}
nb = typ.size / sys.PtrSize
// Replicate ptrmask to fill entire pbits uintptr.
// Doubling and truncating is fewer steps than
// iterating by nb each time. (nb could be 1.)
// Since we loaded typ.ptrdata/ptrSize bits
// but are pretending to have typ.size/ptrSize,
// there might be no replication necessary/possible.
pbits = b
endnb = nb
if nb+nb <= maxBits {
for endnb <= sys.PtrSize*8 {
pbits |= pbits << endnb
endnb += endnb
}
// Truncate to a multiple of original ptrmask.
endnb = maxBits / nb * nb
pbits &= 1<<endnb - 1
b = pbits
nb = endnb
}
// Clear p and endp as sentinel for using pbits.
// Checked during Phase 2 loop.
p = nil
endp = nil
} else {
// Ptrmask is larger. Read it multiple times.
n := (typ.ptrdata/sys.PtrSize+7)/8 - 1
endp = addb(ptrmask, n)
endnb = typ.size/sys.PtrSize - n*8
}
}
if p != nil {
b = uintptr(*p)
p = add1(p)
nb = 8
}
if typ.size == dataSize {
// Single entry: can stop once we reach the non-pointer data.
nw = typ.ptrdata / sys.PtrSize
} else {
// Repeated instances of typ in an array.
// Have to process first N-1 entries in full, but can stop
// once we reach the non-pointer data in the final entry.
nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize
}
if nw == 0 {
// No pointers! Caller was supposed to check.
println("runtime: invalid type ", *typ._string)
throw("heapBitsSetType: called with non-pointer type")
return
}
if nw < 2 {
// Must write at least 2 words, because the "no scan"
// encoding doesn't take effect until the third word.
nw = 2
}
// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4).
// The leading byte is special because it contains the bits for words 0 and 1,
// which do not have the marked bits set.
// The leading half-byte is special because it's a half a byte and must be
// manipulated atomically.
switch {
default:
throw("heapBitsSetType: unexpected shift")
case h.shift == 0:
// Ptrmask and heap bitmap are aligned.
// Handle first byte of bitmap specially.
// The first byte we write out contains the first two words of the object.
// In those words, the mark bits are mark and checkmark, respectively,
// and must not be set. In all following words, we want to set the mark bit
// as a signal that the object continues to the next 2-bit entry in the bitmap.
hb = b & bitPointerAll
hb |= bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
if w += 4; w >= nw {
goto Phase3
}
*hbitp = uint8(hb)
hbitp = subtract1(hbitp)
b >>= 4
nb -= 4
case sys.PtrSize == 8 && h.shift == 2:
// Ptrmask and heap bitmap are misaligned.
// The bits for the first two words are in a byte shared with another object
// and must be updated atomically.
// NOTE(rsc): The atomic here may not be necessary.
// We took care of 1-word and 2-word objects above,
// so this is at least a 6-word object, so our start bits
// are shared only with the type bits of another object,
// not with its mark bit. Since there is only one allocation
// from a given span at a time, we should be able to set
// these bits non-atomically. Not worth the risk right now.
hb = (b & 3) << (2 * heapBitsShift)
b >>= 2
nb -= 2
// Note: no bitMarker in hb because the first two words don't get markers from us.
if gcphase == _GCoff {
*hbitp |= uint8(hb)
} else {
atomic.Or8(hbitp, uint8(hb))
}
hbitp = subtract1(hbitp)
if w += 2; w >= nw {
// We know that there is more data, because we handled 2-word objects above.
// This must be at least a 6-word object. If we're out of pointer words,
// mark no scan in next bitmap byte and finish.
hb = 0
w += 4
goto Phase3
}
}
// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
// The loop computes the bits for that last write but does not execute the write;
// it leaves the bits in hb for processing by phase 3.
// To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
// use in the first half of the loop right now, and then we only adjust nb explicitly
// if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
nb -= 4
for {
// Emit bitmap byte.
// b has at least nb+4 bits, with one exception:
// if w+4 >= nw, then b has only nw-w bits,
// but we'll stop at the break and then truncate
// appropriately in Phase 3.
hb = b & bitPointerAll
hb |= bitMarkedAll
if w += 4; w >= nw {
break
}
*hbitp = uint8(hb)
hbitp = subtract1(hbitp)
b >>= 4
// Load more bits. b has nb right now.
if p != endp {
// Fast path: keep reading from ptrmask.
// nb unmodified: we just loaded 8 bits,
// and the next iteration will consume 8 bits,
// leaving us with the same nb the next time we're here.
if nb < 8 {
b |= uintptr(*p) << nb
p = add1(p)
} else {
// Reduce the number of bits in b.
// This is important if we skipped
// over a scalar tail, since nb could
// be larger than the bit width of b.
nb -= 8
}
} else if p == nil {
// Almost as fast path: track bit count and refill from pbits.
// For short repetitions.
if nb < 8 {
b |= pbits << nb
nb += endnb
}
nb -= 8 // for next iteration
} else {
// Slow path: reached end of ptrmask.
// Process final partial byte and rewind to start.
b |= uintptr(*p) << nb
nb += endnb
if nb < 8 {
b |= uintptr(*ptrmask) << nb
p = add1(ptrmask)
} else {
nb -= 8
p = ptrmask
}
}
// Emit bitmap byte.
hb = b & bitPointerAll
hb |= bitMarkedAll
if w += 4; w >= nw {
break
}
*hbitp = uint8(hb)
hbitp = subtract1(hbitp)
b >>= 4
}
Phase3:
// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
if w > nw {
// Counting the 4 entries in hb not yet written to memory,
// there are more entries than possible pointer slots.
// Discard the excess entries (can't be more than 3).
mask := uintptr(1)<<(4-(w-nw)) - 1
hb &= mask | mask<<4 // apply mask to both pointer bits and mark bits
}
// Change nw from counting possibly-pointer words to total words in allocation.
nw = size / sys.PtrSize
// Write whole bitmap bytes.
// The first is hb, the rest are zero.
if w <= nw {
*hbitp = uint8(hb)
hbitp = subtract1(hbitp)
hb = 0 // for possible final half-byte below
for w += 4; w <= nw; w += 4 {
*hbitp = 0
hbitp = subtract1(hbitp)
}
}
// Write final partial bitmap byte if any.
// We know w > nw, or else we'd still be in the loop above.
// It can be bigger only due to the 4 entries in hb that it counts.
// If w == nw+4 then there's nothing left to do: we wrote all nw entries
// and can discard the 4 sitting in hb.
// But if w == nw+2, we need to write first two in hb.
// The byte is shared with the next object so we may need an atomic.
if w == nw+2 {
if gcphase == _GCoff {
*hbitp = *hbitp&^(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift) | uint8(hb)
} else {
atomic.And8(hbitp, ^uint8(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift))
atomic.Or8(hbitp, uint8(hb))
}
}
Phase4:
// Phase 4: all done, but perhaps double check.
if doubleCheck {
end := heapBitsForAddr(x + size)
if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
println("ended at wrong bitmap byte for", *typ._string, "x", dataSize/typ.size)
print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
h0 := heapBitsForAddr(x)
print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
throw("bad heapBitsSetType")
}
// Double-check that bits to be written were written correctly.
// Does not check that other bits were not written, unfortunately.
h := heapBitsForAddr(x)
nptr := typ.ptrdata / sys.PtrSize
ndata := typ.size / sys.PtrSize
count := dataSize / typ.size
totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize
for i := uintptr(0); i < size/sys.PtrSize; i++ {
j := i % ndata
var have, want uint8
have = (*h.bitp >> h.shift) & (bitPointer | bitMarked)
if i >= totalptr {
want = 0 // deadmarker
if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
want = bitMarked
}
} else {
if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
want |= bitPointer
}
if i >= 2 {
want |= bitMarked
} else {
have &^= bitMarked
}
}
if have != want {
println("mismatch writing bits for", *typ._string, "x", dataSize/typ.size)
print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
h0 := heapBitsForAddr(x)
print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want)
if typ.kind&kindGCProg != 0 {
println("GC program:")
dumpGCProg(addb(typ.gcdata, 4))
}
throw("bad heapBitsSetType")
}
h = h.next()
}
if ptrmask == debugPtrmask.data {
unlock(&debugPtrmask.lock)
}
}
}
var debugPtrmask struct {
lock mutex
data *byte
}
// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
// progSize is the size of the memory described by the program.
// elemSize is the size of the element that the GC program describes (a prefix of).
// dataSize is the total size of the intended data, a multiple of elemSize.
// allocSize is the total size of the allocated memory.
//
// GC programs are only used for large allocations.
// heapBitsSetType requires that allocSize is a multiple of 4 words,
// so that the relevant bitmap bytes are not shared with surrounding
// objects and need not be accessed with atomic instructions.
func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 {
// Alignment will be wrong.
throw("heapBitsSetTypeGCProg: small allocation")
}
var totalBits uintptr
if elemSize == dataSize {
totalBits = runGCProg(prog, nil, h.bitp, 2)
if totalBits*sys.PtrSize != progSize {
println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
throw("heapBitsSetTypeGCProg: unexpected bit count")
}
} else {
count := dataSize / elemSize
// Piece together program trailer to run after prog that does:
// literal(0)
// repeat(1, elemSize-progSize-1) // zeros to fill element size
// repeat(elemSize, count-1) // repeat that element for count
// This zero-pads the data remaining in the first element and then
// repeats that first element to fill the array.
var trailer [40]byte // 3 varints (max 10 each) + some bytes
i := 0
if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 {
// literal(0)
trailer[i] = 0x01
i++
trailer[i] = 0
i++
if n > 1 {
// repeat(1, n-1)
trailer[i] = 0x81
i++
n--
for ; n >= 0x80; n >>= 7 {
trailer[i] = byte(n | 0x80)
i++
}
trailer[i] = byte(n)
i++
}
}
// repeat(elemSize/ptrSize, count-1)
trailer[i] = 0x80
i++
n := elemSize / sys.PtrSize
for ; n >= 0x80; n >>= 7 {
trailer[i] = byte(n | 0x80)
i++
}
trailer[i] = byte(n)
i++
n = count - 1
for ; n >= 0x80; n >>= 7 {
trailer[i] = byte(n | 0x80)
i++
}
trailer[i] = byte(n)
i++
trailer[i] = 0
i++
runGCProg(prog, &trailer[0], h.bitp, 2)
// Even though we filled in the full array just now,
// record that we only filled in up to the ptrdata of the
// last element. This will cause the code below to
// memclr the dead section of the final array element,
// so that scanobject can stop early in the final element.
totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
}
endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
memclr(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
}
// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/ptrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
n := (size/sys.PtrSize + 7) / 8
x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
x[len(x)-1] = 0xa1 // overflow check sentinel
n = runGCProg(prog, nil, &x[0], 1)
if x[len(x)-1] != 0xa1 {
throw("progToPointerMask: overflow")
}
return bitvector{int32(n), &x[0]}
}
// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
// 00000000: stop
// 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
// 10000000 n c: repeat the previous n bits c times; n, c are varints
// 1nnnnnnn c: repeat the previous n bits c times; c is a varint
// runGCProg executes the GC program prog, and then trailer if non-nil,
// writing to dst with entries of the given size.
// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
// starting at dst (because the heap bitmap does). In this case, the caller guarantees
// that only whole bytes in dst need to be written.
//
// runGCProg returns the number of 1- or 2-bit entries written to memory.
func runGCProg(prog, trailer, dst *byte, size int) uintptr {
dstStart := dst
// Bits waiting to be written to memory.
var bits uintptr
var nbits uintptr
p := prog
Run:
for {
// Flush accumulated full bytes.
// The rest of the loop assumes that nbits <= 7.
for ; nbits >= 8; nbits -= 8 {
if size == 1 {
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
} else {
v := bits&bitPointerAll | bitMarkedAll
*dst = uint8(v)
dst = subtract1(dst)
bits >>= 4
v = bits&bitPointerAll | bitMarkedAll
*dst = uint8(v)
dst = subtract1(dst)
bits >>= 4
}
}
// Process one instruction.
inst := uintptr(*p)
p = add1(p)
n := inst & 0x7F
if inst&0x80 == 0 {
// Literal bits; n == 0 means end of program.
if n == 0 {
// Program is over; continue in trailer if present.
if trailer != nil {
//println("trailer")
p = trailer
trailer = nil
continue
}
//println("done")
break Run
}
//println("lit", n, dst)
nbyte := n / 8
for i := uintptr(0); i < nbyte; i++ {
bits |= uintptr(*p) << nbits
p = add1(p)
if size == 1 {
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
} else {
v := bits&0xf | bitMarkedAll
*dst = uint8(v)
dst = subtract1(dst)
bits >>= 4
v = bits&0xf | bitMarkedAll
*dst = uint8(v)
dst = subtract1(dst)
bits >>= 4
}
}
if n %= 8; n > 0 {
bits |= uintptr(*p) << nbits
p = add1(p)
nbits += n
}
continue Run
}
// Repeat. If n == 0, it is encoded in a varint in the next bytes.
if n == 0 {
for off := uint(0); ; off += 7 {
x := uintptr(*p)
p = add1(p)
n |= (x & 0x7F) << off
if x&0x80 == 0 {
break
}
}
}
// Count is encoded in a varint in the next bytes.
c := uintptr(0)
for off := uint(0); ; off += 7 {
x := uintptr(*p)
p = add1(p)
c |= (x & 0x7F) << off
if x&0x80 == 0 {
break
}
}
c *= n // now total number of bits to copy
// If the number of bits being repeated is small, load them
// into a register and use that register for the entire loop
// instead of repeatedly reading from memory.
// Handling fewer than 8 bits here makes the general loop simpler.
// The cutoff is ptrSize*8 - 7 to guarantee that when we add
// the pattern to a bit buffer holding at most 7 bits (a partial byte)
// it will not overflow.
src := dst
const maxBits = sys.PtrSize*8 - 7
if n <= maxBits {
// Start with bits in output buffer.
pattern := bits
npattern := nbits
// If we need more bits, fetch them from memory.
if size == 1 {
src = subtract1(src)
for npattern < n {
pattern <<= 8
pattern |= uintptr(*src)
src = subtract1(src)
npattern += 8
}
} else {
src = add1(src)
for npattern < n {
pattern <<= 4
pattern |= uintptr(*src) & 0xf
src = add1(src)
npattern += 4
}
}
// We started with the whole bit output buffer,
// and then we loaded bits from whole bytes.
// Either way, we might now have too many instead of too few.
// Discard the extra.
if npattern > n {
pattern >>= npattern - n
npattern = n
}
// Replicate pattern to at most maxBits.
if npattern == 1 {
// One bit being repeated.
// If the bit is 1, make the pattern all 1s.
// If the bit is 0, the pattern is already all 0s,
// but we can claim that the number of bits
// in the word is equal to the number we need (c),
// because right shift of bits will zero fill.
if pattern == 1 {
pattern = 1<<maxBits - 1
npattern = maxBits
} else {
npattern = c
}
} else {
b := pattern
nb := npattern
if nb+nb <= maxBits {
// Double pattern until the whole uintptr is filled.
for nb <= sys.PtrSize*8 {
b |= b << nb
nb += nb
}
// Trim away incomplete copy of original pattern in high bits.
// TODO(rsc): Replace with table lookup or loop on systems without divide?
nb = maxBits / npattern * npattern
b &= 1<<nb - 1
pattern = b
npattern = nb
}
}
// Add pattern to bit buffer and flush bit buffer, c/npattern times.
// Since pattern contains >8 bits, there will be full bytes to flush
// on each iteration.
for ; c >= npattern; c -= npattern {
bits |= pattern << nbits
nbits += npattern
if size == 1 {
for nbits >= 8 {
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
nbits -= 8
}
} else {
for nbits >= 4 {
*dst = uint8(bits&0xf | bitMarkedAll)
dst = subtract1(dst)
bits >>= 4
nbits -= 4
}
}
}
// Add final fragment to bit buffer.
if c > 0 {
pattern &= 1<<c - 1
bits |= pattern << nbits
nbits += c
}
continue Run
}
// Repeat; n too large to fit in a register.
// Since nbits <= 7, we know the first few bytes of repeated data
// are already written to memory.
off := n - nbits // n > nbits because n > maxBits and nbits <= 7
if size == 1 {
// Leading src fragment.
src = subtractb(src, (off+7)/8)
if frag := off & 7; frag != 0 {
bits |= uintptr(*src) >> (8 - frag) << nbits
src = add1(src)
nbits += frag
c -= frag
}
// Main loop: load one byte, write another.
// The bits are rotating through the bit buffer.
for i := c / 8; i > 0; i-- {
bits |= uintptr(*src) << nbits
src = add1(src)
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
}
// Final src fragment.
if c %= 8; c > 0 {
bits |= (uintptr(*src) & (1<<c - 1)) << nbits
nbits += c
}
} else {
// Leading src fragment.
src = addb(src, (off+3)/4)
if frag := off & 3; frag != 0 {
bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
src = subtract1(src)
nbits += frag
c -= frag
}
// Main loop: load one byte, write another.
// The bits are rotating through the bit buffer.
for i := c / 4; i > 0; i-- {
bits |= (uintptr(*src) & 0xf) << nbits
src = subtract1(src)
*dst = uint8(bits&0xf | bitMarkedAll)
dst = subtract1(dst)
bits >>= 4
}
// Final src fragment.
if c %= 4; c > 0 {
bits |= (uintptr(*src) & (1<<c - 1)) << nbits
nbits += c
}
}
}
// Write any final bits out, using full-byte writes, even for the final byte.
var totalBits uintptr
if size == 1 {
totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
nbits += -nbits & 7
for ; nbits > 0; nbits -= 8 {
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
}
} else {
totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
nbits += -nbits & 3
for ; nbits > 0; nbits -= 4 {
v := bits&0xf | bitMarkedAll
*dst = uint8(v)
dst = subtract1(dst)
bits >>= 4
}
// Clear the mark bits in the first two entries.
// They are the actual mark and checkmark bits,
// not non-dead markers. It simplified the code
// above to set the marker in every bit written and
// then clear these two as a special case at the end.
*dstStart &^= bitMarked | bitMarked<<heapBitsShift
}
return totalBits
}
func dumpGCProg(p *byte) {
nptr := 0
for {
x := *p
p = add1(p)
if x == 0 {
print("\t", nptr, " end\n")
break
}
if x&0x80 == 0 {
print("\t", nptr, " lit ", x, ":")
n := int(x+7) / 8
for i := 0; i < n; i++ {
print(" ", hex(*p))
p = add1(p)
}
print("\n")
nptr += int(x)
} else {
nbit := int(x &^ 0x80)
if nbit == 0 {
for nb := uint(0); ; nb += 7 {
x := *p
p = add1(p)
nbit |= int(x&0x7f) << nb
if x&0x80 == 0 {
break
}
}
}
count := 0
for nb := uint(0); ; nb += 7 {
x := *p
p = add1(p)
count |= int(x&0x7f) << nb
if x&0x80 == 0 {
break
}
}
print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
nptr += nbit * count
}
}
}
// Testing.
func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
target := (*stkframe)(ctxt)
if frame.sp <= target.sp && target.sp < frame.varp {
*target = *frame
return false
}
return true
}
// gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x interface{}) []byte {
ret := getgcmask(x)
typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
nptr := typ.ptrdata / sys.PtrSize
for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
ret = ret[:len(ret)-1]
}
return ret
}
// Returns GC type info for object p for testing.
func getgcmask(ep interface{}) (mask []byte) {
e := *efaceOf(&ep)
p := e.data
t := e._type
// data or bss
for datap := &firstmoduledata; datap != nil; datap = datap.next {
// data
if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
bitmap := datap.gcdatamask.bytedata
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
mask = make([]byte, n/sys.PtrSize)
for i := uintptr(0); i < n; i += sys.PtrSize {
off := (uintptr(p) + i - datap.data) / sys.PtrSize
mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
}
return
}
// bss
if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
bitmap := datap.gcbssmask.bytedata
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
mask = make([]byte, n/sys.PtrSize)
for i := uintptr(0); i < n; i += sys.PtrSize {
off := (uintptr(p) + i - datap.bss) / sys.PtrSize
mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
}
return
}
}
// heap
var n uintptr
var base uintptr
if mlookup(uintptr(p), &base, &n, nil) != 0 {
mask = make([]byte, n/sys.PtrSize)
for i := uintptr(0); i < n; i += sys.PtrSize {
hbits := heapBitsForAddr(base + i)
if hbits.isPointer() {
mask[i/sys.PtrSize] = 1
}
if i >= 2*sys.PtrSize && !hbits.isMarked() {
mask = mask[:i/sys.PtrSize]
break
}
}
return
}
// stack
if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
var frame stkframe
frame.sp = uintptr(p)
_g_ := getg()
gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
if frame.fn != nil {
f := frame.fn
targetpc := frame.continpc
if targetpc == 0 {
return
}
if targetpc != f.entry {
targetpc--
}
pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, nil)
if pcdata == -1 {
return
}
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
if stkmap == nil || stkmap.n <= 0 {
return
}
bv := stackmapdata(stkmap, pcdata)
size := uintptr(bv.n) * sys.PtrSize
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
mask = make([]byte, n/sys.PtrSize)
for i := uintptr(0); i < n; i += sys.PtrSize {
bitmap := bv.bytedata
off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
}
}
return
}
// otherwise, not something the GC knows about.
// possibly read-only data, like malloc(0).
// must not have pointers
return
}
|