/usr/include/rocksdb/env.h is in librocksdb-dev 5.8.8-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// An Env is an interface used by the rocksdb implementation to access
// operating system functionality like the filesystem etc. Callers
// may wish to provide a custom Env object when opening a database to
// get fine gain control; e.g., to rate limit file system operations.
//
// All Env implementations are safe for concurrent access from
// multiple threads without any external synchronization.
#ifndef STORAGE_ROCKSDB_INCLUDE_ENV_H_
#define STORAGE_ROCKSDB_INCLUDE_ENV_H_
#include <stdint.h>
#include <cstdarg>
#include <functional>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include "rocksdb/status.h"
#include "rocksdb/thread_status.h"
#ifdef _WIN32
// Windows API macro interference
#undef DeleteFile
#undef GetCurrentTime
#endif
namespace rocksdb {
class FileLock;
class Logger;
class RandomAccessFile;
class SequentialFile;
class Slice;
class WritableFile;
class RandomRWFile;
class Directory;
struct DBOptions;
struct ImmutableDBOptions;
class RateLimiter;
class ThreadStatusUpdater;
struct ThreadStatus;
using std::unique_ptr;
using std::shared_ptr;
const size_t kDefaultPageSize = 4 * 1024;
// Options while opening a file to read/write
struct EnvOptions {
// Construct with default Options
EnvOptions();
// Construct from Options
explicit EnvOptions(const DBOptions& options);
// If true, then use mmap to read data
bool use_mmap_reads = false;
// If true, then use mmap to write data
bool use_mmap_writes = true;
// If true, then use O_DIRECT for reading data
bool use_direct_reads = false;
// If true, then use O_DIRECT for writing data
bool use_direct_writes = false;
// If false, fallocate() calls are bypassed
bool allow_fallocate = true;
// If true, set the FD_CLOEXEC on open fd.
bool set_fd_cloexec = true;
// Allows OS to incrementally sync files to disk while they are being
// written, in the background. Issue one request for every bytes_per_sync
// written. 0 turns it off.
// Default: 0
uint64_t bytes_per_sync = 0;
// If true, we will preallocate the file with FALLOC_FL_KEEP_SIZE flag, which
// means that file size won't change as part of preallocation.
// If false, preallocation will also change the file size. This option will
// improve the performance in workloads where you sync the data on every
// write. By default, we set it to true for MANIFEST writes and false for
// WAL writes
bool fallocate_with_keep_size = true;
// See DBOptions doc
size_t compaction_readahead_size;
// See DBOptions doc
size_t random_access_max_buffer_size;
// See DBOptions doc
size_t writable_file_max_buffer_size = 1024 * 1024;
// If not nullptr, write rate limiting is enabled for flush and compaction
RateLimiter* rate_limiter = nullptr;
};
class Env {
public:
struct FileAttributes {
// File name
std::string name;
// Size of file in bytes
uint64_t size_bytes;
};
Env() : thread_status_updater_(nullptr) {}
virtual ~Env();
// Return a default environment suitable for the current operating
// system. Sophisticated users may wish to provide their own Env
// implementation instead of relying on this default environment.
//
// The result of Default() belongs to rocksdb and must never be deleted.
static Env* Default();
// Create a brand new sequentially-readable file with the specified name.
// On success, stores a pointer to the new file in *result and returns OK.
// On failure stores nullptr in *result and returns non-OK. If the file does
// not exist, returns a non-OK status.
//
// The returned file will only be accessed by one thread at a time.
virtual Status NewSequentialFile(const std::string& fname,
unique_ptr<SequentialFile>* result,
const EnvOptions& options)
= 0;
// Create a brand new random access read-only file with the
// specified name. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores nullptr in *result and
// returns non-OK. If the file does not exist, returns a non-OK
// status.
//
// The returned file may be concurrently accessed by multiple threads.
virtual Status NewRandomAccessFile(const std::string& fname,
unique_ptr<RandomAccessFile>* result,
const EnvOptions& options)
= 0;
// Create an object that writes to a new file with the specified
// name. Deletes any existing file with the same name and creates a
// new file. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores nullptr in *result and
// returns non-OK.
//
// The returned file will only be accessed by one thread at a time.
virtual Status NewWritableFile(const std::string& fname,
unique_ptr<WritableFile>* result,
const EnvOptions& options) = 0;
// Create an object that writes to a new file with the specified
// name. Deletes any existing file with the same name and creates a
// new file. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores nullptr in *result and
// returns non-OK.
//
// The returned file will only be accessed by one thread at a time.
virtual Status ReopenWritableFile(const std::string& fname,
unique_ptr<WritableFile>* result,
const EnvOptions& options) {
return Status::NotSupported();
}
// Reuse an existing file by renaming it and opening it as writable.
virtual Status ReuseWritableFile(const std::string& fname,
const std::string& old_fname,
unique_ptr<WritableFile>* result,
const EnvOptions& options);
// Open `fname` for random read and write, if file doesn't exist the file
// will be created. On success, stores a pointer to the new file in
// *result and returns OK. On failure returns non-OK.
//
// The returned file will only be accessed by one thread at a time.
virtual Status NewRandomRWFile(const std::string& fname,
unique_ptr<RandomRWFile>* result,
const EnvOptions& options) {
return Status::NotSupported("RandomRWFile is not implemented in this Env");
}
// Create an object that represents a directory. Will fail if directory
// doesn't exist. If the directory exists, it will open the directory
// and create a new Directory object.
//
// On success, stores a pointer to the new Directory in
// *result and returns OK. On failure stores nullptr in *result and
// returns non-OK.
virtual Status NewDirectory(const std::string& name,
unique_ptr<Directory>* result) = 0;
// Returns OK if the named file exists.
// NotFound if the named file does not exist,
// the calling process does not have permission to determine
// whether this file exists, or if the path is invalid.
// IOError if an IO Error was encountered
virtual Status FileExists(const std::string& fname) = 0;
// Store in *result the names of the children of the specified directory.
// The names are relative to "dir".
// Original contents of *results are dropped.
// Returns OK if "dir" exists and "*result" contains its children.
// NotFound if "dir" does not exist, the calling process does not have
// permission to access "dir", or if "dir" is invalid.
// IOError if an IO Error was encountered
virtual Status GetChildren(const std::string& dir,
std::vector<std::string>* result) = 0;
// Store in *result the attributes of the children of the specified directory.
// In case the implementation lists the directory prior to iterating the files
// and files are concurrently deleted, the deleted files will be omitted from
// result.
// The name attributes are relative to "dir".
// Original contents of *results are dropped.
// Returns OK if "dir" exists and "*result" contains its children.
// NotFound if "dir" does not exist, the calling process does not have
// permission to access "dir", or if "dir" is invalid.
// IOError if an IO Error was encountered
virtual Status GetChildrenFileAttributes(const std::string& dir,
std::vector<FileAttributes>* result);
// Delete the named file.
virtual Status DeleteFile(const std::string& fname) = 0;
// Create the specified directory. Returns error if directory exists.
virtual Status CreateDir(const std::string& dirname) = 0;
// Creates directory if missing. Return Ok if it exists, or successful in
// Creating.
virtual Status CreateDirIfMissing(const std::string& dirname) = 0;
// Delete the specified directory.
virtual Status DeleteDir(const std::string& dirname) = 0;
// Store the size of fname in *file_size.
virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0;
// Store the last modification time of fname in *file_mtime.
virtual Status GetFileModificationTime(const std::string& fname,
uint64_t* file_mtime) = 0;
// Rename file src to target.
virtual Status RenameFile(const std::string& src,
const std::string& target) = 0;
// Hard Link file src to target.
virtual Status LinkFile(const std::string& src, const std::string& target) {
return Status::NotSupported("LinkFile is not supported for this Env");
}
// Lock the specified file. Used to prevent concurrent access to
// the same db by multiple processes. On failure, stores nullptr in
// *lock and returns non-OK.
//
// On success, stores a pointer to the object that represents the
// acquired lock in *lock and returns OK. The caller should call
// UnlockFile(*lock) to release the lock. If the process exits,
// the lock will be automatically released.
//
// If somebody else already holds the lock, finishes immediately
// with a failure. I.e., this call does not wait for existing locks
// to go away.
//
// May create the named file if it does not already exist.
virtual Status LockFile(const std::string& fname, FileLock** lock) = 0;
// Release the lock acquired by a previous successful call to LockFile.
// REQUIRES: lock was returned by a successful LockFile() call
// REQUIRES: lock has not already been unlocked.
virtual Status UnlockFile(FileLock* lock) = 0;
// Priority for scheduling job in thread pool
enum Priority { BOTTOM, LOW, HIGH, TOTAL };
// Priority for requesting bytes in rate limiter scheduler
enum IOPriority {
IO_LOW = 0,
IO_HIGH = 1,
IO_TOTAL = 2
};
// Arrange to run "(*function)(arg)" once in a background thread, in
// the thread pool specified by pri. By default, jobs go to the 'LOW'
// priority thread pool.
// "function" may run in an unspecified thread. Multiple functions
// added to the same Env may run concurrently in different threads.
// I.e., the caller may not assume that background work items are
// serialized.
// When the UnSchedule function is called, the unschedFunction
// registered at the time of Schedule is invoked with arg as a parameter.
virtual void Schedule(void (*function)(void* arg), void* arg,
Priority pri = LOW, void* tag = nullptr,
void (*unschedFunction)(void* arg) = 0) = 0;
// Arrange to remove jobs for given arg from the queue_ if they are not
// already scheduled. Caller is expected to have exclusive lock on arg.
virtual int UnSchedule(void* arg, Priority pri) { return 0; }
// Start a new thread, invoking "function(arg)" within the new thread.
// When "function(arg)" returns, the thread will be destroyed.
virtual void StartThread(void (*function)(void* arg), void* arg) = 0;
// Wait for all threads started by StartThread to terminate.
virtual void WaitForJoin() {}
// Get thread pool queue length for specific thread pool.
virtual unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const {
return 0;
}
// *path is set to a temporary directory that can be used for testing. It may
// or many not have just been created. The directory may or may not differ
// between runs of the same process, but subsequent calls will return the
// same directory.
virtual Status GetTestDirectory(std::string* path) = 0;
// Create and return a log file for storing informational messages.
virtual Status NewLogger(const std::string& fname,
shared_ptr<Logger>* result) = 0;
// Returns the number of micro-seconds since some fixed point in time.
// It is often used as system time such as in GenericRateLimiter
// and other places so a port needs to return system time in order to work.
virtual uint64_t NowMicros() = 0;
// Returns the number of nano-seconds since some fixed point in time. Only
// useful for computing deltas of time in one run.
// Default implementation simply relies on NowMicros.
// In platform-specific implementations, NowNanos() should return time points
// that are MONOTONIC.
virtual uint64_t NowNanos() {
return NowMicros() * 1000;
}
// Sleep/delay the thread for the perscribed number of micro-seconds.
virtual void SleepForMicroseconds(int micros) = 0;
// Get the current host name.
virtual Status GetHostName(char* name, uint64_t len) = 0;
// Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
// Only overwrites *unix_time on success.
virtual Status GetCurrentTime(int64_t* unix_time) = 0;
// Get full directory name for this db.
virtual Status GetAbsolutePath(const std::string& db_path,
std::string* output_path) = 0;
// The number of background worker threads of a specific thread pool
// for this environment. 'LOW' is the default pool.
// default number: 1
virtual void SetBackgroundThreads(int number, Priority pri = LOW) = 0;
virtual int GetBackgroundThreads(Priority pri = LOW) = 0;
// Enlarge number of background worker threads of a specific thread pool
// for this environment if it is smaller than specified. 'LOW' is the default
// pool.
virtual void IncBackgroundThreadsIfNeeded(int number, Priority pri) = 0;
// Lower IO priority for threads from the specified pool.
virtual void LowerThreadPoolIOPriority(Priority pool = LOW) {}
// Converts seconds-since-Jan-01-1970 to a printable string
virtual std::string TimeToString(uint64_t time) = 0;
// Generates a unique id that can be used to identify a db
virtual std::string GenerateUniqueId();
// OptimizeForLogWrite will create a new EnvOptions object that is a copy of
// the EnvOptions in the parameters, but is optimized for reading log files.
virtual EnvOptions OptimizeForLogRead(const EnvOptions& env_options) const;
// OptimizeForManifestRead will create a new EnvOptions object that is a copy
// of the EnvOptions in the parameters, but is optimized for reading manifest
// files.
virtual EnvOptions OptimizeForManifestRead(
const EnvOptions& env_options) const;
// OptimizeForLogWrite will create a new EnvOptions object that is a copy of
// the EnvOptions in the parameters, but is optimized for writing log files.
// Default implementation returns the copy of the same object.
virtual EnvOptions OptimizeForLogWrite(const EnvOptions& env_options,
const DBOptions& db_options) const;
// OptimizeForManifestWrite will create a new EnvOptions object that is a copy
// of the EnvOptions in the parameters, but is optimized for writing manifest
// files. Default implementation returns the copy of the same object.
virtual EnvOptions OptimizeForManifestWrite(
const EnvOptions& env_options) const;
// OptimizeForCompactionTableWrite will create a new EnvOptions object that is
// a copy of the EnvOptions in the parameters, but is optimized for writing
// table files.
virtual EnvOptions OptimizeForCompactionTableWrite(
const EnvOptions& env_options,
const ImmutableDBOptions& db_options) const;
// OptimizeForCompactionTableWrite will create a new EnvOptions object that
// is a copy of the EnvOptions in the parameters, but is optimized for reading
// table files.
virtual EnvOptions OptimizeForCompactionTableRead(
const EnvOptions& env_options,
const ImmutableDBOptions& db_options) const;
// Returns the status of all threads that belong to the current Env.
virtual Status GetThreadList(std::vector<ThreadStatus>* thread_list) {
return Status::NotSupported("Not supported.");
}
// Returns the pointer to ThreadStatusUpdater. This function will be
// used in RocksDB internally to update thread status and supports
// GetThreadList().
virtual ThreadStatusUpdater* GetThreadStatusUpdater() const {
return thread_status_updater_;
}
// Returns the ID of the current thread.
virtual uint64_t GetThreadID() const;
protected:
// The pointer to an internal structure that will update the
// status of each thread.
ThreadStatusUpdater* thread_status_updater_;
private:
// No copying allowed
Env(const Env&);
void operator=(const Env&);
};
// The factory function to construct a ThreadStatusUpdater. Any Env
// that supports GetThreadList() feature should call this function in its
// constructor to initialize thread_status_updater_.
ThreadStatusUpdater* CreateThreadStatusUpdater();
// A file abstraction for reading sequentially through a file
class SequentialFile {
public:
SequentialFile() { }
virtual ~SequentialFile();
// Read up to "n" bytes from the file. "scratch[0..n-1]" may be
// written by this routine. Sets "*result" to the data that was
// read (including if fewer than "n" bytes were successfully read).
// May set "*result" to point at data in "scratch[0..n-1]", so
// "scratch[0..n-1]" must be live when "*result" is used.
// If an error was encountered, returns a non-OK status.
//
// REQUIRES: External synchronization
virtual Status Read(size_t n, Slice* result, char* scratch) = 0;
// Skip "n" bytes from the file. This is guaranteed to be no
// slower that reading the same data, but may be faster.
//
// If end of file is reached, skipping will stop at the end of the
// file, and Skip will return OK.
//
// REQUIRES: External synchronization
virtual Status Skip(uint64_t n) = 0;
// Indicates the upper layers if the current SequentialFile implementation
// uses direct IO.
virtual bool use_direct_io() const { return false; }
// Use the returned alignment value to allocate
// aligned buffer for Direct I/O
virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; }
// Remove any kind of caching of data from the offset to offset+length
// of this file. If the length is 0, then it refers to the end of file.
// If the system is not caching the file contents, then this is a noop.
virtual Status InvalidateCache(size_t offset, size_t length) {
return Status::NotSupported("InvalidateCache not supported.");
}
// Positioned Read for direct I/O
// If Direct I/O enabled, offset, n, and scratch should be properly aligned
virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result,
char* scratch) {
return Status::NotSupported();
}
};
// A file abstraction for randomly reading the contents of a file.
class RandomAccessFile {
public:
RandomAccessFile() { }
virtual ~RandomAccessFile();
// Read up to "n" bytes from the file starting at "offset".
// "scratch[0..n-1]" may be written by this routine. Sets "*result"
// to the data that was read (including if fewer than "n" bytes were
// successfully read). May set "*result" to point at data in
// "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
// "*result" is used. If an error was encountered, returns a non-OK
// status.
//
// Safe for concurrent use by multiple threads.
// If Direct I/O enabled, offset, n, and scratch should be aligned properly.
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const = 0;
// Readahead the file starting from offset by n bytes for caching.
virtual Status Prefetch(uint64_t offset, size_t n) {
return Status::OK();
}
// Tries to get an unique ID for this file that will be the same each time
// the file is opened (and will stay the same while the file is open).
// Furthermore, it tries to make this ID at most "max_size" bytes. If such an
// ID can be created this function returns the length of the ID and places it
// in "id"; otherwise, this function returns 0, in which case "id"
// may not have been modified.
//
// This function guarantees, for IDs from a given environment, two unique ids
// cannot be made equal to each other by adding arbitrary bytes to one of
// them. That is, no unique ID is the prefix of another.
//
// This function guarantees that the returned ID will not be interpretable as
// a single varint.
//
// Note: these IDs are only valid for the duration of the process.
virtual size_t GetUniqueId(char* id, size_t max_size) const {
return 0; // Default implementation to prevent issues with backwards
// compatibility.
};
enum AccessPattern { NORMAL, RANDOM, SEQUENTIAL, WILLNEED, DONTNEED };
virtual void Hint(AccessPattern pattern) {}
// Indicates the upper layers if the current RandomAccessFile implementation
// uses direct IO.
virtual bool use_direct_io() const { return false; }
// Use the returned alignment value to allocate
// aligned buffer for Direct I/O
virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; }
// Remove any kind of caching of data from the offset to offset+length
// of this file. If the length is 0, then it refers to the end of file.
// If the system is not caching the file contents, then this is a noop.
virtual Status InvalidateCache(size_t offset, size_t length) {
return Status::NotSupported("InvalidateCache not supported.");
}
};
// A file abstraction for sequential writing. The implementation
// must provide buffering since callers may append small fragments
// at a time to the file.
class WritableFile {
public:
WritableFile()
: last_preallocated_block_(0),
preallocation_block_size_(0),
io_priority_(Env::IO_TOTAL) {
}
virtual ~WritableFile();
// Append data to the end of the file
// Note: A WriteabelFile object must support either Append or
// PositionedAppend, so the users cannot mix the two.
virtual Status Append(const Slice& data) = 0;
// PositionedAppend data to the specified offset. The new EOF after append
// must be larger than the previous EOF. This is to be used when writes are
// not backed by OS buffers and hence has to always start from the start of
// the sector. The implementation thus needs to also rewrite the last
// partial sector.
// Note: PositionAppend does not guarantee moving the file offset after the
// write. A WritableFile object must support either Append or
// PositionedAppend, so the users cannot mix the two.
//
// PositionedAppend() can only happen on the page/sector boundaries. For that
// reason, if the last write was an incomplete sector we still need to rewind
// back to the nearest sector/page and rewrite the portion of it with whatever
// we need to add. We need to keep where we stop writing.
//
// PositionedAppend() can only write whole sectors. For that reason we have to
// pad with zeros for the last write and trim the file when closing according
// to the position we keep in the previous step.
//
// PositionedAppend() requires aligned buffer to be passed in. The alignment
// required is queried via GetRequiredBufferAlignment()
virtual Status PositionedAppend(const Slice& /* data */, uint64_t /* offset */) {
return Status::NotSupported();
}
// Truncate is necessary to trim the file to the correct size
// before closing. It is not always possible to keep track of the file
// size due to whole pages writes. The behavior is undefined if called
// with other writes to follow.
virtual Status Truncate(uint64_t size) {
return Status::OK();
}
virtual Status Close() = 0;
virtual Status Flush() = 0;
virtual Status Sync() = 0; // sync data
/*
* Sync data and/or metadata as well.
* By default, sync only data.
* Override this method for environments where we need to sync
* metadata as well.
*/
virtual Status Fsync() {
return Sync();
}
// true if Sync() and Fsync() are safe to call concurrently with Append()
// and Flush().
virtual bool IsSyncThreadSafe() const {
return false;
}
// Indicates the upper layers if the current WritableFile implementation
// uses direct IO.
virtual bool use_direct_io() const { return false; }
// Use the returned alignment value to allocate
// aligned buffer for Direct I/O
virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; }
/*
* Change the priority in rate limiter if rate limiting is enabled.
* If rate limiting is not enabled, this call has no effect.
*/
virtual void SetIOPriority(Env::IOPriority pri) {
io_priority_ = pri;
}
virtual Env::IOPriority GetIOPriority() { return io_priority_; }
/*
* Get the size of valid data in the file.
*/
virtual uint64_t GetFileSize() {
return 0;
}
/*
* Get and set the default pre-allocation block size for writes to
* this file. If non-zero, then Allocate will be used to extend the
* underlying storage of a file (generally via fallocate) if the Env
* instance supports it.
*/
virtual void SetPreallocationBlockSize(size_t size) {
preallocation_block_size_ = size;
}
virtual void GetPreallocationStatus(size_t* block_size,
size_t* last_allocated_block) {
*last_allocated_block = last_preallocated_block_;
*block_size = preallocation_block_size_;
}
// For documentation, refer to RandomAccessFile::GetUniqueId()
virtual size_t GetUniqueId(char* id, size_t max_size) const {
return 0; // Default implementation to prevent issues with backwards
}
// Remove any kind of caching of data from the offset to offset+length
// of this file. If the length is 0, then it refers to the end of file.
// If the system is not caching the file contents, then this is a noop.
// This call has no effect on dirty pages in the cache.
virtual Status InvalidateCache(size_t offset, size_t length) {
return Status::NotSupported("InvalidateCache not supported.");
}
// Sync a file range with disk.
// offset is the starting byte of the file range to be synchronized.
// nbytes specifies the length of the range to be synchronized.
// This asks the OS to initiate flushing the cached data to disk,
// without waiting for completion.
// Default implementation does nothing.
virtual Status RangeSync(uint64_t offset, uint64_t nbytes) { return Status::OK(); }
// PrepareWrite performs any necessary preparation for a write
// before the write actually occurs. This allows for pre-allocation
// of space on devices where it can result in less file
// fragmentation and/or less waste from over-zealous filesystem
// pre-allocation.
virtual void PrepareWrite(size_t offset, size_t len) {
if (preallocation_block_size_ == 0) {
return;
}
// If this write would cross one or more preallocation blocks,
// determine what the last preallocation block necessary to
// cover this write would be and Allocate to that point.
const auto block_size = preallocation_block_size_;
size_t new_last_preallocated_block =
(offset + len + block_size - 1) / block_size;
if (new_last_preallocated_block > last_preallocated_block_) {
size_t num_spanned_blocks =
new_last_preallocated_block - last_preallocated_block_;
Allocate(block_size * last_preallocated_block_,
block_size * num_spanned_blocks);
last_preallocated_block_ = new_last_preallocated_block;
}
}
// Pre-allocates space for a file.
virtual Status Allocate(uint64_t offset, uint64_t len) {
return Status::OK();
}
protected:
size_t preallocation_block_size() { return preallocation_block_size_; }
private:
size_t last_preallocated_block_;
size_t preallocation_block_size_;
// No copying allowed
WritableFile(const WritableFile&);
void operator=(const WritableFile&);
protected:
friend class WritableFileWrapper;
friend class WritableFileMirror;
Env::IOPriority io_priority_;
};
// A file abstraction for random reading and writing.
class RandomRWFile {
public:
RandomRWFile() {}
virtual ~RandomRWFile() {}
// Indicates if the class makes use of direct I/O
// If false you must pass aligned buffer to Write()
virtual bool use_direct_io() const { return false; }
// Use the returned alignment value to allocate
// aligned buffer for Direct I/O
virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; }
// Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
// Pass aligned buffer when use_direct_io() returns true.
virtual Status Write(uint64_t offset, const Slice& data) = 0;
// Read up to `n` bytes starting from offset `offset` and store them in
// result, provided `scratch` size should be at least `n`.
// Returns Status::OK() on success.
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const = 0;
virtual Status Flush() = 0;
virtual Status Sync() = 0;
virtual Status Fsync() { return Sync(); }
virtual Status Close() = 0;
// No copying allowed
RandomRWFile(const RandomRWFile&) = delete;
RandomRWFile& operator=(const RandomRWFile&) = delete;
};
// Directory object represents collection of files and implements
// filesystem operations that can be executed on directories.
class Directory {
public:
virtual ~Directory() {}
// Fsync directory. Can be called concurrently from multiple threads.
virtual Status Fsync() = 0;
};
enum InfoLogLevel : unsigned char {
DEBUG_LEVEL = 0,
INFO_LEVEL,
WARN_LEVEL,
ERROR_LEVEL,
FATAL_LEVEL,
HEADER_LEVEL,
NUM_INFO_LOG_LEVELS,
};
// An interface for writing log messages.
class Logger {
public:
size_t kDoNotSupportGetLogFileSize = (std::numeric_limits<size_t>::max)();
explicit Logger(const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL)
: log_level_(log_level) {}
virtual ~Logger();
// Write a header to the log file with the specified format
// It is recommended that you log all header information at the start of the
// application. But it is not enforced.
virtual void LogHeader(const char* format, va_list ap) {
// Default implementation does a simple INFO level log write.
// Please override as per the logger class requirement.
Logv(format, ap);
}
// Write an entry to the log file with the specified format.
virtual void Logv(const char* format, va_list ap) = 0;
// Write an entry to the log file with the specified log level
// and format. Any log with level under the internal log level
// of *this (see @SetInfoLogLevel and @GetInfoLogLevel) will not be
// printed.
virtual void Logv(const InfoLogLevel log_level, const char* format, va_list ap);
virtual size_t GetLogFileSize() const { return kDoNotSupportGetLogFileSize; }
// Flush to the OS buffers
virtual void Flush() {}
virtual InfoLogLevel GetInfoLogLevel() const { return log_level_; }
virtual void SetInfoLogLevel(const InfoLogLevel log_level) {
log_level_ = log_level;
}
private:
// No copying allowed
Logger(const Logger&);
void operator=(const Logger&);
InfoLogLevel log_level_;
};
// Identifies a locked file.
class FileLock {
public:
FileLock() { }
virtual ~FileLock();
private:
// No copying allowed
FileLock(const FileLock&);
void operator=(const FileLock&);
};
extern void LogFlush(const shared_ptr<Logger>& info_log);
extern void Log(const InfoLogLevel log_level,
const shared_ptr<Logger>& info_log, const char* format, ...);
// a set of log functions with different log levels.
extern void Header(const shared_ptr<Logger>& info_log, const char* format, ...);
extern void Debug(const shared_ptr<Logger>& info_log, const char* format, ...);
extern void Info(const shared_ptr<Logger>& info_log, const char* format, ...);
extern void Warn(const shared_ptr<Logger>& info_log, const char* format, ...);
extern void Error(const shared_ptr<Logger>& info_log, const char* format, ...);
extern void Fatal(const shared_ptr<Logger>& info_log, const char* format, ...);
// Log the specified data to *info_log if info_log is non-nullptr.
// The default info log level is InfoLogLevel::INFO_LEVEL.
extern void Log(const shared_ptr<Logger>& info_log, const char* format, ...)
# if defined(__GNUC__) || defined(__clang__)
__attribute__((__format__ (__printf__, 2, 3)))
# endif
;
extern void LogFlush(Logger *info_log);
extern void Log(const InfoLogLevel log_level, Logger* info_log,
const char* format, ...);
// The default info log level is InfoLogLevel::INFO_LEVEL.
extern void Log(Logger* info_log, const char* format, ...)
# if defined(__GNUC__) || defined(__clang__)
__attribute__((__format__ (__printf__, 2, 3)))
# endif
;
// a set of log functions with different log levels.
extern void Header(Logger* info_log, const char* format, ...);
extern void Debug(Logger* info_log, const char* format, ...);
extern void Info(Logger* info_log, const char* format, ...);
extern void Warn(Logger* info_log, const char* format, ...);
extern void Error(Logger* info_log, const char* format, ...);
extern void Fatal(Logger* info_log, const char* format, ...);
// A utility routine: write "data" to the named file.
extern Status WriteStringToFile(Env* env, const Slice& data,
const std::string& fname,
bool should_sync = false);
// A utility routine: read contents of named file into *data
extern Status ReadFileToString(Env* env, const std::string& fname,
std::string* data);
// An implementation of Env that forwards all calls to another Env.
// May be useful to clients who wish to override just part of the
// functionality of another Env.
class EnvWrapper : public Env {
public:
// Initialize an EnvWrapper that delegates all calls to *t
explicit EnvWrapper(Env* t) : target_(t) { }
~EnvWrapper() override;
// Return the target to which this Env forwards all calls
Env* target() const { return target_; }
// The following text is boilerplate that forwards all methods to target()
Status NewSequentialFile(const std::string& f, unique_ptr<SequentialFile>* r,
const EnvOptions& options) override {
return target_->NewSequentialFile(f, r, options);
}
Status NewRandomAccessFile(const std::string& f,
unique_ptr<RandomAccessFile>* r,
const EnvOptions& options) override {
return target_->NewRandomAccessFile(f, r, options);
}
Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r,
const EnvOptions& options) override {
return target_->NewWritableFile(f, r, options);
}
Status ReopenWritableFile(const std::string& fname,
unique_ptr<WritableFile>* result,
const EnvOptions& options) override {
return target_->ReopenWritableFile(fname, result, options);
}
Status ReuseWritableFile(const std::string& fname,
const std::string& old_fname,
unique_ptr<WritableFile>* r,
const EnvOptions& options) override {
return target_->ReuseWritableFile(fname, old_fname, r, options);
}
Status NewRandomRWFile(const std::string& fname,
unique_ptr<RandomRWFile>* result,
const EnvOptions& options) override {
return target_->NewRandomRWFile(fname, result, options);
}
Status NewDirectory(const std::string& name,
unique_ptr<Directory>* result) override {
return target_->NewDirectory(name, result);
}
Status FileExists(const std::string& f) override {
return target_->FileExists(f);
}
Status GetChildren(const std::string& dir,
std::vector<std::string>* r) override {
return target_->GetChildren(dir, r);
}
Status GetChildrenFileAttributes(
const std::string& dir, std::vector<FileAttributes>* result) override {
return target_->GetChildrenFileAttributes(dir, result);
}
Status DeleteFile(const std::string& f) override {
return target_->DeleteFile(f);
}
Status CreateDir(const std::string& d) override {
return target_->CreateDir(d);
}
Status CreateDirIfMissing(const std::string& d) override {
return target_->CreateDirIfMissing(d);
}
Status DeleteDir(const std::string& d) override {
return target_->DeleteDir(d);
}
Status GetFileSize(const std::string& f, uint64_t* s) override {
return target_->GetFileSize(f, s);
}
Status GetFileModificationTime(const std::string& fname,
uint64_t* file_mtime) override {
return target_->GetFileModificationTime(fname, file_mtime);
}
Status RenameFile(const std::string& s, const std::string& t) override {
return target_->RenameFile(s, t);
}
Status LinkFile(const std::string& s, const std::string& t) override {
return target_->LinkFile(s, t);
}
Status LockFile(const std::string& f, FileLock** l) override {
return target_->LockFile(f, l);
}
Status UnlockFile(FileLock* l) override { return target_->UnlockFile(l); }
void Schedule(void (*f)(void* arg), void* a, Priority pri,
void* tag = nullptr, void (*u)(void* arg) = 0) override {
return target_->Schedule(f, a, pri, tag, u);
}
int UnSchedule(void* tag, Priority pri) override {
return target_->UnSchedule(tag, pri);
}
void StartThread(void (*f)(void*), void* a) override {
return target_->StartThread(f, a);
}
void WaitForJoin() override { return target_->WaitForJoin(); }
unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const override {
return target_->GetThreadPoolQueueLen(pri);
}
Status GetTestDirectory(std::string* path) override {
return target_->GetTestDirectory(path);
}
Status NewLogger(const std::string& fname,
shared_ptr<Logger>* result) override {
return target_->NewLogger(fname, result);
}
uint64_t NowMicros() override { return target_->NowMicros(); }
void SleepForMicroseconds(int micros) override {
target_->SleepForMicroseconds(micros);
}
Status GetHostName(char* name, uint64_t len) override {
return target_->GetHostName(name, len);
}
Status GetCurrentTime(int64_t* unix_time) override {
return target_->GetCurrentTime(unix_time);
}
Status GetAbsolutePath(const std::string& db_path,
std::string* output_path) override {
return target_->GetAbsolutePath(db_path, output_path);
}
void SetBackgroundThreads(int num, Priority pri) override {
return target_->SetBackgroundThreads(num, pri);
}
int GetBackgroundThreads(Priority pri) override {
return target_->GetBackgroundThreads(pri);
}
void IncBackgroundThreadsIfNeeded(int num, Priority pri) override {
return target_->IncBackgroundThreadsIfNeeded(num, pri);
}
void LowerThreadPoolIOPriority(Priority pool = LOW) override {
target_->LowerThreadPoolIOPriority(pool);
}
std::string TimeToString(uint64_t time) override {
return target_->TimeToString(time);
}
Status GetThreadList(std::vector<ThreadStatus>* thread_list) override {
return target_->GetThreadList(thread_list);
}
ThreadStatusUpdater* GetThreadStatusUpdater() const override {
return target_->GetThreadStatusUpdater();
}
uint64_t GetThreadID() const override {
return target_->GetThreadID();
}
std::string GenerateUniqueId() override {
return target_->GenerateUniqueId();
}
private:
Env* target_;
};
// An implementation of WritableFile that forwards all calls to another
// WritableFile. May be useful to clients who wish to override just part of the
// functionality of another WritableFile.
// It's declared as friend of WritableFile to allow forwarding calls to
// protected virtual methods.
class WritableFileWrapper : public WritableFile {
public:
explicit WritableFileWrapper(WritableFile* t) : target_(t) { }
Status Append(const Slice& data) override { return target_->Append(data); }
Status PositionedAppend(const Slice& data, uint64_t offset) override {
return target_->PositionedAppend(data, offset);
}
Status Truncate(uint64_t size) override { return target_->Truncate(size); }
Status Close() override { return target_->Close(); }
Status Flush() override { return target_->Flush(); }
Status Sync() override { return target_->Sync(); }
Status Fsync() override { return target_->Fsync(); }
bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); }
void SetIOPriority(Env::IOPriority pri) override {
target_->SetIOPriority(pri);
}
Env::IOPriority GetIOPriority() override { return target_->GetIOPriority(); }
uint64_t GetFileSize() override { return target_->GetFileSize(); }
void GetPreallocationStatus(size_t* block_size,
size_t* last_allocated_block) override {
target_->GetPreallocationStatus(block_size, last_allocated_block);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
void SetPreallocationBlockSize(size_t size) override {
target_->SetPreallocationBlockSize(size);
}
void PrepareWrite(size_t offset, size_t len) override {
target_->PrepareWrite(offset, len);
}
protected:
Status Allocate(uint64_t offset, uint64_t len) override {
return target_->Allocate(offset, len);
}
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
return target_->RangeSync(offset, nbytes);
}
private:
WritableFile* target_;
};
// Returns a new environment that stores its data in memory and delegates
// all non-file-storage tasks to base_env. The caller must delete the result
// when it is no longer needed.
// *base_env must remain live while the result is in use.
Env* NewMemEnv(Env* base_env);
// Returns a new environment that is used for HDFS environment.
// This is a factory method for HdfsEnv declared in hdfs/env_hdfs.h
Status NewHdfsEnv(Env** hdfs_env, const std::string& fsname);
// Returns a new environment that measures function call times for filesystem
// operations, reporting results to variables in PerfContext.
// This is a factory method for TimedEnv defined in utilities/env_timed.cc.
Env* NewTimedEnv(Env* base_env);
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_ENV_H_
|