From 3d2ede037c6309f81f97c7cf522e4ef2e2e391aa Mon Sep 17 00:00:00 2001 From: Flier Lu Date: Fri, 19 Mar 2021 22:26:32 +0800 Subject: [PATCH 1/3] Add missing options --- options.go | 236 +++++++++++++++++++++++++++++++++++ options_block_based_table.go | 20 +++ options_compression.go | 18 +-- options_read.go | 43 ++++++- options_write.go | 41 ++++++ 5 files changed, 349 insertions(+), 9 deletions(-) diff --git a/options.go b/options.go index 07000215..9cc8d739 100644 --- a/options.go +++ b/options.go @@ -307,6 +307,18 @@ func (opts *Options) SetAllowConcurrentMemtableWrites(allow bool) { C.rocksdb_options_set_allow_concurrent_memtable_write(opts.c, boolToChar(allow)) } +// SetEnableWriteThreadAdaptiveYield sets enable_write_thread_adaptive_yield. +// +// If true, threads synchronizing with the write batch group leader will +// wait for up to write_thread_max_yield_usec before blocking on a mutex. +// This can substantially improve throughput for concurrent workloads, +// regardless of whether allow_concurrent_memtable_write is enabled. +// +// Default: true +func (opts *Options) SetEnableWriteThreadAdaptiveYield(allow bool) { + C.rocksdb_options_set_enable_write_thread_adaptive_yield(opts.c, boolToChar(allow)) +} + // OptimizeLevelStyleCompaction optimize the DB for leveld compaction. // // Default values for some parameters in ColumnFamilyOptions are not @@ -370,6 +382,41 @@ func (opts *Options) SetMinWriteBufferNumberToMerge(value int) { C.rocksdb_options_set_min_write_buffer_number_to_merge(opts.c, C.int(value)) } +// SetMaxWriteBufferSizeToMaintain sets max_write_buffer_size_to_maintain +// The total maximum size(bytes) of write buffers to maintain in memory +// including copies of buffers that have already been flushed. This parameter +// only affects trimming of flushed buffers and does not affect flushing. +// This controls the maximum amount of write history that will be available +// in memory for conflict checking when Transactions are used. 
The actual +// size of write history (flushed Memtables) might be higher than this limit +// if further trimming will reduce write history total size below this +// limit. For example, if max_write_buffer_size_to_maintain is set to 64MB, +// and there are three flushed Memtables, with sizes of 32MB, 20MB, 20MB. +// Because trimming the next Memtable of size 20MB will reduce total memory +// usage to 52MB which is below the limit, RocksDB will stop trimming. +// +// When using an OptimisticTransactionDB: +// If this value is too low, some transactions may fail at commit time due +// to not being able to determine whether there were any write conflicts. +// +// When using a TransactionDB: +// If Transaction::SetSnapshot is used, TransactionDB will read either +// in-memory write buffers or SST files to do write-conflict checking. +// Increasing this value can reduce the number of reads to SST files +// done for conflict detection. +// +// Setting this value to 0 will cause write buffers to be freed immediately +// after they are flushed. If this value is set to -1, +// 'max_write_buffer_number * write_buffer_size' will be used. +// +// Default: +// If using a TransactionDB/OptimisticTransactionDB, the default value will +// be set to the value of 'max_write_buffer_number * write_buffer_size' +// if it is not explicitly set by the user. Otherwise, the default is 0. +func (opts *Options) SetMaxWriteBufferSizeToMaintain(value int64) { + C.rocksdb_options_set_max_write_buffer_size_to_maintain(opts.c, C.int64_t(value)) +} + // SetMaxOpenFiles sets the number of open files that can be used by the DB. // // You may need to increase this if your database has a large working set @@ -405,6 +452,15 @@ func (opts *Options) SetCompression(value CompressionType) { C.rocksdb_options_set_compression(opts.c, C.int(value)) } +// SetBottommostCompression sets the compression algorithm +// that will be used for the bottommost level that contain files. 
+// +// Default: SnappyCompression, which gives lightweight but fast +// compression. +func (opts *Options) SetBottommostCompression(value CompressionType) { + C.rocksdb_options_set_bottommost_compression(opts.c, C.int(value)) +} + // SetCompressionPerLevel sets different compression algorithm per level. // // Different levels can have different compression policies. There @@ -432,6 +488,20 @@ func (opts *Options) SetMinLevelToCompress(value int) { // Default: nil func (opts *Options) SetCompressionOptions(value *CompressionOptions) { C.rocksdb_options_set_compression_options(opts.c, C.int(value.WindowBits), C.int(value.Level), C.int(value.Strategy), C.int(value.MaxDictBytes)) + if value.ZstdMaxTrainBytes > 0 { + C.rocksdb_options_set_compression_options_zstd_max_train_bytes(opts.c, C.int(value.ZstdMaxTrainBytes)) + } +} + +// SetBottommostCompressionOptions sets different options for compression algorithms used by bottommost_compression +// if it is enabled. To enable it, please see the definition of +// CompressionOptions. +// Default: nil +func (opts *Options) SetBottommostCompressionOptions(value *CompressionOptions, enabled bool) { + C.rocksdb_options_set_bottommost_compression_options(opts.c, C.int(value.WindowBits), C.int(value.Level), C.int(value.Strategy), C.int(value.MaxDictBytes), boolToChar(enabled)) + if value.ZstdMaxTrainBytes > 0 { + C.rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes(opts.c, C.int(value.ZstdMaxTrainBytes), boolToChar(enabled)) + } } // SetPrefixExtractor sets the prefic extractor. @@ -739,6 +809,19 @@ func (opts *Options) SetKeepLogFileNum(value int) { C.rocksdb_options_set_keep_log_file_num(opts.c, C.size_t(value)) } +// SetRecycleLogFileNum sets the recycle log files. +// +// If non-zero, we will reuse previously written log files for new +// logs, overwriting the old data. The value indicates how many +// such files we will keep around at any point in time for later +// use. 
This is more efficient because the blocks are already +// allocated and fdatasync does not need to update the inode after +// each write. +// Default: 0 +func (opts *Options) SetRecycleLogFileNum(value int) { + C.rocksdb_options_set_recycle_log_file_num(opts.c, C.size_t(value)) +} + // SetSoftRateLimit sets the soft rate limit. // // Puts are delayed 0-1 ms when any level has a compaction score that exceeds @@ -853,6 +936,55 @@ func (opts *Options) SetEnablePipelinedWrite(value bool) { C.rocksdb_options_set_enable_pipelined_write(opts.c, boolToChar(value)) } +// SetUnorderedWrite enables unordered write +// +// Setting unordered_write to true trades higher write throughput with +// relaxing the immutability guarantee of snapshots. This violates the +// repeatability one expects from ::Get from a snapshot, as well as +// ::MultiGet and Iterator's consistent-point-in-time view property. +// If the application cannot tolerate the relaxed guarantees, it can implement +// its own mechanisms to work around that and yet benefit from the higher +// throughput. Using TransactionDB with WRITE_PREPARED write policy and +// two_write_queues=true is one way to achieve immutable snapshots despite +// unordered_write. +// +// By default, i.e., when it is false, rocksdb does not advance the sequence +// number for new snapshots unless all the writes with lower sequence numbers +// are already finished. This provides the immutability that we except from +// snapshots. Moreover, since Iterator and MultiGet internally depend on +// snapshots, the snapshot immutability results into Iterator and MultiGet +// offering consistent-point-in-time view. 
If set to true, although +// Read-Your-Own-Write property is still provided, the snapshot immutability +// property is relaxed: the writes issued after the snapshot is obtained (with +// larger sequence numbers) will be still not visible to the reads from that +// snapshot, however, there still might be pending writes (with lower sequence +// number) that will change the state visible to the snapshot after they are +// landed to the memtable. +// +// Default: false +func (opts *Options) SetUnorderedWrite(value bool) { + C.rocksdb_options_set_unordered_write(opts.c, boolToChar(value)) +} + +// SetMaxSubcompactions sets the maximum number of threads that will +// concurrently perform a compaction job by breaking it into multiple, +// smaller ones that are run simultaneously. +// Default: 1 (i.e. no subcompactions) +// +// Dynamically changeable through SetDBOptions() API. +func (opts *Options) SetMaxSubcompactions(value uint) { + C.rocksdb_options_set_max_subcompactions(opts.c, C.uint32_t(value)) +} + +// SetMaxBackgroundJobs sets the maximum number of concurrent background jobs (compactions and flushes). +// +// Default: 2 +// +// Dynamically changeable through SetDBOptions() API. +func (opts *Options) SetMaxBackgroundJobs(value int) { + C.rocksdb_options_set_max_background_jobs(opts.c, C.int(value)) +} + // SetManifestPreallocationSize sets the number of bytes // to preallocate (via fallocate) the manifest files. // @@ -918,6 +1050,14 @@ func (opts *Options) SetStatsDumpPeriodSec(value uint) { C.rocksdb_options_set_stats_dump_period_sec(opts.c, C.uint(value)) } +// SetStatsPersistPeriodSec sets the stats persist period in seconds. 
+// +// if not zero, dump rocksdb.stats to RocksDB every stats_persist_period_sec +// Default: 600 +func (opts *Options) SetStatsPersistPeriodSec(value int) { + C.rocksdb_options_set_stats_persist_period_sec(opts.c, C.uint(value)) +} + // SetAdviseRandomOnOpen specifies whether we will hint the underlying // file system that the file access pattern is random, when a sst file is opened. // Default: true @@ -969,6 +1109,31 @@ func (opts *Options) SetBytesPerSync(value uint64) { C.rocksdb_options_set_bytes_per_sync(opts.c, C.uint64_t(value)) } +// SetWalBytesPerSync sets the bytes per sync for WAL files. +// +// Same as bytes_per_sync, but applies to WAL files +// +// Default: 0, turned off +// +// Dynamically changeable through SetDBOptions() API. +func (opts *Options) SetWalBytesPerSync(value uint64) { + C.rocksdb_options_set_wal_bytes_per_sync(opts.c, C.uint64_t(value)) +} + +// SetWritableFileMaxBufferSize sets the maximum buffer size that is used by WritableFileWriter. +// +// On Windows, we need to maintain an aligned buffer for writes. +// We allow the buffer to grow until its size hits the limit in buffered +// IO and fix the buffer size when using direct IO to ensure alignment of +// write requests if the logical sector size is unusual +// +// Default: 1024 * 1024 (1 MB) +// +// Dynamically changeable through SetDBOptions() API. +func (opts *Options) SetWritableFileMaxBufferSize(value uint64) { + C.rocksdb_options_set_writable_file_max_buffer_size(opts.c, C.uint64_t(value)) +} + // SetCompactionStyle sets the compaction style. // Default: LevelCompactionStyle func (opts *Options) SetCompactionStyle(value CompactionStyle) { @@ -988,6 +1153,30 @@ func (opts *Options) SetFIFOCompactionOptions(value *FIFOCompactionOptions) { C.rocksdb_options_set_fifo_compaction_options(opts.c, value.c) } +// SetAtomicFlush sets atomic_flush +// If true, RocksDB supports flushing multiple column families and committing +// their results atomically to MANIFEST. 
Note that it is not +// necessary to set atomic_flush to true if WAL is always enabled since WAL +// allows the database to be restored to the last persistent state in WAL. +// This option is useful when there are column families with writes NOT +// protected by WAL. +// For manual flush, application has to specify which column families to +// flush atomically in DB::Flush. +// For auto-triggered flush, RocksDB atomically flushes ALL column families. +// +// Currently, any WAL-enabled writes after atomic flush may be replayed +// independently if the process crashes later and tries to recover. +func (opts *Options) SetAtomicFlush(value bool) { + C.rocksdb_options_set_atomic_flush(opts.c, boolToChar(value)) +} + +// SetRowCache sets a global cache for table-level rows. +// Default: nullptr (disabled) +// Not supported in ROCKSDB_LITE mode! +func (opts *Options) SetRowCache(cache *Cache) { + C.rocksdb_options_set_row_cache(opts.c, cache.c) +} + // GetStatisticsString returns the statistics as a string. func (opts *Options) GetStatisticsString() string { sString := C.rocksdb_options_statistics_get_string(opts.c) @@ -1031,6 +1220,15 @@ func (opts *Options) SetInplaceUpdateNumLocks(value int) { C.rocksdb_options_set_inplace_update_num_locks(opts.c, C.size_t(value)) } +// SetReportBgIoStats enables or disables measuring IO stats in compactions and flushes. +// +// Default: false +// +// Dynamically changeable through SetOptions() API +func (opts *Options) SetReportBgIoStats(value bool) { + C.rocksdb_options_set_report_bg_io_stats(opts.c, C.int(btoi(value))) +} + // SetMemtableHugePageSize sets the page size for huge page for // arena used by the memtable. // If <=0, it won't allocate from huge page but from malloc. @@ -1096,6 +1294,20 @@ func (opts *Options) SetMemtableVectorRep() { C.rocksdb_options_set_memtable_vector_rep(opts.c) } +// SetMemtablePrefixBloomSizeRatio sets memtable_prefix_bloom_size_ratio. 
+// +// if prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0, +// create prefix bloom for memtable with the size of +// write_buffer_size * memtable_prefix_bloom_size_ratio. +// If it is larger than 0.25, it is sanitized to 0.25. +// +// Default: 0 (disable) +// +// Dynamically changeable through SetOptions() API +func (opts *Options) SetMemtablePrefixBloomSizeRatio(value float64) { + C.rocksdb_options_set_memtable_prefix_bloom_size_ratio(opts.c, C.double(value)) +} + // SetHashSkipListRep sets a hash skip list as MemTableRep. // // It contains a fixed array of buckets, each @@ -1196,6 +1408,30 @@ func (opts *Options) SetOptimizeFiltersForHits(value bool) { C.rocksdb_options_set_optimize_filters_for_hits(opts.c, C.int(btoi(value))) } +// SetSkipStatsUpdateOnDbOpen sets skip_stats_update_on_db_open +// If true, then DB::Open() will not update the statistics used to optimize +// compaction decision by loading table properties from many files. +// Turning off this feature will improve DBOpen time especially in +// disk environment. +// +// Default: false +func (opts *Options) SetSkipStatsUpdateOnDbOpen(value bool) { + C.rocksdb_options_set_skip_stats_update_on_db_open(opts.c, boolToChar(value)) +} + +// SetSkipCheckingSstFileSizesOnDbOpen sets skip_checking_sst_file_sizes_on_db_open +// If true, then DB::Open() will not fetch and check sizes of all sst files. +// This may significantly speed up startup if there are many sst files, +// especially when using non-default Env with expensive GetFileSize(). +// We'll still check that all required sst files exist. +// If paranoid_checks is false, this option is ignored, and sst files are +// not checked at all. +// +// Default: false +func (opts *Options) SetSkipCheckingSstFileSizesOnDbOpen(value bool) { + C.rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(opts.c, boolToChar(value)) +} + // Destroy deallocates the Options object. 
func (opts *Options) Destroy() { C.rocksdb_options_destroy(opts.c) diff --git a/options_block_based_table.go b/options_block_based_table.go index 80244132..465f7f0d 100644 --- a/options_block_based_table.go +++ b/options_block_based_table.go @@ -18,6 +18,14 @@ const ( KTwoLevelIndexSearchIndexType = 2 ) +// DataBlockIndexType specifies the index type that will be used for the data block. +type DataBlockIndexType byte + +const ( + KDataBlockBinarySearch DataBlockIndexType = 0 // traditional block type + KDataBlockBinaryAndHash DataBlockIndexType = 1 // additional hash index +) + // BlockBasedTableOptions represents block-based table options. type BlockBasedTableOptions struct { c *C.rocksdb_block_based_table_options_t @@ -235,3 +243,15 @@ func (opts *BlockBasedTableOptions) SetFormatVersion(version int) { func (opts *BlockBasedTableOptions) SetIndexType(value IndexType) { C.rocksdb_block_based_options_set_index_type(opts.c, C.int(value)) } + +// SetDataBlockIndexType sets the index type that will be used for the data block. +func (opts *BlockBasedTableOptions) SetDataBlockIndexType(value DataBlockIndexType) { + C.rocksdb_block_based_options_set_data_block_index_type(opts.c, C.int(value)) +} + +// SetDataBlockIndexType sets the hash radio that will be used for the data block. +// #entries/#buckets. It is valid only when data_block_hash_index_type is +// kDataBlockBinaryAndHash. +func (opts *BlockBasedTableOptions) SetDataBlockHashRadio(value float64) { + C.rocksdb_block_based_options_set_data_block_hash_ratio(opts.c, C.double(value)) +} diff --git a/options_compression.go b/options_compression.go index 7165ceda..fbcb80ad 100644 --- a/options_compression.go +++ b/options_compression.go @@ -2,10 +2,11 @@ package gorocksdb // CompressionOptions represents options for different compression algorithms like Zlib. 
type CompressionOptions struct { - WindowBits int - Level int - Strategy int - MaxDictBytes int + WindowBits int + Level int + Strategy int + MaxDictBytes int + ZstdMaxTrainBytes int } // NewDefaultCompressionOptions creates a default CompressionOptions object. @@ -16,9 +17,10 @@ func NewDefaultCompressionOptions() *CompressionOptions { // NewCompressionOptions creates a CompressionOptions object. func NewCompressionOptions(windowBits, level, strategy, maxDictBytes int) *CompressionOptions { return &CompressionOptions{ - WindowBits: windowBits, - Level: level, - Strategy: strategy, - MaxDictBytes: maxDictBytes, + WindowBits: windowBits, + Level: level, + Strategy: strategy, + MaxDictBytes: maxDictBytes, + ZstdMaxTrainBytes: 0, } } diff --git a/options_read.go b/options_read.go index 6a37cc48..da734d36 100644 --- a/options_read.go +++ b/options_read.go @@ -2,7 +2,9 @@ package gorocksdb // #include "rocksdb/c.h" import "C" -import "unsafe" +import ( + "unsafe" +) // ReadTier controls fetching of data during a read request. // An application can issue a read request (via Get/Iterators) and specify @@ -129,6 +131,45 @@ func (opts *ReadOptions) SetReadaheadSize(value uint64) { C.rocksdb_readoptions_set_readahead_size(opts.c, C.size_t(value)) } +// SetTotalOrderSeek specifies the value of "total_order_seek". +// Enable a total order seek regardless of index format (e.g. hash index) +// used in the table. Some table format (e.g. plain table) may not support +// this option. +// If true when calling Get(), we also skip prefix bloom when reading from +// block based table. It provides a way to read existing data after +// changing implementation of prefix extractor. +// Default: false +func (opts *ReadOptions) SetTotalOrderSeek(value bool) { + C.rocksdb_readoptions_set_total_order_seek(opts.c, boolToChar(value)) +} + +// SetMaxSkippableInternalKeys specifies the value of "max_skippable_internal_keys". 
+// A threshold for the number of keys that can be skipped before failing an +// iterator seek as incomplete. The default value of 0 should be used to +// never fail a request as incomplete, even on skipping too many keys. +// Default: 0 +func (opts *ReadOptions) SetMaxSkippableInternalKeys(value uint64) { + C.rocksdb_readoptions_set_max_skippable_internal_keys(opts.c, C.uint64_t(value)) +} + +// SetBackgroundPurgeOnIteratorCleanup specifies the value of "background_purge_on_iterator_cleanup". +// If true, when PurgeObsoleteFile is called in CleanupIteratorState, we +// schedule a background job in the flush job queue and delete obsolete files +// in background. +// Default: false +func (opts *ReadOptions) SetBackgroundPurgeOnIteratorCleanup(value bool) { + C.rocksdb_readoptions_set_background_purge_on_iterator_cleanup(opts.c, boolToChar(value)) +} + +// SetIgnoreRangeDeletions specifies the value of "ignore_range_deletions". +// If true, keys deleted using the DeleteRange() API will be visible to +// readers until they are naturally deleted during compaction. This improves +// read performance in DBs with many range deletions. +// Default: false +func (opts *ReadOptions) SetIgnoreRangeDeletions(value bool) { + C.rocksdb_readoptions_set_ignore_range_deletions(opts.c, boolToChar(value)) +} + // Destroy deallocates the ReadOptions object. func (opts *ReadOptions) Destroy() { C.rocksdb_readoptions_destroy(opts.c) diff --git a/options_write.go b/options_write.go index 01cd9c9a..ad52263a 100644 --- a/options_write.go +++ b/options_write.go @@ -35,6 +35,47 @@ func (opts *WriteOptions) DisableWAL(value bool) { C.rocksdb_writeoptions_disable_WAL(opts.c, C.int(btoi(value))) } +// SetIgnoreMissingColumnFamilies specifies the value of "ignore_missing_column_families". +// If true and if user is trying to write to column families that don't exist +// (they were dropped), ignore the write (don't return an error). 
If there +// are multiple writes in a WriteBatch, other writes will succeed. +// Default: false +func (opts *WriteOptions) SetIgnoreMissingColumnFamilies(value bool) { + C.rocksdb_writeoptions_set_ignore_missing_column_families(opts.c, boolToChar(value)) +} + +// SetNoSlowdown specifies the value of "no_slowdown". +// If true and we need to wait or sleep for the write request, fails +// immediately with Status::Incomplete(). +// Default: false +func (opts *WriteOptions) SetNoSlowdown(value bool) { + C.rocksdb_writeoptions_set_no_slowdown(opts.c, boolToChar(value)) +} + +// SetLowPri specifies the value of "low_pri". +// If true, this write request is of lower priority if compaction is +// behind. In this case, no_slowdown = true, the request will be cancelled +// immediately with Status::Incomplete() returned. Otherwise, it will be +// slowed down. The slowdown value is determined by RocksDB to guarantee +// it introduces minimum impacts to high priority writes. +// +// Default: false +func (opts *WriteOptions) SetLowPri(value bool) { + C.rocksdb_writeoptions_set_low_pri(opts.c, boolToChar(value)) +} + +// SetMemtableInsertHintPerBatch specifies the value of "memtable_insert_hint_per_batch". +// If true, this writebatch will maintain the last insert positions of each +// memtable as hints in concurrent write. It can improve write performance +// in concurrent writes if keys in one writebatch are sequential. In +// non-concurrent writes (when concurrent_memtable_writes is false) this +// option will be ignored. +// +// Default: false +func (opts *WriteOptions) SetMemtableInsertHintPerBatch(value bool) { + C.rocksdb_writeoptions_set_memtable_insert_hint_per_batch(opts.c, boolToChar(value)) +} + // Destroy deallocates the WriteOptions object. 
func (opts *WriteOptions) Destroy() { C.rocksdb_writeoptions_destroy(opts.c) From c73e12aa5623116ea2eb0fa1def6fb18978aba63 Mon Sep 17 00:00:00 2001 From: Flier Lu Date: Mon, 22 Mar 2021 23:02:24 +0800 Subject: [PATCH 2/3] use build tag rocksdb_v6 --- options.go | 139 ------------------------------- options_block_based_table.go | 12 --- options_block_based_table_v6.go | 17 ++++ options_v6.go | 142 ++++++++++++++++++++++++++++++++ options_write.go | 12 --- options_write_v6.go | 17 ++++ 6 files changed, 176 insertions(+), 163 deletions(-) create mode 100644 options_block_based_table_v6.go create mode 100644 options_v6.go create mode 100644 options_write_v6.go diff --git a/options.go b/options.go index 9cc8d739..6527a3e7 100644 --- a/options.go +++ b/options.go @@ -382,41 +382,6 @@ func (opts *Options) SetMinWriteBufferNumberToMerge(value int) { C.rocksdb_options_set_min_write_buffer_number_to_merge(opts.c, C.int(value)) } -// SetMaxWriteBufferSizeToMaintain sets max_write_buffer_size_to_maintain -// The total maximum size(bytes) of write buffers to maintain in memory -// including copies of buffers that have already been flushed. This parameter -// only affects trimming of flushed buffers and does not affect flushing. -// This controls the maximum amount of write history that will be available -// in memory for conflict checking when Transactions are used. The actual -// size of write history (flushed Memtables) might be higher than this limit -// if further trimming will reduce write history total size below this -// limit. For example, if max_write_buffer_size_to_maintain is set to 64MB, -// and there are three flushed Memtables, with sizes of 32MB, 20MB, 20MB. -// Because trimming the next Memtable of size 20MB will reduce total memory -// usage to 52MB which is below the limit, RocksDB will stop trimming. 
-// -// When using an OptimisticTransactionDB: -// If this value is too low, some transactions may fail at commit time due -// to not being able to determine whether there were any write conflicts. -// -// When using a TransactionDB: -// If Transaction::SetSnapshot is used, TransactionDB will read either -// in-memory write buffers or SST files to do write-conflict checking. -// Increasing this value can reduce the number of reads to SST files -// done for conflict detection. -// -// Setting this value to 0 will cause write buffers to be freed immediately -// after they are flushed. If this value is set to -1, -// 'max_write_buffer_number * write_buffer_size' will be used. -// -// Default: -// If using a TransactionDB/OptimisticTransactionDB, the default value will -// be set to the value of 'max_write_buffer_number * write_buffer_size' -// if it is not explicitly set by the user. Otherwise, the default is 0. -func (opts *Options) SetMaxWriteBufferSizeToMaintain(value int64) { - C.rocksdb_options_set_max_write_buffer_size_to_maintain(opts.c, C.int64_t(value)) -} - // SetMaxOpenFiles sets the number of open files that can be used by the DB. // // You may need to increase this if your database has a large working set @@ -452,15 +417,6 @@ func (opts *Options) SetCompression(value CompressionType) { C.rocksdb_options_set_compression(opts.c, C.int(value)) } -// SetBottommostCompression sets the compression algorithm -// that will be used for the bottommost level that contain files. -// -// Default: SnappyCompression, which gives lightweight but fast -// compression. -func (opts *Options) SetBottommostCompression(value CompressionType) { - C.rocksdb_options_set_bottommost_compression(opts.c, C.int(value)) -} - // SetCompressionPerLevel sets different compression algorithm per level. // // Different levels can have different compression policies. 
There @@ -484,26 +440,6 @@ func (opts *Options) SetMinLevelToCompress(value int) { C.rocksdb_options_set_min_level_to_compress(opts.c, C.int(value)) } -// SetCompressionOptions sets different options for compression algorithms. -// Default: nil -func (opts *Options) SetCompressionOptions(value *CompressionOptions) { - C.rocksdb_options_set_compression_options(opts.c, C.int(value.WindowBits), C.int(value.Level), C.int(value.Strategy), C.int(value.MaxDictBytes)) - if value.ZstdMaxTrainBytes > 0 { - C.rocksdb_options_set_compression_options_zstd_max_train_bytes(opts.c, C.int(value.ZstdMaxTrainBytes)) - } -} - -// SetBottommostCompressionOptions sets different options for compression algorithms used by bottommost_compression -// if it is enabled. To enable it, please see the definition of -// CompressionOptions. -// Default: nil -func (opts *Options) SetBottommostCompressionOptions(value *CompressionOptions, enabled bool) { - C.rocksdb_options_set_bottommost_compression_options(opts.c, C.int(value.WindowBits), C.int(value.Level), C.int(value.Strategy), C.int(value.MaxDictBytes), boolToChar(enabled)) - if value.ZstdMaxTrainBytes > 0 { - C.rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes(opts.c, C.int(value.ZstdMaxTrainBytes), boolToChar(enabled)) - } -} - // SetPrefixExtractor sets the prefic extractor. // // If set, use the specified function to determine the @@ -936,36 +872,6 @@ func (opts *Options) SetEnablePipelinedWrite(value bool) { C.rocksdb_options_set_enable_pipelined_write(opts.c, boolToChar(value)) } -// SetUnorderedWrite enables unordered write -// -// Setting unordered_write to true trades higher write throughput with -// relaxing the immutability guarantee of snapshots. This violates the -// repeatability one expects from ::Get from a snapshot, as well as -// ::MultiGet and Iterator's consistent-point-in-time view property. 
-// If the application cannot tolerate the relaxed guarantees, it can implement -// its own mechanisms to work around that and yet benefit from the higher -// throughput. Using TransactionDB with WRITE_PREPARED write policy and -// two_write_queues=true is one way to achieve immutable snapshots despite -// unordered_write. -// -// By default, i.e., when it is false, rocksdb does not advance the sequence -// number for new snapshots unless all the writes with lower sequence numbers -// are already finished. This provides the immutability that we except from -// snapshots. Moreover, since Iterator and MultiGet internally depend on -// snapshots, the snapshot immutability results into Iterator and MultiGet -// offering consistent-point-in-time view. If set to true, although -// Read-Your-Own-Write property is still provided, the snapshot immutability -// property is relaxed: the writes issued after the snapshot is obtained (with -// larger sequence numbers) will be still not visible to the reads from that -// snapshot, however, there still might be pending writes (with lower sequence -// number) that will change the state visible to the snapshot after they are -// landed to the memtable. -// -// Default: false -func (opts *Options) SetUnorderedWrite(value bool) { - C.rocksdb_options_set_unordered_write(opts.c, boolToChar(value)) -} - // SetMaxSubcompactions sets the maximum number of threads that will // concurrently perform a compaction job by breaking it into multiple, // smaller ones that are run simultaneously. @@ -1050,14 +956,6 @@ func (opts *Options) SetStatsDumpPeriodSec(value uint) { C.rocksdb_options_set_stats_dump_period_sec(opts.c, C.uint(value)) } -// SetStatsPersistPeriodSec sets the stats persist period in seconds. 
-// -// if not zero, dump rocksdb.stats to RocksDB every stats_persist_period_sec -// Default: 600 -func (opts *Options) SetStatsPersistPeriodSec(value int) { - C.rocksdb_options_set_stats_persist_period_sec(opts.c, C.uint(value)) -} - // SetAdviseRandomOnOpen specifies whether we will hint the underlying // file system that the file access pattern is random, when a sst file is opened. // Default: true @@ -1153,30 +1051,6 @@ func (opts *Options) SetFIFOCompactionOptions(value *FIFOCompactionOptions) { C.rocksdb_options_set_fifo_compaction_options(opts.c, value.c) } -// SetAtomicFlush sets atomic_flush -// If true, RocksDB supports flushing multiple column families and committing -// their results atomically to MANIFEST. Note that it is not -// necessary to set atomic_flush to true if WAL is always enabled since WAL -// allows the database to be restored to the last persistent state in WAL. -// This option is useful when there are column families with writes NOT -// protected by WAL. -// For manual flush, application has to specify which column families to -// flush atomically in DB::Flush. -// For auto-triggered flush, RocksDB atomically flushes ALL column families. -// -// Currently, any WAL-enabled writes after atomic flush may be replayed -// independently if the process crashes later and tries to recover. -func (opts *Options) SetAtomicFlush(value bool) { - C.rocksdb_options_set_atomic_flush(opts.c, boolToChar(value)) -} - -// SetRowCache sets a global cache for table-level rows. -// Default: nullptr (disabled) -// Not supported in ROCKSDB_LITE mode! -func (opts *Options) SetRowCache(cache *Cache) { - C.rocksdb_options_set_row_cache(opts.c, cache.c) -} - // GetStatisticsString returns the statistics as a string. 
func (opts *Options) GetStatisticsString() string { sString := C.rocksdb_options_statistics_get_string(opts.c) @@ -1419,19 +1293,6 @@ func (opts *Options) SetSkipStatsUpdateOnDbOpen(value bool) { C.rocksdb_options_set_skip_stats_update_on_db_open(opts.c, boolToChar(value)) } -// SetSkipCheckingSstFileSizesOnDbOpen sets skip_checking_sst_file_sizes_on_db_open -// If true, then DB::Open() will not fetch and check sizes of all sst files. -// This may significantly speed up startup if there are many sst files, -// especially when using non-default Env with expensive GetFileSize(). -// We'll still check that all required sst files exist. -// If paranoid_checks is false, this option is ignored, and sst files are -// not checked at all. -// -// Default: false -func (opts *Options) SetSkipCheckingSstFileSizesOnDbOpen(value bool) { - C.rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(opts.c, boolToChar(value)) -} - // Destroy deallocates the Options object. func (opts *Options) Destroy() { C.rocksdb_options_destroy(opts.c) diff --git a/options_block_based_table.go b/options_block_based_table.go index 465f7f0d..8c941433 100644 --- a/options_block_based_table.go +++ b/options_block_based_table.go @@ -243,15 +243,3 @@ func (opts *BlockBasedTableOptions) SetFormatVersion(version int) { func (opts *BlockBasedTableOptions) SetIndexType(value IndexType) { C.rocksdb_block_based_options_set_index_type(opts.c, C.int(value)) } - -// SetDataBlockIndexType sets the index type that will be used for the data block. -func (opts *BlockBasedTableOptions) SetDataBlockIndexType(value DataBlockIndexType) { - C.rocksdb_block_based_options_set_data_block_index_type(opts.c, C.int(value)) -} - -// SetDataBlockIndexType sets the hash radio that will be used for the data block. -// #entries/#buckets. It is valid only when data_block_hash_index_type is -// kDataBlockBinaryAndHash. 
-func (opts *BlockBasedTableOptions) SetDataBlockHashRadio(value float64) { - C.rocksdb_block_based_options_set_data_block_hash_ratio(opts.c, C.double(value)) -} diff --git a/options_block_based_table_v6.go b/options_block_based_table_v6.go new file mode 100644 index 00000000..696b2dab --- /dev/null +++ b/options_block_based_table_v6.go @@ -0,0 +1,17 @@ +// +build rocksdb_v6 + +package gorocksdb + +import "C" + +// SetDataBlockIndexType sets the index type that will be used for the data block. +func (opts *BlockBasedTableOptions) SetDataBlockIndexType(value DataBlockIndexType) { + C.rocksdb_block_based_options_set_data_block_index_type(opts.c, C.int(value)) +} + +// SetDataBlockHashRadio sets the hash ratio that will be used for the data block. +// #entries/#buckets. It is valid only when data_block_hash_index_type is +// kDataBlockBinaryAndHash. +func (opts *BlockBasedTableOptions) SetDataBlockHashRadio(value float64) { + C.rocksdb_block_based_options_set_data_block_hash_ratio(opts.c, C.double(value)) +} diff --git a/options_v6.go b/options_v6.go new file mode 100644 index 00000000..55111714 --- /dev/null +++ b/options_v6.go @@ -0,0 +1,142 @@ +// +build rocksdb_v6 + +package gorocksdb + +// SetAtomicFlush sets atomic_flush +// If true, RocksDB supports flushing multiple column families and committing +// their results atomically to MANIFEST. Note that it is not +// necessary to set atomic_flush to true if WAL is always enabled since WAL +// allows the database to be restored to the last persistent state in WAL. +// This option is useful when there are column families with writes NOT +// protected by WAL. +// For manual flush, application has to specify which column families to +// flush atomically in DB::Flush. +// For auto-triggered flush, RocksDB atomically flushes ALL column families. +// +// Currently, any WAL-enabled writes after atomic flush may be replayed +// independently if the process crashes later and tries to recover.
+func (opts *Options) SetAtomicFlush(value bool) { + C.rocksdb_options_set_atomic_flush(opts.c, boolToChar(value)) +} + +// SetBottommostCompression sets the compression algorithm +// that will be used for the bottommost level that contain files. +// +// Default: SnappyCompression, which gives lightweight but fast +// compression. +func (opts *Options) SetBottommostCompression(value CompressionType) { + C.rocksdb_options_set_bottommost_compression(opts.c, C.int(value)) +} + +// SetCompressionOptions sets different options for compression algorithms. +// Default: nil +func (opts *Options) SetCompressionOptions(value *CompressionOptions) { + C.rocksdb_options_set_compression_options(opts.c, C.int(value.WindowBits), C.int(value.Level), C.int(value.Strategy), C.int(value.MaxDictBytes)) + if value.ZstdMaxTrainBytes > 0 { + C.rocksdb_options_set_compression_options_zstd_max_train_bytes(opts.c, C.int(value.ZstdMaxTrainBytes)) + } +} + +// SetBottommostCompressionOptions sets different options for compression algorithms used by bottommost_compression +// if it is enabled. To enable it, please see the definition of +// CompressionOptions. +// Default: nil +func (opts *Options) SetBottommostCompressionOptions(value *CompressionOptions, enabled bool) { + C.rocksdb_options_set_bottommost_compression_options(opts.c, C.int(value.WindowBits), C.int(value.Level), C.int(value.Strategy), C.int(value.MaxDictBytes), boolToChar(enabled)) + if value.ZstdMaxTrainBytes > 0 { + C.rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes(opts.c, C.int(value.ZstdMaxTrainBytes), boolToChar(enabled)) + } +} + +// SetMaxWriteBufferSizeToMaintain sets max_write_buffer_size_to_maintain +// The total maximum size(bytes) of write buffers to maintain in memory +// including copies of buffers that have already been flushed. This parameter +// only affects trimming of flushed buffers and does not affect flushing. 
+// This controls the maximum amount of write history that will be available +// in memory for conflict checking when Transactions are used. The actual +// size of write history (flushed Memtables) might be higher than this limit +// if further trimming will reduce write history total size below this +// limit. For example, if max_write_buffer_size_to_maintain is set to 64MB, +// and there are three flushed Memtables, with sizes of 32MB, 20MB, 20MB. +// Because trimming the next Memtable of size 20MB will reduce total memory +// usage to 52MB which is below the limit, RocksDB will stop trimming. +// +// When using an OptimisticTransactionDB: +// If this value is too low, some transactions may fail at commit time due +// to not being able to determine whether there were any write conflicts. +// +// When using a TransactionDB: +// If Transaction::SetSnapshot is used, TransactionDB will read either +// in-memory write buffers or SST files to do write-conflict checking. +// Increasing this value can reduce the number of reads to SST files +// done for conflict detection. +// +// Setting this value to 0 will cause write buffers to be freed immediately +// after they are flushed. If this value is set to -1, +// 'max_write_buffer_number * write_buffer_size' will be used. +// +// Default: +// If using a TransactionDB/OptimisticTransactionDB, the default value will +// be set to the value of 'max_write_buffer_number * write_buffer_size' +// if it is not explicitly set by the user. Otherwise, the default is 0. +func (opts *Options) SetMaxWriteBufferSizeToMaintain(value int64) { + C.rocksdb_options_set_max_write_buffer_size_to_maintain(opts.c, C.int64_t(value)) +} + +// SetRowCache sets a global cache for table-level rows. +// Default: nullptr (disabled) +// Not supported in ROCKSDB_LITE mode! 
+func (opts *Options) SetRowCache(cache *Cache) { + C.rocksdb_options_set_row_cache(opts.c, cache.c) +} + +// SetSkipCheckingSstFileSizesOnDbOpen sets skip_checking_sst_file_sizes_on_db_open +// If true, then DB::Open() will not fetch and check sizes of all sst files. +// This may significantly speed up startup if there are many sst files, +// especially when using non-default Env with expensive GetFileSize(). +// We'll still check that all required sst files exist. +// If paranoid_checks is false, this option is ignored, and sst files are +// not checked at all. +// +// Default: false +func (opts *Options) SetSkipCheckingSstFileSizesOnDbOpen(value bool) { + C.rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(opts.c, boolToChar(value)) +} + +// SetStatsPersistPeriodSec sets the stats persist period in seconds. +// +// if not zero, dump rocksdb.stats to RocksDB every stats_persist_period_sec +// Default: 600 +func (opts *Options) SetStatsPersistPeriodSec(value int) { + C.rocksdb_options_set_stats_persist_period_sec(opts.c, C.uint(value)) +} + +// SetUnorderedWrite enables unordered write +// +// Setting unordered_write to true trades higher write throughput with +// relaxing the immutability guarantee of snapshots. This violates the +// repeatability one expects from ::Get from a snapshot, as well as +// ::MultiGet and Iterator's consistent-point-in-time view property. +// If the application cannot tolerate the relaxed guarantees, it can implement +// its own mechanisms to work around that and yet benefit from the higher +// throughput. Using TransactionDB with WRITE_PREPARED write policy and +// two_write_queues=true is one way to achieve immutable snapshots despite +// unordered_write. +// +// By default, i.e., when it is false, rocksdb does not advance the sequence +// number for new snapshots unless all the writes with lower sequence numbers +// are already finished. This provides the immutability that we expect from +// snapshots.
Moreover, since Iterator and MultiGet internally depend on +// snapshots, the snapshot immutability results into Iterator and MultiGet +// offering consistent-point-in-time view. If set to true, although +// Read-Your-Own-Write property is still provided, the snapshot immutability +// property is relaxed: the writes issued after the snapshot is obtained (with +// larger sequence numbers) will be still not visible to the reads from that +// snapshot, however, there still might be pending writes (with lower sequence +// number) that will change the state visible to the snapshot after they are +// landed to the memtable. +// +// Default: false +func (opts *Options) SetUnorderedWrite(value bool) { + C.rocksdb_options_set_unordered_write(opts.c, boolToChar(value)) +} diff --git a/options_write.go b/options_write.go index ad52263a..0c713821 100644 --- a/options_write.go +++ b/options_write.go @@ -64,18 +64,6 @@ func (opts *WriteOptions) SetLowPri(value bool) { C.rocksdb_writeoptions_set_low_pri(opts.c, boolToChar(value)) } -// SetMemtableInsertHintPerBatch specifies the value of "memtable_insert_hint_per_batch". -// If true, this writebatch will maintain the last insert positions of each -// memtable as hints in concurrent write. It can improve write performance -// in concurrent writes if keys in one writebatch are sequential. In -// non-concurrent writes (when concurrent_memtable_writes is false) this -// option will be ignored. -// -// Default: false -func (opts *WriteOptions) SetMemtableInsertHintPerBatch(value bool) { - C.rocksdb_writeoptions_set_memtable_insert_hint_per_batch(opts.c, boolToChar(value)) -} - // Destroy deallocates the WriteOptions object. 
func (opts *WriteOptions) Destroy() { C.rocksdb_writeoptions_destroy(opts.c) diff --git a/options_write_v6.go b/options_write_v6.go new file mode 100644 index 00000000..93e986c9 --- /dev/null +++ b/options_write_v6.go @@ -0,0 +1,17 @@ +// +build rocksdb_v6 + +package gorocksdb + +import "C" + +// SetMemtableInsertHintPerBatch specifies the value of "memtable_insert_hint_per_batch". +// If true, this writebatch will maintain the last insert positions of each +// memtable as hints in concurrent write. It can improve write performance +// in concurrent writes if keys in one writebatch are sequential. In +// non-concurrent writes (when concurrent_memtable_writes is false) this +// option will be ignored. +// +// Default: false +func (opts *WriteOptions) SetMemtableInsertHintPerBatch(value bool) { + C.rocksdb_writeoptions_set_memtable_insert_hint_per_batch(opts.c, boolToChar(value)) +} From 51a3fbacb6696e0324f268244448c4e817323481 Mon Sep 17 00:00:00 2001 From: Flier Lu Date: Mon, 22 Mar 2021 23:37:31 +0800 Subject: [PATCH 3/3] move DataBlockIndexType to v6 --- options_block_based_table.go | 8 -------- options_block_based_table_v6.go | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/options_block_based_table.go b/options_block_based_table.go index 8c941433..80244132 100644 --- a/options_block_based_table.go +++ b/options_block_based_table.go @@ -18,14 +18,6 @@ const ( KTwoLevelIndexSearchIndexType = 2 ) -// DataBlockIndexType specifies the index type that will be used for the data block. -type DataBlockIndexType byte - -const ( - kDataBlockBinarySearch = 0 // traditional block type - kDataBlockBinaryAndHash = 1 // additional hash index -) - // BlockBasedTableOptions represents block-based table options. 
type BlockBasedTableOptions struct { c *C.rocksdb_block_based_table_options_t diff --git a/options_block_based_table_v6.go b/options_block_based_table_v6.go index 696b2dab..c2bb1fd7 100644 --- a/options_block_based_table_v6.go +++ b/options_block_based_table_v6.go @@ -4,6 +4,14 @@ package gorocksdb import "C" +// DataBlockIndexType specifies the index type that will be used for the data block. +type DataBlockIndexType byte + +const ( + KDataBlockBinarySearch = 0 // traditional block type + KDataBlockBinaryAndHash = 1 // additional hash index +) + // SetDataBlockIndexType sets the index type that will be used for the data block. func (opts *BlockBasedTableOptions) SetDataBlockIndexType(value DataBlockIndexType) { C.rocksdb_block_based_options_set_data_block_index_type(opts.c, C.int(value))