From 0fe4ba2a85c2ee77ebbd37ae45753020fcaee628 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 21 Jan 2025 14:20:54 -0500 Subject: [PATCH] validation: Use dirty entry count in flush warnings and disk space checks Changes flush warnings to use the actual number of dirty entries being written rather than total cache size or memory usage: * Moves warning from `FlushStateToDisk` to `CCoinsViewDB::BatchWrite` so it applies to both regular flushes and `AssumeUTXO` snapshot writes * Changes threshold from `WARN_FLUSH_COINS_SIZE` (1 GiB) to `WARN_FLUSH_COINS_COUNT` (10M entries), approximately equivalent - this also helps with the confusion caused by UTXO size difference on-disk vs in-memory * Moves benchmark logging to `BatchWrite` where the actual disk I/O occurs to make sure AssumeUTXO also warns * Uses dirty count for disk space check (48 bytes per entry estimate) * Removes redundant `changed` counter since `dirty_count` is now tracked This ensures users are warned appropriately even when only a fraction of the cache is dirty, and provides accurate warnings during `AssumeUTXO` loads. Co-authored-by: l0rinc --- src/coins.h | 2 ++ src/txdb.cpp | 14 ++++++++++---- src/validation.cpp | 12 +++--------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/coins.h b/src/coins.h index 9d12354f8f2..1844d920235 100644 --- a/src/coins.h +++ b/src/coins.h @@ -300,6 +300,8 @@ struct CoinsViewCacheCursor } inline bool WillErase(CoinsCachePair& current) const noexcept { return m_will_erase || current.second.coin.IsSpent(); } + size_t GetDirtyCount() const noexcept { return m_dirty_count; } + size_t GetTotalCount() const noexcept { return m_map.size(); } private: size_t& m_usage; size_t& m_dirty_count; diff --git a/src/txdb.cpp b/src/txdb.cpp index bb6ee2eb524..7a6750f4083 100644 --- a/src/txdb.cpp +++ b/src/txdb.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,9 @@ static constexpr uint8_t DB_HEAD_BLOCKS{'H'}; // Keys used in previous version that might still be found in the DB: static constexpr uint8_t DB_COINS{'c'}; +// Threshold for warning when writing this many dirty cache entries to disk. +static constexpr size_t WARN_FLUSH_COINS_COUNT{10'000'000}; + bool CCoinsViewDB::NeedsUpgrade() { std::unique_ptr cursor{m_db->NewIterator()}; @@ -93,7 +97,7 @@ std::vector CCoinsViewDB::GetHeadBlocks() const { bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashBlock) { CDBBatch batch(*m_db); size_t count = 0; - size_t changed = 0; + const size_t dirty_count{cursor.GetDirtyCount()}; assert(!hashBlock.IsNull()); uint256 old_tip = GetBestBlock(); @@ -109,6 +113,10 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashB } } + if (dirty_count > WARN_FLUSH_COINS_COUNT) LogWarning("Flushing large (%d entries) UTXO set to disk, it may take several minutes", dirty_count); + LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d out of %d cached coins)", + dirty_count, cursor.GetTotalCount()), BCLog::BENCH); + // In the first batch, mark the database as being in the middle of a // transition from old_tip to hashBlock. // A vector is used for future extensibility, as we may want to support @@ -124,8 +132,6 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashB } else { batch.Write(entry, it->second.coin); } - - changed++; } count++; it = cursor.NextAndMaybeErase(*it); @@ -150,7 +156,7 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashB LogDebug(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.ApproximateSize() * (1.0 / 1048576.0)); bool ret = m_db->WriteBatch(batch); - LogDebug(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...\n", (unsigned int)changed, (unsigned int)count); + LogDebug(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...", (unsigned int)dirty_count, (unsigned int)count); return ret; } diff --git a/src/validation.cpp b/src/validation.cpp index 7f97f99b479..5345ce7d462 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -88,8 +88,6 @@ using node::CBlockIndexHeightOnlyComparator; using node::CBlockIndexWorkComparator; using node::SnapshotMetadata; -/** Size threshold for warning about slow UTXO set flush to disk. */ -static constexpr size_t WARN_FLUSH_COINS_SIZE = 1 << 30; // 1 GiB /** Time window to wait between writing blocks/block index and chainstate to disk. * Randomize writing time inside the window to prevent a situation where the * network over time settles into a few cohorts of synchronized writers. @@ -2772,8 +2770,8 @@ bool Chainstate::FlushStateToDisk( std::set setFilesToPrune; bool full_flush_completed = false; - const size_t coins_count = CoinsTip().GetCacheSize(); - const size_t coins_mem_usage = CoinsTip().DynamicMemoryUsage(); + [[maybe_unused]] const size_t coins_count{CoinsTip().GetCacheSize()}; + [[maybe_unused]] const size_t coins_mem_usage{CoinsTip().DynamicMemoryUsage()}; try { { @@ -2867,16 +2865,12 @@ bool Chainstate::FlushStateToDisk( } if (!CoinsTip().GetBestBlock().IsNull()) { - if (coins_mem_usage >= WARN_FLUSH_COINS_SIZE) LogWarning("Flushing large (%d GiB) UTXO set to disk, it may take several minutes", coins_mem_usage >> 30); - LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d coins, %.2fKiB)", - coins_count, coins_mem_usage >> 10), BCLog::BENCH); - // Typical Coin structures on disk are around 48 bytes in size. // Pushing a new one to the database can cause it to be written // twice (once in the log, and once in the tables). This is already // an overestimation, as most will delete an existing entry or // overwrite one. Still, use a conservative safety factor of 2. - if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetCacheSize())) { + if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetDirtyCount())) { return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!")); } // Flush the chainstate (which may refer to block index entries).