Skip to content

Commit cb232a7

Browse files
committed
db_stress: Rebuild expected state from DB when trace is truncated in blackbox crash tests
Summary: In blackbox crash tests, db_stress can be killed at any point — including while it is actively appending records to the trace file. This leaves the recovered DB in a consistent state (thanks to WAL recovery), but the trace file shorter than expected: fewer operations were recorded than the DB's recovered sequence number implies. Previously, when FinishInitDb detected this situation it called FileExpectedStateManager::Restore() which ended up returning Status::Corruption("Trace ended before replaying all expected write ops"). db_stress treated that as a hard failure and called exit(1), causing spurious test failures that had nothing to do with actual data corruption. This PR fixes the two-part problem: 1. expected_state.cc – FileExpectedStateManager::Restore(): When the trace replay finishes (either because Prepare() signals the trace is already at EOF, or because Next() exhausts all records) but the handler has not replayed all expected write operations, return Status::Incomplete() instead of Status::Corruption(). Incomplete means "the trace ran out early, which is expected in a blackbox crash"; Corruption is reserved for actual data integrity problems. 2. db_stress_test_base.cc – StressTest::FinishInitDb() + new RebuildExpectedStateFromDb(): When Restore() returns Incomplete, fall back to scanning the recovered DB directly and rebuilding the expected state from its actual contents (the same approach used when there is no history at all). If the rebuild scan fails, or if Restore() returns any other non-OK status, still exit(1). The new RebuildExpectedStateFromDb() method iterates every column family, calls ClearColumnFamily() + SyncPut() for each key found, and validates wide column consistency and key-range sanity along the way. Test Plan: Covered by existing blackbox crash test modes (blackbox, cf_consistency, etc.) which can now survive a truncated trace without false failures. Reviewers: Subscribers: Tasks: Tags:
1 parent 3efe946 commit cb232a7

3 files changed

Lines changed: 85 additions & 9 deletions

File tree

db_stress_tool/db_stress_test_base.cc

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -466,15 +466,24 @@ void StressTest::FinishInitDb(SharedState* shared) {
466466
}
467467

468468
if (shared->HasHistory()) {
469-
// The way it works right now is, if there's any history, that means the
470-
// previous run mutating the DB had all its operations traced, in which case
471-
// we should always be able to `Restore()` the expected values to match the
472-
// `db_`'s current seqno.
469+
// Normally startup replays trace records to catch the expected state up to
470+
// `db_`'s recovered seqno. Blackbox crash tests can kill db_stress while
471+
// the trace file is being appended, so treat a truncated replay suffix as a
472+
// signal to rebuild the latest expected state directly from the DB.
473473
Status s = shared->Restore(db_);
474474
if (!s.ok()) {
475-
fprintf(stderr, "Error restoring historical expected values: %s\n",
476-
s.ToString().c_str());
477-
exit(1);
475+
if (s.IsIncomplete()) {
476+
fprintf(stdout,
477+
"Historical expected-state restore was incomplete during "
478+
"crash recovery; rebuilding from recovered DB: %s\n",
479+
s.ToString().c_str());
480+
s = RebuildExpectedStateFromDb(shared);
481+
}
482+
if (!s.ok()) {
483+
fprintf(stderr, "Error restoring historical expected values: %s\n",
484+
s.ToString().c_str());
485+
exit(1);
486+
}
478487
}
479488
}
480489
if (FLAGS_use_txn && !FLAGS_use_optimistic_txn) {
@@ -499,6 +508,60 @@ void StressTest::FinishInitDb(SharedState* shared) {
499508
}
500509
}
501510

511+
Status StressTest::RebuildExpectedStateFromDb(SharedState* shared) {
512+
assert(shared);
513+
514+
ReadOptions read_opts;
515+
read_opts.total_order_seek = true;
516+
517+
for (size_t cf_idx = 0; cf_idx < column_families_.size(); ++cf_idx) {
518+
const int cf = static_cast<int>(cf_idx);
519+
ColumnFamilyHandle* const cfh = column_families_[cf_idx];
520+
assert(cfh);
521+
522+
shared->LockColumnFamily(cf);
523+
shared->ClearColumnFamily(cf);
524+
525+
std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts, cfh));
526+
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
527+
if (!VerifyWideColumns(iter->value(), iter->columns())) {
528+
Status s = Status::Corruption(
529+
"Wide columns inconsistent while rebuilding expected state",
530+
cfh->GetName());
531+
shared->UnlockColumnFamily(cf);
532+
return s;
533+
}
534+
535+
uint64_t key_id = 0;
536+
if (!GetIntVal(iter->key().ToString(), &key_id)) {
537+
Status s = Status::Corruption(
538+
"Unable to parse key while rebuilding expected state",
539+
iter->key().ToString(/* hex */ true));
540+
shared->UnlockColumnFamily(cf);
541+
return s;
542+
}
543+
if (key_id >= static_cast<uint64_t>(shared->GetMaxKey())) {
544+
Status s = Status::Corruption(
545+
"Out-of-range key while rebuilding expected state",
546+
std::to_string(key_id));
547+
shared->UnlockColumnFamily(cf);
548+
return s;
549+
}
550+
551+
shared->SyncPut(cf, static_cast<int64_t>(key_id),
552+
GetValueBase(iter->value()));
553+
}
554+
555+
Status s = iter->status();
556+
shared->UnlockColumnFamily(cf);
557+
if (!s.ok()) {
558+
return s;
559+
}
560+
}
561+
562+
return Status::OK();
563+
}
564+
502565
void StressTest::TrackExpectedState(SharedState* shared) {
503566
// When data loss is simulated, recovery from potential data loss is a prefix
504567
// recovery that requires tracing

db_stress_tool/db_stress_test_base.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ class StressTest {
144144
void PreloadDbAndReopenAsReadOnly(int64_t number_of_keys,
145145
SharedState* shared);
146146

147+
// Rebuilds the latest expected state from the recovered DB contents.
148+
Status RebuildExpectedStateFromDb(SharedState* shared);
149+
147150
Status SetOptions(ThreadState* thread);
148151

149152
// For transactionsDB, there can be txns prepared but not yet committeed

db_stress_tool/expected_state.cc

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -770,10 +770,15 @@ Status FileExpectedStateManager::Restore(DB* db) {
770770
std::move(trace_reader), &replayer);
771771
}
772772

773+
bool reached_trace_end = false;
773774
if (s.ok()) {
774775
s = replayer->Prepare();
776+
if (s.IsIncomplete()) {
777+
reached_trace_end = true;
778+
s = Status::OK();
779+
}
775780
}
776-
for (; s.ok();) {
781+
for (; s.ok() && !reached_trace_end;) {
777782
std::unique_ptr<TraceRecord> record;
778783
s = replayer->Next(&record);
779784
if (!s.ok()) {
@@ -789,13 +794,18 @@ Status FileExpectedStateManager::Restore(DB* db) {
789794
// trace records.
790795
s = Status::OK();
791796
}
797+
reached_trace_end = true;
792798
break;
793799
}
794800
std::unique_ptr<TraceRecordResult> res;
795801
s = record->Accept(handler.get(), &res);
796802
}
797803
if (s.ok() && !handler->IsDone()) {
798-
s = Status::Corruption(
804+
// Blackbox crash tests can terminate db_stress while it is appending the
805+
// trace, leaving a valid recovered DB but a shorter trace suffix. Treat
806+
// that as an incomplete restore so startup can rebuild expected state
807+
// from the live DB instead of reporting a false corruption.
808+
s = Status::Incomplete(
799809
"Trace ended before replaying all expected write ops",
800810
std::to_string(handler->NumWriteOps()) + " < " +
801811
std::to_string(seqno - saved_seqno_));

0 commit comments

Comments
 (0)