Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CacheAdvance.podspec
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Pod::Spec.new do |s|
s.name = 'CacheAdvance'
s.version = '1.2.3'
s.version = '1.2.4'
s.license = 'Apache License, Version 2.0'
s.summary = 'A performant cache for logging systems. CacheAdvance persists log events 30x faster than SQLite.'
s.homepage = 'https://github.com/dfed/CacheAdvance'
Expand Down
4 changes: 3 additions & 1 deletion Sources/CacheAdvance/CacheAdvance.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ public final class CacheAdvance<T: Codable> {
self.init(
fileURL: fileURL,
writer: try FileHandle(forWritingTo: fileURL),
reader: try CacheReader(forReadingFrom: fileURL),
reader: try CacheReader(
forReadingFrom: fileURL,
maximumBytes: maximumBytes),
header: try CacheHeaderHandle(
forReadingFrom: fileURL,
maximumBytes: maximumBytes,
Expand Down
29 changes: 23 additions & 6 deletions Sources/CacheAdvance/CacheReader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ final class CacheReader {

/// Creates a new instance of the receiver.
///
/// - Parameter file: The file URL indicating the desired location of the on-disk store. This file should already exist.
init(forReadingFrom file: URL) throws {
/// - Parameters:
/// - file: The file URL indicating the desired location of the on-disk store. This file should already exist.
/// - maximumBytes: The maximum size of the cache, in bytes.
init(forReadingFrom file: URL, maximumBytes: Bytes) throws {
reader = try FileHandle(forReadingFrom: file)
self.maximumBytes = maximumBytes
}

deinit {
Expand All @@ -42,7 +45,7 @@ final class CacheReader {
}

/// Returns the next encoded message, seeking to the beginning of the next message.
func nextEncodedMessage(previousReadWasEmpty: Bool = false) throws -> Data? {

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The case we previously caught with previousReadWasEmpty is now caught by our new checks below!

func nextEncodedMessage() throws -> Data? {
let startingOffset = offsetInFile

guard startingOffset != offsetInFileAtEndOfNewestMessage else {
Expand All @@ -52,6 +55,18 @@ final class CacheReader {

switch try nextEncodedMessageSpan() {
case let .span(messageLength):
// Check our assumptions before we try to read the message.
let endOfMessage = startingOffset + UInt64(MessageSpan.storageLength) + UInt64(messageLength)
let startingOffsetIsBeforeEndOfNewestMessageAndDoesNotExceedEndOfNewestMessage = startingOffset < offsetInFileAtEndOfNewestMessage && endOfMessage <= offsetInFileAtEndOfNewestMessage
let startingOffsetIsAfterEndOfNewestMessageAndDoesNotExceedEndOfFile = offsetInFileAtEndOfNewestMessage < startingOffset && endOfMessage <= maximumBytes
guard
startingOffsetIsBeforeEndOfNewestMessageAndDoesNotExceedEndOfNewestMessage
|| startingOffsetIsAfterEndOfNewestMessageAndDoesNotExceedEndOfFile
else {
// The offsetInFileAtEndOfNewestMessage is incorrect. This likely occurred due to a crash when writing our header file.
throw CacheAdvanceError.fileCorrupted
Comment thread
bachand marked this conversation as resolved.
}

let message = try reader.readDataUp(toLength: Int(messageLength))
guard message.count > 0 else {
throw CacheAdvanceError.fileCorrupted
Expand All @@ -60,15 +75,16 @@ final class CacheReader {
return message

case .emptyRead:
guard !previousReadWasEmpty else {
// If the previous read was also empty, then the file has been corrupted.
guard offsetInFileAtEndOfNewestMessage < startingOffset else {
// We started reading before the offset of the end of the newest message, therefore we expected a message to be read. We instead read an empty space, meaning that the file is corrupt.
throw CacheAdvanceError.fileCorrupted
}

// We know the next message is at the end of the file header. Let's seek to it.
try reader.seek(to: FileHeader.expectedEndOfHeaderInFile)

// We know there's a message to read now that we're at the start of the file.
return try nextEncodedMessage(previousReadWasEmpty: true)
return try nextEncodedMessage()

case .invalidFormat:
throw CacheAdvanceError.fileCorrupted
Expand Down Expand Up @@ -116,6 +132,7 @@ final class CacheReader {
}

private let reader: FileHandle
private let maximumBytes: Bytes

}

Expand Down
2 changes: 1 addition & 1 deletion Sources/CacheAdvance/FileHeader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ struct FileHeader {
static let version: UInt8 = 1

/// Calculates the offset in the file where the header should end.
static var expectedEndOfHeaderInFile = Field(rawValue: Field.allCases.endIndex)!.expectedEndOfFieldInFile
static let expectedEndOfHeaderInFile = Field(rawValue: Field.allCases.endIndex)!.expectedEndOfFieldInFile

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is unrelated to the overall PR but I saw it and couldn't unsee it.


func data(for field: Field) -> Data {
switch field {
Expand Down
123 changes: 103 additions & 20 deletions Tests/CacheAdvanceTests/CacheAdvanceTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -96,31 +96,114 @@ final class CacheAdvanceTests: XCTestCase {
XCTAssertEqual(messages, [])
}

func test_messages_throwsFileCorruptedWhenOffsetInFileAtEndOfNewsetMessageOutOfSync() throws {

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test was passing before I deleted it in 4d3be01, but it executed the same code path as test_messages_throwsFileCorruptedWhenOffsetInFileAtEndOfNewestMessageIsBeyondEndOfNewestMessageButBeforeEndOfFile, and I liked the new test better since it didn't rely on random large numbers.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I'm understanding is this deleted test is basically a special case of test_messages_throwsFileCorruptedWhenOffsetInFileAtEndOfNewestMessageIsBeyondEndOfNewestMessageButBeforeEndOfFile where we had no messages. Is that correct?

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct!

let randomHighValue: UInt64 = 10_1000
func test_messages_whenOffsetInFileAtEndOfNewestMessageIsBeyondEndOfNewestMessageButBeforeEndOfFile_throwsFileCorrupted() throws {
let message: TestableMessage = "This is a test"
let requiredByteCount = try self.requiredByteCount(for: [message])
let maximumBytes = requiredByteCount + 2
Comment thread
bachand marked this conversation as resolved.
let header = try CacheHeaderHandle(
forReadingFrom: testFileLocation,
maximumBytes: randomHighValue,
maximumBytes: maximumBytes,
overwritesOldMessages: true)
let cache = CacheAdvance<TestableMessage>(
fileURL: testFileLocation,
writer: try FileHandle(forWritingTo: testFileLocation),
reader: try CacheReader(forReadingFrom: testFileLocation),
header: try CacheHeaderHandle(
forReadingFrom: testFileLocation,
maximumBytes: header.maximumBytes,
overwritesOldMessages: header.overwritesOldMessages),
decoder: JSONDecoder(),
encoder: JSONEncoder())

// Make sure the header data is persisted before we read it as part of the `messages()` call below.
try header.synchronizeHeaderData()
// Our file is empty. Make the file corrupted by setting the offset at end of newest message to be further in the file.
// This should never happen, but past versions of this repo could lead to a file having this kind of inconsistency if a crash occurred at the wrong time.

func makeCache() throws -> CacheAdvance<TestableMessage> {
return CacheAdvance<TestableMessage>(
fileURL: testFileLocation,
writer: try FileHandle(forWritingTo: testFileLocation),
reader: try CacheReader(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes),
header: header,
decoder: JSONDecoder(),
encoder: JSONEncoder())
}
let writingCache = try makeCache()
try writingCache.append(message: message)

// Make the file corrupted by setting the offset at end of newest message to be further in the file.
// This could happen if a crash occurred during a write of `header.offsetInFileAtEndOfNewestMessage` on a big-endian device.
Comment thread
bachand marked this conversation as resolved.
// Big-endian devices write the most significant digits first, meaning that if we were updating offsetInFileAtEndOfNewestMessage from 00001010 to 00010000, it would be possible to crash with the following bytes written to disk: 00011010.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Love it.

// The 00011010 value is a larger value than what we intended to write, which would lead to file corruption.
try header.updateOffsetInFileAtEndOfNewestMessage(
to: requiredByteCount + 1)

// Create a new cache instance that uses the corrupted data persisted to disk
let corruptedReadingCache = try makeCache()

XCTAssertThrowsError(try corruptedReadingCache.messages()) {
Comment thread
bachand marked this conversation as resolved.
XCTAssertEqual($0 as? CacheAdvanceError, CacheAdvanceError.fileCorrupted)
}
}

/// Verifies that `messages()` throws `CacheAdvanceError.fileCorrupted` when the persisted
/// `offsetInFileAtEndOfNewestMessage` has been advanced one byte past the value it held after a message was appended.
/// The cache is created with `maximumBytes` equal to the byte count required for that single message.
func test_messages_whenOffsetInFileAtEndOfNewestMessageIsBeyondEndOfFile_throwsFileCorrupted() throws {
let message: TestableMessage = "This is a test"
// Size the cache to exactly the byte count `requiredByteCount(for:)` reports for this one message.
let maximumBytes = try requiredByteCount(for: [message])
let header = try CacheHeaderHandle(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes,
overwritesOldMessages: true)

// Builds a cache over the test file with a new writer and reader on each call, reusing the `header` handle created above.
func makeCache() throws -> CacheAdvance<TestableMessage> {
return CacheAdvance<TestableMessage>(
fileURL: testFileLocation,
writer: try FileHandle(forWritingTo: testFileLocation),
reader: try CacheReader(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes),
header: header,
decoder: JSONDecoder(),
encoder: JSONEncoder())
}
// Append a message through one cache instance before corrupting the header.
let writingCache = try makeCache()
try writingCache.append(message: message)

// Make the file corrupted by setting the offset at end of newest message to be further in the file.
// This could happen if a crash occurred during a write of `header.offsetInFileAtEndOfNewestMessage` on a big-endian device.
// Big-endian devices write the most significant digits first, meaning that if we were updating offsetInFileAtEndOfNewestMessage from 00001010 to 00010000, it would be possible to crash with the following bytes written to disk: 00011010.
// The 00011010 value is a larger value than what we intended to write, which would lead to file corruption.
try header.updateOffsetInFileAtEndOfNewestMessage(
to: header.offsetInFileAtEndOfNewestMessage + 1)

// Create a new cache instance that uses the corrupted data persisted to disk
let corruptedReadingCache = try makeCache()

// Reading through the new instance must surface the corruption as `fileCorrupted`.
XCTAssertThrowsError(try corruptedReadingCache.messages()) {
XCTAssertEqual($0 as? CacheAdvanceError, CacheAdvanceError.fileCorrupted)
}
}

func test_messages_whenOffsetInFileAtEndOfNewestMessageIsBeforeEndOfNewestMessage_throwsFileCorrupted() throws {
let message: TestableMessage = "This is a test"
let maximumBytes = try requiredByteCount(for: [message])
let header = try CacheHeaderHandle(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes,
overwritesOldMessages: true)

func makeCache() throws -> CacheAdvance<TestableMessage> {
return CacheAdvance<TestableMessage>(
fileURL: testFileLocation,
writer: try FileHandle(forWritingTo: testFileLocation),
reader: try CacheReader(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes),
header: header,
decoder: JSONDecoder(),
encoder: JSONEncoder())
}
let writingCache = try makeCache()
try writingCache.append(message: message)

// Make the file corrupted by setting the offset at end of newest message to be earlier in the file.
// This could happen if a crash occurred during a write of `header.offsetInFileAtEndOfNewestMessage` on a little-endian device.
// Little-endian devices write the least significant digits first, meaning that if we were updating offsetInFileAtEndOfNewestMessage from 01010000 to 00001000, it would be possible to crash with the following bytes written to disk: 00010000.
// The 00010000 value is a smaller value than what we intended to write, which would lead to file corruption.
try header.updateOffsetInFileAtEndOfNewestMessage(
to: FileHeader.expectedEndOfHeaderInFile + 1)
to: header.offsetInFileAtEndOfNewestMessage - 1)

// Create a new cache instance that uses the corrupted data persisted to disk
let corruptedReadingCache = try makeCache()

XCTAssertThrowsError(try cache.messages()) {
XCTAssertThrowsError(try corruptedReadingCache.messages()) {
Comment thread
bachand marked this conversation as resolved.
XCTAssertEqual($0 as? CacheAdvanceError, CacheAdvanceError.fileCorrupted)
}
}
Expand Down