Skip to content
Merged
2 changes: 1 addition & 1 deletion CacheAdvance.podspec
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Pod::Spec.new do |s|
Comment thread
dfed marked this conversation as resolved.
s.name = 'CacheAdvance'
s.version = '1.2.4'
s.version = '1.2.5'
s.license = 'Apache License, Version 2.0'
s.summary = 'A performant cache for logging systems. CacheAdvance persists log events 30x faster than SQLite.'
s.homepage = 'https://github.com/dfed/CacheAdvance'
Expand Down
35 changes: 29 additions & 6 deletions Sources/CacheAdvance/CacheAdvance.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ public final class CacheAdvance<T: Codable> {
self.init(
fileURL: fileURL,
writer: try FileHandle(forWritingTo: fileURL),
reader: try CacheReader(
forReadingFrom: fileURL,
maximumBytes: maximumBytes),
reader: try CacheReader(forReadingFrom: fileURL),
header: try CacheHeaderHandle(
forReadingFrom: fileURL,
maximumBytes: maximumBytes,
Expand Down Expand Up @@ -172,10 +170,35 @@ public final class CacheAdvance<T: Codable> {
try header.checkFile()

var messages = [T]()
while let encodedMessage = try reader.nextEncodedMessage() {
messages.append(try decoder.decode(T.self, from: encodedMessage))
}
if reader.offsetInFileOfOldestMessage < reader.offsetInFileAtEndOfNewestMessage {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the cache is empty I assume that reader.offsetInFileOfOldestMessage == reader.offsetInFileAtEndOfNewestMessage? I wonder if it would be best to explicitly handle the case of these two values being equal, to make this code easier to reason about.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would also be nice to validate my assumption that reader.offsetInFileOfOldestMessage == reader.offsetInFileAtEndOfNewestMessage when the cache is empty.

@dfed dfed Nov 8, 2022

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the cache is empty I assume that reader.offsetInFileOfOldestMessage == reader.offsetInFileAtEndOfNewestMessage

Per this comment block

// up until the current position of the writing handle – which is at the end of the newest persisted message. This algorithm implies that if
// the reading handle and the writing handle are at the same position in the file, then the file is empty. Therefore, when writing a message

when the reading handle and the writing handle point at the same position the file is empty. The reader starts out at header.offsetInFileOfOldestMessage, and the writer starts out at header.offsetInFileAtEndOfNewestMessage. And our reader.offsetInFileOfOldestMessage and reader.offsetInFileAtEndOfNewestMessage should always be set to the same values as those in the header, so your assumption is indeed true. I like the idea of explicitly handling this case.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this idea too. Handling == as the empty case will make it clearer.

// There is only one range: | `offsetInFileOfOldestMessage` -> `offsetInFileAtEndOfNewestMessage` |
let encodedMessages = try reader.encodedMessagesFromOffset(
reader.offsetInFileOfOldestMessage,
endOffset: reader.offsetInFileAtEndOfNewestMessage)
for encodedMessage in encodedMessages {
messages.append(try decoder.decode(T.self, from: encodedMessage))
}
} else if reader.offsetInFileOfOldestMessage == reader.offsetInFileAtEndOfNewestMessage {
// This is an empty cache.
return []

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

} else {
// In this case, the messages could be split into two ranges
// | First Range | (GAP: ignore) | Second Range |
Comment thread
bachand marked this conversation as resolved.

// This is the second range: | `offsetInFileOfOldestMessage` -> EOF |
let olderMessages = try reader.encodedMessagesFromOffset(reader.offsetInFileOfOldestMessage)
for encodedMessage in olderMessages {
messages.append(try decoder.decode(T.self, from: encodedMessage))
}

// This is the first range: | `expectedEndOfHeaderInFile` -> `offsetInFileAtEndOfNewestMessage` |
let newerMessages = try reader.encodedMessagesFromOffset(
FileHeader.expectedEndOfHeaderInFile,
endOffset: reader.offsetInFileAtEndOfNewestMessage)
for encodedMessage in newerMessages {
messages.append(try decoder.decode(T.self, from: encodedMessage))
}
}
// Now that we've read all messages, seek back to the oldest message.
try reader.seekToBeginningOfOldestMessage()

Expand Down
88 changes: 40 additions & 48 deletions Sources/CacheAdvance/CacheReader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,9 @@ final class CacheReader {

/// Creates a new instance of the receiver.
///
/// - Parameters:
/// - file: The file URL indicating the desired location of the on-disk store. This file should already exist.
/// - maximumBytes: The maximum size of the cache, in bytes.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👌

init(forReadingFrom file: URL, maximumBytes: Bytes) throws {
/// - Parameter file: The file URL indicating the desired location of the on-disk store. This file should already exist.
init(forReadingFrom file: URL) throws {
reader = try FileHandle(forReadingFrom: file)
self.maximumBytes = maximumBytes
}

deinit {
Expand All @@ -44,51 +41,27 @@ final class CacheReader {
reader.offsetInFile
}

/// Returns the next encodable message, seeking to the beginning of the next message.
func nextEncodedMessage() throws -> Data? {
let startingOffset = offsetInFile

guard startingOffset != offsetInFileAtEndOfNewestMessage else {
// We're at the last message.
return nil
}

switch try nextEncodedMessageSpan() {
case let .span(messageLength):
// Check our assumptions before we try to read the message.
let endOfMessage = startingOffset + UInt64(MessageSpan.storageLength) + UInt64(messageLength)
let startingOffsetIsBeforeEndOfNewestMessageAndDoesNotExceedEndOfNewestMessage = startingOffset < offsetInFileAtEndOfNewestMessage && endOfMessage <= offsetInFileAtEndOfNewestMessage
let startingOffsetIsAfterEndOfNewestMessageAndDoesNotExceedEndOfFile = offsetInFileAtEndOfNewestMessage < startingOffset && endOfMessage <= maximumBytes
guard
startingOffsetIsBeforeEndOfNewestMessageAndDoesNotExceedEndOfNewestMessage
|| startingOffsetIsAfterEndOfNewestMessageAndDoesNotExceedEndOfFile
else {
// The offsetInFileAtEndOfNewestMessage is incorrect. This likely occurred due to a crash when writing our header file.
throw CacheAdvanceError.fileCorrupted
}
Comment thread
dfed marked this conversation as resolved.

let message = try reader.readDataUp(toLength: Int(messageLength))
guard message.count > 0 else {
throw CacheAdvanceError.fileCorrupted
}

return message

case .emptyRead:
guard offsetInFileAtEndOfNewestMessage < startingOffset else {
// We started reading before the offset of the end of the newest message, therefore we expected a message to be read. We instead read an empty space, meaning that the file is corrupt.
throw CacheAdvanceError.fileCorrupted
/// Returns the encodable messages in a range
///
/// - Parameter startOffset: the offset from which to start reading
/// - Parameter endOffset: the offset at which to stop reading. If `nil`, the end offset will be the EOF
func encodedMessagesFromOffset(_ startOffset: UInt64, endOffset: UInt64? = nil) throws -> [Data] {
Comment thread
dfed marked this conversation as resolved.
var encodedMessages = [Data]()
try reader.seek(to: startOffset)
while let data = try nextEncodedMessage() {
encodedMessages.append(data)
if let endOffset = endOffset {
if offsetInFile == endOffset {
break
} else if offsetInFile > endOffset {
throw CacheAdvanceError.fileCorrupted
}
}

// We know the next message is at the end of the file header. Let's seek to it.
try reader.seek(to: FileHeader.expectedEndOfHeaderInFile)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that the library is safer against unknown unknown errors without this line of code.


// We know there's a message to read now that we're at the start of the file.
return try nextEncodedMessage()

case .invalidFormat:
}
if let endOffset = endOffset, offsetInFile != endOffset {

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need this if anymore given the if within the while loop?

@jianjunwoo jianjunwoo Nov 9, 2022

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes no difference compared to the previous code. However, handling == and > separately is more readable. @dfed

                if offsetInFile >= endOffset {
                    break
                }

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh. This code is catching the offsetInFile < endOffset case. Maybe we could make that explicit here? Far from necessary, but it could make the intent more clear if I'm reading this right.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oooh, I misunderstood your question. The if within the while loop is needed to check offsetInFile == endOffset to stop at the end.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh. This code is catching the offsetInFile < endOffset case. Maybe we could make that explicit here? Far from necessary, but it could make the intent more clear if I'm reading this right.

I agree it would be nice to make this more clear.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll take this in a follow-up PR

throw CacheAdvanceError.fileCorrupted
}
return encodedMessages
}

/// Seeks to the beginning of the oldest message in the file.
Expand All @@ -114,6 +87,26 @@ final class CacheReader {

// MARK: Private

/// Returns the next encodable message, seeking to the beginning of the next message.
private func nextEncodedMessage() throws -> Data? {

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mark this func as private since it is not used by other files any more.

switch try nextEncodedMessageSpan() {
case let .span(messageLength):
let message = try reader.readDataUp(toLength: Int(messageLength))
guard message.count > 0 else {
throw CacheAdvanceError.fileCorrupted
}

return message

case .emptyRead:
// An empty read means we hit the EOF. It is the responsibility of the calling code to validate this assumption.
return nil

case .invalidFormat:
throw CacheAdvanceError.fileCorrupted
}
}

/// Returns the next encoded message span, seeking to the end of the span.
private func nextEncodedMessageSpan() throws -> NextMessageSpan {
let messageSizeData = try reader.readDataUp(toLength: MessageSpan.storageLength)
Expand All @@ -132,7 +125,6 @@ final class CacheReader {
}

private let reader: FileHandle
private let maximumBytes: Bytes

}

Expand Down
11 changes: 4 additions & 7 deletions Tests/CacheAdvanceTests/CacheAdvanceTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
Expand Down Expand Up @@ -110,8 +110,7 @@ final class CacheAdvanceTests: XCTestCase {
fileURL: testFileLocation,
writer: try FileHandle(forWritingTo: testFileLocation),
reader: try CacheReader(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes),
forReadingFrom: testFileLocation),
header: header,
decoder: JSONDecoder(),
encoder: JSONEncoder())
Expand Down Expand Up @@ -147,8 +146,7 @@ final class CacheAdvanceTests: XCTestCase {
fileURL: testFileLocation,
writer: try FileHandle(forWritingTo: testFileLocation),
reader: try CacheReader(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes),
forReadingFrom: testFileLocation),
header: header,
decoder: JSONDecoder(),
encoder: JSONEncoder())
Expand Down Expand Up @@ -184,8 +182,7 @@ final class CacheAdvanceTests: XCTestCase {
fileURL: testFileLocation,
writer: try FileHandle(forWritingTo: testFileLocation),
reader: try CacheReader(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes),
forReadingFrom: testFileLocation),
header: header,
decoder: JSONDecoder(),
encoder: JSONEncoder())
Expand Down