Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions Sources/CodexBarCore/Vendored/CostUsage/CostUsageJsonl.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,24 @@ enum CostUsageJsonl {
try handle.seek(toOffset: UInt64(startOffset))
}

var buffer = Data()
buffer.reserveCapacity(64 * 1024)

var current = Data()
current.reserveCapacity(4 * 1024)
var lineBytes = 0
var truncated = false
var bytesRead: Int64 = 0

/// Adds one newline-free slice of the current read chunk to the line being assembled.
///
/// The slice's length always counts toward `lineBytes`. Its bytes are copied into
/// `current` only while the running length stays within both `maxLineBytes` and
/// `prefixBytes`; the first slice that crosses either limit marks the line as
/// truncated and discards whatever had been collected for it.
func appendSegment(_ segment: Data.SubSequence) {
    if segment.isEmpty { return }

    lineBytes += segment.count
    if truncated { return }

    // Exceeding either limit is the same as exceeding the smaller of the two.
    let byteLimit = min(maxLineBytes, prefixBytes)
    if lineBytes <= byteLimit {
        current.append(contentsOf: segment)
    } else {
        truncated = true
        current.removeAll(keepingCapacity: true)
    }
}

func flushLine() {
guard lineBytes > 0 else { return }
let line = Line(bytes: current, wasTruncated: truncated)
Expand All @@ -49,23 +58,14 @@ enum CostUsageJsonl {
}

bytesRead += Int64(chunk.count)
buffer.append(chunk)

while true {
guard let nl = buffer.firstIndex(of: 0x0A) else { break }
let linePart = buffer[..<nl]
buffer.removeSubrange(...nl)

lineBytes += linePart.count
if !truncated {
if lineBytes > maxLineBytes || lineBytes > prefixBytes {
truncated = true
current.removeAll(keepingCapacity: true)
} else {
current.append(contentsOf: linePart)
}
}
var segmentStart = chunk.startIndex
while let nl = chunk[segmentStart...].firstIndex(of: 0x0A) {
appendSegment(chunk[segmentStart..<nl])
flushLine()
segmentStart = chunk.index(after: nl)
}
if segmentStart < chunk.endIndex {
appendSegment(chunk[segmentStart..<chunk.endIndex])
}
}

Expand Down
204 changes: 204 additions & 0 deletions Tests/CodexBarTests/CostUsageJsonlPerformanceTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
import Foundation
import Testing
@testable import CodexBarCore

/// Benchmark-style comparison of `CostUsageJsonl.scan` against the front-buffer
/// baseline scanner defined in this file.
@Suite(.serialized)
struct CostUsageJsonlPerformanceTests {
    /// Scans a 20 000-line fixture with both scanners, requires identical summaries,
    /// and then requires the current scanner's fastest run to be at least 5x quicker
    /// than the baseline's fastest run.
    ///
    /// NOTE(review): the final expectation compares wall-clock timings, so it may be
    /// sensitive to load on the machine running the tests — confirm this is acceptable.
    @Test
    func scannerBenchmarkBeatsFrontBufferBaseline() throws {
        let fileManager = FileManager.default
        let workDir = fileManager.temporaryDirectory.appendingPathComponent(
            "codexbar-cost-usage-bench-\(UUID().uuidString)",
            isDirectory: true)
        try fileManager.createDirectory(at: workDir, withIntermediateDirectories: true)
        defer { try? fileManager.removeItem(at: workDir) }

        let expectedLineCount = 20000
        let record = #"{"type":"assistant","message":{"usage":{"input_tokens":1,"output_tokens":2}}}"#
        let fileURL = workDir.appendingPathComponent("scanner-benchmark.jsonl", isDirectory: false)
        try makeBenchmarkFixture(line: record, lineCount: expectedLineCount).write(to: fileURL)

        // Both limits are far above the fixture's line length, so nothing is truncated.
        let maxLineBytes = 8192
        let prefixBytes = 8192

        // Runs one full scan of the fixture with the given scanner.
        func summary(using scanner: JsonlScanner) throws -> JsonlScanSummary {
            try summarizeScan(
                fileURL: fileURL,
                maxLineBytes: maxLineBytes,
                prefixBytes: prefixBytes,
                scanner: scanner)
        }

        // Fastest of three timed scans of the fixture with the given scanner.
        func fastestRun(using scanner: JsonlScanner) throws -> UInt64 {
            try fastestScanDurationNanoseconds(
                runs: 3,
                fileURL: fileURL,
                maxLineBytes: maxLineBytes,
                prefixBytes: prefixBytes,
                scanner: scanner)
        }

        let currentSummary = try summary(using: CostUsageJsonl.scan)
        let baselineSummary = try summary(using: scanWithFrontBufferBaseline)

        #expect(currentSummary == baselineSummary)
        #expect(currentSummary.lineCount == expectedLineCount)
        #expect(currentSummary.truncatedCount == 0)

        // Warm up both code paths before timing.
        _ = try summary(using: CostUsageJsonl.scan)
        _ = try summary(using: scanWithFrontBufferBaseline)

        let currentFastest = try fastestRun(using: CostUsageJsonl.scan)
        let baselineFastest = try fastestRun(using: scanWithFrontBufferBaseline)

        let speedup = Double(baselineFastest) / Double(currentFastest)
        #expect(speedup >= 5.0)
    }
}

/// Aggregate figures collected from one complete scan of a JSONL file.
/// `Equatable` so that the summaries of two scanners can be compared with `==`.
private struct JsonlScanSummary: Equatable {
/// Number of lines the scanner delivered to its callback.
let lineCount: Int
/// Number of delivered lines whose `wasTruncated` flag was set.
let truncatedCount: Int
/// Sum of `bytes.count` over all delivered lines.
let payloadByteCount: Int
/// Offset value returned by the scanner when the scan finished.
let endOffset: Int64
}

/// Common function shape for the scanners compared in this file, so that either one
/// can be passed to the summarizing and timing helpers.
///
/// Arguments, in order: the file to scan, the byte offset to start from, the
/// maximum line size, the prefix size, and a callback invoked for each line.
/// The result is an `Int64` offset.
private typealias JsonlScanner = (
_ fileURL: URL,
_ offset: Int64,
_ maxLineBytes: Int,
_ prefixBytes: Int,
_ onLine: (CostUsageJsonl.Line) -> Void) throws -> Int64

/// Builds the benchmark input: `line` encoded as UTF-8 and followed by a newline
/// byte, repeated `lineCount` times.
///
/// - Parameters:
///   - line: Text of a single record, without a trailing newline.
///   - lineCount: How many times the record is repeated.
/// - Returns: The concatenated bytes; every record ends with `0x0A`.
private func makeBenchmarkFixture(line: String, lineCount: Int) -> Data {
    // Assemble one newline-terminated record up front, then repeat it.
    var record = Data(line.utf8)
    record.append(0x0A)

    var fixture = Data()
    fixture.reserveCapacity(record.count * lineCount)
    (0..<lineCount).forEach { _ in
        fixture.append(record)
    }
    return fixture
}

/// Runs `scanner` over `fileURL` from offset 0 and tallies what it reports.
///
/// - Parameters:
///   - fileURL: File handed to the scanner.
///   - maxLineBytes: Forwarded unchanged to the scanner.
///   - prefixBytes: Forwarded unchanged to the scanner.
///   - scanner: Scanner implementation to exercise.
/// - Returns: Line count, truncated-line count, total payload bytes, and the
///   offset the scanner returned.
/// - Throws: Whatever `scanner` throws.
private func summarizeScan(
    fileURL: URL,
    maxLineBytes: Int,
    prefixBytes: Int,
    scanner: JsonlScanner) throws -> JsonlScanSummary
{
    var lines = 0
    var truncatedLines = 0
    var payloadBytes = 0

    let finalOffset = try scanner(fileURL, 0, maxLineBytes, prefixBytes) { scannedLine in
        lines += 1
        truncatedLines += scannedLine.wasTruncated ? 1 : 0
        payloadBytes += scannedLine.bytes.count
    }

    return JsonlScanSummary(
        lineCount: lines,
        truncatedCount: truncatedLines,
        payloadByteCount: payloadBytes,
        endOffset: finalOffset)
}

/// Times `runs` consecutive full scans and reports the quickest one.
///
/// - Parameters:
///   - runs: Number of timed scans to perform.
///   - fileURL: File handed to the scanner on every run.
///   - maxLineBytes: Forwarded unchanged to the scanner.
///   - prefixBytes: Forwarded unchanged to the scanner.
///   - scanner: Scanner implementation to time.
/// - Returns: The smallest elapsed time in nanoseconds, measured with
///   `DispatchTime` uptime; `UInt64.max` when `runs` is 0.
/// - Throws: Whatever the scan throws.
private func fastestScanDurationNanoseconds(
    runs: Int,
    fileURL: URL,
    maxLineBytes: Int,
    prefixBytes: Int,
    scanner: JsonlScanner) throws -> UInt64
{
    let samples: [UInt64] = try (0..<runs).map { _ in
        let begin = DispatchTime.now().uptimeNanoseconds
        _ = try summarizeScan(
            fileURL: fileURL,
            maxLineBytes: maxLineBytes,
            prefixBytes: prefixBytes,
            scanner: scanner)
        return DispatchTime.now().uptimeNanoseconds - begin
    }
    // No samples (zero runs) keeps the sentinel the loop-based version started from.
    return samples.min() ?? UInt64.max
}

/// Front-buffer JSONL scanner used as the reference implementation in benchmarks.
///
/// Each chunk read from the file is appended to one `buffer`; every complete line
/// is then sliced off the front of that buffer and removed with `removeSubrange`.
/// The per-line front removal is what makes this a "front buffer" scanner and is
/// intentionally left as is — presumably it is the cost the benchmark is meant to
/// expose, so do not optimize it here.
///
/// - Parameters:
///   - fileURL: File to read.
///   - offset: Byte offset to start from; negative values are treated as 0.
///   - maxLineBytes: A line longer than this is reported as truncated with empty bytes.
///   - prefixBytes: A line longer than this is likewise reported as truncated.
///   - onLine: Called once for every non-empty line, in file order. A final line
///     that is not terminated by a newline is reported too.
/// - Returns: The start offset plus the number of bytes read.
/// - Throws: Errors from opening, seeking, or reading the file.
@discardableResult
private func scanWithFrontBufferBaseline(
    fileURL: URL,
    offset: Int64 = 0,
    maxLineBytes: Int,
    prefixBytes: Int,
    onLine: (CostUsageJsonl.Line) -> Void) throws
    -> Int64
{
    let handle = try FileHandle(forReadingFrom: fileURL)
    defer { try? handle.close() }

    let startOffset = max(0, offset)
    if startOffset > 0 {
        try handle.seek(toOffset: UInt64(startOffset))
    }

    var buffer = Data()
    buffer.reserveCapacity(64 * 1024)

    var current = Data()
    current.reserveCapacity(4 * 1024)
    var lineBytes = 0
    var truncated = false
    var bytesRead: Int64 = 0

    // Accounts for `part` as belonging to the line in progress and keeps its
    // bytes unless the line has grown past either size limit.
    func appendLinePart(_ part: Data) {
        lineBytes += part.count
        guard !truncated else { return }
        if lineBytes > maxLineBytes || lineBytes > prefixBytes {
            truncated = true
            current.removeAll(keepingCapacity: true)
        } else {
            current.append(contentsOf: part)
        }
    }

    // Reports the line in progress (empty lines are skipped) and resets the state.
    func flushLine() {
        guard lineBytes > 0 else { return }
        onLine(.init(bytes: current, wasTruncated: truncated))
        current.removeAll(keepingCapacity: true)
        lineBytes = 0
        truncated = false
    }

    while true {
        let chunk = try handle.read(upToCount: 256 * 1024) ?? Data()
        if chunk.isEmpty {
            // Whatever is still buffered is a last line without a trailing newline.
            // Its bytes are already included in `bytesRead`, so it must be reported;
            // otherwise the returned offset would skip past data the caller never saw.
            if !buffer.isEmpty {
                appendLinePart(buffer)
                buffer.removeAll(keepingCapacity: true)
            }
            flushLine()
            break
        }

        bytesRead += Int64(chunk.count)
        buffer.append(chunk)

        while true {
            guard let nl = buffer.firstIndex(of: 0x0A) else { break }
            // Slice first, then drop the line and its newline from the front.
            let linePart = buffer[..<nl]
            buffer.removeSubrange(...nl)

            appendLinePart(linePart)
            flushLine()
        }
    }

    return startOffset + bytesRead
}
54 changes: 54 additions & 0 deletions Tests/CodexBarTests/CostUsageScannerTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,60 @@ struct CostUsageScannerTests {
#expect(report.data[0].outputTokens == 15)
#expect(report.data[0].totalTokens == 45)
}

/// A 300 000-byte line followed by a short one must come back as two complete,
/// untruncated lines, and the returned offset must equal the file's byte length.
///
/// NOTE(review): the long line is presumably sized to exceed one read chunk of the
/// scanner; the chunk size is not visible from this test — confirm.
@Test
func jsonlScannerHandlesLinesAcrossReadChunks() throws {
    let env = try CostUsageTestEnvironment()
    defer { env.cleanup() }

    let oversizedLine = String(repeating: "x", count: 300_000)
    let contents = "\(oversizedLine)\nsmall\n"
    let fileURL = env.root.appendingPathComponent("large-lines.jsonl", isDirectory: false)
    try contents.write(to: fileURL, atomically: true, encoding: .utf8)

    // Limits are above the long line's size, so no truncation is expected.
    var observed: [(count: Int, truncated: Bool)] = []
    let endOffset = try CostUsageJsonl.scan(
        fileURL: fileURL,
        maxLineBytes: 400_000,
        prefixBytes: 400_000)
    { scannedLine in
        observed.append((scannedLine.bytes.count, scannedLine.wasTruncated))
    }

    #expect(endOffset == Int64(contents.utf8.count))
    #expect(observed.count == 2)
    #expect(observed[0].count == 300_000)
    #expect(!observed[0].truncated)
    #expect(observed[1].count == 5)
    #expect(!observed[1].truncated)
}

/// With `prefixBytes` set to 64, a 2-byte line must be delivered intact while a
/// 2000-byte line must be delivered with empty bytes and `wasTruncated == true`,
/// even though `maxLineBytes` (10000) would allow it.
@Test
func jsonlScannerMarksPrefixLimitedLinesAsTruncated() throws {
    let env = try CostUsageTestEnvironment()
    defer { env.cleanup() }

    let withinPrefix = "ok"
    let beyondPrefix = String(repeating: "a", count: 2000)
    let contents = withinPrefix + "\n" + beyondPrefix + "\n"
    let fileURL = env.root.appendingPathComponent("truncated-lines.jsonl", isDirectory: false)
    try contents.write(to: fileURL, atomically: true, encoding: .utf8)

    var delivered: [CostUsageJsonl.Line] = []
    _ = try CostUsageJsonl.scan(
        fileURL: fileURL,
        maxLineBytes: 10000,
        prefixBytes: 64)
    { scannedLine in
        delivered.append(scannedLine)
    }

    #expect(delivered.count == 2)

    // Short line: bytes preserved, not truncated.
    #expect(String(data: delivered[0].bytes, encoding: .utf8) == "ok")
    #expect(!delivered[0].wasTruncated)

    // Long line: payload dropped, flagged as truncated.
    #expect(delivered[1].bytes.isEmpty)
    #expect(delivered[1].wasTruncated)
}
}

private struct CostUsageTestEnvironment {
Expand Down