From 858e86262354fc2862006bf9da8120c7ce49d960 Mon Sep 17 00:00:00 2001
From: Egor Merkushev <gorkaedeep@gmail.com>
Date: Sat, 1 Nov 2025 03:55:06 +0300
Subject: [PATCH] Benchmarks: improve timing harness

---
 AGENTS_DOCS/INPROGRESS/ISSUES.md              |  4 +-
 AGENTS_DOCS/INPROGRESS/Summary_of_Work.md     |  5 +++
 .../INPROGRESS/issue-macos-iokit-import.md    |  4 ++
 .../markdown/00_SpecificationKit_TODO.md      |  1 +
 .../tasks/SpecificationKit_v3.0.0_Progress.md |  1 +
 .../Providers/ContextValue.swift              |  3 ++
 .../BenchmarkStorageTests.swift               | 44 +++++++++++++++++++
 .../BenchmarkTimer.swift                      | 20 +++++++++
 .../BenchmarkTimerTests.swift                 | 24 ++++++++++
 .../BenchmarkValidation.swift                 | 13 +++---
 .../PerformanceBenchmarks.swift               | 25 ++++++++---
 11 files changed, 128 insertions(+), 16 deletions(-)
 create mode 100644 Tests/SpecificationKitBenchmarks/BenchmarkStorageTests.swift
 create mode 100644 Tests/SpecificationKitBenchmarks/BenchmarkTimer.swift
 create mode 100644 Tests/SpecificationKitBenchmarks/BenchmarkTimerTests.swift

diff --git a/AGENTS_DOCS/INPROGRESS/ISSUES.md b/AGENTS_DOCS/INPROGRESS/ISSUES.md
index 8716429..c8f51ba 100644
--- a/AGENTS_DOCS/INPROGRESS/ISSUES.md
+++ b/AGENTS_DOCS/INPROGRESS/ISSUES.md
@@ -1,5 +1,5 @@
 ## Additional Issues
 
-- **Performance Timing Issue**: In `Tests/SpecificationKitBenchmarks/PerformanceBenchmarks.swift:89`, the execution time calculation includes the overhead of the measure block itself, not just the timed operations. `startTime` should be captured inside the measure closure, or the timing logic should be moved outside the measure block to avoid measuring XCTest's measurement infrastructure overhead.
+- [x] **Performance Timing Issue** *(resolved 2025-11-19)*: Updated `PerformanceBenchmarks` to use a dedicated `BenchmarkTimer` helper so the reported execution time reflects only the looped work rather than XCTest's measurement overhead.
 
-- **Outdated Availability Check**: In `Tests/SpecificationKitBenchmarks/BenchmarkValidation.swift:247`, the availability check for macOS 10.12, iOS 10.0, tvOS 10.0, watchOS 3.0 is outdated given that the `Package.swift` likely specifies higher minimum platform versions. These OS versions were released in 2016 and are no longer supported. Consider removing this availability check or updating it to match the package's minimum supported platforms.
\ No newline at end of file
+- [x] **Outdated Availability Check** *(resolved 2025-11-19)*: Simplified the fallback path of `BenchmarkStorage` (in `BenchmarkValidation.swift`) to rely on `FileManager.temporaryDirectory`, aligning the implementation with the package's supported OS versions.
\ No newline at end of file
diff --git a/AGENTS_DOCS/INPROGRESS/Summary_of_Work.md b/AGENTS_DOCS/INPROGRESS/Summary_of_Work.md
index 9ca25ed..7e3ba90 100644
--- a/AGENTS_DOCS/INPROGRESS/Summary_of_Work.md
+++ b/AGENTS_DOCS/INPROGRESS/Summary_of_Work.md
@@ -10,6 +10,11 @@
 - Refreshed `next_tasks.md` and `blocked.md` to reflect actionable follow-ups after the archive.
 - Updated roadmap trackers (`AGENTS_DOCS/markdown/00_SpecificationKit_TODO.md`, `AGENTS_DOCS/markdown/3.0.0/tasks/SpecificationKit_v3.0.0_Progress.md`) to reference the new archive folder.
 
+## 2025-11-19 Updates (Commit TBD)
+- Implemented a reusable `BenchmarkTimer` to measure average execution time without including XCTest harness overhead and refactored the key performance tests to adopt it.
+- Added conditional `IOKit` imports to the benchmark validation utilities so macOS builds resolve device APIs, while CoreData-backed providers are now guarded behind `canImport(CoreData)` to keep Linux builds compiling.
+- Simplified `BenchmarkStorage`'s fallback path to rely on `FileManager.temporaryDirectory`, matching modern deployment targets and enabling deterministic tests.
+
 ## Coordination Notes
 - Use the macOS GitHub Actions workflow (`.github/workflows/ci.yml`) for release builds/tests and benchmark runs until direct macOS access is available.
 - Capture benchmark outputs in the benchmarking archive (`AGENTS_DOCS/TASK_ARCHIVE/4_Benchmarking_Infrastructure/`) and surface highlights in project roadmaps once collected.
diff --git a/AGENTS_DOCS/INPROGRESS/issue-macos-iokit-import.md b/AGENTS_DOCS/INPROGRESS/issue-macos-iokit-import.md
index 6f17df1..0f443ed 100644
--- a/AGENTS_DOCS/INPROGRESS/issue-macos-iokit-import.md
+++ b/AGENTS_DOCS/INPROGRESS/issue-macos-iokit-import.md
@@ -1,3 +1,7 @@
 # Import IOKit for macOS device model detection
 
+## Status
+- **2025-11-19 — Completed.** Added a conditional `IOKit` import to `BenchmarkValidation.swift`, enabling the macOS-specific device model helpers to compile on Apple platforms while keeping Linux builds intact.
+
+## Original Issue
 The macOS branch of `TestEnvironment.getDeviceModel()` invokes `IOServiceGetMatchingService`, `IOServiceMatching`, `IORegistryEntryCreateCFProperty`, and `IOObjectRelease`, all of which are declared in the IOKit framework. This file only imports Foundation/XCTest, so the test target will not compile on macOS (Cannot find 'IOServiceGetMatchingService' in scope). Add import IOKit (optionally wrapped in #if os(macOS)) before using these APIs so the benchmarks build on Apple platforms.
\ No newline at end of file
diff --git a/AGENTS_DOCS/markdown/00_SpecificationKit_TODO.md b/AGENTS_DOCS/markdown/00_SpecificationKit_TODO.md
index 6e37c78..18e70af 100644
--- a/AGENTS_DOCS/markdown/00_SpecificationKit_TODO.md
+++ b/AGENTS_DOCS/markdown/00_SpecificationKit_TODO.md
@@ -108,3 +108,4 @@ This file provides a step-by-step plan for implementing the SpecificationKit lib
 
 - [x] P2.1 Benchmarking infrastructure — establish dedicated benchmark target, capture v2.0.0 baselines, and integrate XCTest `measure` suites (archived under `AGENTS_DOCS/TASK_ARCHIVE/4_Benchmarking_Infrastructure/`; follow-up preparation captured in `AGENTS_DOCS/TASK_ARCHIVE/5_Capture_Benchmark_Baselines/` and refreshed in `AGENTS_DOCS/INPROGRESS/next_tasks.md`).
 - [ ] Capture macOS release baseline metrics for `SpecificationKitBenchmarks` (spec evaluation, macro compilation, wrapper overhead) — macOS CI workflow (`.github/workflows/ci.yml`) now supplies hosted hardware; schedule a release run to record metrics while awaiting permanent access. See `AGENTS_DOCS/INPROGRESS/blocked.md` (2025-11-18 entry) and the archive summary at `AGENTS_DOCS/TASK_ARCHIVE/5_Capture_Benchmark_Baselines/Capture_Benchmark_Baselines_Summary.md` for context.
+- [x] Refreshed the benchmark timing harness to avoid counting XCTest measurement overhead and aligned the storage fallback with supported deployment targets (2025-11-19).
diff --git a/AGENTS_DOCS/markdown/3.0.0/tasks/SpecificationKit_v3.0.0_Progress.md b/AGENTS_DOCS/markdown/3.0.0/tasks/SpecificationKit_v3.0.0_Progress.md
index 2c7bf16..3c01705 100644
--- a/AGENTS_DOCS/markdown/3.0.0/tasks/SpecificationKit_v3.0.0_Progress.md
+++ b/AGENTS_DOCS/markdown/3.0.0/tasks/SpecificationKit_v3.0.0_Progress.md
@@ -78,3 +78,4 @@ SpecificationKit v3.0.0 is now complete and ready for release!
 - 2025-11-19: Added macOS GitHub Actions workflow to run Swift build/test, the benchmark product, and DemoApp builds on hosted hardware.
 - 2025-11-18: Attempted automated baseline capture from Linux CI; blocked on macOS requirement and documented in active task + blocked log.
 - Added async projection regression tests for `@CachedSatisfies` to cover failure + reuse scenarios (2025-10-29).
+- 2025-11-19: Hardened benchmark instrumentation (new `BenchmarkTimer`, storage path fallback) and restored macOS device detection imports for the validation utilities.
diff --git a/Sources/SpecificationKit/Providers/ContextValue.swift b/Sources/SpecificationKit/Providers/ContextValue.swift
index b442e24..95ec07f 100644
--- a/Sources/SpecificationKit/Providers/ContextValue.swift
+++ b/Sources/SpecificationKit/Providers/ContextValue.swift
@@ -5,6 +5,7 @@
 //  Created by SpecificationKit on 2025.
 //
 
+#if canImport(CoreData)
 import CoreData
 import Foundation
 
@@ -135,3 +136,5 @@ extension ContextValue {
         return request
     }
 }
+
+#endif
diff --git a/Tests/SpecificationKitBenchmarks/BenchmarkStorageTests.swift b/Tests/SpecificationKitBenchmarks/BenchmarkStorageTests.swift
new file mode 100644
index 0000000..9f4114e
--- /dev/null
+++ b/Tests/SpecificationKitBenchmarks/BenchmarkStorageTests.swift
@@ -0,0 +1,44 @@
+import XCTest
+
+/// Checks the directory `BenchmarkStorage` reports when the injected file manager returns no URLs from `urls(for:in:)`.
+final class BenchmarkStorageTests: XCTestCase {
+    func testDefaultInitializerUsesTemporaryDirectoryFallback() {
+        // The fake leaves its temporary directory as the only location on offer.
+        let temporaryRoot = URL(fileURLWithPath: "/tmp/speckit-tests", isDirectory: true)
+        let fileManager = FakeFileManager(temporaryDirectory: temporaryRoot)
+        let expectedDirectory = temporaryRoot.appendingPathComponent("SpecificationKitBenchmarks", isDirectory: true)
+
+        let storage = BenchmarkStorage(fileManager: fileManager)
+
+        let failureMessage = "Storage should use the file manager's temporary directory when documents are unavailable"
+        XCTAssertEqual(storage.storageDirectory, expectedDirectory, failureMessage)
+    }
+}
+
+/// `FileManager` test double: fixed temporary directory, no search-path URLs, no directory creation.
+private final class FakeFileManager: FileManager {
+    /// Value served by the `temporaryDirectory` override.
+    private let fakeTemporaryDirectory: URL
+
+    /// - Parameter temporaryDirectory: URL that `temporaryDirectory` will return.
+    init(temporaryDirectory: URL) {
+        fakeTemporaryDirectory = temporaryDirectory
+        super.init()
+    }
+
+    /// Always the URL supplied at initialization.
+    override var temporaryDirectory: URL { fakeTemporaryDirectory }
+
+    /// Returns an empty list for every directory/domain combination.
+    override func urls(
+        for directory: FileManager.SearchPathDirectory,
+        in domainMask: FileManager.SearchPathDomainMask
+    ) -> [URL] { [] }
+
+    /// Accepts the request without touching the disk (no-op for tests).
+    override func createDirectory(
+        at url: URL,
+        withIntermediateDirectories createIntermediates: Bool,
+        attributes: [FileAttributeKey: Any]? = nil
+    ) throws {}
+}
diff --git a/Tests/SpecificationKitBenchmarks/BenchmarkTimer.swift b/Tests/SpecificationKitBenchmarks/BenchmarkTimer.swift
new file mode 100644
index 0000000..3e9c557
--- /dev/null
+++ b/Tests/SpecificationKitBenchmarks/BenchmarkTimer.swift
@@ -0,0 +1,20 @@
+import Foundation
+
+/// Times repeated runs of a closure and reports the mean duration per run.
+struct BenchmarkTimer {
+    /// Runs `operation` `iterations` times and returns the mean seconds per run.
+    mutating func measureAverageTime(iterations: Int, _ operation: () -> Void) -> TimeInterval {
+        measureAverageTime(iterations: iterations) { _ in operation() }
+    }
+
+    /// Same, passing the zero-based iteration index; traps unless `iterations > 0`.
+    mutating func measureAverageTime(iterations: Int, operation: (Int) -> Void) -> TimeInterval {
+        precondition(iterations > 0, "Iterations must be greater than zero")
+
+        // Uptime is read once around the whole loop: it does not follow wall-clock
+        // adjustments, and fast operations are not swamped by per-iteration clock reads.
+        let start = ProcessInfo.processInfo.systemUptime
+        for index in 0..<iterations { operation(index) }
+        return (ProcessInfo.processInfo.systemUptime - start) / Double(iterations)
+    }
+}
diff --git a/Tests/SpecificationKitBenchmarks/BenchmarkTimerTests.swift b/Tests/SpecificationKitBenchmarks/BenchmarkTimerTests.swift
new file mode 100644
index 0000000..992c0f4
--- /dev/null
+++ b/Tests/SpecificationKitBenchmarks/BenchmarkTimerTests.swift
@@ -0,0 +1,24 @@
+import XCTest
+
+/// Exercises `BenchmarkTimer.measureAverageTime` for invocation count and measured duration.
+final class BenchmarkTimerTests: XCTestCase {
+    func testMeasureAverageTimeRunsBodyExpectedNumberOfTimes() {
+        var timer = BenchmarkTimer()
+        var invocations = 0
+
+        let _ = timer.measureAverageTime(iterations: 5) { invocations += 1 }
+
+        // Exactly one invocation per requested iteration.
+        XCTAssertEqual(invocations, 5, "Timer should invoke the body for each iteration")
+    }
+
+    func testMeasureAverageTimeReflectsWorkDuration() {
+        var timer = BenchmarkTimer()
+
+        // ~3ms of work per iteration
+        let measured = timer.measureAverageTime(iterations: 3) { usleep(3_000) }
+
+        // Only a 1ms lower bound is asserted; no upper bound is checked.
+        XCTAssertGreaterThanOrEqual(measured, 0.001, "Average duration should reflect the work performed")
+    }
+}
diff --git a/Tests/SpecificationKitBenchmarks/BenchmarkValidation.swift b/Tests/SpecificationKitBenchmarks/BenchmarkValidation.swift
index fe67c31..911d381 100644
--- a/Tests/SpecificationKitBenchmarks/BenchmarkValidation.swift
+++ b/Tests/SpecificationKitBenchmarks/BenchmarkValidation.swift
@@ -7,6 +7,9 @@
 
 import Foundation
 import XCTest
+#if canImport(IOKit)
+import IOKit
+#endif
 
 @testable import SpecificationKit
 
@@ -222,7 +225,7 @@ struct TestEnvironment: Codable {
 /// Benchmark storage and regression detection system
 class BenchmarkStorage {
     private let fileManager: FileManager
-    private let storageDirectory: URL
+    let storageDirectory: URL
 
     init(fileManager: FileManager = .default) {
         self.fileManager = fileManager
@@ -243,13 +246,7 @@ class BenchmarkStorage {
             return documentsPath.appendingPathComponent("SpecificationKitBenchmarks")
         }
 
-        let temporaryDirectory: URL
-        if #available(macOS 10.12, iOS 10.0, tvOS 10.0, watchOS 3.0, *) {
-            temporaryDirectory = fileManager.temporaryDirectory
-        } else {
-            temporaryDirectory = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
-        }
-
+        let temporaryDirectory = fileManager.temporaryDirectory
         return temporaryDirectory.appendingPathComponent("SpecificationKitBenchmarks", isDirectory: true)
     }
 
diff --git a/Tests/SpecificationKitBenchmarks/PerformanceBenchmarks.swift b/Tests/SpecificationKitBenchmarks/PerformanceBenchmarks.swift
index 4145df1..177370b 100644
--- a/Tests/SpecificationKitBenchmarks/PerformanceBenchmarks.swift
+++ b/Tests/SpecificationKitBenchmarks/PerformanceBenchmarks.swift
@@ -20,13 +20,16 @@ final class PerformanceBenchmarks: XCTestCase {
         let spec = CooldownIntervalSpec(eventKey: "test_action", cooldownInterval: 10.0)
         let context = createPerformanceTestContext()
 
-        let startTime = CFAbsoluteTimeGetCurrent()
         measure(metrics: [XCTClockMetric(), XCTMemoryMetric()]) {
             for _ in 1...1000 {
                 _ = spec.isSatisfiedBy(context)
             }
         }
-        let executionTime = (CFAbsoluteTimeGetCurrent() - startTime) / 1000
+
+        var timer = BenchmarkTimer()
+        let executionTime = timer.measureAverageTime(iterations: 1000) {
+            _ = spec.isSatisfiedBy(context)
+        }
 
         assertBenchmarkMetric(.specificationEvaluation(executionTime))
     }
@@ -39,13 +42,16 @@ final class PerformanceBenchmarks: XCTestCase {
         let complexSpec = userAgeSpec.and(subscriptionSpec).and(timeSinceSpec)
         let context = createPerformanceTestContext()
 
-        let startTime = CFAbsoluteTimeGetCurrent()
         measure(metrics: [XCTClockMetric(), XCTMemoryMetric()]) {
             for _ in 1...1000 {
                 _ = complexSpec.isSatisfiedBy(context)
             }
         }
-        let executionTime = (CFAbsoluteTimeGetCurrent() - startTime) / 1000
+
+        var timer = BenchmarkTimer()
+        let executionTime = timer.measureAverageTime(iterations: 1000) {
+            _ = complexSpec.isSatisfiedBy(context)
+        }
 
         assertBenchmarkMetric(.specificationEvaluation(executionTime))
     }
@@ -75,7 +81,6 @@ final class PerformanceBenchmarks: XCTestCase {
             provider.recordEvent("event_\(i)", at: Date().addingTimeInterval(-Double(i)))
         }
 
-        let startTime = CFAbsoluteTimeGetCurrent()
         measure(metrics: [XCTClockMetric(), XCTMemoryMetric()]) {
             for i in 1...1000 {
                 let counter = provider.getCounter("counter_\(i % 100 + 1)")
@@ -86,7 +91,15 @@ final class PerformanceBenchmarks: XCTestCase {
                 _ = counter + (flag ? 1 : 0) + Int(event?.timeIntervalSince1970 ?? 0)
             }
         }
-        let executionTime = (CFAbsoluteTimeGetCurrent() - startTime) / 1000
+
+        var timer = BenchmarkTimer()
+        let executionTime = timer.measureAverageTime(iterations: 1000) { iteration in
+            let counter = provider.getCounter("counter_\(iteration % 100 + 1)")
+            let flag = provider.getFlag("flag_\(iteration % 100 + 1)")
+            let event = provider.getEvent("event_\(iteration % 100 + 1)")
+
+            _ = counter + (flag ? 1 : 0) + Int(event?.timeIntervalSince1970 ?? 0)
+        }
 
         assertBenchmarkMetric(.contextProviderLatency(executionTime))
     }