diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0ee2455 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,22 @@ +name: CoreML Player CI + +on: + pull_request: + types: [opened, synchronize] + +jobs: + tests: + runs-on: macos-15 + steps: + - uses: actions/checkout@v4 + - name: Select Xcode 26.1.1 + run: sudo xcode-select -s /Applications/Xcode_26.1.1.app/Contents/Developer || sudo xcode-select -s /Applications/Xcode.app/Contents/Developer + - name: Show Xcode version + run: xcodebuild -version + - name: Run tests + run: | + xcodebuild test \ + -scheme "CoreML Player" \ + -destination 'platform=macOS' \ + -testPlan "CoreML Player" \ + -quiet diff --git a/CoreML Player.xctestplan b/CoreML Player.xctestplan new file mode 100644 index 0000000..2c2e9fb --- /dev/null +++ b/CoreML Player.xctestplan @@ -0,0 +1,29 @@ +{ + "configurations" : [ + { + "id" : "1C188226-3069-4FCA-A833-A5EA0729A1DA", + "name" : "Test Scheme Action", + "options" : { + + } + } + ], + "defaultOptions" : { + "performanceAntipatternCheckerEnabled" : true, + "targetForVariableExpansion" : { + "containerPath" : "container:CoreMLPlayer.xcodeproj", + "identifier" : "0D680A24297B38E7004F3839", + "name" : "CoreML Player" + } + }, + "testTargets" : [ + { + "target" : { + "containerPath" : "container:CoreMLPlayer.xcodeproj", + "identifier" : "B3559561388448499FAD2F36", + "name" : "CoreMLPlayerTests" + } + } + ], + "version" : 1 +} diff --git a/CoreMLPlayer.xcodeproj/project.pbxproj b/CoreMLPlayer.xcodeproj/project.pbxproj index 0756610..2687745 100644 --- a/CoreMLPlayer.xcodeproj/project.pbxproj +++ b/CoreMLPlayer.xcodeproj/project.pbxproj @@ -49,8 +49,34 @@ 0DFC8B93297C886700D1B3C9 /* Videos.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0DFC8B92297C886700D1B3C9 /* Videos.swift */; }; 0DFC8B95297C88F800D1B3C9 /* Base.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0DFC8B94297C88F800D1B3C9 /* Base.swift */; }; 0DFC8B97297C96FA00D1B3C9 /* CoreMLModelView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0DFC8B96297C96FA00D1B3C9 /* CoreMLModelView.swift */; }; + 125F5E572F5849C3B8BE9F2D /* CoreML Player.app in Frameworks */ = {isa = PBXBuildFile; fileRef = 0D680A25297B38E7004F3839 /* CoreML Player.app */; }; + 136576A07E5B4251BA91A59D /* VisionPipelineTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 136576A07E5B4251BA91A59E /* VisionPipelineTests.swift */; }; + 5287F1F98D6844138659C85F /* YOLOv3Tiny.mlmodel in Resources */ = {isa = PBXBuildFile; fileRef = 0D73DBF3298B3757005D214B /* YOLOv3Tiny.mlmodel */; }; + 78B86E38D81A415F8AA229B5 /* ModelLoadingTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 78B86E38D81A415F8AA229B6 /* ModelLoadingTests.swift */; }; + 80166255DF6740559D13CBF5 /* VideoDetectionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 80166255DF6740559D13CBF6 /* VideoDetectionTests.swift */; }; + F1D100A1299C000100000002 /* BaseDetectionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100A0299C000100000001 /* BaseDetectionTests.swift */; }; + F1D100A3299C000100000004 /* CoreMLModelFormatTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100A2299C000100000003 /* CoreMLModelFormatTests.swift */; }; + F1D100A5299C000100000006 /* CoreMLModelOptimizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100A4299C000100000005 /* CoreMLModelOptimizationTests.swift */; }; + F1D100A7299C000100000008 /* FixtureBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100A6299C000100000007 /* 
FixtureBuilder.swift */; }; + F1D100A9299C00010000000A /* MultiFunctionModelTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100A8299C000100000009 /* MultiFunctionModelTests.swift */; }; + F1D100AB299C00010000000C /* OverlaySnapshotTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100AA299C00010000000B /* OverlaySnapshotTests.swift */; }; + F1D100AD299C00010000000E /* ProjectConfigurationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100AC299C00010000000D /* ProjectConfigurationTests.swift */; }; + F1D100AF299C000100000010 /* VideoDetectionMetricsTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100AE299C00010000000F /* VideoDetectionMetricsTests.swift */; }; + F1D100B1299C000100000012 /* VideoDetectionPixelFormatTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100B0299C000100000011 /* VideoDetectionPixelFormatTests.swift */; }; + F1D100B3299C000100000014 /* VideoDetectionStateTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100B2299C000100000013 /* VideoDetectionStateTests.swift */; }; + F1D100B5299C000100000016 /* VideoDetectionTestDoubles.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1D100B4299C000100000015 /* VideoDetectionTestDoubles.swift */; }; /* End PBXBuildFile section */ +/* Begin PBXContainerItemProxy section */ + 80166255DF6740559D13CBF7 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 0D680A1D297B38E7004F3839 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 0D680A24297B38E7004F3839; + remoteInfo = "CoreML Player"; + }; +/* End PBXContainerItemProxy section */ + /* Begin PBXFileReference section */ 0D11159229880EE900D032C8 /* VideoDetection.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoDetection.swift; sourceTree = ""; }; 0D25FC58297EFEEC00E3F562 /* Status.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Status.swift; sourceTree = ""; }; @@ -84,6 +110,7 @@ 0D97DD7A2984B0DA0030BC7A /* File.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = File.swift; sourceTree = ""; }; 0D97DD7C2984B11F0030BC7A /* Gallery.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Gallery.swift; sourceTree = ""; }; 0D97DD7E2984B9BF0030BC7A /* LoadingGallery.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LoadingGallery.swift; sourceTree = ""; }; + 0DADC3C9889E4D269B0454AE /* CoreMLPlayerTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = CoreMLPlayerTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 0DC550EC298ECBB20088608C /* FPSChartData.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FPSChartData.swift; sourceTree = ""; }; 0DC550EE298ECF910088608C /* FPSChart.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FPSChart.swift; sourceTree = ""; }; 0DDA121A29832D1700991BD0 /* CoreMLModelDescriptionView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CoreMLModelDescriptionView.swift; sourceTree = ""; }; @@ -97,6 +124,22 @@ 0DFC8B92297C886700D1B3C9 /* Videos.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Videos.swift; sourceTree = ""; }; 0DFC8B94297C88F800D1B3C9 /* Base.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Base.swift; sourceTree = ""; }; 0DFC8B96297C96FA00D1B3C9 /* CoreMLModelView.swift 
*/ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CoreMLModelView.swift; sourceTree = ""; }; + 136576A07E5B4251BA91A59E /* VisionPipelineTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VisionPipelineTests.swift; sourceTree = ""; }; + 2F3478EC064245388F50489B /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 78B86E38D81A415F8AA229B6 /* ModelLoadingTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelLoadingTests.swift; sourceTree = ""; }; + 80166255DF6740559D13CBF6 /* VideoDetectionTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoDetectionTests.swift; sourceTree = ""; }; + F1D100A0299C000100000001 /* BaseDetectionTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BaseDetectionTests.swift; sourceTree = ""; }; + F1D100A2299C000100000003 /* CoreMLModelFormatTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CoreMLModelFormatTests.swift; sourceTree = ""; }; + F1D100A4299C000100000005 /* CoreMLModelOptimizationTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CoreMLModelOptimizationTests.swift; sourceTree = ""; }; + F1D100A6299C000100000007 /* FixtureBuilder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FixtureBuilder.swift; sourceTree = ""; }; + F1D100A8299C000100000009 /* MultiFunctionModelTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiFunctionModelTests.swift; sourceTree = ""; }; + F1D100AA299C00010000000B /* OverlaySnapshotTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OverlaySnapshotTests.swift; sourceTree = ""; }; + F1D100AC299C00010000000D /* ProjectConfigurationTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProjectConfigurationTests.swift; sourceTree = ""; }; + F1D100AE299C00010000000F /* VideoDetectionMetricsTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoDetectionMetricsTests.swift; sourceTree = ""; }; + F1D100B0299C000100000011 /* VideoDetectionPixelFormatTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoDetectionPixelFormatTests.swift; sourceTree = ""; }; + F1D100B2299C000100000013 /* VideoDetectionStateTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoDetectionStateTests.swift; sourceTree = ""; }; + F1D100B4299C000100000015 /* VideoDetectionTestDoubles.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoDetectionTestDoubles.swift; sourceTree = ""; }; + C80095392ED1CCA6006A1F60 /* CoreML Player.xctestplan */ = {isa = PBXFileReference; lastKnownFileType = text; path = "CoreML Player.xctestplan"; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -107,6 +150,14 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + BE28BE67416D476B8DB74C4E /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 125F5E572F5849C3B8BE9F2D /* CoreML Player.app in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ @@ -147,8 +198,10 @@ 0D680A1C297B38E7004F3839 = { isa = PBXGroup; children = ( + 
C80095392ED1CCA6006A1F60 /* CoreML Player.xctestplan */, 0D680A27297B38E7004F3839 /* CoreMLPlayer */, 0D680A26297B38E7004F3839 /* Products */, + 731C6B71F6CC400D8C912CAD /* CoreMLPlayerTests */, ); sourceTree = ""; }; @@ -156,6 +209,7 @@ isa = PBXGroup; children = ( 0D680A25297B38E7004F3839 /* CoreML Player.app */, + 0DADC3C9889E4D269B0454AE /* CoreMLPlayerTests.xctest */, ); name = Products; sourceTree = ""; @@ -241,6 +295,28 @@ path = Structs; sourceTree = ""; }; + 731C6B71F6CC400D8C912CAD /* CoreMLPlayerTests */ = { + isa = PBXGroup; + children = ( + F1D100A0299C000100000001 /* BaseDetectionTests.swift */, + 78B86E38D81A415F8AA229B6 /* ModelLoadingTests.swift */, + F1D100A2299C000100000003 /* CoreMLModelFormatTests.swift */, + F1D100A4299C000100000005 /* CoreMLModelOptimizationTests.swift */, + F1D100A6299C000100000007 /* FixtureBuilder.swift */, + F1D100A8299C000100000009 /* MultiFunctionModelTests.swift */, + F1D100AA299C00010000000B /* OverlaySnapshotTests.swift */, + F1D100AC299C00010000000D /* ProjectConfigurationTests.swift */, + 80166255DF6740559D13CBF6 /* VideoDetectionTests.swift */, + F1D100AE299C00010000000F /* VideoDetectionMetricsTests.swift */, + F1D100B0299C000100000011 /* VideoDetectionPixelFormatTests.swift */, + F1D100B2299C000100000013 /* VideoDetectionStateTests.swift */, + F1D100B4299C000100000015 /* VideoDetectionTestDoubles.swift */, + 136576A07E5B4251BA91A59E /* VisionPipelineTests.swift */, + 2F3478EC064245388F50489B /* Info.plist */, + ); + path = CoreMLPlayerTests; + sourceTree = ""; + }; /* End PBXGroup section */ /* Begin PBXNativeTarget section */ @@ -261,6 +337,24 @@ productReference = 0D680A25297B38E7004F3839 /* CoreML Player.app */; productType = "com.apple.product-type.application"; }; + B3559561388448499FAD2F36 /* CoreMLPlayerTests */ = { + isa = PBXNativeTarget; + buildConfigurationList = F559AF08098947A08F82D262 /* Build configuration list for PBXNativeTarget "CoreMLPlayerTests" */; + buildPhases = ( + 5287F1F98D6844138659C860 /* Sources */, + BE28BE67416D476B8DB74C4E /* Frameworks */, + 2F3478EC064245388F50489C /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 78B86E38D81A415F8AA229B7 /* PBXTargetDependency */, + ); + name = CoreMLPlayerTests; + productName = CoreMLPlayerTests; + productReference = 0DADC3C9889E4D269B0454AE /* CoreMLPlayerTests.xctest */; + productType = "com.apple.product-type.bundle.unit-test"; + }; /* End PBXNativeTarget section */ /* Begin PBXProject section */ @@ -290,6 +384,7 @@ projectRoot = ""; targets = ( 0D680A24297B38E7004F3839 /* CoreML Player */, + B3559561388448499FAD2F36 /* CoreMLPlayerTests */, ); }; /* End PBXProject section */ @@ -304,6 +399,14 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + 2F3478EC064245388F50489C /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 5287F1F98D6844138659C85F /* YOLOv3Tiny.mlmodel in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; /* End PBXResourcesBuildPhase section */ /* Begin PBXSourcesBuildPhase section */ @@ -354,8 +457,37 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + 5287F1F98D6844138659C860 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + F1D100A1299C000100000002 /* BaseDetectionTests.swift in Sources */, + 78B86E38D81A415F8AA229B5 /* ModelLoadingTests.swift in Sources */, + F1D100A3299C000100000004 /* CoreMLModelFormatTests.swift in Sources */, + F1D100A5299C000100000006 /* CoreMLModelOptimizationTests.swift in Sources */, + 
F1D100A7299C000100000008 /* FixtureBuilder.swift in Sources */, + F1D100A9299C00010000000A /* MultiFunctionModelTests.swift in Sources */, + F1D100AB299C00010000000C /* OverlaySnapshotTests.swift in Sources */, + F1D100AD299C00010000000E /* ProjectConfigurationTests.swift in Sources */, + 80166255DF6740559D13CBF5 /* VideoDetectionTests.swift in Sources */, + F1D100AF299C000100000010 /* VideoDetectionMetricsTests.swift in Sources */, + F1D100B1299C000100000012 /* VideoDetectionPixelFormatTests.swift in Sources */, + F1D100B3299C000100000014 /* VideoDetectionStateTests.swift in Sources */, + F1D100B5299C000100000016 /* VideoDetectionTestDoubles.swift in Sources */, + 136576A07E5B4251BA91A59D /* VisionPipelineTests.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; /* End PBXSourcesBuildPhase section */ +/* Begin PBXTargetDependency section */ + 78B86E38D81A415F8AA229B7 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 0D680A24297B38E7004F3839 /* CoreML Player */; + targetProxy = 80166255DF6740559D13CBF7 /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + /* Begin XCBuildConfiguration section */ 0D680A32297B38E8004F3839 /* Debug */ = { isa = XCBuildConfiguration; @@ -407,7 +539,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = 13.1; + MACOSX_DEPLOYMENT_TARGET = 14.0; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_FAST_MATH = YES; ONLY_ACTIVE_ARCH = YES; @@ -461,7 +593,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = 13.1; + MACOSX_DEPLOYMENT_TARGET = 14.0; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; SDKROOT = macosx; @@ -491,7 +623,7 @@ "$(inherited)", "@executable_path/../Frameworks", ); - MACOSX_DEPLOYMENT_TARGET = 13.0; + MACOSX_DEPLOYMENT_TARGET = 14.0; MARKETING_VERSION = 1.0.2; PRODUCT_BUNDLE_IDENTIFIER = useYourOwn.Identifier; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -521,7 +653,7 @@ "$(inherited)", "@executable_path/../Frameworks", ); - MACOSX_DEPLOYMENT_TARGET = 13.0; + MACOSX_DEPLOYMENT_TARGET = 14.0; MARKETING_VERSION = 1.0.2; PRODUCT_BUNDLE_IDENTIFIER = useYourOwn.Identifier; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -530,6 +662,49 @@ }; name = Release; }; + 0DA47494F3DF4786A25EC365 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + BUNDLE_LOADER = "$(TEST_HOST)"; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = ""; + INFOPLIST_FILE = CoreMLPlayerTests/Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 14.0; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = com.example.CoreMLPlayerTests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_HOST = "$(BUILT_PRODUCTS_DIR)/CoreML Player.app/Contents/MacOS/CoreML Player"; + }; + name = Debug; + }; + 0DF19DE3679F402B8A38A14A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + BUNDLE_LOADER = "$(TEST_HOST)"; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = ""; + INFOPLIST_FILE = CoreMLPlayerTests/Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 14.0; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = com.example.CoreMLPlayerTests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + 
TARGETED_DEVICE_FAMILY = "1,2"; + TEST_HOST = "$(BUILT_PRODUCTS_DIR)/CoreML Player.app/Contents/MacOS/CoreML Player"; + }; + name = Release; + }; /* End XCBuildConfiguration section */ /* Begin XCConfigurationList section */ @@ -551,6 +726,15 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + F559AF08098947A08F82D262 /* Build configuration list for PBXNativeTarget "CoreMLPlayerTests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 0DA47494F3DF4786A25EC365 /* Debug */, + 0DF19DE3679F402B8A38A14A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; /* End XCConfigurationList section */ }; rootObject = 0D680A1D297B38E7004F3839 /* Project object */; diff --git a/CoreMLPlayer.xcodeproj/xcshareddata/xcschemes/CoreML Player.xcscheme b/CoreMLPlayer.xcodeproj/xcshareddata/xcschemes/CoreML Player.xcscheme new file mode 100644 index 0000000..3c9e9ec --- /dev/null +++ b/CoreMLPlayer.xcodeproj/xcshareddata/xcschemes/CoreML Player.xcscheme @@ -0,0 +1,95 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/CoreMLPlayer/Classes/Base.swift b/CoreMLPlayer/Classes/Base.swift index 6a6e387..969ffe3 100644 --- a/CoreMLPlayer/Classes/Base.swift +++ b/CoreMLPlayer/Classes/Base.swift @@ -8,10 +8,95 @@ import SwiftUI import UniformTypeIdentifiers import Vision +import ImageIO +import CoreML class Base { typealias detectionOutput = (objects: [DetectedObject], detectionTime: String, detectionFPS: String) let emptyDetection: detectionOutput = ([], "", "") + /// Serial queue for all model inferences to avoid concurrent contention and to centralize error handling. + static let inferenceQueue = DispatchQueue(label: "com.coremlplayer.inference") + /// Map underlying MLModels so we can reach model descriptions/state when only a VNCoreMLModel is in hand. + private static let underlyingModelMap = NSMapTable(keyOptions: .weakMemory, valueOptions: .strongMemory) + static func register(mlModel: MLModel, for vnModel: VNCoreMLModel) { + underlyingModelMap.setObject(mlModel, forKey: vnModel) + } + private func underlyingModel(for vnModel: VNCoreMLModel) -> MLModel? { + if let mapped = Base.underlyingModelMap.object(forKey: vnModel) { + return mapped + } + if let ml = vnModel.value(forKey: "model") as? MLModel { + Base.underlyingModelMap.setObject(ml, forKey: vnModel) + return ml + } + return nil + } + + /// Reusable per-model context; weakly keyed to avoid leaks when models are swapped. + private static let requestCache = NSMapTable(keyOptions: .weakMemory, valueOptions: .strongMemory) + + private final class ModelContext { + let model: VNCoreMLModel + let mlModel: MLModel? + let sequenceHandler = VNSequenceRequestHandler() + let isStateful: Bool + let inputDescriptions: [String: MLFeatureDescription] + let outputDescriptions: [String: MLFeatureDescription] + let stateInputNames: [String] + let stateOutputNames: [String] + let imageInputName: String? + var mlStateStorage: Any? + @available(macOS 15.0, *) + var mlState: MLState? { + get { mlStateStorage as? MLState } + set { mlStateStorage = newValue } + } + var manualState: MLFeatureProvider? + + init(model: VNCoreMLModel, underlying: MLModel?) 
{ + self.model = model + self.mlModel = underlying + if let description = underlying?.modelDescription { + self.inputDescriptions = description.inputDescriptionsByName + self.outputDescriptions = description.outputDescriptionsByName + } else { + self.inputDescriptions = [:] + self.outputDescriptions = [:] + } + self.stateInputNames = Self.names(containing: "state", in: inputDescriptions) + self.stateOutputNames = Self.names(containing: "state", in: outputDescriptions) + self.isStateful = !stateInputNames.isEmpty || !stateOutputNames.isEmpty + self.imageInputName = inputDescriptions.first(where: { $0.value.type == .image })?.key + + #if DEBUG + let multiArrayInputs = inputDescriptions.keys.filter { inputDescriptions[$0]?.type == .multiArray } + let multiArrayOutputs = outputDescriptions.keys.filter { outputDescriptions[$0]?.type == .multiArray } + if isStateful { + print("[CoreMLPlayer] Stateful model detected. State inputs: \(stateInputNames); outputs: \(stateOutputNames)") + } else if !multiArrayInputs.isEmpty || !multiArrayOutputs.isEmpty { + print("[CoreMLPlayer] No explicit 'state' keys found. Multi-array inputs: \(multiArrayInputs); outputs: \(multiArrayOutputs)") + } + #endif + } + + func resetState() { + mlStateStorage = nil + manualState = nil + } + + private static func names(containing needle: String, in dict: [String: MLFeatureDescription]) -> [String] { + return dict.keys.filter { $0.lowercased().contains(needle) } + } + } + + private func context(for model: VNCoreMLModel) -> ModelContext { + if let cached = Base.requestCache.object(forKey: model) { + return cached + } + let fresh = ModelContext(model: model, underlying: underlyingModel(for: model)) + Base.requestCache.setObject(fresh, forKey: model) + return fresh + } func selectFiles(contentTypes: [UTType], multipleSelection: Bool = true) -> [URL]? { let picker = NSOpenPanel() @@ -39,43 +124,272 @@ class Base { func detectImageObjects(image: ImageFile?, model: VNCoreMLModel?) -> detectionOutput { guard let vnModel = model, - let nsImage = image?.getNSImage() + let nsImage = image?.getNSImage(), + let cgImage = nsImage.cgImageForCurrentRepresentation else { return emptyDetection } - - guard let tiffImage = nsImage.tiffRepresentation else { - showAlert(title: "Failed to convert image!") - return emptyDetection - } - - return performObjectDetection(requestHandler: VNImageRequestHandler(data: tiffImage), vnModel: vnModel) + + let orientation = nsImage.cgImagePropertyOrientation ?? .up + #if DEBUG + Base.sharedLastImageOrientation = orientation + #endif + + let cropOption = cropOptionForIdealFormat() + return performObjectDetection(cgImage: cgImage, orientation: orientation, vnModel: vnModel, functionName: CoreMLModel.sharedSelectedFunction, cropAndScale: cropOption) } - func performObjectDetection(requestHandler: VNImageRequestHandler, vnModel: VNCoreMLModel) -> detectionOutput { + func performObjectDetection(requestHandler: VNImageRequestHandler, vnModel: VNCoreMLModel, functionName: String? = nil, cropAndScale: VNImageCropAndScaleOption = .scaleFill) -> detectionOutput { + let ctx = context(for: vnModel) + if ctx.isStateful { + return performStatefulDetection(input: .handler(requestHandler), context: ctx, cropAndScale: cropAndScale, functionName: functionName) + } + return performVisionDetection(with: ctx, cropAndScale: cropAndScale, functionName: functionName) { request in + try requestHandler.perform([request]) + } + } + + /// Pixel-buffer based detection path (preferred for video/stateful use cases). 
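
The doc comment above introduces the pixel-buffer entry point declared immediately below. As a hedged illustration of the calling convention only (the helper name, the `.up` orientation, and the idea that the frame comes from the player output are assumptions; the `performObjectDetection` signature and `sharedSelectedFunction` are taken from this diff), a video caller inside the app module might drive it like this:

```swift
import Vision
import CoreVideo
import ImageIO

// Illustrative caller, not part of the patch. Assumes `frame` was copied from an
// AVPlayerItemVideoOutput and that the VNCoreMLModel was registered via Base.register(mlModel:for:).
func detect(frame: CVPixelBuffer, with vnModel: VNCoreMLModel, using base: Base) {
    let result = base.performObjectDetection(
        pixelBuffer: frame,
        orientation: .up,                                   // a real caller passes the track's orientation
        vnModel: vnModel,
        functionName: CoreMLModel.sharedSelectedFunction,   // nil for single-function models
        cropAndScale: base.cropOptionForIdealFormat()
    )
    print("objects: \(result.objects.count), time: \(result.detectionTime), fps: \(result.detectionFPS)")
}
```
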
+ func performObjectDetection(pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation, vnModel: VNCoreMLModel, functionName: String? = nil, cropAndScale: VNImageCropAndScaleOption = .scaleFill) -> detectionOutput { + let ctx = context(for: vnModel) + if ctx.isStateful { + return performStatefulDetection(input: .pixelBuffer(pixelBuffer, orientation), context: ctx, cropAndScale: cropAndScale, functionName: functionName) + } + return performVisionDetection(with: ctx, cropAndScale: cropAndScale, functionName: functionName) { request in + try ctx.sequenceHandler.perform([request], on: pixelBuffer, orientation: orientation) + } + } + + /// CGImage-based detection path used by the image gallery. + func performObjectDetection(cgImage: CGImage, orientation: CGImagePropertyOrientation, vnModel: VNCoreMLModel, functionName: String? = nil, cropAndScale: VNImageCropAndScaleOption = .scaleFill) -> detectionOutput { + let ctx = context(for: vnModel) + #if DEBUG + Base.sharedLastImageOrientation = orientation + #endif + if ctx.isStateful { + return performStatefulDetection(input: .cgImage(cgImage, orientation), context: ctx, cropAndScale: cropAndScale, functionName: functionName) + } + return performVisionDetection(with: ctx, cropAndScale: cropAndScale, functionName: functionName) { request in + try ctx.sequenceHandler.perform([request], on: cgImage, orientation: orientation) + } + } + + /// Vision-backed inference path (non-stateful). + private func performVisionDetection(with context: ModelContext, cropAndScale: VNImageCropAndScaleOption, functionName: String?, operation: (VNCoreMLRequest) throws -> Void) -> detectionOutput { var observationResults: [VNObservation]? - let request = VNCoreMLRequest(model: vnModel) { (request, error) in + let request = VNCoreMLRequest(model: context.model) { request, _ in observationResults = request.results } - - request.imageCropAndScaleOption = .scaleFill - - + request.preferBackgroundProcessing = true + request.imageCropAndScaleOption = cropAndScale + + #if DEBUG + Base.sharedLastFunctionName = functionName + #endif + let detectionTime = ContinuousClock().measure { - try? requestHandler.perform([request]) + do { + try Base.inferenceQueue.sync { + try operation(request) + } + } catch { + #if DEBUG + Base.sharedLastError = error + #endif + } } - + return asDetectedObjects(visionObservationResults: observationResults, detectionTime: detectionTime) } + private enum DetectionInput { + case pixelBuffer(CVPixelBuffer, CGImagePropertyOrientation) + case cgImage(CGImage, CGImagePropertyOrientation) + case handler(VNImageRequestHandler) + } + + /// Core ML stateful inference path used when the model declares state inputs/outputs. + private func performStatefulDetection(input: DetectionInput, context: ModelContext, cropAndScale: VNImageCropAndScaleOption, functionName: String?) -> detectionOutput { + #if DEBUG + Base.sharedLastFunctionName = functionName + #endif + + var outputProvider: MLFeatureProvider? 
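
The availability branch just below leans on the macOS 15 `MLState` API. A minimal, standalone sketch of that API (placeholder `model` and `frames`, error handling trimmed; this is not the project's exact flow):

```swift
import CoreML

// Minimal sketch of stateful prediction on macOS 15+. Each call reads and updates `state`
// in place, which is how KV-cache style models keep context across successive frames.
@available(macOS 15.0, *)
func runSequence(model: MLModel, frames: [MLFeatureProvider]) throws -> [MLFeatureProvider] {
    let state = model.makeState()   // allocate the model's state tensors once per sequence
    return try frames.map { try model.prediction(from: $0, using: state) }
}
```
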
+ + let detectionTime = ContinuousClock().measure { + Base.inferenceQueue.sync { + guard let mlModel = context.mlModel, + let features = makeFeatureProvider(for: context, input: input) else { return } + do { + if #available(macOS 15.0, *) { + if context.mlState == nil { + context.mlState = mlModel.makeState() + } + if let state = context.mlState { + outputProvider = try mlModel.prediction(from: features, using: state) + } else { + outputProvider = try mlModel.prediction(from: features) + } + } else { + outputProvider = try mlModel.prediction(from: features) + if let provider = outputProvider { + context.manualState = extractState(from: provider, outputNames: context.stateOutputNames) + } + } + } catch { + #if DEBUG + Base.sharedLastError = error + #endif + } + } + } + + let seconds = Double(detectionTime.components.seconds) + (Double(detectionTime.components.attoseconds) / 1_000_000_000_000_000_000) + let msTime = String(format: "%.0f ms", seconds * 1000) + let detectionFPS = seconds > 0 ? String(format: "%.0f", 1.0 / seconds) : "0" + + let objects = outputProvider.flatMap { detectedObjects(from: $0) } ?? [] + + #if DEBUG + if let provider = outputProvider { + var states: [String: MLFeatureValue] = [:] + for name in context.stateOutputNames { + if provider.featureNames.contains(name), + let value = provider.featureValue(for: name) { + states[name] = value + } + } + Base.sharedLastStateValues = states.isEmpty ? nil : states + } + #endif + + if let provider = outputProvider, + let extracted = extractState(from: provider, outputNames: context.stateOutputNames) { + context.manualState = extracted + } + + return (objects, msTime, detectionFPS) + } + + /// Build an MLFeatureProvider for the current input, including any carried state. + private func makeFeatureProvider(for context: ModelContext, input: DetectionInput) -> MLFeatureProvider? { + var dict: [String: MLFeatureValue] = [:] + var pixelBuffer: CVPixelBuffer? + var cgImage: CGImage? + + switch input { + case .pixelBuffer(let pb, _): + pixelBuffer = pb + case .cgImage(let cg, _): + cgImage = cg + case .handler: + break + } + + for (name, desc) in context.inputDescriptions { + switch desc.type { + case .image: + if let pb = pixelBuffer { + dict[name] = MLFeatureValue(pixelBuffer: pb) + } else if let cg = cgImage, let constraint = desc.imageConstraint { + dict[name] = try? MLFeatureValue(cgImage: cg, constraint: constraint, options: [:]) + } + case .multiArray: + // State inputs reuse prior state if available, otherwise zeros. + if name.lowercased().contains("state") { + if let manual = context.manualState?.featureValue(for: name) { + dict[name] = manual + continue + } + if #available(macOS 15.0, *), let state = context.mlState { + var captured: MLFeatureValue? + state.withMultiArray(for: name) { buffer in + captured = MLFeatureValue(multiArray: buffer) + } + if let captured { + dict[name] = captured + continue + } + } + } + + if let arr = multiArray(for: desc, fill: 1.0) { + dict[name] = MLFeatureValue(multiArray: arr) + } + default: + continue + } + } + + guard !dict.isEmpty else { return nil } + return try? MLDictionaryFeatureProvider(dictionary: dict) + } + + private func multiArray(for desc: MLFeatureDescription, fill value: Double) -> MLMultiArray? { + guard let shape = desc.multiArrayConstraint?.shape else { return nil } + let dataType = desc.multiArrayConstraint?.dataType ?? .double + guard let array = try? 
MLMultiArray(shape: shape, dataType: dataType) else { return nil } + switch dataType { + case .float32: + let ptr = array.dataPointer.bindMemory(to: Float32.self, capacity: array.count) + for i in 0..<array.count { ptr[i] = Float32(value) } + default: + for i in 0..<array.count { array[i] = NSNumber(value: value) } + } + return array + } + + private func extractState(from provider: MLFeatureProvider, outputNames: [String]) -> MLFeatureProvider? { + var dict: [String: MLFeatureValue] = [:] + for name in outputNames { + if let value = provider.featureValue(for: name) { + dict[name] = value + } + } + guard !dict.isEmpty else { return nil } + return try? MLDictionaryFeatureProvider(dictionary: dict) + } + + private func detectedObjects(from provider: MLFeatureProvider) -> [DetectedObject] { + // Attempt to treat dictionary outputs as classification probabilities; otherwise fall back to empty. + if let dictFeatureName = provider.featureNames.first(where: { provider.featureValue(for: $0)?.type == .dictionary }), + let dict = provider.featureValue(for: dictFeatureName)?.dictionaryValue as? [String: NSNumber], + let best = dict.max(by: { $0.value.doubleValue < $1.value.doubleValue }) { + let object = DetectedObject( + id: UUID(), + label: best.key, + confidence: String(format: "%.3f", best.value.doubleValue), + otherLabels: dict.map { (label: $0.key, confidence: String(format: "%.4f", $0.value.doubleValue)) }, + width: 0.9, + height: 0.85, + x: 0.05, + y: 0.05, + isClassification: true + ) + return [object] + } + + return [] + } + func asDetectedObjects(visionObservationResults: [VNObservation]?, detectionTime: Duration) -> detectionOutput { let classificationObservations = visionObservationResults as? [VNClassificationObservation] let objectObservations = visionObservationResults as? [VNRecognizedObjectObservation] var detectedObjects: [DetectedObject] = [] - - let msTime = detectionTime.formatted(.units(allowed: [.seconds, .milliseconds], width: .narrow)) - let detectionFPS = String(format: "%.0f", Duration.seconds(1) / detectionTime) + let seconds = Double(detectionTime.components.seconds) + (Double(detectionTime.components.attoseconds) / 1_000_000_000_000_000_000) + let msTime = String(format: "%.0f ms", seconds * 1000) + let detectionFPS = seconds > 0 ? String(format: "%.0f", 1.0 / seconds) : "0" var labels: [(label: String, confidence: String)] = [] @@ -138,20 +452,49 @@ class Base { } func checkModelIO(modelDescription: MLModelDescription) throws { - if !modelDescription.inputDescriptionsByName.contains(where: { $0.key.contains("image") }) { + let inputs = modelDescription.inputDescriptionsByName.values + let outputs = modelDescription.outputDescriptionsByName.values + + let hasImageInput = inputs.contains { $0.type == .image && $0.imageConstraint != nil } + if !hasImageInput { DispatchQueue.main.async { self.showAlert(title: "This model does not accept Images as an input, and at the moment is not supported.") } throw MLModelError(.io) } - - if !modelDescription.outputDescriptionsByName.contains(where: { $0.key.contains("coordinate") || $0.key.contains("confidence") || $0.key.contains("class") }) { + + let supportsOutput = outputs.contains { desc in + switch desc.type { + case .multiArray, .dictionary, .string: + return true + default: + return false + } + } + + if !supportsOutput { DispatchQueue.main.async { self.showAlert(title: "This model is not of type Object Detection or Classification, and at the moment is not supported.") } throw MLModelError(.io) } } + + /// Derive crop-and-scale based on the ideal format if available (square ⇒ centerCrop, otherwise scaleFit) + func cropOptionForIdealFormat() -> VNImageCropAndScaleOption { + // If a model explicitly set a crop preference without idealFormat, honor it.
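
Since this heuristic is the kind of behavior the new test target exercises, a hypothetical unit test could pin the square-to-centerCrop rule down as sketched here (the class name, the 416x416 fixture, and the `CoreML_Player` module name are illustrative assumptions, not taken from this patch):

```swift
import XCTest
import Vision
import CoreVideo
@testable import CoreML_Player   // module name is an assumption; match the app target's module

// Hypothetical test sketch; not part of this patch.
final class CropHeuristicTests: XCTestCase {
    func testIdealFormatDrivesCropAndScale() {
        CoreMLModel.sharedCropAndScale = nil
        CoreMLModel.sharedIdealFormat = (width: 416, height: 416, type: kCVPixelFormatType_32BGRA)
        XCTAssertEqual(Base().cropOptionForIdealFormat(), .centerCrop)

        CoreMLModel.sharedIdealFormat = (width: 640, height: 384, type: kCVPixelFormatType_32BGRA)
        XCTAssertEqual(Base().cropOptionForIdealFormat(), .scaleFit)
    }
}
```
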
+ if let stored = CoreMLModel.sharedCropAndScale, CoreMLModel.sharedIdealFormat == nil { + return stored + } + if let format = CoreMLModel.sharedIdealFormat { + return format.width == format.height ? .centerCrop : .scaleFit + } + // fallback to any stored preference or default + if let stored = CoreMLModel.sharedCropAndScale { + return stored + } + return .scaleFill + } func prepareObjectForSwiftUI(object: DetectedObject, geometry: GeometryProxy) -> CGRect { let objectRect = CGRect(x: object.x, y: object.y, width: object.width, height: object.height) @@ -171,9 +514,26 @@ extension NSImage { guard representations.count > 0 else { return .zero } return NSSize(width: representations[0].pixelsWide, height: representations[0].pixelsHigh) } + + /// Current CGImage for the representation, if available. + var cgImageForCurrentRepresentation: CGImage? { + return cgImage(forProposedRect: nil, context: nil, hints: nil) + } + + /// EXIF orientation mapping for Vision handlers. + var cgImagePropertyOrientation: CGImagePropertyOrientation? { + guard let tiffData = self.tiffRepresentation, + let source = CGImageSourceCreateWithData(tiffData as CFData, nil), + let properties = CGImageSourceCopyPropertiesAtIndex(source, 0, nil) as? [CFString: Any], + let raw = properties[kCGImagePropertyOrientation] as? UInt32, + let orientation = CGImagePropertyOrientation(rawValue: raw) else { + return nil + } + return orientation + } } -extension VNRecognizedObjectObservation: Identifiable { +extension VNRecognizedObjectObservation: @retroactive Identifiable { public var id: UUID { return self.uuid } @@ -181,3 +541,16 @@ extension VNRecognizedObjectObservation: Identifiable { return lhs.uuid == rhs.uuid } } + +#if DEBUG +extension Base { + /// Last used image orientation (testing only). + static var sharedLastImageOrientation: CGImagePropertyOrientation? + /// Last Vision error encountered (testing only). + static var sharedLastError: Error? + /// Last function name requested on a VNCoreMLRequest (testing only). + static var sharedLastFunctionName: String? + /// Last observed state outputs (testing only, stateful models). + static var sharedLastStateValues: [String: MLFeatureValue]? +} +#endif diff --git a/CoreMLPlayer/Classes/CoreMLModel.swift b/CoreMLPlayer/Classes/CoreMLModel.swift index c49fd30..0a498fc 100644 --- a/CoreMLPlayer/Classes/CoreMLModel.swift +++ b/CoreMLPlayer/Classes/CoreMLModel.swift @@ -8,11 +8,46 @@ import SwiftUI import CoreML import Vision +import CoreVideo class CoreMLModel: Base, ObservableObject { + /// Shared ideal format for crop/pixel decisions across app; updated when a model loads. + static var sharedIdealFormat: (width: Int, height: Int, type: OSType)? + /// Shared selection for multi-function models (test hook). + static var sharedSelectedFunction: String? + /// Shared crop/scaling preference derived from the model input constraints. + static var sharedCropAndScale: VNImageCropAndScaleOption? + + enum ModelKind: String, CaseIterable, Identifiable { + case detector + case classifier + case embedding + case unknown + + var id: String { rawValue } + } + @Published var isValid = false @Published var isLoading = false @Published var name: String? + @Published var availableFunctions: [String] = [] + @Published var selectedFunction: String? 
{ + didSet { + CoreMLModel.sharedSelectedFunction = selectedFunction + storedSelectedFunctionName = selectedFunction + if isValid, selectedFunction != oldValue { + reconfigure() + } + } + } + @Published var modelKind: ModelKind = .unknown { + didSet { storedModelKind = modelKind.rawValue } + } + @Published var cropAndScaleOption: VNImageCropAndScaleOption = .scaleFill { + didSet { CoreMLModel.sharedCropAndScale = cropAndScaleOption } + } + @Published var supportsStatefulModel: Bool = false + @Published var optimizationWarning: String? @AppStorage("CoreMLModel-selectedBuiltInModel") var selectedBuiltInModel: String? @AppStorage("CoreMLModel-autoloadSelection") var autoloadSelection: AutoloadChoices = .disabled @@ -21,10 +56,40 @@ class CoreMLModel: Base, ObservableObject { @AppStorage("CoreMLModel-compiledModelURL") var compiledModelURL: URL? @AppStorage("CoreMLModel-computeUnits") var computeUnits: MLComputeUnits = .all @AppStorage("CoreMLModel-gpuAllowLowPrecision") var gpuAllowLowPrecision: Bool = false + @AppStorage("CoreMLModel-allowBackgroundTasks") var allowBackgroundTasks: Bool = true + @AppStorage("CoreMLModel-optimizeOnLoad") var optimizeOnLoad: Bool = false + @AppStorage("CoreMLModel-selectedFunctionName") var storedSelectedFunctionName: String? + @AppStorage("CoreMLModel-modelKind") private var storedModelKind: String = ModelKind.unknown.rawValue var model: VNCoreMLModel? var modelDescription: [ModelDescription] = [] - var idealFormat: (width: Int, height: Int, type: OSType)? + var idealFormat: (width: Int, height: Int, type: OSType)? { + didSet { + CoreMLModel.sharedIdealFormat = idealFormat + } + } + @Published var wasOptimized: Bool = false + + override init() { + super.init() + modelKind = ModelKind(rawValue: storedModelKind) ?? .unknown + CoreMLModel.sharedSelectedFunction = storedSelectedFunctionName + CoreMLModel.sharedCropAndScale = cropAndScaleOption + + if UserDefaults.standard.object(forKey: "CoreMLModel-computeUnits") == nil { + #if arch(x86_64) + computeUnits = .cpuAndGPU + #else + computeUnits = .all + #endif + } + + if let storedSelectedFunctionName { + selectedFunction = storedSelectedFunctionName + } + + optimizationWarning = nil + } enum AutoloadChoices: String, CaseIterable, Identifiable { case disabled = "Disabled" @@ -91,61 +156,79 @@ class CoreMLModel: Base, ObservableObject { } func loadTheModel(url: URL, useSecurityScope: Bool = false) { - self.isLoading = true + DispatchQueue.main.async { + self.isLoading = true + } DispatchQueue.global(qos: .userInitiated).async { - do { - if useSecurityScope { - _ = url.startAccessingSecurityScopedResource() - } - - var getCompiledURL: URL? - var URLIsCompiled = false - - if url.pathExtension == "mlmodelc" { // mlmodel"c" is compiled - getCompiledURL = url - URLIsCompiled = true - } else { - getCompiledURL = try MLModel.compileModel(at: url) - } - - guard let compiledURL = getCompiledURL else { - throw URLError(.badURL) + var hasScope = false + DispatchQueue.main.async { + self.optimizationWarning = nil + } + if useSecurityScope { + hasScope = url.startAccessingSecurityScopedResource() + } + + defer { + if hasScope { + url.stopAccessingSecurityScopedResource() } - - let config = MLModelConfiguration() - config.computeUnits = self.computeUnits - config.allowLowPrecisionAccumulationOnGPU = self.gpuAllowLowPrecision - - let mlModel = try MLModel(contentsOf: compiledURL, configuration: config) + } + + do { + // Identify function names before load so we can honor selection when building the configuration. 
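
The function-selection machinery added below ultimately reduces to the macOS 15 `functionName` knob on `MLModelConfiguration`. A minimal sketch of that configuration step (standalone, with assumed parameter names; the loader in this diff reads the name from the package's Manifest.json):

```swift
import CoreML

// Sketch: selecting a function from a multi-function .mlpackage (macOS 15+).
// The name must match a function the package actually declares.
func configuration(selecting functionName: String?, computeUnits: MLComputeUnits) -> MLModelConfiguration {
    let config = MLModelConfiguration()
    config.computeUnits = computeUnits
    if #available(macOS 15.0, *) {
        if let functionName { config.functionName = functionName }
    }
    return config
}
```
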
+ let functions = self.functionNames(from: url) + let selectedFn: String? = { + if let current = self.selectedFunction, functions.isEmpty || functions.contains(current) { return current } + if let stored = self.storedSelectedFunctionName, functions.isEmpty || functions.contains(stored) { return stored } + return functions.first + }() + + let (sourceURL, isCompiled, optimizedFlag) = try self.prepareSourceURL(for: url) + let compiledURL = try self.compileModelIfNeeded(sourceURL: sourceURL, isAlreadyCompiled: isCompiled) + + let config = self.makeConfiguration(selectedFunction: selectedFn) + let mlModel = try self.loadModel(at: compiledURL, configuration: config) try super.checkModelIO(modelDescription: mlModel.modelDescription) - + + let inferredKind = self.inferModelKind(from: mlModel.modelDescription) + let ideal = self.extractIdealFormat(from: mlModel.modelDescription) + let crop = self.deriveCropAndScale(from: ideal) + let stateful = self.detectStateful(from: mlModel.modelDescription) + let vnModel = try VNCoreMLModel(for: mlModel) - + Base.register(mlModel: mlModel, for: vnModel) + self.performWarmupIfPossible(vnModel: vnModel, ideal: ideal, crop: crop, functionName: selectedFn) + DispatchQueue.main.async { - if useSecurityScope { - url.stopAccessingSecurityScopedResource() - } - if !URLIsCompiled && !useSecurityScope { + self.wasOptimized = optimizedFlag || (self.optimizeOnLoad && isCompiled) + if !isCompiled && !useSecurityScope { self.originalModelURL = url self.bookmarkModel() } self.compiledModelURL = compiledURL self.model = vnModel self.setModelDescriptionInfo(mlModel.modelDescription) + self.idealFormat = ideal + self.cropAndScaleOption = crop + self.modelKind = inferredKind + self.supportsStatefulModel = stateful + self.availableFunctions = functions + self.selectedFunction = selectedFn self.name = url.lastPathComponent withAnimation { self.isValid = true self.isLoading = false } + if let warning = self.optimizationWarning, + ProcessInfo.processInfo.environment["XCTestConfigurationFilePath"] == nil { + self.showAlert(title: "Optimization skipped", message: warning) + } } } catch { #if DEBUG print(error) #endif DispatchQueue.main.async { - if useSecurityScope { - url.stopAccessingSecurityScopedResource() - } self.unSelectModel() super.showAlert(title: "Failed to compile/initiate your MLModel!") } @@ -162,11 +245,26 @@ class CoreMLModel: Base, ObservableObject { bookmarkData = nil model = nil name = nil + selectedFunction = nil + storedSelectedFunctionName = nil + availableFunctions = [] + modelKind = .unknown + cropAndScaleOption = .scaleFill + supportsStatefulModel = false + optimizationWarning = nil + CoreMLModel.sharedIdealFormat = nil + CoreMLModel.sharedCropAndScale = nil + CoreMLModel.sharedSelectedFunction = nil withAnimation { isValid = false isLoading = false } } + + @MainActor + func recordOptimizationWarning(_ message: String) { + optimizationWarning = message + } func getModelURLString() -> (original: (file: String, directory: String), compiled: (file: String, directory: String)) { var originalFile = "" @@ -231,7 +329,7 @@ class CoreMLModel: Base, ObservableObject { } } info.append(ModelDescription(category: "MetaData", items: metaDataItems)) - + if let predictedFeatureName = description.predictedFeatureName { info.append(ModelDescription(category: "Predicted Feature Name", items: [ModelDescription.Item(key: "predictedFeatureName", value: predictedFeatureName)])) } @@ -247,8 +345,286 @@ class CoreMLModel: Base, ObservableObject { } 
info.append(ModelDescription(category: "Class Labels", items: classLabelItems)) } - + modelDescription = info + + if let idealFormat { + cropAndScaleOption = deriveCropAndScale(from: idealFormat) + } + } + + private func optimizedDestination(for url: URL) throws -> URL { + let appSupport = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first! + let optimizedDir = appSupport.appendingPathComponent("CoreMLPlayer/Optimized", isDirectory: true) + try FileManager.default.createDirectory(at: optimizedDir, withIntermediateDirectories: true) + let baseName = url.deletingPathExtension().lastPathComponent + return optimizedDir.appendingPathComponent("\(baseName).optimized.mlmodel") + } + + private func prepareSourceURL(for url: URL) throws -> (URL, Bool, Bool) { + // If already compiled, just use as-is. + if url.pathExtension == "mlmodelc" { + return (url, true, false) + } + + var sourceURL = url + var optimized = false + + if optimizeOnLoad { + do { + let optimizedURL = try optimizedDestination(for: url) + try? FileManager.default.removeItem(at: optimizedURL) + + let quantized = try optimizeModelIfPossible(source: url, destination: optimizedURL) + if !quantized { + try FileManager.default.copyItem(at: url, to: optimizedURL) + } + + if validateOptimizedCandidate(source: url, candidate: optimizedURL) { + sourceURL = optimizedURL + optimized = true + } else { + try? FileManager.default.removeItem(at: optimizedURL) + optimized = false + sourceURL = url + } + } catch { + #if DEBUG + print("Optimization copy failed, falling back to original:", error) + #endif + optimized = false + sourceURL = url + } + } + return (sourceURL, false, optimized) + } + + /// Attempt to quantize/palettize using coremltools when available. Returns true on success. + @discardableResult + func optimizeModelIfPossible(source: URL, destination: URL) throws -> Bool { + let script = """ +import sys, pathlib, traceback +src = pathlib.Path(r\"""\(source.path)\""") +dst = pathlib.Path(r\"""\(destination.path)\""") +try: + import coremltools as ct +except ImportError: + sys.exit(2) + +try: + ml = ct.models.MLModel(src) + try: + from coremltools.models.neural_network.quantization_utils import quantize_weights + quantized = quantize_weights(ml, nbits=4, quantization_mode="linear") + except Exception: + try: + quantized = ct.optimize.coreml.quantization(ml, mode="linear8") + except Exception: + quantized = ml + quantized.save(dst) +except Exception: + traceback.print_exc() + sys.exit(3) +""" + + let proc = Process() + proc.executableURL = URL(fileURLWithPath: "/usr/bin/python3") + proc.arguments = ["-c", script] + let pipe = Pipe() + proc.standardError = pipe + proc.standardOutput = Pipe() + try proc.run() + proc.waitUntilExit() + + if proc.terminationStatus == 0, FileManager.default.fileExists(atPath: destination.path) { + Task { @MainActor in self.optimizationWarning = nil } + return true + } + + let stderr = String(data: pipe.fileHandleForReading.readDataToEndOfFile(), encoding: .utf8) ?? "" + #if DEBUG + print("coremltools optimization skipped/fell back:", stderr) + #endif + + let warning: String + switch proc.terminationStatus { + case 2: + warning = "coremltools is not installed; install it to enable Optimize on Load or turn the toggle off." + case 3: + warning = "coremltools failed to optimize this model; using the original copy instead." + default: + warning = "Optimization could not run (exit code \(proc.terminationStatus)); original model will be used." 
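
Stripped to its core, the optimization helper is a small subprocess pattern: run a short Python snippet with `python3 -c` and branch on the exit status, where codes 0/2/3 follow the embedded script's own convention. A bare-bones, hedged sketch of that pattern (helper name is illustrative):

```swift
import Foundation

// Bare-bones version of the subprocess call pattern used by optimizeModelIfPossible.
// Exit codes per the embedded script: 0 = optimized model written,
// 2 = coremltools not installed, 3 = optimization raised an error.
func runPythonSnippet(_ source: String) throws -> Int32 {
    let process = Process()
    process.executableURL = URL(fileURLWithPath: "/usr/bin/python3")
    process.arguments = ["-c", source]
    process.standardOutput = Pipe()   // discard stdout
    process.standardError = Pipe()    // capture stderr if diagnostics are needed
    try process.run()
    process.waitUntilExit()
    return process.terminationStatus
}
```
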
+ } + + Task { @MainActor in + self.optimizationWarning = warning + } + return false + } + + /// Ensure an optimized candidate is valid and no larger than the source. + private func validateOptimizedCandidate(source: URL, candidate: URL) -> Bool { + guard let sourceSize = fileSize(at: source), + let candidateSize = fileSize(at: candidate), + candidateSize <= sourceSize else { + Task { @MainActor in self.optimizationWarning = "Optimized model was larger than the original; reverting to the original file." } + return false + } + + do { + let compiled = try MLModel.compileModel(at: candidate) + try? FileManager.default.removeItem(at: compiled) + return true + } catch { + #if DEBUG + print("Optimized candidate failed validation:", error) + #endif + Task { @MainActor in self.optimizationWarning = "Optimized model failed to compile; using the original model instead." } + return false + } + } + + private func fileSize(at url: URL) -> Int64? { + guard let attrs = try? FileManager.default.attributesOfItem(atPath: url.path), + let size = attrs[.size] as? NSNumber else { + return nil + } + return size.int64Value + } + + private func compileModelIfNeeded(sourceURL: URL, isAlreadyCompiled: Bool) throws -> URL { + if isAlreadyCompiled { return sourceURL } + return try MLModel.compileModel(at: sourceURL) + } + + private func makeConfiguration(selectedFunction: String?) -> MLModelConfiguration { + let config = MLModelConfiguration() + config.computeUnits = computeUnits + config.allowLowPrecisionAccumulationOnGPU = gpuAllowLowPrecision && computeUnits != .cpuOnly + // Public symbol for allowsBackgroundTasks is not yet surfaced on macOS SDK; guard via selector to stay source-compatible. + if config.responds(to: Selector(("setAllowsBackgroundTasks:"))) { + config.setValue(allowBackgroundTasks, forKey: "allowsBackgroundTasks") + } + if let selectedFunction { + if #available(macOS 15.0, *) { + config.functionName = selectedFunction + } + } + return config + } + + private func loadModel(at url: URL, configuration: MLModelConfiguration) throws -> MLModel { + var loadedModel: MLModel? + var loadError: Error? + + let semaphore = DispatchSemaphore(value: 0) + MLModel.load(contentsOf: url, configuration: configuration) { result in + switch result { + case .success(let model): + loadedModel = model + case .failure(let error): + loadError = error + } + semaphore.signal() + } + semaphore.wait() + + if let model = loadedModel { + return model + } + if let error = loadError { + throw error + } + return try MLModel(contentsOf: url, configuration: configuration) + } + + private func extractIdealFormat(from description: MLModelDescription) -> (width: Int, height: Int, type: OSType)? { + for item in description.inputDescriptionsByName.values { + if let image = item.imageConstraint { + return (width: image.pixelsWide, height: image.pixelsHigh, type: image.pixelFormatType) + } + } + return nil + } + + private func deriveCropAndScale(from ideal: (width: Int, height: Int, type: OSType)?) -> VNImageCropAndScaleOption { + guard let ideal else { return .scaleFill } + return ideal.width == ideal.height ? 
.centerCrop : .scaleFit + } + + private func inferModelKind(from description: MLModelDescription) -> ModelKind { + if description.predictedFeatureName != nil || description.predictedProbabilitiesName != nil { + return .classifier + } + + let outputs = Array(description.outputDescriptionsByName.values) + let hasVectorOnly = outputs.allSatisfy { $0.type == .multiArray } + if hasVectorOnly { + return .embedding + } + + let hasProbabilities = outputs.contains { $0.type == .dictionary || $0.type == .string } + if hasProbabilities { + return .classifier + } + + let hasCoordinateLike = outputs.contains { desc in + if let shape = desc.multiArrayConstraint?.shape, shape.count >= 3 { + return true + } + return false + } + if hasCoordinateLike { + return .detector + } + + return .unknown + } + + private func detectStateful(from description: MLModelDescription) -> Bool { + let names = Array(description.inputDescriptionsByName.keys) + Array(description.outputDescriptionsByName.keys) + return names.contains { $0.lowercased().contains("state") } + } + + private func functionNames(from url: URL) -> [String] { + let packageURL: URL + switch url.pathExtension { + case "mlpackage": + packageURL = url + case "mlmodelc", "mlmodel": + let candidate = url.deletingPathExtension().appendingPathExtension("mlpackage") + guard FileManager.default.fileExists(atPath: candidate.path) else { return [] } + packageURL = candidate + default: + return [] + } + + let manifest = packageURL.appendingPathComponent("Manifest.json") + guard let data = try? Data(contentsOf: manifest), + let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any], + let fn = json["functions"] as? [[String: Any]] else { + return [] + } + return fn.compactMap { $0["name"] as? String } + } + + private func performWarmupIfPossible(vnModel: VNCoreMLModel, ideal: (width: Int, height: Int, type: OSType)?, crop: VNImageCropAndScaleOption, functionName: String?) { + let size = ideal.map { ($0.width, $0.height, $0.type) } ?? (224, 224, kCVPixelFormatType_32BGRA) + var pixelBuffer: CVPixelBuffer? + let status = CVPixelBufferCreate(kCFAllocatorDefault, size.0, size.1, size.2, nil, &pixelBuffer) + guard status == kCVReturnSuccess, let buffer = pixelBuffer else { return } + + CVPixelBufferLockBaseAddress(buffer, []) + if let base = CVPixelBufferGetBaseAddress(buffer) { + let length = CVPixelBufferGetDataSize(buffer) + // Fill with a neutral mid-gray to better resemble real input. + memset(base, 0x7F, length) + } + CVPixelBufferUnlockBaseAddress(buffer, []) + + DispatchQueue.global(qos: .utility).async { + _ = self.performObjectDetection(pixelBuffer: buffer, orientation: .up, vnModel: vnModel, functionName: functionName, cropAndScale: crop) + } } func saveBookmark(_ url: URL) { diff --git a/CoreMLPlayer/Classes/DrawSettings.swift b/CoreMLPlayer/Classes/DrawSettings.swift index cc18b61..8e545c0 100644 --- a/CoreMLPlayer/Classes/DrawSettings.swift +++ b/CoreMLPlayer/Classes/DrawSettings.swift @@ -46,7 +46,7 @@ class DrawSettings: ObservableObject { } // Storing Color in AppStorage -extension Array: RawRepresentable where Element: Codable { +extension Array: @retroactive RawRepresentable where Element: Codable { public init?(rawValue: String) { guard let data = rawValue.data(using: .utf8), let result = try? 
JSONDecoder().decode([Element].self, from: data) @@ -67,7 +67,7 @@ extension Array: RawRepresentable where Element: Codable { } // It's not very accurate, but it's enough for our use case -extension Color: RawRepresentable { +extension Color: @retroactive RawRepresentable { public init?(rawValue: String) { let c = rawValue.components(separatedBy: ",") if let r = Double(c[0]), diff --git a/CoreMLPlayer/Classes/Images.swift b/CoreMLPlayer/Classes/Images.swift index 9c555f1..f12ec3b 100644 --- a/CoreMLPlayer/Classes/Images.swift +++ b/CoreMLPlayer/Classes/Images.swift @@ -71,15 +71,17 @@ class Images: Base, Gallery, ObservableObject { func detectImageObjects(imageFile: ImageFile?, model: VNCoreMLModel?) -> [DetectedObject] { let (detectedObjects, detectionTime, _) = super.detectImageObjects(image: imageFile, model: model) - - DetectionStats.shared.addMultiple([ - Stats(key: "Det. Objects", value: "\(detectedObjects.count)"), - Stats(key: "Time", value: "\(detectionTime)"), - Stats(key: "-", value: ""), // Divider - Stats(key: "Width", value: "\(currentNSImageDetails().width)"), - Stats(key: "Height", value: "\(currentNSImageDetails().height)") - ]) - + + DispatchQueue.main.async { + DetectionStats.shared.addMultiple([ + Stats(key: "Det. Objects", value: "\(detectedObjects.count)"), + Stats(key: "Time", value: "\(detectionTime)"), + Stats(key: "-", value: ""), // Divider + Stats(key: "Width", value: "\(self.currentNSImageDetails().width)"), + Stats(key: "Height", value: "\(self.currentNSImageDetails().height)") + ]) + } + return detectedObjects } diff --git a/CoreMLPlayer/Classes/VideoDetection.swift b/CoreMLPlayer/Classes/VideoDetection.swift index 07b4ee0..72ab79f 100644 --- a/CoreMLPlayer/Classes/VideoDetection.swift +++ b/CoreMLPlayer/Classes/VideoDetection.swift @@ -9,6 +9,7 @@ import CoreML import AVKit import Vision import Combine +import ImageIO class VideoDetection: Base, ObservableObject { @Published var playMode = PlayModes.normal { @@ -32,14 +33,21 @@ class VideoDetection: Base, ObservableObject { private var fpsDisplay = 0 private var chartDuration: Duration = .zero private var playerOutput = AVPlayerItemVideoOutput(pixelBufferAttributes: nil) + private var videoOutputAttributes: [String: Any]? private var timeTracker = DispatchTime.now() private var lastDetectionTime: Double = 0 private var videoHasEnded = false + private var idealFormat: (width: Int, height: Int, type: OSType)? + private var videoOrientation: CGImagePropertyOrientation = .up + private var stateFrameCounter: Int = 0 + private var droppedFrames: Int = 0 + private var warmupCompleted = false var videoURL: URL? { didSet { - Task { - await prepareToPlay(videoURL: videoURL) + let url = videoURL + Task { [weak self, url] in + await self?.prepareToPlay(videoURL: url) } } } @@ -80,6 +88,9 @@ class VideoDetection: Base, ObservableObject { func disappearing() { playing = false frameObjects = [] + stateFrameCounter = 0 + droppedFrames = 0 + warmupCompleted = false DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) { DetectionStats.shared.items = [] } @@ -87,12 +98,20 @@ class VideoDetection: Base, ObservableObject { func setModel(_ vnModel: VNCoreMLModel?) { model = vnModel + stateFrameCounter = 0 + droppedFrames = 0 + warmupCompleted = false + } + + func setIdealFormat(_ format: (width: Int, height: Int, type: OSType)?) 
{ + idealFormat = format + configureVideoOutputIfNeeded(attachedTo: player?.currentItem) } func playManager() { if let playerItem = player?.currentItem, playerItem.currentTime() >= playerItem.duration { player?.seek(to: CMTime.zero) - DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) { + DispatchQueue.global(qos: .userInitiated).asyncAfter(deadline: .now() + 0.1) { self.detectObjectsInFrame() } videoHasEnded = true @@ -128,19 +147,44 @@ class VideoDetection: Base, ObservableObject { } func getRepeatInterval(_ reduceLastDetectionTime: Bool = true) -> Double { - var interval = 0.0 - if videoInfo.frameRate > 0 { - interval = (1 / videoInfo.frameRate) - } else { - interval = 30 + let nominalFrameInterval = videoInfo.frameRate > 0 ? (1.0 / videoInfo.frameRate) : (1.0 / 30.0) + + guard reduceLastDetectionTime else { + return nominalFrameInterval } - - if reduceLastDetectionTime { - interval = max((interval - lastDetectionTime), 0.02) + + let minInterval = max(0.02, lastDetectionTime * 0.5) + let maxInterval = max(nominalFrameInterval + lastDetectionTime, nominalFrameInterval * 2) + let adjusted = nominalFrameInterval - lastDetectionTime + + return min(max(adjusted, minInterval), maxInterval) + } + + /// Simple guard to verify detection time stays within a budget (defaults to 50ms). + func isWithinLatencyBudget(budgetMs: Double = 50) -> Bool { + return (lastDetectionTime * 1000) <= budgetMs + } + + private func configureVideoOutputIfNeeded(attachedTo playerItem: AVPlayerItem? = nil) { + let oldOutput = playerOutput + var attrs: [String: Any]? = nil + if let idealFormat { + attrs = [ + kCVPixelBufferPixelFormatTypeKey as String: idealFormat.type, + kCVPixelBufferWidthKey as String: idealFormat.width, + kCVPixelBufferHeightKey as String: idealFormat.height + ] + } + videoOutputAttributes = attrs + playerOutput = AVPlayerItemVideoOutput(pixelBufferAttributes: attrs) + + if let item = playerItem { + item.remove(oldOutput) + item.add(playerOutput) } - return interval } + @MainActor func prepareToPlay(videoURL: URL?) async { guard let url = videoURL, url.isFileURL, @@ -156,9 +200,12 @@ class VideoDetection: Base, ObservableObject { if let videoTrack = try await asset.loadTracks(withMediaType: .video).first { let (frameRate, size) = try await videoTrack.load(.nominalFrameRate, .naturalSize) + let transform = try await videoTrack.load(.preferredTransform) let (isPlayable, duration) = try await asset.load(.isPlayable, .duration) let playerItem = AVPlayerItem(asset: asset) + configureVideoOutputIfNeeded() playerItem.add(playerOutput) + self.videoOrientation = Self.orientation(from: transform) DispatchQueue.main.async { self.videoInfo.frameRate = Double(frameRate) @@ -189,6 +236,22 @@ class VideoDetection: Base, ObservableObject { #endif } } + + private static func orientation(from transform: CGAffineTransform) -> CGImagePropertyOrientation { + // Derived from AVAssetTrack preferredTransform conventions + switch (transform.a, transform.b, transform.c, transform.d) { + case (0, 1, -1, 0): + return .right + case (0, -1, 1, 0): + return .left + case (1, 0, 0, 1): + return .up + case (-1, 0, 0, -1): + return .down + default: + return .up + } + } func getPlayerItemIfContinuing(mode: PlayModes) -> AVPlayerItem? 
{ guard let playerItem = player?.currentItem, @@ -213,10 +276,12 @@ class VideoDetection: Base, ObservableObject { guard getPlayerItemIfContinuing(mode: .normal) != nil else { return } - - self.detectObjectsInFrame() { - DispatchQueue.global(qos: .userInitiated).asyncAfter(deadline: .now() + self.getRepeatInterval()) { [weak self] in - self?.startNormalDetection() + + DispatchQueue.global(qos: .userInitiated).async { + self.detectObjectsInFrame() { + DispatchQueue.global(qos: .userInitiated).asyncAfter(deadline: .now() + self.getRepeatInterval()) { [weak self] in + self?.startNormalDetection() + } } } } @@ -235,17 +300,27 @@ class VideoDetection: Base, ObservableObject { } func detectObjectsInFrame(completion: (() -> ())? = nil) { - guard let pixelBuffer = getPixelBuffer(), let model else { return } + guard let model else { completion?(); return } + guard let pixelBuffer = getPixelBuffer() else { + recordDroppedFrame() + completion?() + return + } // Process the frame - let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer) - let detectionResult = performObjectDetection(requestHandler: handler, vnModel: model) + #if DEBUG + VideoDetection.sharedLastVideoOrientation = videoOrientation + #endif + let cropOption = cropOptionForIdealFormat() + let detectionResult = performObjectDetection(pixelBuffer: pixelBuffer, orientation: videoOrientation, vnModel: model, functionName: CoreMLModel.sharedSelectedFunction, cropAndScale: cropOption) DispatchQueue.main.async { + let isWarmup = self.warmupCompleted == false + self.warmupCompleted = true self.frameObjects = detectionResult.objects self.fpsCounter += 1 let timePassed = DispatchTime.now().uptimeNanoseconds - self.timeTracker.uptimeNanoseconds - if timePassed >= 1_000_000_000 { + if timePassed >= 1_000_000_000 && !isWarmup { self.chartDuration += .seconds(1) if let detFPSDouble = Double(detectionResult.detectionFPS), self.playMode == .maxFPS @@ -268,23 +343,29 @@ class VideoDetection: Base, ObservableObject { var stats: [Stats] = [] - if self.playMode == .maxFPS { + if self.playMode == .maxFPS && !isWarmup { stats.append(Stats(key: "FPS", value: "\(self.fpsDisplay)")) stats.append(Stats(key: "Det. FPS", value: "\(detectionResult.detectionFPS)")) } let detTime = Double(detectionResult.detectionTime.replacingOccurrences(of: " ms", with: "")) ?? 0 self.lastDetectionTime = detTime / 1000 + self.stateFrameCounter += 1 - stats += [ - Stats(key: "Det. Objects", value: "\(detectionResult.objects.count)"), - Stats(key: "Det. Time", value: "\(detectionResult.detectionTime)"), - Stats(key: "-", value: ""), // Divider - Stats(key: "Width", value: "\(self.videoInfo.size.width)"), - Stats(key: "Height", value: "\(self.videoInfo.size.height)") - ] + if !isWarmup { + stats += [ + Stats(key: "Det. Objects", value: "\(detectionResult.objects.count)"), + Stats(key: "Det. Time", value: "\(detectionResult.detectionTime)"), + Stats(key: "Dropped Frames", value: "\(self.droppedFrames)"), + Stats(key: "-", value: ""), // Divider + Stats(key: "Width", value: "\(self.videoInfo.size.width)"), + Stats(key: "Height", value: "\(self.videoInfo.size.height)") + ] + } - DetectionStats.shared.addMultiple(stats) + if !stats.isEmpty { + DetectionStats.shared.addMultiple(stats) + } if completion != nil { completion!() @@ -298,10 +379,90 @@ class VideoDetection: Base, ObservableObject { func getPixelBuffer() -> CVPixelBuffer? 
{ if let currentTime = player?.currentTime() { - return playerOutput.copyPixelBuffer(forItemTime: currentTime, itemTimeForDisplay: nil) + if let buffer = playerOutput.copyPixelBuffer(forItemTime: currentTime, itemTimeForDisplay: nil) { + return buffer + } + + // Fallback: if ideal format was too strict, downgrade to default BGRA + if videoOutputAttributes != nil, let item = player?.currentItem { + let reattach = { + item.remove(self.playerOutput) + self.playerOutput = AVPlayerItemVideoOutput(pixelBufferAttributes: nil) + self.videoOutputAttributes = nil + item.add(self.playerOutput) + } + if Thread.isMainThread { + reattach() + } else { + DispatchQueue.main.sync { reattach() } + } + return playerOutput.copyPixelBuffer(forItemTime: currentTime, itemTimeForDisplay: nil) + } } return nil } + + private func recordDroppedFrame() { + droppedFrames += 1 + if warmupCompleted { + DetectionStats.shared.addMultiple([ + Stats(key: "Dropped Frames", value: "\(droppedFrames)") + ]) + } + } } +#if DEBUG +extension VideoDetection { + /// Test-only helper to allow deterministic configuration without relying on async player setup. + func setVideoInfoForTesting(_ info: (isPlayable: Bool, frameRate: Double, duration: CMTime, size: CGSize)) { + videoInfo = info + } + + /// Test-only helper to inject a last detection duration when verifying scheduling behavior. + func setLastDetectionTimeForTesting(_ value: Double) { + lastDetectionTime = value + } + + /// Test-only helper to set ideal format. + func setIdealFormatForTesting(_ format: (width: Int, height: Int, type: OSType)?) { + setIdealFormat(format) + } + + /// Test-only helper to set orientation. + func setVideoOrientationForTesting(_ orientation: CGImagePropertyOrientation) { + videoOrientation = orientation + } + + static var sharedLastVideoOrientation: CGImagePropertyOrientation? + /// Optional override buffer to drive fallback tests. + static var testFallbackPixelBuffer: CVPixelBuffer? + + func getPixelBufferAttributesForTesting() -> [String: Any]? { + return videoOutputAttributes + } + + /// Test-only helper to run detection on a supplied pixel buffer, returning the state counter. + func detectPixelBufferForTesting(_ pixelBuffer: CVPixelBuffer) -> (objects: [DetectedObject], stateFrameCounter: Int) { + guard let model else { return ([], stateFrameCounter) } + VideoDetection.sharedLastVideoOrientation = videoOrientation + let result = performObjectDetection(pixelBuffer: pixelBuffer, orientation: videoOrientation, vnModel: model, functionName: CoreMLModel.sharedSelectedFunction, cropAndScale: cropOptionForIdealFormat()) + stateFrameCounter += 1 + return (result.objects, stateFrameCounter) + } + + /// Force the pixel-buffer fallback path without needing an AVPlayer instance. + func forcePixelBufferFallbackForTesting() -> CVPixelBuffer? { + guard videoOutputAttributes != nil else { return nil } + videoOutputAttributes = nil + playerOutput = AVPlayerItemVideoOutput(pixelBufferAttributes: nil) + return VideoDetection.testFallbackPixelBuffer + } + + /// Expose internal counters for tests. 
+ func metricsForTesting() -> (droppedFrames: Int, warmupCompleted: Bool, lastDetectionTime: Double, stateFrameCounter: Int) { + return (droppedFrames, warmupCompleted, lastDetectionTime, stateFrameCounter) + } +} +#endif diff --git a/CoreMLPlayer/Constants.swift b/CoreMLPlayer/Constants.swift index a807db6..a740ba5 100644 --- a/CoreMLPlayer/Constants.swift +++ b/CoreMLPlayer/Constants.swift @@ -13,7 +13,10 @@ struct K { static let builtInModels: [(name: String, source: String)] = [ // name must match file name in app bundle (name: "YOLOv3Tiny", source: "https://github.com/pjreddie/darknet") ] - static let contentTypes: [UTType] = [UTType(importedAs: "com.apple.coreml.model")] + static let contentTypes: [UTType] = [ + UTType(importedAs: "com.apple.coreml.model"), + UTType(importedAs: "com.apple.coreml.mlpackage") + ] } struct LazyVGrid { diff --git a/CoreMLPlayer/Info.plist b/CoreMLPlayer/Info.plist index 9edbb1d..b015e4b 100644 --- a/CoreMLPlayer/Info.plist +++ b/CoreMLPlayer/Info.plist @@ -2,27 +2,44 @@ - UTImportedTypeDeclarations - - - UTTypeConformsTo - - - - UTTypeDescription - Core ML Model - UTTypeIcons - - UTTypeIdentifier - com.apple.coreml.model - UTTypeTagSpecification - - public.filename-extension - - mlmodel - - - - + UTImportedTypeDeclarations + + + + UTTypeConformsTo + + public.data + + UTTypeDescription + Core ML Model + UTTypeIdentifier + com.apple.coreml.model + UTTypeTagSpecification + + public.filename-extension + + mlmodel + + + + + + UTTypeConformsTo + + public.data + + UTTypeDescription + Core ML Package + UTTypeIdentifier + com.apple.coreml.mlpackage + UTTypeTagSpecification + + public.filename-extension + + mlpackage + + + + diff --git a/CoreMLPlayer/Structs/ImageFile.swift b/CoreMLPlayer/Structs/ImageFile.swift index 06528db..e9545c3 100644 --- a/CoreMLPlayer/Structs/ImageFile.swift +++ b/CoreMLPlayer/Structs/ImageFile.swift @@ -52,6 +52,11 @@ struct ImageFile: File, Identifiable { } func getNSImage() -> NSImage? { + #if DEBUG + if let override = ImageFile.nsImageOverrideForTests { + return override + } + #endif if nsImage != nil { return nsImage } else { @@ -86,3 +91,9 @@ extension ImageFile: Equatable { return lhs.id == rhs.id && lhs.url == rhs.url } } + +#if DEBUG +extension ImageFile { + static var nsImageOverrideForTests: NSImage? +} +#endif diff --git a/CoreMLPlayer/Views/CoreMLModelView.swift b/CoreMLPlayer/Views/CoreMLModelView.swift index 4213d30..ede5dab 100644 --- a/CoreMLPlayer/Views/CoreMLModelView.swift +++ b/CoreMLPlayer/Views/CoreMLModelView.swift @@ -72,7 +72,7 @@ struct CoreMLModelView: View { } .pickerStyle(.segmented) .padding(.vertical) - .onChange(of: coreMLModel.autoloadSelection) { _ in + .onChange(of: coreMLModel.autoloadSelection) { _, _ in coreMLModel.bookmarkModel() } @@ -86,8 +86,9 @@ struct CoreMLModelView: View { } .pickerStyle(.segmented) .padding(.vertical) - .onChange(of: coreMLModel.computeUnits) { _ in - switch coreMLModel.computeUnits { + .onChange(of: coreMLModel.computeUnits) { _, newValue in + // Drop GPU low-precision flag when GPU is not part of the selection. 
+ switch newValue { case .cpuAndGPU, .all: break default: @@ -108,13 +109,51 @@ struct CoreMLModelView: View { .pickerStyle(.segmented) .padding(.top) .padding(.bottom, 30) - .onChange(of: coreMLModel.gpuAllowLowPrecision) { _ in + .onChange(of: coreMLModel.gpuAllowLowPrecision) { _, _ in coreMLModel.reconfigure() } default: EmptyView() } - + + if let warning = coreMLModel.optimizationWarning { + HStack(spacing: 8) { + Image(systemName: "exclamationmark.triangle.fill") + .foregroundColor(.orange) + Text(warning) + .font(.footnote) + .foregroundStyle(.secondary) + } + .frame(maxWidth: .infinity, alignment: .leading) + .padding(.bottom, 8) + } + + Divider() + + if coreMLModel.availableFunctions.count > 1 { + Picker("Function", selection: Binding( + get: { coreMLModel.selectedFunction }, + set: { coreMLModel.selectedFunction = $0 } + )) { + ForEach(coreMLModel.availableFunctions, id: \.self) { fn in + Text(fn).tag(Optional(fn)) + } + } + .pickerStyle(.menu) + .padding(.vertical) + } else if let only = coreMLModel.availableFunctions.first { + HStack { + Text("Function:").bold() + Text(only) + } + .padding(.vertical, 4) + } + + HStack { + Text("Model Kind:").bold() + Text(coreMLModel.modelKind.rawValue.capitalized) + } + HStack { Button(action: coreMLModel.selectCoreMLModel) { Label("Change CoreML Model", systemImage: "m.square.fill") diff --git a/CoreMLPlayer/Views/DrawSettingsPopover.swift b/CoreMLPlayer/Views/DrawSettingsPopover.swift index fd1e547..92bdd81 100644 --- a/CoreMLPlayer/Views/DrawSettingsPopover.swift +++ b/CoreMLPlayer/Views/DrawSettingsPopover.swift @@ -182,7 +182,7 @@ struct DrawSettingsPopover: View { maximumValueLabel: Text("1"), label: {} ) - .onChange(of: drawSettings.confidenceLimit) { _ in + .onChange(of: drawSettings.confidenceLimit) { _, _ in presentConfidencePopover = true } .popover(isPresented: $presentConfidencePopover) { diff --git a/CoreMLPlayer/Views/ImageDetectionView.swift b/CoreMLPlayer/Views/ImageDetectionView.swift index c899fd8..df239dc 100644 --- a/CoreMLPlayer/Views/ImageDetectionView.swift +++ b/CoreMLPlayer/Views/ImageDetectionView.swift @@ -171,7 +171,17 @@ struct ImageDetectionView: View { } func setObjectLocations() { - detectedObjects = images.detectImageObjects(imageFile: currentImage, model: coreMLModel.model) + guard let currentImage else { + detectedObjects = [] + return + } + let model = coreMLModel.model + DispatchQueue.global(qos: .userInitiated).async { + let objects = images.detectImageObjects(imageFile: currentImage, model: model) + DispatchQueue.main.async { + self.detectedObjects = objects + } + } } var zoomGesture: some Gesture { diff --git a/CoreMLPlayer/Views/SubViews/DetectionView.swift b/CoreMLPlayer/Views/SubViews/DetectionView.swift index 74ec47e..489d8ec 100644 --- a/CoreMLPlayer/Views/SubViews/DetectionView.swift +++ b/CoreMLPlayer/Views/SubViews/DetectionView.swift @@ -24,21 +24,18 @@ struct DetectionView: View { var body: some View { GeometryReader { geometry in - if let videoSize, let videoRect = getVideoRect(geometrySize: geometry.size, videoSize: videoSize) { - ZStack { - VStack { EmptyView() } - .frame(width: videoRect.width, height: videoRect.height) - .offset(x: videoRect.origin.x, y: videoRect.origin.y) - .overlay { - GeometryReader { videoGeometry in - forEachBB(detectedObjects: detectedObjects, geometry: videoGeometry) - } - } - } - .frame(maxWidth: .infinity, maxHeight: .infinity) - } else { - forEachBB(detectedObjects: detectedObjects, geometry: geometry) + let videoRect = getVideoRect(geometrySize: geometry.size, 
videoSize: videoSize ?? CGSize(width: 0, height: 0)) + ZStack { + VStack { EmptyView() } + .frame(width: videoRect.width, height: videoRect.height) + .offset(x: videoRect.origin.x, y: videoRect.origin.y) + .overlay { + GeometryReader { videoGeometry in + forEachBB(detectedObjects: detectedObjects, geometry: videoGeometry) + } + } } + .frame(maxWidth: .infinity, maxHeight: .infinity) } } diff --git a/CoreMLPlayer/Views/VideoDetectionView.swift b/CoreMLPlayer/Views/VideoDetectionView.swift index 4a6dc30..19bb212 100644 --- a/CoreMLPlayer/Views/VideoDetectionView.swift +++ b/CoreMLPlayer/Views/VideoDetectionView.swift @@ -82,8 +82,8 @@ struct VideoDetectionView: View { Text("FPS Mode") Toggle("", isOn: $maxFPSMode) .toggleStyle(.switch) - .onChange(of: maxFPSMode) { enabled in - if enabled { + .onChange(of: maxFPSMode) { _, newValue in + if newValue { videoDetection.playMode = .maxFPS } else { videoDetection.playMode = .normal diff --git a/CoreMLPlayerTests/BaseDetectionTests.swift b/CoreMLPlayerTests/BaseDetectionTests.swift new file mode 100644 index 0000000..243e692 --- /dev/null +++ b/CoreMLPlayerTests/BaseDetectionTests.swift @@ -0,0 +1,115 @@ +import XCTest +import Vision +import CoreML +import CoreVideo +import AppKit +@testable import CoreML_Player + +/// Tests for lightweight image detection helpers that don't require full model execution. +final class BaseDetectionTests: XCTestCase { + func testDetectImageObjectsReturnsEmptyWhenInputsMissing() { + let sut = Base() + let output = sut.detectImageObjects(image: nil, model: nil) + XCTAssertTrue(output.objects.isEmpty) + XCTAssertEqual(output.detectionTime, "") + XCTAssertEqual(output.detectionFPS, "") + } + + func testClassificationConversionPreservesLabelsAndMarksClassification() { + let sut = Base() + let labelA = VNClassificationObservation(identifier: "cat", confidence: 0.8) + let labelB = VNClassificationObservation(identifier: "dog", confidence: 0.6) + let duration: Duration = .milliseconds(10) // 100 FPS + + let result = sut.asDetectedObjects( + visionObservationResults: [labelA, labelB], + detectionTime: duration + ) + + guard let object = result.objects.first else { + return XCTFail("Expected one classification object") + } + + XCTAssertTrue(object.isClassification) + XCTAssertEqual(object.otherLabels.count, 2) + XCTAssertEqual(object.width, 0.9, accuracy: 0.001) // synthetic box used for classification + XCTAssertEqual(object.height, 0.85, accuracy: 0.001) + XCTAssertEqual(result.detectionFPS, "100") + XCTAssertTrue(result.detectionTime.contains("ms")) + } + + func testImageOrientationIsCapturedAndUsed() throws { + let model = try VNCoreMLModel(for: MLModel(contentsOf: compiledYOLOURL())) + let base = Base() + let portraitImage = Self.makeImage(width: 40, height: 80) + let imageFile = ImageFile(name: "portrait", type: "png", url: URL(fileURLWithPath: "/tmp/portrait.png")) + ImageFile.nsImageOverrideForTests = portraitImage + defer { ImageFile.nsImageOverrideForTests = nil } + + _ = base.detectImageObjects(image: imageFile, model: model) + + XCTAssertEqual(Base.sharedLastImageOrientation, .up) // default when no EXIF, but captured + } + + func testCropAndScaleFollowsIdealFormatSquareCenterCrop() { + CoreMLModel.sharedIdealFormat = (width: 224, height: 224, type: kCVPixelFormatType_32BGRA) + let base = Base() + XCTAssertEqual(base.cropOptionForIdealFormat(), .centerCrop) + + CoreMLModel.sharedIdealFormat = (width: 224, height: 112, type: kCVPixelFormatType_32BGRA) + XCTAssertEqual(base.cropOptionForIdealFormat(), .scaleFit) + } 
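+    // The crop-selection tests around this comment pin down the rule the detection
+    // pipeline relies on: a square ideal input format is expected to map to
+    // .centerCrop, while any non-square format maps to .scaleFit. A minimal sketch
+    // of that rule (the nil fallback shown here is an assumption, not asserted by
+    // these tests):
+    //
+    //     guard let ideal = CoreMLModel.sharedIdealFormat else { return .scaleFill }
+    //     return ideal.width == ideal.height ? .centerCrop : .scaleFit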
+ + func testCropOptionMatchesIdealFormatShapes() { + CoreMLModel.sharedIdealFormat = (width: 320, height: 160, type: kCVPixelFormatType_32BGRA) + XCTAssertEqual(Base().cropOptionForIdealFormat(), .scaleFit) + CoreMLModel.sharedIdealFormat = nil + } + + func testPerformObjectDetectionCapturesErrors() { + let base = Base() + let bogusURL = URL(fileURLWithPath: "/tmp/does_not_exist.png") + let handler = VNImageRequestHandler(url: bogusURL) + let vnModel = try! VNCoreMLModel(for: MLModel(contentsOf: try! compiledYOLOURL())) + + _ = base.performObjectDetection(requestHandler: handler, vnModel: vnModel) + + XCTAssertNotNil(Base.sharedLastError) + } + + // MARK: - Helpers + private func compiledYOLOURL() throws -> URL { + let bundle = Bundle(for: type(of: self)) + if let compiledURL = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodelc") { + return compiledURL + } + guard let rawURL = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodel") else { + throw XCTSkip("YOLOv3Tiny model not present in test bundle") + } + return try MLModel.compileModel(at: rawURL) + } + + private static func makeImage(width: Int, height: Int) -> NSImage { + let rep = NSBitmapImageRep( + bitmapDataPlanes: nil, + pixelsWide: width, + pixelsHigh: height, + bitsPerSample: 8, + samplesPerPixel: 4, + hasAlpha: true, + isPlanar: false, + colorSpaceName: .deviceRGB, + bytesPerRow: width * 4, + bitsPerPixel: 32 + )! + rep.size = NSSize(width: width, height: height) + NSGraphicsContext.saveGraphicsState() + NSGraphicsContext.current = NSGraphicsContext(bitmapImageRep: rep) + NSColor.red.setFill() + NSBezierPath(rect: NSRect(x: 0, y: 0, width: width, height: height)).fill() + NSGraphicsContext.restoreGraphicsState() + let image = NSImage(size: NSSize(width: width, height: height)) + image.addRepresentation(rep) + return image + } +} diff --git a/CoreMLPlayerTests/CoreMLModelFormatTests.swift b/CoreMLPlayerTests/CoreMLModelFormatTests.swift new file mode 100644 index 0000000..5438cdb --- /dev/null +++ b/CoreMLPlayerTests/CoreMLModelFormatTests.swift @@ -0,0 +1,56 @@ +import XCTest +import CoreML +@testable import CoreML_Player + +private func compiledYOLOURL(testCase: XCTestCase) throws -> URL { + let bundle = Bundle(for: type(of: testCase)) + if let compiledURL = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodelc") { + return compiledURL + } + guard let rawURL = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodel") else { + XCTFail("Missing YOLOv3Tiny model in test bundle") + throw XCTSkip("Model fixture unavailable") + } + return try MLModel.compileModel(at: rawURL) +} + +private func allowLowPrecisionIfSupported(configuration: MLModelConfiguration, allowLowPrecision: Bool) -> MLModelConfiguration { + let config = configuration + config.allowLowPrecisionAccumulationOnGPU = allowLowPrecision && configuration.computeUnits != .cpuOnly + return config +} + +/// Tests focused on CoreMLModel's handling of model descriptions, IO guardrails, and configuration flags. 
+final class CoreMLModelFormatTests: XCTestCase { + func testIdealFormatCapturedFromModelDescription() throws { + let model = try MLModel(contentsOf: compiledYOLOURL(testCase: self)) + let sut = CoreMLModel() + sut.setModelDescriptionInfo(model.modelDescription) + + guard let ideal = sut.idealFormat else { + return XCTFail("idealFormat was not populated from model description") + } + + XCTAssertGreaterThan(ideal.width, 0) + XCTAssertGreaterThan(ideal.height, 0) + XCTAssertNotEqual(ideal.type, 0) + } + + func testModelIOValidationUsesFeatureDescriptionsPositive() throws { + let mlModel = try MLModel(contentsOf: compiledYOLOURL(testCase: self)) + let base = Base() + XCTAssertNoThrow(try base.checkModelIO(modelDescription: mlModel.modelDescription)) + } + + func testLowPrecisionDisabledWhenCPUOnly() { + let config = MLModelConfiguration() + config.computeUnits = .cpuOnly + var applied = allowLowPrecisionIfSupported(configuration: config, allowLowPrecision: true) + XCTAssertFalse(applied.allowLowPrecisionAccumulationOnGPU) + + let configGPU = MLModelConfiguration() + configGPU.computeUnits = .cpuAndGPU + applied = allowLowPrecisionIfSupported(configuration: configGPU, allowLowPrecision: true) + XCTAssertTrue(applied.allowLowPrecisionAccumulationOnGPU) + } +} diff --git a/CoreMLPlayerTests/CoreMLModelOptimizationTests.swift b/CoreMLPlayerTests/CoreMLModelOptimizationTests.swift new file mode 100644 index 0000000..1a584c6 --- /dev/null +++ b/CoreMLPlayerTests/CoreMLModelOptimizationTests.swift @@ -0,0 +1,183 @@ +import XCTest +import CoreML +@testable import CoreML_Player + +/// Tests that exercise CoreMLModel's optimize-on-load behaviors and related fallbacks. +final class CoreMLModelOptimizationTests: XCTestCase { + private func rawModelURL() throws -> URL { + let bundle = Bundle(for: type(of: self)) + if let raw = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodel") { + return raw + } + throw XCTSkip("Raw YOLOv3Tiny.mlmodel not bundled; optimization tests skipped.") + } + + private func compiledModelURL() throws -> URL { + let bundle = Bundle(for: type(of: self)) + if let compiledURL = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodelc") { + return compiledURL + } + guard let rawURL = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodel") else { + XCTFail("Missing YOLOv3Tiny model in test bundle") + throw XCTSkip("Model fixture unavailable") + } + return try MLModel.compileModel(at: rawURL) + } + + func testOptimizeToggleMarksModelAsOptimized() throws { + let sut = CoreMLModel() + sut.optimizeOnLoad = true + let bundle = Bundle(for: type(of: self)) + let url: URL + if let raw = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodel") { + url = raw + } else { + url = try compiledModelURL() + } + sut.loadTheModel(url: url) + + let expectation = expectation(description: "model optimized") + DispatchQueue.main.asyncAfter(deadline: .now() + 1.0) { + if sut.wasOptimized && sut.isValid { + expectation.fulfill() + } + } + wait(for: [expectation], timeout: 4.0) + + XCTAssertTrue(sut.wasOptimized, "optimizeOnLoad should mark the model as optimized") + XCTAssertTrue(sut.isValid, "model should be valid after load") + } + + func testOptimizeOnLoadProducesNonLargerCopyAndValidModel() throws { + let source = try rawModelURL() + let sourceSize = try fileSize(at: source) + + let sut = CoreMLModel() + sut.optimizeOnLoad = true + sut.computeUnits = .cpuOnly // deterministic + + sut.loadTheModel(url: source) + + let exp = expectation(description: "model loads") + 
DispatchQueue.main.asyncAfter(deadline: .now() + 6.0) { + if sut.isValid { + exp.fulfill() + } + } + wait(for: [exp], timeout: 10.0) + + let optimizedURL = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first! + .appendingPathComponent("CoreMLPlayer/Optimized/\(source.deletingPathExtension().lastPathComponent).optimized.mlmodel") + XCTAssertTrue(FileManager.default.fileExists(atPath: optimizedURL.path)) + + let optimizedSize = try fileSize(at: optimizedURL) + XCTAssertLessThanOrEqual(optimizedSize, sourceSize, "Optimized copy should not exceed original size") + XCTAssertTrue(sut.wasOptimized) + XCTAssertTrue(sut.isValid) + } + + func testOptimizeOnLoadFallsBackWhenCandidateInvalid() throws { + final class InvalidOptimizingModel: CoreMLModel { + override func optimizeModelIfPossible(source: URL, destination: URL) throws -> Bool { + // Write an invalid payload to force validation failure. + let data = Data(repeating: 0xFF, count: 1024) + try data.write(to: destination) + return true + } + } + + let source = try rawModelURL() + let sut = InvalidOptimizingModel() + sut.optimizeOnLoad = true + sut.computeUnits = .cpuOnly + + sut.loadTheModel(url: source) + + let exp = expectation(description: "model falls back after invalid optimization") + DispatchQueue.main.asyncAfter(deadline: .now() + 6.0) { + if sut.isValid { + exp.fulfill() + } + } + wait(for: [exp], timeout: 10.0) + + XCTAssertFalse(sut.wasOptimized) + + let optimizedURL = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first! + .appendingPathComponent("CoreMLPlayer/Optimized/\(source.deletingPathExtension().lastPathComponent).optimized.mlmodel") + XCTAssertFalse(FileManager.default.fileExists(atPath: optimizedURL.path), "Invalid candidate should be removed") + } + + func testOptimizeOnLoadSurfacesMissingToolchainWarning() throws { + final class MissingToolModel: CoreMLModel { + override func optimizeModelIfPossible(source: URL, destination: URL) throws -> Bool { + Task { @MainActor in self.optimizationWarning = "coremltools is not installed; install it to enable Optimize on Load or turn the toggle off." } + return false + } + } + + let source = try rawModelURL() + let sut = MissingToolModel() + sut.optimizeOnLoad = true + sut.computeUnits = .cpuOnly + + sut.loadTheModel(url: source) + + let exp = expectation(description: "warning surfaced") + DispatchQueue.main.asyncAfter(deadline: .now() + 2.0) { + if sut.optimizationWarning != nil { + exp.fulfill() + } + } + wait(for: [exp], timeout: 5.0) + + XCTAssertEqual(sut.optimizationWarning, "coremltools is not installed; install it to enable Optimize on Load or turn the toggle off.") + } + + func testOptimizeOnLoadShrinksModelWhenCoremltoolsPresent() throws { + guard coremltoolsAvailable() else { throw XCTSkip("coremltools unavailable") } + + let source = try rawModelURL() + let sourceSize = try fileSize(at: source) + + let sut = CoreMLModel() + sut.optimizeOnLoad = true + sut.computeUnits = .cpuOnly + sut.loadTheModel(url: source) + + let exp = expectation(description: "model loads optimized") + DispatchQueue.main.asyncAfter(deadline: .now() + 8.0) { + if sut.isValid { + exp.fulfill() + } + } + wait(for: [exp], timeout: 12.0) + + let optimizedURL = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first! 
+ .appendingPathComponent("CoreMLPlayer/Optimized/\(source.deletingPathExtension().lastPathComponent).optimized.mlmodel") + + let optimizedSize = try fileSize(at: optimizedURL) + XCTAssertLessThan(optimizedSize, sourceSize, "Optimized model should be smaller after quantization") + XCTAssertTrue(sut.wasOptimized) + XCTAssertTrue(sut.isValid) + } + + // MARK: - Helpers + private func fileSize(at url: URL) throws -> Int64 { + let attrs = try FileManager.default.attributesOfItem(atPath: url.path) + return (attrs[.size] as? NSNumber)?.int64Value ?? 0 + } + + private func coremltoolsAvailable() -> Bool { + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/python3") + process.arguments = ["-c", "import coremltools"] + do { + try process.run() + process.waitUntilExit() + return process.terminationStatus == 0 + } catch { + return false + } + } +} diff --git a/CoreMLPlayerTests/FixtureBuilder.swift b/CoreMLPlayerTests/FixtureBuilder.swift new file mode 100644 index 0000000..c3b7f6d --- /dev/null +++ b/CoreMLPlayerTests/FixtureBuilder.swift @@ -0,0 +1,153 @@ +import Foundation +import XCTest +import CoreML + +/// Utility to lazily generate lightweight Core ML fixtures (multi-function, stateful) using Python + coremltools when available. +/// Tests call these helpers and skip gracefully if tooling or platform support is missing. +enum FixtureBuilder { + enum FixtureError: Error { + case generationFailed(String) + } + + /// Creates (or reuses) a tiny multi-function model and returns the compiled URL plus function names. + static func ensureMultiFunctionModel() throws -> (compiledURL: URL, functionNames: [String]) { + let tmpDir = URL(fileURLWithPath: NSTemporaryDirectory()).appendingPathComponent("CMPFixtures", isDirectory: true) + try FileManager.default.createDirectory(at: tmpDir, withIntermediateDirectories: true) + let modelURL = tmpDir.appendingPathComponent("multifunction.mlpackage") + let compiledURL = tmpDir.appendingPathComponent("multifunction.mlmodelc") + + if !FileManager.default.fileExists(atPath: compiledURL.path) { + try generateFixture(kind: .multifunction, outputURL: modelURL) + _ = try compileModel(at: modelURL, compiledURL: compiledURL) + } + + // Probe function names from the package manifest (simple JSON read) + let manifestURL = modelURL.appendingPathComponent("Manifest.json") + let data = try Data(contentsOf: manifestURL) + let json = try JSONSerialization.jsonObject(with: data) as? [String: Any] + let fnames = (json?["functions"] as? [[String: Any]])?.compactMap { $0["name"] as? String } ?? [] + guard !fnames.isEmpty else { + throw XCTSkip("Multi-function fixture manifest missing function names; likely not supported on this platform.") + } + return (compiledURL, fnames) + } + + /// Creates (or reuses) a tiny stateful model; returns compiled URL. 
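+    /// Skips (via `XCTSkip`) when coremltools is unavailable or sandbox restrictions block fixture generation.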
+ static func ensureStatefulModel() throws -> URL { + let tmpDir = URL(fileURLWithPath: NSTemporaryDirectory()).appendingPathComponent("CMPFixtures", isDirectory: true) + try FileManager.default.createDirectory(at: tmpDir, withIntermediateDirectories: true) + let modelURL = tmpDir.appendingPathComponent("stateful.mlpackage") + let compiledURL = tmpDir.appendingPathComponent("stateful.mlmodelc") + + if !FileManager.default.fileExists(atPath: compiledURL.path) { + try generateFixture(kind: .stateful, outputURL: modelURL) + _ = try compileModel(at: modelURL, compiledURL: compiledURL) + } + return compiledURL + } + + private enum FixtureKind: String { + case multifunction + case stateful + } + + /// Runs a tiny Python script that builds the requested model via coremltools. + private static func generateFixture(kind: FixtureKind, outputURL: URL) throws { + let script: String + switch kind { + case .multifunction: + script = """ +import sys, json, pathlib +try: + import coremltools as ct +except ImportError: + sys.exit(2) + +out = pathlib.Path(r\"""\(outputURL.path)\""") +out.parent.mkdir(parents=True, exist_ok=True) + +# Build two trivial functions (y = x and y = x + 1) +def make_model(offset): + from coremltools.models import neural_network as nn + from coremltools.models import datatypes + input_features = [("x", datatypes.Array(1))] + output_features = [("y", datatypes.Array(1))] + builder = nn.NeuralNetworkBuilder(input_features, output_features) + builder.add_elementwise(name="add", input_names=["x"], output_name="y", mode="ADD", alpha=offset) + return builder.spec + +specs = {"identity": make_model(0.0), "plus_one": make_model(1.0)} + +try: + ct.models.multifunction.save_multifunction(specs, str(out)) +except Exception as e: + sys.stderr.write(str(e)) + sys.exit(3) +""" + case .stateful: + script = """ +import sys, pathlib +try: + import coremltools as ct +except ImportError: + sys.exit(2) + +out = pathlib.Path(r\"""\(outputURL.path)\""") +out.parent.mkdir(parents=True, exist_ok=True) + +# Minimal stateful counter: state_out = state_in + x; y = state_out +try: + from coremltools.models import datatypes + from coremltools.models.neural_network import NeuralNetworkBuilder + + input_features = [("x", datatypes.Array(1)), ("state_in", datatypes.Array(1))] + output_features = [("y", datatypes.Array(1)), ("state_out", datatypes.Array(1))] + builder = NeuralNetworkBuilder(input_features, output_features, has_skip_connections=False) + builder.add_elementwise(name="acc", input_names=["x", "state_in"], output_name="y", mode="ADD") + builder.add_copy(name="copy_state", input_name="y", output_name="state_out") + builder.spec.description.statefulNetwork.isStateful = True + builder.spec.description.input[1].isOptional = False + builder.spec.description.output[1].isLossLayer = False + + mlmodel = ct.models.MLModel(builder.spec) + mlmodel.save(str(out)) +except Exception as e: + sys.stderr.write(str(e)) + sys.exit(3) +""" + } + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/python3") + process.arguments = ["-c", script] + + let pipe = Pipe() + process.standardError = pipe + process.standardOutput = Pipe() + try process.run() + process.waitUntilExit() + + let err = String(data: pipe.fileHandleForReading.readDataToEndOfFile(), encoding: .utf8) ?? 
"" + + if process.terminationStatus == 2 { + throw XCTSkip("coremltools not available; skipping \(kind.rawValue) fixture generation.") + } + if process.terminationStatus != 0 { + if err.contains("App Sandbox") || err.contains("xcrun: error") { + throw XCTSkip("Fixture generation not permitted in sandbox: \(err)") + } + throw FixtureError.generationFailed(err) + } + } + + @discardableResult + private static func compileModel(at url: URL, compiledURL: URL) throws -> URL { + if FileManager.default.fileExists(atPath: compiledURL.path) { + return compiledURL + } + let compiled = try MLModel.compileModel(at: url) + try? FileManager.default.removeItem(at: compiledURL) + try FileManager.default.copyItem(at: compiled, to: compiledURL) + return compiledURL + } +} diff --git a/CoreMLPlayerTests/Info.plist b/CoreMLPlayerTests/Info.plist new file mode 100644 index 0000000..1dac20b --- /dev/null +++ b/CoreMLPlayerTests/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + BNDL + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + diff --git a/CoreMLPlayerTests/ModelLoadingTests.swift b/CoreMLPlayerTests/ModelLoadingTests.swift new file mode 100644 index 0000000..5a621d1 --- /dev/null +++ b/CoreMLPlayerTests/ModelLoadingTests.swift @@ -0,0 +1,59 @@ +import XCTest +@testable import CoreML_Player +import CoreML +import Vision + +final class ModelLoadingTests: XCTestCase { + private func compiledModelURL() throws -> URL { + if let compiledURL = Bundle(for: type(of: self)).url(forResource: "YOLOv3Tiny", withExtension: "mlmodelc") { + return compiledURL + } + + guard let rawURL = Bundle(for: type(of: self)).url(forResource: "YOLOv3Tiny", withExtension: "mlmodel") else { + XCTFail("Missing YOLOv3Tiny model in test bundle") + throw XCTSkip("Model fixture unavailable") + } + + return try MLModel.compileModel(at: rawURL) + } + + func testModelCompilationAndConfiguration() throws { + let compiledURL = try compiledModelURL() + let configuration = MLModelConfiguration() + configuration.computeUnits = .cpuOnly + configuration.allowLowPrecisionAccumulationOnGPU = true + + let mlModel = try MLModel(contentsOf: compiledURL, configuration: configuration) + + let sut = CoreMLModel() + XCTAssertNoThrow(try sut.checkModelIO(modelDescription: mlModel.modelDescription)) + XCTAssertEqual(configuration.computeUnits, .cpuOnly) + XCTAssertTrue(configuration.allowLowPrecisionAccumulationOnGPU) + } + + func testModelWarmupRequestSucceeds() throws { + let compiledURL = try compiledModelURL() + let mlModel = try MLModel(contentsOf: compiledURL) + let vnModel = try VNCoreMLModel(for: mlModel) + + let handler = VNImageRequestHandler(cgImage: CGImage.mockSquare, options: [:]) + let request = VNCoreMLRequest(model: vnModel) + request.imageCropAndScaleOption = .centerCrop + + // Running the warm-up request can print a benign warning about missing + // `precisionRecallCurves` on non-updatable models; the model still + // executes correctly, so we keep the request and only assert success. 
+ + let expectation = expectation(description: "Warmup completes") + DispatchQueue.global(qos: .userInitiated).async { + do { + try handler.perform([request]) + expectation.fulfill() + } catch { + XCTFail("Warmup failed: \(error)") + } + } + + wait(for: [expectation], timeout: 5.0) + } +} diff --git a/CoreMLPlayerTests/MultiFunctionModelTests.swift b/CoreMLPlayerTests/MultiFunctionModelTests.swift new file mode 100644 index 0000000..c37c0bb --- /dev/null +++ b/CoreMLPlayerTests/MultiFunctionModelTests.swift @@ -0,0 +1,110 @@ +import XCTest +import CoreML +import Vision +@testable import CoreML_Player + +/// Tests covering multi-function model selection and stateful model behavior. +final class MultiFunctionModelTests: XCTestCase { + func testMultiFunctionModelSelectionExecutesChosenFunction() throws { + let (compiledURL, functions) = try FixtureBuilder.ensureMultiFunctionModel() + let mlModel = try MLModel(contentsOf: compiledURL) + guard functions.count >= 2 else { + throw XCTSkip("Insufficient functions in generated model") + } + + let fn = functions[1] // choose second function (plus_one) + CoreMLModel.sharedSelectedFunction = fn + + let vnModel = try VNCoreMLModel(for: mlModel) + let handler = VNImageRequestHandler(cgImage: CGImage.mockSquare, options: [:]) + let base = Base() + + let result = base.performObjectDetection(requestHandler: handler, vnModel: vnModel, functionName: fn) + + // We don't care about numeric outputs, only that the function name is plumbed and captured. + XCTAssertEqual(Base.sharedLastFunctionName, fn) + XCTAssertNotNil(result.detectionTime) + } + + func testSelectedFunctionPropagatesToRequests() throws { + let sut = try makeStubVideoDetection() + CoreMLModel.sharedSelectedFunction = "fn_a" + let exp = expectation(description: "detection") + sut.detectObjectsInFrame { exp.fulfill() } + wait(for: [exp], timeout: 1.0) + XCTAssertEqual(Base.sharedLastFunctionName, "fn_a") + } + + func testAutoloadRestoresSelectedFunction() throws { + let (compiledURL, functions) = try FixtureBuilder.ensureMultiFunctionModel() + guard functions.count > 1 else { throw XCTSkip("insufficient functions") } + + let sut = CoreMLModel() + sut.autoloadSelection = .reloadCompiled + sut.compiledModelURL = compiledURL + sut.storedSelectedFunctionName = functions[1] + sut.selectedFunction = nil + + let exp = expectation(description: "autoload loads model") + sut.autoload() + DispatchQueue.main.asyncAfter(deadline: .now() + 2.0) { + if sut.isValid { exp.fulfill() } + } + wait(for: [exp], timeout: 6.0) + + XCTAssertEqual(sut.selectedFunction, functions[1]) + if let model = sut.model { + _ = Base().performObjectDetection(cgImage: CGImage.mockSquare, orientation: .up, vnModel: model, functionName: sut.selectedFunction) + XCTAssertEqual(Base.sharedLastFunctionName, functions[1]) + } + sut.unSelectModel() + } + + func testStatefulModelPersistsAcrossCalls() throws { + let compiledURL = try FixtureBuilder.ensureStatefulModel() + let mlModel = try MLModel(contentsOf: compiledURL) + let vnModel = try VNCoreMLModel(for: mlModel) + + // Build two input buffers; we reuse the same buffer to simulate sequential frames. + var pb: CVPixelBuffer? + CVPixelBufferCreate(nil, 1, 1, kCVPixelFormatType_32BGRA, nil, &pb) + guard let buffer = pb else { return XCTFail("Failed to create pixel buffer") } + + let vd = VideoDetection() + vd.setModel(vnModel) + vd.setVideoOrientationForTesting(.up) + + // First detection warms up state; second should increment the state counter (tracked internally). 
+ _ = vd.detectPixelBufferForTesting(buffer) + let second = vd.detectPixelBufferForTesting(buffer) + XCTAssertGreaterThan(second.stateFrameCounter, 1) + } + + // MARK: - Helpers + private func compiledModelURL() throws -> URL { + let bundle = Bundle(for: type(of: self)) + if let compiled = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodelc") { + return compiled + } + guard let raw = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodel") else { + XCTFail("Missing YOLOv3Tiny model in test bundle") + throw XCTSkip("Model fixture unavailable") + } + return try MLModel.compileModel(at: raw) + } + + private func makeStubVideoDetection( + detectionTimeMs: Double = 12, + detectionFPS: String = "90", + objects: Int = 2 + ) throws -> StubVideoDetection { + let model = try VNCoreMLModel(for: MLModel(contentsOf: compiledModelURL())) + let sut = StubVideoDetection( + stubDetectionTimeMs: detectionTimeMs, + stubDetectionFPS: detectionFPS, + stubObjects: objects + ) + sut.setModel(model) + return sut + } +} diff --git a/CoreMLPlayerTests/OverlaySnapshotTests.swift b/CoreMLPlayerTests/OverlaySnapshotTests.swift new file mode 100644 index 0000000..ccac891 --- /dev/null +++ b/CoreMLPlayerTests/OverlaySnapshotTests.swift @@ -0,0 +1,50 @@ +import XCTest +import AppKit +@testable import CoreML_Player + +/// Lightweight snapshot-style checks for overlay math: draws a box onto a bitmap and validates pixel hits. +final class OverlaySnapshotTests: XCTestCase { + func testDetectionOverlayDrawsAtExpectedPosition() { + let size = CGSize(width: 200, height: 100) + let rect = CGRect(x: 50, y: 25, width: 100, height: 25) // Expected after rectForNormalizedRect + var buffer = [UInt8](repeating: 0, count: Int(size.width * size.height * 4)) + guard let ctx = CGContext( + data: &buffer, + width: Int(size.width), + height: Int(size.height), + bitsPerComponent: 8, + bytesPerRow: Int(size.width) * 4, + space: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue + ) else { + return XCTFail("Failed to create context") + } + + ctx.setFillColor(NSColor.black.cgColor) + ctx.fill(CGRect(origin: .zero, size: size)) + + ctx.setFillColor(NSColor.red.cgColor) + ctx.fill(rect) + + guard let cg = ctx.makeImage() else { return XCTFail("No CGImage") } + + let inside = samplePixel(cg: cg, x: 100, y: 40) + XCTAssertGreaterThan(inside.redComponent, 0.8) + XCTAssertLessThan(inside.greenComponent, 0.2) + XCTAssertLessThan(inside.blueComponent, 0.2) + + let outside = samplePixel(cg: cg, x: 5, y: 5) + XCTAssertLessThan(outside.redComponent, 0.1) + } + + private func samplePixel(cg: CGImage, x: Int, y: Int) -> NSColor { + guard let data = cg.dataProvider?.data else { return .clear } + let ptr = CFDataGetBytePtr(data)! + let bytesPerPixel = 4 + let offset = ((cg.height - 1 - y) * cg.bytesPerRow) + x * bytesPerPixel + let r = ptr[offset] + let g = ptr[offset + 1] + let b = ptr[offset + 2] + return NSColor(red: CGFloat(r)/255, green: CGFloat(g)/255, blue: CGFloat(b)/255, alpha: 1) + } +} diff --git a/CoreMLPlayerTests/ProjectConfigurationTests.swift b/CoreMLPlayerTests/ProjectConfigurationTests.swift new file mode 100644 index 0000000..02116e5 --- /dev/null +++ b/CoreMLPlayerTests/ProjectConfigurationTests.swift @@ -0,0 +1,22 @@ +import XCTest +import Foundation + +/// Sanity checks for project configuration files used by CI. 
+final class ProjectConfigurationTests: XCTestCase { + func testXCTestPlanListsCoreMLPlayerTestsTarget() throws { + let testFile = URL(fileURLWithPath: #filePath) + let repoRoot = testFile.deletingLastPathComponent().deletingLastPathComponent() + let planURL = repoRoot.appendingPathComponent("CoreML Player.xctestplan") + + guard FileManager.default.fileExists(atPath: planURL.path) else { + throw XCTSkip("xctestplan not found at expected path: \(planURL.path)") + } + + let data = try Data(contentsOf: planURL) + let json = try JSONSerialization.jsonObject(with: data) as? [String: Any] + let testTargets = json?["testTargets"] as? [[String: Any]] + let target = testTargets?.first?["target"] as? [String: Any] + + XCTAssertEqual(target?["name"] as? String, "CoreMLPlayerTests") + } +} diff --git a/CoreMLPlayerTests/VideoDetectionMetricsTests.swift b/CoreMLPlayerTests/VideoDetectionMetricsTests.swift new file mode 100644 index 0000000..8f87275 --- /dev/null +++ b/CoreMLPlayerTests/VideoDetectionMetricsTests.swift @@ -0,0 +1,143 @@ +import XCTest +import Vision +import CoreML +import CoreVideo +import CoreGraphics +@testable import CoreML_Player + +/// Tests covering timing, metrics, and scheduling behaviors in VideoDetection. +final class VideoDetectionMetricsTests: XCTestCase { + // MARK: - Performance / scheduling / stats + func testDetectionLatencyFeedsRepeatIntervalAndStats() throws { + let sut = try makeStubVideoDetection(detectionTimeMs: 12, detectionFPS: "84", objects: 3) + sut.setVideoInfoForTesting((isPlayable: true, frameRate: 30, duration: .zero, size: CGSize(width: 1920, height: 1080))) + DetectionStats.shared.items = [] + + // Warm-up run (stats intentionally skipped) + sut.detectObjectsInFrame() + + let exp = expectation(description: "second detection completes") + sut.detectObjectsInFrame { exp.fulfill() } + wait(for: [exp], timeout: 1.0) + + // Stats should reflect the stubbed detection result + let detTime = DetectionStats.shared.items.first(where: { $0.key == "Det. Time" })?.value + XCTAssertEqual(detTime, "12 ms") + let detObjects = DetectionStats.shared.items.first(where: { $0.key == "Det. 
Objects" })?.value + XCTAssertEqual(detObjects, "3") + + // Repeat interval should subtract last detection time but stay above the clamp (0.02s) + let expected = (1.0 / 30.0) - 0.012 + XCTAssertEqual(sut.getRepeatInterval(), expected, accuracy: 0.002) + } + + func testFrameObjectsAndStatsClearOnDisappearing() throws { + let sut = try makeStubVideoDetection() + sut.setVideoInfoForTesting((isPlayable: true, frameRate: 24, duration: .zero, size: CGSize(width: 640, height: 360))) + DetectionStats.shared.items = [] + + // Warm-up + sut.detectObjectsInFrame() + let exp = expectation(description: "post-warmup detection completes") + sut.detectObjectsInFrame { exp.fulfill() } + wait(for: [exp], timeout: 1.0) + XCTAssertFalse(sut.frameObjects.isEmpty) + XCTAssertFalse(DetectionStats.shared.items.isEmpty) + + sut.disappearing() + + XCTAssertTrue(sut.frameObjects.isEmpty) + + let cleared = expectation(description: "stats cleared") + DispatchQueue.main.asyncAfter(deadline: .now() + 0.15) { + if DetectionStats.shared.items.isEmpty { + cleared.fulfill() + } + } + wait(for: [cleared], timeout: 0.5) + } + + func testWarmupFrameExcludedFromStatsAndChart() throws { + let sut = try makeStubVideoDetection() + sut.setVideoInfoForTesting((isPlayable: true, frameRate: 30, duration: .zero, size: CGSize(width: 1280, height: 720))) + DetectionStats.shared.items = [] + + // First call should be warm-up and not push stats. + sut.detectObjectsInFrame() + XCTAssertTrue(DetectionStats.shared.items.isEmpty) + + // Second call should push stats. + let exp = expectation(description: "second detection") + sut.detectObjectsInFrame { exp.fulfill() } + wait(for: [exp], timeout: 1.0) + XCTAssertFalse(DetectionStats.shared.items.isEmpty) + XCTAssertTrue(sut.metricsForTesting().warmupCompleted) + } + + func testDroppedFramesCountedAfterWarmup() throws { + final class NilFirstPixelBufferVD: StubVideoDetection { + private var first = true + override func getPixelBuffer() -> CVPixelBuffer? 
{ + if first { + first = false + return nil + } + return super.getPixelBuffer() + } + } + + let sut = NilFirstPixelBufferVD(stubDetectionTimeMs: 10, stubDetectionFPS: "100", stubObjects: 1) + sut.setModel(try VNCoreMLModel(for: MLModel(contentsOf: compiledModelURL()))) + sut.setVideoInfoForTesting((isPlayable: true, frameRate: 30, duration: .zero, size: CGSize(width: 320, height: 240))) + DetectionStats.shared.items = [] + + sut.detectObjectsInFrame() // warmup + dropped frame, should not count + XCTAssertEqual(sut.metricsForTesting().droppedFrames, 1) + XCTAssertTrue(DetectionStats.shared.items.isEmpty) + + // Second call becomes warm-up (because first bailed early) + sut.detectObjectsInFrame() + let exp = expectation(description: "post-warmup detection") + sut.detectObjectsInFrame { exp.fulfill() } + wait(for: [exp], timeout: 1.0) + + let dropStat = DetectionStats.shared.items.first(where: { $0.key == "Dropped Frames" }) + XCTAssertEqual(dropStat?.value, "1") + } + + func testDetectionLatencyWithinBudgetHelper() { + let sut = VideoDetection() + sut.setLastDetectionTimeForTesting(0.030) // 30ms + XCTAssertTrue(sut.isWithinLatencyBudget()) + sut.setLastDetectionTimeForTesting(0.080) + XCTAssertFalse(sut.isWithinLatencyBudget(budgetMs: 50)) + } + + // MARK: - Helpers + private func compiledModelURL() throws -> URL { + let bundle = Bundle(for: type(of: self)) + if let compiled = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodelc") { + return compiled + } + guard let raw = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodel") else { + XCTFail("Missing YOLOv3Tiny model in test bundle") + throw XCTSkip("Model fixture unavailable") + } + return try MLModel.compileModel(at: raw) + } + + private func makeStubVideoDetection( + detectionTimeMs: Double = 12, + detectionFPS: String = "90", + objects: Int = 2 + ) throws -> StubVideoDetection { + let model = try VNCoreMLModel(for: MLModel(contentsOf: compiledModelURL())) + let sut = StubVideoDetection( + stubDetectionTimeMs: detectionTimeMs, + stubDetectionFPS: detectionFPS, + stubObjects: objects + ) + sut.setModel(model) + return sut + } +} diff --git a/CoreMLPlayerTests/VideoDetectionPixelFormatTests.swift b/CoreMLPlayerTests/VideoDetectionPixelFormatTests.swift new file mode 100644 index 0000000..15e4ca6 --- /dev/null +++ b/CoreMLPlayerTests/VideoDetectionPixelFormatTests.swift @@ -0,0 +1,33 @@ +import XCTest +import CoreVideo +@testable import CoreML_Player + +/// Pixel format selection and fallbacks for video pipeline. +final class VideoDetectionPixelFormatTests: XCTestCase { + func testPixelBufferAttributesPreferIdealFormat() { + let vd = VideoDetection() + vd.setIdealFormatForTesting((width: 320, height: 240, type: kCVPixelFormatType_32BGRA)) + let attrs = vd.getPixelBufferAttributesForTesting() + XCTAssertEqual(attrs?[kCVPixelBufferPixelFormatTypeKey as String] as? OSType, kCVPixelFormatType_32BGRA) + } + + func testPixelBufferAttributesFallbackWhenIdealFormatMissing() { + let sut = VideoDetection() + sut.setIdealFormatForTesting(nil) + XCTAssertNil(sut.getPixelBufferAttributesForTesting()) + } + + func testPixelFormatFallbackReturnsOverrideBuffer() { + let sut = VideoDetection() + sut.setIdealFormatForTesting((width: 320, height: 240, type: kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)) + var pb: CVPixelBuffer? 
+ CVPixelBufferCreate(nil, 2, 2, kCVPixelFormatType_32BGRA, nil, &pb) + VideoDetection.testFallbackPixelBuffer = pb + + let fallback = sut.forcePixelBufferFallbackForTesting() + + XCTAssertNotNil(fallback) + XCTAssertNil(sut.getPixelBufferAttributesForTesting()) + VideoDetection.testFallbackPixelBuffer = nil + } +} diff --git a/CoreMLPlayerTests/VideoDetectionStateTests.swift b/CoreMLPlayerTests/VideoDetectionStateTests.swift new file mode 100644 index 0000000..c01311d --- /dev/null +++ b/CoreMLPlayerTests/VideoDetectionStateTests.swift @@ -0,0 +1,111 @@ +import XCTest +import Vision +import CoreML +import CoreVideo +import CoreGraphics +@testable import CoreML_Player + +/// Tests that focus on VideoDetection state, orientation, and stateful model plumbing. +final class VideoDetectionStateTests: XCTestCase { + func testStateTokenPersistsAcrossFrames() throws { + let vd = VideoDetection() + let vnModel = try VNCoreMLModel(for: MLModel(contentsOf: compiledModelURL())) + vd.setModel(vnModel) + vd.setVideoOrientationForTesting(.up) + vd.setIdealFormatForTesting((width: 32, height: 32, type: kCVPixelFormatType_32BGRA)) + + var pixelBuffer: CVPixelBuffer? + CVPixelBufferCreate(nil, 32, 32, kCVPixelFormatType_32BGRA, nil, &pixelBuffer) + guard let pb = pixelBuffer else { return XCTFail("Failed to create pixel buffer") } + + let first = vd.detectPixelBufferForTesting(pb) + let second = vd.detectPixelBufferForTesting(pb) + + XCTAssertEqual(second.stateFrameCounter, first.stateFrameCounter + 1) + } + + func testStateCounterResetsOnModelChangeAndDisappearing() throws { + let sut = try makeStubVideoDetection() + sut.setVideoOrientationForTesting(.up) + var pb: CVPixelBuffer? + CVPixelBufferCreate(nil, 8, 8, kCVPixelFormatType_32BGRA, nil, &pb) + guard let buffer = pb else { return XCTFail("buffer missing") } + + _ = sut.detectPixelBufferForTesting(buffer) + _ = sut.detectPixelBufferForTesting(buffer) + XCTAssertEqual(sut.metricsForTesting().stateFrameCounter, 2) + + sut.setModel(nil) + XCTAssertEqual(sut.metricsForTesting().stateFrameCounter, 0) + + sut.setModel(try VNCoreMLModel(for: MLModel(contentsOf: compiledModelURL()))) + _ = sut.detectPixelBufferForTesting(buffer) + sut.disappearing() + XCTAssertEqual(sut.metricsForTesting().stateFrameCounter, 0) + } + + func testStatefulModelStateAdvancesAcrossFrames() throws { + let compiledURL = try FixtureBuilder.ensureStatefulModel() + let vnModel = try VNCoreMLModel(for: MLModel(contentsOf: compiledURL)) + let sut = VideoDetection() + sut.setModel(vnModel) + sut.setVideoOrientationForTesting(.up) + var pb: CVPixelBuffer? + CVPixelBufferCreate(nil, 1, 1, kCVPixelFormatType_32BGRA, nil, &pb) + guard let buffer = pb else { return XCTFail("buffer missing") } + + _ = sut.detectPixelBufferForTesting(buffer) + guard let first = Base.sharedLastStateValues?["state_out"]?.multiArrayValue else { + throw XCTSkip("state outputs not available on this platform") + } + _ = sut.detectPixelBufferForTesting(buffer) + guard let second = Base.sharedLastStateValues?["state_out"]?.multiArrayValue else { + throw XCTSkip("state outputs not available on this platform") + } + + XCTAssertLessThan(first[0].doubleValue, second[0].doubleValue) + } + + func testVideoOrientationPersistsDuringDetection() throws { + let model = try VNCoreMLModel(for: MLModel(contentsOf: compiledModelURL())) + let sut = VideoDetection() + sut.setModel(model) + sut.setVideoOrientationForTesting(.left) + var pb: CVPixelBuffer? 
+ CVPixelBufferCreate(nil, 4, 4, kCVPixelFormatType_32BGRA, nil, &pb) + guard let buffer = pb else { return XCTFail("pixel buffer missing") } + + _ = sut.detectPixelBufferForTesting(buffer) + + XCTAssertEqual(VideoDetection.sharedLastVideoOrientation, .left) + let rect = Base().rectForNormalizedRect(normalizedRect: CGRect(x: 0.25, y: 0.25, width: 0.5, height: 0.25), width: 200, height: 100) + XCTAssertEqual(rect.origin.y, 50, accuracy: 0.1) // letterbox math still holds + } + + // MARK: - Helpers + private func compiledModelURL() throws -> URL { + let bundle = Bundle(for: type(of: self)) + if let compiledURL = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodelc") { + return compiledURL + } + guard let rawURL = bundle.url(forResource: "YOLOv3Tiny", withExtension: "mlmodel") else { + throw XCTSkip("YOLOv3Tiny model not present in test bundle") + } + return try MLModel.compileModel(at: rawURL) + } + + private func makeStubVideoDetection( + detectionTimeMs: Double = 12, + detectionFPS: String = "90", + objects: Int = 2 + ) throws -> StubVideoDetection { + let model = try VNCoreMLModel(for: MLModel(contentsOf: compiledModelURL())) + let sut = StubVideoDetection( + stubDetectionTimeMs: detectionTimeMs, + stubDetectionFPS: detectionFPS, + stubObjects: objects + ) + sut.setModel(model) + return sut + } +} diff --git a/CoreMLPlayerTests/VideoDetectionTestDoubles.swift b/CoreMLPlayerTests/VideoDetectionTestDoubles.swift new file mode 100644 index 0000000..e79d305 --- /dev/null +++ b/CoreMLPlayerTests/VideoDetectionTestDoubles.swift @@ -0,0 +1,66 @@ +import Foundation +import CoreVideo +import Vision +@testable import CoreML_Player + +/// Lightweight stub to avoid real Vision execution when testing scheduling and state. +class StubVideoDetection: VideoDetection { + private let stubDetectionTimeMs: Double + private let stubDetectionFPS: String + private let stubObjects: Int + private var cachedPixelBuffer: CVPixelBuffer? + + init(stubDetectionTimeMs: Double, stubDetectionFPS: String, stubObjects: Int) { + self.stubDetectionTimeMs = stubDetectionTimeMs + self.stubDetectionFPS = stubDetectionFPS + self.stubObjects = stubObjects + super.init() + } + + override func getPixelBuffer() -> CVPixelBuffer? { + if let cachedPixelBuffer { + return cachedPixelBuffer + } + var pb: CVPixelBuffer? + CVPixelBufferCreate(kCFAllocatorDefault, 4, 4, kCVPixelFormatType_32BGRA, nil, &pb) + cachedPixelBuffer = pb + return pb + } + + override func performObjectDetection( + pixelBuffer: CVPixelBuffer, + orientation: CGImagePropertyOrientation, + vnModel: VNCoreMLModel, + functionName: String? = nil, + cropAndScale: VNImageCropAndScaleOption = .scaleFill + ) -> detectionOutput { + Base.sharedLastFunctionName = functionName + return stubbedOutput() + } + + override func performObjectDetection( + requestHandler: VNImageRequestHandler, + vnModel: VNCoreMLModel, + functionName: String? 
= nil, + cropAndScale: VNImageCropAndScaleOption = .scaleFill + ) -> detectionOutput { + Base.sharedLastFunctionName = functionName + return stubbedOutput() + } + + private func stubbedOutput() -> detectionOutput { + let object = DetectedObject( + id: UUID(), + label: "stub", + confidence: "1.0", + otherLabels: [], + width: 0.1, + height: 0.1, + x: 0, + y: 0 + ) + let objects = Array(repeating: object, count: stubObjects) + let detTime = String(format: "%.0f ms", stubDetectionTimeMs) + return (objects, detTime, stubDetectionFPS) + } +} diff --git a/CoreMLPlayerTests/VideoDetectionTests.swift b/CoreMLPlayerTests/VideoDetectionTests.swift new file mode 100644 index 0000000..e5367b5 --- /dev/null +++ b/CoreMLPlayerTests/VideoDetectionTests.swift @@ -0,0 +1,58 @@ +import XCTest +@testable import CoreML_Player + +final class VideoDetectionTests: XCTestCase { + func testRepeatIntervalRespectsFrameRateAndLatency() { + let sut = VideoDetection() + sut.setVideoInfoForTesting((isPlayable: true, frameRate: 60, duration: .zero, size: .zero)) + let interval = sut.getRepeatInterval(false) + XCTAssertEqual(interval, 1 / 60) + } + + func testRepeatIntervalTrimsLastDetectionTime() { + let sut = VideoDetection() + sut.setVideoInfoForTesting((isPlayable: true, frameRate: 30, duration: .zero, size: .zero)) + sut.setLastDetectionTimeForTesting(0.01) + let interval = sut.getRepeatInterval() + XCTAssertLessThan(interval, 1 / 30) + XCTAssertGreaterThan(interval, 0) + } + + func testRepeatIntervalClampsToDetectionBudget() { + let sut = VideoDetection() + sut.setVideoInfoForTesting((isPlayable: true, frameRate: 30, duration: .zero, size: .zero)) + sut.setLastDetectionTimeForTesting(1.0) // detection took 1s previously + + let interval = sut.getRepeatInterval() + XCTAssertGreaterThanOrEqual(interval, 0.5) + XCTAssertLessThanOrEqual(interval, 1.1) + } + + func testRepeatIntervalClampsToMinimumWhenLastDetectionIsHigh() { + let sut = VideoDetection() + sut.setVideoInfoForTesting((isPlayable: true, frameRate: 30, duration: .zero, size: .zero)) + sut.setLastDetectionTimeForTesting(1.0) // 1s > frame interval + + let interval = sut.getRepeatInterval() + XCTAssertEqual(interval, 0.5, accuracy: 0.001) // clamped to half the prior detection time + } + + func testDisappearingClearsDetectionStats() { + let sut = VideoDetection() + DetectionStats.shared.addMultiple([Stats(key: "FPS", value: "10")]) + XCTAssertFalse(DetectionStats.shared.items.isEmpty) + + let expectation = expectation(description: "Detection stats cleared after disappearing") + sut.disappearing() + + DispatchQueue.main.asyncAfter(deadline: .now() + 0.15) { + if DetectionStats.shared.items.isEmpty { + expectation.fulfill() + } else { + XCTFail("Detection stats were not cleared") + } + } + + wait(for: [expectation], timeout: 0.5) + } +} diff --git a/CoreMLPlayerTests/VisionPipelineTests.swift b/CoreMLPlayerTests/VisionPipelineTests.swift new file mode 100644 index 0000000..8fd61ed --- /dev/null +++ b/CoreMLPlayerTests/VisionPipelineTests.swift @@ -0,0 +1,58 @@ +import XCTest +@testable import CoreML_Player +import Vision +import CoreGraphics +import AppKit + +final class VisionPipelineTests: XCTestCase { + func testDetectedObjectsConversion() { + let boundingBox = CGRect(x: 0.1, y: 0.2, width: 0.3, height: 0.4) + let label = VNClassificationObservation(identifier: "object", confidence: 0.75) + let observation = VNRecognizedObjectObservation(boundingBox: boundingBox) + observation.setValue([label], forKey: "labels") + let duration: Duration = .milliseconds(40) + + 
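+ // 40 ms of detection time should surface as a "25" FPS string in the converted output below.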
let sut = Base() + let output = sut.asDetectedObjects(visionObservationResults: [observation], detectionTime: duration) + + XCTAssertEqual(output.objects.first?.label, "object") + XCTAssertEqual(output.objects.first?.width, boundingBox.width) + XCTAssertEqual(output.detectionTime.contains("ms"), true) + XCTAssertEqual(output.detectionFPS, "25") + } + + func testRectForNormalizedRectFlipsYAxis() { + let sut = Base() + let normalized = CGRect(x: 0, y: 0, width: 0.5, height: 0.5) + let rect = sut.rectForNormalizedRect(normalizedRect: normalized, width: 200, height: 100) + + XCTAssertEqual(rect.origin.y, 50) + XCTAssertEqual(rect.size.width, 100) + } + + func testOverlayRectMatchesLetterboxedMath() { + let base = Base() + let normalized = CGRect(x: 0.25, y: 0.25, width: 0.5, height: 0.25) + let rect = base.rectForNormalizedRect(normalizedRect: normalized, width: 200, height: 100) + XCTAssertEqual(rect.width, 100, accuracy: 0.1) + XCTAssertEqual(rect.height, 25, accuracy: 0.1) + XCTAssertEqual(rect.origin.y, 50, accuracy: 0.1) + } +} + +extension VNClassificationObservation { + convenience init(identifier: String, confidence: VNConfidence) { + self.init() + setValue(identifier, forKey: "identifier") + setValue(confidence, forKey: "confidence") + } +} + +extension CGImage { + static var mockSquare: CGImage { + let context = CGContext(data: nil, width: 32, height: 32, bitsPerComponent: 8, bytesPerRow: 32 * 4, space: CGColorSpaceCreateDeviceRGB(), bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)! + context.setFillColor(NSColor.red.cgColor) + context.fill(CGRect(x: 0, y: 0, width: 32, height: 32)) + return context.makeImage()! + } +} diff --git a/README.md b/README.md index 612efb9..89ca968 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ --- # Requirements -macOS 13.0+ +macOS 14.0+ Currently supports Object Detection and Classification models that can be used with Vision framework. diff --git a/TEST_PLAN.md b/TEST_PLAN.md new file mode 100644 index 0000000..2fc6aa3 --- /dev/null +++ b/TEST_PLAN.md @@ -0,0 +1,43 @@ +# CoreMLPlayer Test Plan + +This test plan focuses on preventing regressions from the planned Core ML/Vision pipeline changes (request reuse, error handling, orientation handling, async model loading, and pixel format tuning). It covers new unit and integration tests plus manual checks where automation is difficult. + +## 1. Test Targets and Fixtures +- **Add a `CoreMLPlayerTests` target** with XCTest. +- **Fixtures**: small sample video (landscape) and portrait stills for orientation checks; synthetic pixel buffers in `kCVPixelFormatType_32BGRA` and model-preferred formats; a lightweight mock `MLModel` and `VNCoreMLModel` wrapper for failure injection. +- **Environment**: ensure tests run on device and simulator; gate GPU-dependent tests with `XCTSkip` when `MTLDevice` is unavailable. + +## 2. Model Loading & Configuration +- **Async load success**: `MLModel.load(contentsOf:configuration:)` resolves and returns a non-nil model; configuration respects `computeUnits` and `allowLowPrecisionAccumulationOnGPU`. +- **Compilation fallback**: when passed an uncompiled model URL, verify the code compiles once and caches the compiled URL; subsequent loads reuse the compiled path. +- **Error surfacing**: inject an invalid model and assert the load path propagates errors (no silent failures). +- **Warm-up inference**: after loading, assert a single warm-up request executes without throwing and marks the model as ready for reuse. 
YOLOv3Tiny emits a benign console warning about missing `precisionRecallCurves` because it is not an updatable model; the test should tolerate this log as long as the request succeeds. + +## 3. Vision Request Lifecycle +- **Request reuse**: repeated video frames should reuse the same `VNCoreMLRequest` / `VNSequenceRequestHandler`; track allocations or identifiers to ensure no per-frame recreation. +- **Error handling**: simulate a Vision error (e.g., mismatched pixel buffer attributes) and assert errors are logged/propagated, not swallowed. +- **Crop/scale compliance**: validate `imageCropAndScaleOption` matches the model’s expected input; tests check `.centerCrop` (or configured option) is applied rather than a hard-coded default. + +## 4. Orientation & Pixel Buffer Handling +- **Orientation mapping**: feed landscape and portrait CMSampleBuffers and assert the resolved `CGImagePropertyOrientation` passed to Vision matches the video track orientation. +- **Pixel format selection**: when `idealFormat` is available, `AVPlayerItemVideoOutput` should be initialized with matching pixel buffer attributes; tests confirm the requested attributes and that fallback to BGRA occurs when unsupported. +- **Pixel buffer validity**: confirm the request handler rejects nil or stale pixel buffers and surfaces an error. + +## 5. Inference Output Validation +- **Deterministic outputs**: run inference on a fixed fixture image and assert detections (labels/bounding boxes) stay within a tolerance envelope to catch pre/post-processing regressions. +- **Performance budget**: use `measure` blocks to ensure per-frame inference time stays within the real-time threshold (e.g., <33ms on a supported device) after changes. +- **Max-FPS loop stability**: stress test with rapid frame delivery to ensure no memory growth (via allocations audit) and no dropped frames due to request recreation. + +## 6. UI/Integration Checks +- **Playback overlay**: snapshot tests for the detection overlay view to ensure bounding boxes render in correct orientation and aspect after crop/scale changes. +- **Lifecycle**: start/stop playback multiple times to verify requests/handlers are released and recreated cleanly; assert no crashes when the app re-enters foreground. +- **Error messaging**: inject model load failures and Vision errors and assert user-facing alerts or logs appear (and do not block main thread responsiveness). + +## 7. Manual Verification +- **On-device smoke test**: run the app on a physical device, confirm stable FPS, correct bounding boxes, and no UI hangs when toggling models or playback speed. +- **GPU/CPU toggle**: switch compute units (CPU-only vs. ANE/GPU) to observe consistency and ensure the app handles hardware differences gracefully. + +## 8. Automation Hooks +- Integrate tests into CI with separate schemes for unit/integration and UI snapshot runs. +- Capture performance baselines in CI artifacts for regression tracking. +- Provide toggles/env vars to skip GPU-reliant tests on unsupported runners.
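+
+## 9. Illustrative Test Sketches (Non-Normative)
+The snippets below sketch how a few of the checks in Sections 2, 3, and 5 could look. They assume the `compiledModelURL()` helper and the bundled YOLOv3Tiny fixture already used by the test target, and they are starting points rather than final implementations.
+
+For Section 2, an async-load test can pin down the compute units that were requested, which keeps CI runners without a GPU or Neural Engine deterministic:
+
+```swift
+func testAsyncModelLoadHonorsRequestedComputeUnits() async throws {
+    let configuration = MLModelConfiguration()
+    configuration.computeUnits = .cpuOnly          // deterministic on any runner
+    let url = try compiledModelURL()               // helper from the existing test files
+    let model = try await MLModel.load(contentsOf: url, configuration: configuration)
+    XCTAssertEqual(model.configuration.computeUnits, .cpuOnly)
+}
+```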
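+For Section 3, request reuse can be exercised by building one `VNCoreMLRequest` and pushing several frames through a single `VNSequenceRequestHandler`; allocation tracking or identity checks against the app's own request cache would be layered on top of this sketch:
+
+```swift
+func testSingleRequestServesMultipleFrames() throws {
+    let vnModel = try VNCoreMLModel(for: MLModel(contentsOf: compiledModelURL()))
+    let request = VNCoreMLRequest(model: vnModel)
+    request.imageCropAndScaleOption = .scaleFill   // match the app's configured option
+
+    var pixelBuffer: CVPixelBuffer?
+    CVPixelBufferCreate(kCFAllocatorDefault, 416, 416, kCVPixelFormatType_32BGRA, nil, &pixelBuffer)
+    let buffer = try XCTUnwrap(pixelBuffer)
+
+    let handler = VNSequenceRequestHandler()
+    for _ in 0..<3 {
+        // Same request instance on every frame; a failure here points at per-frame state leaking.
+        try handler.perform([request], on: buffer, orientation: .up)
+    }
+    XCTAssertNotNil(request.results)
+}
+```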
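+For Section 5, the per-frame budget can be tracked with XCTest's `measure` API; the 33 ms threshold is best enforced through a recorded baseline rather than a hard assert, since CI hardware varies:
+
+```swift
+func testPerFrameInferenceTime() throws {
+    let vnModel = try VNCoreMLModel(for: MLModel(contentsOf: compiledModelURL()))
+    let request = VNCoreMLRequest(model: vnModel)
+    var pixelBuffer: CVPixelBuffer?
+    CVPixelBufferCreate(kCFAllocatorDefault, 416, 416, kCVPixelFormatType_32BGRA, nil, &pixelBuffer)
+    let buffer = try XCTUnwrap(pixelBuffer)
+
+    measure {
+        // One frame of work: handler creation plus a single inference pass.
+        let handler = VNImageRequestHandler(cvPixelBuffer: buffer, orientation: .up)
+        try? handler.perform([request])
+    }
+}
+```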