From 5e4d8e45c32abca81a1a161f4aaf6b93e325676e Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Thu, 9 Jan 2025 11:28:24 -0700 Subject: [PATCH 01/20] Fixed SideprojectAccountsView --- .../Intramodular/Accounts/SideprojectAccountsView.swift | 6 ++++-- .../SideprojectCore/Intramodular/UI/_AccountPicker.swift | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Sources/SideprojectCore/Intramodular/Accounts/SideprojectAccountsView.swift b/Sources/SideprojectCore/Intramodular/Accounts/SideprojectAccountsView.swift index acc4a17..c944fe3 100644 --- a/Sources/SideprojectCore/Intramodular/Accounts/SideprojectAccountsView.swift +++ b/Sources/SideprojectCore/Intramodular/Accounts/SideprojectAccountsView.swift @@ -54,9 +54,9 @@ public struct SideprojectAccountsView: View { cellGrid .frame(minWidth: 126) - #if os(macOS) +#if os(macOS) PathControl(url: try! store.$accounts.url) - #endif +#endif } .padding() .environmentObject(store) @@ -136,9 +136,11 @@ public struct SideprojectAccountsView: View { store.accounts.append(account) } +#if os(macOS) ._overrideOnExitCommand { presentationManager.dismiss() } +#endif } } label: { Image(systemName: .plus) diff --git a/Sources/SideprojectCore/Intramodular/UI/_AccountPicker.swift b/Sources/SideprojectCore/Intramodular/UI/_AccountPicker.swift index 834314d..af02b03 100644 --- a/Sources/SideprojectCore/Intramodular/UI/_AccountPicker.swift +++ b/Sources/SideprojectCore/Intramodular/UI/_AccountPicker.swift @@ -37,9 +37,11 @@ public struct _AccountPicker: View { presentationMode.dismiss() } } + #if os(macOS) ._overrideOnExitCommand { presentationMode.dismiss() } + #endif } .frame(idealWidth: 448, idealHeight: 560) .background(Color.accountModalBackgroundColor.ignoresSafeArea()) From 436e5fc32372623edfc9c9dced4851e43e87e940 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Thu, 9 Jan 2025 11:39:35 -0700 Subject: [PATCH 02/20] Edit account fix --- .../Intramodular/Accounts/SideprojectAccountsView.swift | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Sources/SideprojectCore/Intramodular/Accounts/SideprojectAccountsView.swift b/Sources/SideprojectCore/Intramodular/Accounts/SideprojectAccountsView.swift index c944fe3..cb70d18 100644 --- a/Sources/SideprojectCore/Intramodular/Accounts/SideprojectAccountsView.swift +++ b/Sources/SideprojectCore/Intramodular/Accounts/SideprojectAccountsView.swift @@ -85,9 +85,11 @@ public struct SideprojectAccountsView: View { presentationManager.dismiss() } + #if os(macOS) ._overrideOnExitCommand { presentationManager.dismiss() } + #endif } } label: { Cell(account: $account._assigningLogicalParent(store, to: \.$store)) From aa18be1b113f43f068e18411e4d2e56177a55d05 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 13:04:47 -0700 Subject: [PATCH 03/20] MediaGenerationView (not working) --- .../MediaGenerationView.swift | 452 ++++++++++++++++++ 1 file changed, 452 insertions(+) create mode 100644 Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift new file mode 100644 index 0000000..5191e88 --- /dev/null +++ b/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift @@ -0,0 +1,452 @@ +// +// MediaGenerationView.swift +// Sideproject +// +// Created by Jared Davidson on 1/10/25. +// + +import SwiftUI +import ElevenLabs +import SwallowUI +import Media +import AVFoundation +import AI + +public enum MediaType { + case speech + case video +} + +public enum InputModality: String { + case text + case audio + case image + case video + + var description: String { + rawValue.capitalized + } +} + +// MARK: - Main View +struct GenerateMediaView: View { + public struct Configuration: Hashable { + public var textToSpeechModel: String + public var speechToSpeechModel: String + public var voiceSettings: AbstractVoiceSettings + public var videoSettings: VideoGenerationSettings + public var speechClient: (any SpeechSynthesisRequestHandling)? + public var videoClient: (any VideoGenerationRequestHandling)? + + public init( + textToSpeechModel: String = ElevenLabs.Model.EnglishV1.rawValue, + speechToSpeechModel: String = ElevenLabs.Model.EnglishSTSV2.rawValue, + voiceSettings: AbstractVoiceSettings = .init(), + videoSettings: VideoGenerationSettings = .init() + ) { + self.textToSpeechModel = textToSpeechModel + self.speechToSpeechModel = speechToSpeechModel + self.voiceSettings = voiceSettings + self.videoSettings = videoSettings + } + } + + private let mediaType: MediaType + private let inputModality: InputModality + private var configuration: Configuration + private let onComplete: ((AnyMediaFile) -> Void)? + + @StateObject private var viewModel: GenerationViewModel + + init( + mediaType: MediaType, + inputModality: InputModality, + configuration: Configuration = .init(), + onComplete: ((AnyMediaFile) -> Void)? = nil + ) { + self.mediaType = mediaType + self.inputModality = inputModality + self.configuration = configuration + self.onComplete = onComplete + + let viewModel = GenerationViewModel( + mediaType: mediaType, + inputModality: inputModality, + configuration: configuration, + onComplete: onComplete + ) + _viewModel = StateObject(wrappedValue: viewModel) + } + + var body: some View { + VStack(alignment: .leading, spacing: 20) { + if viewModel.isLoadingResources { + ResourceLoadingView() + } else if let error = viewModel.loadingError { + ResourceErrorView(error: error) { + Task { await viewModel.loadResources() } + } + } else { + GenerationContentView(viewModel: viewModel) + } + } + .padding() + .task { + await viewModel.loadResources() + } + } +} + +// MARK: - View Model +final class GenerationViewModel: ObservableObject { + @Published var availableVoices: [AbstractVoice] = [] + @Published var availableModels: [VideoModel] = [] + @Published var isLoadingResources = false + @Published var loadingError: Error? + @Published var inputText = "" + @Published var isLoading = false + @Published var showingPreview = false + @Published var selectedVoice: AbstractVoice.ID? + @Published var selectedAudioFile: AudioFile? + @Published var generatedAudioFile: AudioFile? + @Published var selectedVideoModel: VideoModel.ID? + @Published var selectedImage: URL? + @Published var selectedVideo: URL? + @Published var generatedVideoFile: VideoFile? + @Published var generatedFiles: [AnyMediaFile] = [] + + private let mediaType: MediaType + private let inputModality: InputModality + private var configuration: Configuration + private let onComplete: ((AnyMediaFile) -> Void)? + + init( + mediaType: MediaType, + inputModality: InputModality, + configuration: GenerateMediaView.Configuration, + onComplete: ((AnyMediaFile) -> Void)? + ) { + self.mediaType = mediaType + self.inputModality = inputModality + self.configuration = configuration + self.onComplete = onComplete + } + + @MainActor + internal func loadResources() async { + isLoadingResources = true + loadingError = nil + + do { + switch mediaType { + case .speech: + availableVoices = try await configuration.speechClient?.availableVoices() ?? [] + configuration.voiceSettings = .init() + + case .video: + availableModels = try await configuration.videoClient?.availableModels() ?? [] + configuration.videoSettings = .init() + } + } catch { + loadingError = error + } + + isLoadingResources = false + } + + @MainActor + private func generate() async { + isLoading = true + defer { isLoading = false } + + do { + switch mediaType { + case .speech: + try await generateSpeech() + case .video: + try await generateVideo() + } + } catch { + print("Error generating media: \(error)") + } + } + + @MainActor + private func generateSpeech() async throws { + guard let speechClient = configuration.speechClient else { + throw GenerationError.clientNotAvailable + } + + let audioData: Data? + + switch inputModality { + case .audio: + audioData = try await convertSpeechToSpeech() + case .text: + audioData = try await convertTextToSpeech() + default: + return + } + + guard let audioData = audioData else { return } + + let name = switch inputModality { + case .audio: selectedAudioFile?.name ?? "Converted Audio" + case .text: inputText.prefix(30).trimmingCharacters(in: .whitespacesAndNewlines) + default: "Generated Audio" + } + + let temporaryURL = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + .appendingPathExtension("m4a") + + try audioData.write(to: temporaryURL) + + let audioFile = try await AudioFile(url: temporaryURL) + generatedAudioFile = audioFile + + if let audioFile = generatedAudioFile { + let mediaFile = AnyMediaFile(audioFile) + if let onComplete = onComplete { + onComplete(mediaFile) + } else { + generatedFiles.append(mediaFile) + } + } + } + + @MainActor + private func generateVideo() async throws { + guard let videoClient = configuration.videoClient else { + throw GenerationError.clientNotAvailable + } + + guard let modelID = selectedVideoModel, + let model = availableModels.first(where: { $0.id == modelID }) else { return } + + let videoData: Data? + + switch inputModality { + case .text: + videoData = try await videoClient.textToVideo( + text: inputText, + model: model, + settings: configuration.videoSettings + ) + case .image: + guard let imageURL = selectedImage else { return } + videoData = try await videoClient.imageToVideo( + imageURL: imageURL, + model: model, + settings: configuration.videoSettings + ) + case .video: + guard let videoURL = selectedVideo else { return } + videoData = try await videoClient.videoToVideo( + videoURL: videoURL, + prompt: inputText, + model: model, + settings: configuration.videoSettings + ) + default: + return + } + + guard let videoData = videoData else { + throw GenerationError.invalidVideoData + } + + let temporaryURL = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + .appendingPathExtension("mp4") + + try videoData.write(to: temporaryURL) + + let videoFile = try await VideoFile(url: temporaryURL) + generatedVideoFile = videoFile + + let mediaFile = AnyMediaFile(videoFile) + if let onComplete = onComplete { + onComplete(mediaFile) + } else { + generatedFiles.append(mediaFile) + } + + showingPreview = true + } + + @MainActor + private func convertSpeechToSpeech() async throws -> Data? { + guard let voiceID = selectedVoice, + let voice = availableVoices.first(where: { $0.id == voiceID }), + let audioFile = selectedAudioFile else { + return nil + } + + return try await configuration.speechClient?.speechToSpeech( + inputAudioURL: audioFile.url, + voiceID: voice.voiceID, + voiceSettings: configuration.voiceSettings, + model: configuration.speechToSpeechModel + ) + } + + @MainActor + private func convertTextToSpeech() async throws -> Data? { + guard let voiceID = selectedVoice, + !inputText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { + return nil + } + + return try await configuration.speechClient?.speech( + for: inputText, + voiceID: voiceID.id.rawValue, + voiceSettings: configuration.voiceSettings, + model: configuration.textToSpeechModel + ) + } +} + +// MARK: - Content Views +struct GenerationContentView: View { + @ObservedObject var viewModel: GenerationViewModel + + var body: some View { + VStack(alignment: .leading, spacing: 20) { + HeaderView(mediaType: viewModel.mediaType) + + InputView(viewModel: viewModel) + + ModelSelectionView(viewModel: viewModel) + + if case .video = viewModel.mediaType, case .video = viewModel.inputModality { + PromptInputView(inputText: $viewModel.inputText) + } + + ControlsView(viewModel: viewModel) + + if viewModel.onComplete == nil { + GeneratedFilesView(files: viewModel.generatedFiles) + } + } + } +} + +// MARK: - Supporting Views +struct HeaderView: View { + let mediaType: MediaType + + var body: some View { + VStack(alignment: .leading) { + Text(mediaType == .speech ? "Speech Synthesis" : "Video Generation") + .font(.title) + + Text(mediaType == .speech ? + "Generate realistic speech using advanced AI technology" : + "Create stunning videos using AI") + .foregroundColor(.secondary) + } + } +} + +struct InputView: View { + @ObservedObject var viewModel: GenerationViewModel + + var body: some View { + Group { + switch viewModel.inputModality { + case .text: + TextInputView(text: $viewModel.inputText) + case .audio, .image, .video: + MediaInputView(viewModel: viewModel) + } + } + } +} + +struct TextInputView: View { + @Binding var text: String + + var body: some View { + TextEditor(text: $text) + .frame(height: 100) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.gray.opacity(0.2)) + ) + .overlay( + Group { + if text.isEmpty { + Text("Enter your text here...") + .foregroundColor(.gray) + .padding(.leading, 4) + } + }, + alignment: .topLeading + ) + } +} + +struct MediaInputView: View { + @ObservedObject var viewModel: GenerationViewModel + + var body: some View { + FileDropView { files in + switch viewModel.inputModality { + case .audio: + viewModel.selectedAudioFile = files.first?.audioFile + case .image: + viewModel.selectedImage = files.first?.imageFile + case .video: + viewModel.selectedVideo = files.first?.videoFile + default: + break + } + } content: { files in + if !files.isEmpty { + MediaFileListView(files: files) + } + } + } +} + +struct ResourceLoadingView: View { + var body: some View { + VStack { + ProgressView() + Text("Loading resources...") + .foregroundStyle(.secondary) + } + .frame(maxWidth: .infinity, maxHeight: .infinity) + } +} + +struct ResourceErrorView: View { + let error: Error + let retryAction: () -> Void + + var body: some View { + VStack(spacing: 12) { + Image(systemName: "exclamationmark.triangle") + .font(.largeTitle) + .foregroundColor(.red) + + Text("Failed to load resources") + .font(.headline) + + Text(error.localizedDescription) + .font(.caption) + .foregroundStyle(.secondary) + + Button("Try Again", action: retryAction) + .buttonStyle(.bordered) + } + .frame(maxWidth: .infinity, maxHeight: .infinity) + } +} + +enum GenerationError: Error { + case invalidVideoData + case clientNotAvailable + case resourceLoadingFailed +} From 081662623db36aeeff518cf1e0b5c443acf53995 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 13:32:17 -0700 Subject: [PATCH 04/20] Building again --- .../MediaGenerationView.swift | 173 ++++++++++++++++-- 1 file changed, 156 insertions(+), 17 deletions(-) diff --git a/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift index 5191e88..e2df5cb 100644 --- a/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift +++ b/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift @@ -30,7 +30,12 @@ public enum InputModality: String { // MARK: - Main View struct GenerateMediaView: View { - public struct Configuration: Hashable { + public struct Configuration: Equatable { + public static func == (lhs: GenerateMediaView.Configuration, rhs: GenerateMediaView.Configuration) -> Bool { + return lhs.textToSpeechModel == rhs.textToSpeechModel && + lhs.speechToSpeechModel == rhs.speechToSpeechModel + } + public var textToSpeechModel: String public var speechToSpeechModel: String public var voiceSettings: AbstractVoiceSettings @@ -99,26 +104,26 @@ struct GenerateMediaView: View { // MARK: - View Model final class GenerationViewModel: ObservableObject { - @Published var availableVoices: [AbstractVoice] = [] + @Published var availableVoices: [ElevenLabs.Voice] = [] @Published var availableModels: [VideoModel] = [] @Published var isLoadingResources = false @Published var loadingError: Error? @Published var inputText = "" @Published var isLoading = false @Published var showingPreview = false - @Published var selectedVoice: AbstractVoice.ID? + @Published var selectedVoice: ElevenLabs.Voice.ID? @Published var selectedAudioFile: AudioFile? @Published var generatedAudioFile: AudioFile? @Published var selectedVideoModel: VideoModel.ID? - @Published var selectedImage: URL? - @Published var selectedVideo: URL? + @Published var selectedImage: ImageFile? + @Published var selectedVideo: VideoFile? @Published var generatedVideoFile: VideoFile? @Published var generatedFiles: [AnyMediaFile] = [] - private let mediaType: MediaType - private let inputModality: InputModality - private var configuration: Configuration - private let onComplete: ((AnyMediaFile) -> Void)? + internal let mediaType: MediaType + internal let inputModality: InputModality + internal var configuration: GenerateMediaView.Configuration + internal let onComplete: ((AnyMediaFile) -> Void)? init( mediaType: MediaType, @@ -155,7 +160,7 @@ final class GenerationViewModel: ObservableObject { } @MainActor - private func generate() async { + internal func generate() async { isLoading = true defer { isLoading = false } @@ -234,14 +239,14 @@ final class GenerationViewModel: ObservableObject { settings: configuration.videoSettings ) case .image: - guard let imageURL = selectedImage else { return } + guard let imageURL = selectedImage?.url else { return } videoData = try await videoClient.imageToVideo( imageURL: imageURL, model: model, settings: configuration.videoSettings ) case .video: - guard let videoURL = selectedVideo else { return } + guard let videoURL = selectedVideo?.url else { return } videoData = try await videoClient.videoToVideo( videoURL: videoURL, prompt: inputText, @@ -286,8 +291,8 @@ final class GenerationViewModel: ObservableObject { return try await configuration.speechClient?.speechToSpeech( inputAudioURL: audioFile.url, voiceID: voice.voiceID, - voiceSettings: configuration.voiceSettings, - model: configuration.speechToSpeechModel + voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), + model: .init(rawValue: configuration.speechToSpeechModel)! // FIXME: - Will Crash ) } @@ -301,8 +306,8 @@ final class GenerationViewModel: ObservableObject { return try await configuration.speechClient?.speech( for: inputText, voiceID: voiceID.id.rawValue, - voiceSettings: configuration.voiceSettings, - model: configuration.textToSpeechModel + voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), //FIXME: - This should just accept AbstractVoiceSettings + model: .init(rawValue: configuration.textToSpeechModel)! // FIXME: - Will Crash ) } } @@ -404,7 +409,7 @@ struct MediaInputView: View { } } content: { files in if !files.isEmpty { - MediaFileListView(files: files) + MediaFileListView(files) } } } @@ -450,3 +455,137 @@ enum GenerationError: Error { case clientNotAvailable case resourceLoadingFailed } + + +// MARK: - Model Selection View +struct ModelSelectionView: View { + @ObservedObject var viewModel: GenerationViewModel + + var body: some View { + VStack(alignment: .leading, spacing: 8) { + switch viewModel.mediaType { + case .speech: + if !viewModel.availableVoices.isEmpty { + Text("Select Voice") + .font(.headline) + + Picker("Voice", selection: $viewModel.selectedVoice) { + Text("Select a voice").tag(Optional.none) + ForEach(viewModel.availableVoices) { voice in + Text(voice.name) + .tag(Optional(voice.id)) + } + } + } + + case .video: + if !viewModel.availableModels.isEmpty { + Text("Select Model") + .font(.headline) + + Picker("Model", selection: $viewModel.selectedVideoModel) { + Text("Select a model").tag(Optional.none) + ForEach(viewModel.availableModels) { model in + Text(model.name) + .tag(Optional(model.id)) + } + } + } + } + } + } +} + +// MARK: - Prompt Input View +struct PromptInputView: View { + @Binding var inputText: String + + var body: some View { + VStack(alignment: .leading, spacing: 8) { + Text("Enter Prompt") + .font(.headline) + + TextEditor(text: $inputText) + .frame(height: 100) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.gray.opacity(0.2)) + ) + .overlay( + Group { + if inputText.isEmpty { + Text("Describe how you want to transform the video...") + .foregroundColor(.gray) + .padding(.leading, 4) + } + }, + alignment: .topLeading + ) + } + } +} + +// MARK: - Controls View +struct ControlsView: View { + @ObservedObject var viewModel: GenerationViewModel + + var body: some View { + VStack(spacing: 12) { + Button { + Task { + await viewModel.generate() + } + } label: { + if viewModel.isLoading { + ProgressView() + .progressViewStyle(.circular) + } else { + Text("Generate") + } + } + .buttonStyle(.borderedProminent) + .disabled(viewModel.isLoading || !isGenerateEnabled) + } + } + + private var isGenerateEnabled: Bool { + switch viewModel.mediaType { + case .speech: + switch viewModel.inputModality { + case .text: + return !viewModel.inputText.isEmpty && viewModel.selectedVoice != nil + case .audio: + return viewModel.selectedAudioFile != nil && viewModel.selectedVoice != nil + default: + return false + } + case .video: + switch viewModel.inputModality { + case .text: + return !viewModel.inputText.isEmpty && viewModel.selectedVideoModel != nil + case .image: + return viewModel.selectedImage != nil && viewModel.selectedVideoModel != nil + case .video: + return viewModel.selectedVideo != nil && viewModel.selectedVideoModel != nil + default: + return false + } + } + } +} + +// MARK: - Generated Files View +struct GeneratedFilesView: View { + let files: [AnyMediaFile] + + var body: some View { + if !files.isEmpty { + VStack(alignment: .leading, spacing: 12) { + Text("Generated Files") + .font(.headline) + + MediaFileListView(files) + } + } + } +} From 7a31fc978d62fc74dbc1acbccf8e7e881b638db4 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 17:31:41 -0700 Subject: [PATCH 05/20] MediaGenerationView (working) --- .../MediaGenerationView.swift | 409 +++++------------- .../MediaGenerationViewActor.swift | 232 ++++++++++ .../Intramodular/Sideproject.swift | 2 +- 3 files changed, 349 insertions(+), 294 deletions(-) create mode 100644 Sources/Sideproject/Intramodular/Media Generation/MediaGenerationViewActor.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift index e2df5cb..f388264 100644 --- a/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift +++ b/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift @@ -10,7 +10,9 @@ import ElevenLabs import SwallowUI import Media import AVFoundation +import SideprojectCore import AI +import Runtime public enum MediaType { case speech @@ -28,10 +30,9 @@ public enum InputModality: String { } } -// MARK: - Main View -struct GenerateMediaView: View { +public struct MediaGenerationView: View { public struct Configuration: Equatable { - public static func == (lhs: GenerateMediaView.Configuration, rhs: GenerateMediaView.Configuration) -> Bool { + public static func == (lhs: MediaGenerationView.Configuration, rhs: MediaGenerationView.Configuration) -> Bool { return lhs.textToSpeechModel == rhs.textToSpeechModel && lhs.speechToSpeechModel == rhs.speechToSpeechModel } @@ -40,8 +41,6 @@ struct GenerateMediaView: View { public var speechToSpeechModel: String public var voiceSettings: AbstractVoiceSettings public var videoSettings: VideoGenerationSettings - public var speechClient: (any SpeechSynthesisRequestHandling)? - public var videoClient: (any VideoGenerationRequestHandling)? public init( textToSpeechModel: String = ElevenLabs.Model.EnglishV1.rawValue, @@ -63,7 +62,10 @@ struct GenerateMediaView: View { @StateObject private var viewModel: GenerationViewModel - init( + @State var speechClient: AnySpeechSynthesisRequestHandling? = nil + @State var videoClient: AnyVideoGenerationRequestHandling? = nil + + public init( mediaType: MediaType, inputModality: InputModality, configuration: Configuration = .init(), @@ -83,273 +85,94 @@ struct GenerateMediaView: View { _viewModel = StateObject(wrappedValue: viewModel) } - var body: some View { + public var body: some View { VStack(alignment: .leading, spacing: 20) { - if viewModel.isLoadingResources { - ResourceLoadingView() - } else if let error = viewModel.loadingError { - ResourceErrorView(error: error) { - Task { await viewModel.loadResources() } - } - } else { - GenerationContentView(viewModel: viewModel) + InputView(viewModel: viewModel) + + ClientSelectionView( + mediaType: mediaType, + viewModel: viewModel + ) + + ModelSelectionView(viewModel: viewModel) + + if case .video = viewModel.mediaType, case .video = viewModel.inputModality { + PromptInputView(inputText: $viewModel.inputText) + } + + ControlsView(viewModel: viewModel) + + if viewModel.onComplete == nil { + GeneratedFilesView(files: viewModel.generatedFiles) } } .padding() .task { - await viewModel.loadResources() - } - } -} - -// MARK: - View Model -final class GenerationViewModel: ObservableObject { - @Published var availableVoices: [ElevenLabs.Voice] = [] - @Published var availableModels: [VideoModel] = [] - @Published var isLoadingResources = false - @Published var loadingError: Error? - @Published var inputText = "" - @Published var isLoading = false - @Published var showingPreview = false - @Published var selectedVoice: ElevenLabs.Voice.ID? - @Published var selectedAudioFile: AudioFile? - @Published var generatedAudioFile: AudioFile? - @Published var selectedVideoModel: VideoModel.ID? - @Published var selectedImage: ImageFile? - @Published var selectedVideo: VideoFile? - @Published var generatedVideoFile: VideoFile? - @Published var generatedFiles: [AnyMediaFile] = [] - - internal let mediaType: MediaType - internal let inputModality: InputModality - internal var configuration: GenerateMediaView.Configuration - internal let onComplete: ((AnyMediaFile) -> Void)? - - init( - mediaType: MediaType, - inputModality: InputModality, - configuration: GenerateMediaView.Configuration, - onComplete: ((AnyMediaFile) -> Void)? - ) { - self.mediaType = mediaType - self.inputModality = inputModality - self.configuration = configuration - self.onComplete = onComplete - } - - @MainActor - internal func loadResources() async { - isLoadingResources = true - loadingError = nil - - do { - switch mediaType { - case .speech: - availableVoices = try await configuration.speechClient?.availableVoices() ?? [] - configuration.voiceSettings = .init() - - case .video: - availableModels = try await configuration.videoClient?.availableModels() ?? [] - configuration.videoSettings = .init() + Task { + await loadClients() + await viewModel.loadResources( + viewModel.speechClient?.base(), + viewModel.videoClient?.base() + ) } - } catch { - loadingError = error } - - isLoadingResources = false } - @MainActor - internal func generate() async { - isLoading = true - defer { isLoading = false } - + private func loadClients() async { do { - switch mediaType { - case .speech: - try await generateSpeech() - case .video: - try await generateVideo() - } - } catch { - print("Error generating media: \(error)") - } - } - - @MainActor - private func generateSpeech() async throws { - guard let speechClient = configuration.speechClient else { - throw GenerationError.clientNotAvailable - } - - let audioData: Data? - - switch inputModality { - case .audio: - audioData = try await convertSpeechToSpeech() - case .text: - audioData = try await convertTextToSpeech() - default: - return - } - - guard let audioData = audioData else { return } - - let name = switch inputModality { - case .audio: selectedAudioFile?.name ?? "Converted Audio" - case .text: inputText.prefix(30).trimmingCharacters(in: .whitespacesAndNewlines) - default: "Generated Audio" - } - - let temporaryURL = FileManager.default.temporaryDirectory - .appendingPathComponent(UUID().uuidString) - .appendingPathExtension("m4a") - - try audioData.write(to: temporaryURL) - - let audioFile = try await AudioFile(url: temporaryURL) - generatedAudioFile = audioFile - - if let audioFile = generatedAudioFile { - let mediaFile = AnyMediaFile(audioFile) - if let onComplete = onComplete { - onComplete(mediaFile) - } else { - generatedFiles.append(mediaFile) - } - } - } - - @MainActor - private func generateVideo() async throws { - guard let videoClient = configuration.videoClient else { - throw GenerationError.clientNotAvailable - } - - guard let modelID = selectedVideoModel, - let model = availableModels.first(where: { $0.id == modelID }) else { return } - - let videoData: Data? - - switch inputModality { - case .text: - videoData = try await videoClient.textToVideo( - text: inputText, - model: model, - settings: configuration.videoSettings - ) - case .image: - guard let imageURL = selectedImage?.url else { return } - videoData = try await videoClient.imageToVideo( - imageURL: imageURL, - model: model, - settings: configuration.videoSettings - ) - case .video: - guard let videoURL = selectedVideo?.url else { return } - videoData = try await videoClient.videoToVideo( - videoURL: videoURL, - prompt: inputText, - model: model, - settings: configuration.videoSettings - ) - default: - return - } - - guard let videoData = videoData else { - throw GenerationError.invalidVideoData - } - - let temporaryURL = FileManager.default.temporaryDirectory - .appendingPathComponent(UUID().uuidString) - .appendingPathExtension("mp4") - - try videoData.write(to: temporaryURL) - - let videoFile = try await VideoFile(url: temporaryURL) - generatedVideoFile = videoFile - - let mediaFile = AnyMediaFile(videoFile) - if let onComplete = onComplete { - onComplete(mediaFile) - } else { - generatedFiles.append(mediaFile) - } - - showingPreview = true - } - - @MainActor - private func convertSpeechToSpeech() async throws -> Data? { - guard let voiceID = selectedVoice, - let voice = availableVoices.first(where: { $0.id == voiceID }), - let audioFile = selectedAudioFile else { - return nil - } - - return try await configuration.speechClient?.speechToSpeech( - inputAudioURL: audioFile.url, - voiceID: voice.voiceID, - voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), - model: .init(rawValue: configuration.speechToSpeechModel)! // FIXME: - Will Crash - ) - } - - @MainActor - private func convertTextToSpeech() async throws -> Data? { - guard let voiceID = selectedVoice, - !inputText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { - return nil - } - - return try await configuration.speechClient?.speech( - for: inputText, - voiceID: voiceID.id.rawValue, - voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), //FIXME: - This should just accept AbstractVoiceSettings - model: .init(rawValue: configuration.textToSpeechModel)! // FIXME: - Will Crash - ) - } -} - -// MARK: - Content Views -struct GenerationContentView: View { - @ObservedObject var viewModel: GenerationViewModel - - var body: some View { - VStack(alignment: .leading, spacing: 20) { - HeaderView(mediaType: viewModel.mediaType) - - InputView(viewModel: viewModel) + let services = try await Sideproject.shared.services - ModelSelectionView(viewModel: viewModel) + print(services) - if case .video = viewModel.mediaType, case .video = viewModel.inputModality { - PromptInputView(inputText: $viewModel.inputText) + self.viewModel.availableSpeechClients = services.compactMap { service in + let originalService = service + if let client = service as? (any SpeechSynthesisRequestHandling) { + return AnySpeechSynthesisRequestHandling(client, service: originalService) + } + return nil } - ControlsView(viewModel: viewModel) - - if viewModel.onComplete == nil { - GeneratedFilesView(files: viewModel.generatedFiles) + self.viewModel.availableVideoClients = services.compactMap { service in + let originalService = service + + if let client = service as? (any VideoGenerationRequestHandling) { + return AnyVideoGenerationRequestHandling(client, service: originalService) + } + return nil } + + self.viewModel.speechClient = self.viewModel.availableSpeechClients.first + self.viewModel.videoClient = self.viewModel.availableVideoClients.first + } catch { + print("Error loading clients: \(error)") } } } -// MARK: - Supporting Views -struct HeaderView: View { +struct ClientSelectionView: View { let mediaType: MediaType + @ObservedObject var viewModel: GenerationViewModel + var body: some View { - VStack(alignment: .leading) { - Text(mediaType == .speech ? "Speech Synthesis" : "Video Generation") - .font(.title) - - Text(mediaType == .speech ? - "Generate realistic speech using advanced AI technology" : - "Create stunning videos using AI") - .foregroundColor(.secondary) + VStack(alignment: .leading, spacing: 20) { + if mediaType == .speech { + Picker("Select Speech Client", selection: $viewModel.speechClient) { + ForEach(viewModel.availableSpeechClients, id: \.self) { client in + Text("Speech Client \(client.hashValue)") // Customize this display + .tag(client as AnySpeechSynthesisRequestHandling?) + } + } + .pickerStyle(MenuPickerStyle()) + } else if mediaType == .video { + Picker("Select Video Client", selection: $viewModel.videoClient) { + ForEach(viewModel.availableVideoClients, id: \.self) { client in + Text("Video Client \(client.hashValue)") // Customize this display + .tag(client as AnyVideoGenerationRequestHandling?) + } + } + .pickerStyle(MenuPickerStyle()) + } } } } @@ -464,33 +287,27 @@ struct ModelSelectionView: View { var body: some View { VStack(alignment: .leading, spacing: 8) { switch viewModel.mediaType { - case .speech: - if !viewModel.availableVoices.isEmpty { - Text("Select Voice") - .font(.headline) - - Picker("Voice", selection: $viewModel.selectedVoice) { - Text("Select a voice").tag(Optional.none) - ForEach(viewModel.availableVoices) { voice in - Text(voice.name) - .tag(Optional(voice.id)) + case .speech: + if !viewModel.availableVoices.isEmpty { + Picker("Voice", selection: $viewModel.selectedVoice) { + Text("Select a voice").tag(Optional.none) + ForEach(viewModel.availableVoices) { voice in + Text(voice.name) + .tag(Optional(voice.id)) + } } } - } - - case .video: - if !viewModel.availableModels.isEmpty { - Text("Select Model") - .font(.headline) - Picker("Model", selection: $viewModel.selectedVideoModel) { - Text("Select a model").tag(Optional.none) - ForEach(viewModel.availableModels) { model in - Text(model.name) - .tag(Optional(model.id)) + case .video: + if !viewModel.availableModels.isEmpty { + Picker("Model", selection: $viewModel.selectedVideoModel) { + Text("Select a model").tag(Optional.none) + ForEach(viewModel.availableModels) { model in + Text(model.name) + .tag(Optional(model.id)) + } } } - } } } } @@ -529,11 +346,17 @@ struct PromptInputView: View { struct ControlsView: View { @ObservedObject var viewModel: GenerationViewModel + @Environment(\.speechSynthesizer) var speechClient + @Environment(\.videoClient) var videoClient + var body: some View { VStack(spacing: 12) { Button { Task { - await viewModel.generate() + await viewModel.generate( + speechClient, + videoClient + ) } } label: { if viewModel.isLoading { @@ -550,26 +373,26 @@ struct ControlsView: View { private var isGenerateEnabled: Bool { switch viewModel.mediaType { - case .speech: - switch viewModel.inputModality { - case .text: - return !viewModel.inputText.isEmpty && viewModel.selectedVoice != nil - case .audio: - return viewModel.selectedAudioFile != nil && viewModel.selectedVoice != nil - default: - return false - } - case .video: - switch viewModel.inputModality { - case .text: - return !viewModel.inputText.isEmpty && viewModel.selectedVideoModel != nil - case .image: - return viewModel.selectedImage != nil && viewModel.selectedVideoModel != nil + case .speech: + switch viewModel.inputModality { + case .text: + return !viewModel.inputText.isEmpty && viewModel.selectedVoice != nil + case .audio: + return viewModel.selectedAudioFile != nil && viewModel.selectedVoice != nil + default: + return false + } case .video: - return viewModel.selectedVideo != nil && viewModel.selectedVideoModel != nil - default: - return false - } + switch viewModel.inputModality { + case .text: + return !viewModel.inputText.isEmpty && viewModel.selectedVideoModel != nil + case .image: + return viewModel.selectedImage != nil && viewModel.selectedVideoModel != nil + case .video: + return viewModel.selectedVideo != nil && viewModel.selectedVideoModel != nil + default: + return false + } } } } diff --git a/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationViewActor.swift b/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationViewActor.swift new file mode 100644 index 0000000..9adccdb --- /dev/null +++ b/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationViewActor.swift @@ -0,0 +1,232 @@ +// +// MediaGenerationViewActor.swift +// Sideproject +// +// Created by Jared Davidson on 1/10/25. +// + +import AI +import Media +import SwiftUI +import ElevenLabs + +final class GenerationViewModel: ObservableObject { + @Published var availableVoices: [ElevenLabs.Voice] = [] + @Published var availableModels: [VideoModel] = [] + @Published var isLoadingResources = false + @Published var loadingError: Error? + @Published var inputText = "" + @Published var isLoading = false + @Published var showingPreview = false + @Published var selectedVoice: ElevenLabs.Voice.ID? + @Published var selectedAudioFile: AudioFile? + @Published var generatedAudioFile: AudioFile? + @Published var selectedVideoModel: VideoModel.ID? + @Published var selectedImage: ImageFile? + @Published var selectedVideo: VideoFile? + @Published var generatedVideoFile: VideoFile? + @Published var generatedFiles: [AnyMediaFile] = [] + @Published var speechClient: AnySpeechSynthesisRequestHandling? + @Published var videoClient: AnyVideoGenerationRequestHandling? + @Published var availableSpeechClients: [AnySpeechSynthesisRequestHandling] = [] + @Published var availableVideoClients: [AnyVideoGenerationRequestHandling] = [] + + internal let mediaType: MediaType + internal let inputModality: InputModality + internal var configuration: MediaGenerationView.Configuration + internal let onComplete: ((AnyMediaFile) -> Void)? + + init( + mediaType: MediaType, + inputModality: InputModality, + configuration: MediaGenerationView.Configuration, + onComplete: ((AnyMediaFile) -> Void)? + ) { + self.mediaType = mediaType + self.inputModality = inputModality + self.configuration = configuration + self.onComplete = onComplete + } + + @MainActor + internal func loadResources( + _ speechClient: (any SpeechSynthesisRequestHandling)?, + _ videoClient: (any VideoGenerationRequestHandling)? + ) async { + isLoadingResources = true + loadingError = nil + + do { + switch mediaType { + case .speech: + availableVoices = try await speechClient?.availableVoices() ?? [] + configuration.voiceSettings = .init() + + case .video: + availableModels = try await videoClient?.availableModels() ?? [] + configuration.videoSettings = .init() + } + } catch { + loadingError = error + } + + isLoadingResources = false + } + + @MainActor + internal func generate( + _ speechClient: (any SpeechSynthesisRequestHandling)?, + _ videoClient: (any VideoGenerationRequestHandling)? + ) async { + isLoading = true + defer { isLoading = false } + + do { + switch mediaType { + case .speech: + try await generateSpeech(speechClient) + case .video: + try await generateVideo(videoClient) + } + } catch { + print("Error generating media: \(error)") + } + } + + @MainActor + private func generateSpeech( + _ speechClient: (any SpeechSynthesisRequestHandling)? + ) async throws { + guard let speechClient = speechClient else { + throw GenerationError.clientNotAvailable + } + + let audioData: Data? + + switch inputModality { + case .audio: + audioData = try await convertSpeechToSpeech(speechClient) + case .text: + audioData = try await convertTextToSpeech(speechClient) + default: + return + } + + guard let audioData = audioData else { return } + + let audioFile = try await AudioFile( + data: audioData, + name: UUID().uuidString, + id: .random() + ) + generatedAudioFile = audioFile + + let mediaFile = AnyMediaFile(audioFile) + if let onComplete = onComplete { + onComplete(mediaFile) + } else { + generatedFiles.append(mediaFile) + } + } + + @MainActor + private func generateVideo( + _ videoClient: (any VideoGenerationRequestHandling)? + ) async throws { + guard let videoClient = videoClient else { + throw GenerationError.clientNotAvailable + } + + guard let modelID = selectedVideoModel, + let model = availableModels.first(where: { $0.id == modelID }) else { return } + + let videoData: Data? + + switch inputModality { + case .text: + videoData = try await videoClient.textToVideo( + text: inputText, + model: model, + settings: configuration.videoSettings + ) + case .image: + guard let imageURL = selectedImage?.url else { return } + videoData = try await videoClient.imageToVideo( + imageURL: imageURL, + model: model, + settings: configuration.videoSettings + ) + case .video: + guard let videoURL = selectedVideo?.url else { return } + videoData = try await videoClient.videoToVideo( + videoURL: videoURL, + prompt: inputText, + model: model, + settings: configuration.videoSettings + ) + default: + return + } + + guard let videoData = videoData else { + throw GenerationError.invalidVideoData + } + + let temporaryURL = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + .appendingPathExtension("mp4") + + try videoData.write(to: temporaryURL) + + let videoFile = try await VideoFile(url: temporaryURL) + generatedVideoFile = videoFile + + let mediaFile = AnyMediaFile(videoFile) + if let onComplete = onComplete { + onComplete(mediaFile) + } else { + generatedFiles.append(mediaFile) + } + + showingPreview = true + } + + @MainActor + private func convertSpeechToSpeech( + _ speechClient: (any SpeechSynthesisRequestHandling)? + ) async throws -> Data? { + guard let voiceID = selectedVoice, + let voice = availableVoices.first(where: { $0.id == voiceID }), + let audioFile = selectedAudioFile else { + return nil + } + + return try await speechClient?.speechToSpeech( + inputAudioURL: audioFile.url, + voiceID: voice.voiceID, + voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), + model: .init(rawValue: configuration.speechToSpeechModel)! // FIXME: - Will Crash + ) + } + + @MainActor + private func convertTextToSpeech( + _ speechClient: (any SpeechSynthesisRequestHandling)? + ) async throws -> Data? { + guard let voiceID = selectedVoice, + !inputText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { + return nil + } + + print(speechClient) + print(voiceID) + + let audio = try await speechClient?.speech( + for: inputText, + voiceID: voiceID.id.rawValue, + voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), //FIXME: - This should just accept AbstractVoiceSettings + model: .init(rawValue: configuration.textToSpeechModel)! // FIXME: - Will Crash + ) + return audio + } +} diff --git a/Sources/SideprojectCore/Intramodular/Sideproject.swift b/Sources/SideprojectCore/Intramodular/Sideproject.swift index 4464151..4d030da 100644 --- a/Sources/SideprojectCore/Intramodular/Sideproject.swift +++ b/Sources/SideprojectCore/Intramodular/Sideproject.swift @@ -29,7 +29,7 @@ public final class Sideproject: _CancellablesProviding, Logging, ObservableObjec #metatype((any CoreMI._ServiceClientProtocol).self), .nonAppleFramework ) - internal static var serviceTypes: [any CoreMI._ServiceClientProtocol.Type] + public static var serviceTypes: [any CoreMI._ServiceClientProtocol.Type] @_StaticMirrorQuery( #metatype((any Sideproject.ExternalAccountTypeDescriptor).self), From 470debfb6c76da7ccdfc1c8ce2716a03c10a346c Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 18:20:01 -0700 Subject: [PATCH 06/20] Cleanup --- .../MediaGenerationView+Views.swift | 153 +++++++ .../MediaGenerationView.swift | 168 +++++++ .../MediaGenerationViewActor.swift | 19 +- .../MediaGenerationView.swift | 414 ------------------ ...roject.ExternalAccountTypeDescriptor.swift | 2 +- .../Intramodular/Sideproject.swift | 7 +- 6 files changed, 337 insertions(+), 426 deletions(-) create mode 100644 Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift create mode 100644 Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift rename Sources/Sideproject/Intramodular/{Media Generation => Media Generation (WIP)}/MediaGenerationViewActor.swift (95%) delete mode 100644 Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift new file mode 100644 index 0000000..660ec18 --- /dev/null +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift @@ -0,0 +1,153 @@ +// +// MediaGenerationView+Views.swift +// Sideproject +// +// Created by Jared Davidson on 1/10/25. +// + +import SwiftUI +import ElevenLabs +import AI +import Media + +extension MediaGenerationView { + + var clientSelectionView: some View { + VStack(alignment: .leading, spacing: 20) { + if mediaType == .speech { + Picker("Select Speech Client", selection: $viewModel.speechClient) { + ForEach(viewModel.availableSpeechClients, id: \.self) { client in + Text("Speech Client \(client.hashValue)") // Customize this display + .tag(client as AnySpeechSynthesisRequestHandling?) + } + } + .pickerStyle(MenuPickerStyle()) + } else if mediaType == .video { + Picker("Select Video Client", selection: $viewModel.videoClient) { + ForEach(viewModel.availableVideoClients, id: \.self) { client in + Text("Video Client \(client.hashValue)") // Customize this display + .tag(client as AnyVideoGenerationRequestHandling?) + } + } + .pickerStyle(MenuPickerStyle()) + } + } + } + + var inputView: some View { + Group { + switch viewModel.inputModality { + case .text: + TextEditor(text: $viewModel.inputText) + .frame(height: 100) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.gray.opacity(0.2)) + ) + .overlay( + Group { + if viewModel.inputText.isEmpty { + Text("Enter your text here...") + .foregroundColor(.gray) + .padding(.leading, 4) + } + }, + alignment: .topLeading + ) + case .audio, .image, .video: + FileDropView { files in + switch viewModel.inputModality { + case .audio: + viewModel.selectedAudioFile = files.first?.audioFile + case .image: + viewModel.selectedImage = files.first?.imageFile + case .video: + viewModel.selectedVideo = files.first?.videoFile + default: + break + } + } content: { files in + if !files.isEmpty { + MediaFileListView(files) + } + } + } + } + } + + // MARK: - Model Selection View + var modelSelectionView: some View { + + VStack(alignment: .leading, spacing: 8) { + switch viewModel.mediaType { + case .speech: + if !viewModel.availableVoices.isEmpty { + Picker("Voice", selection: $viewModel.selectedVoice) { + Text("Select a voice").tag(Optional.none) + ForEach(viewModel.availableVoices) { voice in + Text(voice.name) + .tag(Optional(voice.id)) + } + } + } + + case .video: + if !viewModel.availableModels.isEmpty { + Picker("Model", selection: $viewModel.selectedVideoModel) { + Text("Select a model").tag(Optional.none) + ForEach(viewModel.availableModels) { model in + Text(model.name) + .tag(Optional(model.id)) + } + } + } + } + } + } + + var promptInputView: some View { + VStack(alignment: .leading, spacing: 8) { + Text("Enter Prompt") + .font(.headline) + + TextEditor(text: $viewModel.inputText) + .frame(height: 100) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.gray.opacity(0.2)) + ) + .overlay( + Group { + if viewModel.inputText.isEmpty { + Text("Describe how you want to transform the video...") + .foregroundColor(.gray) + .padding(.leading, 4) + } + }, + alignment: .topLeading + ) + } + } + + var controlsView: some View { + VStack(spacing: 12) { + Button { + Task { + await viewModel.generate( + viewModel.speechClient?.base(), + viewModel.videoClient?.base() + ) + } + } label: { + if viewModel.isLoading { + ProgressView() + .progressViewStyle(.circular) + } else { + Text("Generate") + } + } + .buttonStyle(.borderedProminent) + .disabled(viewModel.isLoading || !isGenerateEnabled) + } + } +} diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift new file mode 100644 index 0000000..1836905 --- /dev/null +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift @@ -0,0 +1,168 @@ +// +// MediaGenerationView.swift +// Sideproject +// +// Created by Jared Davidson on 1/10/25. +// + +import SwiftUI +import ElevenLabs +import SwallowUI +import Media +import AVFoundation +import SideprojectCore +import AI +import Runtime + +public enum MediaType { + case speech + case video +} + +public enum InputModality: String { + case text + case audio + case image + case video + + var description: String { + rawValue.capitalized + } +} + +public struct MediaGenerationView: View { + public struct Configuration: Equatable { + public static func == (lhs: MediaGenerationView.Configuration, rhs: MediaGenerationView.Configuration) -> Bool { + return lhs.textToSpeechModel == rhs.textToSpeechModel && + lhs.speechToSpeechModel == rhs.speechToSpeechModel + } + + public var textToSpeechModel: String + public var speechToSpeechModel: String + public var voiceSettings: AbstractVoiceSettings + public var videoSettings: VideoGenerationSettings + + public init( + textToSpeechModel: String = ElevenLabs.Model.EnglishV1.rawValue, + speechToSpeechModel: String = ElevenLabs.Model.EnglishSTSV2.rawValue, + voiceSettings: AbstractVoiceSettings = .init(), + videoSettings: VideoGenerationSettings = .init() + ) { + self.textToSpeechModel = textToSpeechModel + self.speechToSpeechModel = speechToSpeechModel + self.voiceSettings = voiceSettings + self.videoSettings = videoSettings + } + } + + internal let mediaType: MediaType + internal let inputModality: InputModality + internal var configuration: Configuration + internal let onComplete: ((AnyMediaFile) -> Void)? + + @StateObject internal var viewModel: GenerationViewModel + + public init( + mediaType: MediaType, + inputModality: InputModality, + configuration: Configuration = .init(), + onComplete: ((AnyMediaFile) -> Void)? = nil + ) { + self.mediaType = mediaType + self.inputModality = inputModality + self.configuration = configuration + self.onComplete = onComplete + + let viewModel = GenerationViewModel( + mediaType: mediaType, + inputModality: inputModality, + configuration: configuration, + onComplete: onComplete + ) + _viewModel = StateObject(wrappedValue: viewModel) + } + + public var body: some View { + VStack(alignment: .leading, spacing: 20) { + + if let mediaFile = viewModel.generatedFile { + MediaFileView(file: mediaFile.file) + } + + inputView + clientSelectionView + modelSelectionView + + if case .video = viewModel.mediaType, case .video = viewModel.inputModality { + promptInputView + } + + controlsView + } + .padding() + .task { + Task { + await loadClients() + await viewModel.loadResources( + viewModel.speechClient?.base(), + viewModel.videoClient?.base() + ) + } + } + } + + private func loadClients() async { + do { + let services = try await Sideproject.shared.services + + print(services) + + self.viewModel.availableSpeechClients = services.compactMap { service in + let originalService = service + if let client = service as? (any SpeechSynthesisRequestHandling) { + return AnySpeechSynthesisRequestHandling(client, service: originalService) + } + return nil + } + + self.viewModel.availableVideoClients = services.compactMap { service in + let originalService = service + + if let client = service as? (any VideoGenerationRequestHandling) { + return AnyVideoGenerationRequestHandling(client, service: originalService) + } + return nil + } + + self.viewModel.speechClient = self.viewModel.availableSpeechClients.first + self.viewModel.videoClient = self.viewModel.availableVideoClients.first + } catch { + print("Error loading clients: \(error)") + } + } + + internal var isGenerateEnabled: Bool { + switch viewModel.mediaType { + case .speech: + switch viewModel.inputModality { + case .text: + return !viewModel.inputText.isEmpty && viewModel.selectedVoice != nil + case .audio: + return viewModel.selectedAudioFile != nil && viewModel.selectedVoice != nil + default: + return false + } + case .video: + switch viewModel.inputModality { + case .text: + return !viewModel.inputText.isEmpty && viewModel.selectedVideoModel != nil + case .image: + return viewModel.selectedImage != nil && viewModel.selectedVideoModel != nil + case .video: + return viewModel.selectedVideo != nil && viewModel.selectedVideoModel != nil + default: + return false + } + } + } +} diff --git a/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationViewActor.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift similarity index 95% rename from Sources/Sideproject/Intramodular/Media Generation/MediaGenerationViewActor.swift rename to Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift index 9adccdb..d49a659 100644 --- a/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationViewActor.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift @@ -20,12 +20,10 @@ final class GenerationViewModel: ObservableObject { @Published var showingPreview = false @Published var selectedVoice: ElevenLabs.Voice.ID? @Published var selectedAudioFile: AudioFile? - @Published var generatedAudioFile: AudioFile? + @Published var generatedFile: AnyMediaFile? @Published var selectedVideoModel: VideoModel.ID? @Published var selectedImage: ImageFile? @Published var selectedVideo: VideoFile? - @Published var generatedVideoFile: VideoFile? - @Published var generatedFiles: [AnyMediaFile] = [] @Published var speechClient: AnySpeechSynthesisRequestHandling? @Published var videoClient: AnyVideoGenerationRequestHandling? @Published var availableSpeechClients: [AnySpeechSynthesisRequestHandling] = [] @@ -119,13 +117,11 @@ final class GenerationViewModel: ObservableObject { name: UUID().uuidString, id: .random() ) - generatedAudioFile = audioFile + generatedFile = .init(audioFile) let mediaFile = AnyMediaFile(audioFile) if let onComplete = onComplete { onComplete(mediaFile) - } else { - generatedFiles.append(mediaFile) } } @@ -179,13 +175,11 @@ final class GenerationViewModel: ObservableObject { try videoData.write(to: temporaryURL) let videoFile = try await VideoFile(url: temporaryURL) - generatedVideoFile = videoFile + generatedFile = .init(videoFile) let mediaFile = AnyMediaFile(videoFile) if let onComplete = onComplete { onComplete(mediaFile) - } else { - generatedFiles.append(mediaFile) } showingPreview = true @@ -230,3 +224,10 @@ final class GenerationViewModel: ObservableObject { return audio } } + + +fileprivate enum GenerationError: Error { + case invalidVideoData + case clientNotAvailable + case resourceLoadingFailed +} diff --git a/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift deleted file mode 100644 index f388264..0000000 --- a/Sources/Sideproject/Intramodular/Media Generation/MediaGenerationView.swift +++ /dev/null @@ -1,414 +0,0 @@ -// -// MediaGenerationView.swift -// Sideproject -// -// Created by Jared Davidson on 1/10/25. -// - -import SwiftUI -import ElevenLabs -import SwallowUI -import Media -import AVFoundation -import SideprojectCore -import AI -import Runtime - -public enum MediaType { - case speech - case video -} - -public enum InputModality: String { - case text - case audio - case image - case video - - var description: String { - rawValue.capitalized - } -} - -public struct MediaGenerationView: View { - public struct Configuration: Equatable { - public static func == (lhs: MediaGenerationView.Configuration, rhs: MediaGenerationView.Configuration) -> Bool { - return lhs.textToSpeechModel == rhs.textToSpeechModel && - lhs.speechToSpeechModel == rhs.speechToSpeechModel - } - - public var textToSpeechModel: String - public var speechToSpeechModel: String - public var voiceSettings: AbstractVoiceSettings - public var videoSettings: VideoGenerationSettings - - public init( - textToSpeechModel: String = ElevenLabs.Model.EnglishV1.rawValue, - speechToSpeechModel: String = ElevenLabs.Model.EnglishSTSV2.rawValue, - voiceSettings: AbstractVoiceSettings = .init(), - videoSettings: VideoGenerationSettings = .init() - ) { - self.textToSpeechModel = textToSpeechModel - self.speechToSpeechModel = speechToSpeechModel - self.voiceSettings = voiceSettings - self.videoSettings = videoSettings - } - } - - private let mediaType: MediaType - private let inputModality: InputModality - private var configuration: Configuration - private let onComplete: ((AnyMediaFile) -> Void)? - - @StateObject private var viewModel: GenerationViewModel - - @State var speechClient: AnySpeechSynthesisRequestHandling? = nil - @State var videoClient: AnyVideoGenerationRequestHandling? = nil - - public init( - mediaType: MediaType, - inputModality: InputModality, - configuration: Configuration = .init(), - onComplete: ((AnyMediaFile) -> Void)? = nil - ) { - self.mediaType = mediaType - self.inputModality = inputModality - self.configuration = configuration - self.onComplete = onComplete - - let viewModel = GenerationViewModel( - mediaType: mediaType, - inputModality: inputModality, - configuration: configuration, - onComplete: onComplete - ) - _viewModel = StateObject(wrappedValue: viewModel) - } - - public var body: some View { - VStack(alignment: .leading, spacing: 20) { - InputView(viewModel: viewModel) - - ClientSelectionView( - mediaType: mediaType, - viewModel: viewModel - ) - - ModelSelectionView(viewModel: viewModel) - - if case .video = viewModel.mediaType, case .video = viewModel.inputModality { - PromptInputView(inputText: $viewModel.inputText) - } - - ControlsView(viewModel: viewModel) - - if viewModel.onComplete == nil { - GeneratedFilesView(files: viewModel.generatedFiles) - } - } - .padding() - .task { - Task { - await loadClients() - await viewModel.loadResources( - viewModel.speechClient?.base(), - viewModel.videoClient?.base() - ) - } - } - } - - private func loadClients() async { - do { - let services = try await Sideproject.shared.services - - print(services) - - self.viewModel.availableSpeechClients = services.compactMap { service in - let originalService = service - if let client = service as? (any SpeechSynthesisRequestHandling) { - return AnySpeechSynthesisRequestHandling(client, service: originalService) - } - return nil - } - - self.viewModel.availableVideoClients = services.compactMap { service in - let originalService = service - - if let client = service as? (any VideoGenerationRequestHandling) { - return AnyVideoGenerationRequestHandling(client, service: originalService) - } - return nil - } - - self.viewModel.speechClient = self.viewModel.availableSpeechClients.first - self.viewModel.videoClient = self.viewModel.availableVideoClients.first - } catch { - print("Error loading clients: \(error)") - } - } -} - -struct ClientSelectionView: View { - let mediaType: MediaType - - @ObservedObject var viewModel: GenerationViewModel - - var body: some View { - VStack(alignment: .leading, spacing: 20) { - if mediaType == .speech { - Picker("Select Speech Client", selection: $viewModel.speechClient) { - ForEach(viewModel.availableSpeechClients, id: \.self) { client in - Text("Speech Client \(client.hashValue)") // Customize this display - .tag(client as AnySpeechSynthesisRequestHandling?) - } - } - .pickerStyle(MenuPickerStyle()) - } else if mediaType == .video { - Picker("Select Video Client", selection: $viewModel.videoClient) { - ForEach(viewModel.availableVideoClients, id: \.self) { client in - Text("Video Client \(client.hashValue)") // Customize this display - .tag(client as AnyVideoGenerationRequestHandling?) - } - } - .pickerStyle(MenuPickerStyle()) - } - } - } -} - -struct InputView: View { - @ObservedObject var viewModel: GenerationViewModel - - var body: some View { - Group { - switch viewModel.inputModality { - case .text: - TextInputView(text: $viewModel.inputText) - case .audio, .image, .video: - MediaInputView(viewModel: viewModel) - } - } - } -} - -struct TextInputView: View { - @Binding var text: String - - var body: some View { - TextEditor(text: $text) - .frame(height: 100) - .overlay( - RoundedRectangle(cornerRadius: 8) - .stroke(Color.gray.opacity(0.2)) - ) - .overlay( - Group { - if text.isEmpty { - Text("Enter your text here...") - .foregroundColor(.gray) - .padding(.leading, 4) - } - }, - alignment: .topLeading - ) - } -} - -struct MediaInputView: View { - @ObservedObject var viewModel: GenerationViewModel - - var body: some View { - FileDropView { files in - switch viewModel.inputModality { - case .audio: - viewModel.selectedAudioFile = files.first?.audioFile - case .image: - viewModel.selectedImage = files.first?.imageFile - case .video: - viewModel.selectedVideo = files.first?.videoFile - default: - break - } - } content: { files in - if !files.isEmpty { - MediaFileListView(files) - } - } - } -} - -struct ResourceLoadingView: View { - var body: some View { - VStack { - ProgressView() - Text("Loading resources...") - .foregroundStyle(.secondary) - } - .frame(maxWidth: .infinity, maxHeight: .infinity) - } -} - -struct ResourceErrorView: View { - let error: Error - let retryAction: () -> Void - - var body: some View { - VStack(spacing: 12) { - Image(systemName: "exclamationmark.triangle") - .font(.largeTitle) - .foregroundColor(.red) - - Text("Failed to load resources") - .font(.headline) - - Text(error.localizedDescription) - .font(.caption) - .foregroundStyle(.secondary) - - Button("Try Again", action: retryAction) - .buttonStyle(.bordered) - } - .frame(maxWidth: .infinity, maxHeight: .infinity) - } -} - -enum GenerationError: Error { - case invalidVideoData - case clientNotAvailable - case resourceLoadingFailed -} - - -// MARK: - Model Selection View -struct ModelSelectionView: View { - @ObservedObject var viewModel: GenerationViewModel - - var body: some View { - VStack(alignment: .leading, spacing: 8) { - switch viewModel.mediaType { - case .speech: - if !viewModel.availableVoices.isEmpty { - Picker("Voice", selection: $viewModel.selectedVoice) { - Text("Select a voice").tag(Optional.none) - ForEach(viewModel.availableVoices) { voice in - Text(voice.name) - .tag(Optional(voice.id)) - } - } - } - - case .video: - if !viewModel.availableModels.isEmpty { - Picker("Model", selection: $viewModel.selectedVideoModel) { - Text("Select a model").tag(Optional.none) - ForEach(viewModel.availableModels) { model in - Text(model.name) - .tag(Optional(model.id)) - } - } - } - } - } - } -} - -// MARK: - Prompt Input View -struct PromptInputView: View { - @Binding var inputText: String - - var body: some View { - VStack(alignment: .leading, spacing: 8) { - Text("Enter Prompt") - .font(.headline) - - TextEditor(text: $inputText) - .frame(height: 100) - .overlay( - RoundedRectangle(cornerRadius: 8) - .stroke(Color.gray.opacity(0.2)) - ) - .overlay( - Group { - if inputText.isEmpty { - Text("Describe how you want to transform the video...") - .foregroundColor(.gray) - .padding(.leading, 4) - } - }, - alignment: .topLeading - ) - } - } -} - -// MARK: - Controls View -struct ControlsView: View { - @ObservedObject var viewModel: GenerationViewModel - - @Environment(\.speechSynthesizer) var speechClient - @Environment(\.videoClient) var videoClient - - var body: some View { - VStack(spacing: 12) { - Button { - Task { - await viewModel.generate( - speechClient, - videoClient - ) - } - } label: { - if viewModel.isLoading { - ProgressView() - .progressViewStyle(.circular) - } else { - Text("Generate") - } - } - .buttonStyle(.borderedProminent) - .disabled(viewModel.isLoading || !isGenerateEnabled) - } - } - - private var isGenerateEnabled: Bool { - switch viewModel.mediaType { - case .speech: - switch viewModel.inputModality { - case .text: - return !viewModel.inputText.isEmpty && viewModel.selectedVoice != nil - case .audio: - return viewModel.selectedAudioFile != nil && viewModel.selectedVoice != nil - default: - return false - } - case .video: - switch viewModel.inputModality { - case .text: - return !viewModel.inputText.isEmpty && viewModel.selectedVideoModel != nil - case .image: - return viewModel.selectedImage != nil && viewModel.selectedVideoModel != nil - case .video: - return viewModel.selectedVideo != nil && viewModel.selectedVideoModel != nil - default: - return false - } - } - } -} - -// MARK: - Generated Files View -struct GeneratedFilesView: View { - let files: [AnyMediaFile] - - var body: some View { - if !files.isEmpty { - VStack(alignment: .leading, spacing: 12) { - Text("Generated Files") - .font(.headline) - - MediaFileListView(files) - } - } - } -} diff --git a/Sources/SideprojectCore/Intramodular/Accounts/Sideproject.ExternalAccountTypeDescriptor.swift b/Sources/SideprojectCore/Intramodular/Accounts/Sideproject.ExternalAccountTypeDescriptor.swift index db81ff8..95a2747 100644 --- a/Sources/SideprojectCore/Intramodular/Accounts/Sideproject.ExternalAccountTypeDescriptor.swift +++ b/Sources/SideprojectCore/Intramodular/Accounts/Sideproject.ExternalAccountTypeDescriptor.swift @@ -234,7 +234,7 @@ extension Sideproject.ExternalAccountTypeDescriptors { } } - @HadeanIdentifier("fisul-tapos-hotak-nonov") + @HadeanIdentifier("jatap-jogaz-ritiz-vibok") public struct ElevenLabs: Sideproject.ExternalAccountTypeDescriptor, _StaticInstance { public var accountType: Sideproject.ExternalAccountTypeIdentifier { "com.vmanot.elevenlabs" diff --git a/Sources/SideprojectCore/Intramodular/Sideproject.swift b/Sources/SideprojectCore/Intramodular/Sideproject.swift index 4d030da..9e10f69 100644 --- a/Sources/SideprojectCore/Intramodular/Sideproject.swift +++ b/Sources/SideprojectCore/Intramodular/Sideproject.swift @@ -29,7 +29,7 @@ public final class Sideproject: _CancellablesProviding, Logging, ObservableObjec #metatype((any CoreMI._ServiceClientProtocol).self), .nonAppleFramework ) - public static var serviceTypes: [any CoreMI._ServiceClientProtocol.Type] + private static var serviceTypes: [any CoreMI._ServiceClientProtocol.Type] @_StaticMirrorQuery( #metatype((any Sideproject.ExternalAccountTypeDescriptor).self), @@ -46,13 +46,14 @@ public final class Sideproject: _CancellablesProviding, Logging, ObservableObjec @Published private var autoinitializedServices: [any CoreMI._ServiceClientProtocol]? = nil { didSet { if let newValue = autoinitializedServices { + logger.info(newValue.description) logger.info("Auto-initialized \(newValue.count) service(s).") } } } @MainActor - @Published private var manuallyAddedServices: [any CoreMI._ServiceClientProtocol] = [] + @Published public var manuallyAddedServices: [any CoreMI._ServiceClientProtocol] = [] // @Published public var modelIdentifierScope: ModelIdentifierScope? @@ -156,6 +157,8 @@ extension Sideproject { let oldAccounts: [CoreMI._AnyServiceAccount] = self.autodiscoveredServiceAccounts let newAccounts = try self._serviceAccounts() + print(newAccounts) + guard oldAccounts != newAccounts else { return } From b058adf5de3b9ec03f28817b3bf0c338edffadf6 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 19:10:40 -0700 Subject: [PATCH 07/20] display name --- .../MediaGenerationView+Views.swift | 8 ++++---- .../MediaGenerationView.swift | 15 ++++++--------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift index 660ec18..5ede9d0 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift @@ -17,7 +17,7 @@ extension MediaGenerationView { if mediaType == .speech { Picker("Select Speech Client", selection: $viewModel.speechClient) { ForEach(viewModel.availableSpeechClients, id: \.self) { client in - Text("Speech Client \(client.hashValue)") // Customize this display + Text(client.displayName) .tag(client as AnySpeechSynthesisRequestHandling?) } } @@ -25,7 +25,7 @@ extension MediaGenerationView { } else if mediaType == .video { Picker("Select Video Client", selection: $viewModel.videoClient) { ForEach(viewModel.availableVideoClients, id: \.self) { client in - Text("Video Client \(client.hashValue)") // Customize this display + Text("Video Client") // Customize this display .tag(client as AnyVideoGenerationRequestHandling?) } } @@ -134,8 +134,8 @@ extension MediaGenerationView { Button { Task { await viewModel.generate( - viewModel.speechClient?.base(), - viewModel.videoClient?.base() + viewModel.speechClient?.base, + viewModel.videoClient?.base ) } } label: { diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift index 1836905..3d36300 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift @@ -104,8 +104,8 @@ public struct MediaGenerationView: View { Task { await loadClients() await viewModel.loadResources( - viewModel.speechClient?.base(), - viewModel.videoClient?.base() + viewModel.speechClient?.base, + viewModel.videoClient?.base ) } } @@ -118,18 +118,15 @@ public struct MediaGenerationView: View { print(services) self.viewModel.availableSpeechClients = services.compactMap { service in - let originalService = service - if let client = service as? (any SpeechSynthesisRequestHandling) { - return AnySpeechSynthesisRequestHandling(client, service: originalService) + if let service = service as? (any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling) { + return AnySpeechSynthesisRequestHandling(service) } return nil } self.viewModel.availableVideoClients = services.compactMap { service in - let originalService = service - - if let client = service as? (any VideoGenerationRequestHandling) { - return AnyVideoGenerationRequestHandling(client, service: originalService) + if let service = service as? (any CoreMI._ServiceClientProtocol & VideoGenerationRequestHandling) { + return AnyVideoGenerationRequestHandling(service) } return nil } From 6c1a95e0e784cbf733043cc2c831f9f3709ee376 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 19:53:06 -0700 Subject: [PATCH 08/20] Fixed Modality --- .../FileInputModality.swift | 35 ++++++ .../InputModalityProtocol.swift | 76 ++++++++++++ .../MediaGenerationView+Views.swift | 94 +++++---------- .../MediaGenerationView.swift | 81 +++++-------- .../MediaGenerationViewActor.swift | 112 ++++++++---------- .../TextInputModality.swift | 42 +++++++ 6 files changed, 265 insertions(+), 175 deletions(-) create mode 100644 Sources/Sideproject/Intramodular/Media Generation (WIP)/FileInputModality.swift create mode 100644 Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift create mode 100644 Sources/Sideproject/Intramodular/Media Generation (WIP)/TextInputModality.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/FileInputModality.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/FileInputModality.swift new file mode 100644 index 0000000..b0ac9e1 --- /dev/null +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/FileInputModality.swift @@ -0,0 +1,35 @@ +// +// FileInputModality.swift +// Sideproject +// +// Created by Jared Davidson on 1/10/25. +// + +import Media +import SwiftUI + +public struct FileInputModality: InputModalityConfiguration { + public typealias InputType = T + + public var description: String + + public init(description: String) { + self.description = description + } + + public func makeInputView(inputBinding: Binding, placeholderText: String) -> AnyView { + AnyView( + FileDropView { files in + inputBinding.wrappedValue = files.first?.cast(to: T.self) + } content: { files in + if !files.isEmpty { + MediaFileListView(files) + } + } + ) + } + + public func validate(_ input: T?) -> Bool { + input != nil + } +} diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift new file mode 100644 index 0000000..b37e664 --- /dev/null +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift @@ -0,0 +1,76 @@ +// +// InputModalityProtocol.swift +// Sideproject +// +// Created by Jared Davidson on 1/10/25. +// + +import SwiftUI +import AVFoundation +import Media + +public enum InputModality { + public static let text = AnyInputModality(TextInputModality()) + public static let audio = AnyInputModality(FileInputModality(description: "Audio")) + public static let image = AnyInputModality(FileInputModality(description: "Image")) + public static let video = AnyInputModality(FileInputModality(description: "Video")) +} + +extension AnyInputModality { + public static var text: Self { InputModality.text } + public static var audio: Self { InputModality.audio } + public static var image: Self { InputModality.image } + public static var video: Self { InputModality.video } +} + +extension MediaGenerationView { + public func inputModality(_ modality: AnyInputModality) -> Self { + MediaGenerationView( + mediaType: self.mediaType, + inputModality: modality, + configuration: self.configuration, + onComplete: self.onComplete + ) + } +} + +public struct AnyInputModality { + private let _description: String + private let _makeInputView: (Binding, String) -> AnyView + private let _validate: (Any?) -> Bool + private let _type: Any.Type + + public var description: String { _description } + public var inputType: Any.Type { _type } + + public init(_ modality: T) { + self._description = modality.description + self._type = T.InputType.self + self._makeInputView = { binding, placeholder in + let typedBinding = Binding( + get: { binding.wrappedValue as? T.InputType }, + set: { binding.wrappedValue = $0 } + ) + return modality.makeInputView(inputBinding: typedBinding, placeholderText: placeholder) + } + self._validate = { input in + guard let typedInput = input as? T.InputType else { return false } + return modality.validate(typedInput) + } + } + + public func makeInputView(binding: Binding, placeholderText: String = "") -> AnyView { + _makeInputView(binding, placeholderText) + } + + public func validate(_ input: Any?) -> Bool { + _validate(input) + } +} + +public protocol InputModalityConfiguration { + associatedtype InputType + var description: String { get } + func makeInputView(inputBinding: Binding, placeholderText: String) -> AnyView + func validate(_ input: InputType?) -> Bool +} diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift index 5ede9d0..ff1c810 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift @@ -11,6 +11,26 @@ import AI import Media extension MediaGenerationView { + var inputView: some View { + inputModality.makeInputView( + binding: $viewModel.currentInput, + placeholderText: getPlaceholderText() + ) + } + + private func getPlaceholderText() -> String { + if case .video = mediaType, inputModality.inputType == URL.self { + return "Describe how you want to transform the video..." + } + switch inputModality.inputType { + case is String.Type: + return "Enter your text here..." + case is URL.Type: + return "Drop files here" + default: + return "" + } + } var clientSelectionView: some View { VStack(alignment: .leading, spacing: 20) { @@ -25,7 +45,7 @@ extension MediaGenerationView { } else if mediaType == .video { Picker("Select Video Client", selection: $viewModel.videoClient) { ForEach(viewModel.availableVideoClients, id: \.self) { client in - Text("Video Client") // Customize this display + Text("Video Client") .tag(client as AnyVideoGenerationRequestHandling?) } } @@ -34,52 +54,9 @@ extension MediaGenerationView { } } - var inputView: some View { - Group { - switch viewModel.inputModality { - case .text: - TextEditor(text: $viewModel.inputText) - .frame(height: 100) - .overlay( - RoundedRectangle(cornerRadius: 8) - .stroke(Color.gray.opacity(0.2)) - ) - .overlay( - Group { - if viewModel.inputText.isEmpty { - Text("Enter your text here...") - .foregroundColor(.gray) - .padding(.leading, 4) - } - }, - alignment: .topLeading - ) - case .audio, .image, .video: - FileDropView { files in - switch viewModel.inputModality { - case .audio: - viewModel.selectedAudioFile = files.first?.audioFile - case .image: - viewModel.selectedImage = files.first?.imageFile - case .video: - viewModel.selectedVideo = files.first?.videoFile - default: - break - } - } content: { files in - if !files.isEmpty { - MediaFileListView(files) - } - } - } - } - } - - // MARK: - Model Selection View var modelSelectionView: some View { - VStack(alignment: .leading, spacing: 8) { - switch viewModel.mediaType { + switch mediaType { case .speech: if !viewModel.availableVoices.isEmpty { Picker("Voice", selection: $viewModel.selectedVoice) { @@ -106,26 +83,15 @@ extension MediaGenerationView { } var promptInputView: some View { - VStack(alignment: .leading, spacing: 8) { - Text("Enter Prompt") - .font(.headline) - - TextEditor(text: $viewModel.inputText) - .frame(height: 100) - .overlay( - RoundedRectangle(cornerRadius: 8) - .stroke(Color.gray.opacity(0.2)) - ) - .overlay( - Group { - if viewModel.inputText.isEmpty { - Text("Describe how you want to transform the video...") - .foregroundColor(.gray) - .padding(.leading, 4) - } - }, - alignment: .topLeading + Group { + if case .video = mediaType, inputModality.inputType == URL.self { + inputModality.makeInputView( + binding: $viewModel.currentInput, + placeholderText: "Describe how you want to transform the video..." ) + } else { + EmptyView() + } } } diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift index 3d36300..21cbe81 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift @@ -19,24 +19,8 @@ public enum MediaType { case video } -public enum InputModality: String { - case text - case audio - case image - case video - - var description: String { - rawValue.capitalized - } -} - public struct MediaGenerationView: View { public struct Configuration: Equatable { - public static func == (lhs: MediaGenerationView.Configuration, rhs: MediaGenerationView.Configuration) -> Bool { - return lhs.textToSpeechModel == rhs.textToSpeechModel && - lhs.speechToSpeechModel == rhs.speechToSpeechModel - } - public var textToSpeechModel: String public var speechToSpeechModel: String public var voiceSettings: AbstractVoiceSettings @@ -56,7 +40,7 @@ public struct MediaGenerationView: View { } internal let mediaType: MediaType - internal let inputModality: InputModality + internal let inputModality: AnyInputModality internal var configuration: Configuration internal let onComplete: ((AnyMediaFile) -> Void)? @@ -64,7 +48,21 @@ public struct MediaGenerationView: View { public init( mediaType: MediaType, - inputModality: InputModality, + configuration: Configuration = .init(), + onComplete: ((AnyMediaFile) -> Void)? = nil + ) { + // Default to text modality + self.init( + mediaType: mediaType, + inputModality: .text, + configuration: configuration, + onComplete: onComplete + ) + } + + internal init( + mediaType: MediaType, + inputModality: AnyInputModality, configuration: Configuration = .init(), onComplete: ((AnyMediaFile) -> Void)? = nil ) { @@ -84,16 +82,15 @@ public struct MediaGenerationView: View { public var body: some View { VStack(alignment: .leading, spacing: 20) { - if let mediaFile = viewModel.generatedFile { MediaFileView(file: mediaFile.file) } - inputView + inputModality.makeInputView(binding: $viewModel.currentInput) clientSelectionView modelSelectionView - if case .video = viewModel.mediaType, case .video = viewModel.inputModality { + if case .video = mediaType, inputModality.inputType == URL.self { promptInputView } @@ -101,13 +98,11 @@ public struct MediaGenerationView: View { } .padding() .task { - Task { - await loadClients() - await viewModel.loadResources( - viewModel.speechClient?.base, - viewModel.videoClient?.base - ) - } + await loadClients() + await viewModel.loadResources( + viewModel.speechClient?.base, + viewModel.videoClient?.base + ) } } @@ -115,8 +110,6 @@ public struct MediaGenerationView: View { do { let services = try await Sideproject.shared.services - print(services) - self.viewModel.availableSpeechClients = services.compactMap { service in if let service = service as? (any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling) { return AnySpeechSynthesisRequestHandling(service) @@ -139,27 +132,13 @@ public struct MediaGenerationView: View { } internal var isGenerateEnabled: Bool { - switch viewModel.mediaType { - case .speech: - switch viewModel.inputModality { - case .text: - return !viewModel.inputText.isEmpty && viewModel.selectedVoice != nil - case .audio: - return viewModel.selectedAudioFile != nil && viewModel.selectedVoice != nil - default: - return false - } - case .video: - switch viewModel.inputModality { - case .text: - return !viewModel.inputText.isEmpty && viewModel.selectedVideoModel != nil - case .image: - return viewModel.selectedImage != nil && viewModel.selectedVideoModel != nil - case .video: - return viewModel.selectedVideo != nil && viewModel.selectedVideoModel != nil - default: - return false - } + let isInputValid = inputModality.validate(viewModel.currentInput) + + let isModelSelected = switch mediaType { + case .speech: viewModel.selectedVoice != nil + case .video: viewModel.selectedVideoModel != nil } + + return isInputValid && isModelSelected } } diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift index d49a659..a410d67 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift @@ -15,28 +15,25 @@ final class GenerationViewModel: ObservableObject { @Published var availableModels: [VideoModel] = [] @Published var isLoadingResources = false @Published var loadingError: Error? - @Published var inputText = "" + @Published var currentInput: Any? @Published var isLoading = false @Published var showingPreview = false @Published var selectedVoice: ElevenLabs.Voice.ID? - @Published var selectedAudioFile: AudioFile? @Published var generatedFile: AnyMediaFile? @Published var selectedVideoModel: VideoModel.ID? - @Published var selectedImage: ImageFile? - @Published var selectedVideo: VideoFile? @Published var speechClient: AnySpeechSynthesisRequestHandling? @Published var videoClient: AnyVideoGenerationRequestHandling? @Published var availableSpeechClients: [AnySpeechSynthesisRequestHandling] = [] @Published var availableVideoClients: [AnyVideoGenerationRequestHandling] = [] internal let mediaType: MediaType - internal let inputModality: InputModality + internal let inputModality: AnyInputModality internal var configuration: MediaGenerationView.Configuration internal let onComplete: ((AnyMediaFile) -> Void)? init( mediaType: MediaType, - inputModality: InputModality, + inputModality: AnyInputModality, configuration: MediaGenerationView.Configuration, onComplete: ((AnyMediaFile) -> Void)? ) { @@ -101,11 +98,25 @@ final class GenerationViewModel: ObservableObject { let audioData: Data? - switch inputModality { - case .audio: - audioData = try await convertSpeechToSpeech(speechClient) - case .text: - audioData = try await convertTextToSpeech(speechClient) + switch inputModality.inputType { + case is URL.Type: + guard let audioURL = currentInput as? URL else { return } + audioData = try await speechClient.speechToSpeech( + inputAudioURL: audioURL, + voiceID: selectedVoice?.id.rawValue ?? "", + voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), + model: .init(rawValue: configuration.speechToSpeechModel)! + ) + + case is String.Type: + guard let text = currentInput as? String else { return } + audioData = try await speechClient.speech( + for: text, + voiceID: selectedVoice?.id.rawValue ?? "", + voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), + model: .init(rawValue: configuration.textToSpeechModel)! + ) + default: return } @@ -117,11 +128,11 @@ final class GenerationViewModel: ObservableObject { name: UUID().uuidString, id: .random() ) + generatedFile = .init(audioFile) - - let mediaFile = AnyMediaFile(audioFile) + if let onComplete = onComplete { - onComplete(mediaFile) + onComplete(AnyMediaFile(audioFile)) } } @@ -134,32 +145,39 @@ final class GenerationViewModel: ObservableObject { } guard let modelID = selectedVideoModel, - let model = availableModels.first(where: { $0.id == modelID }) else { return } + let model = availableModels.first(where: { $0.id == modelID }) else { + throw GenerationError.modelNotSelected + } let videoData: Data? - switch inputModality { - case .text: + switch inputModality.inputType { + case is String.Type: + guard let text = currentInput as? String else { return } videoData = try await videoClient.textToVideo( - text: inputText, + text: text, model: model, settings: configuration.videoSettings ) - case .image: - guard let imageURL = selectedImage?.url else { return } + + case is UIImage.Type: + guard let image = currentInput as? UIImage else { return } + let imageURL = try await saveImageTemporarily(image) videoData = try await videoClient.imageToVideo( imageURL: imageURL, model: model, settings: configuration.videoSettings ) - case .video: - guard let videoURL = selectedVideo?.url else { return } + + case is URL.Type: + guard let videoURL = currentInput as? URL else { return } videoData = try await videoClient.videoToVideo( videoURL: videoURL, - prompt: inputText, + prompt: "", // Note: Would need to add prompt handling model: model, settings: configuration.videoSettings ) + default: return } @@ -177,57 +195,31 @@ final class GenerationViewModel: ObservableObject { let videoFile = try await VideoFile(url: temporaryURL) generatedFile = .init(videoFile) - let mediaFile = AnyMediaFile(videoFile) if let onComplete = onComplete { - onComplete(mediaFile) + onComplete(AnyMediaFile(videoFile)) } showingPreview = true } + - @MainActor - private func convertSpeechToSpeech( - _ speechClient: (any SpeechSynthesisRequestHandling)? - ) async throws -> Data? { - guard let voiceID = selectedVoice, - let voice = availableVoices.first(where: { $0.id == voiceID }), - let audioFile = selectedAudioFile else { - return nil - } + private func saveImageTemporarily(_ image: UIImage) async throws -> URL { + let temporaryURL = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + .appendingPathExtension("png") - return try await speechClient?.speechToSpeech( - inputAudioURL: audioFile.url, - voiceID: voice.voiceID, - voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), - model: .init(rawValue: configuration.speechToSpeechModel)! // FIXME: - Will Crash - ) - } - - @MainActor - private func convertTextToSpeech( - _ speechClient: (any SpeechSynthesisRequestHandling)? - ) async throws -> Data? { - guard let voiceID = selectedVoice, - !inputText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { - return nil + guard let imageData = image.pngData() else { + throw GenerationError.invalidVideoData } - print(speechClient) - print(voiceID) - - let audio = try await speechClient?.speech( - for: inputText, - voiceID: voiceID.id.rawValue, - voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), //FIXME: - This should just accept AbstractVoiceSettings - model: .init(rawValue: configuration.textToSpeechModel)! // FIXME: - Will Crash - ) - return audio + try imageData.write(to: temporaryURL) + return temporaryURL } } - fileprivate enum GenerationError: Error { case invalidVideoData case clientNotAvailable + case modelNotSelected case resourceLoadingFailed } diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/TextInputModality.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/TextInputModality.swift new file mode 100644 index 0000000..e699389 --- /dev/null +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/TextInputModality.swift @@ -0,0 +1,42 @@ +// +// TextInputModality.swift +// Sideproject +// +// Created by Jared Davidson on 1/10/25. +// + +import SwiftUI + +public struct TextInputModality: InputModalityConfiguration { + public typealias InputType = String + + public var description: String { "Text" } + + public func makeInputView(inputBinding: Binding, placeholderText: String) -> AnyView { + AnyView( + TextEditor(text: Binding( + get: { inputBinding.wrappedValue ?? "" }, + set: { inputBinding.wrappedValue = $0.isEmpty ? nil : $0 } + )) + .frame(height: 100) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.gray.opacity(0.2)) + ) + .overlay( + Group { + if inputBinding.wrappedValue?.isEmpty ?? true { + Text(placeholderText) + .foregroundColor(.gray) + .padding(.leading, 4) + } + }, + alignment: .topLeading + ) + ) + } + + public func validate(_ input: String?) -> Bool { + !(input ?? "").isEmpty + } +} From 1cd6d4d505eebd90c323418e27423e586e8310c4 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 10 Jan 2025 19:58:00 -0700 Subject: [PATCH 09/20] cleanup --- .../MediaGenerationView.swift | 2 +- .../MediaGenerationViewActor.swift | 32 ++++++------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift index 21cbe81..b29eed5 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift @@ -99,7 +99,7 @@ public struct MediaGenerationView: View { .padding() .task { await loadClients() - await viewModel.loadResources( + try? await viewModel.loadResources( viewModel.speechClient?.base, viewModel.videoClient?.base ) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift index a410d67..793d20f 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift @@ -13,11 +13,8 @@ import ElevenLabs final class GenerationViewModel: ObservableObject { @Published var availableVoices: [ElevenLabs.Voice] = [] @Published var availableModels: [VideoModel] = [] - @Published var isLoadingResources = false - @Published var loadingError: Error? @Published var currentInput: Any? @Published var isLoading = false - @Published var showingPreview = false @Published var selectedVoice: ElevenLabs.Voice.ID? @Published var generatedFile: AnyMediaFile? @Published var selectedVideoModel: VideoModel.ID? @@ -47,25 +44,16 @@ final class GenerationViewModel: ObservableObject { internal func loadResources( _ speechClient: (any SpeechSynthesisRequestHandling)?, _ videoClient: (any VideoGenerationRequestHandling)? - ) async { - isLoadingResources = true - loadingError = nil - - do { - switch mediaType { - case .speech: - availableVoices = try await speechClient?.availableVoices() ?? [] - configuration.voiceSettings = .init() - - case .video: - availableModels = try await videoClient?.availableModels() ?? [] - configuration.videoSettings = .init() - } - } catch { - loadingError = error + ) async throws { + switch mediaType { + case .speech: + availableVoices = try await speechClient?.availableVoices() ?? [] + configuration.voiceSettings = .init() + + case .video: + availableModels = try await videoClient?.availableModels() ?? [] + configuration.videoSettings = .init() } - - isLoadingResources = false } @MainActor @@ -198,8 +186,6 @@ final class GenerationViewModel: ObservableObject { if let onComplete = onComplete { onComplete(AnyMediaFile(videoFile)) } - - showingPreview = true } From 15bca9a73cca7e35058c887fd325f51c04ab85ae Mon Sep 17 00:00:00 2001 From: Vatsal Manot Date: Sun, 12 Jan 2025 22:54:58 -0800 Subject: [PATCH 10/20] Update package --- .../MediaGenerationViewActor.swift | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift index 793d20f..2b68a4e 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift @@ -5,10 +5,11 @@ // Created by Jared Davidson on 1/10/25. // +import ElevenLabs import AI import Media import SwiftUI -import ElevenLabs +import SwiftUIX final class GenerationViewModel: ObservableObject { @Published var availableVoices: [ElevenLabs.Voice] = [] @@ -148,8 +149,8 @@ final class GenerationViewModel: ObservableObject { settings: configuration.videoSettings ) - case is UIImage.Type: - guard let image = currentInput as? UIImage else { return } + case is AppKitOrUIKitImage.Type: + guard let image = currentInput as? AppKitOrUIKitImage else { return } let imageURL = try await saveImageTemporarily(image) videoData = try await videoClient.imageToVideo( imageURL: imageURL, @@ -189,7 +190,7 @@ final class GenerationViewModel: ObservableObject { } - private func saveImageTemporarily(_ image: UIImage) async throws -> URL { + private func saveImageTemporarily(_ image: AppKitOrUIKitImage) async throws -> URL { let temporaryURL = FileManager.default.temporaryDirectory .appendingPathComponent(UUID().uuidString) .appendingPathExtension("png") From ead3f46539ddfd9be52e1ec0f9fb8658c1758976 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Tue, 14 Jan 2025 10:34:51 -0700 Subject: [PATCH 11/20] Fixed service accounts --- .../MediaGenerationView.swift | 2 ++ .../Intramodular/Sideproject.swift | 26 +++++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift index b29eed5..d42c864 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift @@ -110,6 +110,8 @@ public struct MediaGenerationView: View { do { let services = try await Sideproject.shared.services + print(services) + self.viewModel.availableSpeechClients = services.compactMap { service in if let service = service as? (any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling) { return AnySpeechSynthesisRequestHandling(service) diff --git a/Sources/SideprojectCore/Intramodular/Sideproject.swift b/Sources/SideprojectCore/Intramodular/Sideproject.swift index 9e10f69..2eda4ec 100644 --- a/Sources/SideprojectCore/Intramodular/Sideproject.swift +++ b/Sources/SideprojectCore/Intramodular/Sideproject.swift @@ -30,7 +30,7 @@ public final class Sideproject: _CancellablesProviding, Logging, ObservableObjec .nonAppleFramework ) private static var serviceTypes: [any CoreMI._ServiceClientProtocol.Type] - + @_StaticMirrorQuery( #metatype((any Sideproject.ExternalAccountTypeDescriptor).self), .nonAppleFramework @@ -165,6 +165,7 @@ extension Sideproject { let services: [any CoreMI._ServiceClientProtocol] = try await self._makeServices(forAccounts: newAccounts) + print(services) self.autodiscoveredServiceAccounts = newAccounts self.autoinitializedServices = services @@ -205,20 +206,23 @@ extension Sideproject { let serviceTypes = Sideproject.serviceTypes var result: [any CoreMI._ServiceClientProtocol] = await serviceAccounts - .asyncMap { (account: CoreMI._AnyServiceAccount) in - await serviceTypes.first(byUnwrapping: { type -> (any CoreMI._ServiceClientProtocol)? in - do { - return try await type.init(account: account) - } catch { + .asyncMap { account in + await serviceTypes + .asyncCompactMap { type in do { - return try await type.init(account: nil) - } catch(_) { - return nil + let service = try await type.init(account: account) + return service + } catch { + do { + let service = try await type.init(account: nil) + return service + } catch { + return nil + } } } - }) } - .compactMap({ $0 }) + .flatMap { $0 } result += await serviceTypes .concurrentMap({ try? await $0.init(account: nil) }) From e175a3418cddcef86b75a9d9becea8b5bcec0e91 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Tue, 14 Jan 2025 12:08:52 -0700 Subject: [PATCH 12/20] Added AudioInputModality (in progress) --- Package.swift | 2 + .../AudioInputModality.swift | 94 +++++++++++++++++++ .../InputModalityProtocol.swift | 2 +- 3 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift diff --git a/Package.swift b/Package.swift index 36218af..73bc983 100644 --- a/Package.swift +++ b/Package.swift @@ -32,6 +32,7 @@ let package = Package( .package(url: "https://github.com/vmanot/Merge.git", branch: "master"), .package(url: "https://github.com/vmanot/NetworkKit.git", branch: "master"), .package(url: "https://github.com/vmanot/Swallow.git", branch: "master"), + .package(url: "https://github.com/vmanot/Media.git", branch: "main") ], targets: [ .macro( @@ -105,6 +106,7 @@ let package = Package( "Swallow", "SwiftUIX", "SwiftUIZ", + "Media" ], path: "Sources/Sideproject", resources: [], diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift new file mode 100644 index 0000000..b850a4e --- /dev/null +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift @@ -0,0 +1,94 @@ +// +// AudioInputModality.swift +// Sideproject +// +// Created by Jared Davidson on 1/14/25. +// + +import Media +import SwiftUI + +public struct AudioInputModality: InputModalityConfiguration { + public typealias InputType = AudioFile + + public var description: String + public var enableTranscription: Bool + + public init( + description: String, + enableTranscription: Bool = true + ) { + self.description = description + self.enableTranscription = enableTranscription + } + + public func makeInputView(inputBinding: Binding, placeholderText: String) -> AnyView { + AnyView( + CombinedAudioInputView( + audioFile: inputBinding, + enableTranscription: enableTranscription + ) + ) + } + + public func validate(_ input: AudioFile?) -> Bool { + input != nil + } +} + +private struct CombinedAudioInputView: View { + @Binding var audioFile: AudioFile? + let enableTranscription: Bool + + @State private var showingRecorder = false + + var body: some View { + VStack(spacing: 16) { + if audioFile == nil { + // File drop area + FileDropView { files in + audioFile = files.first?.cast(to: AudioFile.self) + } content: { files in + EmptyView() + } + .frame(height: 120) + + Text("or") + .foregroundStyle(.secondary) + + // Record button + Button { + showingRecorder = true + } label: { + Label("Record Audio", systemImage: "mic.circle.fill") + .font(.headline) + } + .buttonStyle(.borderedProminent) + } + + // Preview of recorded/dropped audio + if let audioFile = audioFile { + HStack { + Button { + self.audioFile = nil + } label: { + Text("Retry") + } + } + MediaFileView(file: audioFile) + } + } + .sheet(isPresented: $showingRecorder) { + AudioRecorderView(configuration: AudioRecorderViewConfiguration( + enableSpeechRecognition: true + )) { recordedAudio in + audioFile = recordedAudio + showingRecorder = false + } content: { media in + if let media { + AudioFileView(file: media) + } + } + } + } +} diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift index b37e664..141f752 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift @@ -11,7 +11,7 @@ import Media public enum InputModality { public static let text = AnyInputModality(TextInputModality()) - public static let audio = AnyInputModality(FileInputModality(description: "Audio")) + public static let audio = AnyInputModality(AudioInputModality(description: "Audio")) public static let image = AnyInputModality(FileInputModality(description: "Image")) public static let video = AnyInputModality(FileInputModality(description: "Video")) } From 678728e068717942e29c5fd24db71dba8dd48149 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Tue, 14 Jan 2025 13:58:27 -0700 Subject: [PATCH 13/20] Audio recorder with transcription --- .../AudioInputModality.swift | 54 ++++++++----------- .../MediaGenerationViewActor.swift | 11 +++- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift index b850a4e..3deb7f6 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift @@ -44,50 +44,38 @@ private struct CombinedAudioInputView: View { var body: some View { VStack(spacing: 16) { - if audioFile == nil { - // File drop area + if let audioFile = audioFile { + HStack { + Button { + self.audioFile = nil + } label: { + Image(systemName: .arrowCounterclockwiseCircle) + } + } + + MediaFileView(file: audioFile) + } else { FileDropView { files in audioFile = files.first?.cast(to: AudioFile.self) } content: { files in EmptyView() } - .frame(height: 120) + .frame(height: 100) Text("or") .foregroundStyle(.secondary) - // Record button - Button { - showingRecorder = true - } label: { - Label("Record Audio", systemImage: "mic.circle.fill") - .font(.headline) - } - .buttonStyle(.borderedProminent) - } - - // Preview of recorded/dropped audio - if let audioFile = audioFile { - HStack { - Button { - self.audioFile = nil - } label: { - Text("Retry") + AudioRecorderView(configuration: AudioRecorderViewConfiguration( + enableSpeechRecognition: true + )) { recordedAudio in + audioFile = recordedAudio + showingRecorder = false + } content: { media in + if let media { + AudioFileView(file: media) } } - MediaFileView(file: audioFile) - } - } - .sheet(isPresented: $showingRecorder) { - AudioRecorderView(configuration: AudioRecorderViewConfiguration( - enableSpeechRecognition: true - )) { recordedAudio in - audioFile = recordedAudio - showingRecorder = false - } content: { media in - if let media { - AudioFileView(file: media) - } + .frame(height: 100) } } } diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift index 2b68a4e..0dc2340 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift @@ -88,6 +88,15 @@ final class GenerationViewModel: ObservableObject { let audioData: Data? switch inputModality.inputType { + case is AudioFile.Type: + guard let audioFile = currentInput as? AudioFile else { return } + audioData = try await speechClient.speechToSpeech( + inputAudioURL: audioFile.url, + voiceID: selectedVoice?.id.rawValue ?? "", + voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), + model: .init(rawValue: configuration.speechToSpeechModel)! + ) + case is URL.Type: guard let audioURL = currentInput as? URL else { return } audioData = try await speechClient.speechToSpeech( @@ -107,7 +116,7 @@ final class GenerationViewModel: ObservableObject { ) default: - return + fatalError(.unimplemented) } guard let audioData = audioData else { return } From 20d87486d82b326ad754e152e0091f88327ab3f8 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Tue, 14 Jan 2025 16:11:41 -0700 Subject: [PATCH 14/20] Fixe type mismatches --- .../AudioInputModality.swift | 6 ++---- .../InputModalityProtocol.swift | 4 +++- .../MediaGenerationView.swift | 3 +-- .../MediaGenerationViewActor.swift | 14 +++++++------- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift index 3deb7f6..fa9b7f1 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift @@ -16,7 +16,7 @@ public struct AudioInputModality: InputModalityConfiguration { public init( description: String, - enableTranscription: Bool = true + enableTranscription: Bool = false ) { self.description = description self.enableTranscription = enableTranscription @@ -60,13 +60,12 @@ private struct CombinedAudioInputView: View { } content: { files in EmptyView() } - .frame(height: 100) Text("or") .foregroundStyle(.secondary) AudioRecorderView(configuration: AudioRecorderViewConfiguration( - enableSpeechRecognition: true + enableSpeechRecognition: enableTranscription )) { recordedAudio in audioFile = recordedAudio showingRecorder = false @@ -75,7 +74,6 @@ private struct CombinedAudioInputView: View { AudioFileView(file: media) } } - .frame(height: 100) } } } diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift index 141f752..e47f4c8 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift @@ -11,13 +11,15 @@ import Media public enum InputModality { public static let text = AnyInputModality(TextInputModality()) - public static let audio = AnyInputModality(AudioInputModality(description: "Audio")) + public static let audio = AnyInputModality(AudioInputModality(description: "Audio", enableTranscription: false)) + public static let audioWithTranscription = AnyInputModality(AudioInputModality(description: "Audio", enableTranscription: true)) public static let image = AnyInputModality(FileInputModality(description: "Image")) public static let video = AnyInputModality(FileInputModality(description: "Video")) } extension AnyInputModality { public static var text: Self { InputModality.text } + public static var audioWithTranscription: Self { InputModality.audioWithTranscription } public static var audio: Self { InputModality.audio } public static var image: Self { InputModality.image } public static var video: Self { InputModality.video } diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift index d42c864..2743a51 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift @@ -26,6 +26,7 @@ public struct MediaGenerationView: View { public var voiceSettings: AbstractVoiceSettings public var videoSettings: VideoGenerationSettings + // FIXME: - This should not be defaulting to ElevenLabs. Should be detached to an AbstractModel instead. public init( textToSpeechModel: String = ElevenLabs.Model.EnglishV1.rawValue, speechToSpeechModel: String = ElevenLabs.Model.EnglishSTSV2.rawValue, @@ -110,8 +111,6 @@ public struct MediaGenerationView: View { do { let services = try await Sideproject.shared.services - print(services) - self.viewModel.availableSpeechClients = services.compactMap { service in if let service = service as? (any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling) { return AnySpeechSynthesisRequestHandling(service) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift index 0dc2340..0618cb5 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift @@ -48,7 +48,7 @@ final class GenerationViewModel: ObservableObject { ) async throws { switch mediaType { case .speech: - availableVoices = try await speechClient?.availableVoices() ?? [] + availableVoices = try await (speechClient?.availableVoices() ?? []).map({try ElevenLabs.Voice(voice: $0)}) configuration.voiceSettings = .init() case .video: @@ -93,8 +93,8 @@ final class GenerationViewModel: ObservableObject { audioData = try await speechClient.speechToSpeech( inputAudioURL: audioFile.url, voiceID: selectedVoice?.id.rawValue ?? "", - voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), - model: .init(rawValue: configuration.speechToSpeechModel)! + voiceSettings: configuration.voiceSettings, + model: configuration.speechToSpeechModel ) case is URL.Type: @@ -102,8 +102,8 @@ final class GenerationViewModel: ObservableObject { audioData = try await speechClient.speechToSpeech( inputAudioURL: audioURL, voiceID: selectedVoice?.id.rawValue ?? "", - voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), - model: .init(rawValue: configuration.speechToSpeechModel)! + voiceSettings: configuration.voiceSettings, + model: configuration.speechToSpeechModel ) case is String.Type: @@ -111,8 +111,8 @@ final class GenerationViewModel: ObservableObject { audioData = try await speechClient.speech( for: text, voiceID: selectedVoice?.id.rawValue ?? "", - voiceSettings: ElevenLabs.VoiceSettings(settings: configuration.voiceSettings), - model: .init(rawValue: configuration.textToSpeechModel)! + voiceSettings: configuration.voiceSettings, + model: configuration.textToSpeechModel ) default: From f96358549c274058cc10df24a51143e0c173ce2c Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Tue, 14 Jan 2025 16:34:44 -0700 Subject: [PATCH 15/20] Fixed warning and cleanup --- .../AudioInputModality.swift | 80 ------------------- .../MediaGenerationViewActor.swift | 4 +- .../AnyInputModality.swift} | 10 ++- .../Models/AudioInputModality.swift | 37 +++++++++ .../{ => Models}/FileInputModality.swift | 0 .../{ => Models}/TextInputModality.swift | 0 .../Views/AudioInputView.swift | 69 ++++++++++++++++ .../MediaGenerationView+Views.swift | 34 ++++---- .../{ => Views}/MediaGenerationView.swift | 30 +++---- 9 files changed, 146 insertions(+), 118 deletions(-) delete mode 100644 Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift rename Sources/Sideproject/Intramodular/Media Generation (WIP)/{InputModalityProtocol.swift => Models/AnyInputModality.swift} (86%) create mode 100644 Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AudioInputModality.swift rename Sources/Sideproject/Intramodular/Media Generation (WIP)/{ => Models}/FileInputModality.swift (100%) rename Sources/Sideproject/Intramodular/Media Generation (WIP)/{ => Models}/TextInputModality.swift (100%) create mode 100644 Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/AudioInputView.swift rename Sources/Sideproject/Intramodular/Media Generation (WIP)/{ => Views}/MediaGenerationView+Views.swift (75%) rename Sources/Sideproject/Intramodular/Media Generation (WIP)/{ => Views}/MediaGenerationView.swift (81%) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift deleted file mode 100644 index fa9b7f1..0000000 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/AudioInputModality.swift +++ /dev/null @@ -1,80 +0,0 @@ -// -// AudioInputModality.swift -// Sideproject -// -// Created by Jared Davidson on 1/14/25. -// - -import Media -import SwiftUI - -public struct AudioInputModality: InputModalityConfiguration { - public typealias InputType = AudioFile - - public var description: String - public var enableTranscription: Bool - - public init( - description: String, - enableTranscription: Bool = false - ) { - self.description = description - self.enableTranscription = enableTranscription - } - - public func makeInputView(inputBinding: Binding, placeholderText: String) -> AnyView { - AnyView( - CombinedAudioInputView( - audioFile: inputBinding, - enableTranscription: enableTranscription - ) - ) - } - - public func validate(_ input: AudioFile?) -> Bool { - input != nil - } -} - -private struct CombinedAudioInputView: View { - @Binding var audioFile: AudioFile? - let enableTranscription: Bool - - @State private var showingRecorder = false - - var body: some View { - VStack(spacing: 16) { - if let audioFile = audioFile { - HStack { - Button { - self.audioFile = nil - } label: { - Image(systemName: .arrowCounterclockwiseCircle) - } - } - - MediaFileView(file: audioFile) - } else { - FileDropView { files in - audioFile = files.first?.cast(to: AudioFile.self) - } content: { files in - EmptyView() - } - - Text("or") - .foregroundStyle(.secondary) - - AudioRecorderView(configuration: AudioRecorderViewConfiguration( - enableSpeechRecognition: enableTranscription - )) { recordedAudio in - audioFile = recordedAudio - showingRecorder = false - } content: { media in - if let media { - AudioFileView(file: media) - } - } - } - } - } -} diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift index 0618cb5..3084b0b 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift @@ -11,7 +11,7 @@ import Media import SwiftUI import SwiftUIX -final class GenerationViewModel: ObservableObject { +final class MediaGenerationViewActor: ObservableObject { @Published var availableVoices: [ElevenLabs.Voice] = [] @Published var availableModels: [VideoModel] = [] @Published var currentInput: Any? @@ -198,7 +198,7 @@ final class GenerationViewModel: ObservableObject { } } - + @MainActor private func saveImageTemporarily(_ image: AppKitOrUIKitImage) async throws -> URL { let temporaryURL = FileManager.default.temporaryDirectory .appendingPathComponent(UUID().uuidString) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AnyInputModality.swift similarity index 86% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift rename to Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AnyInputModality.swift index e47f4c8..c013408 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/InputModalityProtocol.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AnyInputModality.swift @@ -10,21 +10,23 @@ import AVFoundation import Media public enum InputModality { + public static func audio(variants: AudioVariant = .all) -> AnyInputModality { + AnyInputModality(AudioInputModality(description: "Audio", variants: variants)) + } + public static let text = AnyInputModality(TextInputModality()) - public static let audio = AnyInputModality(AudioInputModality(description: "Audio", enableTranscription: false)) - public static let audioWithTranscription = AnyInputModality(AudioInputModality(description: "Audio", enableTranscription: true)) public static let image = AnyInputModality(FileInputModality(description: "Image")) public static let video = AnyInputModality(FileInputModality(description: "Video")) } extension AnyInputModality { public static var text: Self { InputModality.text } - public static var audioWithTranscription: Self { InputModality.audioWithTranscription } - public static var audio: Self { InputModality.audio } + public static func audio(variants: AudioVariant = .all) -> Self { InputModality.audio(variants: variants) } public static var image: Self { InputModality.image } public static var video: Self { InputModality.video } } + extension MediaGenerationView { public func inputModality(_ modality: AnyInputModality) -> Self { MediaGenerationView( diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AudioInputModality.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AudioInputModality.swift new file mode 100644 index 0000000..86b1df3 --- /dev/null +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AudioInputModality.swift @@ -0,0 +1,37 @@ +// +// AudioInputModality.swift +// Sideproject +// +// Created by Jared Davidson on 1/14/25. +// + +import Media +import SwiftUI + +public struct AudioInputModality: InputModalityConfiguration { + public typealias InputType = AudioFile + + public var description: String + public var variants: AudioVariant + + public init( + description: String, + variants: AudioVariant = .all + ) { + self.description = description + self.variants = variants + } + + public func makeInputView(inputBinding: Binding, placeholderText: String) -> AnyView { + AnyView( + AudioInputView( + audioFile: inputBinding, + variants: variants + ) + ) + } + + public func validate(_ input: AudioFile?) -> Bool { + input != nil + } +} diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/FileInputModality.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/FileInputModality.swift similarity index 100% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/FileInputModality.swift rename to Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/FileInputModality.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/TextInputModality.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/TextInputModality.swift similarity index 100% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/TextInputModality.swift rename to Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/TextInputModality.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/AudioInputView.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/AudioInputView.swift new file mode 100644 index 0000000..1f26ff8 --- /dev/null +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/AudioInputView.swift @@ -0,0 +1,69 @@ +// +// AudioInputView.swift +// Sideproject +// +// Created by Jared Davidson on 1/14/25. +// + +import SwiftUI +import Media + +public struct AudioVariant: OptionSet { + public let rawValue: Int + + public init(rawValue: Int) { + self.rawValue = rawValue + } + + public static let fileDrop = AudioVariant(rawValue: 1 << 0) + public static let recorder = AudioVariant(rawValue: 1 << 1) + public static let recorderWithTranscription = AudioVariant(rawValue: 1 << 2) + + public static let all: AudioVariant = [.fileDrop, .recorder, .recorderWithTranscription] +} + +public struct AudioInputView: View { + @Binding var audioFile: AudioFile? + let variants: AudioVariant + + public var body: some View { + VStack(spacing: 16) { + if let audioFile = audioFile { + HStack { + Button { + self.audioFile = nil + } label: { + Image(systemName: .arrowCounterclockwiseCircle) + } + } + + MediaFileView(file: audioFile) + } else { + if variants.contains(.fileDrop) { + FileDropView { files in + audioFile = files.first?.cast(to: AudioFile.self) + } content: { files in + EmptyView() + } + + if variants.contains(.recorder) { + Text("or") + .foregroundStyle(.secondary) + } + } + + if variants.contains(.recorder) || variants.contains(.recorderWithTranscription) { + AudioRecorderView(configuration: AudioRecorderViewConfiguration( + enableSpeechRecognition: variants.contains(.recorderWithTranscription) + )) { recordedAudio in + audioFile = recordedAudio + } content: { media in + if let media { + AudioFileView(file: media) + } + } + } + } + } + } +} diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/MediaGenerationView+Views.swift similarity index 75% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift rename to Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/MediaGenerationView+Views.swift index ff1c810..511e776 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView+Views.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/MediaGenerationView+Views.swift @@ -13,7 +13,7 @@ import Media extension MediaGenerationView { var inputView: some View { inputModality.makeInputView( - binding: $viewModel.currentInput, + binding: $viewActor.currentInput, placeholderText: getPlaceholderText() ) } @@ -35,16 +35,16 @@ extension MediaGenerationView { var clientSelectionView: some View { VStack(alignment: .leading, spacing: 20) { if mediaType == .speech { - Picker("Select Speech Client", selection: $viewModel.speechClient) { - ForEach(viewModel.availableSpeechClients, id: \.self) { client in + Picker("Select Speech Client", selection: $viewActor.speechClient) { + ForEach(viewActor.availableSpeechClients, id: \.self) { client in Text(client.displayName) .tag(client as AnySpeechSynthesisRequestHandling?) } } .pickerStyle(MenuPickerStyle()) } else if mediaType == .video { - Picker("Select Video Client", selection: $viewModel.videoClient) { - ForEach(viewModel.availableVideoClients, id: \.self) { client in + Picker("Select Video Client", selection: $viewActor.videoClient) { + ForEach(viewActor.availableVideoClients, id: \.self) { client in Text("Video Client") .tag(client as AnyVideoGenerationRequestHandling?) } @@ -58,10 +58,10 @@ extension MediaGenerationView { VStack(alignment: .leading, spacing: 8) { switch mediaType { case .speech: - if !viewModel.availableVoices.isEmpty { - Picker("Voice", selection: $viewModel.selectedVoice) { + if !viewActor.availableVoices.isEmpty { + Picker("Voice", selection: $viewActor.selectedVoice) { Text("Select a voice").tag(Optional.none) - ForEach(viewModel.availableVoices) { voice in + ForEach(viewActor.availableVoices) { voice in Text(voice.name) .tag(Optional(voice.id)) } @@ -69,10 +69,10 @@ extension MediaGenerationView { } case .video: - if !viewModel.availableModels.isEmpty { - Picker("Model", selection: $viewModel.selectedVideoModel) { + if !viewActor.availableModels.isEmpty { + Picker("Model", selection: $viewActor.selectedVideoModel) { Text("Select a model").tag(Optional.none) - ForEach(viewModel.availableModels) { model in + ForEach(viewActor.availableModels) { model in Text(model.name) .tag(Optional(model.id)) } @@ -86,7 +86,7 @@ extension MediaGenerationView { Group { if case .video = mediaType, inputModality.inputType == URL.self { inputModality.makeInputView( - binding: $viewModel.currentInput, + binding: $viewActor.currentInput, placeholderText: "Describe how you want to transform the video..." ) } else { @@ -99,13 +99,13 @@ extension MediaGenerationView { VStack(spacing: 12) { Button { Task { - await viewModel.generate( - viewModel.speechClient?.base, - viewModel.videoClient?.base + await viewActor.generate( + viewActor.speechClient?.base, + viewActor.videoClient?.base ) } } label: { - if viewModel.isLoading { + if viewActor.isLoading { ProgressView() .progressViewStyle(.circular) } else { @@ -113,7 +113,7 @@ extension MediaGenerationView { } } .buttonStyle(.borderedProminent) - .disabled(viewModel.isLoading || !isGenerateEnabled) + .disabled(viewActor.isLoading || !isGenerateEnabled) } } } diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/MediaGenerationView.swift similarity index 81% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift rename to Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/MediaGenerationView.swift index 2743a51..1e6e752 100644 --- a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationView.swift +++ b/Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/MediaGenerationView.swift @@ -45,7 +45,7 @@ public struct MediaGenerationView: View { internal var configuration: Configuration internal let onComplete: ((AnyMediaFile) -> Void)? - @StateObject internal var viewModel: GenerationViewModel + @StateObject internal var viewActor: MediaGenerationViewActor public init( mediaType: MediaType, @@ -72,22 +72,22 @@ public struct MediaGenerationView: View { self.configuration = configuration self.onComplete = onComplete - let viewModel = GenerationViewModel( + let viewActor = MediaGenerationViewActor( mediaType: mediaType, inputModality: inputModality, configuration: configuration, onComplete: onComplete ) - _viewModel = StateObject(wrappedValue: viewModel) + _viewActor = StateObject(wrappedValue: viewActor) } public var body: some View { VStack(alignment: .leading, spacing: 20) { - if let mediaFile = viewModel.generatedFile { + if let mediaFile = viewActor.generatedFile { MediaFileView(file: mediaFile.file) } - inputModality.makeInputView(binding: $viewModel.currentInput) + inputModality.makeInputView(binding: $viewActor.currentInput) clientSelectionView modelSelectionView @@ -100,9 +100,9 @@ public struct MediaGenerationView: View { .padding() .task { await loadClients() - try? await viewModel.loadResources( - viewModel.speechClient?.base, - viewModel.videoClient?.base + try? await viewActor.loadResources( + viewActor.speechClient?.base, + viewActor.videoClient?.base ) } } @@ -111,33 +111,33 @@ public struct MediaGenerationView: View { do { let services = try await Sideproject.shared.services - self.viewModel.availableSpeechClients = services.compactMap { service in + self.viewActor.availableSpeechClients = services.compactMap { service in if let service = service as? (any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling) { return AnySpeechSynthesisRequestHandling(service) } return nil } - self.viewModel.availableVideoClients = services.compactMap { service in + self.viewActor.availableVideoClients = services.compactMap { service in if let service = service as? (any CoreMI._ServiceClientProtocol & VideoGenerationRequestHandling) { return AnyVideoGenerationRequestHandling(service) } return nil } - self.viewModel.speechClient = self.viewModel.availableSpeechClients.first - self.viewModel.videoClient = self.viewModel.availableVideoClients.first + self.viewActor.speechClient = self.viewActor.availableSpeechClients.first + self.viewActor.videoClient = self.viewActor.availableVideoClients.first } catch { print("Error loading clients: \(error)") } } internal var isGenerateEnabled: Bool { - let isInputValid = inputModality.validate(viewModel.currentInput) + let isInputValid = inputModality.validate(viewActor.currentInput) let isModelSelected = switch mediaType { - case .speech: viewModel.selectedVoice != nil - case .video: viewModel.selectedVideoModel != nil + case .speech: viewActor.selectedVoice != nil + case .video: viewActor.selectedVideoModel != nil } return isInputValid && isModelSelected From 97d11cc3718b229f08ada6a08d66a0ec0ce03732 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Tue, 14 Jan 2025 17:05:37 -0700 Subject: [PATCH 16/20] moved files --- .../Media Generation (WIP)/MediaGenerationViewActor.swift | 0 .../Media Generation (WIP)/Models/AnyInputModality.swift | 0 .../Media Generation (WIP)/Models/AudioInputModality.swift | 0 .../Media Generation (WIP)/Models/FileInputModality.swift | 0 .../Media Generation (WIP)/Models/TextInputModality.swift | 0 .../Media Generation (WIP)/Views/AudioInputView.swift | 0 .../Media Generation (WIP)/Views/MediaGenerationView+Views.swift | 0 .../Media Generation (WIP)/Views/MediaGenerationView.swift | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename Sources/Sideproject/{Intramodular => WIP}/Media Generation (WIP)/MediaGenerationViewActor.swift (100%) rename Sources/Sideproject/{Intramodular => WIP}/Media Generation (WIP)/Models/AnyInputModality.swift (100%) rename Sources/Sideproject/{Intramodular => WIP}/Media Generation (WIP)/Models/AudioInputModality.swift (100%) rename Sources/Sideproject/{Intramodular => WIP}/Media Generation (WIP)/Models/FileInputModality.swift (100%) rename Sources/Sideproject/{Intramodular => WIP}/Media Generation (WIP)/Models/TextInputModality.swift (100%) rename Sources/Sideproject/{Intramodular => WIP}/Media Generation (WIP)/Views/AudioInputView.swift (100%) rename Sources/Sideproject/{Intramodular => WIP}/Media Generation (WIP)/Views/MediaGenerationView+Views.swift (100%) rename Sources/Sideproject/{Intramodular => WIP}/Media Generation (WIP)/Views/MediaGenerationView.swift (100%) diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/MediaGenerationViewActor.swift similarity index 100% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/MediaGenerationViewActor.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/MediaGenerationViewActor.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AnyInputModality.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/AnyInputModality.swift similarity index 100% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AnyInputModality.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Models/AnyInputModality.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AudioInputModality.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/AudioInputModality.swift similarity index 100% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/AudioInputModality.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Models/AudioInputModality.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/FileInputModality.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/FileInputModality.swift similarity index 100% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/FileInputModality.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Models/FileInputModality.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/TextInputModality.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/TextInputModality.swift similarity index 100% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/Models/TextInputModality.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Models/TextInputModality.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/AudioInputView.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/AudioInputView.swift similarity index 100% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/AudioInputView.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Views/AudioInputView.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/MediaGenerationView+Views.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView+Views.swift similarity index 100% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/MediaGenerationView+Views.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView+Views.swift diff --git a/Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/MediaGenerationView.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift similarity index 100% rename from Sources/Sideproject/Intramodular/Media Generation (WIP)/Views/MediaGenerationView.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift From 771360f878b3b9391d9d3bfe2c2881e63daf6e57 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Tue, 14 Jan 2025 17:49:44 -0700 Subject: [PATCH 17/20] Fixed selection --- .../MediaGenerationViewActor.swift | 6 +++--- .../Views/MediaGenerationView+Views.swift | 9 +++++---- .../Views/MediaGenerationView.swift | 10 ++++++++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/MediaGenerationViewActor.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/MediaGenerationViewActor.swift index 3084b0b..304c3ef 100644 --- a/Sources/Sideproject/WIP/Media Generation (WIP)/MediaGenerationViewActor.swift +++ b/Sources/Sideproject/WIP/Media Generation (WIP)/MediaGenerationViewActor.swift @@ -12,11 +12,11 @@ import SwiftUI import SwiftUIX final class MediaGenerationViewActor: ObservableObject { - @Published var availableVoices: [ElevenLabs.Voice] = [] + @Published var availableVoices: [AbstractVoice] = [] @Published var availableModels: [VideoModel] = [] @Published var currentInput: Any? @Published var isLoading = false - @Published var selectedVoice: ElevenLabs.Voice.ID? + @Published var selectedVoice: AbstractVoice.ID? @Published var generatedFile: AnyMediaFile? @Published var selectedVideoModel: VideoModel.ID? @Published var speechClient: AnySpeechSynthesisRequestHandling? @@ -48,7 +48,7 @@ final class MediaGenerationViewActor: ObservableObject { ) async throws { switch mediaType { case .speech: - availableVoices = try await (speechClient?.availableVoices() ?? []).map({try ElevenLabs.Voice(voice: $0)}) + availableVoices = try await speechClient?.availableVoices() ?? [] configuration.voiceSettings = .init() case .video: diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView+Views.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView+Views.swift index 511e776..b4aa89f 100644 --- a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView+Views.swift +++ b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView+Views.swift @@ -60,10 +60,11 @@ extension MediaGenerationView { case .speech: if !viewActor.availableVoices.isEmpty { Picker("Voice", selection: $viewActor.selectedVoice) { - Text("Select a voice").tag(Optional.none) - ForEach(viewActor.availableVoices) { voice in + Text("Select a voice") + .tag(AbstractVoice.ID?.none) + ForEach(viewActor.availableVoices, id: \.id) { voice in Text(voice.name) - .tag(Optional(voice.id)) + .tag(Optional(AbstractVoice.ID(rawValue: voice.voiceID))) } } } @@ -72,7 +73,7 @@ extension MediaGenerationView { if !viewActor.availableModels.isEmpty { Picker("Model", selection: $viewActor.selectedVideoModel) { Text("Select a model").tag(Optional.none) - ForEach(viewActor.availableModels) { model in + ForEach(viewActor.availableModels, id: \.id) { model in Text(model.name) .tag(Optional(model.id)) } diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift index 1e6e752..0c57861 100644 --- a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift +++ b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift @@ -105,6 +105,16 @@ public struct MediaGenerationView: View { viewActor.videoClient?.base ) } + .onChange(of: viewActor.speechClient) { + oldValue, + newValue in + Task { + try? await viewActor.loadResources( + viewActor.speechClient?.base, + viewActor.videoClient?.base + ) + } + } } private func loadClients() async { From 56596c7e5d24677f88908667a1c3f464757398d8 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Fri, 17 Jan 2025 09:27:26 -0700 Subject: [PATCH 18/20] Fixed build issue --- .../WIP/Media Generation (WIP)/Views/MediaGenerationView.swift | 1 - 1 file changed, 1 deletion(-) diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift index 0c57861..3e454e7 100644 --- a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift +++ b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift @@ -7,7 +7,6 @@ import SwiftUI import ElevenLabs -import SwallowUI import Media import AVFoundation import SideprojectCore From 3ca2c6f6014772ba90894ba3393b20c5c1d1aa2b Mon Sep 17 00:00:00 2001 From: Vatsal Manot Date: Tue, 21 Jan 2025 19:14:53 -0800 Subject: [PATCH 19/20] Update package --- Package.swift | 7 +++---- .../Accounts/Sideproject.ExternalAccountStore.swift | 9 ++++++++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Package.swift b/Package.swift index 73bc983..612e09d 100644 --- a/Package.swift +++ b/Package.swift @@ -29,10 +29,10 @@ let package = Package( .package(url: "https://github.com/SwiftUIX/SwiftUIX.git", branch: "master"), .package(url: "https://github.com/SwiftUIX/SwiftUIZ.git", branch: "main"), .package(url: "https://github.com/vmanot/CorePersistence.git", branch: "main"), + .package(url: "https://github.com/vmanot/Media.git", branch: "main"), .package(url: "https://github.com/vmanot/Merge.git", branch: "master"), .package(url: "https://github.com/vmanot/NetworkKit.git", branch: "master"), .package(url: "https://github.com/vmanot/Swallow.git", branch: "master"), - .package(url: "https://github.com/vmanot/Media.git", branch: "main") ], targets: [ .macro( @@ -99,6 +99,7 @@ let package = Package( "Cataphyl", "ChatKit", "CorePersistence", + "Media", "Merge", "NetworkKit", "SideprojectCore", @@ -106,7 +107,6 @@ let package = Package( "Swallow", "SwiftUIX", "SwiftUIZ", - "Media" ], path: "Sources/Sideproject", resources: [], @@ -121,6 +121,5 @@ let package = Package( ], path: "Tests/Sideproject" ), - ]/*, - cxxLanguageStandard: CXXLanguageStandard.cxx11*/ + ] ) diff --git a/Sources/SideprojectCore/Intramodular/Accounts/Sideproject.ExternalAccountStore.swift b/Sources/SideprojectCore/Intramodular/Accounts/Sideproject.ExternalAccountStore.swift index 8fce2ce..66fb4b6 100644 --- a/Sources/SideprojectCore/Intramodular/Accounts/Sideproject.ExternalAccountStore.swift +++ b/Sources/SideprojectCore/Intramodular/Accounts/Sideproject.ExternalAccountStore.swift @@ -81,6 +81,13 @@ extension Sideproject.ExternalAccountStore { .compactMapValues({ $0.credential }) } + public func firstCredentialIfAvailable( + ofType type: Sideproject.ExternalAccountCredentialTypeName, + for accountType: any Sideproject.ExternalAccountTypeDescriptor + ) throws -> T? { + try credentials(for: accountType).firstAndOnly(byUnwrapping: { $0.value as? T }) + } + /// Returns all available credentials for a given account type, keyed by account IDs. /// /// For example `Sideproject.ExternalAccountStore.shared.credentials(ofType: .apiKey, for: .groq)` @@ -88,7 +95,7 @@ extension Sideproject.ExternalAccountStore { ofType type: Sideproject.ExternalAccountCredentialTypeName, for accountType: any Sideproject.ExternalAccountTypeDescriptor ) throws -> T { - try credentials(for: accountType).firstAndOnly(byUnwrapping: { $0.value as? T }).unwrap() + try firstCredentialIfAvailable(ofType: type, for: accountType).unwrap() } public func hasCredentials( From 71da5857d9910734f29316b2dda93ca11df4aa57 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Sat, 12 Apr 2025 15:15:37 -0600 Subject: [PATCH 20/20] Media generation cleanup --- Package.resolved | 141 +++++++++++++++ .../xcshareddata/swiftpm/Package.resolved | 168 ++++++++++++++++++ .../Models/AudioVariant.swift | 22 +++ .../AnyInputModality.swift | 19 -- .../AudioInputModality.swift | 0 .../FileInputModality.swift | 0 .../TextInputModality.swift | 0 .../InputModalityConfiguration.swift | 15 ++ .../Views/AudioInputView.swift | 15 +- ...MediaGenerationView+AnyInputModality.swift | 19 ++ .../Views/MediaGenerationView.swift | 2 + .../WIP/Views/ModelSearchVIew.Tab.swift | 34 ++++ .../WIP/Views/ModelSearchView.swift | 27 --- 13 files changed, 402 insertions(+), 60 deletions(-) create mode 100644 Package.resolved create mode 100644 SideprojectExample/SideprojectExample.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved create mode 100644 Sources/Sideproject/WIP/Media Generation (WIP)/Models/AudioVariant.swift rename Sources/Sideproject/WIP/Media Generation (WIP)/Models/{ => Input Modalities}/AnyInputModality.swift (79%) rename Sources/Sideproject/WIP/Media Generation (WIP)/Models/{ => Input Modalities}/AudioInputModality.swift (100%) rename Sources/Sideproject/WIP/Media Generation (WIP)/Models/{ => Input Modalities}/FileInputModality.swift (100%) rename Sources/Sideproject/WIP/Media Generation (WIP)/Models/{ => Input Modalities}/TextInputModality.swift (100%) create mode 100644 Sources/Sideproject/WIP/Media Generation (WIP)/Protocols/InputModalityConfiguration.swift create mode 100644 Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView+AnyInputModality.swift create mode 100644 Sources/Sideproject/WIP/Views/ModelSearchVIew.Tab.swift diff --git a/Package.resolved b/Package.resolved new file mode 100644 index 0000000..31b1ef6 --- /dev/null +++ b/Package.resolved @@ -0,0 +1,141 @@ +{ + "originHash" : "f04b8fe9e82bcfef54bede3355d7647310db02857aade54807d042599ab5e7e8", + "pins" : [ + { + "identity" : "ai", + "kind" : "remoteSourceControl", + "location" : "https://github.com/PreternaturalAI/AI.git", + "state" : { + "branch" : "main", + "revision" : "4021a8f960476cebf51f5daf31483fd9d6f0452a" + } + }, + { + "identity" : "cataphyl", + "kind" : "remoteSourceControl", + "location" : "https://github.com/PreternaturalAI/Cataphyl.git", + "state" : { + "branch" : "main", + "revision" : "e53561c546fe4a0c5b50a5f3785c2bf5e65ee783" + } + }, + { + "identity" : "chatkit", + "kind" : "remoteSourceControl", + "location" : "https://github.com/PreternaturalAI/ChatKit.git", + "state" : { + "branch" : "main", + "revision" : "055ad6219fc87355b25ecdcb5eb4cbb1c6f51177" + } + }, + { + "identity" : "corepersistence", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/CorePersistence.git", + "state" : { + "branch" : "main", + "revision" : "3de61dda6b7153bde07aa7c1fbfd44c01c82e3a8" + } + }, + { + "identity" : "media", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/Media.git", + "state" : { + "branch" : "main", + "revision" : "0ba2baaebeb58667955daef68d3535ba1b217a12" + } + }, + { + "identity" : "merge", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/Merge.git", + "state" : { + "branch" : "master", + "revision" : "17e267f961c5ea9a3375c3a49807e66795a004e2" + } + }, + { + "identity" : "networkkit", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/NetworkKit.git", + "state" : { + "branch" : "master", + "revision" : "470af0276c2aa6e61acff22acc14c7ca30b99cbc" + } + }, + { + "identity" : "swallow", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/Swallow.git", + "state" : { + "branch" : "master", + "revision" : "553de76697a15c8ad0f3ff4b62ac90935b1e3a87" + } + }, + { + "identity" : "swift-atomics", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-atomics.git", + "state" : { + "revision" : "cd142fd2f64be2100422d658e7411e39489da985", + "version" : "1.2.0" + } + }, + { + "identity" : "swift-collections", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-collections", + "state" : { + "revision" : "671108c96644956dddcd89dd59c203dcdb36cec7", + "version" : "1.1.4" + } + }, + { + "identity" : "swift-syntax", + "kind" : "remoteSourceControl", + "location" : "https://github.com/swiftlang/swift-syntax.git", + "state" : { + "revision" : "0687f71944021d616d34d922343dcef086855920", + "version" : "600.0.1" + } + }, + { + "identity" : "swiftapi", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/SwiftAPI.git", + "state" : { + "branch" : "master", + "revision" : "3e47cc5f9b0cefe9ed1d0971aff22583bd9ac7b0" + } + }, + { + "identity" : "swiftui-introspect", + "kind" : "remoteSourceControl", + "location" : "https://github.com/siteline/SwiftUI-Introspect.git", + "state" : { + "revision" : "807f73ce09a9b9723f12385e592b4e0aaebd3336", + "version" : "1.3.0" + } + }, + { + "identity" : "swiftuix", + "kind" : "remoteSourceControl", + "location" : "https://github.com/SwiftUIX/SwiftUIX.git", + "state" : { + "branch" : "master", + "revision" : "264cb593d0a7cbff0d95dcf715cdf5328ceb5e11" + } + }, + { + "identity" : "swiftuiz", + "kind" : "remoteSourceControl", + "location" : "https://github.com/SwiftUIX/SwiftUIZ.git", + "state" : { + "branch" : "main", + "revision" : "6f9cecdf80139854e21c43e25d26bc3a9c582969" + } + } + ], + "version" : 3 +} diff --git a/SideprojectExample/SideprojectExample.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/SideprojectExample/SideprojectExample.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved new file mode 100644 index 0000000..d9f31a3 --- /dev/null +++ b/SideprojectExample/SideprojectExample.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -0,0 +1,168 @@ +{ + "originHash" : "fd6b49f16164d68a6c496e7b16ae556a43a809ab0ab84de151c20af8cd2f9bfe", + "pins" : [ + { + "identity" : "ai", + "kind" : "remoteSourceControl", + "location" : "https://github.com/PreternaturalAI/AI.git", + "state" : { + "branch" : "main", + "revision" : "4021a8f960476cebf51f5daf31483fd9d6f0452a" + } + }, + { + "identity" : "cataphyl", + "kind" : "remoteSourceControl", + "location" : "https://github.com/PreternaturalAI/Cataphyl.git", + "state" : { + "branch" : "main", + "revision" : "e53561c546fe4a0c5b50a5f3785c2bf5e65ee783" + } + }, + { + "identity" : "chatkit", + "kind" : "remoteSourceControl", + "location" : "https://github.com/PreternaturalAI/ChatKit.git", + "state" : { + "branch" : "main", + "revision" : "055ad6219fc87355b25ecdcb5eb4cbb1c6f51177" + } + }, + { + "identity" : "corepersistence", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/CorePersistence.git", + "state" : { + "branch" : "main", + "revision" : "3de61dda6b7153bde07aa7c1fbfd44c01c82e3a8" + } + }, + { + "identity" : "media", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/Media.git", + "state" : { + "branch" : "main", + "revision" : "0ba2baaebeb58667955daef68d3535ba1b217a12" + } + }, + { + "identity" : "merge", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/Merge.git", + "state" : { + "branch" : "master", + "revision" : "17e267f961c5ea9a3375c3a49807e66795a004e2" + } + }, + { + "identity" : "networkkit", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/NetworkKit.git", + "state" : { + "branch" : "master", + "revision" : "470af0276c2aa6e61acff22acc14c7ca30b99cbc" + } + }, + { + "identity" : "pow", + "kind" : "remoteSourceControl", + "location" : "https://github.com/PreternaturalAI/Pow.git", + "state" : { + "branch" : "main", + "revision" : "dc5839ef7cbb6c8b34698cda691159b21c68176d" + } + }, + { + "identity" : "swallow", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/Swallow.git", + "state" : { + "branch" : "master", + "revision" : "553de76697a15c8ad0f3ff4b62ac90935b1e3a87" + } + }, + { + "identity" : "swift-algorithms", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-algorithms", + "state" : { + "revision" : "87e50f483c54e6efd60e885f7f5aa946cee68023", + "version" : "1.2.1" + } + }, + { + "identity" : "swift-atomics", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-atomics.git", + "state" : { + "revision" : "cd142fd2f64be2100422d658e7411e39489da985", + "version" : "1.2.0" + } + }, + { + "identity" : "swift-collections", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-collections", + "state" : { + "revision" : "671108c96644956dddcd89dd59c203dcdb36cec7", + "version" : "1.1.4" + } + }, + { + "identity" : "swift-numerics", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-numerics.git", + "state" : { + "revision" : "e0ec0f5f3af6f3e4d5e7a19d2af26b481acb6ba8", + "version" : "1.0.3" + } + }, + { + "identity" : "swift-syntax", + "kind" : "remoteSourceControl", + "location" : "https://github.com/swiftlang/swift-syntax.git", + "state" : { + "revision" : "0687f71944021d616d34d922343dcef086855920", + "version" : "600.0.1" + } + }, + { + "identity" : "swiftapi", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/SwiftAPI.git", + "state" : { + "branch" : "master", + "revision" : "3e47cc5f9b0cefe9ed1d0971aff22583bd9ac7b0" + } + }, + { + "identity" : "swiftui-introspect", + "kind" : "remoteSourceControl", + "location" : "https://github.com/siteline/SwiftUI-Introspect.git", + "state" : { + "revision" : "807f73ce09a9b9723f12385e592b4e0aaebd3336", + "version" : "1.3.0" + } + }, + { + "identity" : "swiftuix", + "kind" : "remoteSourceControl", + "location" : "https://github.com/SwiftUIX/SwiftUIX.git", + "state" : { + "branch" : "master", + "revision" : "264cb593d0a7cbff0d95dcf715cdf5328ceb5e11" + } + }, + { + "identity" : "swiftuiz", + "kind" : "remoteSourceControl", + "location" : "https://github.com/SwiftUIX/SwiftUIZ.git", + "state" : { + "branch" : "main", + "revision" : "6f9cecdf80139854e21c43e25d26bc3a9c582969" + } + } + ], + "version" : 3 +} diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Models/AudioVariant.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/AudioVariant.swift new file mode 100644 index 0000000..d066135 --- /dev/null +++ b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/AudioVariant.swift @@ -0,0 +1,22 @@ +// +// AudioVariant.swift +// Sideproject +// +// Created by Jared Davidson on 4/12/25. +// + +import SwiftUI + +public struct AudioVariant: OptionSet { + public let rawValue: Int + + public init(rawValue: Int) { + self.rawValue = rawValue + } + + public static let fileDrop = AudioVariant(rawValue: 1 << 0) + public static let recorder = AudioVariant(rawValue: 1 << 1) + public static let recorderWithTranscription = AudioVariant(rawValue: 1 << 2) + + public static let all: AudioVariant = [.fileDrop, .recorder, .recorderWithTranscription] +} diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Models/AnyInputModality.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/Input Modalities/AnyInputModality.swift similarity index 79% rename from Sources/Sideproject/WIP/Media Generation (WIP)/Models/AnyInputModality.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Models/Input Modalities/AnyInputModality.swift index c013408..08a41ce 100644 --- a/Sources/Sideproject/WIP/Media Generation (WIP)/Models/AnyInputModality.swift +++ b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/Input Modalities/AnyInputModality.swift @@ -26,18 +26,6 @@ extension AnyInputModality { public static var video: Self { InputModality.video } } - -extension MediaGenerationView { - public func inputModality(_ modality: AnyInputModality) -> Self { - MediaGenerationView( - mediaType: self.mediaType, - inputModality: modality, - configuration: self.configuration, - onComplete: self.onComplete - ) - } -} - public struct AnyInputModality { private let _description: String private let _makeInputView: (Binding, String) -> AnyView @@ -71,10 +59,3 @@ public struct AnyInputModality { _validate(input) } } - -public protocol InputModalityConfiguration { - associatedtype InputType - var description: String { get } - func makeInputView(inputBinding: Binding, placeholderText: String) -> AnyView - func validate(_ input: InputType?) -> Bool -} diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Models/AudioInputModality.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/Input Modalities/AudioInputModality.swift similarity index 100% rename from Sources/Sideproject/WIP/Media Generation (WIP)/Models/AudioInputModality.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Models/Input Modalities/AudioInputModality.swift diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Models/FileInputModality.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/Input Modalities/FileInputModality.swift similarity index 100% rename from Sources/Sideproject/WIP/Media Generation (WIP)/Models/FileInputModality.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Models/Input Modalities/FileInputModality.swift diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Models/TextInputModality.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Models/Input Modalities/TextInputModality.swift similarity index 100% rename from Sources/Sideproject/WIP/Media Generation (WIP)/Models/TextInputModality.swift rename to Sources/Sideproject/WIP/Media Generation (WIP)/Models/Input Modalities/TextInputModality.swift diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Protocols/InputModalityConfiguration.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Protocols/InputModalityConfiguration.swift new file mode 100644 index 0000000..a6eb056 --- /dev/null +++ b/Sources/Sideproject/WIP/Media Generation (WIP)/Protocols/InputModalityConfiguration.swift @@ -0,0 +1,15 @@ +// +// InputModalityConfiguration.swift +// Sideproject +// +// Created by Jared Davidson on 4/12/25. +// + +import SwiftUI + +public protocol InputModalityConfiguration { + associatedtype InputType + var description: String { get } + func makeInputView(inputBinding: Binding, placeholderText: String) -> AnyView + func validate(_ input: InputType?) -> Bool +} diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/AudioInputView.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/AudioInputView.swift index 1f26ff8..7b31a4a 100644 --- a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/AudioInputView.swift +++ b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/AudioInputView.swift @@ -7,20 +7,7 @@ import SwiftUI import Media - -public struct AudioVariant: OptionSet { - public let rawValue: Int - - public init(rawValue: Int) { - self.rawValue = rawValue - } - - public static let fileDrop = AudioVariant(rawValue: 1 << 0) - public static let recorder = AudioVariant(rawValue: 1 << 1) - public static let recorderWithTranscription = AudioVariant(rawValue: 1 << 2) - - public static let all: AudioVariant = [.fileDrop, .recorder, .recorderWithTranscription] -} +import LargeLanguageModels public struct AudioInputView: View { @Binding var audioFile: AudioFile? diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView+AnyInputModality.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView+AnyInputModality.swift new file mode 100644 index 0000000..91f2bed --- /dev/null +++ b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView+AnyInputModality.swift @@ -0,0 +1,19 @@ +// +// MediaGenerationView+AnyInputModality.swift +// Sideproject +// +// Created by Jared Davidson on 4/12/25. +// + +import SwiftUI + +extension MediaGenerationView { + public func inputModality(_ modality: AnyInputModality) -> Self { + MediaGenerationView( + mediaType: self.mediaType, + inputModality: modality, + configuration: self.configuration, + onComplete: self.onComplete + ) + } +} diff --git a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift index 3e454e7..6106828 100644 --- a/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift +++ b/Sources/Sideproject/WIP/Media Generation (WIP)/Views/MediaGenerationView.swift @@ -11,6 +11,7 @@ import Media import AVFoundation import SideprojectCore import AI +import LargeLanguageModels import Runtime public enum MediaType { @@ -18,6 +19,7 @@ public enum MediaType { case video } +/// A simple input view to generate media of any format. public struct MediaGenerationView: View { public struct Configuration: Equatable { public var textToSpeechModel: String diff --git a/Sources/Sideproject/WIP/Views/ModelSearchVIew.Tab.swift b/Sources/Sideproject/WIP/Views/ModelSearchVIew.Tab.swift new file mode 100644 index 0000000..d1c0bf6 --- /dev/null +++ b/Sources/Sideproject/WIP/Views/ModelSearchVIew.Tab.swift @@ -0,0 +1,34 @@ +// +// ModelSearchVIew.Tab.swift +// Sideproject +// +// Created by Jared Davidson on 4/12/25. +// + +import SwiftUI + +extension ModelSearchView { + public enum Tab { + case discover + case downloaded + + var keyPath: KeyPath { + switch self { + case .discover: return \ModelStore.models + case .downloaded: return \ModelStore.downloadedModels + } + } + } +} + + +// MARK: - Conformances + +extension ModelSearchView.Tab: CustomStringConvertible, CaseIterable { + public var description: String { + switch self { + case .discover: "Discover" + case .downloaded: "Downloaded" + } + } +} diff --git a/Sources/Sideproject/WIP/Views/ModelSearchView.swift b/Sources/Sideproject/WIP/Views/ModelSearchView.swift index f80b5a5..25f57d1 100644 --- a/Sources/Sideproject/WIP/Views/ModelSearchView.swift +++ b/Sources/Sideproject/WIP/Views/ModelSearchView.swift @@ -26,30 +26,3 @@ public struct ModelSearchView: View { .environmentObject(Sideproject.ExternalAccountStore.shared) } } - - -extension ModelSearchView { - public enum Tab { - case discover - case downloaded - - var keyPath: KeyPath { - switch self { - case .discover: return \ModelStore.models - case .downloaded: return \ModelStore.downloadedModels - } - } - } -} - - -// MARK: - Conformances - -extension ModelSearchView.Tab: CustomStringConvertible, CaseIterable { - public var description: String { - switch self { - case .discover: "Discover" - case .downloaded: "Downloaded" - } - } -}