diff --git a/apps/expo-example/src/components/adapters/mlcModelSetupAdapter.ts b/apps/expo-example/src/components/adapters/mlcModelSetupAdapter.ts index 7352dddc..fb7631d7 100644 --- a/apps/expo-example/src/components/adapters/mlcModelSetupAdapter.ts +++ b/apps/expo-example/src/components/adapters/mlcModelSetupAdapter.ts @@ -1,6 +1,7 @@ import type { LanguageModelV3 } from '@ai-sdk/provider' import { mlc } from '@react-native-ai/mlc' import { File, Paths } from 'expo-file-system' +import { Platform } from 'react-native' import type { Availability, SetupAdapter } from '../../config/providers.common' @@ -19,7 +20,14 @@ export const createMLCLanguageSetupAdapter = ( }, builtIn: false, isAvailable(): Availability { - return new File(Paths.document, model.modelId, 'tensor-cache.json').exists + return new File( + Paths.document, + ...(Platform.select({ + ios: ['bundle'], + }) ?? []), + model.modelId, + 'tensor-cache.json' + ).exists ? 'yes' : 'availableForDownload' }, diff --git a/packages/mlc/ATTRIBUTIONS.md b/packages/mlc/ATTRIBUTIONS.md index 692f47b9..fe424393 100644 --- a/packages/mlc/ATTRIBUTIONS.md +++ b/packages/mlc/ATTRIBUTIONS.md @@ -1,8 +1,7 @@ -Third-Party Notices -=================== +# Third-Party Notices + +## MLC-LLM (mlc-ai/mlc-llm) -MLC-LLM (mlc-ai/mlc-llm) ------------------------ Portions of the iOS engine implementation are derived from the MLC-LLM project, and the prebuilt runtime binaries shipped in this package are based on MLC-LLM. @@ -14,7 +13,6 @@ License: Apache License, Version 2.0 License URL: https://www.apache.org/licenses/LICENSE-2.0 Derived source files in this package: + - packages/mlc/ios/engine/LLMEngine.h - packages/mlc/ios/engine/LLMEngine.mm -- packages/mlc/ios/engine/JSONFFIEngine.h -- packages/mlc/ios/engine/JSONFFIEngine.mm diff --git a/packages/mlc/android/src/main/java/com/callstack/ai/NativeMLCEngineModule.kt b/packages/mlc/android/src/main/java/com/callstack/ai/NativeMLCEngineModule.kt index a54ab507..6c9526af 100644 --- a/packages/mlc/android/src/main/java/com/callstack/ai/NativeMLCEngineModule.kt +++ b/packages/mlc/android/src/main/java/com/callstack/ai/NativeMLCEngineModule.kt @@ -307,7 +307,7 @@ class NativeMLCEngineModule(reactContext: ReactApplicationContext) : NativeMLCEn private fun getModelConfig(modelId: String): Pair? { val modelRecord = appConfig.model_list.find { it.model_id == modelId } ?: return null - val modelDir = File(reactApplicationContext.getExternalFilesDir(""), modelRecord.model_id) + val modelDir = File(reactApplicationContext.getFilesDir(), modelRecord.model_id) return Pair(modelRecord, modelDir) } } diff --git a/packages/mlc/ios/MLCEngine.mm b/packages/mlc/ios/MLCEngine.mm index ac4d6364..0b58c3bf 100644 --- a/packages/mlc/ios/MLCEngine.mm +++ b/packages/mlc/ios/MLCEngine.mm @@ -1,17 +1,22 @@ #import #import -#import #import +#import #import "LLMEngine.h" +typedef void (^MLCStreamCallback)(NSDictionary *response); + @interface MLCEngine : NativeMLCEngineSpecBase -@property(nonatomic, strong) LLMEngine* engine; -@property(nonatomic, strong) NSURL* bundleURL; -@property(nonatomic, strong) NSDictionary* cachedAppConfig; -@property(nonatomic, strong) NSArray* cachedModelList; +@property(nonatomic, strong) JSONFFIEngine *engine; +@property(nonatomic, strong) NSURL *bundleURL; +@property(nonatomic, strong) NSDictionary *cachedAppConfig; +@property(nonatomic, strong) NSArray *cachedModelList; +@property(nonatomic, strong) + NSMutableDictionary *pendingRequests; +@property(nonatomic) dispatch_queue_t streamCallbackQueue; @end @@ -26,68 +31,90 @@ + (NSString *)moduleName { - (instancetype)init { self = [super init]; if (self) { - _engine = [[LLMEngine alloc] init]; - + _engine = [[JSONFFIEngine alloc] init]; + _pendingRequests = [NSMutableDictionary new]; + _streamCallbackQueue = dispatch_queue_create( + "com.callstack.mlcegine.stream", DISPATCH_QUEUE_SERIAL); + // Get the Documents directory path for downloaded models - NSArray* paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES); - NSString* documentsDirectory = [paths firstObject]; - _bundleURL = [NSURL fileURLWithPath:[documentsDirectory stringByAppendingPathComponent:@"bundle"]]; - + NSArray *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, + NSUserDomainMask, YES); + NSString *documentsDirectory = [paths firstObject]; + _bundleURL = + [NSURL fileURLWithPath:[documentsDirectory + stringByAppendingPathComponent:@"bundle"]]; + // Create bundle directory if it doesn't exist (for downloaded models) - NSError* dirError; - [[NSFileManager defaultManager] createDirectoryAtPath:[_bundleURL path] withIntermediateDirectories:YES attributes:nil error:&dirError]; + NSError *dirError; + [[NSFileManager defaultManager] createDirectoryAtPath:[_bundleURL path] + withIntermediateDirectories:YES + attributes:nil + error:&dirError]; if (dirError) { NSLog(@"Error creating bundle directory: %@", dirError); } + + [self.engine initBackgroundEngine:^(NSString *responseJSON) { + [self handleStreamCallback:responseJSON]; + }]; + dispatch_async(dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0), ^{ + [self.engine runBackgroundLoop]; + }); + dispatch_async(dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0), ^{ + [self.engine runBackgroundStreamBackLoop]; + }); } return self; } // Lazy getter for app config with caching - read directly from bundle -- (NSDictionary*)getAppConfig { +- (NSDictionary *)getAppConfig { if (_cachedAppConfig) { return _cachedAppConfig; } - + // Read config from main bundle resources - NSBundle* bundle = [NSBundle mainBundle]; - NSString* configPath = [bundle pathForResource:@"mlc-app-config" ofType:@"json"]; - + NSBundle *bundle = [NSBundle mainBundle]; + NSString *configPath = [bundle pathForResource:@"mlc-app-config" + ofType:@"json"]; + if (!configPath) { NSLog(@"Failed to find mlc-chat-config.json in bundle"); return nil; } - - NSData* jsonData = [NSData dataWithContentsOfFile:configPath]; + + NSData *jsonData = [NSData dataWithContentsOfFile:configPath]; if (!jsonData) { NSLog(@"Failed to read app config from: %@", configPath); return nil; } - - NSError* error; - NSDictionary* jsonDict = [NSJSONSerialization JSONObjectWithData:jsonData options:0 error:&error]; - + + NSError *error; + NSDictionary *jsonDict = [NSJSONSerialization JSONObjectWithData:jsonData + options:0 + error:&error]; + if (error) { NSLog(@"Error parsing app config JSON: %@", error); return nil; } - + if (![jsonDict isKindOfClass:[NSDictionary class]]) { NSLog(@"Invalid app config format"); return nil; } - + _cachedAppConfig = jsonDict; - + return _cachedAppConfig; } // Get cached model list -- (NSArray*)getModelList { +- (NSArray *)getModelList { if (_cachedModelList) { return _cachedModelList; } - NSDictionary* appConfig = [self getAppConfig]; + NSDictionary *appConfig = [self getAppConfig]; if (appConfig) { _cachedModelList = appConfig[@"model_list"]; } @@ -95,9 +122,9 @@ - (NSArray*)getModelList { } // Find model by ID with caching -- (NSDictionary*)findModelById:(NSString*)modelId { - NSArray* modelList = [self getModelList]; - for (NSDictionary* model in modelList) { +- (NSDictionary *)findModelById:(NSString *)modelId { + NSArray *modelList = [self getModelList]; + for (NSDictionary *model in modelList) { if ([model[@"model_id"] isEqualToString:modelId]) { return model; } @@ -105,15 +132,20 @@ - (NSDictionary*)findModelById:(NSString*)modelId { return nil; } - -- (std::shared_ptr)getTurboModule:(const react::ObjCTurboModule::InitParams &)params { +- (std::shared_ptr)getTurboModule: + (const react::ObjCTurboModule::InitParams &)params { return std::make_shared(params); } // Helper method to build complete request with messages and options -- (NSDictionary*)buildRequestWithMessages:(NSArray*)messages options:(const JS::NativeMLCEngine::GenerationOptions &)options { - NSMutableDictionary *request = [@{@"messages": messages, @"stream": @(YES)} mutableCopy]; - +- (NSDictionary *) + buildRequestWithMessages:(NSArray *)messages + options:(const JS::NativeMLCEngine::GenerationOptions &) + options { + NSMutableDictionary *request = + [@{@"messages" : messages, @"stream" : @(YES)} mutableCopy]; + request[@"stream_options"] = @{@"include_usage" : @(YES)}; + if (options.temperature().has_value()) { request[@"temperature"] = @(options.temperature().value()); } @@ -143,95 +175,168 @@ - (NSDictionary*)buildRequestWithMessages:(NSArray*)messages options:(const JS:: if (options.toolChoice()) { request[@"tool_choice"] = options.toolChoice(); } - + return request; } -- (void)generateText:(NSArray*)messages - options:(JS::NativeMLCEngine::GenerationOptions &)options - resolve:(RCTPromiseResolveBlock)resolve - reject:(RCTPromiseRejectBlock)reject { - NSDictionary *request = [self buildRequestWithMessages:messages options:options]; - - NSMutableString* accumulatedContent = [NSMutableString new]; - NSMutableArray* accumulatedToolCalls = [NSMutableArray new]; - - __block NSString* finalFinishReason = nil; - __block NSString* finalRole = nil; - - [self.engine chatCompletionWithMessages:messages - options:request - completion:^(NSDictionary* response) { - if (response[@"usage"]) { - resolve(@{ - @"role": finalRole, - @"content": accumulatedContent, - @"tool_calls": accumulatedToolCalls, - @"finish_reason": finalFinishReason, - @"usage": response[@"usage"], - }); +- (NSString *)jsonStringFromDictionary:(NSDictionary *)dictionary + error:(NSError **)error { + NSData *jsonData = [NSJSONSerialization dataWithJSONObject:dictionary + options:0 + error:error]; + if (!jsonData) { + return nil; + } + return [[NSString alloc] initWithData:jsonData encoding:NSUTF8StringEncoding]; +} + +- (void)handleStreamCallback:(NSString *)responseJSON { + dispatch_async(self.streamCallbackQueue, ^{ + NSError *error; + NSArray *responses = [NSJSONSerialization + JSONObjectWithData:[responseJSON dataUsingEncoding:NSUTF8StringEncoding] + options:0 + error:&error]; + if (error) { + NSLog(@"Error decoding JSON: %@", error); return; } - - NSDictionary* choice = response[@"choices"][0]; - if (choice) { - NSDictionary* delta = choice[@"delta"]; - if (delta[@"content"]) { - [accumulatedContent appendString:delta[@"content"]]; - } - if (delta[@"role"]) { - finalRole = delta[@"role"]; - } - if (delta[@"tool_calls"]) { - [accumulatedToolCalls addObjectsFromArray:delta[@"tool_calls"]]; - } - if (choice[@"finish_reason"]) { - finalFinishReason = choice[@"finish_reason"]; + + for (NSDictionary *res in responses) { + NSString *requestID = res[@"id"]; + void (^completion)(NSDictionary *) = self.pendingRequests[requestID]; + if (completion) { + completion(res); + if (res[@"usage"]) { + [self.pendingRequests removeObjectForKey:requestID]; + } } } - }]; + }); +} + +- (NSString *)startChatCompletionWithRequest:(NSDictionary *)request + completion:(MLCStreamCallback)completion + error:(NSError **)error { + NSString *requestJSON = [self jsonStringFromDictionary:request error:error]; + if (!requestJSON) { + return nil; + } + + NSString *requestId = [NSUUID UUID].UUIDString; + if (completion) { + self.pendingRequests[requestId] = [completion copy]; + } + [self.engine chatCompletion:requestJSON requestID:requestId]; + return requestId; +} + +- (void)generateText:(NSArray *)messages + options:(JS::NativeMLCEngine::GenerationOptions &)options + resolve:(RCTPromiseResolveBlock)resolve + reject:(RCTPromiseRejectBlock)reject { + NSDictionary *request = [self buildRequestWithMessages:messages + options:options]; + + NSMutableString *accumulatedContent = [NSMutableString new]; + NSMutableArray *accumulatedToolCalls = [NSMutableArray new]; + + __block NSString *finalFinishReason = nil; + __block NSString *finalRole = nil; + + NSError *requestError; + NSString *requestId = [self + startChatCompletionWithRequest:request + completion:^(NSDictionary *response) { + if (response[@"usage"]) { + resolve(@{ + @"role" : finalRole, + @"content" : accumulatedContent, + @"tool_calls" : accumulatedToolCalls, + @"finish_reason" : finalFinishReason, + @"usage" : response[@"usage"], + }); + return; + } + + NSDictionary *choice = response[@"choices"][0]; + if (choice) { + NSDictionary *delta = choice[@"delta"]; + if (delta[@"content"]) { + [accumulatedContent + appendString:delta[@"content"]]; + } + if (delta[@"role"]) { + finalRole = delta[@"role"]; + } + if (delta[@"tool_calls"]) { + [accumulatedToolCalls + addObjectsFromArray:delta[@"tool_calls"]]; + } + if (choice[@"finish_reason"]) { + finalFinishReason = choice[@"finish_reason"]; + } + } + } + error:&requestError]; + + if (!requestId) { + reject(@"MLCEngine", + requestError.localizedDescription ?: @"Failed to start generation", + nil); + } } -- (void)streamText:(NSArray*)messages +- (void)streamText:(NSArray *)messages options:(JS::NativeMLCEngine::GenerationOptions &)options resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { - - NSDictionary *request = [self buildRequestWithMessages:messages options:options]; - - __block NSString* finalFinishReason = nil; - + + NSDictionary *request = [self buildRequestWithMessages:messages + options:options]; + + __block NSString *finalFinishReason = nil; + @try { - NSString *requestId = [self.engine chatCompletionWithMessages:messages - options:request - completion:^(NSDictionary* response) { - if (response[@"usage"]) { - [self emitOnChatComplete:@{ - @"usage": response[@"usage"], - @"finish_reason": finalFinishReason - }]; - return; - } - - NSDictionary* choice = response[@"choices"][0]; - if (choice[@"finish_reason"]) { - finalFinishReason = choice[@"finish_reason"]; - } - - [self emitOnChatUpdate:choice]; - }]; - + NSError *requestError; + NSString *requestId = [self + startChatCompletionWithRequest:request + completion:^(NSDictionary *response) { + if (response[@"usage"]) { + [self emitOnChatComplete:@{ + @"usage" : response[@"usage"], + @"finish_reason" : finalFinishReason + }]; + return; + } + + NSDictionary *choice = response[@"choices"][0]; + if (choice[@"finish_reason"]) { + finalFinishReason = choice[@"finish_reason"]; + } + + [self emitOnChatUpdate:choice]; + } + error:&requestError]; + + if (!requestId) { + @throw [NSException exceptionWithName:@"MLCEngine" + reason:requestError.localizedDescription + ?: @"Failed to start generation" + userInfo:nil]; + } + resolve(requestId); - } @catch (NSException* exception) { + } @catch (NSException *exception) { reject(@"MLCEngine", exception.reason, nil); return; } } -- (void)getModel:(NSString*)name +- (void)getModel:(NSString *)name resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { - NSDictionary* modelConfig = [self findModelById:name]; + NSDictionary *modelConfig = [self findModelById:name]; if (!modelConfig) { reject(@"MLCEngine", @"Didn't find the model", nil); return; @@ -244,272 +349,349 @@ - (void)getModels:(RCTPromiseResolveBlock)resolve resolve([self getModelList]); } -- (void)prepareModel:(NSString*)modelId +- (void)prepareModel:(NSString *)modelId resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { @try { - NSDictionary* modelRecord = [self findModelById:modelId]; + NSDictionary *modelRecord = [self findModelById:modelId]; if (!modelRecord) { reject(@"MLCEngine", @"There's no record for requested model", nil); return; } - - NSString* modelLib = modelRecord[@"model_lib"]; + + NSString *modelLib = modelRecord[@"model_lib"]; if (!modelLib) { - reject(@"MLCEngine", @"Invalid model config - missing required fields", nil); + reject(@"MLCEngine", @"Invalid model config - missing required fields", + nil); return; } - - NSURL* modelLocalURL = [self.bundleURL URLByAppendingPathComponent:modelId]; + + NSURL *modelLocalURL = [self.bundleURL URLByAppendingPathComponent:modelId]; if (!modelLocalURL) { reject(@"MLCEngine", @"Failed to construct model path", nil); return; } - - NSString* modelLocalPath = [modelLocalURL path]; - + + NSString *modelLocalPath = [modelLocalURL path]; + BOOL isDirectory; - if (![[NSFileManager defaultManager] fileExistsAtPath:modelLocalPath isDirectory:&isDirectory] || !isDirectory) { - reject(@"MLCEngine", [NSString stringWithFormat:@"Model directory not found at path: %@", modelLocalPath], nil); + if (![[NSFileManager defaultManager] fileExistsAtPath:modelLocalPath + isDirectory:&isDirectory] || + !isDirectory) { + reject( + @"MLCEngine", + [NSString stringWithFormat:@"Model directory not found at path: %@", + modelLocalPath], + nil); return; } - - [self.engine reloadWithModelPath:modelLocalPath modelLib:modelLib]; - + + NSMutableDictionary *engineConfig = [NSMutableDictionary new]; + engineConfig[@"model"] = modelLocalPath; + engineConfig[@"model_lib"] = + [NSString stringWithFormat:@"system://%@", modelLib]; + engineConfig[@"mode"] = + @"interactive"; // at most 1 concurrent inference request + + NSError *configError; + NSString *engineConfigJSON = [self jsonStringFromDictionary:engineConfig + error:&configError]; + if (!engineConfigJSON) { + reject(@"MLCEngine", + configError.localizedDescription + ?: @"Failed to build engine config", + nil); + return; + } + + [self.engine reload:engineConfigJSON]; + resolve([NSString stringWithFormat:@"Model prepared: %@", modelId]); - } @catch (NSException* exception) { + } @catch (NSException *exception) { reject(@"MLCEngine", exception.reason, nil); } } -- (NSDictionary*)readModelConfig:(NSString*)modelId error:(NSError**)error { - NSURL* modelDirURL = [self.bundleURL URLByAppendingPathComponent:modelId]; - NSURL* modelConfigURL = [modelDirURL URLByAppendingPathComponent:@"mlc-chat-config.json"]; - - NSData* jsonData = [NSData dataWithContentsOfURL:modelConfigURL]; - if (!jsonData) { - if (error) { - *error = [NSError errorWithDomain:@"MLCEngine" code:1 userInfo:@{NSLocalizedDescriptionKey : @"Model config not found - may need to download first"}]; - } - return nil; - } - - return [NSJSONSerialization JSONObjectWithData:jsonData options:0 error:error]; -} +- (BOOL)downloadFile:(NSString *)modelUrl + filename:(NSString *)filename + toURL:(NSURL *)destURL + error:(NSError **)error { + NSString *urlString = + [NSString stringWithFormat:@"%@/resolve/main/%@", modelUrl, filename]; + NSURL *url = [NSURL URLWithString:urlString]; -- (BOOL)downloadFile:(NSString*)modelUrl filename:(NSString*)filename toURL:(NSURL*)destURL error:(NSError**)error { - NSString* urlString = [NSString stringWithFormat:@"%@/resolve/main/%@", modelUrl, filename]; - NSURL* url = [NSURL URLWithString:urlString]; - - NSData* fileData = [NSData dataWithContentsOfURL:url]; + NSData *fileData = [NSData dataWithContentsOfURL:url]; if (!fileData) { if (error) { - *error = [NSError errorWithDomain:@"MLCEngine" - code:2 - userInfo:@{NSLocalizedDescriptionKey : [NSString stringWithFormat:@"Failed to download %@", filename]}]; + *error = [NSError + errorWithDomain:@"MLCEngine" + code:2 + userInfo:@{ + NSLocalizedDescriptionKey : [NSString + stringWithFormat:@"Failed to download %@", filename] + }]; } return NO; } - + if (![fileData writeToURL:destURL atomically:YES]) { if (error) { - *error = [NSError errorWithDomain:@"MLCEngine" - code:6 - userInfo:@{NSLocalizedDescriptionKey : [NSString stringWithFormat:@"Failed to write %@", filename]}]; + *error = [NSError + errorWithDomain:@"MLCEngine" + code:6 + userInfo:@{ + NSLocalizedDescriptionKey : [NSString + stringWithFormat:@"Failed to write %@", filename] + }]; } return NO; } - + return YES; } // Download all model files with percentage updates -- (void)downloadModelFiles:(NSDictionary*)modelRecord +- (void)downloadModelFiles:(NSDictionary *)modelRecord progress:(void (^)(double percentage))progressCallback - error:(NSError**)error { - NSString* modelId = modelRecord[@"model_id"]; - NSString* modelUrl = modelRecord[@"model_url"]; - + error:(NSError **)error { + NSString *modelId = modelRecord[@"model_id"]; + NSString *modelUrl = modelRecord[@"model_url"]; + if (!modelId || !modelUrl) { if (error) { - *error = [NSError errorWithDomain:@"MLCEngine" code:3 userInfo:@{NSLocalizedDescriptionKey : @"Missing required model record fields"}]; + *error = [NSError errorWithDomain:@"MLCEngine" + code:3 + userInfo:@{ + NSLocalizedDescriptionKey : + @"Missing required model record fields" + }]; } return; } - + // Check if config already exists - NSURL* modelDirURL = [self.bundleURL URLByAppendingPathComponent:modelId]; - NSURL* modelConfigURL = [modelDirURL URLByAppendingPathComponent:@"mlc-chat-config.json"]; - NSURL* tensorCacheURL = [modelDirURL URLByAppendingPathComponent:@"tensor-cache.json"]; - + NSURL *modelDirURL = [self.bundleURL URLByAppendingPathComponent:modelId]; + NSURL *modelConfigURL = + [modelDirURL URLByAppendingPathComponent:@"mlc-chat-config.json"]; + NSURL *tensorCacheURL = + [modelDirURL URLByAppendingPathComponent:@"tensor-cache.json"]; + if (!modelDirURL || !modelConfigURL) { if (error) { - *error = [NSError errorWithDomain:@"MLCEngine" code:4 userInfo:@{NSLocalizedDescriptionKey : @"Failed to construct config URLs"}]; + *error = [NSError errorWithDomain:@"MLCEngine" + code:4 + userInfo:@{ + NSLocalizedDescriptionKey : + @"Failed to construct config URLs" + }]; } return; } - + // Create model directory if it doesn't exist - NSError* dirError; - [[NSFileManager defaultManager] createDirectoryAtPath:[modelDirURL path] withIntermediateDirectories:YES attributes:nil error:&dirError]; + NSError *dirError; + [[NSFileManager defaultManager] createDirectoryAtPath:[modelDirURL path] + withIntermediateDirectories:YES + attributes:nil + error:&dirError]; if (dirError) { *error = dirError; return; } - + // Download and save tensor-cache if it doesn't exist - if (![[NSFileManager defaultManager] fileExistsAtPath:[tensorCacheURL path]]) { - if (![self downloadFile:modelUrl filename:@"tensor-cache.json" toURL:tensorCacheURL error:error]) { + if (![[NSFileManager defaultManager] + fileExistsAtPath:[tensorCacheURL path]]) { + if (![self downloadFile:modelUrl + filename:@"tensor-cache.json" + toURL:tensorCacheURL + error:error]) { return; } } - + // Read and parse tensor cache - NSData* tensorCacheData = [NSData dataWithContentsOfURL:tensorCacheURL]; + NSData *tensorCacheData = [NSData dataWithContentsOfURL:tensorCacheURL]; if (!tensorCacheData) { if (error) { - *error = [NSError errorWithDomain:@"MLCEngine" code:2 userInfo:@{NSLocalizedDescriptionKey : @"Failed to read tensor cache"}]; + *error = [NSError + errorWithDomain:@"MLCEngine" + code:2 + userInfo:@{ + NSLocalizedDescriptionKey : @"Failed to read tensor cache" + }]; } return; } - - NSError* tensorCacheJsonError; - NSDictionary* tensorCache = [NSJSONSerialization JSONObjectWithData:tensorCacheData options:0 error:&tensorCacheJsonError]; + + NSError *tensorCacheJsonError; + NSDictionary *tensorCache = + [NSJSONSerialization JSONObjectWithData:tensorCacheData + options:0 + error:&tensorCacheJsonError]; if (tensorCacheJsonError) { *error = tensorCacheJsonError; return; } // Download and save model config if it doesn't exist - if (![[NSFileManager defaultManager] fileExistsAtPath:[modelConfigURL path]]) { - if (![self downloadFile:modelUrl filename:@"mlc-chat-config.json" toURL:modelConfigURL error:error]) { + if (![[NSFileManager defaultManager] + fileExistsAtPath:[modelConfigURL path]]) { + if (![self downloadFile:modelUrl + filename:@"mlc-chat-config.json" + toURL:modelConfigURL + error:error]) { return; } } // Read and parse model config - NSData* modelConfigData = [NSData dataWithContentsOfURL:modelConfigURL]; + NSData *modelConfigData = [NSData dataWithContentsOfURL:modelConfigURL]; if (!modelConfigData) { if (error) { - *error = [NSError errorWithDomain:@"MLCEngine" code:2 userInfo:@{NSLocalizedDescriptionKey : @"Failed to read model config"}]; + *error = [NSError + errorWithDomain:@"MLCEngine" + code:2 + userInfo:@{ + NSLocalizedDescriptionKey : @"Failed to read model config" + }]; } return; } - - NSError* modelConfigJsonError; - NSDictionary* modelConfig = [NSJSONSerialization JSONObjectWithData:modelConfigData options:0 error:&modelConfigJsonError]; + + NSError *modelConfigJsonError; + NSDictionary *modelConfig = + [NSJSONSerialization JSONObjectWithData:modelConfigData + options:0 + error:&modelConfigJsonError]; if (modelConfigJsonError) { *error = modelConfigJsonError; return; } - + // Create unified list of files to download - NSMutableArray* filesToDownload = [NSMutableArray new]; - + NSMutableArray *filesToDownload = [NSMutableArray new]; + // Add parameter files from tensor cache - NSArray* records = tensorCache[@"records"]; + NSArray *records = tensorCache[@"records"]; if ([records isKindOfClass:[NSArray class]]) { - for (NSDictionary* record in records) { - NSString* dataPath = record[@"dataPath"]; + for (NSDictionary *record in records) { + NSString *dataPath = record[@"dataPath"]; if (dataPath) { - NSURL* fileURL = [modelDirURL URLByAppendingPathComponent:dataPath]; + NSURL *fileURL = [modelDirURL URLByAppendingPathComponent:dataPath]; if (![[NSFileManager defaultManager] fileExistsAtPath:[fileURL path]]) { [filesToDownload addObject:dataPath]; } } } } - + // Add tokenizer files - NSArray* tokenizerFiles = modelConfig[@"tokenizer_files"]; + NSArray *tokenizerFiles = modelConfig[@"tokenizer_files"]; if ([tokenizerFiles isKindOfClass:[NSArray class]]) { - for (NSString* filename in tokenizerFiles) { - NSURL* fileURL = [modelDirURL URLByAppendingPathComponent:filename]; + for (NSString *filename in tokenizerFiles) { + NSURL *fileURL = [modelDirURL URLByAppendingPathComponent:filename]; if (![[NSFileManager defaultManager] fileExistsAtPath:[fileURL path]]) { [filesToDownload addObject:filename]; } } } - + // Download all files with progress tracking NSInteger totalFiles = filesToDownload.count; for (NSInteger i = 0; i < totalFiles; i++) { - NSString* filename = filesToDownload[i]; - NSURL* fileURL = [modelDirURL URLByAppendingPathComponent:filename]; - + NSString *filename = filesToDownload[i]; + NSURL *fileURL = [modelDirURL URLByAppendingPathComponent:filename]; + // Download the file first - if (![self downloadFile:modelUrl filename:filename toURL:fileURL error:error]) { + if (![self downloadFile:modelUrl + filename:filename + toURL:fileURL + error:error]) { return; } - + // Calculate and emit progress after successful download - double percentage = totalFiles > 0 ? (double)(i + 1) / totalFiles * 100.0 : 100.0; + double percentage = + totalFiles > 0 ? (double)(i + 1) / totalFiles * 100.0 : 100.0; if (progressCallback) { progressCallback(round(percentage)); } } } -- (void)downloadModel:(NSString*)modelId +- (void)downloadModel:(NSString *)modelId resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { - dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - @try { - NSDictionary* modelRecord = [self findModelById:modelId]; - - if (!modelRecord) { - reject(@"MLCEngine", @"There's no record for requested model", nil); - return; - } - - NSError* downloadError = nil; - [self downloadModelFiles:modelRecord - progress:^(double percentage) { - [self emitOnDownloadProgress:@{@"percentage" : @(percentage)}]; - } - error:&downloadError]; - - if (downloadError) { - reject(@"MLCEngine", @"Failed to download model", downloadError); - return; - } - - resolve(nil); - } @catch (NSException* exception) { - reject(@"MLCEngine", exception.reason, nil); - } - }); + dispatch_async( + dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ + @try { + NSDictionary *modelRecord = [self findModelById:modelId]; + + if (!modelRecord) { + reject(@"MLCEngine", @"There's no record for requested model", nil); + return; + } + + NSError *downloadError = nil; + [self downloadModelFiles:modelRecord + progress:^(double percentage) { + [self emitOnDownloadProgress:@{ + @"percentage" : @(percentage) + }]; + } + error:&downloadError]; + + if (downloadError) { + reject(@"MLCEngine", @"Failed to download model", downloadError); + return; + } + + resolve(nil); + } @catch (NSException *exception) { + reject(@"MLCEngine", exception.reason, nil); + } + }); } -- (void)removeModel:(NSString*)modelId +- (void)removeModel:(NSString *)modelId resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { - dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - @try { - NSURL* modelDirURL = [self.bundleURL URLByAppendingPathComponent:modelId]; - NSString* modelDirPath = [modelDirURL path]; - - BOOL isDirectory; - if ([[NSFileManager defaultManager] fileExistsAtPath:modelDirPath isDirectory:&isDirectory]) { - if (isDirectory) { - NSError* removeError; - BOOL removed = [[NSFileManager defaultManager] removeItemAtPath:modelDirPath error:&removeError]; - - if (removed) { - resolve(nil); + dispatch_async( + dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ + @try { + NSURL *modelDirURL = + [self.bundleURL URLByAppendingPathComponent:modelId]; + NSString *modelDirPath = [modelDirURL path]; + + BOOL isDirectory; + if ([[NSFileManager defaultManager] fileExistsAtPath:modelDirPath + isDirectory:&isDirectory]) { + if (isDirectory) { + NSError *removeError; + BOOL removed = [[NSFileManager defaultManager] + removeItemAtPath:modelDirPath + error:&removeError]; + + if (removed) { + resolve(nil); + } else { + reject(@"MLCEngine", + [NSString + stringWithFormat:@"Failed to clean model: %@", + removeError.localizedDescription], + removeError); + } + } else { + reject(@"MLCEngine", @"Path exists but is not a directory", nil); + } } else { - reject(@"MLCEngine", [NSString stringWithFormat:@"Failed to clean model: %@", removeError.localizedDescription], removeError); + resolve(nil); } - } else { - reject(@"MLCEngine", @"Path exists but is not a directory", nil); + } @catch (NSException *exception) { + reject(@"MLCEngine", exception.reason, nil); } - } else { - resolve(nil); - } - } @catch (NSException* exception) { - reject(@"MLCEngine", exception.reason, nil); - } - }); + }); } - (void)unloadModel:(RCTPromiseResolveBlock)resolve @@ -518,10 +700,16 @@ - (void)unloadModel:(RCTPromiseResolveBlock)resolve resolve(nil); } -- (void)cancelStream:(nonnull NSString *)streamId resolve:(nonnull RCTPromiseResolveBlock)resolve reject:(nonnull RCTPromiseRejectBlock)reject { - [self.engine cancelRequest:streamId]; +- (void)cancelStream:(nonnull NSString *)streamId + resolve:(nonnull RCTPromiseResolveBlock)resolve + reject:(nonnull RCTPromiseRejectBlock)reject { + [self.pendingRequests removeObjectForKey:streamId]; + [self.engine abort:streamId]; resolve(nil); } +- (void)dealloc { + [self.engine exitBackgroundLoop]; +} @end diff --git a/packages/mlc/ios/engine/BackgroundWorker.h b/packages/mlc/ios/engine/BackgroundWorker.h deleted file mode 100644 index a3899242..00000000 --- a/packages/mlc/ios/engine/BackgroundWorker.h +++ /dev/null @@ -1,14 +0,0 @@ -// -// BackgroundWorker.h -// Pods -// - -#import - -NS_ASSUME_NONNULL_BEGIN - -@interface BackgroundWorker : NSThread -- (instancetype)initWithTask:(void (^)(void))task; -@end - -NS_ASSUME_NONNULL_END diff --git a/packages/mlc/ios/engine/BackgroundWorker.mm b/packages/mlc/ios/engine/BackgroundWorker.mm deleted file mode 100644 index e07bfb05..00000000 --- a/packages/mlc/ios/engine/BackgroundWorker.mm +++ /dev/null @@ -1,32 +0,0 @@ -// -// BackgroundWorker.mm -// Pods -// - -#import "BackgroundWorker.h" - -/** - * BackgroundWorker manages background thread execution for the MLC engine. - * This class provides a simple interface to run long-running tasks on separate threads, - * ensuring the main thread remains responsive while the LLM engine processes requests. - * It's used to run the engine's background loop and stream processing loop concurrently. - */ -@implementation BackgroundWorker { - void (^_task)(void); -} - -- (instancetype)initWithTask:(void (^)(void))task { - self = [super init]; - if (self) { - _task = [task copy]; - } - return self; -} - -- (void)main { - if (_task) { - _task(); - } -} - -@end diff --git a/packages/mlc/ios/engine/EngineState.h b/packages/mlc/ios/engine/EngineState.h deleted file mode 100644 index e9f8dfd9..00000000 --- a/packages/mlc/ios/engine/EngineState.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// MLCEngine.h -// Pods -// -// Created by Szymon Rybczak on 19/07/2024. -// - -#import "JSONFFIEngine.h" -#import - -NS_ASSUME_NONNULL_BEGIN - -@interface EngineState : NSObject -@property(nonatomic, strong) NSMutableDictionary *requestStateMap; - -- (NSString*)chatCompletionWithJSONFFIEngine:(JSONFFIEngine *)jsonFFIEngine - request:(NSDictionary *)request - completion:(void (^)(NSDictionary* response))completion; -- (void)streamCallbackWithResult:(NSString *)result; -- (void)cancelRequest:(NSString *)requestId - withJSONFFIEngine:(JSONFFIEngine *)jsonFFIEngine; -@end - -NS_ASSUME_NONNULL_END diff --git a/packages/mlc/ios/engine/EngineState.mm b/packages/mlc/ios/engine/EngineState.mm deleted file mode 100644 index 52ea4066..00000000 --- a/packages/mlc/ios/engine/EngineState.mm +++ /dev/null @@ -1,73 +0,0 @@ -// -// EngineState.mm -// Pods -// - -#import "EngineState.h" -#import "JSONFFIEngine.h" - -/** - * EngineState manages the request lifecycle and callback routing for chat completions. - * It maintains a mapping between request IDs and their corresponding completion handlers, - * ensuring that streaming responses are properly routed back to the correct caller. - * This class handles JSON serialization/deserialization and coordinates between - * the high-level API and the low-level JSON FFI engine. - */ -@implementation EngineState - -- (instancetype)init { - self = [super init]; - if (self) { - _requestStateMap = [NSMutableDictionary new]; - } - return self; -} - -- (NSString*)chatCompletionWithJSONFFIEngine:(JSONFFIEngine*)jsonFFIEngine - request:(NSDictionary*)request - completion:(void (^)(NSDictionary* response))completion { - NSError* error; - NSData* jsonData = [NSJSONSerialization dataWithJSONObject:request options:0 error:&error]; - if (error) { - @throw [NSException exceptionWithName:@"JSONSerializationException" - reason:[NSString stringWithFormat:@"Failed to serialize request: %@", - error.localizedDescription] - userInfo:nil]; - } - - NSString* jsonRequest = [[NSString alloc] initWithData:jsonData encoding:NSUTF8StringEncoding]; - NSString* requestID = [[NSUUID UUID] UUIDString]; - - self.requestStateMap[requestID] = completion; - - [jsonFFIEngine chatCompletion:jsonRequest requestID:requestID]; - - return requestID; -} - -- (void)streamCallbackWithResult:(NSString*)result { - NSError* error; - NSArray* responses = [NSJSONSerialization JSONObjectWithData:[result dataUsingEncoding:NSUTF8StringEncoding] options:0 error:&error]; - if (error) { - NSLog(@"Error decoding JSON: %@", error); - return; - } - - for (NSDictionary* res in responses) { - NSString* requestID = res[@"id"]; - void (^completion)(NSDictionary*) = self.requestStateMap[requestID]; - if (completion) { - completion(res); - if (res[@"usage"]) { - [self.requestStateMap removeObjectForKey:requestID]; - } - } - } -} - -- (void)cancelRequest:(NSString *)requestId withJSONFFIEngine:(JSONFFIEngine *)jsonFFIEngine { - [self.requestStateMap removeObjectForKey:requestId]; - [jsonFFIEngine abort:requestId]; -} - -@end diff --git a/packages/mlc/ios/engine/JSONFFIEngine.h b/packages/mlc/ios/engine/JSONFFIEngine.h deleted file mode 100644 index 1e4fb441..00000000 --- a/packages/mlc/ios/engine/JSONFFIEngine.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) MLC-AI - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * This file is derived from the MLC-LLM project: - * https://github.com/mlc-ai/mlc-llm - */ - -#import -#import - -/** - * This is an internal Raw JSON FFI Engine that redirects request to internal JSON FFI Engine in C++ - */ -@interface JSONFFIEngine : NSObject - -- (void)initBackgroundEngine:(void (^)(NSString *))streamCallback; - -- (void)reload:(NSString *)engineConfig; - -- (void)unload; - -- (void)reset; - -- (void)chatCompletion:(NSString *)requestJSON requestID:(NSString *)requestID; - -- (void)abort:(NSString *)requestID; - -- (void)runBackgroundLoop; - -- (void)runBackgroundStreamBackLoop; - -- (void)exitBackgroundLoop; - -@end diff --git a/packages/mlc/ios/engine/JSONFFIEngine.mm b/packages/mlc/ios/engine/JSONFFIEngine.mm deleted file mode 100644 index cec8778d..00000000 --- a/packages/mlc/ios/engine/JSONFFIEngine.mm +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) MLC-AI - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * This file is derived from the MLC-LLM project: - * https://github.com/mlc-ai/mlc-llm - */ -#import -#import -#include - -#include "JSONFFIEngine.h" - -#define TVM_USE_LIBBACKTRACE 0 -#define DMLC_USE_LOGGING_LIBRARY - -#include -#include - -using namespace tvm::runtime; - -@implementation JSONFFIEngine { - // Internal c++ classes - // internal module backed by JSON FFI - Module json_ffi_engine_; - // member functions - PackedFunc init_background_engine_func_; - PackedFunc unload_func_; - PackedFunc reload_func_; - PackedFunc reset_func_; - PackedFunc chat_completion_func_; - PackedFunc abort_func_; - PackedFunc run_background_loop_func_; - PackedFunc run_background_stream_back_loop_func_; - PackedFunc exit_background_loop_func_; -} - -- (instancetype)init { - if (self = [super init]) { - // load chat module - const PackedFunc* f_json_ffi_create = Registry::Get("mlc.json_ffi.CreateJSONFFIEngine"); - ICHECK(f_json_ffi_create) << "Cannot find mlc.json_ffi.CreateJSONFFIEngine"; - json_ffi_engine_ = (*f_json_ffi_create)(); - init_background_engine_func_ = json_ffi_engine_->GetFunction("init_background_engine"); - reload_func_ = json_ffi_engine_->GetFunction("reload"); - unload_func_ = json_ffi_engine_->GetFunction("unload"); - reset_func_ = json_ffi_engine_->GetFunction("reset"); - chat_completion_func_ = json_ffi_engine_->GetFunction("chat_completion"); - abort_func_ = json_ffi_engine_->GetFunction("abort"); - run_background_loop_func_ = json_ffi_engine_->GetFunction("run_background_loop"); - run_background_stream_back_loop_func_ = - json_ffi_engine_->GetFunction("run_background_stream_back_loop"); - exit_background_loop_func_ = json_ffi_engine_->GetFunction("exit_background_loop"); - - ICHECK(init_background_engine_func_ != nullptr); - ICHECK(reload_func_ != nullptr); - ICHECK(unload_func_ != nullptr); - ICHECK(reset_func_ != nullptr); - ICHECK(chat_completion_func_ != nullptr); - ICHECK(abort_func_ != nullptr); - ICHECK(run_background_loop_func_ != nullptr); - ICHECK(run_background_stream_back_loop_func_ != nullptr); - ICHECK(exit_background_loop_func_ != nullptr); - } - return self; -} - -- (void)initBackgroundEngine:(void (^)(NSString*))streamCallback { - TypedPackedFunc internal_stream_callback([streamCallback](String value) { - streamCallback([NSString stringWithUTF8String:value.c_str()]); - }); - int device_type = kDLMetal; - int device_id = 0; - init_background_engine_func_(device_type, device_id, internal_stream_callback); -} - -- (void)reload:(NSString*)engineConfigJson { - std::string engine_config = engineConfigJson.UTF8String; - reload_func_(engine_config); -} - -- (void)unload { - unload_func_(); -} - -- (void)reset { - reset_func_(); -} - -- (void)chatCompletion:(NSString*)requestJSON requestID:(NSString*)requestID { - std::string request_json = requestJSON.UTF8String; - std::string request_id = requestID.UTF8String; - chat_completion_func_(request_json, request_id); -} - -- (void)abort:(NSString*)requestID { - std::string request_id = requestID.UTF8String; - abort_func_(request_id); -} - -- (void)runBackgroundLoop { - run_background_loop_func_(); -} - -- (void)runBackgroundStreamBackLoop { - run_background_stream_back_loop_func_(); -} - -- (void)exitBackgroundLoop { - exit_background_loop_func_(); -} - -@end diff --git a/packages/mlc/ios/engine/LLMEngine.h b/packages/mlc/ios/engine/LLMEngine.h index 6a3fb5fb..1e4fb441 100644 --- a/packages/mlc/ios/engine/LLMEngine.h +++ b/packages/mlc/ios/engine/LLMEngine.h @@ -17,20 +17,29 @@ */ #import +#import -NS_ASSUME_NONNULL_BEGIN +/** + * This is an internal Raw JSON FFI Engine that redirects request to internal JSON FFI Engine in C++ + */ +@interface JSONFFIEngine : NSObject -@interface LLMEngine : NSObject +- (void)initBackgroundEngine:(void (^)(NSString *))streamCallback; -- (instancetype)init; +- (void)reload:(NSString *)engineConfig; -- (void)reloadWithModelPath:(NSString *)modelPath modelLib:(NSString *)modelLib; -- (void)reset; - (void)unload; -- (NSString*)chatCompletionWithMessages:(NSArray *)messages options:(NSDictionary *)options completion:(void (^)(NSDictionary* response))completion; -- (void)cancelRequest:(NSString *)requestId; +- (void)reset; -@end +- (void)chatCompletion:(NSString *)requestJSON requestID:(NSString *)requestID; + +- (void)abort:(NSString *)requestID; -NS_ASSUME_NONNULL_END +- (void)runBackgroundLoop; + +- (void)runBackgroundStreamBackLoop; + +- (void)exitBackgroundLoop; + +@end diff --git a/packages/mlc/ios/engine/LLMEngine.mm b/packages/mlc/ios/engine/LLMEngine.mm index bda8f0ff..2f31be86 100644 --- a/packages/mlc/ios/engine/LLMEngine.mm +++ b/packages/mlc/ios/engine/LLMEngine.mm @@ -16,73 +16,121 @@ * https://github.com/mlc-ai/mlc-llm */ -#import "LLMEngine.h" -#import "BackgroundWorker.h" -#import "EngineState.h" +#import +#import +#include -@interface LLMEngine () +#include "LLMEngine.h" -@property(nonatomic, strong) EngineState* state; -@property(nonatomic, strong) JSONFFIEngine* jsonFFIEngine; -@property(nonatomic, strong) NSMutableArray* threads; +#define TVM_USE_LIBBACKTRACE 0 +#define DMLC_USE_LOGGING_LIBRARY -@end +#include +#include +#include +#include +#include + +using namespace tvm::runtime; +using tvm::ffi::Function; +using tvm::ffi::Module; +using tvm::ffi::Optional; +using tvm::ffi::String; +using tvm::ffi::TypedFunction; -@implementation LLMEngine +@implementation JSONFFIEngine { + // Internal c++ classes + // internal module backed by JSON FFI + Optional json_ffi_engine_; + // member functions + Function init_background_engine_func_; + Function unload_func_; + Function reload_func_; + Function reset_func_; + Function chat_completion_func_; + Function abort_func_; + Function run_background_loop_func_; + Function run_background_stream_back_loop_func_; + Function exit_background_loop_func_; +} - (instancetype)init { - self = [super init]; - if (self) { - _state = [[EngineState alloc] init]; - _jsonFFIEngine = [[JSONFFIEngine alloc] init]; - _threads = [NSMutableArray array]; - - [_jsonFFIEngine initBackgroundEngine:^(NSString* _Nullable result) { - [self.state streamCallbackWithResult:result]; - }]; - - BackgroundWorker* backgroundWorker = [[BackgroundWorker alloc] initWithTask:^{ - [NSThread setThreadPriority:1.0]; - [self.jsonFFIEngine runBackgroundLoop]; - }]; - - BackgroundWorker* backgroundStreamBackWorker = [[BackgroundWorker alloc] initWithTask:^{ - [self.jsonFFIEngine runBackgroundStreamBackLoop]; - }]; - - backgroundWorker.qualityOfService = NSQualityOfServiceUserInteractive; - [_threads addObject:backgroundWorker]; - [_threads addObject:backgroundStreamBackWorker]; - [backgroundWorker start]; - [backgroundStreamBackWorker start]; + if (self = [super init]) { + // load chat module + Function f_json_ffi_create = Function::GetGlobalRequired("mlc.json_ffi.CreateJSONFFIEngine"); + json_ffi_engine_ = f_json_ffi_create().cast(); + init_background_engine_func_ = + json_ffi_engine_.value()->GetFunction("init_background_engine").value_or(Function(nullptr)); + reload_func_ = json_ffi_engine_.value()->GetFunction("reload").value_or(Function(nullptr)); + unload_func_ = json_ffi_engine_.value()->GetFunction("unload").value_or(Function(nullptr)); + reset_func_ = json_ffi_engine_.value()->GetFunction("reset").value_or(Function(nullptr)); + chat_completion_func_ = + json_ffi_engine_.value()->GetFunction("chat_completion").value_or(Function(nullptr)); + abort_func_ = json_ffi_engine_.value()->GetFunction("abort").value_or(Function(nullptr)); + run_background_loop_func_ = + json_ffi_engine_.value()->GetFunction("run_background_loop").value_or(Function(nullptr)); + run_background_stream_back_loop_func_ = json_ffi_engine_.value() + ->GetFunction("run_background_stream_back_loop") + .value_or(Function(nullptr)); + exit_background_loop_func_ = + json_ffi_engine_.value()->GetFunction("exit_background_loop").value_or(Function(nullptr)); + + ICHECK(init_background_engine_func_ != nullptr); + ICHECK(reload_func_ != nullptr); + ICHECK(unload_func_ != nullptr); + ICHECK(reset_func_ != nullptr); + ICHECK(chat_completion_func_ != nullptr); + ICHECK(abort_func_ != nullptr); + ICHECK(run_background_loop_func_ != nullptr); + ICHECK(run_background_stream_back_loop_func_ != nullptr); + ICHECK(exit_background_loop_func_ != nullptr); } return self; } -- (void)dealloc { - [self.jsonFFIEngine exitBackgroundLoop]; +- (void)initBackgroundEngine:(void (^)(NSString*))streamCallback { + TypedFunction internal_stream_callback([streamCallback](String value) { + streamCallback([NSString stringWithUTF8String:value.c_str()]); + }); + int device_type = kDLMetal; + int device_id = 0; + init_background_engine_func_(device_type, device_id, internal_stream_callback); +} + +- (void)reload:(NSString*)engineConfigJson { + std::string engine_config = engineConfigJson.UTF8String; + reload_func_(engine_config); } -- (void)reloadWithModelPath:(NSString*)modelPath modelLib:(NSString*)modelLib { - NSString* engineConfig = - [NSString stringWithFormat:@"{\"model\": \"%@\", \"model_lib\": \"system://%@\", \"mode\": \"interactive\"}", modelPath, modelLib]; - [self.jsonFFIEngine reload:engineConfig]; +- (void)unload { + unload_func_(); } - (void)reset { - [self.jsonFFIEngine reset]; + reset_func_(); } -- (void)unload { - [self.jsonFFIEngine unload]; +- (void)chatCompletion:(NSString*)requestJSON requestID:(NSString*)requestID { + std::string request_json = requestJSON.UTF8String; + std::string request_id = requestID.UTF8String; + chat_completion_func_(request_json, request_id); +} + +- (void)abort:(NSString*)requestID { + std::string request_id = requestID.UTF8String; + abort_func_(request_id); +} + +- (void)runBackgroundLoop { + run_background_loop_func_(); } -- (NSString*)chatCompletionWithMessages:(NSArray*)messages options:(NSDictionary*)options completion:(void (^)(NSDictionary* response))completion { - return [self.state chatCompletionWithJSONFFIEngine:self.jsonFFIEngine request:options completion:completion]; +- (void)runBackgroundStreamBackLoop { + run_background_stream_back_loop_func_(); } -- (void)cancelRequest:(NSString *)requestId { - [self.state cancelRequest:requestId withJSONFFIEngine:self.jsonFFIEngine]; +- (void)exitBackgroundLoop { + exit_background_loop_func_(); } @end diff --git a/packages/mlc/mlc-package-config-ios.json b/packages/mlc/mlc-package-config-ios.json index 39261074..330d645a 100644 --- a/packages/mlc/mlc-package-config-ios.json +++ b/packages/mlc/mlc-package-config-ios.json @@ -32,8 +32,8 @@ } }, { - "model": "HF://mlc-ai/Qwen2.5-0.5B-Instruct-q4f16_1-MLC", - "model_id": "Qwen2.5-0.5B-Instruct", + "model": "HF://mlc-ai/Qwen2-1.5B-Instruct-q4f16_1-MLC", + "model_id": "Qwen2-1.5B-Instruct", "estimated_vram_bytes": 600000000, "bundle_weight": false, "overrides": {