diff --git a/apps/expo-example/src/components/adapters/mlcModelSetupAdapter.ts b/apps/expo-example/src/components/adapters/mlcModelSetupAdapter.ts
index 7352dddc..fb7631d7 100644
--- a/apps/expo-example/src/components/adapters/mlcModelSetupAdapter.ts
+++ b/apps/expo-example/src/components/adapters/mlcModelSetupAdapter.ts
@@ -1,6 +1,7 @@
 import type { LanguageModelV3 } from '@ai-sdk/provider'
 import { mlc } from '@react-native-ai/mlc'
 import { File, Paths } from 'expo-file-system'
+import { Platform } from 'react-native'
 
 import type { Availability, SetupAdapter } from '../../config/providers.common'
 
@@ -19,7 +20,14 @@ export const createMLCLanguageSetupAdapter = (
     },
     builtIn: false,
     isAvailable(): Availability {
-      return new File(Paths.document, model.modelId, 'tensor-cache.json').exists
+      return new File(
+        Paths.document,
+        ...(Platform.select({
+          ios: ['bundle'],
+        }) ?? []),
+        model.modelId,
+        'tensor-cache.json'
+      ).exists
         ? 'yes'
         : 'availableForDownload'
     },
diff --git a/packages/mlc/ATTRIBUTIONS.md b/packages/mlc/ATTRIBUTIONS.md
index 692f47b9..fe424393 100644
--- a/packages/mlc/ATTRIBUTIONS.md
+++ b/packages/mlc/ATTRIBUTIONS.md
@@ -1,8 +1,7 @@
-Third-Party Notices
-===================
+# Third-Party Notices
+
+## MLC-LLM (mlc-ai/mlc-llm)
 
-MLC-LLM (mlc-ai/mlc-llm)
------------------------
 Portions of the iOS engine implementation are derived from the MLC-LLM project,
 and the prebuilt runtime binaries shipped in this package are based on MLC-LLM.
 
@@ -14,7 +13,6 @@ License: Apache License, Version 2.0
 License URL: https://www.apache.org/licenses/LICENSE-2.0
 
 Derived source files in this package:
+
 - packages/mlc/ios/engine/LLMEngine.h
 - packages/mlc/ios/engine/LLMEngine.mm
-- packages/mlc/ios/engine/JSONFFIEngine.h
-- packages/mlc/ios/engine/JSONFFIEngine.mm
diff --git a/packages/mlc/android/src/main/java/com/callstack/ai/NativeMLCEngineModule.kt b/packages/mlc/android/src/main/java/com/callstack/ai/NativeMLCEngineModule.kt
index a54ab507..6c9526af 100644
--- a/packages/mlc/android/src/main/java/com/callstack/ai/NativeMLCEngineModule.kt
+++ b/packages/mlc/android/src/main/java/com/callstack/ai/NativeMLCEngineModule.kt
@@ -307,7 +307,7 @@ class NativeMLCEngineModule(reactContext: ReactApplicationContext) : NativeMLCEn
 
   private fun getModelConfig(modelId: String): Pair<ModelRecord, File>? {
     val modelRecord = appConfig.model_list.find { it.model_id == modelId } ?: return null
-    val modelDir = File(reactApplicationContext.getExternalFilesDir(""), modelRecord.model_id)
+    val modelDir = File(reactApplicationContext.getFilesDir(), modelRecord.model_id) // model directory is <filesDir>/<model_id>
     return Pair(modelRecord, modelDir)
   }
 }
diff --git a/packages/mlc/ios/MLCEngine.mm b/packages/mlc/ios/MLCEngine.mm
index ac4d6364..0b58c3bf 100644
--- a/packages/mlc/ios/MLCEngine.mm
+++ b/packages/mlc/ios/MLCEngine.mm
@@ -1,17 +1,22 @@
 #import <React/RCTEventEmitter.h>
 #import <ReactCommon/RCTTurboModule.h>
 
-#import <jsi/jsi.h>
 #import <NativeMLCEngine/NativeMLCEngine.h>
+#import <jsi/jsi.h>
 
 #import "LLMEngine.h"
 
+typedef void (^MLCStreamCallback)(NSDictionary *response);
+
 @interface MLCEngine : NativeMLCEngineSpecBase <NativeMLCEngineSpec>
 
-@property(nonatomic, strong) LLMEngine* engine;
-@property(nonatomic, strong) NSURL* bundleURL;
-@property(nonatomic, strong) NSDictionary* cachedAppConfig;
-@property(nonatomic, strong) NSArray* cachedModelList;
+@property(nonatomic, strong) JSONFFIEngine *engine;
+@property(nonatomic, strong) NSURL *bundleURL;
+@property(nonatomic, strong) NSDictionary *cachedAppConfig;
+@property(nonatomic, strong) NSArray *cachedModelList;
+@property(nonatomic, strong)
+    NSMutableDictionary<NSString *, MLCStreamCallback> *pendingRequests;
+@property(nonatomic) dispatch_queue_t streamCallbackQueue;
 
 @end
 
@@ -26,68 +31,90 @@ + (NSString *)moduleName {
 - (instancetype)init {
   self = [super init];
   if (self) {
-    _engine = [[LLMEngine alloc] init];
-    
+    _engine = [[JSONFFIEngine alloc] init];
+    _pendingRequests = [NSMutableDictionary new];
+    _streamCallbackQueue = dispatch_queue_create(
+        "com.callstack.mlcengine.stream", DISPATCH_QUEUE_SERIAL);
+
     // Get the Documents directory path for downloaded models
-    NSArray* paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
-    NSString* documentsDirectory = [paths firstObject];
-    _bundleURL = [NSURL fileURLWithPath:[documentsDirectory stringByAppendingPathComponent:@"bundle"]];
-    
+    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory,
+                                                         NSUserDomainMask, YES);
+    NSString *documentsDirectory = [paths firstObject];
+    _bundleURL =
+        [NSURL fileURLWithPath:[documentsDirectory
+                                   stringByAppendingPathComponent:@"bundle"]];
+
     // Create bundle directory if it doesn't exist (for downloaded models)
-    NSError* dirError;
-    [[NSFileManager defaultManager] createDirectoryAtPath:[_bundleURL path] withIntermediateDirectories:YES attributes:nil error:&dirError];
+    NSError *dirError;
+    [[NSFileManager defaultManager] createDirectoryAtPath:[_bundleURL path]
+                              withIntermediateDirectories:YES
+                                               attributes:nil
+                                                    error:&dirError];
     if (dirError) {
       NSLog(@"Error creating bundle directory: %@", dirError);
     }
+    // Hand streamed responses to handleStreamCallback:, then start both loops.
+    [self.engine initBackgroundEngine:^(NSString *responseJSON) {
+      [self handleStreamCallback:responseJSON];
+    }];
+    dispatch_async(dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0), ^{
+      [self.engine runBackgroundLoop];
+    });
+    dispatch_async(dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0), ^{
+      [self.engine runBackgroundStreamBackLoop];
+    });
   }
   return self;
 }
 
 // Lazy getter for app config with caching - read directly from bundle
-- (NSDictionary*)getAppConfig {
+- (NSDictionary *)getAppConfig {
   if (_cachedAppConfig) {
     return _cachedAppConfig;
   }
-  
+
   // Read config from main bundle resources
-  NSBundle* bundle = [NSBundle mainBundle];
-  NSString* configPath = [bundle pathForResource:@"mlc-app-config" ofType:@"json"];
-  
+  NSBundle *bundle = [NSBundle mainBundle];
+  NSString *configPath = [bundle pathForResource:@"mlc-app-config"
+                                          ofType:@"json"];
+
   if (!configPath) {
     NSLog(@"Failed to find mlc-chat-config.json in bundle");
     return nil;
   }
-  
-  NSData* jsonData = [NSData dataWithContentsOfFile:configPath];
+
+  NSData *jsonData = [NSData dataWithContentsOfFile:configPath];
   if (!jsonData) {
     NSLog(@"Failed to read app config from: %@", configPath);
     return nil;
   }
-  
-  NSError* error;
-  NSDictionary* jsonDict = [NSJSONSerialization JSONObjectWithData:jsonData options:0 error:&error];
-  
+
+  NSError *error;
+  NSDictionary *jsonDict = [NSJSONSerialization JSONObjectWithData:jsonData
+                                                           options:0
+                                                             error:&error];
+
   if (error) {
     NSLog(@"Error parsing app config JSON: %@", error);
     return nil;
   }
-  
+
   if (![jsonDict isKindOfClass:[NSDictionary class]]) {
     NSLog(@"Invalid app config format");
     return nil;
   }
-  
+
   _cachedAppConfig = jsonDict;
-  
+
   return _cachedAppConfig;
 }
 
 // Get cached model list
-- (NSArray*)getModelList {
+- (NSArray *)getModelList {
   if (_cachedModelList) {
     return _cachedModelList;
   }
-  NSDictionary* appConfig = [self getAppConfig];
+  NSDictionary *appConfig = [self getAppConfig];
   if (appConfig) {
     _cachedModelList = appConfig[@"model_list"];
   }
@@ -95,9 +122,9 @@ - (NSArray*)getModelList {
 }
 
 // Find model by ID with caching
-- (NSDictionary*)findModelById:(NSString*)modelId {
-  NSArray* modelList = [self getModelList];
-  for (NSDictionary* model in modelList) {
+- (NSDictionary *)findModelById:(NSString *)modelId {
+  NSArray *modelList = [self getModelList];
+  for (NSDictionary *model in modelList) {
     if ([model[@"model_id"] isEqualToString:modelId]) {
       return model;
     }
@@ -105,15 +132,20 @@ - (NSDictionary*)findModelById:(NSString*)modelId {
   return nil;
 }
 
-
-- (std::shared_ptr<react::TurboModule>)getTurboModule:(const react::ObjCTurboModule::InitParams &)params {
+- (std::shared_ptr<react::TurboModule>)getTurboModule:
+    (const react::ObjCTurboModule::InitParams &)params {
   return std::make_shared<react::NativeMLCEngineSpecJSI>(params);
 }
 
 // Helper method to build complete request with messages and options
-- (NSDictionary*)buildRequestWithMessages:(NSArray*)messages options:(const JS::NativeMLCEngine::GenerationOptions &)options {
-  NSMutableDictionary *request = [@{@"messages": messages, @"stream": @(YES)} mutableCopy];
-  
+- (NSDictionary *)
+    buildRequestWithMessages:(NSArray *)messages
+                     options:(const JS::NativeMLCEngine::GenerationOptions &)
+                                 options {
+  NSMutableDictionary *request =
+      [@{@"messages" : messages, @"stream" : @(YES)} mutableCopy];
+  request[@"stream_options"] = @{@"include_usage" : @(YES)};
+
   if (options.temperature().has_value()) {
     request[@"temperature"] = @(options.temperature().value());
   }
@@ -143,95 +175,168 @@ - (NSDictionary*)buildRequestWithMessages:(NSArray*)messages options:(const JS::
   if (options.toolChoice()) {
     request[@"tool_choice"] = options.toolChoice();
   }
-  
+
   return request;
 }
 
-- (void)generateText:(NSArray<NSDictionary*>*)messages
-             options:(JS::NativeMLCEngine::GenerationOptions &)options
-             resolve:(RCTPromiseResolveBlock)resolve
-              reject:(RCTPromiseRejectBlock)reject {
-  NSDictionary *request = [self buildRequestWithMessages:messages options:options];
-  
-  NSMutableString* accumulatedContent = [NSMutableString new];
-  NSMutableArray* accumulatedToolCalls = [NSMutableArray new];
-  
-  __block NSString* finalFinishReason = nil;
-  __block NSString* finalRole = nil;
-  
-  [self.engine chatCompletionWithMessages:messages
-                                  options:request
-                               completion:^(NSDictionary* response) {
-    if (response[@"usage"]) {
-      resolve(@{
-        @"role": finalRole,
-        @"content": accumulatedContent,
-        @"tool_calls": accumulatedToolCalls,
-        @"finish_reason": finalFinishReason,
-        @"usage": response[@"usage"],
-      });
+// Returns `dictionary` as a JSON string, or nil with `error` set on failure.
+- (NSString *)jsonStringFromDictionary:(NSDictionary *)dictionary
+                                 error:(NSError **)error {
+  if (![NSJSONSerialization isValidJSONObject:dictionary]) { // dataWithJSONObject: would raise, not fail
+    if (error) *error = [NSError errorWithDomain:@"MLCEngine" code:7 userInfo:@{NSLocalizedDescriptionKey : @"Request is not valid JSON"}];
+    return nil;
+  }
+  NSData *jsonData = [NSJSONSerialization dataWithJSONObject:dictionary options:0 error:error];
+  return jsonData ? [[NSString alloc] initWithData:jsonData encoding:NSUTF8StringEncoding] : nil;
+}
+
+// Routes each streamed chunk, by its "id", to its pendingRequests callback.
+- (void)handleStreamCallback:(NSString *)responseJSON {
+  dispatch_async(self.streamCallbackQueue, ^{
+    NSError *error;
+    NSData *payload = [responseJSON dataUsingEncoding:NSUTF8StringEncoding];
+    NSArray *responses =
+        payload ? [NSJSONSerialization JSONObjectWithData:payload options:0 error:&error] : nil;
+    if (![responses isKindOfClass:[NSArray class]]) { // nil on parse failure, or not a list
+      NSLog(@"Error decoding JSON: %@", error);
       return;
     }
-    
-    NSDictionary* choice = response[@"choices"][0];
-    if (choice) {
-      NSDictionary* delta = choice[@"delta"];
-      if (delta[@"content"]) {
-        [accumulatedContent appendString:delta[@"content"]];
-      }
-      if (delta[@"role"]) {
-        finalRole = delta[@"role"];
-      }
-      if (delta[@"tool_calls"]) {
-        [accumulatedToolCalls addObjectsFromArray:delta[@"tool_calls"]];
-      }
-      if (choice[@"finish_reason"]) {
-        finalFinishReason = choice[@"finish_reason"];
+
+    for (NSDictionary *res in responses) {
+      NSString *requestID = res[@"id"];
+      void (^completion)(NSDictionary *) = self.pendingRequests[requestID];
+      if (completion) {
+        completion(res);
+        if (res[@"usage"]) { // drop the callback once a chunk carries "usage"
+          [self.pendingRequests removeObjectForKey:requestID];
+        }
       }
     }
-  }];
+  });
+}
+
+// Serializes `request`, registers `completion` under a fresh UUID and submits
+// it to the engine. Returns the request id, or nil if serialization fails.
+- (NSString *)startChatCompletionWithRequest:(NSDictionary *)request
+                                  completion:(MLCStreamCallback)completion
+                                       error:(NSError **)error {
+  NSString *requestJSON = [self jsonStringFromDictionary:request error:error];
+  if (!requestJSON) return nil;
+  NSString *requestId = [NSUUID UUID].UUIDString;
+  // handleStreamCallback: uses pendingRequests on this queue, so mutate it there (never call this from that queue).
+  dispatch_sync(self.streamCallbackQueue, ^{
+    if (completion) self.pendingRequests[requestId] = [completion copy];
+  });
+  [self.engine chatCompletion:requestJSON requestID:requestId];
+  return requestId;
+}
+
+// Runs a chat completion to the end and resolves once with the accumulated
+// message. Streamed deltas are merged until the chunk carrying "usage" arrives;
+// rejects when the request cannot be started.
+- (void)generateText:(NSArray<NSDictionary *> *)messages
+             options:(JS::NativeMLCEngine::GenerationOptions &)options
+             resolve:(RCTPromiseResolveBlock)resolve
+              reject:(RCTPromiseRejectBlock)reject {
+  NSDictionary *request = [self buildRequestWithMessages:messages
+                                                 options:options];
+
+  NSMutableString *accumulatedContent = [NSMutableString new];
+  NSMutableArray *accumulatedToolCalls = [NSMutableArray new];
+
+  __block id finalFinishReason = nil;
+  __block id finalRole = nil;
+
+  MLCStreamCallback onChunk = ^(NSDictionary *response) {
+    if (response[@"usage"]) {
+      // Dictionary literals raise on nil values, so substitute NSNull when no
+      // delta ever supplied a role or finish reason.
+      resolve(@{
+        @"role" : finalRole ?: [NSNull null],
+        @"content" : accumulatedContent,
+        @"tool_calls" : accumulatedToolCalls,
+        @"finish_reason" : finalFinishReason ?: [NSNull null],
+        @"usage" : response[@"usage"],
+      });
+      return;
+    }
+
+    // firstObject is nil for an empty "choices" array; [0] would raise.
+    NSDictionary *choice = [response[@"choices"] firstObject];
+    NSDictionary *delta = choice[@"delta"];
+    // JSON null decodes to NSNull, so check types before appending.
+    if ([delta[@"content"] isKindOfClass:[NSString class]]) {
+      [accumulatedContent appendString:delta[@"content"]];
+    }
+    if ([delta[@"tool_calls"] isKindOfClass:[NSArray class]]) {
+      [accumulatedToolCalls addObjectsFromArray:delta[@"tool_calls"]];
+    }
+    if (delta[@"role"]) {
+      finalRole = delta[@"role"];
+    }
+    if (choice[@"finish_reason"]) {
+      finalFinishReason = choice[@"finish_reason"];
+    }
+  };
+
+  NSError *requestError;
+  if (![self startChatCompletionWithRequest:request completion:onChunk error:&requestError]) {
+    reject(@"MLCEngine",
+           requestError.localizedDescription ?: @"Failed to start generation",
+           nil);
+  }
 }
 
-- (void)streamText:(NSArray<NSDictionary*>*)messages
+// Starts a streamed chat completion and resolves with its request id. Chunks
+// are forwarded through emitOnChatUpdate: and, last, emitOnChatComplete:.
+- (void)streamText:(NSArray<NSDictionary *> *)messages
            options:(JS::NativeMLCEngine::GenerationOptions &)options
            resolve:(RCTPromiseResolveBlock)resolve
             reject:(RCTPromiseRejectBlock)reject {
-  
-  NSDictionary *request = [self buildRequestWithMessages:messages options:options];
-  
-  __block NSString* finalFinishReason = nil;
-  
+  NSDictionary *request = [self buildRequestWithMessages:messages options:options];
+
+  __block id finalFinishReason = nil;
+
   @try {
-    NSString *requestId = [self.engine chatCompletionWithMessages:messages
-                                                          options:request
-                                                       completion:^(NSDictionary* response) {
-      if (response[@"usage"]) {
-        [self emitOnChatComplete:@{
-          @"usage": response[@"usage"],
-          @"finish_reason": finalFinishReason
-        }];
-        return;
-      }
-      
-      NSDictionary* choice = response[@"choices"][0];
-      if (choice[@"finish_reason"]) {
-        finalFinishReason = choice[@"finish_reason"];
-      }
-      
-      [self emitOnChatUpdate:choice];
-    }];
-    
+    MLCStreamCallback onChunk = ^(NSDictionary *response) {
+      if (response[@"usage"]) {
+        // Dictionary literals raise on nil values; fall back to NSNull.
+        [self emitOnChatComplete:@{
+          @"usage" : response[@"usage"],
+          @"finish_reason" : finalFinishReason ?: [NSNull null]
+        }];
+        return;
+      }
+      // firstObject is nil for an empty "choices" array; [0] would raise.
+      NSDictionary *choice = [response[@"choices"] firstObject];
+      if (choice[@"finish_reason"]) {
+        finalFinishReason = choice[@"finish_reason"];
+      }
+      if (choice) [self emitOnChatUpdate:choice];
+    };
+
+    NSError *requestError;
+    NSString *requestId = [self startChatCompletionWithRequest:request
+                                                    completion:onChunk
+                                                         error:&requestError];
+    if (!requestId) {
+      // Reject directly rather than throwing into the @catch below.
+      reject(@"MLCEngine",
+             requestError.localizedDescription ?: @"Failed to start generation", nil);
+      return;
+    }
+
     resolve(requestId);
-  } @catch (NSException* exception) {
+  } @catch (NSException *exception) {
     reject(@"MLCEngine", exception.reason, nil);
     return;
   }
 }
 
-- (void)getModel:(NSString*)name
+- (void)getModel:(NSString *)name
          resolve:(RCTPromiseResolveBlock)resolve
           reject:(RCTPromiseRejectBlock)reject {
-  NSDictionary* modelConfig = [self findModelById:name];
+  NSDictionary *modelConfig = [self findModelById:name];
   if (!modelConfig) {
     reject(@"MLCEngine", @"Didn't find the model", nil);
     return;
@@ -244,272 +349,349 @@ - (void)getModels:(RCTPromiseResolveBlock)resolve
   resolve([self getModelList]);
 }
 
-- (void)prepareModel:(NSString*)modelId
+- (void)prepareModel:(NSString *)modelId
              resolve:(RCTPromiseResolveBlock)resolve
               reject:(RCTPromiseRejectBlock)reject {
   @try {
-    NSDictionary* modelRecord = [self findModelById:modelId];
+    NSDictionary *modelRecord = [self findModelById:modelId];
     if (!modelRecord) {
       reject(@"MLCEngine", @"There's no record for requested model", nil);
       return;
     }
-    
-    NSString* modelLib = modelRecord[@"model_lib"];
+
+    NSString *modelLib = modelRecord[@"model_lib"];
     if (!modelLib) {
-      reject(@"MLCEngine", @"Invalid model config - missing required fields", nil);
+      reject(@"MLCEngine", @"Invalid model config - missing required fields",
+             nil);
       return;
     }
-    
-    NSURL* modelLocalURL = [self.bundleURL URLByAppendingPathComponent:modelId];
+
+    NSURL *modelLocalURL = [self.bundleURL URLByAppendingPathComponent:modelId];
     if (!modelLocalURL) {
       reject(@"MLCEngine", @"Failed to construct model path", nil);
       return;
     }
-    
-    NSString* modelLocalPath = [modelLocalURL path];
-    
+
+    NSString *modelLocalPath = [modelLocalURL path];
+
     BOOL isDirectory;
-    if (![[NSFileManager defaultManager] fileExistsAtPath:modelLocalPath isDirectory:&isDirectory] || !isDirectory) {
-      reject(@"MLCEngine", [NSString stringWithFormat:@"Model directory not found at path: %@", modelLocalPath], nil);
+    if (![[NSFileManager defaultManager] fileExistsAtPath:modelLocalPath
+                                              isDirectory:&isDirectory] ||
+        !isDirectory) {
+      reject(
+          @"MLCEngine",
+          [NSString stringWithFormat:@"Model directory not found at path: %@",
+                                     modelLocalPath],
+          nil);
       return;
     }
-    
-    [self.engine reloadWithModelPath:modelLocalPath modelLib:modelLib];
-    
+
+    NSMutableDictionary *engineConfig = [NSMutableDictionary new];
+    engineConfig[@"model"] = modelLocalPath;
+    engineConfig[@"model_lib"] =
+        [NSString stringWithFormat:@"system://%@", modelLib];
+    engineConfig[@"mode"] =
+        @"interactive"; // at most 1 concurrent inference request
+
+    NSError *configError;
+    NSString *engineConfigJSON = [self jsonStringFromDictionary:engineConfig
+                                                          error:&configError];
+    if (!engineConfigJSON) {
+      reject(@"MLCEngine",
+             configError.localizedDescription
+                 ?: @"Failed to build engine config",
+             nil);
+      return;
+    }
+
+    [self.engine reload:engineConfigJSON];
+
     resolve([NSString stringWithFormat:@"Model prepared: %@", modelId]);
-  } @catch (NSException* exception) {
+  } @catch (NSException *exception) {
     reject(@"MLCEngine", exception.reason, nil);
   }
 }
 
-- (NSDictionary*)readModelConfig:(NSString*)modelId error:(NSError**)error {
-  NSURL* modelDirURL = [self.bundleURL URLByAppendingPathComponent:modelId];
-  NSURL* modelConfigURL = [modelDirURL URLByAppendingPathComponent:@"mlc-chat-config.json"];
-  
-  NSData* jsonData = [NSData dataWithContentsOfURL:modelConfigURL];
-  if (!jsonData) {
-    if (error) {
-      *error = [NSError errorWithDomain:@"MLCEngine" code:1 userInfo:@{NSLocalizedDescriptionKey : @"Model config not found - may need to download first"}];
-    }
-    return nil;
-  }
-  
-  return [NSJSONSerialization JSONObjectWithData:jsonData options:0 error:error];
-}
+- (BOOL)downloadFile:(NSString *)modelUrl
+            filename:(NSString *)filename
+               toURL:(NSURL *)destURL
+               error:(NSError **)error {
+  NSString *urlString =
+      [NSString stringWithFormat:@"%@/resolve/main/%@", modelUrl, filename];
+  NSURL *url = [NSURL URLWithString:urlString];
 
-- (BOOL)downloadFile:(NSString*)modelUrl filename:(NSString*)filename toURL:(NSURL*)destURL error:(NSError**)error {
-  NSString* urlString = [NSString stringWithFormat:@"%@/resolve/main/%@", modelUrl, filename];
-  NSURL* url = [NSURL URLWithString:urlString];
-  
-  NSData* fileData = [NSData dataWithContentsOfURL:url];
+  NSData *fileData = [NSData dataWithContentsOfURL:url];
   if (!fileData) {
     if (error) {
-      *error = [NSError errorWithDomain:@"MLCEngine"
-                                   code:2
-                               userInfo:@{NSLocalizedDescriptionKey : [NSString stringWithFormat:@"Failed to download %@", filename]}];
+      *error = [NSError
+          errorWithDomain:@"MLCEngine"
+                     code:2
+                 userInfo:@{
+                   NSLocalizedDescriptionKey : [NSString
+                       stringWithFormat:@"Failed to download %@", filename]
+                 }];
     }
     return NO;
   }
-  
+
   if (![fileData writeToURL:destURL atomically:YES]) {
     if (error) {
-      *error = [NSError errorWithDomain:@"MLCEngine"
-                                   code:6
-                               userInfo:@{NSLocalizedDescriptionKey : [NSString stringWithFormat:@"Failed to write %@", filename]}];
+      *error = [NSError
+          errorWithDomain:@"MLCEngine"
+                     code:6
+                 userInfo:@{
+                   NSLocalizedDescriptionKey : [NSString
+                       stringWithFormat:@"Failed to write %@", filename]
+                 }];
     }
     return NO;
   }
-  
+
   return YES;
 }
 
 // Download all model files with percentage updates
-- (void)downloadModelFiles:(NSDictionary*)modelRecord
+- (void)downloadModelFiles:(NSDictionary *)modelRecord
                   progress:(void (^)(double percentage))progressCallback
-                     error:(NSError**)error {
-  NSString* modelId = modelRecord[@"model_id"];
-  NSString* modelUrl = modelRecord[@"model_url"];
-  
+                     error:(NSError **)error {
+  NSString *modelId = modelRecord[@"model_id"];
+  NSString *modelUrl = modelRecord[@"model_url"];
+
   if (!modelId || !modelUrl) {
     if (error) {
-      *error = [NSError errorWithDomain:@"MLCEngine" code:3 userInfo:@{NSLocalizedDescriptionKey : @"Missing required model record fields"}];
+      *error = [NSError errorWithDomain:@"MLCEngine"
+                                   code:3
+                               userInfo:@{
+                                 NSLocalizedDescriptionKey :
+                                     @"Missing required model record fields"
+                               }];
     }
     return;
   }
-  
+
   // Check if config already exists
-  NSURL* modelDirURL = [self.bundleURL URLByAppendingPathComponent:modelId];
-  NSURL* modelConfigURL = [modelDirURL URLByAppendingPathComponent:@"mlc-chat-config.json"];
-  NSURL* tensorCacheURL = [modelDirURL URLByAppendingPathComponent:@"tensor-cache.json"];
-  
+  NSURL *modelDirURL = [self.bundleURL URLByAppendingPathComponent:modelId];
+  NSURL *modelConfigURL =
+      [modelDirURL URLByAppendingPathComponent:@"mlc-chat-config.json"];
+  NSURL *tensorCacheURL =
+      [modelDirURL URLByAppendingPathComponent:@"tensor-cache.json"];
+
   if (!modelDirURL || !modelConfigURL) {
     if (error) {
-      *error = [NSError errorWithDomain:@"MLCEngine" code:4 userInfo:@{NSLocalizedDescriptionKey : @"Failed to construct config URLs"}];
+      *error = [NSError errorWithDomain:@"MLCEngine"
+                                   code:4
+                               userInfo:@{
+                                 NSLocalizedDescriptionKey :
+                                     @"Failed to construct config URLs"
+                               }];
     }
     return;
   }
-  
+
   // Create model directory if it doesn't exist
-  NSError* dirError;
-  [[NSFileManager defaultManager] createDirectoryAtPath:[modelDirURL path] withIntermediateDirectories:YES attributes:nil error:&dirError];
+  NSError *dirError;
+  [[NSFileManager defaultManager] createDirectoryAtPath:[modelDirURL path]
+                            withIntermediateDirectories:YES
+                                             attributes:nil
+                                                  error:&dirError];
   if (dirError) {
     *error = dirError;
     return;
   }
-  
+
   // Download and save tensor-cache if it doesn't exist
-  if (![[NSFileManager defaultManager] fileExistsAtPath:[tensorCacheURL path]]) {
-    if (![self downloadFile:modelUrl filename:@"tensor-cache.json" toURL:tensorCacheURL error:error]) {
+  if (![[NSFileManager defaultManager]
+          fileExistsAtPath:[tensorCacheURL path]]) {
+    if (![self downloadFile:modelUrl
+                   filename:@"tensor-cache.json"
+                      toURL:tensorCacheURL
+                      error:error]) {
       return;
     }
   }
-  
+
   // Read and parse tensor cache
-  NSData* tensorCacheData = [NSData dataWithContentsOfURL:tensorCacheURL];
+  NSData *tensorCacheData = [NSData dataWithContentsOfURL:tensorCacheURL];
   if (!tensorCacheData) {
     if (error) {
-      *error = [NSError errorWithDomain:@"MLCEngine" code:2 userInfo:@{NSLocalizedDescriptionKey : @"Failed to read tensor cache"}];
+      *error = [NSError
+          errorWithDomain:@"MLCEngine"
+                     code:2
+                 userInfo:@{
+                   NSLocalizedDescriptionKey : @"Failed to read tensor cache"
+                 }];
     }
     return;
   }
-  
-  NSError* tensorCacheJsonError;
-  NSDictionary* tensorCache = [NSJSONSerialization JSONObjectWithData:tensorCacheData options:0 error:&tensorCacheJsonError];
+
+  NSError *tensorCacheJsonError;
+  NSDictionary *tensorCache =
+      [NSJSONSerialization JSONObjectWithData:tensorCacheData
+                                      options:0
+                                        error:&tensorCacheJsonError];
   if (tensorCacheJsonError) {
     *error = tensorCacheJsonError;
     return;
   }
 
   // Download and save model config if it doesn't exist
-  if (![[NSFileManager defaultManager] fileExistsAtPath:[modelConfigURL path]]) {
-    if (![self downloadFile:modelUrl filename:@"mlc-chat-config.json" toURL:modelConfigURL error:error]) {
+  if (![[NSFileManager defaultManager]
+          fileExistsAtPath:[modelConfigURL path]]) {
+    if (![self downloadFile:modelUrl
+                   filename:@"mlc-chat-config.json"
+                      toURL:modelConfigURL
+                      error:error]) {
       return;
     }
   }
 
   // Read and parse model config
-  NSData* modelConfigData = [NSData dataWithContentsOfURL:modelConfigURL];
+  NSData *modelConfigData = [NSData dataWithContentsOfURL:modelConfigURL];
   if (!modelConfigData) {
     if (error) {
-      *error = [NSError errorWithDomain:@"MLCEngine" code:2 userInfo:@{NSLocalizedDescriptionKey : @"Failed to read model config"}];
+      *error = [NSError
+          errorWithDomain:@"MLCEngine"
+                     code:2
+                 userInfo:@{
+                   NSLocalizedDescriptionKey : @"Failed to read model config"
+                 }];
     }
     return;
   }
-  
-  NSError* modelConfigJsonError;
-  NSDictionary* modelConfig = [NSJSONSerialization JSONObjectWithData:modelConfigData options:0 error:&modelConfigJsonError];
+
+  NSError *modelConfigJsonError;
+  NSDictionary *modelConfig =
+      [NSJSONSerialization JSONObjectWithData:modelConfigData
+                                      options:0
+                                        error:&modelConfigJsonError];
   if (modelConfigJsonError) {
     *error = modelConfigJsonError;
     return;
   }
-  
+
   // Create unified list of files to download
-  NSMutableArray* filesToDownload = [NSMutableArray new];
-  
+  NSMutableArray *filesToDownload = [NSMutableArray new];
+
   // Add parameter files from tensor cache
-  NSArray* records = tensorCache[@"records"];
+  NSArray *records = tensorCache[@"records"];
   if ([records isKindOfClass:[NSArray class]]) {
-    for (NSDictionary* record in records) {
-      NSString* dataPath = record[@"dataPath"];
+    for (NSDictionary *record in records) {
+      NSString *dataPath = record[@"dataPath"];
       if (dataPath) {
-        NSURL* fileURL = [modelDirURL URLByAppendingPathComponent:dataPath];
+        NSURL *fileURL = [modelDirURL URLByAppendingPathComponent:dataPath];
         if (![[NSFileManager defaultManager] fileExistsAtPath:[fileURL path]]) {
           [filesToDownload addObject:dataPath];
         }
       }
     }
   }
-  
+
   // Add tokenizer files
-  NSArray* tokenizerFiles = modelConfig[@"tokenizer_files"];
+  NSArray *tokenizerFiles = modelConfig[@"tokenizer_files"];
   if ([tokenizerFiles isKindOfClass:[NSArray class]]) {
-    for (NSString* filename in tokenizerFiles) {
-      NSURL* fileURL = [modelDirURL URLByAppendingPathComponent:filename];
+    for (NSString *filename in tokenizerFiles) {
+      NSURL *fileURL = [modelDirURL URLByAppendingPathComponent:filename];
       if (![[NSFileManager defaultManager] fileExistsAtPath:[fileURL path]]) {
         [filesToDownload addObject:filename];
       }
     }
   }
-  
+
   // Download all files with progress tracking
   NSInteger totalFiles = filesToDownload.count;
   for (NSInteger i = 0; i < totalFiles; i++) {
-    NSString* filename = filesToDownload[i];
-    NSURL* fileURL = [modelDirURL URLByAppendingPathComponent:filename];
-    
+    NSString *filename = filesToDownload[i];
+    NSURL *fileURL = [modelDirURL URLByAppendingPathComponent:filename];
+
     // Download the file first
-    if (![self downloadFile:modelUrl filename:filename toURL:fileURL error:error]) {
+    if (![self downloadFile:modelUrl
+                   filename:filename
+                      toURL:fileURL
+                      error:error]) {
       return;
     }
-    
+
     // Calculate and emit progress after successful download
-    double percentage = totalFiles > 0 ? (double)(i + 1) / totalFiles * 100.0 : 100.0;
+    double percentage =
+        totalFiles > 0 ? (double)(i + 1) / totalFiles * 100.0 : 100.0;
     if (progressCallback) {
       progressCallback(round(percentage));
     }
   }
 }
 
-- (void)downloadModel:(NSString*)modelId
+- (void)downloadModel:(NSString *)modelId  // Downloads the files for modelId off the calling thread, then settles the promise.
               resolve:(RCTPromiseResolveBlock)resolve
                reject:(RCTPromiseRejectBlock)reject {
-  dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
-    @try {
-      NSDictionary* modelRecord = [self findModelById:modelId];
-      
-      if (!modelRecord) {
-        reject(@"MLCEngine", @"There's no record for requested model", nil);
-        return;
-      }
-      
-      NSError* downloadError = nil;
-      [self downloadModelFiles:modelRecord
-                      progress:^(double percentage) {
-        [self emitOnDownloadProgress:@{@"percentage" : @(percentage)}];
-      }
-                         error:&downloadError];
-      
-      if (downloadError) {
-        reject(@"MLCEngine", @"Failed to download model", downloadError);
-        return;
-      }
-      
-      resolve(nil);
-    } @catch (NSException* exception) {
-      reject(@"MLCEngine", exception.reason, nil);
-    }
-  });
+  dispatch_async(  // The whole operation runs on the default-priority global queue.
+      dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
+        @try {  // Any NSException raised below is converted into a promise rejection.
+          NSDictionary *modelRecord = [self findModelById:modelId];
+
+          if (!modelRecord) {  // No record for this id: reject without attempting a download.
+            reject(@"MLCEngine", @"There's no record for requested model", nil);
+            return;
+          }
+
+          NSError *downloadError = nil;
+          [self downloadModelFiles:modelRecord  // The call's result is not inspected; failure is read from downloadError.
+                          progress:^(double percentage) {
+                            [self emitOnDownloadProgress:@{  // Forward each progress value as an event payload.
+                              @"percentage" : @(percentage)
+                            }];
+                          }
+                             error:&downloadError];
+
+          if (downloadError) {  // The underlying NSError is passed through to the rejection.
+            reject(@"MLCEngine", @"Failed to download model", downloadError);
+            return;
+          }
+
+          resolve(nil);  // Success carries no value.
+        } @catch (NSException *exception) {
+          reject(@"MLCEngine", exception.reason, nil);
+        }
+      });
 }
 
-- (void)removeModel:(NSString*)modelId
+- (void)removeModel:(NSString *)modelId  // Deletes the directory <bundleURL>/<modelId> on a background queue and settles the promise.
             resolve:(RCTPromiseResolveBlock)resolve
              reject:(RCTPromiseRejectBlock)reject {
-  dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
-    @try {
-      NSURL* modelDirURL = [self.bundleURL URLByAppendingPathComponent:modelId];
-      NSString* modelDirPath = [modelDirURL path];
-      
-      BOOL isDirectory;
-      if ([[NSFileManager defaultManager] fileExistsAtPath:modelDirPath isDirectory:&isDirectory]) {
-        if (isDirectory) {
-          NSError* removeError;
-          BOOL removed = [[NSFileManager defaultManager] removeItemAtPath:modelDirPath error:&removeError];
-          
-          if (removed) {
-            resolve(nil);
+  dispatch_async(
+      dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
+        @try {  // Any NSException raised below is converted into a promise rejection.
+          NSURL *modelDirURL =
+              [self.bundleURL URLByAppendingPathComponent:modelId];  // NOTE(review): confirm modelId cannot be empty or contain ".." — this path is deleted below.
+          NSString *modelDirPath = [modelDirURL path];
+
+          BOOL isDirectory;  // Only read inside the branch where the existence check succeeded.
+          if ([[NSFileManager defaultManager] fileExistsAtPath:modelDirPath
+                                                   isDirectory:&isDirectory]) {
+            if (isDirectory) {
+              NSError *removeError;
+              BOOL removed = [[NSFileManager defaultManager]
+                  removeItemAtPath:modelDirPath
+                             error:&removeError];
+
+              if (removed) {  // Success is decided by the BOOL result, not by the error pointer.
+                resolve(nil);
+              } else {
+                reject(@"MLCEngine",
+                       [NSString
+                           stringWithFormat:@"Failed to clean model: %@",
+                                            removeError.localizedDescription],
+                       removeError);
+              }
+            } else {
+              reject(@"MLCEngine", @"Path exists but is not a directory", nil);  // A plain file at the model path is left untouched.
+            }
           } else {
-            reject(@"MLCEngine", [NSString stringWithFormat:@"Failed to clean model: %@", removeError.localizedDescription], removeError);
+            resolve(nil);  // Nothing exists at the path: treated as success.
           }
-        } else {
-          reject(@"MLCEngine", @"Path exists but is not a directory", nil);
+        } @catch (NSException *exception) {
+          reject(@"MLCEngine", exception.reason, nil);
         }
-      } else {
-        resolve(nil);
-      }
-    } @catch (NSException* exception) {
-      reject(@"MLCEngine", exception.reason, nil);
-    }
-  });
+      });
 }
 
 - (void)unloadModel:(RCTPromiseResolveBlock)resolve
@@ -518,10 +700,16 @@ - (void)unloadModel:(RCTPromiseResolveBlock)resolve
   resolve(nil);
 }
 
-- (void)cancelStream:(nonnull NSString *)streamId resolve:(nonnull RCTPromiseResolveBlock)resolve reject:(nonnull RCTPromiseRejectBlock)reject { 
-  [self.engine cancelRequest:streamId];
+- (void)cancelStream:(nonnull NSString *)streamId  // Cancels the stream with this id; resolves with nil and never calls reject.
+             resolve:(nonnull RCTPromiseResolveBlock)resolve
+              reject:(nonnull RCTPromiseRejectBlock)reject {
+  [self.pendingRequests removeObjectForKey:streamId];  // Drop the locally tracked entry for this id first.
+  [self.engine abort:streamId];  // Then ask the engine to abort the request.
   resolve(nil);
 }
 
+- (void)dealloc {  // On teardown, tell the engine to exit its background loop.
+  [self.engine exitBackgroundLoop];  // NOTE(review): property accessor used in dealloc; prefer direct ivar access if a backing ivar exists — confirm.
+}
 
 @end
diff --git a/packages/mlc/ios/engine/BackgroundWorker.h b/packages/mlc/ios/engine/BackgroundWorker.h
deleted file mode 100644
index a3899242..00000000
--- a/packages/mlc/ios/engine/BackgroundWorker.h
+++ /dev/null
@@ -1,14 +0,0 @@
-//
-//  BackgroundWorker.h
-//  Pods
-//
-
-#import <Foundation/Foundation.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-@interface BackgroundWorker : NSThread
-- (instancetype)initWithTask:(void (^)(void))task;
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/packages/mlc/ios/engine/BackgroundWorker.mm b/packages/mlc/ios/engine/BackgroundWorker.mm
deleted file mode 100644
index e07bfb05..00000000
--- a/packages/mlc/ios/engine/BackgroundWorker.mm
+++ /dev/null
@@ -1,32 +0,0 @@
-//
-//  BackgroundWorker.mm
-//  Pods
-//
-
-#import "BackgroundWorker.h"
-
-/**
- * BackgroundWorker manages background thread execution for the MLC engine.
- * This class provides a simple interface to run long-running tasks on separate threads,
- * ensuring the main thread remains responsive while the LLM engine processes requests.
- * It's used to run the engine's background loop and stream processing loop concurrently.
- */
-@implementation BackgroundWorker {
-  void (^_task)(void);
-}
-
-- (instancetype)initWithTask:(void (^)(void))task {
-  self = [super init];
-  if (self) {
-    _task = [task copy];
-  }
-  return self;
-}
-
-- (void)main {
-  if (_task) {
-    _task();
-  }
-}
-
-@end
diff --git a/packages/mlc/ios/engine/EngineState.h b/packages/mlc/ios/engine/EngineState.h
deleted file mode 100644
index e9f8dfd9..00000000
--- a/packages/mlc/ios/engine/EngineState.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//
-//  MLCEngine.h
-//  Pods
-//
-//  Created by Szymon Rybczak on 19/07/2024.
-//
-
-#import "JSONFFIEngine.h"
-#import <Foundation/Foundation.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-@interface EngineState : NSObject
-@property(nonatomic, strong) NSMutableDictionary<NSString *, id> *requestStateMap;
-
-- (NSString*)chatCompletionWithJSONFFIEngine:(JSONFFIEngine *)jsonFFIEngine
-                                request:(NSDictionary *)request
-                             completion:(void (^)(NSDictionary* response))completion;
-- (void)streamCallbackWithResult:(NSString *)result;
-- (void)cancelRequest:(NSString *)requestId
-    withJSONFFIEngine:(JSONFFIEngine *)jsonFFIEngine;
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/packages/mlc/ios/engine/EngineState.mm b/packages/mlc/ios/engine/EngineState.mm
deleted file mode 100644
index 52ea4066..00000000
--- a/packages/mlc/ios/engine/EngineState.mm
+++ /dev/null
@@ -1,73 +0,0 @@
-//
-//  EngineState.mm
-//  Pods
-//
-
-#import "EngineState.h"
-#import "JSONFFIEngine.h"
-
-/**
- * EngineState manages the request lifecycle and callback routing for chat completions.
- * It maintains a mapping between request IDs and their corresponding completion handlers,
- * ensuring that streaming responses are properly routed back to the correct caller.
- * This class handles JSON serialization/deserialization and coordinates between
- * the high-level API and the low-level JSON FFI engine.
- */
-@implementation EngineState
-
-- (instancetype)init {
-  self = [super init];
-  if (self) {
-    _requestStateMap = [NSMutableDictionary new];
-  }
-  return self;
-}
-
-- (NSString*)chatCompletionWithJSONFFIEngine:(JSONFFIEngine*)jsonFFIEngine
-                                request:(NSDictionary*)request
-                             completion:(void (^)(NSDictionary* response))completion {
-  NSError* error;
-  NSData* jsonData = [NSJSONSerialization dataWithJSONObject:request options:0 error:&error];
-  if (error) {
-    @throw [NSException exceptionWithName:@"JSONSerializationException"
-                                   reason:[NSString stringWithFormat:@"Failed to serialize request: %@",
-                                           error.localizedDescription]
-                                 userInfo:nil];
-  }
-
-  NSString* jsonRequest = [[NSString alloc] initWithData:jsonData encoding:NSUTF8StringEncoding];
-  NSString* requestID = [[NSUUID UUID] UUIDString];
-
-  self.requestStateMap[requestID] = completion;
-
-  [jsonFFIEngine chatCompletion:jsonRequest requestID:requestID];
-  
-  return requestID;
-}
-
-- (void)streamCallbackWithResult:(NSString*)result {
-  NSError* error;
-  NSArray* responses = [NSJSONSerialization JSONObjectWithData:[result dataUsingEncoding:NSUTF8StringEncoding] options:0 error:&error];
-  if (error) {
-    NSLog(@"Error decoding JSON: %@", error);
-    return;
-  }
-
-  for (NSDictionary* res in responses) {
-    NSString* requestID = res[@"id"];
-    void (^completion)(NSDictionary*) = self.requestStateMap[requestID];
-    if (completion) {
-      completion(res);
-      if (res[@"usage"]) {
-        [self.requestStateMap removeObjectForKey:requestID];
-      }
-    }
-  }
-}
-
-- (void)cancelRequest:(NSString *)requestId withJSONFFIEngine:(JSONFFIEngine *)jsonFFIEngine {
-  [self.requestStateMap removeObjectForKey:requestId];
-  [jsonFFIEngine abort:requestId];
-}
-
-@end
diff --git a/packages/mlc/ios/engine/JSONFFIEngine.h b/packages/mlc/ios/engine/JSONFFIEngine.h
deleted file mode 100644
index 1e4fb441..00000000
--- a/packages/mlc/ios/engine/JSONFFIEngine.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) MLC-AI
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This file is derived from the MLC-LLM project:
- * https://github.com/mlc-ai/mlc-llm
- */
-
-#import <Foundation/Foundation.h>
-#import <UIKit/UIKit.h>
-
-/**
- * This is an internal Raw JSON FFI Engine that redirects request to internal JSON FFI Engine in C++
- */
-@interface JSONFFIEngine : NSObject
-
-- (void)initBackgroundEngine:(void (^)(NSString *))streamCallback;
-
-- (void)reload:(NSString *)engineConfig;
-
-- (void)unload;
-
-- (void)reset;
-
-- (void)chatCompletion:(NSString *)requestJSON requestID:(NSString *)requestID;
-
-- (void)abort:(NSString *)requestID;
-
-- (void)runBackgroundLoop;
-
-- (void)runBackgroundStreamBackLoop;
-
-- (void)exitBackgroundLoop;
-
-@end
diff --git a/packages/mlc/ios/engine/JSONFFIEngine.mm b/packages/mlc/ios/engine/JSONFFIEngine.mm
deleted file mode 100644
index cec8778d..00000000
--- a/packages/mlc/ios/engine/JSONFFIEngine.mm
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) MLC-AI
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This file is derived from the MLC-LLM project:
- * https://github.com/mlc-ai/mlc-llm
- */
-#import <Foundation/Foundation.h>
-#import <UIKit/UIKit.h>
-#include <os/proc.h>
-
-#include "JSONFFIEngine.h"
-
-#define TVM_USE_LIBBACKTRACE 0
-#define DMLC_USE_LOGGING_LIBRARY <tvm/runtime/logging.h>
-
-#include <tvm/runtime/packed_func.h>
-#include <tvm/runtime/registry.h>
-
-using namespace tvm::runtime;
-
-@implementation JSONFFIEngine {
-  // Internal c++ classes
-  // internal module backed by JSON FFI
-  Module json_ffi_engine_;
-  // member functions
-  PackedFunc init_background_engine_func_;
-  PackedFunc unload_func_;
-  PackedFunc reload_func_;
-  PackedFunc reset_func_;
-  PackedFunc chat_completion_func_;
-  PackedFunc abort_func_;
-  PackedFunc run_background_loop_func_;
-  PackedFunc run_background_stream_back_loop_func_;
-  PackedFunc exit_background_loop_func_;
-}
-
-- (instancetype)init {
-  if (self = [super init]) {
-    // load chat module
-    const PackedFunc* f_json_ffi_create = Registry::Get("mlc.json_ffi.CreateJSONFFIEngine");
-    ICHECK(f_json_ffi_create) << "Cannot find mlc.json_ffi.CreateJSONFFIEngine";
-    json_ffi_engine_ = (*f_json_ffi_create)();
-    init_background_engine_func_ = json_ffi_engine_->GetFunction("init_background_engine");
-    reload_func_ = json_ffi_engine_->GetFunction("reload");
-    unload_func_ = json_ffi_engine_->GetFunction("unload");
-    reset_func_ = json_ffi_engine_->GetFunction("reset");
-    chat_completion_func_ = json_ffi_engine_->GetFunction("chat_completion");
-    abort_func_ = json_ffi_engine_->GetFunction("abort");
-    run_background_loop_func_ = json_ffi_engine_->GetFunction("run_background_loop");
-    run_background_stream_back_loop_func_ =
-    json_ffi_engine_->GetFunction("run_background_stream_back_loop");
-    exit_background_loop_func_ = json_ffi_engine_->GetFunction("exit_background_loop");
-    
-    ICHECK(init_background_engine_func_ != nullptr);
-    ICHECK(reload_func_ != nullptr);
-    ICHECK(unload_func_ != nullptr);
-    ICHECK(reset_func_ != nullptr);
-    ICHECK(chat_completion_func_ != nullptr);
-    ICHECK(abort_func_ != nullptr);
-    ICHECK(run_background_loop_func_ != nullptr);
-    ICHECK(run_background_stream_back_loop_func_ != nullptr);
-    ICHECK(exit_background_loop_func_ != nullptr);
-  }
-  return self;
-}
-
-- (void)initBackgroundEngine:(void (^)(NSString*))streamCallback {
-  TypedPackedFunc<void(String)> internal_stream_callback([streamCallback](String value) {
-    streamCallback([NSString stringWithUTF8String:value.c_str()]);
-  });
-  int device_type = kDLMetal;
-  int device_id = 0;
-  init_background_engine_func_(device_type, device_id, internal_stream_callback);
-}
-
-- (void)reload:(NSString*)engineConfigJson {
-  std::string engine_config = engineConfigJson.UTF8String;
-  reload_func_(engine_config);
-}
-
-- (void)unload {
-  unload_func_();
-}
-
-- (void)reset {
-  reset_func_();
-}
-
-- (void)chatCompletion:(NSString*)requestJSON requestID:(NSString*)requestID {
-  std::string request_json = requestJSON.UTF8String;
-  std::string request_id = requestID.UTF8String;
-  chat_completion_func_(request_json, request_id);
-}
-
-- (void)abort:(NSString*)requestID {
-  std::string request_id = requestID.UTF8String;
-  abort_func_(request_id);
-}
-
-- (void)runBackgroundLoop {
-  run_background_loop_func_();
-}
-
-- (void)runBackgroundStreamBackLoop {
-  run_background_stream_back_loop_func_();
-}
-
-- (void)exitBackgroundLoop {
-  exit_background_loop_func_();
-}
-
-@end
diff --git a/packages/mlc/ios/engine/LLMEngine.h b/packages/mlc/ios/engine/LLMEngine.h
index 6a3fb5fb..1e4fb441 100644
--- a/packages/mlc/ios/engine/LLMEngine.h
+++ b/packages/mlc/ios/engine/LLMEngine.h
@@ -17,20 +17,29 @@
  */
 
 #import <Foundation/Foundation.h>
+#import <UIKit/UIKit.h>
 
-NS_ASSUME_NONNULL_BEGIN
+/**
+ * Internal raw JSON FFI engine that forwards requests to the underlying JSON FFI engine implemented in C++.
+ */
+@interface JSONFFIEngine : NSObject
 
-@interface LLMEngine : NSObject
+- (void)initBackgroundEngine:(void (^)(NSString *))streamCallback;  // Initializes the engine; streamCallback receives each streamed result as a string.
 
-- (instancetype)init;
+- (void)reload:(NSString *)engineConfig;  // Reloads the engine with the given configuration string.
 
-- (void)reloadWithModelPath:(NSString *)modelPath modelLib:(NSString *)modelLib;
-- (void)reset;
 - (void)unload;
 
-- (NSString*)chatCompletionWithMessages:(NSArray *)messages options:(NSDictionary *)options completion:(void (^)(NSDictionary* response))completion;
-- (void)cancelRequest:(NSString *)requestId;
+- (void)reset;  // Forwards to the engine's "reset" function.
 
-@end
+- (void)chatCompletion:(NSString *)requestJSON requestID:(NSString *)requestID;  // Submits a request JSON string under the given request id.
+
+- (void)abort:(NSString *)requestID;  // Aborts the request with the given id.
 
-NS_ASSUME_NONNULL_END
+- (void)runBackgroundLoop;  // Calls the engine's background loop on the calling thread; presumably blocks until exitBackgroundLoop — confirm.
+
+- (void)runBackgroundStreamBackLoop;  // Calls the engine's stream-back loop on the calling thread; presumably blocking as well — confirm.
+
+- (void)exitBackgroundLoop;  // Forwards to the engine's "exit_background_loop" function.
+
+@end
diff --git a/packages/mlc/ios/engine/LLMEngine.mm b/packages/mlc/ios/engine/LLMEngine.mm
index bda8f0ff..2f31be86 100644
--- a/packages/mlc/ios/engine/LLMEngine.mm
+++ b/packages/mlc/ios/engine/LLMEngine.mm
@@ -16,73 +16,121 @@
  * https://github.com/mlc-ai/mlc-llm
  */
 
-#import "LLMEngine.h"
-#import "BackgroundWorker.h"
-#import "EngineState.h"
+#import <Foundation/Foundation.h>
+#import <UIKit/UIKit.h>
+#include <os/proc.h>
 
-@interface LLMEngine ()
+#include "LLMEngine.h"
 
-@property(nonatomic, strong) EngineState* state;
-@property(nonatomic, strong) JSONFFIEngine* jsonFFIEngine;
-@property(nonatomic, strong) NSMutableArray<NSThread*>* threads;
+#define TVM_USE_LIBBACKTRACE 0
+#define DMLC_USE_LOGGING_LIBRARY <tvm/runtime/logging.h>
 
-@end
+#include <tvm/ffi/extra/module.h>
+#include <tvm/ffi/function.h>
+#include <tvm/ffi/optional.h>
+#include <tvm/ffi/string.h>
+#include <tvm/runtime/module.h>
+
+using namespace tvm::runtime;
+using tvm::ffi::Function;
+using tvm::ffi::Module;
+using tvm::ffi::Optional;
+using tvm::ffi::String;
+using tvm::ffi::TypedFunction;
 
-@implementation LLMEngine
+@implementation JSONFFIEngine {  // Objective-C++ wrapper; its ivars hold the engine module and handles to its member functions.
+  // Internal C++ objects
+  // Internal module backed by the JSON FFI; empty until -init stores the created module
+  Optional<Module> json_ffi_engine_;
+  // Member functions looked up by name from the module in -init
+  Function init_background_engine_func_;
+  Function unload_func_;
+  Function reload_func_;
+  Function reset_func_;
+  Function chat_completion_func_;
+  Function abort_func_;
+  Function run_background_loop_func_;
+  Function run_background_stream_back_loop_func_;
+  Function exit_background_loop_func_;
+}
 
 - (instancetype)init {
-  self = [super init];
-  if (self) {
-    _state = [[EngineState alloc] init];
-    _jsonFFIEngine = [[JSONFFIEngine alloc] init];
-    _threads = [NSMutableArray array];
-    
-    [_jsonFFIEngine initBackgroundEngine:^(NSString* _Nullable result) {
-      [self.state streamCallbackWithResult:result];
-    }];
-    
-    BackgroundWorker* backgroundWorker = [[BackgroundWorker alloc] initWithTask:^{
-      [NSThread setThreadPriority:1.0];
-      [self.jsonFFIEngine runBackgroundLoop];
-    }];
-    
-    BackgroundWorker* backgroundStreamBackWorker = [[BackgroundWorker alloc] initWithTask:^{
-      [self.jsonFFIEngine runBackgroundStreamBackLoop];
-    }];
-    
-    backgroundWorker.qualityOfService = NSQualityOfServiceUserInteractive;
-    [_threads addObject:backgroundWorker];
-    [_threads addObject:backgroundStreamBackWorker];
-    [backgroundWorker start];
-    [backgroundStreamBackWorker start];
+  if (self = [super init]) {
+    // Create the JSON FFI engine module via its globally registered factory function
+    Function f_json_ffi_create = Function::GetGlobalRequired("mlc.json_ffi.CreateJSONFFIEngine");  // Presumably fails hard when the name is not registered — confirm.
+    json_ffi_engine_ = f_json_ffi_create().cast<Module>();
+    init_background_engine_func_ =
+        json_ffi_engine_.value()->GetFunction("init_background_engine").value_or(Function(nullptr));  // A missing member becomes a null Function, caught by the ICHECKs below.
+    reload_func_ = json_ffi_engine_.value()->GetFunction("reload").value_or(Function(nullptr));
+    unload_func_ = json_ffi_engine_.value()->GetFunction("unload").value_or(Function(nullptr));
+    reset_func_ = json_ffi_engine_.value()->GetFunction("reset").value_or(Function(nullptr));
+    chat_completion_func_ =
+        json_ffi_engine_.value()->GetFunction("chat_completion").value_or(Function(nullptr));
+    abort_func_ = json_ffi_engine_.value()->GetFunction("abort").value_or(Function(nullptr));
+    run_background_loop_func_ =
+        json_ffi_engine_.value()->GetFunction("run_background_loop").value_or(Function(nullptr));
+    run_background_stream_back_loop_func_ = json_ffi_engine_.value()
+                                                ->GetFunction("run_background_stream_back_loop")
+                                                .value_or(Function(nullptr));
+    exit_background_loop_func_ =
+        json_ffi_engine_.value()->GetFunction("exit_background_loop").value_or(Function(nullptr));
+
+    ICHECK(init_background_engine_func_ != nullptr);  // Verify every member function was found before the engine is used.
+    ICHECK(reload_func_ != nullptr);
+    ICHECK(unload_func_ != nullptr);
+    ICHECK(reset_func_ != nullptr);
+    ICHECK(chat_completion_func_ != nullptr);
+    ICHECK(abort_func_ != nullptr);
+    ICHECK(run_background_loop_func_ != nullptr);
+    ICHECK(run_background_stream_back_loop_func_ != nullptr);
+    ICHECK(exit_background_loop_func_ != nullptr);
   }
   return self;
 }
 
-- (void)dealloc {
-  [self.jsonFFIEngine exitBackgroundLoop];
+- (void)initBackgroundEngine:(void (^)(NSString*))streamCallback {  // Wires streamCallback into the engine and initializes it for Metal device 0.
+  TypedFunction<void(String)> internal_stream_callback([streamCallback](String value) {  // The C++ lambda keeps its own copy of the block.
+    streamCallback([NSString stringWithUTF8String:value.c_str()]);  // NOTE(review): stringWithUTF8String: returns nil for invalid UTF-8 — confirm the callback tolerates nil.
+  });
+  int device_type = kDLMetal;
+  int device_id = 0;
+  init_background_engine_func_(device_type, device_id, internal_stream_callback);
+}
+
+- (void)reload:(NSString*)engineConfigJson {  // Passes the configuration text to the engine's "reload" function.
+  std::string engine_config = engineConfigJson.UTF8String;  // NOTE(review): a nil argument yields NULL here, and std::string from NULL is undefined — callers must pass non-nil.
+  reload_func_(engine_config);
 }
 
-- (void)reloadWithModelPath:(NSString*)modelPath modelLib:(NSString*)modelLib {
-  NSString* engineConfig =
-  [NSString stringWithFormat:@"{\"model\": \"%@\", \"model_lib\": \"system://%@\", \"mode\": \"interactive\"}", modelPath, modelLib];
-  [self.jsonFFIEngine reload:engineConfig];
+- (void)unload {  // Forwards to the engine's "unload" function.
+  unload_func_();
 }
 
 - (void)reset {
-  [self.jsonFFIEngine reset];
+  reset_func_();  // Forwards to the engine's "reset" function.
 }
 
-- (void)unload {
-  [self.jsonFFIEngine unload];
+- (void)chatCompletion:(NSString*)requestJSON requestID:(NSString*)requestID {  // Submits requestJSON to "chat_completion" under requestID.
+  std::string request_json = requestJSON.UTF8String;  // NOTE(review): same nil hazard as in -reload: — both arguments must be non-nil.
+  std::string request_id = requestID.UTF8String;
+  chat_completion_func_(request_json, request_id);
+}
+
+- (void)abort:(NSString*)requestID {  // Asks the engine's "abort" function to cancel the request with this id.
+  std::string request_id = requestID.UTF8String;  // NOTE(review): requestID must be non-nil (NULL UTF8String otherwise).
+  abort_func_(request_id);
+}
+
+- (void)runBackgroundLoop {  // Calls "run_background_loop" synchronously on the calling thread.
+  run_background_loop_func_();
 }
 
-- (NSString*)chatCompletionWithMessages:(NSArray*)messages options:(NSDictionary*)options completion:(void (^)(NSDictionary* response))completion {
-  return [self.state chatCompletionWithJSONFFIEngine:self.jsonFFIEngine request:options completion:completion];
+- (void)runBackgroundStreamBackLoop {  // Calls "run_background_stream_back_loop" synchronously on the calling thread.
+  run_background_stream_back_loop_func_();
 }
 
-- (void)cancelRequest:(NSString *)requestId {
-  [self.state cancelRequest:requestId withJSONFFIEngine:self.jsonFFIEngine];
+- (void)exitBackgroundLoop {  // Forwards to the engine's "exit_background_loop" function.
+  exit_background_loop_func_();
 }
 
 @end
diff --git a/packages/mlc/mlc-package-config-ios.json b/packages/mlc/mlc-package-config-ios.json
index 39261074..330d645a 100644
--- a/packages/mlc/mlc-package-config-ios.json
+++ b/packages/mlc/mlc-package-config-ios.json
@@ -32,8 +32,8 @@
       }
     },
     {
-      "model": "HF://mlc-ai/Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
-      "model_id": "Qwen2.5-0.5B-Instruct",
+      "model": "HF://mlc-ai/Qwen2-1.5B-Instruct-q4f16_1-MLC",
+      "model_id": "Qwen2-1.5B-Instruct",
       "estimated_vram_bytes": 600000000,
       "bundle_weight": false,
       "overrides": {