@@ -239,6 +239,7 @@ class NanoGraphRAGIndexingPipeline(GraphRAGIndexingPipeline):

     prompts: dict[str, str] = {}
     collection_graph_id: str
+    index_batch_size: int = INDEX_BATCHSIZE

     def store_file_id_with_graph_id(self, file_ids: list[str | None]):
         if not settings.USE_GLOBAL_GRAPHRAG:
@@ -279,18 +280,31 @@ def get_user_settings(cls) -> dict:
             from nano_graphrag.prompt import PROMPTS

             blacklist_keywords = ["default", "response", "process"]
-            return {
-                prompt_name: {
-                    "name": f"Prompt for '{prompt_name}'",
-                    "value": content,
-                    "component": "text",
+            settings_dict = {
+                "batch_size": {
+                    "name": (
+                        "Index batch size " "(reduce if you have rate limit issues)"
+                    ),
+                    "value": INDEX_BATCHSIZE,
+                    "component": "number",
                 }
-                for prompt_name, content in PROMPTS.items()
-                if all(
-                    keyword not in prompt_name.lower() for keyword in blacklist_keywords
-                )
-                and isinstance(content, str)
             }
+            settings_dict.update(
+                {
+                    prompt_name: {
+                        "name": f"Prompt for '{prompt_name}'",
+                        "value": content,
+                        "component": "text",
+                    }
+                    for prompt_name, content in PROMPTS.items()
+                    if all(
+                        keyword not in prompt_name.lower()
+                        for keyword in blacklist_keywords
+                    )
+                    and isinstance(content, str)
+                }
+            )
+            return settings_dict
         except ImportError as e:
             print(e)
             return {}
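For reference, a minimal sketch of the dict shape get_user_settings now returns with this change. The "entity_extraction" key below is only a hypothetical example; the real prompt keys come from nano_graphrag's PROMPTS, and the real batch-size default is the project's INDEX_BATCHSIZE constant.

# Illustrative shape only, not produced by the pipeline itself.
example_user_settings = {
    "batch_size": {
        "name": "Index batch size (reduce if you have rate limit issues)",
        "value": 4,  # stands in for INDEX_BATCHSIZE, whose actual value is project-defined
        "component": "number",
    },
    "entity_extraction": {  # hypothetical prompt name
        "name": "Prompt for 'entity_extraction'",
        "value": "-- prompt text from PROMPTS --",
        "component": "text",
    },
}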
@@ -355,8 +369,8 @@ def call_graphrag_index(self, graph_id: str, docs: list[Document]):
             ),
         )

-        for doc_id in range(0, len(all_docs), INDEX_BATCHSIZE):
-            cur_docs = all_docs[doc_id : doc_id + INDEX_BATCHSIZE]
+        for doc_id in range(0, len(all_docs), self.index_batch_size):
+            cur_docs = all_docs[doc_id : doc_id + self.index_batch_size]
             combined_doc = "\n".join(cur_docs)

             # Use insert for incremental updates
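A standalone sketch of the batching behaviour this loop implements, assuming all_docs is a list of strings and index_batch_size is a positive int; batch_docs is a hypothetical helper, not part of the pipeline.

def batch_docs(all_docs: list[str], index_batch_size: int) -> list[str]:
    # Slice the documents into fixed-size batches and join each batch into a
    # single string, mirroring the loop in call_graphrag_index above.
    batches = []
    for doc_id in range(0, len(all_docs), index_batch_size):
        cur_docs = all_docs[doc_id : doc_id + index_batch_size]
        batches.append("\n".join(cur_docs))
    return batches

# e.g. batch_docs(["a", "b", "c"], 2) -> ["a\nb", "c"]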