@@ -1059,15 +1059,18 @@ def _may_extract_zip(self, files, zip_dir: str):
10591059 """Handle zip files"""
10601060 zip_files = [file for file in files if file .endswith (".zip" )]
10611061 remaining_files = [file for file in files if not file .endswith ("zip" )]
1062+ errors : list [str ] = []
10621063
10631064 # Clean-up <zip_dir> before unzip to remove old files
10641065 shutil .rmtree (zip_dir , ignore_errors = True )
10651066
1067+ # Unzip
10661068 for zip_file in zip_files :
10671069 # Prepare new zip output dir, separated for each files
10681070 basename = os .path .splitext (os .path .basename (zip_file ))[0 ]
10691071 zip_out_dir = os .path .join (zip_dir , basename )
10701072 os .makedirs (zip_out_dir , exist_ok = True )
1073+
10711074 with zipfile .ZipFile (zip_file , "r" ) as zip_ref :
10721075 zip_ref .extractall (zip_out_dir )
10731076
@@ -1084,7 +1087,7 @@ def _may_extract_zip(self, files, zip_dir: str):
10841087 if n_zip_file > 0 :
10851088 print (f"Update zip files: { n_zip_file } " )
10861089
1087- return remaining_files
1090+ return remaining_files , errors
10881091
10891092 def index_fn (
10901093 self , files , urls , reindex : bool , settings , user_id
@@ -1100,20 +1103,22 @@ def index_fn(
11001103 """
11011104 if urls :
11021105 files = [it .strip () for it in urls .split ("\n " )]
1103- errors = []
1106+ errors = self . validate_urls ( files )
11041107 else :
11051108 if not files :
11061109 gr .Info ("No uploaded file" )
11071110 yield "" , ""
11081111 return
1112+ files , unzip_errors = self ._may_extract_zip (
1113+ files , flowsettings .KH_ZIP_INPUT_DIR
1114+ )
1115+ errors = self .validate_files (files )
1116+ errors .extend (unzip_errors )
11091117
1110- files = self ._may_extract_zip (files , flowsettings .KH_ZIP_INPUT_DIR )
1111-
1112- errors = self .validate (files )
1113- if errors :
1114- gr .Warning (", " .join (errors ))
1115- yield "" , ""
1116- return
1118+ if errors :
1119+ gr .Warning (", " .join (errors ))
1120+ yield "" , ""
1121+ return
11171122
11181123 gr .Info (f"Start indexing { len (files )} files..." )
11191124
@@ -1569,7 +1574,7 @@ def interact_group_list(self, list_groups, ev: gr.SelectData):
15691574 selected_item ["files" ],
15701575 )
15711576
1572- def validate (self , files : list [str ]):
1577+ def validate_files (self , files : list [str ]):
15731578 """Validate if the files are valid"""
15741579 paths = [Path (file ) for file in files ]
15751580 errors = []
@@ -1598,6 +1603,14 @@ def validate(self, files: list[str]):
15981603
15991604 return errors
16001605
def validate_urls(self, urls: list[str]) -> list[str]:
    """Validate that every entry looks like an HTTP(S) URL.

    Args:
        urls: candidate URL strings, one entry per URL.

    Returns:
        A list of error messages, one per rejected entry, in input order.
        The list is empty when every entry is accepted.
    """
    # Require the full "scheme://" prefix: a bare "http" prefix would also
    # accept strings such as "httpfoo" or "httpx://...". A single tuple
    # check also removes the redundant "https" test ("https..." already
    # starts with "http").
    allowed_prefixes = ("http://", "https://")
    return [
        f"Invalid url `{url}`"
        for url in urls
        if not url.startswith(allowed_prefixes)
    ]
1613+
16011614
16021615class FileSelector (BasePage ):
16031616 """File selector UI in the Chat page"""
0 commit comments