Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 64 additions & 12 deletions libs/ktem/ktem/index/file/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,20 @@
import zipfile
from copy import deepcopy
from pathlib import Path
from typing import Generator
from typing import Generator, List, Optional

import gradio as gr
import pandas as pd
from gradio.data_classes import FileData
from gradio.data_classes import FileData, ListFiles
from gradio.utils import NamedString
from ktem.app import BasePage
from ktem.db.engine import engine
from ktem.utils.render import Render
from sqlalchemy import select
from sqlalchemy.orm import Session
from theflow.settings import settings as flowsettings

from ktem.app import BasePage
from ktem.db.engine import engine
from ktem.utils.render import Render

DOWNLOAD_MESSAGE = "Press again to download"
MAX_FILENAME_LENGTH = 20

Expand Down Expand Up @@ -49,13 +50,28 @@
"""


def valid_files(
files: List[gr.utils.NamedString],
):
"""Validate files and process both valid and invalid files with a warning."""
if not files:
return "No files uploaded."

result = []
for file in files:
file_path = Path(file.name)
result.append(f"File: {file_path.name}, Size: {file_path.stat().st_size} bytes")

return "\n".join(result)


class File(gr.File):
"""Subclass from gr.File to maintain the original filename
"""Subclass from gr.File to add support for directory upload."""

The issue happens when user uploads file with name like: !@#$%%^&*().pdf
"""
directory_file_types: Optional[List[str]] = None

def _process_single_file(self, f: FileData) -> NamedString | bytes:
"""The issue happens when user uploads file with name like: !@#$%%^&*().pdf"""
file_name = f.path
if self.type == "filepath":
if f.orig_name and Path(file_name).name != f.orig_name:
Expand All @@ -74,6 +90,37 @@ def _process_single_file(self, f: FileData) -> NamedString | bytes:
+ ". Please choose from: 'filepath', 'binary'."
)

def preprocess(
self, payload: ListFiles | FileData | None
) -> bytes | str | list[bytes] | list[str] | None:
"""
Parameters:
payload: File information as a FileData object, or a list of FileData objects.
Returns:
Passes the file as a `str` or `bytes` object, or a list of `str` or list of `bytes` objects, depending on `type` and `file_count`.
"""
if payload is None:
return None

if self.file_count == "single":
if isinstance(payload, ListFiles):
return self._process_single_file(payload[0])
return self._process_single_file(payload)
if isinstance(payload, ListFiles):
if self.directory_file_types is not None:
result = []
for f in payload:
file = Path(f.path)
if (
file.suffix in self.directory_file_types
and not file.name.startswith("~$")
):
result.append(self._process_single_file(f))
else:
result = [self._process_single_file(f) for f in payload]
return result # type: ignore
return [self._process_single_file(payload)] # type: ignore


class DirectoryUpload(BasePage):
def __init__(self, app, index):
Expand Down Expand Up @@ -161,7 +208,6 @@ def render_file_list(self):
)

with gr.Row():

self.chat_button = gr.Button(
"Go to Chat",
visible=False,
Expand Down Expand Up @@ -262,10 +308,16 @@ def on_building_ui(self):
with gr.Column() as self.upload:
with gr.Tab("Upload Files"):
self.files = File(
file_types=self._supported_file_types,
file_count="multiple",
file_count="directory",
label="Upload a folder",
container=True,
show_label=False,
)
self.files.directory_file_types = self._supported_file_types
output = gr.Textbox()
self.files.change(
valid_files,
inputs=self.files,
outputs=output,
)

msg = self.upload_instruction()
Expand Down