Skip to content

Commit 3f5d1fb

Browse files
committed
feat: update extensions for pdf
1 parent dfc416e commit 3f5d1fb

File tree

4 files changed

+6
-11
lines changed

4 files changed

+6
-11
lines changed

libs/kotaemon/kotaemon/indices/ingests/extensions.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,8 @@ def get_current_loader(self) -> dict[str, BaseReader]:
4545

4646
@staticmethod
4747
def _init_supported() -> tuple[dict[str, list[BaseReader]], dict[str, str]]:
48+
gocr = GOCR2ImageReader()
49+
4850
supported: dict[str, list[BaseReader]] = {
4951
".xlsx": [PandasExcelReader()],
5052
".docx": [unstructured],
@@ -53,12 +55,12 @@ def _init_supported() -> tuple[dict[str, list[BaseReader]], dict[str, str]]:
5355
".doc": [unstructured],
5456
".html": [HtmlReader()],
5557
".mhtml": [MhtmlReader()],
56-
".png": [unstructured, GOCR2ImageReader()],
57-
".jpeg": [unstructured, GOCR2ImageReader()],
58-
".jpg": [unstructured, GOCR2ImageReader()],
58+
".png": [unstructured, gocr],
59+
".jpeg": [unstructured, gocr],
60+
".jpg": [unstructured, gocr],
5961
".tiff": [unstructured],
6062
".tif": [unstructured],
61-
".pdf": [PDFThumbnailReader()],
63+
".pdf": [PDFThumbnailReader(), adobe_reader, azure_reader],
6264
".txt": [TxtReader()],
6365
".md": [TxtReader()],
6466
}

libs/kotaemon/kotaemon/indices/ingests/files.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,8 @@
1818
HtmlReader,
1919
MathpixPDFReader,
2020
OCRReader,
21-
PandasExcelReader,
22-
PDFThumbnailReader,
23-
TxtReader,
2421
UnstructuredReader,
2522
WebReader,
26-
UnstructuredReader,
27-
ImageReader,
2823
)
2924

3025
web_reader = WebReader()

libs/kotaemon/kotaemon/loaders/ocr_loader.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -262,4 +262,3 @@ def _tenacious_api_post(
262262
)
263263

264264
return result
265-

libs/ktem/ktem/pages/chat/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
from typing import Optional
66

77
import gradio as gr
8-
from filelock import FileLock
98
from ktem.app import BasePage
109
from ktem.components import reasonings
1110
from ktem.db.models import Conversation, engine

0 commit comments

Comments
 (0)