Skip to content

Commit 31dd1d7

Browse files
committed
feat: update extensions for pdf
1 parent de703a3 commit 31dd1d7

File tree

5 files changed

+7
-12
lines changed

5 files changed

+7
-12
lines changed

libs/kotaemon/kotaemon/indices/ingests/extensions.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ def get_current_loader(self) -> dict[str, BaseReader]:
4545

4646
@staticmethod
4747
def _init_supported() -> tuple[dict[str, list[BaseReader]], dict[str, str]]:
48+
gocr = GOCR2ImageReader()
49+
4850
supported: dict[str, list[BaseReader]] = {
4951
".xlsx": [PandasExcelReader()],
5052
".docx": [unstructured],
@@ -53,12 +55,12 @@ def _init_supported() -> tuple[dict[str, list[BaseReader]], dict[str, str]]:
5355
".doc": [unstructured],
5456
".html": [HtmlReader()],
5557
".mhtml": [MhtmlReader()],
56-
".png": [unstructured, GOCR2ImageReader()],
57-
".jpeg": [unstructured, GOCR2ImageReader()],
58-
".jpg": [unstructured, GOCR2ImageReader()],
58+
".png": [unstructured, gocr],
59+
".jpeg": [unstructured, gocr],
60+
".jpg": [unstructured, gocr],
5961
".tiff": [unstructured],
6062
".tif": [unstructured],
61-
".pdf": [PDFThumbnailReader()],
63+
".pdf": [PDFThumbnailReader(), adobe_reader, azure_reader],
6264
".txt": [TxtReader()],
6365
".md": [TxtReader()],
6466
}

libs/kotaemon/kotaemon/indices/ingests/files.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,8 @@
1616
DirectoryReader,
1717
MathpixPDFReader,
1818
OCRReader,
19-
PandasExcelReader,
20-
PDFThumbnailReader,
21-
TxtReader,
2219
UnstructuredReader,
2320
WebReader,
24-
UnstructuredReader,
25-
ImageReader,
2621
)
2722

2823
web_reader = WebReader()

libs/kotaemon/kotaemon/loaders/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,5 @@
3030
"TxtReader",
3131
"PDFThumbnailReader",
3232
"WebReader",
33-
"GOCR2ImageReader"
33+
"GOCR2ImageReader",
3434
]

libs/kotaemon/kotaemon/loaders/ocr_loader.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,4 +262,3 @@ def _tenacious_api_post(
262262
)
263263

264264
return result
265-

libs/ktem/ktem/pages/chat/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from typing import Optional
66

77
import gradio as gr
8-
from filelock import FileLock
98
from ktem.app import BasePage
109
from ktem.components import reasonings
1110
from ktem.db.models import Conversation, engine

0 commit comments

Comments
 (0)