File tree Expand file tree Collapse file tree 1 file changed +6
-2
lines changed
Expand file tree Collapse file tree 1 file changed +6
-2
lines changed Original file line number Diff line number Diff line change 1616
1717import fireo
1818import fsspec
19- import pypdf
2019import requests
2120from fsspec .core import url_to_fs
22- from tika import parser
2321
2422from ..database import models as db_models
2523
@@ -854,6 +852,8 @@ def parse_doc_file(document_raw: bytes) -> str:
854852 str:
855853 A str of all text in the .doc file.
856854 """
855+ from tika import parser
856+
857857 parsed_content = parser .from_buffer (document_raw )["content" ]
858858 return remove_duplicate_space (parsed_content )
859859
@@ -872,6 +872,8 @@ def parse_pdf_file(document_raw: bytes) -> str:
872872 str:
873873 A str of all text in the .pdf file.
874874 """
875+ import pypdf
876+
875877 pdf_reader = pypdf .PdfReader (io .BytesIO (document_raw ))
876878 text = ""
877879
@@ -898,6 +900,8 @@ def parse_pptx_file(document_raw: bytes) -> str:
898900 str:
899901 A str of all text in the .pptx file.
900902 """
903+ from tika import parser
904+
901905 parsed_pptx = parser .from_buffer (document_raw )["content" ]
902906 return remove_duplicate_space (parsed_pptx )
903907
You can’t perform that action at this time.
0 commit comments