@@ -45,6 +45,8 @@ def get_current_loader(self) -> dict[str, BaseReader]:
4545
4646 @staticmethod
4747 def _init_supported () -> tuple [dict [str , list [BaseReader ]], dict [str , str ]]:
48+ gocr = GOCR2ImageReader ()
49+
4850 supported : dict [str , list [BaseReader ]] = {
4951 ".xlsx" : [PandasExcelReader ()],
5052 ".docx" : [unstructured ],
@@ -53,12 +55,12 @@ def _init_supported() -> tuple[dict[str, list[BaseReader]], dict[str, str]]:
5355 ".doc" : [unstructured ],
5456 ".html" : [HtmlReader ()],
5557 ".mhtml" : [MhtmlReader ()],
56- ".png" : [unstructured , GOCR2ImageReader () ],
57- ".jpeg" : [unstructured , GOCR2ImageReader () ],
58- ".jpg" : [unstructured , GOCR2ImageReader () ],
58+ ".png" : [unstructured , gocr ],
59+ ".jpeg" : [unstructured , gocr ],
60+ ".jpg" : [unstructured , gocr ],
5961 ".tiff" : [unstructured ],
6062 ".tif" : [unstructured ],
61- ".pdf" : [PDFThumbnailReader ()],
63+ ".pdf" : [PDFThumbnailReader (), adobe_reader , azure_reader ],
6264 ".txt" : [TxtReader ()],
6365 ".md" : [TxtReader ()],
6466 }
0 commit comments