@@ -141,49 +141,33 @@ private function _cleanWord($word)
141141
142142 private function _parserV2 ($ htmlData , $ word )
143143 {
144- $ doc = new DOMDocument ();
145- libxml_use_internal_errors (true );
146- $ doc ->loadHTML ($ htmlData );
147- libxml_clear_errors ();
148-
149- $ xpath = new DOMXPath ($ doc );
144+ $ doc = Dom \HTMLDocument::createFromString ($ htmlData , LIBXML_NOERROR );
150145 $ dataResponse = [];
151-
152- $ contentDiv = $ xpath -> query ( " // div[contains(@class, ' container body-content')] " )-> item ( 0 );
146+
147+ $ contentDiv = $ doc -> querySelector ( " div. container. body-content " );
153148 if (!$ contentDiv ) {
154149 return false ;
155150 }
156-
151+
157152 // Mengambil semua elemen h2 dalam div body-content
158- $ h2Elements = $ xpath ->query (".//h2[contains(@style, 'margin-bottom:3px')] " , $ contentDiv );
159- foreach ($ h2Elements as $ i => $ h2Element ) {
153+ foreach ($ contentDiv ->querySelectorAll ("h2[style*='margin-bottom:3px'] " ) as $ h2Element ) {
160154 // Mengambil lema dari link a di dalam span rootword
161- $ lemaLink = $ xpath ->query (".//span[contains(@class, 'rootword')]/a " , $ h2Element )->item (0 );
162- $ lema = '' ;
163- if ($ lemaLink ) {
164- $ lema = $ this ->_cleanText ($ lemaLink ->nodeValue );
165- }
166-
155+ $ lemaLink = $ h2Element ->querySelector ("span.rootword > a " );
156+ $ lema = $ lemaLink ? $ this ->_cleanText ($ lemaLink ->textContent ) : '' ;
157+
167158 // Mengambil link Tesaurus
168- $ tesaurusLink = '' ;
169- $ tesaurusAnchor = $ xpath ->query (".//p/a[contains(@href, 'tematis/lema')] " , $ h2Element )->item (0 );
170- if ($ tesaurusAnchor ) {
171- $ tesaurusLink = $ tesaurusAnchor ->getAttribute ('href ' );
172- } else {
173- $ tesaurusLink = "http://tesaurus.kemdikbud.go.id/tematis/lema/ " .$ word ;
174- }
175-
159+ $ tesaurusLink = $ h2Element ->querySelector ("p > a[href*='tematis/lema'] " )?->getAttribute('href ' ) ?? "http://tesaurus.kemdikbud.go.id/tematis/lema/ " . $ word ;
160+
176161 // Mengambil deskripsi/arti dari ul/li setelah h2
177- $ ulElement = $ xpath -> query ( " following-sibling::ul[@class= 'adjusted-par'][1] " , $ h2Element)-> item ( 0 ) ;
162+ $ ulElement = $ h2Element -> nextElementSibling ?->classList-> contains ( 'adjusted-par ' ) ? $ h2Element-> nextElementSibling : null ;
178163 $ arti = [];
179164 if ($ ulElement ) {
180- $ listItems = $ xpath ->query (".//li " , $ ulElement );
181- foreach ($ listItems as $ j => $ listItem ) {
182- $ deskripsi = $ this ->_cleanText ($ listItem ->nodeValue );
165+ foreach ($ ulElement ->querySelectorAll ("li " ) as $ listItem ) {
166+ $ deskripsi = $ this ->_cleanText ($ listItem ->textContent );
183167 $ arti [] = ['deskripsi ' => $ deskripsi ];
184168 }
185169 }
186-
170+
187171 // Menyimpan data dalam $dataResponse
188172 if (!empty ($ lema ) && !empty ($ arti )) {
189173 $ dataResponse [] = [
@@ -194,56 +178,41 @@ private function _parserV2($htmlData, $word)
194178 ];
195179 }
196180 }
197-
181+
198182 return count ($ dataResponse ) ? $ dataResponse : [];
199183 }
200-
184+
201185 private function _parserV3 ($ htmlData , $ word )
202186 {
203- $ doc = new DOMDocument ();
204- libxml_use_internal_errors (true );
205- $ doc ->loadHTML ($ htmlData );
206- libxml_clear_errors ();
207-
208- $ xpath = new DOMXPath ($ doc );
187+ $ doc = Dom \HTMLDocument::createFromString ($ htmlData , LIBXML_NOERROR );
209188 $ dataResponse = [];
210-
189+
211190 // Mengambil semua elemen h2 yang memiliki style 'margin-bottom:3px'
212- $ h2Elements = $ xpath ->query ("//h2[contains(@style, 'margin-bottom:3px')] " );
213- foreach ($ h2Elements as $ h2Element ) {
214- // Mengambil teks dari elemen h2
191+ foreach ($ doc ->querySelectorAll ("h2[style*='margin-bottom:3px'] " ) as $ h2Element ) {
215192 $ lema = $ this ->_cleanText ($ h2Element ->textContent );
216-
193+
217194 // Mengambil link Tesaurus dari elemen <p><a>
218- $ tesaurusLink = '' ;
219- $ tesaurusAnchor = $ xpath ->query ("following-sibling::p[1]/a[contains(@href, 'tematis/lema')] " , $ h2Element )->item (0 );
220- if ($ tesaurusAnchor ) {
221- $ tesaurusLink = $ tesaurusAnchor ->getAttribute ('href ' );
222- } else {
223- $ tesaurusLink = "http://tesaurus.kemdikbud.go.id/tematis/lema/ " . $ lema ;
224- }
225-
195+ $ tesaurusLink = $ h2Element ->nextElementSibling ?->querySelector("a[href*='tematis/lema'] " )?->getAttribute('href ' ) ?? "http://tesaurus.kemdikbud.go.id/tematis/lema/ " . $ lema ;
196+
226197 // Mengambil deskripsi/arti dari ol/li setelah h2
227198 $ arti = [];
228- $ olElement = $ xpath -> query ( " following-sibling::ol[1] " , $ h2Element)-> item ( 0 ) ;
199+ $ olElement = $ h2Element -> nextElementSibling ?->tagName === ' OL ' ? $ h2Element-> nextElementSibling : null ;
229200 if ($ olElement ) {
230- $ listItems = $ xpath ->query (".//li " , $ olElement );
231- foreach ($ listItems as $ listItem ) {
232- $ deskripsi = $ this ->_cleanText ($ listItem ->nodeValue );
201+ foreach ($ olElement ->querySelectorAll ("li " ) as $ listItem ) {
202+ $ deskripsi = $ this ->_cleanText ($ listItem ->textContent );
233203 $ arti [] = ['deskripsi ' => $ deskripsi ];
234204 }
235205 }
236-
206+
237207 // Mengambil deskripsi/arti dari ul/li setelah h2
238- $ ulElement = $ xpath -> query ( " following-sibling::ul[@class= 'adjusted-par'][1] " , $ h2Element)-> item ( 0 ) ;
208+ $ ulElement = $ h2Element -> nextElementSibling ?->classList-> contains ( 'adjusted-par ' ) ? $ h2Element-> nextElementSibling : null ;
239209 if ($ ulElement ) {
240- $ listItems = $ xpath ->query (".//li " , $ ulElement );
241- foreach ($ listItems as $ listItem ) {
242- $ deskripsi = $ this ->_cleanText ($ listItem ->nodeValue );
210+ foreach ($ ulElement ->querySelectorAll ("li " ) as $ listItem ) {
211+ $ deskripsi = $ this ->_cleanText ($ listItem ->textContent );
243212 $ arti [] = ['deskripsi ' => $ deskripsi ];
244213 }
245214 }
246-
215+
247216 // Menyimpan data dalam $dataResponse
248217 if (!empty ($ lema ) && !empty ($ arti )) {
249218 $ dataResponse [] = [
@@ -254,10 +223,11 @@ private function _parserV3($htmlData, $word)
254223 ];
255224 }
256225 }
257-
226+
258227 return count ($ dataResponse ) ? $ dataResponse : [];
259228 }
260229
230+
261231 private function _KBBI_official ($ word )
262232 {
263233 // Clean the word
0 commit comments