@@ -45,7 +45,8 @@ private function _cleanWord($word)
4545 return preg_replace ('/\s+/ ' , ' ' , strtolower (trim ($ cleanWord )));
4646 }
4747
48- private function _parserV1 ($ htmlData , $ word )
48+ // parserV1 is disabled because it has been superseded by the enhanced parserV3
49+ /*private function _parserV1($htmlData, $word)
4950 {
5051 $doc = new DOMDocument();
5152 libxml_use_internal_errors(true);
@@ -91,7 +92,9 @@ private function _parserV1($htmlData, $word)
9192 'tesaurusLink' => $tesaurusLink,
9293 ];
9394 }
94- }
95+
96+ return count($dataResponse) ? $dataResponse : [];
97+ }*/
9598
9699 private function _parserV2 ($ htmlData , $ word )
97100 {
@@ -149,7 +152,67 @@ private function _parserV2($htmlData, $word)
149152 }
150153 }
151154
152- return $ dataResponse ;
155+ return count ($ dataResponse ) ? $ dataResponse : [];
156+ }
157+
158+ private function _parserV3 ($ htmlData , $ word )
159+ {
160+ $ doc = new DOMDocument ();
161+ libxml_use_internal_errors (true );
162+ $ doc ->loadHTML ($ htmlData );
163+ libxml_clear_errors ();
164+
165+ $ xpath = new DOMXPath ($ doc );
166+ $ dataResponse = [];
167+
168+ // Mengambil semua elemen h2 yang memiliki style 'margin-bottom:3px'
169+ $ h2Elements = $ xpath ->query ("//h2[contains(@style, 'margin-bottom:3px')] " );
170+ foreach ($ h2Elements as $ h2Element ) {
171+ // Mengambil teks dari elemen h2
172+ $ lema = $ this ->_cleanText ($ h2Element ->textContent );
173+
174+ // Mengambil link Tesaurus dari elemen <p><a>
175+ $ tesaurusLink = '' ;
176+ $ tesaurusAnchor = $ xpath ->query ("following-sibling::p[1]/a[contains(@href, 'tematis/lema')] " , $ h2Element )->item (0 );
177+ if ($ tesaurusAnchor ) {
178+ $ tesaurusLink = $ tesaurusAnchor ->getAttribute ('href ' );
179+ } else {
180+ $ tesaurusLink = "http://tesaurus.kemdikbud.go.id/tematis/lema/ " . $ lema ;
181+ }
182+
183+ // Mengambil deskripsi/arti dari ol/li setelah h2
184+ $ arti = [];
185+ $ olElement = $ xpath ->query ("following-sibling::ol[1] " , $ h2Element )->item (0 );
186+ if ($ olElement ) {
187+ $ listItems = $ xpath ->query (".//li " , $ olElement );
188+ foreach ($ listItems as $ listItem ) {
189+ $ deskripsi = $ this ->_cleanText ($ listItem ->nodeValue );
190+ $ arti [] = ['deskripsi ' => $ deskripsi ];
191+ }
192+ }
193+
194+ // Mengambil deskripsi/arti dari ul/li setelah h2
195+ $ ulElement = $ xpath ->query ("following-sibling::ul[@class='adjusted-par'][1] " , $ h2Element )->item (0 );
196+ if ($ ulElement ) {
197+ $ listItems = $ xpath ->query (".//li " , $ ulElement );
198+ foreach ($ listItems as $ listItem ) {
199+ $ deskripsi = $ this ->_cleanText ($ listItem ->nodeValue );
200+ $ arti [] = ['deskripsi ' => $ deskripsi ];
201+ }
202+ }
203+
204+ // Menyimpan data dalam $dataResponse
205+ if (!empty ($ lema ) && !empty ($ arti )) {
206+ $ dataResponse [] = [
207+ 'word ' => $ word ,
208+ 'lema ' => $ lema ,
209+ 'arti ' => $ arti ,
210+ 'tesaurusLink ' => $ tesaurusLink ,
211+ ];
212+ }
213+ }
214+
215+ return count ($ dataResponse ) ? $ dataResponse : [];
153216 }
154217
155218 public function searchWord ($ word )
@@ -161,6 +224,32 @@ public function searchWord($word)
161224
162225 $ dataResponse = [];
163226
227+ // parserV1 disabled because has been enhance in parserV3
228+ /*$_parserV1 = $this->_parserV1($htmlData, $cleanWord, $wordType);
229+ if(count($_parserV1)){
230+ $dataResponse = $_parserV1;
231+
232+ return $dataResponse;
233+ }*/
234+
235+ $ _parserV2 = $ this ->_parserV2 ($ htmlData , $ cleanWord , $ wordType );
236+ if (count ($ _parserV2 )){
237+ $ dataResponse = $ _parserV2 ;
238+
239+ return $ dataResponse ;
240+ }
241+
242+ $ _parserV3 = $ this ->_parserV3 ($ htmlData , $ cleanWord , $ wordType );
243+ if (count ($ _parserV3 )){
244+ $ dataResponse = $ _parserV3 ;
245+
246+ return $ dataResponse ;
247+ }
248+
249+ return false ;
250+
251+ /*$dataResponse = [];
252+
164253 $_parserV1 = $this->_parserV1($htmlData, $cleanWord);
165254 if(count($_parserV1)){
166255 $dataResponse = $_parserV1;
@@ -171,6 +260,6 @@ public function searchWord($word)
171260 }
172261 }
173262
174- return count ($ dataResponse ) ? $ dataResponse : false ;
263+ return count($dataResponse) ? $dataResponse : false;*/
175264 }
176265}
0 commit comments