1+ module FSharp.Data.Tests.BaseTypesHtmlDocument
2+
3+ open System.IO
4+ open NUnit.Framework
5+ open FsUnit
6+ open FSharp.Data
7+ open FSharp.Data .Runtime .BaseTypes
8+
9+ #nowarn " 10001" // Suppress "intended for use in generated code only" warnings
10+
/// Parses a small document containing one table and checks that the table's
/// id attribute value is still present in the serialized HTML.
[<Test>]
let ``HtmlDocument.Create successfully parses HTML with tables`` () =
    let htmlContent = """
        <html>
            <body>
                <table id="test-table">
                    <tr><th>Name</th><th>Age</th></tr>
                    <tr><td>John</td><td>30</td></tr>
                </table>
            </body>
        </html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // The expected text must match the fixture's id exactly: a padded value
    // such as " test-table" never occurs in `id="test-table"`.
    htmlDoc.Html.ToString() |> should contain "test-table"
27+
/// Parses a document with `includeLayoutTables = true` and checks that the
/// table's id attribute value is present in the serialized HTML.
///
/// NOTE(review): only the serialized markup is inspected; table extraction is
/// not exercised here, so the effect of the flag itself is not observed.
[<Test>]
let ``HtmlDocument.Create with includeLayoutTables true`` () =
    let htmlContent = """
        <html>
            <body>
                <table id="layout-table">
                    <tr><td>Layout cell</td></tr>
                </table>
            </body>
        </html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(true, reader)

    // Expected text matches the fixture's id exactly (no padding).
    htmlDoc.Html.ToString() |> should contain "layout-table"
43+
/// Parses a document with `includeLayoutTables = false` and checks that the
/// table's id attribute value is present in the serialized HTML.
///
/// NOTE(review): only the serialized markup is inspected; table extraction is
/// not exercised here, so the effect of the flag itself is not observed.
[<Test>]
let ``HtmlDocument.Create with includeLayoutTables false`` () =
    let htmlContent = """
        <html>
            <body>
                <table id="data-table">
                    <tr><td>Data cell</td></tr>
                </table>
            </body>
        </html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // Expected text matches the fixture's id exactly (no padding).
    htmlDoc.Html.ToString() |> should contain "data-table"
59+
/// Reads the `Html` property of a created document and checks that the
/// heading text from the input appears in its string form.
[<Test>]
let ``HtmlDocument.Html property returns the parsed document`` () =
    let htmlContent = """ <html><body><h1>Test Title</h1></body></html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)
    let doc = htmlDoc.Html

    // The heading text is written as `<h1>Test Title</h1>` in the input, so
    // the expected substring carries no leading space.
    doc.ToString() |> should contain "Test Title"
69+
/// Calls `GetTable` with the same string used as the table's id attribute in
/// the fixture and checks the `Name` of the returned table.
[<Test>]
let ``HtmlDocument.GetTable retrieves table by id`` () =
    let htmlContent = """
        <html>
            <body>
                <table id="data-table">
                    <tr><th>Column1</th><th>Column2</th></tr>
                    <tr><td>Value1</td><td>Value2</td></tr>
                </table>
            </body>
        </html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // The key must equal the fixture's id exactly; a padded key such as
    // " data-table" is a different string and would not identify this table.
    let table = htmlDoc.GetTable("data-table")

    table.Name |> should equal "data-table"
87+
/// Expects `GetTable` to raise `KeyNotFoundException` when the document
/// contains no table for the requested name.
[<Test>]
let ``HtmlDocument.GetTable throws when table not found`` () =
    use source = new StringReader(""" <html><body><p>No tables here</p></body></html>""")
    let document = HtmlDocument.Create(false, source)

    // Named thunk so the failing call is deferred until the assertion runs it.
    let lookupMissingTable () =
        document.GetTable(" nonexistent") |> ignore

    lookupMissingTable
    |> should throw typeof<System.Collections.Generic.KeyNotFoundException>
96+
/// Calls `GetList` with the same string used as the `<ul>` id attribute in
/// the fixture and checks the `Name` of the returned list.
[<Test>]
let ``HtmlDocument.GetList retrieves list by id`` () =
    let htmlContent = """
        <html>
            <body>
                <ul id="item-list">
                    <li>Item 1</li>
                    <li>Item 2</li>
                </ul>
            </body>
        </html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // The key must equal the fixture's id exactly (no leading space).
    let list = htmlDoc.GetList("item-list")

    list.Name |> should equal "item-list"
114+
/// Same as the unordered-list lookup, but the fixture uses an `<ol>` element:
/// calls `GetList` with the fixture's id and checks the returned `Name`.
[<Test>]
let ``HtmlDocument.GetList works with ordered lists`` () =
    let htmlContent = """
        <html>
            <body>
                <ol id="numbered-list">
                    <li>First</li>
                    <li>Second</li>
                </ol>
            </body>
        </html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // The key must equal the fixture's id exactly (no leading space).
    let list = htmlDoc.GetList("numbered-list")

    list.Name |> should equal "numbered-list"
132+
/// Expects `GetList` to raise `KeyNotFoundException` when the document
/// contains no list for the requested name.
[<Test>]
let ``HtmlDocument.GetList throws when list not found`` () =
    use source = new StringReader(""" <html><body><p>No lists here</p></body></html>""")
    let document = HtmlDocument.Create(false, source)

    // Named thunk so the failing call is deferred until the assertion runs it.
    let lookupMissingList () =
        document.GetList(" nonexistent") |> ignore

    lookupMissingList
    |> should throw typeof<System.Collections.Generic.KeyNotFoundException>
141+
/// Calls `GetDefinitionList` with the same string used as the `<dl>` id
/// attribute in the fixture and checks the `Name` of the returned value.
[<Test>]
let ``HtmlDocument.GetDefinitionList retrieves definition list by id`` () =
    let htmlContent = """
        <html>
            <body>
                <dl id="def-list">
                    <dt>Term1</dt>
                    <dd>Definition1</dd>
                    <dt>Term2</dt>
                    <dd>Definition2</dd>
                </dl>
            </body>
        </html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // The key must equal the fixture's id exactly (no leading space).
    let defList = htmlDoc.GetDefinitionList("def-list")

    defList.Name |> should equal "def-list"
161+
/// Expects `GetDefinitionList` to raise `KeyNotFoundException` when the
/// document contains no definition list for the requested name.
[<Test>]
let ``HtmlDocument.GetDefinitionList throws when definition list not found`` () =
    use source =
        new StringReader(""" <html><body><p>No definition lists here</p></body></html>""")

    let document = HtmlDocument.Create(false, source)

    // Named thunk so the failing call is deferred until the assertion runs it.
    let lookupMissingDefinitionList () =
        document.GetDefinitionList(" nonexistent") |> ignore

    lookupMissingDefinitionList
    |> should throw typeof<System.Collections.Generic.KeyNotFoundException>
170+
/// Creates a document from markup with an empty `<body>` and checks that the
/// string form of the parsed document is not the empty string.
[<Test>]
let ``HtmlDocument.Create handles empty HTML`` () =
    let htmlContent = " <html><body></body></html>"

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // Compare against the empty string: comparing against a single space
    // would pass for practically any output and assert nothing useful.
    htmlDoc.Html.ToString() |> should not' (equal "")
179+
/// Feeds markup whose `<div>`, `<body>` and `<html>` elements are never
/// closed and checks that creation succeeds and that the text of the
/// well-formed heading and paragraph is present in the serialized output.
///
/// NOTE(review): the text inside the unclosed `<div>` is not asserted; whether
/// the parser keeps it should be confirmed before adding such a check.
[<Test>]
let ``HtmlDocument.Create handles malformed HTML gracefully`` () =
    let htmlContent = " <html><body><h1>Test Content</h1><p>Valid paragraph</p><div>Unclosed div"

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // Serialize once and check both pieces of content against the same string.
    // Expected substrings match the input text exactly (no leading space).
    let htmlString = htmlDoc.Html.ToString()
    htmlString |> should contain "Test Content"
    htmlString |> should contain "Valid paragraph"
192+
/// Parses a document containing two tables and checks that the cell text of
/// both is present in the serialized HTML.
[<Test>]
let ``HtmlDocument.Create processes multiple tables correctly`` () =
    let htmlContent = """
        <html>
            <body>
                <table id="table1">
                    <tr><td>Table 1 Content</td></tr>
                </table>
                <table id="table2">
                    <tr><td>Table 2 Content</td></tr>
                </table>
            </body>
        </html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // Assert on cell content rather than on table names, and serialize only
    // once. Expected substrings match the cell text exactly (no leading space).
    let rendered = htmlDoc.Html.ToString()
    rendered |> should contain "Table 1 Content"
    rendered |> should contain "Table 2 Content"
213+
/// Parses a document containing one `<ul>` and one `<ol>` and checks that the
/// item text of both is present in the serialized HTML.
[<Test>]
let ``HtmlDocument.Create processes multiple lists correctly`` () =
    let htmlContent = """
        <html>
            <body>
                <ul id="list1">
                    <li>List 1 Item</li>
                </ul>
                <ol id="list2">
                    <li>List 2 Item</li>
                </ol>
            </body>
        </html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // Serialize once; expected substrings match the item text exactly
    // (no leading space).
    let rendered = htmlDoc.Html.ToString()
    rendered |> should contain "List 1 Item"
    rendered |> should contain "List 2 Item"
234+
/// Parses a document containing two `<dl>` elements and checks that every
/// term and definition text is present in the serialized HTML.
[<Test>]
let ``HtmlDocument.Create processes multiple definition lists correctly`` () =
    let htmlContent = """
        <html>
            <body>
                <dl id="def1">
                    <dt>Term A</dt>
                    <dd>Definition A</dd>
                </dl>
                <dl id="def2">
                    <dt>Term B</dt>
                    <dd>Definition B</dd>
                </dl>
            </body>
        </html>"""

    use reader = new StringReader(htmlContent)
    let htmlDoc = HtmlDocument.Create(false, reader)

    // Serialize once; expected substrings match the fixture text exactly
    // (no leading space).
    let rendered = htmlDoc.Html.ToString()
    rendered |> should contain "Term A"
    rendered |> should contain "Definition A"
    rendered |> should contain "Term B"
    rendered |> should contain "Definition B"
0 commit comments