samsmithnz
Well-known member
Is there a way to load an HTML page and extract just its text (with no formatting, tags, etc.) in .NET?
''' <summary>
''' Reads the whole of C:\Projects\Books.html, passes its contents through
''' RemoveHtmlTags, and shows the result in TextBox1.
''' </summary>
Private Sub LoadForm()
    Dim strText As String

    ' File.OpenText is a Shared method, so it is called on the type itself
    ' rather than through a variable. The Using block closes the reader (and
    ' the underlying file handle) even if ReadToEnd throws.
    Using objSR As System.IO.StreamReader = System.IO.File.OpenText("C:\Projects\Books.html")
        strText = objSR.ReadToEnd()
    End Using

    TextBox1.Text = RemoveHtmlTags(strText)
End Sub
''' <summary>
''' Deletes every span that starts with "&lt;" and runs to the next "&gt;"
''' from the given text, leaving the characters between those spans untouched.
''' </summary>
''' <param name="htmlText">The markup to strip; may be Nothing.</param>
''' <returns>
''' The input with all such tag spans removed, or an empty string when
''' <paramref name="htmlText"/> is Nothing.
''' </returns>
''' <remarks>
''' Only the tag spans themselves are matched: character entities are not
''' decoded, and text that sits between an opening and closing tag (including
''' script or style content) is kept.
''' </remarks>
Private Function RemoveHtmlTags(ByVal htmlText As String) As String
    ' Regex.Replace rejects a Nothing input, so treat it as "no text".
    If htmlText Is Nothing Then
        Return String.Empty
    End If

    ' The pattern must end at ">" with no trailing space; otherwise only tags
    ' that happen to be followed by a space are removed (and that space is
    ' swallowed too). [^>] also matches line breaks, so tags spanning several
    ' lines are handled without RegexOptions.Multiline, which only changes the
    ' meaning of ^ and $.
    Return System.Text.RegularExpressions.Regex.Replace(htmlText, "<[^>]*>", "", System.Text.RegularExpressions.RegexOptions.Compiled)
End Function