Categorygithub.com/gpestana/htmlizer
modulepackage
0.0.0-20180218084736-003fcb5d756c
Repository: https://github.com/gpestana/htmlizer.git
Documentation: pkg.go.dev

# README

htmlizer

Build Status

Parses only human readable content from HTML DOM.

Example

import (
  "fmt"
  "github.com/gpestana/htmlizer"
)

func main() {
  html := `
    <html>
     <body>
       <h1>Heading H1</h1>
       <p>This is the first text</p>
       <h2>heading h2</h2>
       <p>This is the second text</p>
     </body>
     <script>console.log("scripts are discarded")</script>
   </html>`

  // will trim out all the tabs from text
  ignore := []rune{'\t'}
  hizer := htmlizer.New(ignore)
  hizer.Load(html)

  fmt.Println(">> Struct:")
  fmt.Println(hizer)

  fmt.Println(">> Human readable content:")
  fmt.Println(hizer.HumanReadable())
}

Output:

>> Struct:
{[Heading H1 heading h2], [this is the first text this is the seconf text]}
>> Human readable content:
Heading H1
This is the first text
heading h2
This is the second text

Contribute

Fork and PR and use issues for bug reports, feature requests and general comments.

gpestana © MIT

# Functions

No description provided by the author

# Structs

No description provided by the author
No description provided by the author