package
0.0.4
Repository: https://github.com/cdvelop/vanify.git
Documentation: pkg.go.dev

# README

HTML GoDoc GoCover

This package is an HTML5 lexer written in Go. It follows the HTML syntax specification. The lexer takes an io.Reader and converts its contents into tokens until EOF.

Installation

Run the following command

go get github.com/cdvelop/vanify/pkg/minify/parse/html

or add the following import and run go get in your project

import "github.com/cdvelop/vanify/pkg/minify/parse/html"

Lexer

Usage

The following initializes a new Lexer with io.Reader r:

l := html.NewLexer(r)

To tokenize until EOF or an error, use:

for {
	tt, data := l.Next()
	switch tt {
	case html.ErrorToken:
		// error or EOF set in l.Err()
		return
	case html.StartTagToken:
		// ...
		for {
			ttAttr, dataAttr := l.Next()
			if ttAttr != html.AttributeToken {
				break
			}
			// ...
		}
	// ...
	}
}

All tokens:

ErrorToken TokenType = iota // extra token when errors occur
CommentToken
DoctypeToken
StartTagToken
StartTagCloseToken
StartTagVoidToken
EndTagToken
AttributeToken
TextToken

Examples

package main

import (
	"fmt"
	"io"
	"os"

	"github.com/cdvelop/vanify/pkg/minify/parse/html"
)

// Tokenize HTML from stdin.
func main() {
	l := html.NewLexer(os.Stdin)
	for {
		tt, data := l.Next()
		switch tt {
		case html.ErrorToken:
			if l.Err() != io.EOF {
				fmt.Println("Error on line", l.Line(), ":", l.Err())
			}
			return
		case html.StartTagToken:
			fmt.Println("Tag", string(data))
			for {
				ttAttr, dataAttr := l.Next()
				if ttAttr != html.AttributeToken {
					break
				}

				key := dataAttr
				val := l.AttrVal()
				fmt.Println("Attribute", string(key), "=", string(val))
			}
		// ...
		}
	}
}

License

Released under the MIT license.

# Functions

EscapeAttrVal returns the escaped attribute value bytes without quotes.
NewLexer returns a new Lexer for a given io.Reader.
ToHash returns the hash whose name is s.

# Constants

a.
abbr.
accept.
accept-charset.
accesskey.
acronym.
action.
address.
align.
alink.
allowfullscreen.
alt.
annotation.
annotationXml.
applet.
area.
Article
article.
aside.
async.
TokenType values.
audio.
autocomplete.
autofocus.
autoplay.
axis.
b.
background.
base.
basefont.
bdi.
bdo.
bgcolor.
bgsound.
big.
blink.
blockquote.
body.
border.
br.
button.
canvas.
caption.
center.
challenge.
charset.
checked.
cite.
class.
classid.
clear.
code.
codebase.
codetype.
col.
colgroup.
color.
cols.
colspan.
command.
TokenType values.
compact.
content.
contenteditable.
contextmenu.
controls.
coords.
crossorigin.
data.
datalist.
datetime.
dd.
declare.
default.
defaultChecked.
defaultMuted.
defaultSelected.
defer.
del.
desc.
details.
dfn.
dialog.
dir.
dirname.
disabled.
div.
dl.
TokenType values.
download.
draggable.
dropzone.
dt.
em.
embed.
enabled.
enctype.
TokenType values.
extra token when errors occur.
face.
fieldset.
figcaption.
figure.
font.
footer.
for.
foreignobject.
foreignObject.
form.
formaction.
formenctype.
formmethod.
formnovalidate.
formtarget.
frame.
frameborder.
frameset.
h1.
h2.
h3.
h4.
h5.
h6.
head.
header.
headers.
height.
hgroup.
hidden.
high.
hr.
href.
hreflang.
html.
http-equiv.
i.
icon.
id.
iframe.
image.
img.
inert.
input.
ins.
isindex.
ismap.
itemid.
itemprop.
itemref.
itemscope.
itemtype.
kbd.
keygen.
keytype.
kind.
label.
lang.
language.
legend.
li.
link.
list.
listing.
longdesc.
loop.
low.
main.
malignmark.
manifest.
map.
mark.
marquee.
math.
TokenType values.
max.
maxlength.
media.
mediagroup.
menu.
meta.
meter.
method.
mglyph.
mi.
min.
mn.
mo.
ms.
mtext.
multiple.
muted.
name.
nav.
nobr.
noembed.
noframes.
nohref.
noresize.
noscript.
noshade.
novalidate.
nowrap.
object.
ol.
onabort.
onafterprint.
onbeforeprint.
onbeforeunload.
onblur.
oncancel.
oncanplay.
oncanplaythrough.
onchange.
onclick.
onclose.
oncontextmenu.
oncuechange.
ondblclick.
ondrag.
ondragend.
ondragenter.
ondragleave.
ondragover.
ondragstart.
ondrop.
ondurationchange.
onemptied.
onended.
onerror.
onfocus.
onhashchange.
oninput.
oninvalid.
onkeydown.
onkeypress.
onkeyup.
onload.
onloadeddata.
onloadedmetadata.
onloadstart.
onmessage.
onmousedown.
onmousemove.
onmouseout.
onmouseover.
onmouseup.
onmousewheel.
onoffline.
ononline.
onpagehide.
onpageshow.
onpause.
onplay.
onplaying.
onpopstate.
onprogress.
onratechange.
onreset.
onresize.
onscroll.
onseeked.
onseeking.
onselect.
onshow.
onstalled.
onstorage.
onsubmit.
onsuspend.
ontimeupdate.
onunload.
onvolumechange.
onwaiting.
open.
optgroup.
optimum.
option.
output.
p.
param.
pattern.
pauseonexit.
picture.
ping.
placeholder.
plaintext.
poster.
pre.
preload.
profile.
progress.
prompt.
public.
q.
radiogroup.
rb.
readonly.
rel.
required.
rev.
reversed.
rows.
rowspan.
rp.
rt.
rtc.
ruby.
rules.
s.
samp.
sandbox.
scope.
scoped.
script.
scrolling.
seamless.
section.
select.
selected.
shape.
size.
sizes.
small.
sortable.
source.
spacer.
span.
spellcheck.
src.
srcdoc.
srclang.
srcset.
start.
TokenType values.
TokenType values.
TokenType values.
step.
strike.
strong.
style.
sub.
summary.
sup.
svg.
TokenType values.
system.
tabindex.
table.
target.
tbody.
td.
template.
text.
textarea.
TokenType values.
tfoot.
th.
thead.
time.
title.
tr.
track.
translate.
truespeed.
tt.
type.
typemustmatch.
u.
ul.
undeterminate.
usemap.
valign.
value.
valuetype.
var.
video.
visible.
vlink.
wbr.
width.
wrap.
xmlns.
xmp.

# Structs

Lexer is the state for the lexer.

# Type aliases

Hash defines perfect hashes for a predefined list of strings.
TokenType determines the type of token, e.g. a start tag, an attribute or text.