# README

htmlquery

Overview

htmlquery is an XPath query package for HTML that lets you extract data from HTML documents, or evaluate expressions against them, using XPath expressions.

Installation

$ go get github.com/Aiicy/htmlquery

Getting Started

Load HTML document from URL.

ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, time.Second)
defer cancel()
doc, err := htmlquery.LoadURL(ctx,"http://example.com/")

Load HTML document from URL with Header set

ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, time.Second)
defer cancel()
header := map[string]string {
	"User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
}
doc,err := htmlquery.LoadURLWithHeader(ctx,"http://example.com/",header)

Load HTML document from URL with Proxy

ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, time.Second)
defer cancel()
doc,err := htmlquery.LoadURLWithProxy(ctx,"http://example.com/","http://proxyip:proxyport")

Load HTML document from string.

s := `<html>....</html>`
doc, err := htmlquery.Parse(strings.NewReader(s))

Find all A elements.

list := htmlquery.Find(doc, "//a")

Find all A elements with href attribute.

list := htmlquery.Find(doc, "//a/@href")

Find the third A element.

a := htmlquery.FindOne(doc, "//a[3]")

Evaluate the total number of IMG elements.

expr, _ := xpath.Compile("count(//img)")
v := expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(float64)
fmt.Printf("total count is %f", v)

Quick Tutorial

package main
import (
	"context"
	"fmt"
	"time"

	"github.com/Aiicy/htmlquery"
)

// main loads a Bing search results page and, for each "//ol/li" item that
// contains a link, prints the item index, the link text, and the link's href.
func main() {
	// Give the fetch a one-second deadline via the context.
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	doc, err := htmlquery.LoadURL(ctx, "https://www.bing.com/search?q=golang")
	if err != nil {
		panic(err)
	}

	// Find all news items.
	for i, n := range htmlquery.Find(doc, "//ol/li") {
		a := htmlquery.FindOne(n, "//a")
		if a == nil {
			// No link in this item; skip it rather than hand a nil node
			// to InnerText and SelectAttr.
			continue
		}
		fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href"))
	}
}