# README
scraper
基本的な使い方
// 初期化
var logger scraper.ConsoleLogger
session := scraper.NewSession("session-name", logger) // session-name はログフォルダ名になる
// cookie を読む (session-name/cookie というファイルを使う)
err := session.LoadCookie()
if err != nil {
log.Fatal(err)
}
// ページを開く
page, err := session.GetPage("https://example.com")
if err != nil {
log.Fatal(err)
}
// form 送信
form, err := page.Form("form") // CSS selector でformを特定する
if err != nil {
log.Fatal(err)
}
_ = form.Set("id", id)
_ = form.Set("password", password)
resp, err := session.Submit(form) // レスポンスを得る
if err != nil {
log.Fatal(err)
}
page, err = resp.Page() // レスポンスからページにする
if err != nil {
log.Fatal(err)
}
// cookie を保存
if err := session.SaveCookie(); err != nil {
log.Fatal(err)
}
// Pageから読み取る
type Link struct {
Href string `attr:"href"`
Text string
}
var links []Link
if err := scraper.Unmarshal(&links, page.Find("div.items a"), scraper.UnmarshalOption{}); err != nil {
log.Fatal(err)
}
// -> links に <div class="items"> 以下にある <a> タグの href とテキスト要素を収集する
メモ
https://github.com/juju/persistent-cookiejar は max-age がないクッキーを永続化してくれないので https://github.com/orirawlings/persistent-cookiejar を使ったらいけた。神
# Functions
No description provided by the author
No description provided by the author
No description provided by the author
Unmarshal parses selection and stores to v.
# Constants
No description provided by the author
UserAgent_Chrome39 = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.99 Safari/537.36".
UserAgent_iOS8 = "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Mobile/12B466".
# Structs
AvailableValue holds an available value and corresponding label to display.
No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author
Form holds form data and submit information.
FormElement holds a form element.
No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author
Page holds DOM structure of the page and its URL, Logging information.
No description provided by the author
No description provided by the author
Response holds a raw response and its request information.
No description provided by the author
No description provided by the author
Session holds communication and logging options.
No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author
No description provided by the author