我们需要先下载两个库:golang.org/x/net/html(其中的 charset 子包用于检测网页的编码格式)
和 golang.org/x/text(用于将网页内容从检测到的编码转换为 UTF-8)。
安装命令如下:
go get golang.org/x/text
go get golang.org/x/net/html
代码如下:
// main fetches url, converts the response body to UTF-8 using the encoding
// reported by determineEncoding, and prints the decoded text to stdout.
//
// NOTE(review): url is not declared in this snippet; it is presumably a
// package-level string defined elsewhere — confirm.
func main() {
	resp, err := http.Get(url)
	if err != nil {
		// main has no result values, so report the failure and stop
		// instead of trying to return it.
		fmt.Printf("fetch error: %v\n", err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// fmt.Errorf only builds an error value; print the message so the
		// unexpected status is actually visible.
		fmt.Printf("wrong status code: %d\n", resp.StatusCode)
		return
	}

	// Wrap the body in a bufio.Reader so determineEncoding can Peek at the
	// leading bytes; the same reader is then decoded from the start.
	bodyReader := bufio.NewReader(resp.Body)
	e := determineEncoding(bodyReader)
	utf8Reader := transform.NewReader(bodyReader, e.NewDecoder())

	all, err := ioutil.ReadAll(utf8Reader)
	if err != nil {
		panic(err)
	}

	// Use %s so the bytes are written as text; fmt.Print on a []byte would
	// print the decimal value of every byte.
	fmt.Printf("%s", all)
}
// determineEncoding sniffs the character encoding of the content behind r.
//
// It peeks at up to the first 1024 bytes of r (without consuming them) and
// passes them to charset.DetermineEncoding with an empty content type.
// If nothing at all can be peeked, the error is logged and unicode.UTF8 is
// returned as a fallback.
func determineEncoding(r *bufio.Reader) encoding.Encoding {
	peeked, err := r.Peek(1024)
	// Peek returns an error alongside the available bytes when fewer than
	// 1024 bytes exist (e.g. a short page). Only give up when there is no
	// data to inspect; otherwise sniff whatever was read.
	if err != nil && len(peeked) == 0 {
		log.Printf("Fetcher error: %v", err)
		return unicode.UTF8
	}

	// The name and certainty results are not needed here; only the
	// encoding itself is used by the caller.
	e, _, _ := charset.DetermineEncoding(peeked, "")
	return e
}