pyquery的中文编码问题

# coding=UTF-8
import urllib.request
import pyquery
import requests
import time
import json
import pymysql
import sys
import math
from datetime import datetime
import time
import csv
from time import sleep
import random
from bs4 import BeautifulSoup
import asyncio
from pyppeteer import launch
import pyppeteer
from pyquery import PyQuery as pq
import chardet
import codecs

# Demo: extract a Chinese heading from a locally saved HTML page with pyquery
# without relying on the platform's default text encoding.
path = 'D:/code-py-download/02fang/xqxq_demo_utf8.html'

# Read the raw bytes once; the context manager guarantees the handle is closed
# (the bare open(...).read() form leaves that to the garbage collector).
with open(path, 'rb') as f:
    txt = f.read()

# chardet reports None when it cannot make a guess. Passing None on as the
# encoding would silently select the locale codec (gbk on a Chinese Windows
# install), which is the failure shown in the commented-out attempts below,
# so fall back to UTF-8 explicitly.
encodings = chardet.detect(txt)['encoding'] or 'utf-8'

# Decode the bytes already in memory instead of reopening the file in text mode.
content = txt.decode(encodings)
doc = pq(content)
name = doc('.tit.clearfix h1>strong')
title = name.text()
print(title)  # expected output: 扬州天下花园

# path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
# with open(path, "r")as f:
#     content = f.read()
#     doc = pq(content)
#     name=doc('.tit.clearfix h1>strong')
#     title=name.text()
#     print(title) #UnicodeDecodeError: 'gbk' codec can't decode byte 


# path='D:/code-py-download/02fang/小区详情demo.txt'   #ansi编码都不行
# with open(path, "r")as f:
#     content = f.read()
#     doc = pq(content)
#     name=doc('.tit.clearfix h1>strong')
#     title=name.text()
#     print(title) # 扬州天下花园

# path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
# doc = pq(filename=path, encoding='utf-8')  # 直接读也不行
# name=doc('.tit.clearfix h1>strong')
# title=name.text()
# print(title) # UnicodeDecodeError: 'gbk' codec can't decode byte

# path='D:/code-py-download/02fang/小区详情demo.txt'
# doc = pq(filename=path, encoding='utf-8')
# name=doc('.tit.clearfix h1>strong')
# title=name.text()
# print(title)  # æ¬å·¤©ä¸è±å

# path='D:/code-py-download/02fang/小区详情demo.txt'
# doc = pq(filename=path, encoding='gbk')
# name=doc('.tit.clearfix h1>strong')
# title=name.text()
# print(title)  # æ¬å·¤©ä¸è±å

 

上一篇:【Rust】函数文档注释


下一篇:在使用itextpdf对富文本转pdf时遇到Invalid nested tag XX found, expected closing tag XX的错误