Node request、cheerio

request

用来发送请求的库

  • 发送request
// Send a GET request; the callback receives the error (if any), the response
// object and the response body.
request('http://www.baidu.com', function (error, response, body) {
    // When the request fails there is no body to print, so report the error.
    if (error) {
        console.log(error);
        return;
    }
    console.log(body); // content of the requested page
});
// Image download
const file = "https://upload.jianshu.io/users/upload_avatars/7232100/3ac5a4a7-eb45-4bff-8413-3c0b4684ad6d?imageMogr2/auto-orient/strip|imageView2/1/w/120/h/120";
// Stream the response into li.png. Both the request stream and the file
// stream get an 'error' listener: a stream 'error' event with no listener
// is thrown as an uncaught exception.
request(file)
    .on('error', function (err) {
        console.log(err);
    })
    .pipe(fs.createWriteStream('li.png'))
    .on('error', function (err) {
        console.log(err);
    });
  • 设置 header
// Send a request with a custom User-Agent header and log the response object.
request(
    {
        url: 'https://www.baidu.com',
        headers: { 'User-Agent': 'request' },
    },
    (requestError, response) => {
        // Rethrow a failed request rather than continuing without a response.
        if (requestError) {
            throw requestError;
        }
        console.log(response);
    }
);
  • cookie
// Request the page with a browser-like User-Agent and a Cookie header, then
// search the resolved value for a marker string.
rp({
    url: 'https://www.baidu.com',
    headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
        Cookie: 'abc=123',
    },
})
    .then((pageBody) => {
        console.log('res');
        const match = /q857637472/i.exec(pageBody);
        // Nothing to print when the marker is absent.
        if (!match || !match[0]) {
            return;
        }
        console.log(match[0]);
    })
    .catch((failure) => {
        console.log(failure);
    });
  • promise 异步化
// `res` holds the chained promise returned by rp(...).then(...).catch(...),
// not the page content itself.
const res = rp('http://www.baidu.com')
    .then((htmlString) => {
        console.log('2222');
        console.log(htmlString);
        // Process html...
    })
    .catch(() => {
        console.log('err');
        // Crawling failed...
    });
  • 乱码设置
const iconv = require("iconv-lite");
/**
 * Callback for request.get: decodes the raw response body from gb2312.
 *
 * Declared as a function declaration so it is already defined when
 * request.get is called below. With `var response = function ...` placed
 * after the call, `response` was still undefined when it was passed in.
 *
 * @param {Error|null} err - request error, if any
 * @param {object} res - response object (unused here)
 * @param {Buffer} body - raw body; a Buffer because `encoding` is null
 */
function response(err, res, body) {
    // On failure there is no body to decode.
    if (err) {
        console.log(err);
        return;
    }
    // iconv.decode turns the gb2312-encoded buffer into a JavaScript string.
    var buf = iconv.decode(body, 'gb2312');

}

request.get({
    url : url ,
    encoding : null // make body a raw Buffer instead of a decoded string
}, response);

cheerio

用来解析HTML的库

  • 加载html
// Sample markup, assembled line by line; the joined string is identical to
// the multi-line literal it replaces.
const html = [
    '<ul>',
    '      <li>1</li>',
    '      <li>2</li>',
    '      <li>3</li>',
    '    </ul>',
].join('\n');

// Parse the markup and get a jQuery-style query function.
const $ = cheerio.load(html);

// Select the second <li> inside its parent.
const li2 = $('li:nth-child(2)');
console.log(li2); // logs the selection object for the matched node
  • 解析
// Read the text content and the data-id attribute of the selected node,
// then print them in that order.
const text = li2.text();
const id = li2.attr('data-id');
console.log(text);
console.log(id);
  • 遍历
// Iterate over every <li> that is a direct child of a <ul>.
// .each is a no-op on an empty selection, so no length guard is needed.
const lis = $('ul>li');
lis.each((i, el) => {
    console.log(`index:${i}`);
    // `el` is a raw node object; interpolating it directly prints
    // "[object Object]", so wrap it with $() and print its text instead.
    console.log(`el:${$(el).text()}`);
});
上一篇:mac 使用记录


下一篇:javascript-如何使用cheerio捕获具有不同类的元素?