爬虫之遇到521,破解cookie之js监测到无头浏览器

使用python、scrapy

import execjs# 安装pip install PyExecJS  用这种方式只是为了调试,实际使用中还是要用js引擎v8

看了不少博客,都说要用 PhantomJS、Chrome 之类的浏览器来模拟,但其实很多网站的 JS 能检测出这种操作。

下面就以一个会检测无头浏览器的网站为例(具体网站不公开!):

<script>var x="attachEvent@String@@@if@Sun@e@PqwgnPCMHw@781@@@fromCharCode@chars@match@@@@substr@new@@GMT@@onreadystatechange@@@@innerHTML@@@@challenge@@location@3@@DOMContentLoaded@D@Expires@0xFF@Mar@for@__cdn_clearance@@addEventListener@@href@div@@13@else@@join@@@@2@document@@cookie@https@@pathname@@@d@while@@@31@@firstChild@charCodeAt@parseInt@try@@a@Path@@catch@@1@@window@function@g@@15@false@reverse@MLPxP5@@RegExp@replace@return@headless@search@f@1554042973@charAt@@split@@@@length@0xEDB88320@36@BGZKFSTk@1500@createElement@toString@@@8@rOm9XFMtA3QKV7nYsPGT4lifyWwkq5vcjH2IdxUoCbhERLaz81DNB6@setTimeout@eval@@var@JgSe0upZ@@19@captcha@Array@@toLowerCase@0@".replace(/@*$/,"").split("@"),y="54 36=3f(){51('1a.20=1a.2g+1a.44.41(/[\\?|&]58-18/,\\'\\')',4h);2b.2d='1j=46.9|5c|'+(3f(){54 36=59(+[[1b+(+!+[])+((+!+[])+[~~{}])/[(+!+[])+(+!+[])]]]),2m=['1e',(-~{}/~~[]+[]).47(~~[])+[3e.43+[]+[[]][5c]][5c].47(~~''),'4g%',[-~[[2a]*(-~~~[]+(+!+[])+(+!+[]))]],'8%',[2a],'3l',[(-~[]|2a)]];1i(54 1c=5c;1c<2m.4d;1c++){36.3k()[1c]=2m[1c]};42 36.26('')})()+';1f=6, 30-1h-57 3i:4f:23 l;38=/;'};5((3f(){35{42 !!3e.1l;}3a(7){42 3j;}})()){2b.1l('1d',36,3j)}24{2b.1('10',36)}",f=function(x,y){var a=0,b=0,c=0;x=x.split("");y=y||99;while((a=x.shift())&&(b=a.charCodeAt(0)-77.5))c=(Math.abs(b)<13?(b+48.5):parseInt(a,36))+y*c;return c},z=f(y.match(/\w/g).sort(function(x,y){return f(x)-f(y)}).pop());while(z++)try{eval(y.replace(/\b\w+\b/g, function(y){return x[f(y,z)-1]||("_"+y)}));break}catch(_){}</script

美化:

<script>
    var x = "attachEvent@String@@@if@Sun@e@PqwgnPCMHw@781@@@fromCharCode@chars@match@@@@substr@new@@GMT@@onreadystatechange@@@@innerHTML@@@@challenge@@location@3@@DOMContentLoaded@D@Expires@0xFF@Mar@for@__cdn_clearance@@addEventListener@@href@div@@13@else@@join@@@@2@document@@cookie@https@@pathname@@@d@while@@@31@@firstChild@charCodeAt@parseInt@try@@a@Path@@catch@@1@@window@function@g@@15@false@reverse@MLPxP5@@RegExp@replace@return@headless@search@f@1554042973@charAt@@split@@@@length@0xEDB88320@36@BGZKFSTk@1500@createElement@toString@@@8@rOm9XFMtA3QKV7nYsPGT4lifyWwkq5vcjH2IdxUoCbhERLaz81DNB6@setTimeout@eval@@var@JgSe0upZ@@19@captcha@Array@@toLowerCase@0@".replace(/@*$/, "").split("@"),
        y = "54 36=3f(){51('1a.20=1a.2g+1a.44.41(/[\\?|&]58-18/,\\'\\')',4h);2b.2d='1j=46.9|5c|'+(3f(){54 36=59(+[[1b+(+!+[])+((+!+[])+[~~{}])/[(+!+[])+(+!+[])]]]),2m=['1e',(-~{}/~~[]+[]).47(~~[])+[3e.43+[]+[[]][5c]][5c].47(~~''),'4g%',[-~[[2a]*(-~~~[]+(+!+[])+(+!+[]))]],'8%',[2a],'3l',[(-~[]|2a)]];1i(54 1c=5c;1c<2m.4d;1c++){36.3k()[1c]=2m[1c]};42 36.26('')})()+';1f=6, 30-1h-57 3i:4f:23 l;38=/;'};5((3f(){35{42 !!3e.1l;}3a(7){42 3j;}})()){2b.1l('1d',36,3j)}24{2b.1('10',36)}",
        f = function (x, y) {
            var a = 0,
                b = 0,
                c = 0;
            x = x.split("");
            y = y || 99;
            while ((a = x.shift()) && (b = a.charCodeAt(0) - 77.5)) c = (Math.abs(b) < 13 ? (b + 48.5) : parseInt(a, 36)) + y * c;
            return c
        },
        z = f(y.match(/\w/g).sort(function (x, y) {
            return f(x) - f(y)
        }).pop());
    while (z++) try {
        eval(y.replace(/\b\w+\b/g, function (y) {
            return x[f(y, z) - 1] || ("_" + y)
        }));
        break
    } catch (_) {}
</script>
发现问题出在这里:可以把 eval 替换成 alert 或 console.log 看看结果。不多说,上代码:

# First cookie value: the first ';'-separated segment of the Set-Cookie response header.
__jsluid = response.headers["Set-Cookie"].split(';')[0]
cookie1 = __jsluid
# Decode: take the body of the first inline <script> in the response and swap 'eval'
# for 'return', so that calling the wrapper function hands back the decoded source
# instead of executing it. Note that str.replace substitutes every occurrence of
# 'eval' in the script text, not only the call itself.
get_js = re.findall(r'<script>(.*?)</script>', resp_body)[0].replace('eval', 'return')
# Wrap the script in a named function so it can be invoked through PyExecJS.
resHtml = "function getClearance(){" + get_js + "};"
ctx = execjs.compile(resHtml)
# First-stage decoded result (the value returned in place of the eval call).
temp1 = ctx.call('getClearance')

结果返回的又是一段js:

var _36=function(){setTimeout('location.href=location.pathname+location.search.replace(/[\?|&]captcha-challenge/,\'\')',1500);document.cookie='__cdn_clearance=1554042973.781|0|'+(function(){var _36=Array(+[[3+(+!+[])+((+!+[])+[~~{}])/[(+!+[])+(+!+[])]]]),_2m=['D',(-~{}/~~[]+[]).charAt(~~[])+[window.headless+[]+[[]][0]][0].charAt(~~''),'BGZKFSTk%',[-~[[2]*(-~~~[]+(+!+[])+(+!+[]))]],'PqwgnPCMHw%',[2],'MLPxP5',[(-~[]|2)]];for(var _1c=0;_1c<_2m.length;_1c++){_36.reverse()[_1c]=_2m[_1c]};return _36.join('')})()+';Expires=Sun, 31-Mar-19 15:36:13 GMT;Path=/;'};if((function(){try{return !!window.addEventListener;}catch(e){return false;}})()){document.addEventListener('DOMContentLoaded',_36,false)}else{document.attachEvent('onreadystatechange',_36)}

美化:

// Decoded challenge script, reformatted for reading. Tokens are identical to the
// single-line decoder output; only whitespace and line breaks differ.

// _36: callback that schedules a redirect and writes the clearance cookie.
var _36 = function () {
    // After 1500 ms, reload the current path with the captcha-challenge query
    // parameter stripped.
    setTimeout('location.href=location.pathname+location.search.replace(/[\?|&]captcha-challenge/,\'\')', 1500);
    // Cookie value = fixed prefix + a string assembled by the inner function + attributes.
    document.cookie = '__cdn_clearance=1554042973.781|0|' + (function () {
        var _36 = Array(+[[3+(+!+[])+((+!+[])+[~~{}])/[(+!+[])+(+!+[])]]]),
            _2m = [
                'D',
                // The second operand takes the first character of the string form of
                // window.headless, so the cookie value depends on that property.
                (-~{}/~~[]+[]).charAt(~~[])+[window.headless+[]+[[]][0]][0].charAt(~~''),
                'BGZKFSTk%',
                [-~[[2]*(-~~~[]+(+!+[])+(+!+[]))]],
                'PqwgnPCMHw%',
                [2],
                'MLPxP5',
                [(-~[]|2)]
            ];
        // The array is reversed in place before every assignment, which scrambles
        // the order in which the fragments end up.
        for (var _1c = 0; _1c < _2m.length; _1c++) {
            _36.reverse()[_1c] = _2m[_1c]
        };
        return _36.join('')
    })() + ';Expires=Sun, 31-Mar-19 15:36:13 GMT;Path=/;'
};
// Register the callback with whichever event API the environment provides.
if ((function () {
    try {
        return !!window.addEventListener;
    } catch (e) {
        return false;
    }
})()) {
    document.addEventListener('DOMContentLoaded', _36, false)
} else {
    document.attachEvent('onreadystatechange', _36)
}

可以看到代码访问了浏览器的 window 对象,其中 window.headless 就是用来检测无头浏览器的,所以说现在单靠无头浏览器已经解决不了问题了!

截取有用的部分:

document.cookie = '__cdn_clearance=1554042973.781|0|' + (function () {
            var _36 = Array(+[
                    [3 + (+!+[]) + ((+!+[]) + [~~{}]) / [(+!+[]) + (+!+[])]]
                ]),
                _2m = ['D', (-~{}
                        /~~[]+[]).charAt(~~[])+[window.headless+[]+[[]][0]][0].charAt(~~''),'BGZKFSTk%',[-~[[2]*(-~~~[]+(+!+[])+(+!+[]))]],'PqwgnPCMHw%',[2],'MLPxP5',[(-~[]|2)]];for(var _1c=0;_1c<_2m.length;_1c++){_36.reverse()[_1c]=_2m[_1c]};return _36.join('')

 

具体的破解我就不公开了。。。。

 

注:这些是工作中遇到的一些问题,我的方法不一定是最好的,欢迎大家一起交流,扣扣:571848990

上一篇:Codeforces 1C(外接圆与正多边形)


下一篇:485. 最大连续 1 的个数