用 Python 爬取到这样一条数据:
<td class="ip" id="ip"><p style="display: none;"></p><span></span><p style="display: none;">4</p><span></span><p style="display: none;">2</p><span>2</span><div style="display:inline-block;">2</div><span style="display:inline-block;">2</span><div style="display:inline-block;">.9</div><p style="display: none;">4</p><span>4</span><span style="display:inline-block;">.1</span><span style="display:inline-block;"></span><div style="display:inline-block;">49</div><span style="display:inline-block;">.</span><div style="display:inline-block;">13</div><span style="display:inline-block;"></span><span style="display:inline-block;"></span><span style="display:inline-block;"></span><p style="display: none;">4</p><span>4</span>:<span class="port GEGE">8976</span></td>
格式化后是这样子的:
<td class="ip" id="ip"><p style="display: none;"></p><span></span>
<p style="display: none;">4</p><span></span>
<p style="display: none;">2</p><span>2</span>
<div style="display:inline-block;">2</div>
<span style="display:inline-block;">2</span>
<div style="display:inline-block;">.9</div>
<p style="display: none;">4</p><span>4</span><span style="display:inline-block;">.1</span><span
style="display:inline-block;"></span>
<div style="display:inline-block;">49</div>
<span style="display:inline-block;">.</span>
<div style="display:inline-block;">13</div>
<span style="display:inline-block;"></span><span style="display:inline-block;"></span><span
style="display:inline-block;"></span>
<p style="display: none;">4</p><span>4</span>:<span class="port GEGE">8976</span></td>
通过正则获取的数据是这个样子:
222.92.149.134.8976
代码如下:
/*
 * Recover the visible "ip:port" text from an obfuscated <td> cell.
 *
 * The cell mixes decoy elements with the ones that carry the real text.
 * Only these four fragment shapes are collected, in document order:
 *
 *   <span>DIGITS</span>                              (1 to 4 digits)
 *   <span style="display:inline-block;">TEXT</span>
 *   <div style="display:inline-block;">TEXT</div>
 *   :<span class="port GEGE">DIGITS</span>           (0 to 5 digits)
 *
 * where TEXT is ".", ".DIGITS" or "DIGITS". Everything else is skipped
 * because it matches none of the shapes: the <p style="display: none;">
 * decoys and the empty <span>/<div> elements.
 */
var str = '<td class="ip" id="ip"><p style="display: none;"></p><span></span><p style="display: none;">4</p><span></span><p style="display: none;">2</p><span>2</span><div style="display:inline-block;">2</div><span style="display:inline-block;">2</span><div style="display:inline-block;">.9</div><p style="display: none;">4</p><span>4</span><span style="display:inline-block;">.1</span><span style="display:inline-block;"></span><div style="display:inline-block;">49</div><span style="display:inline-block;">.</span><div style="display:inline-block;">13</div><span style="display:inline-block;"></span><span style="display:inline-block;"></span><span style="display:inline-block;"></span><p style="display: none;">4</p><span>4</span>:<span class="port GEGE">8976</span></td>';

// One alternative per visible fragment shape listed in the header comment.
// Declared with `var`: a bare `pattern = ...` assignment creates an implicit
// global and throws a ReferenceError in strict mode and in ES modules.
var pattern = /(\<span\>\d{1,4}\<\/span\>)|(\<span\sstyle\=\"display\:inline\-block\;\"\>(\.|\.\d{1,}|\d{1,})\<\/span\>)|(\<div\sstyle\=\"display\:inline\-block\;\"\>(\.|\.\d{1,}|\d{1,})\<\/div\>)|(\:\<span\sclass\=\"port\sGEGE\"\>\d{0,5}\<\/span\>)/ig;

// Pulls the text tokens out of a single matched fragment: the leading ":" of
// the port fragment, a run of digits with an optional leading ".", or a lone
// ".". Hoisted out of the loop; String.prototype.match resets lastIndex for
// global regexes, so sharing the instance across calls is safe.
var tokenPattern = /(^\:)|(\.{0,1}\d{1,5})|\./ig;

/**
 * Find every visible fragment in an obfuscated cell.
 *
 * @param {string} html - Markup of the obfuscated cell.
 * @returns {string[]} Matched fragments in document order; an empty array
 *   when the markup contains none.
 */
function matchVisibleFragments(html) {
  // match() returns null (not []) when a global regex finds nothing.
  return html.match(pattern) || [];
}

/**
 * Extract the text tokens from a list of matched fragments.
 *
 * @param {string[]} fragments - Fragments as returned by matchVisibleFragments.
 * @returns {string[]} Text tokens in order; joining them with '' yields the
 *   visible address text.
 */
function collectAddressTokens(fragments) {
  var tokens = [];
  for (var i = 0; i < fragments.length; i++) {
    var found = fragments[i].match(tokenPattern);
    if (found) {
      tokens = tokens.concat(found);
    }
  }
  return tokens;
}

var result = matchVisibleFragments(str);
console.log(result);

var numArr = collectAddressTokens(result);
console.log(numArr.join(''));