编程
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
import urllib, time, os, base64, json
import re, sys
from lxml import etree
from urllib.request import urlopen
from urllib import request
def getPage(base_url):
    """Fetch a page and append its header menu link texts to the result CSV.

    The page at ``base_url`` is downloaded and decoded as UTF-8, its
    ``<script>`` and ``<style>`` elements are stripped, and the direct text
    of every ``<a>`` matched by ``//*[@id="headContents"]/div[1]/a`` is
    appended, one per line, to ``/home/output/crawler_result.csv`` through
    ``writefile``.

    Args:
        base_url: URL of the page to crawl.

    Returns:
        ``None`` when the page was processed, ``False`` when any step failed.
        Failures are printed to stdout and never raised to the caller.
    """
    # Raw strings: the earlier non-raw '\<' / '\>' were invalid string
    # escapes, and '<' / '>' need no escaping inside a regex anyway.
    re_script = re.compile(r'<script[\S\s]+?</script>', re.I)
    re_style = re.compile(r'<style[\S\s]+?</style>', re.I)
    try:
        req = request.Request(base_url)
        # The context manager closes the HTTP response even if read/decode
        # raises, instead of leaking the connection.
        with request.urlopen(req) as page:
            content = page.read().decode("utf-8")
        # Drop script and style blocks before parsing.
        content = re_script.sub('', content)
        content = re_style.sub('', content)
        selector = etree.HTML(content)
        menu_items = selector.xpath("//*[@id=\"headContents\"]/div[1]/a")
        for item in menu_items:
            # An anchor with no direct text has ``text`` set to None; skip it
            # explicitly rather than relying on writefile to swallow the
            # resulting TypeError.
            if item.text is None:
                continue
            writefile("/home/output/crawler_result.csv", item.text)
    except Exception:
        # Deliberate best-effort boundary: report the failure and signal it
        # through the return value instead of propagating.
        print("Failed to read from %s." % base_url)
        print(sys.exc_info())
        return False
def writefile(filename, content):
    """Append ``content`` followed by a newline to ``filename``.

    The file is opened in append mode (created if missing) and written as
    UTF-8, matching the UTF-8 text the crawler decodes from the page.

    Args:
        filename: Path of the file to append to.
        content: Text to write; a newline is added after it.

    Returns:
        ``None`` on success, ``False`` if the line could not be built or
        written (for example an unwritable path or non-string ``content``).
    """
    try:
        # Build the line first so that a bad ``content`` fails before the
        # file is opened or created.
        line = content + "\n"
        # ``with`` guarantees the handle is closed even if the write fails.
        with open(filename, 'a', encoding='utf-8') as fp:
            fp.write(line)
    except (OSError, TypeError, ValueError):
        # Best-effort write: report failure through the return value, but do
        # not swallow KeyboardInterrupt/SystemExit like a bare ``except``.
        return False
# Script entry: crawl the target page once and log any unexpected error.
# Timestamp (local time) used to tag an error recorded for this run.
now = time.strftime('%Y-%m-%d %X', time.localtime())
try:
    # Page whose header menu is scraped.
    url = 'http://117.73.9.229:9090/'
    getPage(url)
except Exception as e:
    # Last-resort handler: append the error to Error.log and echo it.
    info = '%s\nError: %s' % (now, e)
    writefile('Error.log', info)
    print(info)
    # NOTE(review): the original indentation was lost; the pause is assumed
    # to belong to the error path - confirm against the original script.
    time.sleep(1)
filebeat:
filebeat.inputs:
- type: log
  # to do
  enabled: true
  # to do
  paths:
    - /app/httpd/logs/access_log

filebeat.config.modules:
  path: ${path.config}/modules.d/*.yml
  reload.enabled: false

setup.template.settings:
  index.number_of_shards: 3

setup.kibana:

output.logstash:
  # to do
  # The Logstash hosts
  hosts: ["localhost:5045"]

processors:
  - add_host_metadata: ~
  - add_cloud_metadata: ~
logs:
# Sample Logstash configuration for creating a simple
# Beats -> Logstash -> Elasticsearch pipeline.
input {
# to do
beats {
port => 5045
}
}
filter {
# to do
grok {
match => {"message" => "%{HTTPD_COMMONLOG}" }
}
}
output {
csv {
path => "/home/output/httpd-outfile.csv"
fields => ["clientip", "response"]
csv_options => {"col_sep" => " "}
}
stdout{
codec => rubydebug
}
}