Linux之curl命令详解

2021-11-19 00:27:56

curl命令是一个功能强大的网络工具，它能够通过http、ftp等方式下载文件，也能够上传文件。其实curl远不止前面所说的那些功能，大家可以通过man curl阅读手册页获取更多的信息。类似的工具还有wget。

curl命令使用了libcurl库来实现，libcurl库常用在C程序中用来处理HTTP请求，curlpp是libcurl的一个C++封装，这几个东西可以用在抓取网页、网络监控等方面的开发，而curl命令可以帮助来解决开发过程中遇到的问题。

常用参数

curl命令参数很多，这里只列出我曾经用过、特别是在shell脚本中用到过的那些。

-v/--verbose 小写的v参数，用于打印更多信息，包括发送的请求信息，这在调试脚本时特别有用。

-m/--max-time <seconds> 指定处理的最大时长

-H/--header <header> 指定请求头参数

-s/--silent 减少输出的信息，比如进度

--connect-timeout <seconds> 指定尝试连接的最大时长

-x/--proxy <proxyhost[:port]> 指定代理服务器地址和端口，端口默认为1080

-T/--upload-file <file> 指定上传文件路径

-o/--output <file> 指定输出文件名称

-d/--data/--data-ascii <data> 指定POST的内容

--retry <num> 指定重试次数

-e/--referer <URL> 指定引用地址

-I/--head 仅返回头部信息，使用HEAD请求

使用示例

示例一获取指定网页

[root@jfht ~]# curl http://www.sunrisecorp.net/ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=gbk" /> <meta name="title" content="欢迎您 - 上海腾一" /> <meta name="keyword" content="上海腾一，融合通信，电子商务，语音通信，数据通信，基站工程外包托管，物联网，网站建设，电子商务解决方案，移动互联网，短信，彩信，呼叫中心，多方会议，PBX，IVR，电子商务策划方案，设备代维，网络优化，通信工程，电信增值，3G" /> <meta name="description" content="上海腾一信息技术有限公司专注于电信增值、通信工程、电子商务等领域，拥有近十年的经验。" /> <title> 欢迎您 - 上海腾一 </title>

插播一下广告：上海腾一信息技术有限公司是一家致力于通信工程、电子商务和电信增值业务的公司，请访问官方网址： http://www.sunrisecorp.net/ 。

此处省略掉网页内容。

</body> </html>[root@jfht ~]#

示例二查看响应头信息

[root@jfht ~]# curl -I http://www.sunrisecorp.net/ HTTP/1.1 200 OK Server: Apache-Coyote/1.1 Accept-Ranges: bytes ETag: W/"17801-1285643951000" Last-Modified: Tue, 28 Sep 2010 03:19:11 GMT Content-Type: text/html Content-Length: 17801 Date: Tue, 12 Oct 2010 12:49:20 GMT
[root@jfht ~]#

示例三检查网页是否可正常访问

以下是一个tomcat监控脚本的部分内容。

# usage: check_once <port> <path>
# Probe http://localhost:<port>/<path> with a HEAD request via curl and
# restart Tomcat when the response headers do not contain "200 OK".
# Arguments:
#   $1 - port of the local server
#   $2 - path of the page to probe, relative to the server root
# Outputs: a blank line followed by timestamped status lines on stdout.
# Relies on restart(), which is defined outside this snippet.
check_once() {
  echo
  echo "$(date) Tomcat check once"
  # 2008.08.21 -I/--head -s/--silent
  #if curl -s -I --connect-timeout 5 --max-time 10 http://localhost:$1/; then
  # 2010.02.16 add 200 OK test
  # The URL is quoted so that a path containing spaces or glob characters
  # reaches curl as one single argument.
  if curl -s -I --connect-timeout 5 --max-time 10 "http://localhost:$1/$2" | grep -q '200 OK'; then
    echo "$(date) Tomcat maybe OK"
  else
    echo "$(date) Tomcat maybe FAULT"
    restart
  fi
}
# usage: check_loop <port> <path>
# Loop forever: sleep 60 seconds, then run check_once for the given port and
# path, appending its output to a per-day log file
# $CATALINA_HOME/logs/check.YYYY-MM-DD.log.
# Arguments:
#   $1 - port, passed through to check_once
#   $2 - path, passed through to check_once
# Globals: CATALINA_HOME (read)
# Relies on check_once(); never returns on its own.
check_loop() {
  while true; do
    sleep 60
    # Arguments and the redirect target are quoted so that spaces in the path
    # or in CATALINA_HOME do not split words or cause an ambiguous redirect.
    check_once "$1" "$2" >>"$CATALINA_HOME/logs/check.$(date +%Y-%m-%d).log"
  done
}
# usage: monitor <path>
# Monitor the local page given by <path>.
# Detects the Tomcat port by looking for a Connector on port 80 or 8080 in
# $CATALINA_HOME/conf/server.xml, then, if status succeeds, starts check_loop
# in the background for that port and path.
# Arguments:
#   $1 - path of the page to monitor, passed through to check_loop
# Globals: CATALINA_HOME (read), PORT (written)
# Outputs: the matching Connector line(s) printed by grep, plus status
#          messages, on stdout.
# Returns: 12 when neither port is found in server.xml.
# Relies on status() and check_loop(), defined outside this block.
# 2008.06.26
# 2010.09.20 add path parameter
monitor() {
  PORT=80
  # The config path is quoted so that a CATALINA_HOME containing spaces is
  # passed to grep as one file name.
  if grep 'Connector port="80"' "$CATALINA_HOME/conf/server.xml"; then
    PORT=80
  elif grep 'Connector port="8080"' "$CATALINA_HOME/conf/server.xml"; then
    PORT=8080
  else
    echo "Cannot detect server port for Tomcat"
    return 12
  fi
  echo "Tomcat server port is $PORT"
  if status; then
    check_loop "$PORT" "$1" &
    #check_loop $PORT "$1"
  fi
}

# usage: check_once <port> <path>
# Probe http://localhost:<port>/<path> with a HEAD request via curl and
# restart Tomcat when the response headers do not contain "200 OK".
# Arguments:
#   $1 - port of the local server
#   $2 - path of the page to probe, relative to the server root
# Outputs: a blank line followed by timestamped status lines on stdout.
# Relies on restart(), which is defined outside this snippet.
check_once() {
  echo
  echo "$(date) Tomcat check once"
  # 2008.08.21 -I/--head -s/--silent
  #if curl -s -I --connect-timeout 5 --max-time 10 http://localhost:$1/; then
  # 2010.02.16 add 200 OK test
  # The URL is quoted so that a path containing spaces or glob characters
  # reaches curl as one single argument.
  if curl -s -I --connect-timeout 5 --max-time 10 "http://localhost:$1/$2" | grep -q '200 OK'; then
    echo "$(date) Tomcat maybe OK"
  else
    echo "$(date) Tomcat maybe FAULT"
    restart
  fi
}
# usage: check_loop <port> <path>
# Loop forever: sleep 60 seconds, then run check_once for the given port and
# path, appending its output to a per-day log file
# $CATALINA_HOME/logs/check.YYYY-MM-DD.log.
# Arguments:
#   $1 - port, passed through to check_once
#   $2 - path, passed through to check_once
# Globals: CATALINA_HOME (read)
# Relies on check_once(); never returns on its own.
check_loop() {
  while true; do
    sleep 60
    # Arguments and the redirect target are quoted so that spaces in the path
    # or in CATALINA_HOME do not split words or cause an ambiguous redirect.
    check_once "$1" "$2" >>"$CATALINA_HOME/logs/check.$(date +%Y-%m-%d).log"
  done
}
# usage: monitor <path>
# Monitor the local page given by <path>.
# Detects the Tomcat port by looking for a Connector on port 80 or 8080 in
# $CATALINA_HOME/conf/server.xml, then, if status succeeds, starts check_loop
# in the background for that port and path.
# Arguments:
#   $1 - path of the page to monitor, passed through to check_loop
# Globals: CATALINA_HOME (read), PORT (written)
# Outputs: the matching Connector line(s) printed by grep, plus status
#          messages, on stdout.
# Returns: 12 when neither port is found in server.xml.
# Relies on status() and check_loop(), defined outside this block.
# 2008.06.26
# 2010.09.20 add path parameter
monitor() {
  PORT=80
  # The config path is quoted so that a CATALINA_HOME containing spaces is
  # passed to grep as one file name.
  if grep 'Connector port="80"' "$CATALINA_HOME/conf/server.xml"; then
    PORT=80
  elif grep 'Connector port="8080"' "$CATALINA_HOME/conf/server.xml"; then
    PORT=8080
  else
    echo "Cannot detect server port for Tomcat"
    return 12
  fi
  echo "Tomcat server port is $PORT"
  if status; then
    check_loop "$PORT" "$1" &
    #check_loop $PORT "$1"
  fi
}

这个脚本的执行方式是 monitor <path>，比如monitor main/index.html。下面是执行时输出的日志信息片段。

2010年 10月 09日星期六 15:20:38 CST Tomcat check once 2010年 10月 09日星期六 15:20:46 CST Tomcat maybe OK
2010年 10月 09日星期六 15:21:46 CST Tomcat check once 2010年 10月 09日星期六 15:21:57 CST Tomcat maybe FAULT Tomcat is now running, not stopped: 0 Tomcat is now running, not stopped: 1 Tomcat is now running, not stopped: 2 Tomcat is now running, not stopped: 3 Tomcat is now running, not stopped: 4 Tomcat is now running, not stopped: 5 Tomcat is now running, not stopped: 6 Tomcat is now running, not stopped: 7 Tomcat is now running, not stopped: 8 Tomcat is now running, not stopped: 9 Tomcat killed use SIGKILL Tomcat stopped Starting tomcat
2010年 10月 09日星期六 15:23:09 CST Tomcat check once 2010年 10月 09日星期六 15:23:09 CST Tomcat maybe OK
2010年 10月 09日星期六 15:24:09 CST Tomcat check once 2010年 10月 09日星期六 15:24:09 CST Tomcat maybe OK

示例四另一个检查网页是否正常的脚本

# URL of the page to check.
URL="http://www.sunrisecorp.net/"

# usage: curlit
# Send a silent HEAD request to $URL (15 s connect timeout, 30 s overall
# limit) and filter the response headers for the line "HTTP/1.1 200 OK".
# Outputs: the matching status line, if any, on stdout.
# Returns: grep's exit status (0 when the line was found).
curlit() {
  curl -s -I \
    --connect-timeout 15 \
    --max-time 30 \
    "$URL" \
    | grep 'HTTP/1.1 200 OK'
}
# Raise the alarm only once the check has failed MIN_ALARM times in a row.
MIN_ALARM=10

# Print the failure count stored in the curlit_error file.
# Empty or non-numeric content is reported as 0.
_curlit_error_count() {
  local count
  count=$(cat curlit_error 2>/dev/null)
  case "$count" in
    '' | *[!0-9]*) count=0 ;;
  esac
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  echo "$((10#$count))"
}

# usage: doit
# Run one availability check with curlit and keep the number of consecutive
# failures in the file curlit_error (in the current directory).
#   - on failure: increment the count; when it reaches MIN_ALARM, call
#     notify_curlit_error
#   - on success: if a count file exists, remove it, and call
#     notify_curlit_recovered when the stored count was at least MIN_ALARM
# Globals: MIN_ALARM (read), N (written)
# Relies on now(), curlit(), notify_curlit_error() and
# notify_curlit_recovered(), which are defined outside this block.
doit() {
  echo "===== $(now) ====="
  if ! curlit; then
    echo "$(now) bbdx nss access failed"
    N=1
    if [ -e curlit_error ]; then
      N=$(($(_curlit_error_count) + 1))
    fi
    echo "$(now) N=$N"
    echo "$N" >curlit_error
    # Numeric comparison: notify exactly once, when the threshold is reached.
    if [ "$N" -eq "$MIN_ALARM" ]; then
      echo "$(now) do notify"
      touch curlit_error
      notify_curlit_error
    fi
  else
    if [ -e curlit_error ]; then
      echo "$(now) recovered"
      N=$(_curlit_error_count)
      echo "$(now) N=$N"
      rm -f curlit_error
      if [ "$N" -ge "$MIN_ALARM" ]; then
        notify_curlit_recovered
      fi
    fi
  fi
}
# Run one check, appending stdout and stderr to log/curlit.log.
# The log directory is created first: without it the redirection fails and
# doit is never executed.
mkdir -p log && doit >>log/curlit.log 2>&1

# URL of the page to check.
URL="http://www.sunrisecorp.net/"

# usage: curlit
# Send a silent HEAD request to $URL (15 s connect timeout, 30 s overall
# limit) and filter the response headers for the line "HTTP/1.1 200 OK".
# Outputs: the matching status line, if any, on stdout.
# Returns: grep's exit status (0 when the line was found).
curlit() {
  curl -s -I \
    --connect-timeout 15 \
    --max-time 30 \
    "$URL" \
    | grep 'HTTP/1.1 200 OK'
}
# Raise the alarm only once the check has failed MIN_ALARM times in a row.
MIN_ALARM=10

# Print the failure count stored in the curlit_error file.
# Empty or non-numeric content is reported as 0.
_curlit_error_count() {
  local count
  count=$(cat curlit_error 2>/dev/null)
  case "$count" in
    '' | *[!0-9]*) count=0 ;;
  esac
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  echo "$((10#$count))"
}

# usage: doit
# Run one availability check with curlit and keep the number of consecutive
# failures in the file curlit_error (in the current directory).
#   - on failure: increment the count; when it reaches MIN_ALARM, call
#     notify_curlit_error
#   - on success: if a count file exists, remove it, and call
#     notify_curlit_recovered when the stored count was at least MIN_ALARM
# Globals: MIN_ALARM (read), N (written)
# Relies on now(), curlit(), notify_curlit_error() and
# notify_curlit_recovered(), which are defined outside this block.
doit() {
  echo "===== $(now) ====="
  if ! curlit; then
    echo "$(now) bbdx nss access failed"
    N=1
    if [ -e curlit_error ]; then
      N=$(($(_curlit_error_count) + 1))
    fi
    echo "$(now) N=$N"
    echo "$N" >curlit_error
    # Numeric comparison: notify exactly once, when the threshold is reached.
    if [ "$N" -eq "$MIN_ALARM" ]; then
      echo "$(now) do notify"
      touch curlit_error
      notify_curlit_error
    fi
  else
    if [ -e curlit_error ]; then
      echo "$(now) recovered"
      N=$(_curlit_error_count)
      echo "$(now) N=$N"
      rm -f curlit_error
      if [ "$N" -ge "$MIN_ALARM" ]; then
        notify_curlit_recovered
      fi
    fi
  fi
}
# Run one check, appending stdout and stderr to log/curlit.log.
# The log directory is created first: without it the redirection fails and
# doit is never executed.
mkdir -p log && doit >>log/curlit.log 2>&1

示例五使用HttpPost上传数据

一个用于http post的脚本。

#!/bin/sh
# Build an XML request document, send it to the UIG_NEWINT interface as the
# body of an HTTP POST, and print both the request and the response.
# Files written in the current directory:
#   reqtmp.xml - the generated request
#   rsptmp.xml - the response body saved by curl

# Value placed in <IDValue> (masked placeholder in this example).
MS=1350514xxxx
# Timestamp used for TransIDO, ProcessTime and OprTime.
TM=$(date +"%Y%m%d%H%M%S")
# Current date, used for CutOffDay.
DT=$(date +"%Y%m%d")

# The EOF delimiter is unquoted, so $TM, $DT and $MS are expanded into the
# document.
cat <<EOF >reqtmp.xml
<?xml version="1.0" encoding="GBK" ?>
<OwnPlatForm>
<OrigDomain>QZT</OrigDomain>
<HomeDomain>BOSS</HomeDomain>
<ActivityCode>T5100001</ActivityCode>
<ActionCode>0</ActionCode>
<TransIDO>$TM</TransIDO>
<TransIDH></TransIDH>
<ProcessTime>$TM</ProcessTime>
<CutOffDay>$DT</CutOffDay>
<TestFlag>0</TestFlag>
<Content>
<![CDATA[
<BizProcReq>
<IDType>01</IDType>
<IDValue>$MS</IDValue>
<UserCity>14</UserCity>
<UserCounty>1419</UserCounty>
<OprCode>01</OprCode>
<BizType>51</BizType>
<OprTime>$TM</OprTime>
<OprSrc>27</OprSrc>
<ProductInfo>
<PrdCode>510001</PrdCode>
</ProductInfo>
</BizProcReq>
]]>
</Content>
</OwnPlatForm>
EOF
cat reqtmp.xml

URL="http://10.32.140.230:7092/fcgi-bin/UIG_NEWINT"
# --data-binary @file sends the file unmodified as the body of a POST request.
# Note: --upload-file would make curl issue an HTTP PUT instead of a POST.
curl --verbose --data-binary @reqtmp.xml --header "Content-Type: text/xml" "$URL" --output rsptmp.xml
cat rsptmp.xml

#!/bin/sh
# Build an XML request document, send it to the UIG_NEWINT interface as the
# body of an HTTP POST, and print both the request and the response.
# Files written in the current directory:
#   reqtmp.xml - the generated request
#   rsptmp.xml - the response body saved by curl

# Value placed in <IDValue> (masked placeholder in this example).
MS=1350514xxxx
# Timestamp used for TransIDO, ProcessTime and OprTime.
TM=$(date +"%Y%m%d%H%M%S")
# Current date, used for CutOffDay.
DT=$(date +"%Y%m%d")

# The EOF delimiter is unquoted, so $TM, $DT and $MS are expanded into the
# document.
cat <<EOF >reqtmp.xml
<?xml version="1.0" encoding="GBK" ?>
<OwnPlatForm>
<OrigDomain>QZT</OrigDomain>
<HomeDomain>BOSS</HomeDomain>
<ActivityCode>T5100001</ActivityCode>
<ActionCode>0</ActionCode>
<TransIDO>$TM</TransIDO>
<TransIDH></TransIDH>
<ProcessTime>$TM</ProcessTime>
<CutOffDay>$DT</CutOffDay>
<TestFlag>0</TestFlag>
<Content>
<![CDATA[
<BizProcReq>
<IDType>01</IDType>
<IDValue>$MS</IDValue>
<UserCity>14</UserCity>
<UserCounty>1419</UserCounty>
<OprCode>01</OprCode>
<BizType>51</BizType>
<OprTime>$TM</OprTime>
<OprSrc>27</OprSrc>
<ProductInfo>
<PrdCode>510001</PrdCode>
</ProductInfo>
</BizProcReq>
]]>
</Content>
</OwnPlatForm>
EOF
cat reqtmp.xml

URL="http://10.32.140.230:7092/fcgi-bin/UIG_NEWINT"
# --data-binary @file sends the file unmodified as the body of a POST request.
# Note: --upload-file would make curl issue an HTTP PUT instead of a POST.
curl --verbose --data-binary @reqtmp.xml --header "Content-Type: text/xml" "$URL" --output rsptmp.xml
cat rsptmp.xml

示例六使用proxy的脚本

# usage: do_sync_once <mobile> <codes> <area_id> <opening>
# Request http://host/boss/sync.jsp through the HTTP proxy 10.32.187.170:8080
# with a GET request (60 s overall limit) and print the response on stdout.
# Arguments (sent as query parameters):
#   $1 - mobile   -> mobile
#   $2 - codes    -> serviceCodes
#   $3 - area_id  -> areaId
#   $4 - opening  -> opening
# Globals written: mobile, codes, area_id, opening
# Returns: curl's exit status.
# NOTE(review): values are URL-encoded here, so callers are assumed to pass
# raw (not already percent-encoded) values - confirm against callers.
do_sync_once() {
  mobile=$1
  codes=$2
  area_id=$3
  opening=$4
  # --get appends the --data-urlencode pairs to the URL as its query string,
  # so values containing '&', spaces or non-ASCII text cannot corrupt it.
  curl --silent --max-time 60 --proxy http://10.32.187.170:8080 \
    --get \
    --data-urlencode "seq=1251747862492" \
    --data-urlencode "mobile=$mobile" \
    --data-urlencode "serviceCodes=$codes" \
    --data-urlencode "areaId=$area_id" \
    --data-urlencode "opening=$opening" \
    "http://host/boss/sync.jsp"
}

# usage: do_sync_once <mobile> <codes> <area_id> <opening>
# Request http://host/boss/sync.jsp through the HTTP proxy 10.32.187.170:8080
# with a GET request (60 s overall limit) and print the response on stdout.
# Arguments (sent as query parameters):
#   $1 - mobile   -> mobile
#   $2 - codes    -> serviceCodes
#   $3 - area_id  -> areaId
#   $4 - opening  -> opening
# Globals written: mobile, codes, area_id, opening
# Returns: curl's exit status.
# NOTE(review): values are URL-encoded here, so callers are assumed to pass
# raw (not already percent-encoded) values - confirm against callers.
do_sync_once() {
  mobile=$1
  codes=$2
  area_id=$3
  opening=$4
  # --get appends the --data-urlencode pairs to the URL as its query string,
  # so values containing '&', spaces or non-ASCII text cannot corrupt it.
  curl --silent --max-time 60 --proxy http://10.32.187.170:8080 \
    --get \
    --data-urlencode "seq=1251747862492" \
    --data-urlencode "mobile=$mobile" \
    --data-urlencode "serviceCodes=$codes" \
    --data-urlencode "areaId=$area_id" \
    --data-urlencode "opening=$opening" \
    "http://host/boss/sync.jsp"
}

示例七使用Google AJAX Search API进行搜索

# usage: google_search <STR>
# Run a Google web search for <STR> through the Google AJAX Search API and
# print the response on stdout. curl's stderr is discarded.
# Arguments:
#   $1 - search string (raw text; it is URL-encoded before being sent)
# Globals written: REF, KEY, STR
# Returns: curl's exit status.
google_search() {
  # Sent as the Referer header (-e).
  REF="http://codingstandards.iteye.com/"
  # NOTE(review): API key is hard-coded in the script; consider reading it
  # from the environment or a file instead.
  KEY="ABQIAAAAHg_ENG5Yq9pOZd19v64gyxTMcdcN4KfyGCBxustvF1FXdNe4WBQOej_ZiBgIK6-a4M3hTxcVfSkt2g"
  STR="$1"
  # Web search.
  # --get appends the --data-urlencode pairs to the URL as its query string,
  # so a search string with spaces, '&' or non-ASCII text stays intact.
  curl --retry 5 -e "$REF" --get \
    --data-urlencode "v=1.0" \
    --data-urlencode "q=$STR" \
    --data-urlencode "hl=zh-CN" \
    --data-urlencode "key=$KEY" \
    "http://ajax.googleapis.com/ajax/services/search/web" 2>/dev/null
  # Blog search (disabled alternatives)
  #curl -e $REF "http://ajax.googleapis.com/ajax/services/search/blogs?v=1.0&q=$STR&hl=zh-CN" 2>/dev/null
  #curl --retry 5 -e $REF "http://ajax.googleapis.com/ajax/services/search/blogs?v=1.0&q=$STR&hl=zh-CN" 2>/dev/null
  #curl --retry 5 -e "$REF" "http://ajax.googleapis.com/ajax/services/search/blogs?v=1.0&q=$STR&hl=zh-CN&key=$KEY" 2>/dev/null
}

# usage: google_search <STR>
# Run a Google web search for <STR> through the Google AJAX Search API and
# print the response on stdout. curl's stderr is discarded.
# Arguments:
#   $1 - search string (raw text; it is URL-encoded before being sent)
# Globals written: REF, KEY, STR
# Returns: curl's exit status.
google_search() {
  # Sent as the Referer header (-e).
  REF="http://codingstandards.iteye.com/"
  # NOTE(review): API key is hard-coded in the script; consider reading it
  # from the environment or a file instead.
  KEY="ABQIAAAAHg_ENG5Yq9pOZd19v64gyxTMcdcN4KfyGCBxustvF1FXdNe4WBQOej_ZiBgIK6-a4M3hTxcVfSkt2g"
  STR="$1"
  # Web search.
  # --get appends the --data-urlencode pairs to the URL as its query string,
  # so a search string with spaces, '&' or non-ASCII text stays intact.
  curl --retry 5 -e "$REF" --get \
    --data-urlencode "v=1.0" \
    --data-urlencode "q=$STR" \
    --data-urlencode "hl=zh-CN" \
    --data-urlencode "key=$KEY" \
    "http://ajax.googleapis.com/ajax/services/search/web" 2>/dev/null
  # Blog search (disabled alternatives)
  #curl -e $REF "http://ajax.googleapis.com/ajax/services/search/blogs?v=1.0&q=$STR&hl=zh-CN" 2>/dev/null
  #curl --retry 5 -e $REF "http://ajax.googleapis.com/ajax/services/search/blogs?v=1.0&q=$STR&hl=zh-CN" 2>/dev/null
  #curl --retry 5 -e "$REF" "http://ajax.googleapis.com/ajax/services/search/blogs?v=1.0&q=$STR&hl=zh-CN&key=$KEY" 2>/dev/null
}

码农公寓

常用参数

使用示例

示例一 获取指定网页

示例二 查看响应头信息

示例三 检查网页是否可正常访问

示例四 另一个检查网页是否正常的脚本

示例五 使用HttpPost上传数据

示例六 使用proxy的脚本

示例七 使用Google AJAX Search API进行搜索

相关文章