Linux tracepoint分析

tracepoint介绍

Brendan Gregg 在其博客中对 TCP tracepoint 做了说明,taobao 技术博客也对该文章进行了翻译,现学习整理如下。

测试系统版本

$ uname -r
5.4.0-72-generic

利用bcc的tplist工具查看当前内核版本支持的tracepoint:

$ sudo /usr/share/bcc/tools/tplist -v 'tcp:*'
tcp:tcp_retransmit_skb
   const void * skbaddr;
   const void * skaddr;
   int state;
   __u16 sport;
   __u16 dport;
   __u8 saddr[4];
   __u8 daddr[4];
   __u8 saddr_v6[16];
   __u8 daddr_v6[16];
tcp:tcp_send_reset
   const void * skbaddr;
   const void * skaddr;
   int state;
   __u16 sport;
   __u16 dport;
   __u8 saddr[4];
   __u8 daddr[4];
   __u8 saddr_v6[16];
   __u8 daddr_v6[16];
tcp:tcp_receive_reset
   const void * skaddr;
   __u16 sport;
   __u16 dport;
   __u8 saddr[4];
   __u8 daddr[4];
   __u8 saddr_v6[16];
   __u8 daddr_v6[16];
   __u64 sock_cookie;
tcp:tcp_destroy_sock
   const void * skaddr;
   __u16 sport;
   __u16 dport;
   __u8 saddr[4];
   __u8 daddr[4];
   __u8 saddr_v6[16];
   __u8 daddr_v6[16];
   __u64 sock_cookie;
tcp:tcp_rcv_space_adjust
   const void * skaddr;
   __u16 sport;
   __u16 dport;
   __u8 saddr[4];
   __u8 daddr[4];
   __u8 saddr_v6[16];
   __u8 daddr_v6[16];
   __u64 sock_cookie;
tcp:tcp_retransmit_synack
   const void * skaddr;
   const void * req;
   __u16 sport;
   __u16 dport;
   __u8 saddr[4];
   __u8 daddr[4];
   __u8 saddr_v6[16];
   __u8 daddr_v6[16];
tcp:tcp_probe
   __u8 saddr[sizeof(struct sockaddr_in6)];
   __u8 daddr[sizeof(struct sockaddr_in6)];
   __u16 sport;
   __u16 dport;
   __u32 mark;
   __u16 data_len;
   __u32 snd_nxt;
   __u32 snd_una;
   __u32 snd_cwnd;
   __u32 ssthresh;
   __u32 snd_wnd;
   __u32 srtt;
   __u32 rcv_wnd;
   __u64 sock_cookie;

$ sudo /usr/share/bcc/tools/tplist -v 'sock:*'
sock:sock_rcvqueue_full
   int rmem_alloc;
   unsigned int truesize;
   int sk_rcvbuf;
sock:sock_exceed_buf_limit
   char name[32];
   long * sysctl_mem;
   long allocated;
   int sysctl_rmem;
   int rmem_alloc;
   int sysctl_wmem;
   int wmem_alloc;
   int wmem_queued;
   int kind;
sock:inet_sock_set_state
   const void * skaddr;
   int oldstate;
   int newstate;
   __u16 sport;
   __u16 dport;
   __u16 family;
   __u8 protocol;
   __u8 saddr[4];
   __u8 daddr[4];
   __u8 saddr_v6[16];
   __u8 daddr_v6[16];

对上述tracepoint的说明如下:

  • tcp:tcp_retransmit_skb: 跟踪重传。对于理解包括拥塞在内的网络问题很有用。将会在 Brendan Gregg 的 tcpretrans 工具中取代 kprobes。
  • tcp:tcp_retransmit_synack: 跟踪 SYN 和 SYN/ACK 重传。将它们剥离出来很有趣,是因为它们可以表明服务器的饱和度(listen backlog 丢包)而不是网络拥塞。它对应着 LINUX_MIB_TCPSYNRETRANS。
  • tcp:tcp_destroy_sock: 任何在内存中汇总 TCP 会话详情的程序都需要它,这些详情通常以 sock 地址作为主键索引。通过这个探测点可以得知会话已经结束,sock 地址即将被复用,因此截止到此时汇总的统计信息都应该先被使用,然后删除。
  • tcp:tcp_send_reset: 这个会跟踪一个有效 socket 下的 RST 发送,用以诊断相关类型的问题。
  • tcp:tcp_receive_reset: 跟踪 RST 接收。
  • tcp:tcp_probe: 用以跟踪 TCP 拥塞窗口,这也让一个更老的 TCP probe 模块废弃并移除。这个是 Masami Hiramatsu 提交并在 4.16 合入。
  • sock:inet_sock_set_state: 可以用来做很多事情。tcplife 工具就是其中一个,并且 Brendan Gregg 的 tcpconnect 和 tcpaccept bcc 工具也可以转换为使用这个 tracepoint。虽然可以添加单独的 tcp:tcp_connect 和 tcp:tcp_accept tracepoints (或者 tcp:tcp_active_open 和 tcp:tcp_passive_open), 但直接使用 sock:inet_sock_set_state 就能满足这些需求。

使用示例

编写ply程序,利用sock:inet_sock_set_state实现连接状态变化跟踪:

sudo ply 'tracepoint:sock/inet_sock_set_state { printf("saddr: %v sport: %v -> daddr: %v dport: %v, old_state: %v, new_state: %v\n", data->saddr, data->sport, data->daddr, data->dport, data->oldstate, data->newstate);}'

输出如下:

$  sudo ply 'tracepoint:sock/inet_sock_set_state { printf("saddr: %v sport: %v -> daddr: %v dport: %v, old_state: %v, new_state: %v\n", data->saddr, data->sport, data->daddr, data->dport, data->oldstate, data->newstate);}'
ply: active
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 47600 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 2, new_state: 1
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [13, 225, 93, 17] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 48316 -> daddr: [13, 225, 93, 17] dport: 443, old_state: 2, new_state: 1
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [99, 84, 203, 13] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 42292 -> daddr: [99, 84, 203, 13] dport: 443, old_state: 2, new_state: 1
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [104, 193, 90, 87] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 47610 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 2, new_state: 1
saddr: [192, 168, 136, 163] sport: 47612 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 2, new_state: 1
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 47606 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 2, new_state: 1
saddr: [192, 168, 136, 163] sport: 0 -> daddr: [104, 193, 90, 87] dport: 443, old_state: 7, new_state: 2
saddr: [192, 168, 136, 163] sport: 44286 -> daddr: [104, 193, 90, 87] dport: 443, old_state: 2, new_state: 1
saddr: [192, 168, 136, 163] sport: 47618 -> daddr: [104, 193, 88, 77] dport: 443, old_state: 2, new_state: 1
saddr: [192, 168, 136, 163] sport: 44292 -> daddr: [104, 193, 90, 87] dport: 443, old_state: 2, new_state: 1

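其中关于连接状态的枚举值,可以参考Linux内核源码。例如上面输出中的 old_state: 7, new_state: 2 表示 TCP_CLOSE -> TCP_SYN_SENT(发起连接),old_state: 2, new_state: 1 表示 TCP_SYN_SENT -> TCP_ESTABLISHED(连接建立):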

# /include/net/tcp_states.h
enum {
   TCP_ESTABLISHED = 1,
   TCP_SYN_SENT,
   TCP_SYN_RECV,
   TCP_FIN_WAIT1,
   TCP_FIN_WAIT2,
   TCP_TIME_WAIT,
   TCP_CLOSE,
   TCP_CLOSE_WAIT,
   TCP_LAST_ACK,
   TCP_LISTEN,
   TCP_CLOSING,	/* Now a valid state */
   TCP_NEW_SYN_RECV,

   TCP_MAX_STATES	/* Leave at the end! */
};

参考

http://www.brendangregg.com/blog/2018-03-22/tcp-tracepoints.html
https://kernel.taobao.org/2019/10/TCP-Tracepoints/

上一篇:js 数组对照筛选


下一篇:设计模式学习笔记三:策略模式和状态模式