使用原始套接字和 BPF 编写 Linux 数据包嗅探器
本文介绍如何在 Linux 平台下编写一个不依赖其他库的网络数据包嗅探器。
介绍
PF_PACKET 套接字
PF_SOCKET
本文通过检查创建新套接字时执行的系统调用来更深入地了解套接字:
int socket(int domain, int type, int protocol);
AF_PACKET Low-level packet interface
/*
 * Translate a layer-4 protocol number into a short lowercase name.
 *
 * code: protocol number to look up (the sniffer passes byte 9 of the IPv4
 *       header).
 *
 * Returns "icmp", "igmp", "tcp" or "udp" for 1, 2, 6 and 17 respectively,
 * and "unknown" for every other value. The result points at a string
 * literal: callers must neither modify nor free it.
 */
char* transport_protocol(unsigned int code) {
    static const struct {
        unsigned int number;
        char *label;
    } known[] = {
        { 1,  "icmp" },
        { 2,  "igmp" },
        { 6,  "tcp"  },
        { 17, "udp"  },
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
        if (known[idx].number == code) {
            return known[idx].label;
        }
    }
    return "unknown";
}
/*
 * Sniffer entry point.
 *
 * Opens a raw PF_PACKET socket for IPv4 frames, switches eth0 into
 * promiscuous mode, attaches a classic-BPF program that accepts only TCP,
 * and then prints a summary of every captured frame in an endless loop.
 *
 * argc/argv are unused. The capture loop never ends on its own, so the
 * function only leaves through exit(1) after a failed system call. The
 * IFF_PROMISC flag set here is not cleared again before exiting.
 */
int main(int argc, char **argv) {
    static const char ifname[] = "eth0";
    /* Raw frame bytes; unsigned so header fields print as 0..255. */
    unsigned char buffer[2048];
    unsigned char *iphead, *ethhead;
    int sock, n;

    (void)argc;
    (void)argv;

    if ((sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP))) < 0) {
        perror("socket");
        exit(1);
    }

    /*
     * Bind to eth0 only. The option value is the interface name including
     * its terminating NUL.
     * NOTE(review): socket(7) states that SO_BINDTODEVICE is not supported
     * for packet sockets and recommends bind(2) with a struct sockaddr_ll
     * instead -- confirm that this call really limits capture to eth0.
     */
    if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, ifname, sizeof(ifname)) < 0) {
        perror("setsockopt bind device");
        close(sock);
        exit(1);
    }

    /*
     * Put the network card into promiscuous mode: read the current
     * interface flags (SIOCGIFFLAGS, 0x8913), OR in IFF_PROMISC and write
     * them back (SIOCSIFFLAGS, 0x8914) so all other flags are preserved.
     */
    struct ifreq ethreq;
    memset(&ethreq, 0, sizeof(ethreq));
    strncpy(ethreq.ifr_name, ifname, IF_NAMESIZE - 1);
    if (ioctl(sock, SIOCGIFFLAGS, &ethreq) == -1) {
        perror("ioctl");
        close(sock);
        exit(1);
    }
    ethreq.ifr_flags |= IFF_PROMISC;
    if (ioctl(sock, SIOCSIFFLAGS, &ethreq) == -1) {
        perror("ioctl");
        close(sock);
        exit(1);
    }

    /*
     * Attach the filter to the socket.
     * The filter code is generated by running: tcpdump -dd tcp
     * The last two instructions are the accept (return 0x40000 bytes) and
     * reject (return 0) exits.
     */
    struct sock_filter BPF_code[] = {
        { 0x28, 0, 0, 0x0000000c },
        { 0x15, 0, 5, 0x000086dd },
        { 0x30, 0, 0, 0x00000014 },
        { 0x15, 6, 0, 0x00000006 },
        { 0x15, 0, 6, 0x0000002c },
        { 0x30, 0, 0, 0x00000036 },
        { 0x15, 3, 4, 0x00000006 },
        { 0x15, 0, 3, 0x00000800 },
        { 0x30, 0, 0, 0x00000017 },
        { 0x15, 0, 1, 0x00000006 },
        { 0x6, 0, 0, 0x00040000 },
        { 0x6, 0, 0, 0x00000000 }
    };
    struct sock_fprog Filter;
    /* .len must equal the number of instructions, so derive it from the array. */
    Filter.len = sizeof(BPF_code) / sizeof(BPF_code[0]);
    Filter.filter = BPF_code;
    if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &Filter, sizeof(Filter)) < 0) {
        perror("setsockopt attach filter");
        close(sock);
        exit(1);
    }

    while (1) {
        printf("-----------\n");
        n = recvfrom(sock, buffer, sizeof(buffer), 0, NULL, NULL);
        if (n < 0) {
            /* Report right away: any intervening call could overwrite errno. */
            perror("recvfrom():");
            close(sock);
            exit(1);
        }
        printf("%d bytes read\n", n);

        /*
         * The frame must hold at least complete Ethernet (14), IP (20)
         * and TCP/UDP (8) headers. A shorter frame is not an error, so it
         * is skipped instead of ending the capture.
         */
        if (n < 42) {
            printf("Incomplete packet (%d bytes)\n", n);
            continue;
        }

        /* Ethernet header: destination MAC in bytes 0-5, source MAC in 6-11. */
        ethhead = buffer;
        printf("Source MAC address: %.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n",
               ethhead[6], ethhead[7], ethhead[8], ethhead[9], ethhead[10], ethhead[11]);
        printf("Destination MAC address: %.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n",
               ethhead[0], ethhead[1], ethhead[2], ethhead[3], ethhead[4], ethhead[5]);

        iphead = buffer + 14;
        /*
         * 0x45 = IPv4 with a 20-byte header (no options), which is what
         * places the layer-4 ports at offsets 20..23 below.
         */
        if (*iphead == 0x45) {
            printf("Source host %d.%d.%d.%d\n",
                   iphead[12], iphead[13],
                   iphead[14], iphead[15]);
            printf("Dest host %d.%d.%d.%d\n",
                   iphead[16], iphead[17],
                   iphead[18], iphead[19]);
            printf("Source,Dest ports %d,%d\n",
                   (iphead[20] << 8) + iphead[21],
                   (iphead[22] << 8) + iphead[23]);
            printf("Layer-4 protocol %s\n", transport_protocol(iphead[9]));
        }
    }
}
上面代码片段命名为create_socket.c
请确保包含系统头文件:<sys/socket.h> <sys/types.h>
绑定到一个网络接口
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.230.49 netmask 255.255.240.0 broadcast 192.168.239.255
inet6 fe80::215:5dff:fefb:e31f prefixlen 64 scopeid 0x20<link>
ether 00:15:5d:fb:e3:1f txqueuelen 1000 (Ethernet)
RX packets 260 bytes 87732 (87.7 KB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 178 bytes 29393 (29.3 KB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
// bind to eth0 interface only
// The option length is strlen(opt) + 1 so that the terminating NUL of the
// interface name is part of the option value. On failure the socket is
// closed and the program exits with status 1.
// NOTE(review): socket(7) states that SO_BINDTODEVICE is not supported for
// packet sockets and recommends bind(2) with a struct sockaddr_ll instead --
// confirm that this call really limits capture to eth0.
const char *opt;
opt = "eth0";
if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, opt, strlen(opt) + 1) < 0) {
perror("setsockopt bind device");
close(sock);
exit(1);
}
通过调用 setsockopt 系统调用来完成。
现在嗅探器只捕获在指定网卡上接收到的网络数据包。
非混杂和混杂模式
/* Set the network card in promiscuous mode. */
// An ioctl() request has encoded in it whether the argument is an in parameter or out parameter
// SIOCGIFFLAGS 0x8913 /* get flags */
// SIOCSIFFLAGS 0x8914 /* set flags */
// The current flags are read first and IFF_PROMISC is OR-ed in, so every
// other interface flag is written back unchanged.
struct ifreq ethreq;
strncpy(ethreq.ifr_name, "eth0", IF_NAMESIZE);
if (ioctl(sock, SIOCGIFFLAGS, &ethreq) == -1) {
    perror("ioctl");
    close(sock);
    exit(1);
}
ethreq.ifr_flags |= IFF_PROMISC;
if (ioctl(sock, SIOCSIFFLAGS, &ethreq) == -1) {
    perror("ioctl");
    close(sock);
    exit(1);
}
ioctl 代表 I/O 控制,它操纵特定文件的底层设备参数。ioctl 接受三个参数:打开的文件描述符(这里是套接字 sock)、与设备相关的请求码(这里是 SIOCGIFFLAGS 或 SIOCSIFFLAGS),以及指向请求数据的指针(这里是 struct ifreq 的地址)。
使用 BPF 进行数据包过滤
BPF的背景
1992 年,BPF 首次被引入 BSD Unix 系统,用于过滤不需要的网络数据包。BPF 的提议来自劳伦斯伯克利实验室的研究人员,他们还开发了 libpcap 和 tcpdump。
1997 年,Linux Socket Filter(LSF)基于 BPF 开发,并被引入 Linux 内核 2.1.75 版本。注意,LSF 和 BPF 有一些明显的区别,但在 Linux 上下文中,谈到 BPF 或 LSF 时,我们指的是 Linux 内核中相同的包过滤机制。我们将在接下来的部分中研究 BPF 的详细理论和设计。
最初,BPF 被设计为网络数据包过滤器。但是在 2013 年,BPF 得到了广泛的扩展,它可以用于非网络用途,例如性能分析和故障排除。如今,扩展的 BPF 被称为 eBPF,而原始和过时的版本被重命名为经典 BPF(cBPF)。注意,在本文中研究的是 cBPF,而 eBPF 不在本文的讨论范围内。eBPF 是当今软件界最火的技术。
BPF 的放置位置
/* source code file of net/packet/af_packet.c */
/* packet_create: create socket */
/*
 * Abridged excerpt: the declarations of sk, po and proto are in the omitted
 * part. It shows how a newly created packet socket gets its receive hook:
 * packet_rcv by default, packet_rcv_spkt for SOCK_PACKET sockets, and how
 * that hook is registered with dev_add_pack() when proto is non-zero.
 * NOTE(review): proto is presumably derived from the protocol parameter in
 * the omitted code -- confirm against the kernel source.
 */
static int packet_create(struct net *net, struct socket *sock, int protocol)
{
/* some code omitted ... */
po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
po->num = proto;
spin_lock_init(&po->bind_lock);
po->prot_hook.func = packet_rcv; // attach hook function to socket
if (sock->type == SOCK_PACKET)
po->prot_hook.func = packet_rcv_spkt; // SOCK_PACKET sockets use packet_rcv_spkt instead
/* Only a non-zero proto registers the hook and marks the socket as running. */
if (proto) {
po->prot_hook.type = proto;
dev_add_pack(&po->prot_hook);
sock_hold(sk);
po->running = 1;
}
}
/* hook function packet_rcv is triggered, when the packet is received */
/*
 * Abridged excerpt: local declarations and the drop_n_restore label are in
 * the omitted part. The owning socket is taken from pt->af_packet_priv, the
 * filter is run with snaplen initialised to the full skb length, and a zero
 * result sends the packet to the drop path; otherwise the skb is appended
 * to the socket's receive queue.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
/* some code omitted ... */
sk = pt->af_packet_priv;
snaplen = skb->len;
res = run_filter(skb, sk, snaplen); // filter logic
if (!res)
goto drop_n_restore; // drop the packet
__skb_queue_tail(&sk->sk_receive_queue, skb); // put the packet into the queue
}
BPF机器
虚拟 CPU
累加器是 CPU 中包含的一种寄存器。它充当临时存储位置,在数学和逻辑计算中保存中间值。例如,在“1+2+3”的运算中,累加器会保存值 1,然后是值 3,然后是值 6。累加器的好处是不需要显式引用。
指令集和寻址方式
示例 BPF 程序
现在根据上面的知识来尝试理解下面这个小 BPF 程序,bpf_ip.asm:
(000) ldh [12]
(001) jeq #0x800 jt 2 jf 3
(002) ret #262144
(003) ret #0
BPF的内核实现
/* Copied from net/packet/af_packet.c */
/* function run_filter is called in packet_rcv*/
/*
 * Runs the filter attached to sk, if any, against skb.
 *
 * res is the value returned unchanged when sk->sk_filter is NULL (packet_rcv
 * passes the packet length). When a filter is attached, the return value is
 * whatever sk_run_filter() yields; packet_rcv drops the packet when that
 * value is 0. The filter pointer is read and used between
 * rcu_read_lock_bh() and rcu_read_unlock_bh().
 */
static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
unsigned int res)
{
struct sk_filter *filter;
rcu_read_lock_bh();
filter = rcu_dereference(sk->sk_filter); // get the filter bound to the socket
if (filter != NULL)
res = sk_run_filter(skb, filter->insns, filter->len); // the filtering is inside sk_run_filter function
rcu_read_unlock_bh();
return res;
}
可以发现真正的过滤逻辑在sk_run_filter里面:
/*
 * Interpreter for a classic BPF program (abridged excerpt).
 *
 * skb:    the packet being examined.
 * filter: array of BPF instructions.
 * flen:   number of instructions in filter.
 *
 * Executes the instructions starting at index 0, dispatching on each
 * instruction's code field. The visible cases are arithmetic on the
 * accumulator A, the two return forms, and stores into the scratch memory.
 * Returns the k operand or A for a BPF_RET instruction, and 0 when an
 * unhandled opcode is met or the loop runs past the last instruction.
 * NOTE(review): ptr, tmp and k are unused in this excerpt; presumably the
 * omitted cases (loads, jumps, ...) use them.
 */
unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
struct sock_filter *fentry; /* We walk down these */
void *ptr;
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
u32 tmp;
int k;
int pc;
/*
* Process array of filter instructions.
*/
for (pc = 0; pc < flen; pc++) {
fentry = &filter[pc];
switch (fentry->code) {
/* ALU instructions: the _X forms use register X, the _K forms use the constant k. */
case BPF_ALU|BPF_ADD|BPF_X:
A += X;
continue;
case BPF_ALU|BPF_ADD|BPF_K:
A += fentry->k;
continue;
case BPF_ALU|BPF_SUB|BPF_X:
A -= X;
continue;
case BPF_ALU|BPF_SUB|BPF_K:
A -= fentry->k;
continue;
case BPF_ALU|BPF_MUL|BPF_X:
A *= X;
continue;
/* some code omitted ... */
/* Return instructions end the program; the value is the filter's verdict. */
case BPF_RET|BPF_K:
return fentry->k;
case BPF_RET|BPF_A:
return A;
/* Store A or X into scratch memory slot k. */
case BPF_ST:
mem[fentry->k] = A;
continue;
case BPF_STX:
mem[fentry->k] = X;
continue;
/* Unhandled opcode: warn and return 0. */
default:
WARN_ON(1);
return 0;
}
}
/* Ran past the last instruction without a return: yield 0. */
return 0;
}
BPF JIT
在嗅探器中设置 BPF
/*
 * One classic-BPF instruction. Each row of the `tcpdump -dd` output, e.g.
 * { 0x28, 0, 0, 0x0000000c }, initialises the fields in this order:
 * code, jt, jf, k.
 */
struct sock_filter { /* Filter block */
__u16 code; /* Actual filter code */
__u8 jt; /* Jump true */
__u8 jf; /* Jump false */
__u32 k; /* Generic multiuse field */
};
-d:以可读的形式将编译的数据包匹配代码转储到标准输出并停止。
-dd:将数据包匹配代码转储为 C 程序片段。
-ddd:将数据包匹配代码转储为十进制数(前面有一个计数)。
baoqger@ubuntu:~$ sudo tcpdump -d ip
[sudo] password for baoqger:
(000) ldh [12]
(001) jeq #0x800 jt 2 jf 3
(002) ret #262144
(003) ret #0
baoqger@SLB-C8JWZH3:~$ sudo tcpdump -dd ip
{ 0x28, 0, 0, 0x0000000c },
{ 0x15, 0, 1, 0x00000800 },
{ 0x6, 0, 0, 0x00040000 },
{ 0x6, 0, 0, 0x00000000 },
baoqger@SLB-C8JWZH3:~$ sudo tcpdump -ddd ip
4
40 0 0 12
21 0 1 2048
6 0 0 262144
6 0 0 0
// attach the filter to the socket
// the filter code is generated by running: tcpdump -dd ip
// (these four instructions are exactly the `tcpdump -dd ip` output shown
// above: load the half-word at offset 12, compare it with 0x800, then
// return 0x40000 to accept or 0 to reject)
struct sock_filter BPF_code[] = {
{ 0x28, 0, 0, 0x0000000c },
{ 0x15, 0, 1, 0x00000800 },
{ 0x6, 0, 0, 0x00040000 },
{ 0x6, 0, 0, 0x00000000 }
};
struct sock_fprog Filter;
// error prone code, .len field should be consistent with the real length of the filter code array
// (computing it with sizeof keeps it in step when instructions are added or removed)
Filter.len = sizeof(BPF_code)/sizeof(BPF_code[0]);
Filter.filter = BPF_code;
// On failure the socket is closed and the program exits with status 1.
if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &Filter, sizeof(Filter)) < 0) {
perror("setsockopt attach filter");
close(sock);
exit(1);
}
处理数据包
- 首先,recvfrom 系统调用从套接字读取数据包。我们将系统调用放在一个while循环中以继续读取传入的数据包。
- 然后打印数据包中的源 MAC 地址和目标 MAC 地址(得到的数据包应该是第 2 层中的原始以太网帧)。如果这个以太网帧包含的是一个 IPv4 数据包,那么我们还会打印出源 IP 地址、目标 IP 地址、源端口和目标端口,以及第 4 层协议的名称。要了解更多信息,您可以研究各种网络协议的标头格式。
/* Capture loop: read one frame per iteration and print a summary of it. */
while (1) {
    printf("-----------\n");
    n = recvfrom(sock, buffer, 2048, 0, NULL, NULL);
    if (n < 0) {
        /* Report right away: any intervening call could overwrite errno. */
        perror("recvfrom():");
        close(sock);
        exit(1);
    }
    printf("%d bytes read\n", n);
    /* Check to see if the packet contains at least
     * complete Ethernet (14), IP (20) and TCP/UDP
     * (8) headers. A shorter frame is not an error,
     * so it is skipped instead of ending the capture.
     */
    if (n < 42) {
        printf("Incomplete packet (%d bytes)\n", n);
        continue;
    }
    /* Ethernet header: destination MAC in bytes 0-5, source MAC in 6-11. */
    ethhead = (unsigned char *)buffer;
    printf("Source MAC address: %.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n",
           ethhead[6], ethhead[7], ethhead[8], ethhead[9], ethhead[10], ethhead[11]);
    printf("Destination MAC address: %.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n",
           ethhead[0], ethhead[1], ethhead[2], ethhead[3], ethhead[4], ethhead[5]);
    iphead = ethhead + 14;
    if (*iphead == 0x45) { /* Double check for IPv4
                            * and no options present */
        printf("Source host %d.%d.%d.%d\n",
               iphead[12], iphead[13],
               iphead[14], iphead[15]);
        printf("Dest host %d.%d.%d.%d\n",
               iphead[16], iphead[17],
               iphead[18], iphead[19]);
        printf("Source,Dest ports %d,%d\n",
               (iphead[20] << 8) + iphead[21],
               (iphead[22] << 8) + iphead[23]);
        printf("Layer-4 protocol %s\n", transport_protocol(iphead[9]));
    }
}
总结