netLINK,Linux内核Netlink机制详解与实例

Netlink套接字是用以实现用户进程与内核进程通信的一种特殊的进程间通信(IPC) ,也是网络应用程序与内核通信的最常用的接口,用户态应用使用标准的socket API就可以使用netlink提供的强大功能。

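Netlink是一种特殊的socket,它是Linux所特有的,类似于BSD系统中的AF_ROUTE,但又远比它的功能强大。目前在Linux内核中使用netlink进行应用与内核通信的应用很多,包括:路由daemon(NETLINK_ROUTE)、用户态socket协议(NETLINK_USERSOCK)、防火墙(NETLINK_FIREWALL)、netfilter子系统(NETLINK_NETFILTER)、内核事件向用户态通知(NETLINK_KOBJECT_UEVENT)、通用netlink(NETLINK_GENERIC)等。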

嵌入式进阶教程分门别类整理好了,看的时候十分方便,由于内容较多,这里就截取一部分图吧。

netLINK,Linux内核Netlink机制详解与实例

需要的朋友 私信【内核】 即可领取

相对于ioctl、sysfs、proc的优势:

  1. 内核可以主动向用户空间发送异步消息,而不需要用户空间来触发。
  2. 用户空间与内核之间的通信是异步的,不需要轮询:用户空间应用程序打开套接字后调用recvmsg(),如果没有来自内核的消息,就进入阻塞状态。
  3. 支持组播传输。

Netlink协议簇初始化

Netlink协议簇初始化代码位于net/netlink/af_netlink.c中。

core_initcall(netlink_proto_init); static int __ init netlink_proto_init ( void ) { int i; // 注册netlink协议 int err = proto_register(&netlink_proto, 0 ); if (err != 0 ) goto out ; # if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) err = bpf_iter_register(); if (err) goto out ; # endif BUILD_BUG_ON( sizeof ( struct netlink_skb_parms) sizeof_field( struct sk_buff, cb)); // 申请netlink table,每种协议类型一个 nl_table = kcalloc(MAX_LINKS, sizeof (*nl_table), GFP_KERNEL); if (!nl_table) goto panic; // 初始化netlink table for (i = 0 ; i MAX_LINKS; i++) { // 初始化哈希表 if (rhashtable_init(&nl_table[i].hash, &netlink_rhashtable_params) 0 ) { while (--i 0 ) rhashtable_destroy(&nl_table[i].hash); kfree(nl_table); goto panic; } } // 初始化应用层使用的NETLINK_USERSOCK协议类型的netlink(用于应用层进程间通信) netlink_add_usersock_entry(); // 向内核注册协议处理函数,即将netlink的socket创建处理函数注册到内核中 sock_register(&netlink_family_ops); // 向内核所有的网络命名空间注册”子系统“的初始化和注销函数,在网络命名空间创建和注销时会调用这里注册的初始化和注销函数 register_pernet_subsys(&netlink_net_ops); register_pernet_subsys(&netlink_tap_net_ops); /* The netlink device handler may be needed early. */ // 注册各个消息类型,注册指定的函数指针(至少其中一个必须为非NULL),以便在收到指定协议族和消息类型的请求消息时调用。 rtnetlink_init(); out : return err;panic: panic( "netlink_init: Cannot allocate nl_tablen" );}

创建Netlink套接字

static const struct net_proto_family netlink_family_ops = { .family = PF_NETLINK, .create = netlink_create, .owner = THIS_MODULE, /* for consistency 8) */ }; static int netlink_create (struct net *net, struct socket *sock, int protocol, int kern) { struct module * module = NULL ; struct mutex * cb_mutex ; struct netlink_sock * nlk ; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); int err = 0 ; sock-state = SS_UNCONNECTED; // 支持raw和dgram类型 if (sock-type != SOCK_RAW && sock-type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; // 检查netlink协议类型,目前22个,最大支持32个 if (protocol 0 || protocol = MAX_LINKS) return -EPROTONOSUPPORT; protocol = array_index_nospec(protocol, MAX_LINKS); // 锁表 netlink_lock_table(); # ifdef CONFIG_MODULES // netlink指定协议未注册,则加载模块并注册 if (!nl_table[protocol].registered) { netlink_unlock_table(); request_module( "net-pf-%d-proto-%d" , PF_NETLINK, protocol); netlink_lock_table(); } # endif // 查找dodulecb_mutexbindunbind if (nl_table[protocol].registered && try_module_get(nl_table[protocol]. module )) module = nl_table[protocol]. 
module ; else err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; unbind = nl_table[protocol].unbind; // 释放锁 netlink_unlock_table(); if (err 0 ) goto out; err = __netlink_create(net, sock, cb_mutex, protocol, kern); if (err 0 ) goto out_module; // 增加netlink协议inuse计数器 sock_prot_inuse_add(net, &netlink_proto, 1 ); // 继续初始化netlink_sock nlk = nlk_sk(sock-sk); nlk- module = module ; nlk-netlink_bind = bind; nlk-netlink_unbind = unbind;out: return err;out_module: module_put( module ); goto out;} static int __netlink_create(struct net *net, struct socket *sock, struct mutex *cb_mutex, int protocol, int kern){ struct sock * sk ; struct netlink_sock * nlk ; // 注册netlink socket处理函数 sock-ops = &netlink_ops; // 创建内核sock对象 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern); if (!sk) return -ENOMEM; // 使用sockt初始sk sock_init_data(sock, sk); // sk转netlink_sock,并初始化netlink_sock nlk = nlk_sk(sk); if (cb_mutex) { nlk-cb_mutex = cb_mutex; } else { nlk-cb_mutex = &nlk-cb_def_mutex; mutex_init(nlk-cb_mutex); lockdep_set_class_and_name(nlk-cb_mutex, nlk_cb_mutex_keys + protocol, nlk_cb_mutex_key_strings[protocol]); } // 初始化netlink_sock的等待队列 init_waitqueue_head(&nlk-wait); // sk协议和析构 sk-sk_destruct = netlink_sock_destruct; sk-sk_protocol = protocol; return 0 ;} static const struct proto_ops netlink_ops = { .family = PF_NETLINK, .owner = THIS_MODULE, .release = netlink_release, .bind = netlink_bind, .connect = netlink_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, .poll = datagram_poll, .ioctl = netlink_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = netlink_setsockopt, .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage,};

接收Netlink消息

从socket上接收数据包skb,并解析成netlink msg。

/*
 * Message header in the 4.4BSD message-passing style (not 4.3), which is
 * why msg_accrights(len) are absent; per the original note they belong in
 * an obscure libc emulation or the bin.
 */
struct msghdr {
	/* Pointer to the socket address structure. */
	void *msg_name;
	/* Size of the socket address structure. */
	int msg_namelen;
	/* The message data. */
	struct iov_iter msg_iter;

	/*
	 * Ancillary data. On the recv* side, when msg_control_is_user is set,
	 * msg_control_user is the user buffer; in every other case the kernel
	 * buffer msg_control is used.
	 */
	union {
		void *msg_control;
		void __user *msg_control_user;
	};
	bool msg_control_is_user : 1;
	/* Length of the ancillary data buffer. */
	__kernel_size_t msg_controllen;
	/* Flags on the received message. */
	unsigned int msg_flags;
	/* Pointer to the iocb for async requests. */
	struct kiocb *msg_iocb;
};

/*
 * netlink_recvmsg - receive one datagram from a netlink socket.
 * @sock:  socket to read from
 * @msg:   message header that receives data, source address and control data
 * @len:   size of the caller's buffer
 * @flags: MSG_* flags; MSG_OOB is rejected with -EOPNOTSUPP
 *
 * Dequeues one skb from the socket and copies at most @len bytes into @msg,
 * setting MSG_TRUNC in msg->msg_flags when the datagram is longer than @len.
 *
 * Returns the error if one occurred, otherwise the number of bytes copied
 * (or the full datagram length when the caller passed MSG_TRUNC in @flags).
 */
static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
			   int flags)
{
	struct scm_cookie scm;
	/* Kernel sock object. */
	struct sock *sk = sock->sk;
	/* Its netlink_sock view. */
	struct netlink_sock *nlk = nlk_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	size_t copied;
	struct sk_buff *skb, *data_skb;
	int err, ret;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	copied = 0;

	/* Take one skb off the socket. */
	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		goto out;

	data_skb = skb;

#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
	if (unlikely(skb_shinfo(skb)->frag_list)) {
		/*
		 * If this skb has a frag_list, then here that means that we
		 * will have to use the frag_list skb's data for compat tasks
		 * and the regular skb's data for normal (non-compat) tasks.
		 *
		 * If we need to send the compat skb, assign it to the
		 * 'data_skb' variable so that it will be used below for data
		 * copying. We keep 'skb' for everything else, including
		 * freeing both later.
		 */
		if (flags & MSG_CMSG_COMPAT)
			data_skb = skb_shinfo(skb)->frag_list;
	}
#endif

	/* Record the max length of recvmsg() calls for future allocations */
	nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
	nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
				     SKB_WITH_OVERHEAD(32768));

	/* Work out how many bytes to copy; flag truncation if the buffer is short. */
	copied = data_skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	/* Copy the payload from the skb into msg. */
	err = skb_copy_datagram_msg(data_skb, 0, msg, copied);

	/* Fill in the sender's socket address if the caller asked for it. */
	if (msg->msg_name) {
		DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
		addr->nl_family = AF_NETLINK;
		addr->nl_pad    = 0;
		addr->nl_pid	= NETLINK_CB(skb).portid;
		addr->nl_groups	= netlink_group_mask(NETLINK_CB(skb).dst_group);
		msg->msg_namelen = sizeof(*addr);
	}

	if (nlk->flags & NETLINK_F_RECV_PKTINFO)
		netlink_cmsg_recv_pktinfo(msg, skb);
	if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
		netlink_cmsg_listen_all_nsid(sk, msg, skb);

	/* Initialise the scm_cookie with the sender's credentials. */
	memset(&scm, 0, sizeof(scm));
	scm.creds = *NETLINK_CREDS(skb);
	/* With MSG_TRUNC the caller gets the real datagram length back. */
	if (flags & MSG_TRUNC)
		copied = data_skb->len;

	/* Release the skb. */
	skb_free_datagram(sk, skb);

	/*
	 * A dump is in progress and the receive buffer is at most half full:
	 * continue the dump, reporting any failure on the socket.
	 */
	if (nlk->cb_running &&
	    atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
		ret = netlink_dump(sk);
		if (ret) {
			sk->sk_err = -ret;
			sk_error_report(sk);
		}
	}

	/*
	 * scm: socket-level control message processing. Delivers the
	 * scm_cookie contents (process credentials, file descriptors, ...)
	 * to the receiver.
	 */
	scm_recv(sock, msg, &scm, flags);
out:
	/* Let netlink_rcv_wake() wake up processing on this socket. */
	netlink_rcv_wake(sk);
	return err ? : copied;
}

发送Netlink消息

将要发送的netlink msg构造成skb数据包,然后发送。

/*
 * netlink_sendmsg - build an skb from a user message and send it.
 * @sock: sending socket
 * @msg:  message header carrying the payload and an optional destination
 * @len:  payload length in bytes; zero is rejected with -ENODATA
 *
 * The destination comes from msg->msg_name when msg->msg_namelen is
 * non-zero, otherwise from the socket's stored dst_portid/dst_group. The
 * skb is broadcast when a destination group is set and is then unicast to
 * the destination port id.
 *
 * Returns the result of netlink_unicast() on success, or a negative errno.
 */
static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
	u32 dst_portid;
	u32 dst_group;
	struct sk_buff *skb;
	int err;
	struct scm_cookie scm;
	u32 netlink_skb_flags = 0;

	if (msg->msg_flags & MSG_OOB)
		return -EOPNOTSUPP;

	if (len == 0) {
		pr_warn_once("Zero length message leads to an empty skb\n");
		return -ENODATA;
	}

	/*
	 * scm: socket-level control message processing. Validates msg and
	 * initialises the scm_cookie (process credentials, file descriptors, ...).
	 */
	err = scm_send(sock, msg, &scm, true);
	if (err < 0)
		return err;

	/* Destination address: explicit in msg, or the socket's stored peer. */
	if (msg->msg_namelen) {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_nl))
			goto out;
		if (addr->nl_family != AF_NETLINK)
			goto out;
		dst_portid = addr->nl_pid;
		dst_group = ffs(addr->nl_groups);
		err = -EPERM;
		if ((dst_group || dst_portid) &&
		    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
			goto out;
		netlink_skb_flags |= NETLINK_SKB_DST;
	} else {
		dst_portid = nlk->dst_portid;
		dst_group = nlk->dst_group;
	}

	/* Paired with WRITE_ONCE() in netlink_insert() */
	if (!READ_ONCE(nlk->bound)) {
		err = netlink_autobind(sock);
		if (err)
			goto out;
	} else {
		/* Ensure nlk is hashed and visible. */
		smp_rmb();
	}

	/* Reject payloads that exceed the send buffer size minus 32 bytes. */
	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;
	err = -ENOBUFS;
	/* Allocate the skb. */
	skb = netlink_alloc_large_skb(len, dst_group);
	if (skb == NULL)
		goto out;

	NETLINK_CB(skb).portid	= nlk->portid;
	NETLINK_CB(skb).dst_group = dst_group;
	NETLINK_CB(skb).creds	= scm.creds;
	NETLINK_CB(skb).flags	= netlink_skb_flags;

	err = -EFAULT;
	/* Copy the payload from msg into the skb. */
	if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
		kfree_skb(skb);
		goto out;
	}

	/* Run the security_netlink_send() check; a non-zero result aborts the send. */
	err = security_netlink_send(sk, skb);
	if (err) {
		kfree_skb(skb);
		goto out;
	}

	/* Broadcast: take an extra reference, the skb is still used below. */
	if (dst_group) {
		refcount_inc(&skb->users);
		netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
	}
	/* Unicast. */
	err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags & MSG_DONTWAIT);

out:
	scm_destroy(&scm);
	return err;
}

声明:本站所有作品(图文、音视频)均由用户自行上传分享,本文由"泡芙味的饼干哟"自行发布,本站仅供存储和学习交流。若您的权利被侵害,请联系我们删除。如若转载,请注明出处:https://www.flipbrief.com/fresh/8UW1v017.html