1.概述
Linux系統多用于服務器上,Linux非常牢固的支持網絡。在Linux中,網絡分為兩個層,分別是網絡堆棧協議支持層,以及接收和發送網絡協議的設備驅動程序層。網絡堆棧是硬件中獨立出來的部分,主要用來支持TCP/IP等多種協議,而網絡設備驅動層是連接網絡堆棧協議層和網絡硬件的中間層。
網絡設備驅動程序的主要功能是:
(1)模塊加載或內核啟動相關的初始化處理
(2)清除模塊時的處理
(3)網絡設備的檢索和探測
(4)網絡設備的初始化和注冊
(5)打開或關閉網絡設備
(6)發送網絡數據
(7)接收網絡數據
(8)中斷處理(在發送完數據時,硬件向內核產生一個中斷,告訴內核數據已經發送完畢,在網絡設備接收到數據時,也要發生一個中斷,告訴內核,數據已經到達,請及時處理)
(9)超時處理
(10)多播處理
(11)網絡設備的控制ioctl
而Linux網絡設備驅動的主要功能就是網絡設備的初始化,網絡設備的配置,數據包的收發。
2. Linux網絡設備驅動的接口函數
net_device結構體存儲一個網絡接口的重要信息,它是系統中網絡設備的代表。
sk_buff是socket buffer,在網絡傳輸過程中起著重要的作用,內核把數據包封裝成socket buffer向網絡硬件發送,當網絡硬件接收到數據包時,再把數據包封裝成socket buffer向內核傳送。
注冊網絡設備:
int register_netdev(struct net_device *dev);//網絡設備與字符設備,塊設備不同,沒有主,次設備號
注銷網絡設備:
void unregister_netdev(struct net_device *dev);
返回網絡設備結構體的private data:
void *netdev_priv(struct net_device *dev);
即返回我們定義的設備結構體。
保存設備統計信息的結構體
struct net_device_stats
打開發送隊列,能夠發送數據包,在open()中調用
netif_start_queue(struct net_device *dev);
關閉發送隊列,在stop()中調用
netif_stop_queue(struct net_device* dev);
重新打開隊列,一般在關閉隊列之后重啟隊列
netif_wake_queue(struct net_device *dev);
當數據到達時,通知內核數據包到達
void netif_rx(struct sk_buff *skb);
分配一個sk_buff結構體
struct sk_buff *dev_alloc_skb(unsigned int len);
釋放sk_buff結構體
void dev_kfree_skb(struct sk_buff *skb);
從數據的尾部擴展len長度的空間,為了把數據放到skb的尾部
unsigned char* skb_put(struct sk_buff *skb,int len);
SIOCDEVPRIVATE 可用ioctl執行的16個命令的第一個命令
最后一個是SIOCDEVPRIVATE+15
3.下面給出一個虛擬硬件的網絡驅動的例子
/*
 * Debug-print helpers.
 *
 * PDEBUG(fmt, ...)  - when SNULL_DEBUG is defined, prints through printk()
 *                     with a "snull: " prefix in kernel space, or through
 *                     fprintf(stderr, ...) in user space. Without
 *                     SNULL_DEBUG it expands to nothing.
 * PDEBUGG(fmt, ...) - always expands to nothing (a placeholder).
 */
#undef PDEBUG             /* undef it, just in case */
#ifdef SNULL_DEBUG
#  ifdef __KERNEL__
     /* This one if debugging is on, and kernel space */
#    define PDEBUG(fmt, args...) printk(KERN_DEBUG "snull: " fmt, ## args)
#  else
     /* This one for user space */
#    define PDEBUG(fmt, args...) fprintf(stderr, fmt, ## args)
#  endif
#else
#  define PDEBUG(fmt, args...) /* not debugging: nothing */
#endif

#undef PDEBUGG
#define PDEBUGG(fmt, args...) /* nothing: it's a placeholder */
/* These are the flags in the statusword */
#define SNULL_RX_INTR 0x0001   /* a packet is waiting to be received */
#define SNULL_TX_INTR 0x0002   /* a transmission has completed */

/* Default timeout period */
#define SNULL_TIMEOUT 6   /* In jiffies */
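/*
 * NOTE(review): the header names were stripped from this listing. The ones
 * carrying a comment are reconstructed from that comment; the uncommented
 * ones are a best guess for this driver and should be confirmed.
 */
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h> /* printk() */
#include <linux/slab.h> /* kmalloc() */
#include <linux/errno.h>   /* error codes */
#include <linux/types.h>   /* size_t */
#include <linux/interrupt.h> /* mark_bh */
#include <linux/in.h>
#include <linux/netdevice.h>    /* struct device, and other headers */
#include <linux/etherdevice.h> /* eth_type_trans */
#include <linux/ip.h>           /* struct iphdr */
#include <linux/tcp.h>          /* struct tcphdr */
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/in6.h>
#include <asm/uaccess.h>
#include <asm/checksum.h>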
/*
 * When non-zero, snull_hw_tx() stops the queue instead of raising the
 * simulated TX-done interrupt whenever (tx_packets + 1) is a multiple of
 * this value.
 */
static int lockup = 0;
/* Copied into dev->watchdog_timeo by snull_init(). */
static int timeout = SNULL_TIMEOUT;
struct net_device snull_devs[2];// two devices are defined here: one is snull0, the other is snull1
//網(wǎng)絡(luò)設(shè)備結(jié)構(gòu)體,作為net_device->priv
struct snull_priv {
??? struct net_device_stats stats;//有用的統(tǒng)計(jì)信息
??? int status;//網(wǎng)絡(luò)設(shè)備的狀態(tài)信息,是發(fā)完數(shù)據(jù)包,還是接收到網(wǎng)絡(luò)數(shù)據(jù)包
??? int rx_packetlen;//接收到的數(shù)據(jù)包長(zhǎng)度
??? u8 *rx_packetdata;//接收到的數(shù)據(jù)
??? int tx_packetlen;//發(fā)送的數(shù)據(jù)包長(zhǎng)度
??? u8 *tx_packetdata;//發(fā)送的數(shù)據(jù)
??? struct sk_buff *skb;//socket buffer結(jié)構(gòu)體,網(wǎng)絡(luò)各層之間傳送數(shù)據(jù)都是通過這個(gè)結(jié)構(gòu)體來(lái)實(shí)現(xiàn)的
??? spinlock_t lock;//自旋鎖
};
/* Forward declaration: snull_tx() calls this before its definition appears. */
void snull_tx_timeout (struct net_device *dev);
//網(wǎng)絡(luò)接口的打開函數(shù)
int snull_open(struct net_device *dev)
?{
??? printk("call snull_open/n");
??? memcpy(dev->dev_addr, "/0SNUL0", ETH_ALEN);//分配一個(gè)硬件地址,ETH_ALEN是網(wǎng)絡(luò)設(shè)備硬件地址的長(zhǎng)度
???
??? netif_start_queue(dev);//打開傳輸隊(duì)列,這樣才能進(jìn)行數(shù)據(jù)傳輸
??? return 0;
}
/*
 * Close the network interface (the device's stop method).
 *
 * Stops the transmit queue so that no further packets are sent.
 * Always returns 0.
 */
int snull_release(struct net_device *dev)
{
    printk("call snull_release\n");
    netif_stop_queue(dev);
    return 0;
}
//接包函數(shù)
void snull_rx(struct net_device *dev, int len, unsigned char *buf)
{
???
??? struct sk_buff *skb;
??? struct snull_priv *priv = (struct snull_priv *) dev->priv;
?
??? /*
???? * The packet has been retrieved from the transmission
???? * medium. Build an skb around it, so upper layers can handle it
???? */
??? skb = dev_alloc_skb(len+2);//分配一個(gè)socket buffer,并且初始化skb->data,skb->tail和skb->head
??? if (!skb) {
??????? printk("snull rx: low on mem - packet dropped/n");
??????? priv->stats.rx_dropped++;
??????? return;
??? }
??? skb_reserve(skb, 2); /* align IP on 16B boundary */?
??? memcpy(skb_put(skb, len), buf, len);//skb_put是把數(shù)據(jù)寫入到socket buffer
??? /* Write metadata, and then pass to the receive level */
??? skb->dev = dev;
??? skb->protocol = eth_type_trans(skb, dev);//返回的是協(xié)議號(hào)
??? skb->ip_summed = CHECKSUM_UNNECESSARY; //此處不校驗(yàn)
??? priv->stats.rx_packets++;//接收到包的個(gè)數(shù)+1
????
??? priv->stats.rx_bytes += len;//接收到包的長(zhǎng)度
??? netif_rx(skb);//通知內(nèi)核已經(jīng)接收到包,并且封裝成socket buffer傳到上層
??? return;
}
???
???????
/*
?* The typical interrupt entry point
?*/
//中斷處理,此程序中沒有硬件,因此,沒有真正的硬件中斷,只是模擬中斷,在發(fā)送完網(wǎng)絡(luò)數(shù)據(jù)包之后,會(huì)產(chǎn)生中斷
//用來(lái)通知內(nèi)核已經(jīng)發(fā)送完數(shù)據(jù)包,當(dāng)新的數(shù)據(jù)包到達(dá)網(wǎng)絡(luò)接口時(shí),會(huì)發(fā)生中斷,通知新的數(shù)據(jù)包已經(jīng)到來(lái)了
void snull_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
??
??? int statusword;//用來(lái)標(biāo)識(shí)是發(fā)送完畢還是接收到新的數(shù)據(jù)包
??? struct snull_priv *priv;
??? /*
???? * As usual, check the "device" pointer for shared handlers.
???? * Then assign "struct device *dev"
???? */
??? struct net_device *dev = (struct net_device *)dev_id;
??? /* ... and check with hw if it's really ours */
??? if (!dev /*paranoid*/ ) return;
??? /* Lock the device */
??? priv = (struct snull_priv *) dev->priv;
??? spin_lock(&priv->lock);
??? /* retrieve statusword: real netdevices use I/O instructions */
??? statusword = priv->status;
??? if (statusword & SNULL_RX_INTR) {//如果是接收
??????? /* send it to snull_rx for handling */
??????? snull_rx(dev, priv->rx_packetlen, priv->rx_packetdata);
??? }
??? if (statusword & SNULL_TX_INTR) {//如果發(fā)送完畢
??????? /* a transmission is over: free the skb */
??????? priv->stats.tx_packets++;
??????? priv->stats.tx_bytes += priv->tx_packetlen;
??????? dev_kfree_skb(priv->skb);//釋放skb 套接字緩沖區(qū)
??? }
??? /* Unlock the device and we are done */
??? spin_unlock(&priv->lock);
??? return;
}
/*
?* Transmit a packet (low level interface)
?*/
//真正的處理的發(fā)送數(shù)據(jù)包
//模擬從一個(gè)網(wǎng)絡(luò)向另一個(gè)網(wǎng)絡(luò)發(fā)送數(shù)據(jù)包
void snull_hw_tx(char *buf, int len, struct net_device *dev)
{
?
??
?? /*
???? * This function deals with hw details. This interface loops
???? * back the packet to the other snull interface (if any).
???? * In other words, this function implements the snull behaviour,
???? * while all other procedures are rather device-independent
???? */
??? struct iphdr *ih;//ip頭部
??? struct net_device *dest;//目標(biāo)設(shè)備結(jié)構(gòu)體,net_device存儲(chǔ)一個(gè)網(wǎng)絡(luò)接口的重要信息,是網(wǎng)絡(luò)驅(qū)動(dòng)程序的核心
??? struct snull_priv *priv;
??? u32 *saddr, *daddr;//源設(shè)備地址與目標(biāo)設(shè)備地址
??? /* I am paranoid. Ain't I? */
??? if (len < sizeof(struct ethhdr) + sizeof(struct iphdr)) {
??????? printk("snull: Hmm... packet too short (%i octets)/n",
?????????????? len);
??????? return;
??? }
??? /*
???? * Ethhdr is 14 bytes, but the kernel arranges for iphdr
???? * to be aligned (i.e., ethhdr is unaligned)
???? */
??? ih = (struct iphdr *)(buf+sizeof(struct ethhdr));
??? saddr = &ih->saddr;
??? daddr = &ih->daddr;
//在同一臺(tái)機(jī)器上模擬兩個(gè)網(wǎng)絡(luò),不同的網(wǎng)段地址,進(jìn)行發(fā)送網(wǎng)絡(luò)數(shù)據(jù)包與接收網(wǎng)絡(luò)數(shù)據(jù)包
??? ((u8 *)saddr)[2] ^= 1; /* change the third octet (class C) ^是位異或操作符把第三個(gè)部分的網(wǎng)絡(luò)地址與1進(jìn)行異或,由于同一網(wǎng)絡(luò)的數(shù)據(jù)不進(jìn)行轉(zhuǎn)發(fā)*/?
??? ((u8 *)daddr)[2] ^= 1;
??? ih->check = 0;???????? /* and rebuild the checksum (ip needs it) */
??? ih->check = ip_fast_csum((unsigned char *)ih,ih->ihl);
??? if (dev == snull_devs)
??????? PDEBUGG("%08x:%05i --> %08x:%05i/n",
?????????????? ntohl(ih->saddr),ntohs(((struct tcphdr *)(ih+1))->source),
?????????????? ntohl(ih->daddr),ntohs(((struct tcphdr *)(ih+1))->dest));
??? else
??????? PDEBUGG("%08x:%05i <-- %08x:%05i/n",
?????????????? ntohl(ih->daddr),ntohs(((struct tcphdr *)(ih+1))->dest),
?????????????? ntohl(ih->saddr),ntohs(((struct tcphdr *)(ih+1))->source));
??? /*
???? * Ok, now the packet is ready for transmission: first simulate a
???? * receive interrupt on the twin device, then? a
???? * transmission-done on the transmitting device
???? */
??? dest = snull_devs + (dev==snull_devs ? 1 : 0);//如果dev是0,那么dest就是1,如果dev是1,那么dest是0
??? priv = (struct snull_priv *) dest->priv;//目標(biāo)dest中的priv
??? priv->status = SNULL_RX_INTR;
??? priv->rx_packetlen = len;
??? priv->rx_packetdata = buf;
??? snull_interrupt(0, dest, NULL);
??? priv = (struct snull_priv *) dev->priv;
??? priv->status = SNULL_TX_INTR;
??? priv->tx_packetlen = len;
??? priv->tx_packetdata = buf;
??? if (lockup && ((priv->stats.tx_packets + 1) % lockup) == 0) {
??????? /* Simulate a dropped transmit interrupt */
??????? netif_stop_queue(dev);
??
??????? PDEBUG("Simulate lockup at %ld, txp %ld/n", jiffies,
??????????????????????? (unsigned long) priv->stats.tx_packets);
??? }
??? else
??????? snull_interrupt(0, dev, NULL);
}
?
/*
?* Transmit a packet (called by the kernel)
?*/
//發(fā)包函數(shù)
int snull_tx(struct sk_buff *skb, struct net_device *dev)
{
??
??? int len;
??? char *data;
??? struct snull_priv *priv = (struct snull_priv *) dev->priv;
??? if ( skb == NULL) {
??????? PDEBUG("tint for %p,? skb %p/n", dev,? skb);
??????? snull_tx_timeout (dev);
??????? if (skb == NULL)
??????????? return 0;
??? }
??? len = skb->len < ETH_ZLEN ? ETH_ZLEN : skb->len;//ETH_ZLEN是所發(fā)的最小數(shù)據(jù)包的長(zhǎng)度
??? data = skb->data;//將要發(fā)送的數(shù)據(jù)包中數(shù)據(jù)部分
??? dev->trans_start = jiffies; //保存當(dāng)前的發(fā)送時(shí)間
??? priv->skb = skb;
??? snull_hw_tx(data, len, dev);//真正的發(fā)送函數(shù)
?? return 0; /* Our simple device can not fail */
}
/*
?* Deal with a transmit timeout.
?*/
//一旦超出watchdog_timeo就會(huì)調(diào)用snull_tx_timeout
void snull_tx_timeout (struct net_device *dev)
{
?? printk("call snull_tx_timeout/n");
??? struct snull_priv *priv = (struct snull_priv *) dev->priv;
??? PDEBUG("Transmit timeout at %ld, latency %ld/n", jiffies,
??????????????????? jiffies - dev->trans_start);
??? priv->status = SNULL_TX_INTR;
??? snull_interrupt(0, dev, NULL);//超時(shí)后發(fā)生中斷
??? priv->stats.tx_errors++;//發(fā)送的錯(cuò)誤數(shù)
??? netif_wake_queue(dev); //為了再次發(fā)送數(shù)據(jù),調(diào)用此函數(shù),重新啟動(dòng)發(fā)送隊(duì)列
??? return;
}
?
/*
 * Ioctl commands.
 *
 * No command is implemented: the call is only logged through PDEBUG and
 * 0 is returned for every cmd.
 */
int snull_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
    PDEBUG("ioctl\n");
    return 0;
}
/*
?* Return statistics to the caller
?*/
struct net_device_stats *snull_stats(struct net_device *dev)
{
??? struct snull_priv *priv = (struct snull_priv *) dev->priv;
??? return &priv->stats;//得到統(tǒng)計(jì)資料信息
}
//設(shè)備初始化函數(shù)
int snull_init(struct net_device *dev)
{
?? printk("call snull_init/n");
??? /*
???? * Then, assign other fields in dev, using ether_setup() and some
???? * hand assignments
???? */
??? ether_setup(dev);//填充一些以太網(wǎng)中的設(shè)備結(jié)構(gòu)體的項(xiàng)
??? dev->open??????????? = snull_open;
??? dev->stop??????????? = snull_release;
??? //dev->set_config????? = snull_config;
??? dev->hard_start_xmit = snull_tx;
??? dev->do_ioctl??????? = snull_ioctl;
??? dev->get_stats?????? = snull_stats;
??? //dev->change_mtu????? = snull_change_mtu;?
?? // dev->rebuild_header? = snull_rebuild_header;
??? //dev->hard_header???? = snull_header;
??? dev->tx_timeout???? = snull_tx_timeout;//超時(shí)處理
??? dev->watchdog_timeo = timeout;
??? /* keep the default flags, just add NOARP */
??? dev->flags?????????? |= IFF_NOARP;
??? dev->hard_header_cache = NULL;????? /* Disable caching */
??? SET_MODULE_OWNER(dev);
??? /*
???? * Then, allocate the priv field. This encloses the statistics
???? * and a few private fields.
???? */
//為priv分配內(nèi)存
??? dev->priv = kmalloc(sizeof(struct snull_priv), GFP_KERNEL);
? if (dev->priv == NULL)
?????? return -ENOMEM;
??? memset(dev->priv, 0, sizeof(struct snull_priv));
??? spin_lock_init(& ((struct snull_priv *) dev->priv)->lock);
??? return 0;
}
struct net_device snull_devs[2] = {
??? { init: snull_init, },? /* init, nothing more */
??? { init: snull_init, }
};
int snull_init_module(void)
{
?? int i,result=0;
?? strcpy(snull_devs[0].name,"snull0");//net_device結(jié)構(gòu)體中的name表示設(shè)備名
?? strcpy(snull_devs[1].name,"snull1");//即定義了兩個(gè)設(shè)備,snull0與snull1
??? for (i=0; i<2;? i++)
??????? if ( (result = register_netdev(snull_devs+i)) )//注冊(cè)設(shè)備
??????????? printk("snull: error %i registering device /"%s/"/n",
?????????????????? result, snull_devs[i].name);
???? return 0;
}
void snull_cleanup(void)
{
??? int i;
?
??? for (i=0; i<2;? i++) {
??????? kfree(snull_devs[i].priv);
??????? unregister_netdev(snull_devs+i);
??? }
??? return;
}
/* Register the module's load and unload entry points. */
module_init(snull_init_module);
module_exit(snull_cleanup);
分析:
這個例子中包括了以下部分:
(1)網絡設備初始化 snull_init
(2)發送數據包函數snull_tx,而真正的發送數據包函數是snull_hw_tx,在snull_hw_tx中,目標設備dest收到數據包產生中斷,然后再向源設備發送數據包,發送完之后也產生中斷
(3)接收數據包的函數snull_rx
(4)中斷處理snull_interrupt
(5)網絡超時處理snull_tx_timeout
(6)網絡設備的打開snull_open
測試:
(1)生成snull.ko, insmod snull.ko
(2)為兩個網絡設備分配IP:
ifconfig snull0 192.168.0.1
ifconfig snull1 192.168.1.2
可以看出,兩個網絡設備在不同的網段
ping 192.168.0.2 由于目標daddr經過((u8 *)daddr)[2] ^= 1,變成 192.168.1.2,相當于ping 192.168.1.2.
而源IP 192.168.0.1 經過((u8 *)saddr)[2] ^= 1,變成192.168.1.1,那么dest設備發送的數據包地址是192.168.1.1,相當于發送給192.168.0.1
如果不經過這樣處理,直接ping 192.168.1.2 是不能ping 通的,由于不在同一個網段上。
可以測試一下cat /var/log/messages.
關于Linux網絡驅動就介紹到這里了。
?
評(píng)論
查看更多