背景
最近在排查一個網絡問題,ifconfig eth0 up 后,網卡link up比較慢。因此,分析了下從ifconfig up 到網絡驅動的調用流程。這里順便作個記錄。
ifconfig eth0 up 調用的是busybox 的命令,因此從busybox 源碼入手,逐步分析下調用流程。相關代碼位于:networking/ifenslave.c
ifconfig eth0 up
ifconfig eth0 up 和 ifconfig eth0 down 分別對應busybox 的set_if_up()和set_if_down().
staticintset_if_down(char*ifname,intflags) { intres=set_if_flags(ifname,flags&~IFF_UP); if(res) bb_perror_msg("%s:can'tdown",ifname); returnres; }
staticintset_if_up(char*ifname,intflags) { intres=set_if_flags(ifname,flags|IFF_UP); if(res) bb_perror_msg("%s:can'tup",ifname); returnres; }
比如,當我們敲ifconfig eth0 down時,實則就是調用:
/* Example from the article: the call made for "ifconfig eth0 down". */
set_if_down("eth0",master_flags.ifr_flags);
set_if_flags()會將網卡名,up / down 標志位flags通過ioctl命令SIOCSIFFLAGS 傳遞給內核網卡驅動。
staticintset_if_flags(char*ifname,intflags) { structifreqifr; ifr.ifr_flags=flags; returnset_ifrname_and_do_ioctl(SIOCSIFFLAGS,&ifr,ifname); }
dev_ifsioc
接著深入到內核代碼中,看下SIOCSIFFLAGS命令在哪里實現。代碼位于 kernel/net/core/dev_ioctl.c。
staticintdev_ifsioc(structnet*net,structifreq*ifr,unsignedintcmd) { interr; structnet_device*dev=__dev_get_by_name(net,ifr->ifr_name); conststructnet_device_ops*ops; if(!dev) return-ENODEV; ops=dev->netdev_ops; switch(cmd){ caseSIOCSIFFLAGS:/*Setinterfaceflags*/ returndev_change_flags(dev,ifr->ifr_flags); caseSIOCSIFMETRIC:/*Setthemetricontheinterface (currentlyunused)*/ return-EOPNOTSUPP; ................... } returnerr; }
dev_ifsioc()會調用__dev_get_by_name()根據 網卡名遍歷 net鏈表,如果匹配到則返回net_device結構體指針。接著,SIOCSIFFLAGS會調用到dev_change_flags(),最后調用到__dev_change_flags()。
dev_change_flags
intdev_change_flags(structnet_device*dev,unsignedintflags) { intret; unsignedintchanges,old_flags=dev->flags,old_gflags=dev->gflags; ret=__dev_change_flags(dev,flags); if(ret0) ??return?ret; ?changes?=?(old_flags?^?dev->flags)|(old_gflags^dev->gflags); __dev_notify_flags(dev,old_flags,changes); returnret; }
int__dev_change_flags(structnet_device*dev,unsignedintflags) { unsignedintold_flags=dev->flags; intret; ASSERT_RTNL(); /* *Settheflagsonourdevice. */ dev->flags=(flags&(IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP| IFF_DYNAMIC|IFF_MULTICAST|IFF_PORTSEL| IFF_AUTOMEDIA))| (dev->flags&(IFF_UP|IFF_VOLATILE|IFF_PROMISC| IFF_ALLMULTI)); /* *Loadinthecorrectmulticastlistnowtheflagshavechanged. */ if((old_flags^flags)&IFF_MULTICAST) dev_change_rx_flags(dev,IFF_MULTICAST); dev_set_rx_mode(dev); /* *Havewedownedtheinterface.WehandleIFF_UPourselves *accordingtouserattemptstosetit,ratherthanblindly *settingit. */ ret=0; /*兩個標識有一個是IFF_UP*/ if((old_flags^flags)&IFF_UP) ret=((old_flags&IFF_UP)?__dev_close:__dev_open)(dev);//通過flags判斷調用__dev_close還是__dev_open if((flags^dev->gflags)&IFF_PROMISC){ intinc=(flags&IFF_PROMISC)?1:-1; unsignedintold_flags=dev->flags; dev->gflags^=IFF_PROMISC; if(__dev_set_promiscuity(dev,inc,false)>=0) if(dev->flags!=old_flags) dev_set_rx_mode(dev); } /*NOTE:orderofsynchronizationofIFF_PROMISCandIFF_ALLMULTI isimportant.Some(broken)driverssetIFF_PROMISC,when IFF_ALLMULTIisrequestednotaskingusandnotreporting. */ if((flags^dev->gflags)&IFF_ALLMULTI){ intinc=(flags&IFF_ALLMULTI)?1:-1; dev->gflags^=IFF_ALLMULTI; __dev_set_allmulti(dev,inc,false); } returnret; }
在__dev_change_flags(dev, flags)函數中,通過判斷flag的IFF_UP位上的值是否相反,來實現是調用__dev_close()還是__dev_open()來開關eth0。
__dev_close
__dev_close中會將當前的net_device加入到等待設備關閉列表中。
staticint__dev_close(structnet_device*dev) { intretval; LIST_HEAD(single); list_add(&dev->close_list,&single); retval=__dev_close_many(&single); list_del(&single); returnretval; }
__dev_close_many
__dev_close_many通知設備正在關閉,等待未發送完的數據發送完,最后清除開啟標記。
staticint__dev_close_many(structlist_head*head) { structnet_device*dev; ASSERT_RTNL(); might_sleep(); list_for_each_entry(dev,head,close_list){ /*Temporarilydisablenetpolluntiltheinterfaceisdown*/ /*禁用netpoll*/ netpoll_poll_disable(dev); /*通知設備正在關閉*/ call_netdevice_notifiers(NETDEV_GOING_DOWN,dev); /*清除start標志位*/ clear_bit(__LINK_STATE_START,&dev->state); /*Synchronizetoscheduledpoll.Wecannottouchpolllist,it *canbeevenondifferentcpu.Sojustclearnetif_running(). * *dev->stop()willinvokenapi_disable()onallofit's *napi_structinstancesonthisdevice. */ smp_mb__after_atomic();/*Commitnetif_running().*/ } /*未發送完的數據發送完*/ dev_deactivate_many(head); list_for_each_entry(dev,head,close_list){ conststructnet_device_ops*ops=dev->netdev_ops; /* *Callthedevicespecificclose.Thiscannotfail. *OnlyifdeviceisUP * *WeallowittobecalledevenafteraDETACHhot-plug *event. */ /*調用設備關閉操作*/ if(ops->ndo_stop) ops->ndo_stop(dev); /*標記設備關閉*/ dev->flags&=~IFF_UP; /*啟用netpoll*/ netpoll_poll_enable(dev); } return0; }
ndo_stop
ndo_stop為關閉網卡時,不同網卡驅動注冊的不同的關閉函數,我們以海思的網卡驅動為例,分析下ndo_stop函數的實現。代碼位于 kernel/drivers/net/ethernet/hisilicon/hns/hns_enet.c。
hns_nic_net_stop
/*
 * Stop callback of the HNS driver: delegates to hns_nic_net_down() and
 * always returns 0.
 */
static int hns_nic_net_stop(struct net_device *ndev)
{
	hns_nic_net_down(ndev);

	return 0;
}
hns_nic_net_down
staticvoidhns_nic_net_down(structnet_device*ndev) { inti; structhnae_ae_ops*ops; structhns_nic_priv*priv=netdev_priv(ndev); if(test_and_set_bit(NIC_STATE_DOWN,&priv->state)) return; (void)del_timer_sync(&priv->service_timer); netif_tx_stop_all_queues(ndev); netif_carrier_off(ndev); netif_tx_disable(ndev); priv->link=0; if(priv->phy) phy_stop(priv->phy); ops=priv->ae_handle->dev->ops; if(ops->stop) ops->stop(priv->ae_handle); netif_tx_stop_all_queues(ndev); for(i=priv->ae_handle->q_num-1;i>=0;i--){ hns_nic_ring_close(ndev,i); hns_nic_ring_close(ndev,i+priv->ae_handle->q_num); /*cleantxbuffers*/ hns_nic_tx_clr_all_bufs(priv->ring_data+i); } }
hns_nic_net_down()中會調用netif_carrier_off()通知內核子系統網絡斷開。下面我們詳細分析下netif_carrier_off()的實現。
netif_carrier_off()
voidnetif_carrier_off(structnet_device*dev) { /*設置網卡為載波斷開狀態即nocarrier狀態,上行時軟中斷下半部讀到該狀態不會進行網卡收包*/ if(!test_and_set_bit(__LINK_STATE_NOCARRIER,&dev->state)){ if(dev->reg_state==NETREG_UNINITIALIZED) return; /*增加設備改變狀態*/ atomic_inc(&dev->carrier_changes); /*加入事件處理隊列進行處理*/ linkwatch_fire_event(dev); } }
linkwatch_fire_event()
linkwatch_fire_event()函數將設備加入到事件隊列,并且進行事件調度,調度中會根據是否為緊急事件做不同處理。
voidlinkwatch_fire_event(structnet_device*dev) { /*判斷是否是緊急處理的事件*/ boolurgent=linkwatch_urgent_event(dev); /*判斷是否是緊急處理的事件*/ if(!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING,&dev->state)){ /*添加事件到事件列表*/ linkwatch_add_event(dev); }elseif(!urgent) /*設備以前已經設置了pending標記,不是緊急事件,直接返回*/ return; /*事件調度*/ linkwatch_schedule_work(urgent); }
linkwatch_urgent_event()
linkwatch_urgent_event()判斷事件是否需要緊急處理。
staticboollinkwatch_urgent_event(structnet_device*dev) { /*設備未運行,非緊急*/ if(!netif_running(dev)) returnfalse; /*設備的索引號與連接索引號不等,緊急*/ if(dev->ifindex!=dev_get_iflink(dev)) returntrue; /*設備作為teamport,緊急*/ if(dev->priv_flags&IFF_TEAM_PORT) returntrue; /*連接與否&&發送隊列排隊規則改變與否*/ returnnetif_carrier_ok(dev)&&qdisc_tx_changing(dev); }
linkwatch_add_event()
linkwatch_add_event()將設備加入到事件處理鏈表。
staticvoidlinkwatch_add_event(structnet_device*dev) { unsignedlongflags; spin_lock_irqsave(&lweventlist_lock,flags); /*若未添加,則添加設備到事件列表*/ if(list_empty(&dev->link_watch_list)){ list_add_tail(&dev->link_watch_list,&lweventlist); dev_hold(dev); } spin_unlock_irqrestore(&lweventlist_lock,flags); }
linkwatch_schedule_work()
linkwatch_schedule_work()對事件處理進行調度,緊急事件立即執行,非緊急事件延后執行。
staticvoidlinkwatch_schedule_work(inturgent) { unsignedlongdelay=linkwatch_nextevent-jiffies; /*已經設置了緊急標記,則返回*/ if(test_bit(LW_URGENT,&linkwatch_flags)) return; /*需要緊急調度*/ if(urgent){ /*之前設置了,則返回*/ if(test_and_set_bit(LW_URGENT,&linkwatch_flags)) return; /*未設置緊急,則立即執行*/ delay=0; } /*如果大于1s則立即執行*/ if(delay>HZ) delay=0; /*如果設置了緊急標記,則立即執行*/ if(test_bit(LW_URGENT,&linkwatch_flags)) mod_delayed_work(system_wq,&linkwatch_work,0); else /*未設置緊急標記,則按照delay執行*/ schedule_delayed_work(&linkwatch_work,delay); }
__linkwatch_run_queue()
__linkwatch_run_queue()完成對事件調度隊列中設備的處理。
staticvoid__linkwatch_run_queue(inturgent_only) { structnet_device*dev; LIST_HEAD(wrk); /* *Limitthenumberoflinkwatcheventstoone *persecondsothatarunawaydriverdoesnot *causeastormofmessagesonthenetlink *socket.Thislimitdoesnotapplytoupevents *whilethedeviceqdiscisdown. */ /*已達到調度時間*/ if(!urgent_only) linkwatch_nextevent=jiffies+HZ; /*Limitwrap-aroundeffectondelay.*/ /* 未到達調度時間,并且下一次調度在當前時間的1s以后 那么設置調度時間是當前時間 */ elseif(time_after(linkwatch_nextevent,jiffies+HZ)) linkwatch_nextevent=jiffies; /*清除緊急標識*/ clear_bit(LW_URGENT,&linkwatch_flags); spin_lock_irq(&lweventlist_lock); list_splice_init(&lweventlist,&wrk); /*遍歷鏈表*/ while(!list_empty(&wrk)){ /*獲取設備*/ dev=list_first_entry(&wrk,structnet_device,link_watch_list); /*從鏈表移除設備*/ list_del_init(&dev->link_watch_list); /*未到達調度時間&&不需要緊急處理*/ if(urgent_only&&!linkwatch_urgent_event(dev)){ /*添加到鏈表尾部*/ list_add_tail(&dev->link_watch_list,&lweventlist); /*繼續處理*/ continue; } spin_unlock_irq(&lweventlist_lock); /*處理設備*/ linkwatch_do_dev(dev); spin_lock_irq(&lweventlist_lock); } /*鏈表有未處理事件,則以非緊急狀態調度隊列*/ if(!list_empty(&lweventlist)) linkwatch_schedule_work(0); spin_unlock_irq(&lweventlist_lock); }
linkwatch_do_dev()
linkwatch_do_dev()完成對某個設備的狀態改變處理。
staticvoidlinkwatch_do_dev(structnet_device*dev) { /* *Makesuretheabovereadiscompletesinceitcanbe *rewrittenassoonasweclearthebitbelow. */ smp_mb__before_atomic(); /*Weareabouttohandlethisdevice, *soneweventscanbeaccepted */ /*清除pending標記*/ clear_bit(__LINK_STATE_LINKWATCH_PENDING,&dev->state); rfc2863_policy(dev); /*如果設備啟動狀態*/ if(dev->flags&IFF_UP){ /*鏈路連接*/ if(netif_carrier_ok(dev)) /*啟用排隊規則*/ dev_activate(dev); else /*關閉排隊規則*/ dev_deactivate(dev); /*設備狀態改變處理,執行netdev_chain上設備狀態變更回調*/ netdev_state_change(dev); } dev_put(dev); }
phy_stop()
最后,hns_nic_net_down()中會調用phy_stop()將網卡link down。
voidphy_stop(structphy_device*phydev) { mutex_lock(&phydev->lock); if(PHY_HALTED==phydev->state) gotoout_unlock; if(phy_interrupt_is_valid(phydev)){ /*DisablePHYInterrupts*/ phy_config_interrupt(phydev,PHY_INTERRUPT_DISABLED); /*Clearanypendinginterrupts*/ phy_clear_interrupt(phydev); } phydev->state=PHY_HALTED; out_unlock: mutex_unlock(&phydev->lock); /*Cannotcallflush_scheduled_work()hereasdesiredbecause *ofrtnl_lock(),butPHY_HALTEDshallguaranteephy_change() *willnotreenableinterrupts. */ }
phy_stop()將phydev->state設置為PHY_HALTED,將網卡關閉。
__dev_open
__dev_open為設備啟用核心函數,該函數打開eth0,設置啟用標記,并且設置接收模式,排隊規則等。
staticint__dev_open(structnet_device*dev) { conststructnet_device_ops*ops=dev->netdev_ops; intret; ASSERT_RTNL(); /*設備不可用*/ if(!netif_device_present(dev)) return-ENODEV; /*Blocknetpollfromtryingtodoanyrxpathservicing. *Ifwedon'tdothisthereisachancendo_poll_controller *orndo_pollmayberunningwhileweopenthedevice */ /*禁用netpoll*/ netpoll_poll_disable(dev); /*設備打開前通知*/ ret=call_netdevice_notifiers(NETDEV_PRE_UP,dev); ret=notifier_to_errno(ret); if(ret) returnret; /*設置設備打開標記,設備將設置IFF_UP標志位*/ set_bit(__LINK_STATE_START,&dev->state); /*校驗地址*/ if(ops->ndo_validate_addr) ret=ops->ndo_validate_addr(dev); /*執行打開*/ if(!ret&&ops->ndo_open) ret=ops->ndo_open(dev); /*啟用netpoll*/ netpoll_poll_enable(dev); /*失敗,清除打開標記*/ if(ret) clear_bit(__LINK_STATE_START,&dev->state); /*設備打開操作*/ else{ /*設置打開標記*/ dev->flags|=IFF_UP; /*設置接收模式*/ dev_set_rx_mode(dev); /*初始化排隊規則*/ dev_activate(dev); /*加入設備數據到熵池*/ add_device_randomness(dev->dev_addr,dev->addr_len); } returnret; }
hns_nic_net_open()
我們以海思的網卡驅動為例,分析下ndo_open()函數的實現。代碼位于 kernel/drivers/net/ethernet/hisilicon/hns/hns_enet.c。
staticinthns_nic_net_open(structnet_device*ndev) { structhns_nic_priv*priv=netdev_priv(ndev); structhnae_handle*h=priv->ae_handle; intret; if(test_bit(NIC_STATE_TESTING,&priv->state)) return-EBUSY; priv->link=0; netif_carrier_off(ndev); /*設置txqueue的個數*/ ret=netif_set_real_num_tx_queues(ndev,h->q_num); if(ret0)?{ ??netdev_err(ndev,?"netif_set_real_num_tx_queues?fail,?ret=%d! ", ??????ret); ??return?ret; ?} ?/*設置rx?queue的個數*/ ?ret?=?netif_set_real_num_rx_queues(ndev,?h->q_num); if(ret0)?{ ??netdev_err(ndev, ??????"netif_set_real_num_rx_queues?fail,?ret=%d! ",?ret); ??return?ret; ?} ?/*啟動網卡*/ ?ret?=?hns_nic_net_up(ndev); ?if?(ret)?{ ??netdev_err(ndev, ??????"hns?net?up?fail,?ret=%d! ",?ret); ??return?ret; ?} ?return?0; }
hns_nic_net_up()
staticinthns_nic_net_up(structnet_device*ndev) { structhns_nic_priv*priv=netdev_priv(ndev); structhnae_handle*h=priv->ae_handle; inti,j,k; intret; /*初始化中斷,并設置中斷函數為hns_irq_handle,每個rx和txqueue都對應一個中斷*/ ret=hns_nic_init_irq(priv); if(ret!=0){ netdev_err(ndev,"hnsinitirqfailed!ret=%d ",ret); returnret; } for(i=0;iq_num*2;i++){ /*使能中斷,使能napi*/ ret=hns_nic_ring_open(ndev,i); if(ret) gotoout_has_some_queues; } for(k=0;kq_num;k++) h->dev->ops->toggle_queue_status(h->qs[k],1); /*設置mac地址*/ ret=h->dev->ops->set_mac_addr(h,ndev->dev_addr); if(ret) gotoout_set_mac_addr_err; /*hns的start函數為null*/ ret=h->dev->ops->start?h->dev->ops->start(h):0; if(ret) gotoout_start_err; if(priv->phy) /*啟動phy*/ phy_start(priv->phy); clear_bit(NIC_STATE_DOWN,&priv->state); /*修改time每一秒到期一次*/ (void)mod_timer(&priv->service_timer,jiffies+SERVICE_TIMER_HZ); return0; out_start_err: netif_stop_queue(ndev); out_set_mac_addr_err: for(k=0;kq_num;k++) h->dev->ops->toggle_queue_status(h->qs[k],0); out_has_some_queues: for(j=i-1;j>=0;j--) hns_nic_ring_close(ndev,j); set_bit(NIC_STATE_DOWN,&priv->state); returnret; }
phy_start()
最后會調用到phy_start()啟動網卡。
voidphy_start(structphy_device*phydev) { booldo_resume=false; interr=0; mutex_lock(&phydev->lock); switch(phydev->state){ casePHY_STARTING: phydev->state=PHY_PENDING; break; casePHY_READY: phydev->state=PHY_UP; break; casePHY_HALTED: /*makesureinterruptsarere-enabledforthePHY*/ err=phy_enable_interrupts(phydev); if(err0) ???break; ??phydev->state=PHY_RESUMING; do_resume=true; break; default: break; } mutex_unlock(&phydev->lock); /*ifphywassuspended,bringthephysicallinkupagain*/ if(do_resume) phy_resume(phydev); }
審核編輯:劉清
-
網卡驅動
+關注
關注
0文章
35瀏覽量
17690
原文標題:【網絡驅動】ifconfig up 后內核網絡驅動做了什么?
文章出處:【微信號:嵌入式與Linux那些事,微信公眾號:嵌入式與Linux那些事】歡迎添加關注!文章轉載請注明出處。
發布評論請先 登錄
相關推薦
評論