1. Linux kernel network part packet flow
1.1. Data Structure
1.1.1. Ethhdr
Struct Ethhdr
{
Unsigned char h_dest [eth_alent]; / * Destination Eth Addr * /
Unsigned char h_source [eth_alent]; / * Source Ether Addr * /
Unsigned short h_proto; / * packet type id field * /
}
1.1.2. IPhdr
Struct iphdr {
#if Defined (__ little_endian_bitfield)
__U8 IHL: 4,
Version: 4;
#ELIF Defined (__BIG_ENDIAN_BITFIELD)
__U8 VERSION: 4,
IHL: 4;
#ELSE
#Error "please fix
#ENDIF
__U8 TOS;
__U16 TOT_LEN;
__U16 ID;
__U16 FRAG_OFF;
__U8 TTL;
__U8 protocol;
__U16 check;
__U32 SADDR;
__U32 DADDR;
/ * The options start here. * /
}
1.1.3. TCPHDR
struct tcphdr {
__U16 Source;
__U16 DEST;
__U32 SEQ;
__U32 ACK_SEQ;
#if Defined (__ little_endian_bitfield)
__U16 RES1: 4,
Doff: 4,
FIN: 1,
SYN: 1,
RST: 1,
PSH: 1,
ACK: 1,
URG: 1,
Ece: 1,
CWR: 1;
#ELIF Defined (__ big_endian_bitfield)
__U16 DOFF: 4,
RES1: 4,
CWR: 1,
Ece: 1,
URG: 1,
ACK: 1,
PSH: 1,
RST: 1,
SYN: 1,
FIN: 1;
#ELSE
#error "Adjust Your
#ENDIF
__U16 window;
__U16 check;
__U16 URG_PTR;
}
1.1.4. SK_BUFF
Struct SK_BUFF {
/ * THESE TWO MEMBERS MUST BE FIRST. * /
Struct SK_Buff * Next; / * Next Buffer in list * /
Struct Sk_buff * prev; / * previous buffer in list * /
Struct SK_BUFF_HEAD * list; / * list we are on * /
Struct Sock * SK; / * Socket We Are OWNED BY * /
Struct TimeVal Stamp; / * TIME WE ARIVED * / STRUCT NET_DEVICE * DEV; / * Device We Arrived On / Are Leaving By * /
#define has_skbuff_physinoutdev
Struct Net_Device * PhysIndev; / * Physical Device We Arrived on * /
Struct Net_Device * PhysoutDev; / * Physical Device We Well Leave By * /
/ * Transport Layer Header * /
union
{
Struct TCPHDR * TH;
Struct udphdr * uh;
Struct icmphdr * ICMPH;
Struct IGMPHDR * IGMPH;
Struct iphdr * ipiph;
Struct spxhdr * spx;
UNSIGNED Char * RAW;
} h;
/ * NetWork Layer HEADER * /
union
{
Struct iphdr * iph;
Struct IPv6HDR * IPv6h;
Struct arphdr * arph;
Struct ipxhdr * ipxh;
UNSIGNED Char * RAW;
} NH;
/ * Link Layer Header * /
union
{
Struct Ethhdr * ethernet;
UNSIGNED Char * RAW;
} Mac;
Struct DST_ENTRY * DST;
/ *
* This is the control buffer. It is free to use for every
* Layer. please put your private variables there. if you.
* Want to Keep Them Across Layers you have to do a skb_clone ()
* First. this is Owned by Whoever Has The SKB Queued ATM.
* /
CHAR CB [48];
Unsigned int Len; / * Length of actual data * /
Unsigned int Data_len;
Unsigned int CSUM; / * Checksum * /
Unsigned char __unused, / * dead field, may be reuse * /
Clones, / * Head May Be Cloned (Check Refcnt to Be Sure). * /
PKT_TYPE, / * PACKET CLASS * /
IP_SUMMED; / * DRIVER FED US An IP Checksum * /
__U32 priority; / * packet queueing priority * /
Atomic_t users; / * user count - see datagram.c, tcp.c * / unsigned short protocol; / * packet protocol from driver. * /
Unsigned short security; / * security level of packet * /
Unsigned int true * / / * buffer size * /
Unsigned char * head; / * head of buffer * /
Unsigned char * data; / * data head pointer * /
Unsigned char * tail; / * tail pointer * /
Unsigned char * end; / * end pointer * /
Void (* destructor) (STRUCT SK_BUFF *); / * DESTRUCT FUNCTION * /
#ifdef config_netfilter
/ * Can be used for communication beetween hooks. * /
Unsigned long nfmark;
/ * Cache Info * /
__U32 nfcache;
/ * Associated connection, if any * /
Struct NF_CT_INFO * NFCT;
#ifdef config_netfilter_debug
Unsigned int nf_debug;
#ENDIF
#ENDIF / * Config_Netfilter * /
#if Defined (config_hippi)
Union {
__U32 IFIELD;
} privacy;
#ENDIF
#ifdef config_net_sched
__U32 TC_INDEX; / * Traffic Control INDEX * /
#ENDIF
}
1.2. Code pieces related to SK_BUFF
1.2.1. About SK_BUFF-> Data
The network card driver creates an skb structure as follows:
if (new_skb == NULL) {
	new_skb = (struct sk_buff *) dev_alloc_skb(skb_size);
}
.........
rx_struct->skb = new_skb;
rx_struct->dma_addr = pci_map_single(bdp->pdev, new_skb->data,
				     sizeof (rfd_t),
				     PCI_DMA_FROMDEVICE);
The code segment in the network card driver that receives a data packet:
rx_struct = list_entry(bdp->active_rx_list.next,
		       struct rx_list_elem, list_elem);
skb = rx_struct->skb;
rfd = RFD_POINTER(skb, bdp);	/* locate RFD within skb */
pci_dma_sync_single(bdp->pdev, rx_struct->dma_addr,
		    bdp->rfd_size, PCI_DMA_FROMDEVICE);
.........
/* set the protocol */
skb->protocol = eth_type_trans(skb, dev);
/* set the checksum info */
if (bdp->flags & DF_CSUM_OFFLOAD) {
	if (bdp->rev_id >= D102_REV_ID) {
		skb->ip_summed = e100_D102_check_checksum(rfd);
	} else {
		skb->ip_summed = e100_D101M_checksum(bdp, skb);
	}
} else {
	skb->ip_summed = CHECKSUM_NONE;
}
1.2.2. About SK_BUFF-> DEV
The e100_alloc_skbs function calls not only e100_alloc_skb but also e100_add_skb_to_end. The e100_add_skb_to_end function contains the following code:
(rx_struct->skb)->dev = bdp->device;
This records which network card the packet was received from.
1.2.3. About SK_BUFF-> Mac
As shown above, the network card driver calls eth_type_trans when it receives a packet, and this call is the key step. The function's code is as follows:
Unsigned short eth_type_trans (struct SK_Buff * SKB, STRUCT NET_DEVICE * DEV)
{
Struct Ethhdr * Eth;
UNSIGNED Char * RAWP;
SKB-> Mac.RAW = SKB-> DATA;
SKB_PULL (SKB, DEV-> HARD_HEADER_LEN);
Eth = SKB-> mac.ethernet;
IF (* eth-> h_dest & 1)
{
IF (Memcmp (Eth-> h_dest, dev-> Broadcast, Eth_alent) == 0)
SKB-> PKT_TYPE = Packet_Broadcast;
Else
SKB-> PKT_TYPE = Packet_Multicast;
}
/ *
* This allmulti check sales be redundant by 1.4
* So don't forget to remove it.
*
* Seems, you forgot to remove it. All silly devices
* Seems to set iff_promisc.
* /
Else IF (1 / * dev-> flags & iff_promisc * /)
{
IF (Memcmp (Eth-> H_Dest, dev-> dev_addr, eth_alent))
SKB-> PKT_TYPE = Packet_otherhost;
}
IF (ntoh-> h_proto> = 1536)
Return Eth-> h_proto;
Rawp = SKB-> DATA;
/ *
* This is a magic Hack to Spot IPX Packets. Older Novell Breaks * The Protocol Design and Runs IPX over 802.3 without AN 802.2 LLC
* Layer. WE LOOK for FFFF Which isn't a used 802.2 SSAP / DSAP. THIS
* Won't work for fault tolerant NetWare But Does for the REST.
* /
IF (* (unsigned short *) Rawp == 0xffff)
Return HTONS (Eth_P_802_3);
/ *
* Real 802.2 LLC
* /
Return HTONS (Eth_P_802_2);
}
1.2.4. About SK_BUFF-> PKT_TYPE
As seen above, the value of sk_buff->pkt_type is assigned in the eth_type_trans function. Where is this value used?
The following code appears in the kernel's ip_rcv function:
unsigned char *pos = (unsigned char *)(skb->nh.iph);
if (skb->pkt_type == PACKET_OTHERHOST)
	goto drop;	// Discard
IP_INC_STATS_BH(IpInReceives);
1.2.5. About SK_BUFF-> NH and SK_BUFF-> H
The net_rx_action function contains the following code:
.........
skb->h.raw = skb->nh.raw = skb->data;
.........
The ip_local_deliver_finish function contains the following code:
.........
int ihl = skb->nh.iph->ihl * 4;
.........
if (!pskb_may_pull(skb, ihl))
	goto out;
__skb_pull(skb, ihl);
.........
skb->h.raw = skb->data;
.........
Reading the code of the following two functions is helpful.
Static inline char * __ pskb_pull (Struct SK_Buff * SKB, UNSIGNED INT LEN)
{
IF (Len> SKB_HEADLEN (SKB) &&
__pskb_pull_tail (SKB, LEN-SKB_HEADLEN (SKB)) == NULL)
Return NULL;
SKB-> LEN - = LEN;
Return SKB-> DATA = LEN;
}
Static inline unsigned char * pskb_pull (struct SK_Buff * SKB, UNSIGNED INT LEN)
{
IF (Len> SKB-> LEN)
Return NULL;
Return__pskb_pull (SKB, LEN);
}
1.2.6. About SK_BUFF-> NFCT
This member is used for connection tracking.
The resolve_normal_ct function contains the following code:
.........
skb->nfct = &h->ctrack->infos[*ctinfo];
.........
The ip_conntrack_attach function contains the following code:
........
skb->nfct = &ct->infos[ctinfo];
........
1.3. IP packet process
1.3.1. Network card driver to the kernel process
As shown, when the network card receives a packet it raises an interrupt and the interrupt handler is called. The handler allocates an skb structure, obtains the data, and then calls the netif_rx function. The netif_rx function mainly puts the skb into softnet_data and raises the NET_RX_SOFTIRQ soft interrupt.
1.3.2. Process of IP layers in the kernel
net_rx_action is the handler function of the NET_RX_SOFTIRQ soft interrupt.
Packet forwarding process
1.3.3. Local packet process