IP function analysis _apue is also @ 2005-03-10 18:29 $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $$$$$$ VOID __INIT IP_INIT (VOI) When the system is started, INET_INIT (VOID) is called to initialize the network subsystem. This function calls ip_init to initialize the IP protocol processing module (registration processing function, allocate routing processing) Table, etc.). Realization process: * Register the IP protocol handler. Place IP_PACKET_TYPE in PTYPE_BASE (Linux network protocol processing chain), * where the reception function of the IP layer is IP_RCV. * Initialization Routing, Linux routing is more complicated, and the analysis is required. * Initializing the IP (Peer IP) information table, you need another text to analyze. / * * IP protocol layer initialiser * / static struct packet_type ip_packet_type = {__constant_htons (ETH_P_IP), NULL, / * All devices * / ip_rcv, (void *) 1, NULL,}; / * * IP registers the packet type and then calls the subprotocol initialisers * / void __init ip_init (void) {dev_add_pack (& ip_packet_type); ip_rt_init (); inet_initpeers (); #ifdef CONFIG_IP_MULTICAST proc_net_create ( "igmp", 0, ip_mc_procinfo); #endif} $$$$$$$ $$$$$$$$$$$ IP send function analysis of $$$$$$$$$$$$$$$$$$$$$$$: Silicon Valley farmers
int ip_output (struct sk_buff * skb) {#ifdef CONFIG_IP_ROUTE_NAT struct rtable * rt = (struct rtable *) skb-> dst; if (rt-> rt_flags & RTCF_NAT) ip_do_nat (skb); #endif return ip_finish_output (skb);} - -------------------------------------------------- ------------------------------- int ip_finish_output (struct SK_Buff * SKB) implementation process: * Call Netfilter's hook function Then call ip_finish_output2 to further process. __inline__ int ip_finish_output (struct sk_buff * skb) {struct net_device * dev = skb-> dst-> dev; skb-> dev = dev; skb-> protocol = __constant_htons (ETH_P_IP); return NF_HOOK (PF_INET, NF_IP_POST_ROUTING, skb, NULL , DEV, IP_FINISH_OUTPUT2);} ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ----------------------------------------- static inline int ip_finish_output2 (struct SK_BUFF * SKB) implementation process: * Before calling ip_output, the system has been placed in SKB-> DST, if the DST_ENTRY has been sent to the network package, the hardware address headers of the two sides have put into the cache hh_cache, will Ethernet Head (16 bytes) Copy from hH-> hh_data into SKB, then call HH-> HH_Output (in most cases dev_queue_xmit) put this SKB in the send read queue. (HH_Cache is initialized in the ARP module, requiring another article) * If there is no hh_cache, then directly call the NEIGHBOUR's OUTPUT function (NEigh_Resolve_output) processing, this function calls neigh_event_send to get the MAC address of the other party when necessary, and finally Call neigh-> ops -> Queue_Xmit (in most cases dev_queue_xmit) put this SKB in the send read queue.
static inline int ip_finish_output2 (struct sk_buff * skb) {struct dst_entry * dst = skb-> dst; struct hh_cache * hh = dst-> hh; if (hh) {read_lock_bh (& hh-> hh_lock); memcpy (skb-> data - 16, hh-> hh_data, 16); read_unlock_bh (& hh-> hh_lock); SKB_PUSH (SKB, HH-> hh_len); return hh-> hh_output (SKB); Else IF (DST-> neighbour) Return DST- > neighbour-> output (skb); if (net_ratelimit ()) printk (KERN_DEBUG "ip_finish_output2: No header cache and no neighbour n!"); kfree_skb (skb); return -EINVAL;} Note: UDP is send_msg calls ip_build_xmit Processing Fragmentation, which eventually calls IP_output. TCP will call IP_QUEUE_XMIT to process Fragmentation, which will eventually call ip_output. These functions are in ip_output.c, and there must be an article interpretation. $$$$$$$$$$$$$$ IP Receive Function Analysis $$$$$$$$$$$$$$$ INT IP_RCV (Struct Sk_buff * SKB, Struct Net_Device * dev Packet_type * pt) When the system receives the network package, if it is an IP packet, IP_RCV is called. The primary function of IP_RCV is fragment reorganization, depending on the route will give them to the previous layer protocol stack (eg, TCP, UDP), or call ip_forward to transfer IP packets to another network card device. Implementation process: * Check that SKB is not mobilized by another process, if any, then clone (clone) one of the same SKB. * Check the basic properties of the IP package: 1. The length of the IP header is calculated in 4 bytes, and the standard IP header is 20 bytes, so IPH-> IHL must be greater than or equal to 5. 2. The version of the IP package is IPv4. 3. Call IP_FAST_CSUM to check the check.
4. Compare the total length recorded in the IP header, ntohs(iph->tot_len), with the length of the skb buffer; the total length of the IP packet must also be no smaller than the length of the IP header (iph->ihl << 2). * If the skb is longer than the IP packet (for example, because the network card automatically appended padding bytes after the packet), call __pskb_trim(skb, len) to remove the excess bytes from the end of the buffer. * Once the above checks have passed, call the Netfilter hook function; when the hook processing is complete, ip_rcv_finish is called to continue the IP protocol processing.
INT IP_RCV (STRUCT SK_BUFF * SKB, STRUCT NET_DEVICE * DEV, STRUCT PACKE T_TYPE * PT) {... ... IF ((SKB = SKB_SHARE_CHECK (SKB, GFP_ATOMIC)) == NULL) Goto Out; if (! pskb_may_pull SKB, SIZEOF (STRUCT IPHDR))) Goto Inhdr_ERROR; IPH = SKB-> NH.IPH; IF (iPH-> IHL <5 || iPh-> Version! = 4) goto inhdr_error; if (! pskb_may_pull (SKB, IPH -> ihl * 4)) goto inhdr_error; if (ip_fast_csum ((u8 *) iPh, iPh-> ihl)! = 0) goto inhdr_error; {__U32 len = ntoHS (iph-> tot_len); if (SKB-> LEN)
Static Inline INT IP_RCV_FINISH (STRUCT SK_BUFF * SKB) {struct net_device * dev = SKB-> dev; struct iPhdr * iPh = SKB-> nh.iph; if (SKB-> DST == NULL) {if (IP_ROUTE_INPUT (SKB, IPH-> DADDR, IPH-> SADDR, IPH-> TOS, DEV)) goto drop;} * If iph->ihl > 5 (the IP header is longer than 20 bytes), the IP packet carries additional options; in that case call ip_options_compile to parse each option into IPCB(skb)->opt. The "source route" option is also handled here; the specific details are skipped in this article.
if (iph-> ihl> 5) {struct ip_options * opt;. / * It looks as overkill, because not all IP options require packet mangling But it is the easiest for now, especially taking into account that combination of IP options and running Sniffer Is Extremely Rare Condition. --ank (980813) * / if (SKB_COW (SKB)) Goto Drop; IPH = SKB-> NH.IPH; SKB-> IP_SUMMED = 0; IF (ip_options_compile , SKB)) Goto Inhdr_Error; Opt = & (IPCB (SKB) -> OPT); if (OPT-> SRR) {structure in_Device * in_dev = in_dev_get (dev); IF (in_dev_source_route (in_dev_source_route) {if (in_DEV_LOG_MARTIANS (In_DEV) & ET_RATELIMIT ()) Printk (kern_info "Source Route Option% U.% u.% u.% u ->% u.% u.% u.% un ", nipquad (iph-> saddr), n ipquad (iPh-> daddr)); in_dev_put (in_dev); goto Drop;
} IN_DEV_PUT (IN_DEV);} IF (ip_options_rcv_sr (SKB)) goto drop;}} * Finally, the destination route is called, if the destination address of this IP package is not the address of this machine, call ip_forward processing, otherwise call ip_local_delieveer Continue processing, preparing for the IP package to the previous layer protocol. Return SKB-> DST-> INPUT (SKB); Inhdr_ERROR: IP_INC_STATS_BH (ipinhdrerror); Drop: Kfree_skb (SKB); Return Net_Rx_Drop;} ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -------------------------------------------------- ------------- int ip_local_deliver (STRUCT SK_BUFF * SKB) implementation process: * If the network package is IP fragment, call ip_defrag to perform fragmentation. * Check the hook function of calling the Netfilter, complete the processing of the last IP layer protocol after calling ip_local_deliver_finish. INT IP_LOCAL_DELIVER (STRUCT SK_BUFF * SKB) {/ * * Reassemble IP Fragments. * / if (SKB-> NH.IPH-> FRAG_OFF & HTONS (IP_MF | IP_OFFSET) {SKB = IP_DEFRAG (SKB); if (! SKB) Return 0;} return nf_hook (PF_INET, NF_IP_LOCAL_IN, SKB, SKB-> dev, null, ip_local_deliver_finish);} ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -------------------------------------------------- ------- Static Inline INT IP_LOCAL_DELIVER_FINISH (STRUCT SK_BUFF * SKB) implementation process: * Find the corresponding protocol definition PPROT according to the protocol of the IP header (SKB-> NH.IPH-> Protocol). * If there is RAW socket, call Raw_v4_input to process it.
* In the normal case, a hash value maps to only one protocol (inet_protos[hash]); call ipprot->handler(skb) to pass the packet up to the upper-layer protocol (such as ICMP, UDP, or TCP). * Otherwise, call ip_run_ipprot to find the matching protocol, which then calls ipprot->handler(skb) to process the packet.
static inline int ip_local_deliver_finish (struct sk_buff * skb) {int ihl = skb-> nh.iph-> ihl * 4; / * Pull out additionl 8 bytes to save some space in protocols * / if (pskb_may_pull (skb, ihl.! 8)) Goto Out; __skb_pull (SKB, IHL); / * Point INTO The IP DataGram, Just Past the header. * / SKB-> H.RAW = SKB-> DATA; {/ * Note: See Raw.c and net / raw.h, RAWV4_HTABLE_SIZE == MAX_INET_PROTOS * / int protocol = skb-> nh.iph-> protocol; int hash = protocol & (MAX_INET_PROTOS - 1); struct sock * raw_sk = raw_v4_htable [hash]; struct inet_protocol * Ipprot; Int flag; / * if there mustage a raw socket we must check - if we * don't chess * / if (Raw_sk ! = Null) Raw_sk = Raw_v4_input (SKB, SKB-> NH.IPH, HASH); ipprot = (Struct INET_PROTOCOL *) inet_protos [hash]; flag = 0; if (ipprot! = Null) {if (Raw_sk == Null && == null && odprot-> protocol == protocol) {int R; / * FAST PATH ... * / RET = ipprot-> handler (SKB);
Return Ret;} else {flag = ip_run_ipprot (SKB, SKB-> NH.IPH, IPPROT, (Raw_SK! = NULL);}} / * all protocols checked. * if this packet WAS A Broadcast, we may * not * Reply to it, * Since That Causes (Proven, Grin) ARP Storms and a Leakage * of Memory (IE All ICMP Reply Messages Get Queued Up for * Transmission ...) * / if (Raw_SK! = NULL) {/ * shift to last raw user * / raw_rcv (raw_sk, skb); sock_put (raw_sk);} else if (! flag) {/ * Free and report errors * / icmp_send (skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); out: kfree_skb (skb ); }}} Return 0;} $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $$$$ IP ip_forward (STRUCT SK_BUFF * SKB) When the system's routing is completed, if you decide to forward this package to another subnet, call the ip_forward. Realization process: * Before calling ip_forward, the system has called IP_ROUTE_INPUT_SLOW to find the route, put it in SKB-> DST, here take it out. * Check the TTL (Time to Live) value of the IP header if less than 1, throw this package away, and send back the ICMP error message, indicating that this IP package has passed too much node when transmitting on the network. * If the routing limit option is set, it is necessary to judge whether the two routes match.
IPH = SKB-> NH.IPH; RT = (Struct RTable *) SKB-> DST; IF (iPh-> TTL <= 1) GOTO TOO_MANY_HOPS; if (OPT-> IS_STRICTROUTE && RT-> RT_DST! = RT-> Rt_gateway) goto sr_failed; * Find out the network card device and MTU to be forwarded. * If you find a better route, send back an ICMP redirected network package to tell the sender's better route. DEV2 = RT-> u.dst.dev; mtu = rt-> u.dst.pmtu; / * We now generate an icmp host redirect giving the route we calculated. * / if (RT-> RT_FLAGS & RTCF_DOREDIRECT &&! OPT-> SRR) IP_RT_SEND_REDIRECT (SKB); * If SKB's headroom is insufficient (generally have 16 bytes to store Ethernet headers), or there is another module to process this * SKB, call SKB_COW to create a new enough long SKB, and copy all the data. * Minus TTL 1. * If the length of data in SKB is greater than the MTU, and the IP package is labeled as a router "not fragment", send back a * ICMP packet, tell the sender to divide the IP package to smaller Block is sent again. / * WE area it! * / If ((SKB = SKB_COW (SKB, DEV2-> HARD_HEADER_LEN)) == NULL) RETURN NET_RX_DROP; IPH = SKB-> NH.IPH; OPT = & (IPCB) (SKB) -> OPT); / * Decrease TTL AFTER SKB COW DONE * / IP_DECREASE_TTL (IPH); / * We Now May Allocate A New Buffer, And Copy The DataGram Into It. * if The indeicated interface is Up and Running, Kick it. * / if (SKB-> LEN> MTU & IP_DF)) GOTO FRAG_NEEDED; * Call the hook function (hook) in the NetFilter frame, generally the process of NAT and firewall module registration Process. After processing, if this network package needs to continue processing, call ip_forward_finish.
Return nf_hook (PF_INET, NF_IP_FORWARD, SKB, SKB-> DEV, DEV2, IP_FORWARD_FINISH); ------------------------------- --------------------------------------------- - static inline INT ip_forward_finish (STRUCT SK_BUFF * SKB) When the system is transferred from ip_forward to the NetFilter's registration function, then turn back to call ip_forward_finish to complete the forwarding function. Implementation process: * If there is no option (Option) of the IP package, call ip_forward_options. * Otherwise, call ip_send directly to turn this package. IF (OPT-> Optlen == 0) {RETURN (ip_send (SKB));} ip_forward_options (SKB); Return (IP_SEND (SKB)); $$$$$$$$$$$$$$ $$$ IP Debris Restructuring Function Analysis $$$$$$$$$$$$$$$$$$$$ Struct SK_Buff * IP_DEFRAG (Struct Sk_buff * SKB) Linux IP_FRAGMENT.C contains a series of available The IP package is subjected to a function of recombination. When the system receives IP debris, save them to the queue. Always wait until all debriss are received, then they reallite, and then give it to the previous layer protocol stack (such as TCP, UDP) processing. Implementation process: * If DEFRAGMENT uses too much memory, call ip_evictor to release those Old Fragment queues. * Find or create an IP Fragment queue (QP) based on the IP Header call ip_find. * Call IP_FRAG_QUEUE to insert this network package SKB into the queue QP. * If the first and last Fragment have arrived, and the queue Fragment length is equal to the length of the IP package, the ip_frag_reasm calls the IP package to re-assemble this IP package. Note When the Fragment queue is operated, add Spin Lock to prevent multiple CPUs from inserting or deleting Fragment.
struct sk_buff * ip_defrag (struct sk_buff * skb) {struct iphdr * iph = skb-> nh.iph; struct ipq * qp; struct net_device * dev; / * Start by cleaning up the memory * / if (atomic_read (& ip_frag_mem). > sysctl_ipfrag_high_thresh) ip_evictor (); dev = SKB-> dev; / * lookup (or create) Queue header * / if ((qp = ip_find (iPh))! = null) {struct SK_Buff * Ret = null; spin_lock (& QP) -> LOCK); IP_FRAG_QUEUE (QP, SKB); if (QP-> Last_in == (first_in | last_in) && qp-> meat == qp-> len) RET = IP_FRAG_REASM (QP, DEV); Spin_Unlock (& QP- > lock); IPQ_PUT (QP); Return Ret;} kfree_skb (SKB); return null;} ---------------------------------------------------------------------------------------------------------------------------------------- ------------------------------------------------ ------ Static Inline struct ipq * ip_find (struct iPhdr * iPh) implementation process: * All Fragment queues are actually placed in a hash table (ipq_hash), so the first step is based on IP packets first ID, source IP address, destination IP address and protocol, call ipqhashfn to calculate the hash value h ASH. * Traverse the same HASH value, find the Reference Counter after adding it, returns this queue. * If you can't find it, call ip_frag_create to create a Fragment queue and return.
static inline struct ipq * ip_find (struct iphdr * iph) {__u16 id = iph-> id; __u32 saddr = iph-> saddr; __u32 daddr = iph-> daddr; __u8 protocol = iph-> protocol; unsigned int hash = ipqhashfn (ID, Saddr, Daddr, Protocol); Struct IPQ * QP; Read_Lock (& IPFrag_lock); for (qp = ipq_hash [hash]; QP; QP = qp-> next) {if (qp-> id == id && qp -> saddr == saddr && qp-> daddr == daddr && qp-> protocol == protocol) {atomic_inc (& qp-> refcnt); read_unlock (& ipfrag_lock); return qp;}} read_unlock (& ipfrag_lock); return ip_frag_create ( Hash, iPh);} -------------------------------------------- --------------------------------------- static void ip_frag_queue (struct ipq * qp, Struct SK_BUFF * SKB) Implementation process: * Calculate the start and end of this Fragment, and end, if it is the last fragment (zero), set the Last_in flag. * If it is not the last Fragment, the length qp-> len = end of the queue (QP).
static void ip_frag_queue (struct ipq * qp, struct sk_buff * skb) {struct sk_buff * prev, * next; int flags, offset; int ihl, end; if (qp-> last_in & COMPLETE) goto err; offset = ntohs (skb -> nh.iph-> frag_off; flags = offset & ~ ip_offset; offset & = ip_offset; offset << = 3; / * Offset is in 8-byte chunks * / hl = SKB-> nh.iph-> ihl * 4; / * determine the position of this fragment. * / END = Offset SKB-> LEN - IHL; / * IS this the final fragment? * / If ((Flags & IP_MF) == 0) {/ * if We Already Have Some Bits Beyond End * or Have Different End, The Segment Is Corrrupted. * / if (End
* If the prev and the fragment to be inserted have overlap, PSKB_PULL (SKB, I) is called SKB, aligned offset. Prev = null; for (Next = qp-> fragments; next! = null; next = next-> next) {if (FRAG_CB (Next) -> Offset> = offset) Break; / * bingo! * / prev = next } If (prev) {INT i = (FRAG_CB (Prev) -> Offset Prev-> LEN) - Offset; if (i> 0) {OFFSET = i; if (end <= offset) goto err; if (! PSKB_PULL (SKB, I)) Goto Err; if (SKB-> IP_SUMMED! = Checksum_unnecessary) SKB-> ip_summed = checksum_none;}} * Starting from Next, check each fragment, if some overlap (I bytes) If you call PSKB_PULL (NEXT, I) Correct the current fragment, if it is overlapping, call FRAG_KFREE_SKB to release the current Fragment. * If it is not the last Fragment, the length qp-> len = end of the queue (QP).