Linux Kernel 2.4.5 IPv4 Socket Layer: A Brief Explanation (http://www.fanqiang.com)

xiaoxiao2021-03-06  41

A brief explanation of the socket layer in Linux kernel 2.4.5 IPv4. From: http://www.xici.net/ Author: scancat (2001-07-30 10:00:00)

Disclaimer: I am limited, so I can't get better, don't yell at me :) 1. New Socket function original: static int inet_create (struct socket * sock, int protocol) In Net / IPv4 / AF_INET.C Detailed explanation static int INET_CREATE (STRUCT SOCKET * SOCK, INT Protocol) {structure {struct Sock * SK; struct proto * prot; sock-> state = ss_unconnected; / * set state is not connected * / SK = SK_alloc (PF_INET, GFP_KERNEL, 1); / * Apply for SOCK * / / * Net / Core / Sock.c * / if (SK == NULL) GOTO DO_OOM; Switch (Sock-> Type) {Case Sock_Stream: / * TCP Protocol * / IF (Protocol && ! protocol = IPPROTO_TCP) goto free_and_noproto; protocol = IPPROTO_TCP; prot = & tcp_prot; / * tcp_prot defined in net / ipv4 / tcp_ipv4.c * / sock-> ops = & inet_stream_ops; / * socket for operating a STREAM * / break; case SOCK_SEQPACKET : / * not supported * / goto free_and_badtype; case SOCK_DGRAM: / * UDP protocol * / if (! protocol && protocol = IPPROTO_UDP) goto free_and_noproto; protocol = IPPROTO_UDP; sk-> no_check = UDP_CSUM_DEFAULT; prot = & udp_prot; / * udp_prot defined In Net / IPv4 / Udp.c * / Sock-> OPS = & INET_DGRAM_OPS; / * For DGRAM Socket Operation * / Break; Case Sock_RAW: / * RA W * / if (! Capable (Capable (Capable (Capable (Capable (Capable) to determine if there is a right to establish SOCK_RAW * / GOTO FREE_AND_BADPERM; if (! Protocol) / * protocol can not be 0 * / goto free_and_noproto; prot = & rAW_PROT; / * RAW_PROT definition in Net /IPv4/Raw.c * / SK-> Reuse = 1; / * Allow addresses to reuse * / SK-> Num = protocol; sock-> ops = & inet_dgram_ops; / * RAW Some features and DGRAM identical * / if (Protocol == IPPROTO_RAW) sk-> protinfo.af_inet.hdrincl = 1; / * ip allow their own custom header * / break; default: goto free_and_badtype;} if (ipv4_config.no_pmtu_disc) sk-> protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT; else SK-> protinfo.af_Inet.pmtudisc =

IP_PMTUDISC_WANT; SK-> Protinfo.af_Inet.ID = 0; SOCK_INIT_DATA (SOCK, SK); / * Initialization Some Data * / / * Net / Core / Sock.c * / SK-> Destruct = INET_SOCK_DESTRUCT; / * When destroyed Socket Time to call inet_sock_destruct * / SK-> zapped = 0; SK-> Family = PF_INET; SK-> Prot = Prot; SK-> Prot = Prot; SK-> Backlog_rcv = Prot-> Backlog_rcv; / * prot-> backlog_rcv ( See the definition of each type * / SK-> protinfo.af_ININET.TTL = SYSCTL_IP_DEFAULT_TTL; / * Set the default TTL * / / * Modification / Proc / Sys / Net / IPv4 / IP_DEFAULT_TTL * / SK-> Protinfo.af_INET.MC_LOOP = 1; SK-> Protinfo.af_INET.MC_TTL = 1; SK-> Protinfo.af_Inet.mc_index = 0; SK-> Protinfo.af_Inet.mc_list = null; #ifdef = NULL; #IFDEF INET_REFCNT_DEBUG Atomic_inc (& INET_SOCK_NR); #ENDIF IF (SK-> Num) {/ * it assumes That Any Protocol Which Allows * The user to assign a number at solidket * creation time automatically * shares. * / SK-> Sport = HTONS (SK-> Num); / * Setting up local port * / / * Add to protocol hash chains. * / SK-> Prot-> hash (s);} if (SK-> prot-> init) {int err = SK-> prot-> init (sk); / * protocol Initialization to Socket * / if (Err! = 0) { inet_sock_release (sk); return (err);}} return (0); free_and_badtype: sk_free (sk); / * release memory * / return -ESOCKTNOSUPPORT; free_and_badperm: sk_free (sk); return -EPERM; free_and_noproto: sk_free (sk ); return -EPROTONOSUPPORT; do_oom: return -ENOBUFS;} in the net / core / sock.c void sock_init_data (struct socket * sock, struct sock * sk) {skb_queue_head_init (& sk-> receive_queue); / * initialize queue accepts 3 , Send, error * / SKB_QUE_HEAD_INIT (& SK-> Write_Queue); SKB_QUEUE_HEAD_INIT (& SK-> Error_Queue); init_timer (& SK-> Timer; / * Initialization Timer * / SK-> allocation = gfp_kernel; SK->

RCVBUF = SYSCTL_RMEM_DEFAULT; SK-> SNDBUF = SYSCTL_WMEM_DEFAULT; SK-> State = TCP_Close; SK-> Zapped = 1; SK-> Socket = Sock; if (Sock) {SK-> Type = SOCK-> Type; SK-> sleep = & sock-> wait; sock-> sk = sk;} else sk-> sleep = NULL; sk-> dst_lock = RW_LOCK_UNLOCKED; sk-> callback_lock = RW_LOCK_UNLOCKED; / * sock_def_wakeup (), sock_def_readable (), sock_def_write_space () , sock_def_error_report (), sock_def_destruct () in net / core / sock.c * / sk-> state_change = sock_def_wakeup; sk-> data_ready = sock_def_readable; sk-> write_space = sock_def_write_space; sk-> error_report = sock_def_error_report; sk-> destruct = SOCK_DEF_DESTRUCT; SK-> Peercred.PID = 0; SK-> Peercred.uid = -1; SK-> Peercred.gid = -1; SK-> RCVLOWAT = 1; SK-> RCVTIMEO = max_schedule_timeout; / * Settings Accept , transmission timeout * / sk-> sndtimeo = MAX_SCHEDULE_TIMEOUT; atomic_set (& sk-> refcnt, 1);} 1.1 SOCK_STREAM initialized at net / ipv4 / tcp_ipv4.c static int tcp_v4_init_sock (struct sock * sk) {struct tcp_opt * tp = & (SK-> TP_PINFO.AF_TCP); skb_queue_head_init (& tp-> out_of_order_queue); tcp_init_xmit_timers (sk); tcp_prequeue_init (tp); tp-> rto = TCP_TIMEOUT_INIT; tp-> mdev = TCP_TIMEOUT_INIT; / * So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control * algorithms that we must have the following bandaid to talk * efficiently to them -DaveM * / tp-> snd_cwnd = 2;. / * See draft-stevens-tcpca-spec-01 for discussion of the * Initialization of these Values. * / TP-> SND_SSTHRESH =

0x7fffffff; / * Infinity * / tp-> snd_cwnd_clamp = ~ 0; tp-> mss_cache = 536; tp-> reordering = sysctl_tcp_reordering; sk-> state = TCP_CLOSE; sk-> write_space = tcp_write_space; / * tcp_write_space () in the net /ipv4/tcp.c * / sk-> use_write_queue = 1; sk-> tp_pinfo.af_tcp.af_specific = & ipv4_specific; / * ipv4_specific in net / ipv4 / tcp_ipv4.c * / sk-> sndbuf = sysctl_tcp_wmem [1]; / * Set the transmission and reception buffer size * / SK-> RCVBUF = SYSCTL_TCP_RMEM [1]; / * sysctl_tcp_ * in Net / IPv4 / TCP.c * / atomic_inc_inc (& TCP_SOCKETS_ALLOCATED); / * TCP_SOCKETS_ALLOCATED is the current TCP Socket * / Return 0;} SOCK_DGRAM No 1.2 Sock_RAW Initialization In Net / IPv4 / Raw.c Static Int Raw_init (Struct Sock * SK) {Struct Raw_opt * TP = & (SK-> TP_PINFO.TP_RAW4); if (SK-> Num = = IPPROTO_ICMP) memset (& tp-> filter, 0, sizeof (tp-> filter)); return 0;} 2.Server 2.1 bind static int inet_bind (struct socket * sock, struct sockaddr * uaddr, int addr_len) {struct sockaddr_in * addr = (struct socmeddr_in *) uaddr; struct soc check * SK = SOCK-> SK; UNSIGNED SHORT SNUM; INT CHK_ADDR_RET; in T Err; / * if The socket Has Its OWN BIND FUNCTION THEN USE. (RAW) * / IF (SK-> Prot-> Bind) Return SK-> Prot-> Bind (SK, Uaddr, Addr_len); / * Only SOCK_RAW defines its own bind function * / if (addr_len sin_addr.s_addr);

/ * Inet_addr_type return type of address * / / * in net / ipv4 / fib_frontend.c * / / * Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since * allowing applications to make a non-local bind solves * several problems with systems using dynamic addressing. * (ie. your servers still start up even if your ISDN link * is temporarily down) * / if (sysctl_ip_nonlocal_bind == 0 && sk-> protinfo.af_inet !! .freebind == 0 && addr-> sin_addr.s_addr = INADDR_ANY && chk_addr_ret = RTN_LOCAL && chk_addr_ret = RTN_MULTICAST && chk_addr_ret = RTN_BROADCAST) return -EADDRNOTAVAIL;!! snum = ntohs (addr-> sin_port); if (snum && snum State! = TCP_Close ) || (SK-> NUM! = 0)) Goto Out; SK-> RCV_SADDR = SIN_ADDR = addr-> sin_addr.s_addr; if (chK_addr_ret == r__multicast || CHK_ADDR_RET == RTN_Broadcast) SK-> Saddr = 0; / * use device * / / / / * make Sure We are allowed to bind here. * / If (SK-> prot-> get_port (sk, snum)! = 0) {/ * get_port Check if reuse * / SK -> Saddr = SK-> RCV_SADDR = 0; Err = -Eaddrinuse; goto out;

} IF (SK-> RCV_SADDR) SK-> UserLocks | = SOCK_BINDADDR_LOCK; IF (SNUM) SK-> UserLocks | = SOCK_BINDPORT_LOCK; SK-> Sport = Htons (SK-> Num); SK-> DADDR = 0; SK- > DPORT = 0; SK_DST_RESET (SK); Err = 0; OUT: Release_Sock (SK); Return Err;} Sock_Stream and Sock_DGRAM with default bind 2.1.1 sock_raw bind in Net / IPv4 / Raw.c static int Raw_bind struct sock * sk, struct sockaddr * uaddr, int addr_len) {struct sockaddr_in * addr = (struct sockaddr_in *) uaddr; int ret = -EINVAL; int chk_addr_ret;! if (sk-> state = TCP_CLOSE || addr_len sin_addr.s_addr); / * inet_addr_type type return address * / / * the net / ipv4 / fib_frontend.c * / ret = -EADDRNOTAVAIL; if (addr-> sin_addr.s_addr && chk_addr_ret = RTN_LOCAL && chk_addr_ret = RTN_MULTICAST && chk_addr_ret = RTN_BROADCAST) goto out;!!! sk-> rcv_saddr = sk-> saddr = addr-> sin_addr.s_addr; / * sk-> rcv_saddr bundled local address * / / * SK-> SADDR source address * / if (chK_addr_ret == rtn_multicast || CHK_ADDR_RET == RTN_BROADCAST) SK-> Saddr = 0; / * Use device * / / * address type, such as multicast or broadcast source address is 0 * / SK_DST_RESET (SK); RET = 0; OUT: RETURN RET;} 2.2 listen and listen 2.2.1 SOCK_STREAM in net / ipv4 / af_inet.c int inet_listen (struct socket * sock, int backlog) {struct sock * sk = sock-> sk; unsigned char old_state; int err; lock_sock (sk); err = -EINVAL; if (Sock-> State! = SS_UNCONNECTED || SOCK-> TYPE! = SOCK_STREAM) Goto out; Old_State = SK-> State; IF (! ((1

/* Really, if the socket is already in listen state

* we can only allow the backlog to be adjusted.

*/

IF (Old_State! = TCP_Listen) {

Err = TCP_Listen_Start (SK); / * Really implemented TCP protocol Listen * /

IF (ERR)

Goto Out;

}

SK-> MAX_ACK_BACKLOG = backlog;

Err = 0;

OUT:

Release_sock (sk);

Return ERR;

}

tcp_listen_start() is in net/ipv4/tcp.c

INT TCP_LISTEN_START (STRUCT SOCK * SK)

{

Struct TCP_OPT * TP = & (SK-> TP_PINFO.AF_TCP);

Struct tcp_listen_opt * lopt;

SK-> MAX_ACK_BACKLOG = 0;

SK-> ACK_BACKLOG = 0;

TP-> accept_queue = tp-> accept_queue_tail = null;

TP-> SYN_WAIT_LOCK = RW_LOCK_UNLOCKED;

tcp_delack_init(tp); /* reset (zero) the delayed-ACK state in tp */

/* in include/net/tcp.h */

LOPT = kmalloc (SIZEOF (struct tcp_listen_opt), gfp_kernel);

IF (! LOPT)

return -ENOMEM;

memset(lopt, 0, sizeof(struct tcp_listen_opt));

for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)

if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)

break;

Write_lock_bh (& TP-> SYN_WAIT_LOCK);

TP-> listen_opt = LOPT;

Write_unlock_bh (& TP-> SYN_WAIT_LOCK);

/* There is a race window here: we announce ourselves listening,

* but this transition is still not validated by get_port().

* It is OK, because this socket enters the hash table only

* after validation is complete.

*/

SK-> State = TCP_Listen;

IF (SK-> Prot-> get_port (SK, SK-> NUM) == 0) {/ * confirmation address is not reused * /

SK-> Sport = HTONS (SK-> NUM); / * Set the source port * /

SK_DST_RESET (SK);

SK-> Prot-> Hash (SK); / * Add port to the Hash table * /

Return 0;

}

SK-> State = TCP_Close;

Write_lock_bh (& TP-> SYN_WAIT_LOCK);

TP-> Listen_opt = NULL;

Write_unlock_bh (& TP-> SYN_WAIT_LOCK);

Kfree (LOPT);

return -EADDRINUSE;

}

SOCK_DGRAM and SOCK_RAW do not support Listen

2.3 ACCEPT

2.3.1 SOCK_STREAM Accept

In Net / IPv4 / AF_INET.C

INT INET_ACCEPT (Struct Socket * Sock, Struct Socket * Newsock, Int Flags) {

Struct Sock * SK1 = SOCK-> SK;

Struct Sock * SK2;

Int err = -einval;

if ((sk2 = sk1->prot->accept(sk1, flags, &err)) == NULL)

Goto do_err;

LOCK_SOCK (SK2);

BUG_TRAP((1 << sk2->state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE));

SOCK_GRAFT (SK2, Newsock); / * Connected SK2 to Newsock * /

/ * In include / net / sock.h * /

Newsock-> state = ss_connected;

Release_sock (SK2);

Return 0;

Do_err:

Return ERR;

}

SOCK_DGRAM and SOCK_RAW do not support Accept

2.3.1.1 ACCEPT for TCP Protocol

In Net / IPv4 / TCP.c

Struct Sock * TCP_ACCEPT (Struct Sock * SK, INT FLAGS, INT * ERR)

{

Struct TCP_OPT * TP = & SK-> TP_PINFO.AF_TCP;

Struct Open_Request * Req;

Struct Sock * newsk;

Int error;

LOCK_SOCK (SK);

/* We need to make sure that this socket is listening,

* and that it has something pending.

*/

Error = - EINVAL;

IF (SK-> State! = TCP_LISTEN) / * Check if the socket is in the Listen status * /

Goto Out;

/ * Find Already Established Connection * /

if (! tp-> accept_queue) {/ * Judging whether the Accept queue is ready * /

Long Timeo = SOCK_RCVTIMEO (SK, Flags & O_nonblock);

/* Determine whether the socket is in blocking mode */

/ * In include / net / sock.h * /

/ * If this is a non blocking socket don't sleep * /

Error = -eagain;

if (!timeo) /* non-blocking mode: return immediately */

Goto Out;

Error = Wait_for_connect (SK, Timeo); / * Enter idle waiting connection * /

IF (Error)

Goto Out;

}

Req = TP-> Accept_queue;

IF ((tp-> accept_queue = req-> dl_next) == NULL)

TP-> accept_queue_tail = null;

Newsk = req-> SK;

tcp_acceptq_removed(sk); /* decrement sk's current connection count by 1 */

/ * In include / net / tcp.h * /

TCP_OPENREQ_FASTFREE (Req); / * Release memory * /

/ * In include / net / tcp.h * /

Bug_trap (newsk-> state! = Tcp_syn_recv);

Release_sock (sk);

return newsk;

out:

Release_sock (sk);

* err = error;

Return NULL;

}

/* This function is called only when the socket is in blocking mode */

/ * In Net / IPv4 / TCP.c * /

Static int WAIT_FOR_CONNECT (Struct Sock * SK, Long Timeo)

{

Declare_WaitQueue (Wait, CURRENT);

Int Err;

/*

* True wake-one mechanism for incoming connections: only

* one process gets woken up, not the 'whole herd'.

* Since we do not 'race & poll' for established sockets

* anymore, the common case will execute the loop only once.

*

* Subtle issue: "add_wait_queue_exclusive()" will be added

* after any current non-exclusive waiters, and we know that

* it will always _stay_ after any new non-exclusive waiters

* because all non-exclusive waiters are added at the

* beginning of the wait-queue. As such, it's ok to "drop"

* our exclusiveness temporarily when we get woken up without

* having to remove and re-insert us on the wait queue.

*/

Add_wait_queue_exclusive (SK-> Sleep, & Wait);

For (;;) {

Current-> State = Task_Interruptible;

Release_sock (sk);

IF (SK-> TP_PINFO.AF_TCP.ACCEPT_QUEUE == NULL)

timeo = schedule_timeout(timeo); /* sleep for up to timeo */

LOCK_SOCK (SK);

Err = 0;

IF (SK-> TP_PINFO.AF_TCP.ACCEPT_QUEUE) / * Accept Queue available * /

/ * That is, there is a connection entry * /

Break;

Err =-EinVal;

IF (SK-> State! = TCP_LISTEN)

Break;

Err = SOCK_INTR_ERRNO (TIMEO);

IF (Signal_Pending (CURRENT))

Break;

Err = -eagain;

IF (! Timeo)

Break;

}

Current-> State = Task_Running;

REMOVE_WAIT_QUEUE (SK-> Sleep, & Wait);

Return ERR;

}

3.client

3.1 Connect

3.1.1 Sock_Stream CONNECT

In Net / IPv4 / AF_INET.C

INT INET_STREAM_CONNECT (Struct Socket * Sock, Struct SockAddr * Uaddr,

int addr_len, int flags)

{

Struct Sock * SK = SOCK-> SK; int ERR;

Long Timeo;

LOCK_SOCK (SK);

IF (uaddr-> sa_family == AF_UNSPEC) {

Err = SK-> Prot-> Disconnect (SK, FLAGS); / * Close connection * /

SOCK-> State = Err? ss_disconnecting: ss_unconnected;

Goto Out;

}

Switch (SOCK-> State) {

DEFAULT:

Err =-EinVal;

Goto Out;

Case SS_CONNECTED:

Err = -eisconn;

Goto Out;

Case SS_CONNECTING:

Err = -EalReady;

/ * Fall Out of Switch with Err, Set for this State * /

Break;

Case SS_UNCONNECTED:

Err = -eisconn;

IF (SK-> State! = TCP_CLOSE)

Goto Out;

Err = -eagain;

IF (SK-> NUM == 0) {

IF (SK-> Prot-> get_port (SK, 0)! = 0) / * Whether to reuse * /

Goto Out;

SK-> Sport = HTONS (SK-> NUM);

}

Err = SK-> Prot-> Connect (SK, UADDR, AddR_len); / * CONNECT * /

IF (Err <0)

Goto Out;

Sock-> state = ss_connecting; / * socket status is set to connect * /

/ * Just Entered SS_CONNECTING STATE; THE ONLY

* Difference is that Return Value in non-blocking

* Case is EinProgress, Rather Than Ealready.

* /

Err = - EinProgress;

Break;

}

timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); /* is the socket in blocking mode? */

/ * In include / net / sock.h * /

if ((1 << sk->state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { /* connection not yet completed */

/ * Error Code is set above * /

IF (! Timeo ||! inet_wait_for_connect (sk, timeo))

/* Non-blocking mode returns immediately */

/* Blocking mode calls inet_wait_for_connect() */

Goto Out;

Err = SOCK_INTR_ERRNO (TIMEO);

IF (Signal_Pending (CURRENT))

Goto Out;

}

/ * Connection Was Closed By Rst, Timeout, ICMP Error

* OR Another Process Disconnected US.

* /

IF (SK-> State == TCP_CLOSE)

Goto Sock_ERROR;

/* sk->err may be not zero now, if RECVERR was ordered by user

* and error was received after socket entered established state.

* Hence, it is handled normally after connect() returns successfully.

*/

SOCK-> State = SS_CONNECTED; / * Set status is connected * /

Err = 0;

OUT:

Release_sock (sk);

Return ERR;

SOCK_ERROR:

Err = SOCK_ERROR (SK)?: -econnaborted;

SOCK-> State = SS_UNCONNECTED;

IF (SK-> Prot-> Disconnect (SK, FLAGS))

SOCK-> State = SS_DISCONNECTING;

Goto Out;

}

/* This function is called only when the socket is in blocking mode */

/ * In /net/ipv4/af_inet.c * /

Static long inet_wait_for_connect (Struct Sock * SK, Long Timeo)

{

Declare_WaitQueue (Wait, CURRENT);

__set_current_state(TASK_INTERRUPTIBLE);

Add_Wait_Queue (SK-> Sleep, & Wait);

/ * Basic Assumption: if someone sets SK-> Err, He _MUST_

* Change State of the socket from tcp_syn_ *.

* Connect () Does Not ALLOW TO GET ERROR NOTIFICATIONS

* WITHOUT Closing The socket.

* /

while ((1 << sk->state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {

Release_sock (sk);

Timeo = Schedule_timeout (Timeo); / * Enter Sleep * /

LOCK_SOCK (SK);

IF (Signal_Pending (Current) ||! Timeo)

Break;

set_current_state(TASK_INTERRUPTIBLE);

}

__set_current_state (task_running);

REMOVE_WAIT_QUEUE (SK-> Sleep, & Wait);

Return Timeo;

}

(http://www.fanqiang.com)

转载请注明原文地址:https://www.9cbs.com/read-54681.html

New Post(0)