# socket
socket是对TCP/IP协议的封装,Socket本身并不是协议,而是一个调用接口(API)。Socket是应用层与TCP/IP协议族通信的中间软件抽象层,它是一组接口。
socket起源于Unix,而Unix/Linux的基本哲学之一就是“一切皆文件”,即都可以用“打开open –> 读写write/read –> 关闭close”的模式来操作。我的理解是:Socket就是该模式的一个实现,socket是一种特殊的文件。

socket 数据结构

socket 结构,struct socket是内核中的进程与内核中的网络系统之间的桥梁

/*
 * struct socket - general socket object.
 *
 * Ties together the socket's state and type, the file it is attached to,
 * its network-layer counterpart (struct sock), its operations table and
 * an embedded wait queue.
 */
struct socket {
    socket_state        state;   // socket state (a socket_state value)
    short            type;    // socket type, e.g. SOCK_STREAM
    unsigned long        flags;  // flags
    struct file        *file;   // back-pointer to the file
    struct sock        *sk;   //  network-layer representation of the socket
    const struct proto_ops    *ops;  // socket operations table
    struct socket_wq    wq;    // wait queue (embedded by value)
};

/*
 * enum sock_type - socket type constants.
 *
 * Every value is spelled out explicitly. The numbering is not contiguous:
 * it jumps from SOCK_DCCP (6) to SOCK_PACKET (10).
 */
enum sock_type {
    SOCK_STREAM = 1,
    SOCK_DGRAM = 2,
    SOCK_RAW = 3,
    SOCK_RDM = 4,
    SOCK_SEQPACKET = 5,
    SOCK_DCCP = 6,
    /* 7..9 are not defined in this enum */
    SOCK_PACKET = 10,
};

/*
 * socket_state - connection state of a socket.
 *
 * Values are consecutive, starting at 0.
 */
typedef enum {
    SS_FREE          = 0,   /* socket has not been allocated yet */
    SS_UNCONNECTED   = 1,   /* not connected to any socket */
    SS_CONNECTING    = 2,   /* connection in progress */
    SS_CONNECTED     = 3,   /* connected to a socket */
    SS_DISCONNECTING = 4    /* disconnect in progress */
} socket_state;

/*
 * struct socket_wq - wait-queue bundle for a socket.
 *
 * Field order matters: `wait` has to stay the first member (see the note
 * below). The structure carries the ____cacheline_aligned_in_smp attribute.
 */
struct socket_wq {
    /* Note: wait MUST be first field of socket_wq */
    wait_queue_head_t    wait;   // wait queue head
    struct fasync_struct    *fasync_list;  // NOTE(review): presumably the async-notification list — confirm
    unsigned long        flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */
    struct rcu_head        rcu;  // RCU head; NOTE(review): presumably for deferred freeing — confirm
} ____cacheline_aligned_in_smp;

/*
 * struct wait_queue_head - head of a wait queue: a spinlock plus a
 * list head. Also available under the typedef wait_queue_head_t.
 */
struct wait_queue_head {
    spinlock_t        lock;  // NOTE(review): presumably protects `head` — confirm
    struct list_head    head;  // list head of the queue
};
typedef struct wait_queue_head wait_queue_head_t;

/*
 * struct list_head - link node holding a pointer to the next node and a
 * pointer to the previous node, both of the same type.
 */
struct list_head {
    struct list_head *next;   /* following node */
    struct list_head *prev;   /* preceding node */
};

struct sock:

/*
 * struct sock - network-layer representation of a socket.
 *
 * The first member must remain __sk_common (see the note below); the
 * sk_* macros that follow it are shorthand aliases for fields inside
 * __sk_common. Member order is significant (cache-line grouping and hole
 * filling are called out in the comments), so do not reorder fields.
 */
struct sock {
    /*
     * Now struct inet_timewait_sock also uses sock_common, so please just
     * don't add nothing before this first member (__sk_common) --acme
     */
    struct sock_common    __sk_common;
#define sk_node            __sk_common.skc_node
#define sk_nulls_node        __sk_common.skc_nulls_node
#define sk_refcnt        __sk_common.skc_refcnt
#define sk_tx_queue_mapping    __sk_common.skc_tx_queue_mapping
#ifdef CONFIG_XPS
#define sk_rx_queue_mapping    __sk_common.skc_rx_queue_mapping
#endif

#define sk_dontcopy_begin    __sk_common.skc_dontcopy_begin
#define sk_dontcopy_end        __sk_common.skc_dontcopy_end
#define sk_hash            __sk_common.skc_hash
#define sk_portpair        __sk_common.skc_portpair
#define sk_num            __sk_common.skc_num
#define sk_dport        __sk_common.skc_dport
#define sk_addrpair        __sk_common.skc_addrpair
#define sk_daddr        __sk_common.skc_daddr
#define sk_rcv_saddr        __sk_common.skc_rcv_saddr
#define sk_family        __sk_common.skc_family
#define sk_state        __sk_common.skc_state
#define sk_reuse        __sk_common.skc_reuse
#define sk_reuseport        __sk_common.skc_reuseport
#define sk_ipv6only        __sk_common.skc_ipv6only
#define sk_net_refcnt        __sk_common.skc_net_refcnt
#define sk_bound_dev_if        __sk_common.skc_bound_dev_if
#define sk_bind_node        __sk_common.skc_bind_node
#define sk_prot            __sk_common.skc_prot
#define sk_net            __sk_common.skc_net
#define sk_v6_daddr        __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr    __sk_common.skc_v6_rcv_saddr
#define sk_cookie        __sk_common.skc_cookie
#define sk_incoming_cpu        __sk_common.skc_incoming_cpu
#define sk_flags        __sk_common.skc_flags
#define sk_rxhash        __sk_common.skc_rxhash

    socket_lock_t        sk_lock;    // synchronization lock
    atomic_t        sk_drops;     // NOTE(review): presumably a dropped-packet counter — confirm
    int            sk_rcvlowat;    // rcvlowat value
    struct sk_buff_head    sk_error_queue;     
    struct sk_buff        *sk_rx_skb_cache;   // cached rx skb
    struct sk_buff_head    sk_receive_queue;  // receive queue
    /*
     * The backlog queue is special, it is always used with
     * the per-socket spinlock held and requires low latency
     * access. Therefore we special case it's implementation.
     * Note : rmem_alloc is in this structure to fill a hole
     * on 64bit arches, not because its logically part of
     * backlog.
     */
    struct {
        atomic_t    rmem_alloc;    // bytes currently held in the receive queue
        int        len;
        struct sk_buff    *head;
        struct sk_buff    *tail;
    } sk_backlog;
#define sk_rmem_alloc sk_backlog.rmem_alloc

    int            sk_forward_alloc;  // pre-allocated space
#ifdef CONFIG_NET_RX_BUSY_POLL
    unsigned int        sk_ll_usec;
    /* ===== mostly read cache line ===== */
    unsigned int        sk_napi_id;
#endif
    int            sk_rcvbuf;    // receive buffer size

    struct sk_filter __rcu    *sk_filter;  
    union {
        struct socket_wq __rcu    *sk_wq;
        /* private: */
        struct socket_wq    *sk_wq_raw;
        /* public: */
    };
#ifdef CONFIG_XFRM
    struct xfrm_policy __rcu *sk_policy[2];
#endif
    struct dst_entry    *sk_rx_dst;
    struct dst_entry __rcu    *sk_dst_cache;
    atomic_t        sk_omem_alloc;
    int            sk_sndbuf;    // send buffer size

    /* ===== cache line for TX ===== */
    int            sk_wmem_queued;      // amount of data waiting to be sent
    refcount_t        sk_wmem_alloc;  // NOTE(review): presumably bytes committed to the transmit queue — confirm
    unsigned long        sk_tsq_flags;  
    union {
        struct sk_buff    *sk_send_head;         
        struct rb_root    tcp_rtx_queue;
    };
    struct sk_buff        *sk_tx_skb_cache;
    struct sk_buff_head    sk_write_queue; // queue of packets to send
    __s32            sk_peek_off;
    int            sk_write_pending;
    __u32            sk_dst_pending_confirm;
    u32            sk_pacing_status; /* see enum sk_pacing */
    long            sk_sndtimeo;
    struct timer_list    sk_timer;
    __u32            sk_priority;
    __u32            sk_mark;
    unsigned long        sk_pacing_rate; /* bytes per second */
    unsigned long        sk_max_pacing_rate;
    struct page_frag    sk_frag;
    netdev_features_t    sk_route_caps;
    netdev_features_t    sk_route_nocaps;
    netdev_features_t    sk_route_forced_caps;
    int            sk_gso_type;
    unsigned int        sk_gso_max_size;
    gfp_t            sk_allocation;
    __u32            sk_txhash;

    /*
     * Because of non atomicity rules, all
     * changes are protected by socket lock.
     */
    u8            sk_padding : 1,
                sk_kern_sock : 1,
                sk_no_check_tx : 1,
                sk_no_check_rx : 1,
                sk_userlocks : 4;
    u8            sk_pacing_shift;
    u16            sk_type;
    u16            sk_protocol;
    u16            sk_gso_max_segs;
    unsigned long            sk_lingertime;
    struct proto        *sk_prot_creator;
    rwlock_t        sk_callback_lock;
    int            sk_err,
                sk_err_soft;
    u32            sk_ack_backlog;
    u32            sk_max_ack_backlog;
    kuid_t            sk_uid;
    struct pid        *sk_peer_pid;
    const struct cred    *sk_peer_cred;
    long            sk_rcvtimeo;
    ktime_t            sk_stamp;
#if BITS_PER_LONG==32
    seqlock_t        sk_stamp_seq;
#endif
    u16            sk_tsflags;
    u8            sk_shutdown;
    u32            sk_tskey;
    atomic_t        sk_zckey;

    u8            sk_clockid;
    u8            sk_txtime_deadline_mode : 1,
                sk_txtime_report_errors : 1,
                sk_txtime_unused : 6;

    struct socket        *sk_socket;   // back-pointer to the struct socket
    void            *sk_user_data;
#ifdef CONFIG_SECURITY
    void            *sk_security;
#endif
    struct sock_cgroup_data    sk_cgrp_data;
    struct mem_cgroup    *sk_memcg;
    /* Callback hooks; each one receives this sock as its first argument. */
    void            (*sk_state_change)(struct sock *sk);
    void            (*sk_data_ready)(struct sock *sk);
    void            (*sk_write_space)(struct sock *sk);
    void            (*sk_error_report)(struct sock *sk);
    int            (*sk_backlog_rcv)(struct sock *sk,
                          struct sk_buff *skb);
#ifdef CONFIG_SOCK_VALIDATE_XMIT
    struct sk_buff*        (*sk_validate_xmit_skb)(struct sock *sk,
                            struct net_device *dev,
                            struct sk_buff *skb);
#endif
    void                    (*sk_destruct)(struct sock *sk);
    struct sock_reuseport __rcu    *sk_reuseport_cb;
#ifdef CONFIG_BPF_SYSCALL
    struct bpf_local_storage __rcu    *sk_bpf_storage;
#endif
    struct rcu_head        sk_rcu;
};

struct sock_common: 套接口在网络层的最小表示

/*
 * struct sock_common - minimal network-layer representation of a socket.
 *
 * Several members are overlaid in anonymous unions, and the comments
 * below spell out grouping/alignment requirements, so member order must
 * be preserved.
 */
struct sock_common {
    /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned
     * address on 64bit arches : cf INET_MATCH()
     */
    union {
        __addrpair    skc_addrpair;  // 8-byte aligned address pair
        struct {
            __be32    skc_daddr;  // foreign (remote) address
            __be32    skc_rcv_saddr;  // local address
        };
    };
    union  {
        unsigned int    skc_hash;     
        __u16        skc_u16hashes[2];
    };
    /* skc_dport && skc_num must be grouped as well */
    union {
        __portpair    skc_portpair;
        struct {
            __be16    skc_dport;
            __u16    skc_num;
        };
    };

    unsigned short        skc_family;  // address/protocol family
    volatile unsigned char    skc_state;  // connection state
    unsigned char        skc_reuse:4;   // SO_REUSEADDR setting
    unsigned char        skc_reuseport:1;   // SO_REUSEPORT setting
    unsigned char        skc_ipv6only:1;   //  IPv6-only setting
    unsigned char        skc_net_refcnt:1;  // set when a network reference count is used
    int            skc_bound_dev_if;   // bound interface index
    union {
        struct hlist_node    skc_bind_node;
        struct hlist_node    skc_portaddr_node;
    };
    struct proto        *skc_prot;
    possible_net_t        skc_net;

#if IS_ENABLED(CONFIG_IPV6)
    struct in6_addr        skc_v6_daddr;
    struct in6_addr        skc_v6_rcv_saddr;
#endif

    atomic64_t        skc_cookie;   // socket cookie

    /* following fields are padding to force
     * offset(struct sock, sk_refcnt) == 128 on 64bit arches
     * assuming IPV6 is enabled. We use this padding differently
     * for different kind of 'sockets'
     */
    union {
        unsigned long    skc_flags;   // socket flags
        struct sock    *skc_listener; /* request_sock */  // listener socket for connection requests
        struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */
    };
    /*
     * fields between dontcopy_begin/dontcopy_end
     * are not copied in sock_copy()
     */
    /* private: */
    int            skc_dontcopy_begin[0];
    /* public: */
    union {
        struct hlist_node    skc_node;     
        struct hlist_nulls_node skc_nulls_node;
    };
    unsigned short        skc_tx_queue_mapping;  // tx queue number 
#ifdef CONFIG_XPS
    unsigned short        skc_rx_queue_mapping;  // rx queue number
#endif
    union {
        int        skc_incoming_cpu;   // records/matches the CPU that processes received packets
        u32        skc_rcv_wnd;     //  TCP receive window size
        u32        skc_tw_rcv_nxt; /* struct tcp_timewait_sock  */  
    };

    refcount_t        skc_refcnt;    // reference count
    /* private: */
    int                     skc_dontcopy_end[0];
    union {
        u32        skc_rxhash;
        u32        skc_window_clamp;
        u32        skc_tw_snd_nxt; /* struct tcp_timewait_sock */
    };
    /* public: */
};

sk_state:

/*
 * TCP connection states.
 *
 * Numbering starts at 1 and is consecutive. TCP_NEW_SYN_RECV sits between
 * TCP_CLOSING and the TCP_MAX_STATES sentinel, matching the kernel's
 * include/net/tcp_states.h; without it TCP_MAX_STATES would be off by one
 * (12 instead of 13).
 */
enum {
    TCP_ESTABLISHED = 1,
    TCP_SYN_SENT,
    TCP_SYN_RECV,
    TCP_FIN_WAIT1,
    TCP_FIN_WAIT2,
    TCP_TIME_WAIT,
    TCP_CLOSE,
    TCP_CLOSE_WAIT,
    TCP_LAST_ACK,
    TCP_LISTEN,
    TCP_CLOSING,
    TCP_NEW_SYN_RECV,

    TCP_MAX_STATES    /* sentinel: must stay last */
};
文档更新时间: 2021-02-27 20:15   作者:周国强