5. VPP源码分析(graph node(3))

2.3. vlib_frame_t

从arguments地址开始的内存空间是vector空间,作为结点接收数据包索引的缓冲区。
5. VPP源码分析(graph node(3))

2.3.1. vlib_frame_t内存空间

5. VPP源码分析(graph node(3))
每个线程都会从堆中分配(mmap)一块内存clib_per_cpu_mheaps
vm->heap_base = clib_mem_get_heap(); 默认大小为64MB
每个vlib_frame_t都将在此mheap上申请,并对应一个elt
函数vlib_frame_alloc_to_node用于申请vlib_frame_t和并将其与某个特定node关联起来
5. VPP源码分析(graph node(3))

2.3.2. frame_index的获得

下图所示为从vlib_frame_t地址算成index的过程,本质上是地址算术算法,做了一次归一化处理。
5. VPP源码分析(graph node(3))
当系统为32位系统时,按32Bytes对齐,所以直接通过frame_index * VLIB_FRAME_ALIGN的方式得到frame指针
5. VPP源码分析(graph node(3))

2.4. vlib_next_frame_t

typedef struct {
  /* Frame index. */
  u32 frame_index;

  /* Node runtime for this next. */
  u32 node_runtime_index;

  /* Next frame flags. */
  u32 flags;

  /* Reflects node frame-used flag for this next. */
#define VLIB_FRAME_NO_FREE_AFTER_DISPATCH VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH

  /* This next frame owns enqueue to node corresponding to node_runtime_index. */
#define VLIB_FRAME_OWNER (1 << 15)

  /* Set when frame has been allocated for this next. */
#define VLIB_FRAME_IS_ALLOCATED    VLIB_NODE_FLAG_IS_OUTPUT

  /* Set when frame has been added to pending vector. */
#define VLIB_FRAME_PENDING VLIB_NODE_FLAG_IS_DROP

  /* Set when frame is to be freed after dispatch. */
#define VLIB_FRAME_FREE_AFTER_DISPATCH VLIB_NODE_FLAG_IS_PUNT

  /* Set when frame has traced packets. */
#define VLIB_FRAME_TRACE VLIB_NODE_FLAG_TRACE

  /* Number of vectors enqueue to this next since last overflow. */
  u32 vectors_since_last_overflow;
} vlib_next_frame_t;

2.5. vlib_pending_frame_t

/* A frame pending dispatch by main loop. */
typedef struct {
  /* Node and runtime for this frame. */
  u32 node_runtime_index;

  /* Frame index (in the heap). */
  u32 frame_index;

  /* Start of next frames for this node. */
  u32 next_frame_index;

  /* Special value for next_frame_index when there is no next frame. */
#define VLIB_PENDING_FRAME_NO_NEXT_FRAME ((u32) ~0)
} vlib_pending_frame_t;

2.6. vlib_node_t

typedef struct vlib_node_t {
  /* Vector processing function for this node. */
  vlib_node_function_t *function;

  /* Node name. */
  u8 *name;

  /* Node name index in elog string table. */
  u32 name_elog_string;

  /* Total statistics for this node. */
  vlib_node_stats_t stats_total;

  /* Saved values as of last clear (or zero if never cleared).
     Current values are always stats_total - stats_last_clear. */
  vlib_node_stats_t stats_last_clear;

  /* Type of this node. */
  vlib_node_type_t type;

  /* Node index. */
  u32 index;

  /* Index of corresponding node runtime. */
  u32 runtime_index;

  /* Runtime data for this node. */
  void *runtime_data;

  /* Node flags. */
  u16 flags;

/* Processing function keeps frame.
Tells node dispatching code not to free frame after dispatch is done.  */
#define VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH (1 << 0)

  /* Node counts as output/drop/punt node for stats purposes. */
#define VLIB_NODE_FLAG_IS_OUTPUT (1 << 1)
#define VLIB_NODE_FLAG_IS_DROP (1 << 2)
#define VLIB_NODE_FLAG_IS_PUNT (1 << 3)
#define VLIB_NODE_FLAG_IS_HANDOFF (1 << 4)

  /* Set if current node runtime has traced vectors. */
#define VLIB_NODE_FLAG_TRACE (1 << 5)

#define VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE (1 << 6)
#define VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE (1 << 7)

  /* State for input nodes. */
  u8 state;

  /* Number of bytes of run time data. */
  u8 runtime_data_bytes;

  /* Number of error codes used by this node. */
  u16 n_errors;

  /* Size of scalar and vector arguments in bytes. */
  u16 scalar_size, vector_size;

  /* Handle/index in error heap for this node. */
  u32 error_heap_handle;
  u32 error_heap_index;

  /* Error strings indexed by error code for this node. */
  char **error_strings;

  /* Vector of next node names.
     Only used before next_nodes array is initialized. */
  char **next_node_names;

  /* Next node indices for this node. */
  u32 *next_nodes;

  /* Name of node that we are sibling of. */
  char *sibling_of;

  /* Bitmap of all of this node's siblings. */
  uword *sibling_bitmap;

  /* Total number of vectors sent to each next node. */
  u64 *n_vectors_by_next_node;

  /* Hash table mapping next node index into slot in
     next_nodes vector.  Quickly determines whether this node
     is connected to given next node and, if so, with which slot. */
  uword *next_slot_by_node;

  /* Bitmap of node indices which feed this node. */
  uword *prev_node_bitmap;

  /* Node/next-index which own enqueue rights with to this node. */
  u32 owner_node_index, owner_next_index;

  /* Buffer format/unformat for this node. */
  format_function_t *format_buffer;
  unformat_function_t *unformat_buffer;

  /* Trace buffer format/unformat for this node. */
  format_function_t *format_trace;

  /* Function to validate incoming frames. */
u8 *(*validate_frame) (struct vlib_main_t * vm, struct vlib_node_runtime_t *, struct vlib_frame_t * f);

  /* for pretty-printing, not typically valid */
  u8 *state_string;
} vlib_node_t;

2.7. vlib_node_main_t

typedef struct {
  /* Public nodes. */
  vlib_node_t **nodes;

  /* Node index hashed by node name. */
  uword *node_by_name;

  u32 flags;
#define VLIB_NODE_MAIN_RUNTIME_STARTED (1 << 0)

  /* Nodes segregated by type for cache locality.
     Does not apply to nodes of type VLIB_NODE_TYPE_INTERNAL. */
  vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE];

  /* Node runtime indices for input nodes with pending interrupts. */
  u32 *pending_interrupt_node_runtime_indices;
  clib_spinlock_t pending_interrupt_lock;

  /* Input nodes are switched from/to interrupt to/from polling mode
     when average vector length goes above/below polling/interrupt
     thresholds. */
  u32 polling_threshold_vector_length;
  u32 interrupt_threshold_vector_length;

  /* Vector of next frames. */
  vlib_next_frame_t *next_frames;

  /* Vector of internal node's frames waiting to be called. */
  vlib_pending_frame_t *pending_frames;

  /* Timing wheel for scheduling time-based node dispatch. */
  void *timing_wheel;

  vlib_signal_timed_event_data_t *signal_timed_event_data_pool;

  /* Opaque data vector added via timing_wheel_advance. */
  u32 *data_from_advancing_timing_wheel;

  /* CPU time of next process to be ready on timing wheel. */
  f64 time_next_process_ready;

  /* Vector of process nodes.
     One for each node of type VLIB_NODE_TYPE_PROCESS. */
  vlib_process_t **processes;

  /* Current running process or ~0 if no process running. */
  u32 current_process_index;

  /* Pool of pending process frames. */
  vlib_pending_frame_t *suspended_process_frames;

  /* Vector of event data vectors pending recycle. */
  void **recycled_event_data_vectors;

  /* Current counts of nodes in each state. */
  u32 input_node_counts_by_state[VLIB_N_NODE_STATE];

  /* Hash of (scalar_size,vector_size) to frame_sizes index. */
  uword *frame_size_hash;

  /* Per-size frame allocation information. */
  vlib_frame_size_t *frame_sizes;

  /* Time of last node runtime stats clear. */
  f64 time_last_runtime_stats_clear;

  /* Node registrations added by constructors */
  vlib_node_registration_t *node_registrations;
} vlib_node_main_t;

2.8. node调度流程

vlib_main_or_worker_loop的主要流程如下:

  1. 处理pre-input结点——polling形式
    5. VPP源码分析(graph node(3))
  2. 处理input结点——polling形式
    5. VPP源码分析(graph node(3))
  3. 处理来自control-plane API queue的信号事件
    5. VPP源码分析(graph node(3))
  4. 处理input结点——interrupt形式
    5. VPP源码分析(graph node(3))
  5. timing wheel处理
    5. VPP源码分析(graph node(3))
  6. 执行pending_frames中记录的信息
    5. VPP源码分析(graph node(3))

根据pending_frames里面记载的信息进行nodes调度,这里nodes调度类似于流水线处理方式,一直执行直到pending_frames中无任何数据为止。

  1. pending_frames中的internal结点也许会resume process node所以需要在跳回timing wheel做一次调度
    5. VPP源码分析(graph node(3))
上一篇:2. VPP源码分析(内存管理之抽象数据结构)


下一篇:mPaas客户端上线巡检