PostgreSQL数据库查询——parse_analyze对不同语句进行transform

parse_analyze函数分析原始语法树并将其转换为Query形式。转换完成后，如果设置了post_parse_analyze_hook钩子，parse_analyze还会调用该钩子，然后释放ParseState并返回Query。

/*
 * parse_analyze
 *		Run parse analysis on one raw statement and hand back the Query.
 *
 * sourceText must be non-NULL (asserted).  When numParams is positive,
 * paramTypes/numParams are registered via parse_fixed_parameters() before
 * the statement is transformed.  If post_parse_analyze_hook is set, it is
 * called with the parse state and the resulting Query before the parse
 * state is freed.
 */
Query *parse_analyze(RawStmt *parseTree, const char *sourceText, Oid *paramTypes, int numParams, QueryEnvironment *queryEnv) {
	ParseState *pstate = make_parsestate(NULL);
	Query	   *analyzed;

	Assert(sourceText != NULL); /* required as of 8.4 */

	/* Fill in the caller-supplied context of the fresh parse state. */
	pstate->p_sourcetext = sourceText;
	pstate->p_queryEnv = queryEnv;

	if (numParams > 0) {
		parse_fixed_parameters(pstate, paramTypes, numParams);
	}

	analyzed = transformTopLevelStmt(pstate, parseTree);

	/* Let the hook, if any, inspect the result while pstate still exists. */
	if (post_parse_analyze_hook) {
		(*post_parse_analyze_hook) (pstate, analyzed);
	}

	free_parsestate(pstate);

	return analyzed;
}

/*
 * parse_fixed_parameters
 *		Prepare pstate for a query that refers to a fixed set of parameters.
 *
 * A FixedParamState is allocated with palloc and filled with the caller's
 * paramTypes pointer (stored as-is, not copied) and numParams.  It is
 * installed as the hook passthrough state, and fixed_paramref_hook becomes
 * the ParamRef hook.
 */
void parse_fixed_parameters(ParseState *pstate, Oid *paramTypes, int numParams) {
	FixedParamState *fixed = palloc(sizeof *fixed);

	fixed->numParams = numParams;
	fixed->paramTypes = paramTypes;

	pstate->p_paramref_hook = fixed_paramref_hook;
	pstate->p_ref_hook_state = (void *) fixed;
	/* p_coerce_param_hook is intentionally left alone; it is not needed */
}

数据结构

/*
 * State information used during parse analysis
 *
 * parentParseState: NULL in a top-level ParseState.  When parsing a subquery,
 * links to current parse state of outer query.
 *
 * p_sourcetext: source string that generated the raw parsetree being
 * analyzed, or NULL if not available.  (The string is used only to
 * generate cursor positions in error messages: we need it to convert
 * byte-wise locations in parse structures to character-wise cursor
 * positions.)
 *
 * p_rtable: list of RTEs that will become the rangetable of the query.
 * Note that neither relname nor refname of these entries are necessarily
 * unique; searching the rtable by name is a bad idea.
 *
 * p_joinexprs: list of JoinExpr nodes associated with p_rtable entries.
 * This is one-for-one with p_rtable, but contains NULLs for non-join
 * RTEs, and may be shorter than p_rtable if the last RTE(s) aren't joins.
 *
 * p_joinlist: list of join items (RangeTblRef and JoinExpr nodes) that
 * will become the fromlist of the query's top-level FromExpr node.
 *
 * p_namespace: list of ParseNamespaceItems that represents the current
 * namespace for table and column lookup.  (The RTEs listed here may be just
 * a subset of the whole rtable.  See ParseNamespaceItem comments below.)
 *
 * p_lateral_active: true if we are currently parsing a LATERAL subexpression
 * of this parse level.  This makes p_lateral_only namespace items visible,
 * whereas they are not visible when p_lateral_active is false.
 *
 * p_ctenamespace: list of CommonTableExprs (WITH items) that are visible
 * at the moment.  This is entirely different from p_namespace because a CTE
 * is not an RTE, rather "visibility" means you could make an RTE from it.
 *
 * p_future_ctes: list of CommonTableExprs (WITH items) that are not yet
 * visible due to scope rules.  This is used to help improve error messages.
 *
 * p_parent_cte: CommonTableExpr that immediately contains the current query,
 * if any.
 *
 * p_target_relation: target relation, if query is INSERT, UPDATE, or DELETE.
 *
 * p_target_rangetblentry: target relation's entry in the rtable list.
 *
 * p_is_insert: true to process assignment expressions like INSERT, false
 * to process them like UPDATE.  (Note this can change intra-statement, for
 * cases like INSERT ON CONFLICT UPDATE.)
 *
 * p_windowdefs: list of WindowDefs representing WINDOW and OVER clauses.
 * We collect these while transforming expressions and then transform them
 * afterwards (so that any resjunk tlist items needed for the sort/group
 * clauses end up at the end of the query tlist).  A WindowDef's location in
 * this list, counting from 1, is the winref number to use to reference it.
 *
 * p_expr_kind: kind of expression we're currently parsing, as per enum above;
 * EXPR_KIND_NONE when not in an expression.
 *
 * p_next_resno: next TargetEntry.resno to assign, starting from 1.
 *
 * p_multiassign_exprs: partially-processed MultiAssignRef source expressions.
 *
 * p_locking_clause: query's FOR UPDATE/FOR SHARE clause, if any.
 *
 * p_locked_from_parent: true if parent query level applies FOR UPDATE/SHARE
 * to this subquery as a whole.
 *
 * p_resolve_unknowns: resolve unknown-type SELECT output columns as type TEXT
 * (this is true by default).
 *
 * p_queryEnv: the current query environment, including references to the
 * enclosing environment.
 *
 * p_hasAggs, p_hasWindowFuncs, etc: true if we've found any of the indicated
 * constructs in the query.
 *
 * p_last_srf: the set-returning FuncExpr or OpExpr most recently found in
 * the query, or NULL if none.
 *
 * p_pre_columnref_hook, etc: optional parser hook functions for modifying the
 * interpretation of ColumnRefs and ParamRefs.
 *
 * p_ref_hook_state: passthrough state for the parser hook functions.
 */
struct ParseState
{
	struct ParseState *parentParseState;	/* stack link */
	const char *p_sourcetext;	/* source text, or NULL if not available */
	List	   *p_rtable;		/* range table so far */
	List	   *p_joinexprs;	/* JoinExprs for RTE_JOIN p_rtable entries */
	List	   *p_joinlist;		/* join items so far (will become FromExpr
								 * node's fromlist) */
	List	   *p_namespace;	/* currently-referenceable RTEs (List of
								 * ParseNamespaceItem) */
	bool		p_lateral_active;	/* p_lateral_only items visible? */
	List	   *p_ctenamespace; /* current namespace for common table exprs */
	List	   *p_future_ctes;	/* common table exprs not yet in namespace */
	CommonTableExpr *p_parent_cte;	/* this query's containing CTE */
	Relation	p_target_relation;	/* INSERT/UPDATE/DELETE target rel */
	RangeTblEntry *p_target_rangetblentry;	/* target rel's RTE */
	bool		p_is_insert;	/* process assignment like INSERT not UPDATE */
	List	   *p_windowdefs;	/* raw representations of window clauses */
	ParseExprKind p_expr_kind;	/* what kind of expression we're parsing */
	int			p_next_resno;	/* next targetlist resno to assign */
	List	   *p_multiassign_exprs;	/* junk tlist entries for multiassign */
	List	   *p_locking_clause;	/* raw FOR UPDATE/FOR SHARE info */
	bool		p_locked_from_parent;	/* parent has marked this subquery
										 * with FOR UPDATE/FOR SHARE */
	bool		p_resolve_unknowns; /* resolve unknown-type SELECT outputs as
									 * type text */

	QueryEnvironment *p_queryEnv;	/* curr env, incl refs to enclosing env */

	/* Flags telling about things found in the query: */
	bool		p_hasAggs;
	bool		p_hasWindowFuncs;
	bool		p_hasTargetSRFs;
	bool		p_hasSubLinks;
	bool		p_hasModifyingCTE;

	Node	   *p_last_srf;		/* most recent set-returning func/op found */

	/*
	 * Optional hook functions for parser callbacks.  These are null unless
	 * set up by the caller of make_parsestate.
	 */
	PreParseColumnRefHook p_pre_columnref_hook;
	PostParseColumnRefHook p_post_columnref_hook;
	ParseParamRefHook p_paramref_hook;
	CoerceParamHook p_coerce_param_hook;
	void	   *p_ref_hook_state;	/* common passthrough link for above */
};

transformTopLevelStmt

transformTopLevelStmt函数会调用transformOptionalSelectInto函数将原始语法树转换成查询树，并把RawStmt中的stmt_location和stmt_len复制到生成的Query中。

/*
 * transformTopLevelStmt
 *		Transform the statement wrapped in a RawStmt into a Query.
 *
 * The wrapped statement goes through transformOptionalSelectInto(), and the
 * RawStmt's stmt_location and stmt_len are copied onto the resulting Query.
 */
Query *transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree) {
	/* Being at top level, SELECT INTO is permitted here. */
	Query	   *query = transformOptionalSelectInto(pstate, parseTree->stmt);

	query->stmt_len = parseTree->stmt_len;
	query->stmt_location = parseTree->stmt_location;

	return query;
}

transformOptionalSelectInto先对含有INTO子句的SELECT语句做特殊处理，将其转换为CREATE TABLE AS语句，然后再调用transformStmt进行转换。

/*
 * transformOptionalSelectInto -
 *	  Rewrite SELECT ... INTO as CREATE TABLE AS, then transform.
 *
 * This differs from transformStmt() only in that conversion.  Utility
 * statements cannot appear inside larger statements, so the rewrite is
 * legal solely at the top of the parse tree; hence it is attempted here,
 * before the recursive transformStmt() processing begins.
 */
static Query *transformOptionalSelectInto(ParseState *pstate, Node *parseTree) {
	if (IsA(parseTree, SelectStmt)) {
		SelectStmt *leftmost;

		/* For a set-operation tree, descend to the leftmost SelectStmt. */
		for (leftmost = (SelectStmt *) parseTree;
			 leftmost && leftmost->op != SETOP_NONE;
			 leftmost = leftmost->larg) {
			/* nothing to do in the body */
		}
		Assert(leftmost && IsA(leftmost, SelectStmt) && leftmost->larg == NULL);

		if (leftmost->intoClause) {
			CreateTableAsStmt *wrapper = makeNode(CreateTableAsStmt);

			wrapper->is_select_into = true;
			wrapper->relkind = OBJECT_TABLE;
			wrapper->into = leftmost->intoClause;
			wrapper->query = parseTree;

			/*
			 * Detach the intoClause from the SelectStmt.  That lets
			 * transformSelectStmt safely complain whenever it still sees
			 * intoClause set, which would mean the INTO appeared in a
			 * disallowed place.
			 */
			leftmost->intoClause = NULL;

			parseTree = (Node *) wrapper;
		}
	}

	return transformStmt(pstate, parseTree);
}

transformStmt

处理Insert语句(T_InsertStmt):transformInsertStmt(pstate, (InsertStmt *) parseTree)
处理Delete语句(T_DeleteStmt):transformDeleteStmt(pstate, (DeleteStmt *) parseTree)
处理Update语句(T_UpdateStmt):transformUpdateStmt(pstate, (UpdateStmt *) parseTree)
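处理Select语句(T_SelectStmt):含有VALUES列表时调用transformValuesClause(pstate, n)，没有集合操作(op为SETOP_NONE)时调用transformSelectStmt(pstate, n)，否则调用transformSetOperationStmt(pstate, n)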
处理DeclareCursor语句(T_DeclareCursorStmt):transformDeclareCursorStmt(pstate, (DeclareCursorStmt *) parseTree)
处理Explain语句(T_ExplainStmt):transformExplainStmt(pstate, (ExplainStmt *) parseTree)
等等

/*
 * transformStmt -
 *	  recursively transform a Parse tree into a Query tree.
 *
 * Dispatches on the node tag of parseTree.  Statement types without a
 * dedicated transform routine are wrapped in a CMD_UTILITY Query whose
 * utilityStmt points at the untouched parse tree.  Whatever the path, the
 * returned Query is marked QSRC_ORIGINAL with canSetTag set to true.
 */
Query *transformStmt(ParseState *pstate, Node *parseTree) {
	Query	   *query;

	switch (nodeTag(parseTree)) {
			/* Optimizable statements */
		case T_InsertStmt:
			query = transformInsertStmt(pstate, (InsertStmt *) parseTree);
			break;

		case T_DeleteStmt:
			query = transformDeleteStmt(pstate, (DeleteStmt *) parseTree);
			break;

		case T_UpdateStmt:
			query = transformUpdateStmt(pstate, (UpdateStmt *) parseTree);
			break;

		case T_SelectStmt:
			{
				SelectStmt *sel = (SelectStmt *) parseTree;

				/* VALUES takes precedence, then set operations, then plain SELECT */
				if (sel->valuesLists) {
					query = transformValuesClause(pstate, sel);
				} else if (sel->op != SETOP_NONE) {
					query = transformSetOperationStmt(pstate, sel);
				} else {
					query = transformSelectStmt(pstate, sel);
				}
			}
			break;

			/* Special cases */
		case T_DeclareCursorStmt:
			query = transformDeclareCursorStmt(pstate, (DeclareCursorStmt *) parseTree);
			break;

		case T_ExplainStmt:
			query = transformExplainStmt(pstate, (ExplainStmt *) parseTree);
			break;

		case T_CreateTableAsStmt:
			query = transformCreateTableAsStmt(pstate, (CreateTableAsStmt *) parseTree);
			break;

		case T_CallStmt:
			query = transformCallStmt(pstate, (CallStmt *) parseTree);
			break;

		default:
			/*
			 * Any other statement needs no transformation: return the
			 * original parsetree with a Query node placed on top of it.
			 */
			query = makeNode(Query);
			query->commandType = CMD_UTILITY;
			query->utilityStmt = (Node *) parseTree;
			break;
	}

	/* Treat as the original query until something tells us otherwise. */
	query->querySource = QSRC_ORIGINAL;
	query->canSetTag = true;

	return query;
}

上一篇:PostgreSQL常用命令(持续更新)


下一篇:PostgreSQL的时间函数使用整理