查询处理
- 熟悉查询处理流程
查询处理流程
查询流程由下面5个后端进程处理。
- Parser
从客户端接收SQL语句,解析成解析树。 - Analyzer/Analyser
对解析树进行语义分析,生成查询树。 - Rewriter
如果存在规则,使用规则重写查询树。 - Planner
从查询树生成最有效率的计划树。 - Executor
根据计划树创建的顺序访问表和索引来执行查询,将执行结果返回到客户端。
- parsetree结构
parsetree的根结点struct SelectStmt,其结构如下:
typedef struct SelectStmt {
NodeTag type;
/*
* These fields are used only in "leaf" SelectStmts.
*/
List *distinctClause; /* NULL, list of DISTINCT ON exprs, or
* lcons(NIL,NIL) for all (SELECT DISTINCT) */
IntoClause *intoClause; /* target for SELECT INTO */
List *targetList; /* the target list (of ResTarget) */
List *fromClause; /* the FROM clause */
Node *whereClause; /* WHERE qualification */
List *groupClause; /* GROUP BY clauses */
Node *havingClause; /* HAVING conditional-expression */
List *windowClause; /* WINDOW window_name AS (...), ... */
WithClause *withClause; /* WITH clause */
/*
* In a "leaf" node representing a VALUES list, the above fields are all
* null, and instead this field is set. Note that the elements of the
* sublists are just expressions, without ResTarget decoration. Also note
* that a list element can be DEFAULT (represented as a SetToDefault
* node), regardless of the context of the VALUES list. It's up to parse
* analysis to reject that where not valid.
*/
List *valuesLists; /* untransformed list of expression lists */
/*
* These fields are used in both "leaf" SelectStmts and upper-level
* SelectStmts.
*/
List *sortClause; /* sort clause (a list of SortBy's) */
Node *limitOffset; /* # of result tuples to skip */
Node *limitCount; /* # of result tuples to return */
List *lockingClause; /* FOR UPDATE (list of LockingClause's) */
HintState *hintState;
/*
* These fields are used only in upper-level SelectStmts.
*/
SetOperation op; /* type of set op */
bool all; /* ALL specified? */
struct SelectStmt *larg; /* left child */
struct SelectStmt *rarg; /* right child */
/*
* These fields are used by operator "(+)"
*/
bool hasPlus;
/* Eventually add fields for CORRESPONDING spec here */
} SelectStmt;
使用gdb跟踪查询语句:select id,col from a where id<100 order by id; 获得parsetree结构
# gdb
...
(gdb) attach 8580
...
(gdb) b exec_simple_query
Breakpoint 1 at 0x721ea0: file postgres.c, line 915.
(gdb) c
Continuing.
Breakpoint 1, exec_simple_query (
query_string=0x2c2c0b0 "select id,col from a where id<100 order by id;") at postgres.c:915
915 {
(gdb) n
930 pgstat_report_activity(STATE_RUNNING, query_string);
...
(gdb)
999 RawStmt *parsetree = lfirst_node(RawStmt, parsetree_item);
(gdb)
1015 commandTag = CreateCommandTag(parsetree->stmt);
(gdb) p *parsetree
$1 = {type = T_RawStmt, stmt = 0x2c2cf78, stmt_location = 0, stmt_len = 45}
(gdb) p *(parsetree->stmt)
$2 = {type = T_SelectStmt}
(gdb) p *((SelectStmt*)parsetree->stmt)
$3 = {type = T_SelectStmt, distinctClause = 0x0, intoClause = 0x0, targetList = 0x2c2cbc0,
fromClause = 0x2c2cda0, whereClause = 0x2c2ceb8, groupClause = 0x0, havingClause = 0x0,
windowClause = 0x0, valuesLists = 0x0, sortClause = 0x2c2d1b0, limitOffset = 0x0,
limitCount = 0x0, lockingClause = 0x0, withClause = 0x0, op = SETOP_NONE, all = false,
larg = 0x0, rarg = 0x0}
// 分析targetList
(gdb) p *((SelectStmt*)parsetree->stmt)->targetList
$20 = {type = T_List, length = 2, head = 0x2c2cba0, tail = 0x2c2ccf8}
(gdb) p *((Node*)((SelectStmt*)parsetree->stmt)->targetList->head->data->ptr_value)
$22 = {type = T_ResTarget}
(gdb) p *((ResTarget*)((SelectStmt*)parsetree->stmt)->targetList->head->data->ptr_value)
$23 = {type = T_ResTarget, name = 0x0, indirection = 0x0, val = 0x2c2cab0, location = 7}
(gdb) p *((ResTarget*)((SelectStmt*)parsetree->stmt)->targetList->head->data->ptr_value)->val
$24 = {type = T_ColumnRef}
(gdb) p *((ColumnRef*)((ResTarget*)((SelectStmt*)parsetree->stmt)->targetList->head->data->ptr_value)->val)
$25 = {type = T_ColumnRef, fields = 0x2c2cb20, location = 7}
(gdb) p *((Node*)((ColumnRef*)((ResTarget*)((SelectStmt*)parsetree->stmt)->targetList->head->data->ptr_value)->val)->fields->head->data->ptr_value)
$26 = {type = T_String}
(gdb) p *((Value*)((ColumnRef*)((ResTarget*)((SelectStmt*)parsetree->stmt)->targetList->head->data->ptr_value)->val)->fields->head->data->ptr_value)
$27 = {type = T_String, val = {ival = 46320280, str = 0x2c2ca98 "id"}}
(gdb) p *((Value*)((ColumnRef*)((ResTarget*)((SelectStmt*)parsetree->stmt)->targetList->head->next->data->ptr_value)->val)->fields->head->data->ptr_value)
$28 = {type = T_String, val = {ival = 46320624, str = 0x2c2cbf0 "col"}}
(gdb)
// 分析fromClause
(gdb) p *((SelectStmt*)parsetree->stmt)->fromClause
$31 = {type = T_List, length = 1, head = 0x2c2cd80, tail = 0x2c2cd80}
(gdb) p *((Node*)((SelectStmt*)parsetree->stmt)->fromClause->head->data->ptr_value)
$32 = {type = T_RangeVar}
(gdb) p *((RangeVar*)((SelectStmt*)parsetree->stmt)->fromClause->head->data->ptr_value)
$33 = {type = T_RangeVar, catalogname = 0x0, schemaname = 0x0, relname = 0x2c2cd18 "a",
inh = true, relpersistence = 112 'p', alias = 0x0, location = 19}
(gdb)
// 分析whereClause
(gdb) p *((SelectStmt*)parsetree->stmt)->whereClause
$34 = {type = T_A_Expr}
(gdb) p *((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)
$35 = {type = T_A_Expr, kind = AEXPR_OP, name = 0x2c2cf48, lexpr = 0x2c2cde8, rexpr = 0x2c2ce88,
location = 29}
(gdb) p *((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)->name
$36 = {type = T_List, length = 1, head = 0x2c2cf28, tail = 0x2c2cf28}
(gdb) p *((Node*)((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)->name->head->data->ptr_value)
$37 = {type = T_String}
(gdb) p *((Value*)((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)->name->head->data->ptr_value)
$38 = {type = T_String, val = {ival = 8936067, str = 0x885a83 "<"}}
(gdb) p *((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)->lexpr
$39 = {type = T_ColumnRef}
(gdb) p *((ColumnRef*)((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)->lexpr)
$40 = {type = T_ColumnRef, fields = 0x2c2ce58, location = 27}
(gdb) p *((ColumnRef*)((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)->lexpr)->fields
$41 = {type = T_List, length = 1, head = 0x2c2ce38, tail = 0x2c2ce38}
(gdb) p *((Node*)((ColumnRef*)((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)->lexpr)->fields->head->data->ptr_value)
$42 = {type = T_String}
(gdb) p *((Value*)((ColumnRef*)((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)->lexpr)->fields->head->data->ptr_value)
$43 = {type = T_String, val = {ival = 46321104, str = 0x2c2cdd0 "id"}}
(gdb) p *((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)->rexpr
$44 = {type = T_A_Const}
(gdb) p *((A_Const*)((A_Expr*)((SelectStmt*)parsetree->stmt)->whereClause)->rexpr)
$45 = {type = T_A_Const, val = {type = T_Integer, val = {ival = 100,
str = 0x64 <Address 0x64 out of bounds>}}, location = 30}
(gdb)
// 分析sortClause
(gdb) p *((SelectStmt*)parsetree->stmt)->sortClause
$4 = {type = T_List, length = 1, head = 0x2c2d190, tail = 0x2c2d190}
(gdb) p *((Node*)((SelectStmt*)parsetree->stmt)->sortClause->head->data->ptr_value)
$5 = {type = T_SortBy}
(gdb) p *((SortBy*)((SelectStmt*)parsetree->stmt)->sortClause->head->data->ptr_value)
$6 = {type = T_SortBy, node = 0x2c2d0a0, sortby_dir = SORTBY_DEFAULT,
sortby_nulls = SORTBY_NULLS_DEFAULT, useOp = 0x0, location = -1}
(gdb) p *((SortBy*)((SelectStmt*)parsetree->stmt)->sortClause->head->data->ptr_value)->node
$10 = {type = T_ColumnRef}
(gdb) p *((ColumnRef*)((SortBy*)((SelectStmt*)parsetree->stmt)->sortClause->head->data->ptr_value)->node)
$11 = {type = T_ColumnRef, fields = 0x2c2d110, location = 43}
(gdb) p *((ColumnRef*)((SortBy*)((SelectStmt*)parsetree->stmt)->sortClause->head->data->ptr_value)->node)->fields
$12 = {type = T_List, length = 1, head = 0x2c2d0f0, tail = 0x2c2d0f0}
(gdb) p *((Node*)((ColumnRef*)((SortBy*)((SelectStmt*)parsetree->stmt)->sortClause->head->data->ptr_value)->node)->fields->head->data->ptr_value)
$14 = {type = T_String}
(gdb) p *((Value*)((ColumnRef*)((SortBy*)((SelectStmt*)parsetree->stmt)->sortClause->head->data->ptr_value)->node)->fields->head->data->ptr_value)
$18 = {type = T_String, val = {ival = 46321800, str = 0x2c2d088 "id"}}
(gdb)
可得parsetree结构为:
- querytree结构
querytree的根结点struct Query,其结构如下:
typedef struct Query {
NodeTag type;
CmdType commandType; /* select|insert|update|delete|merge|utility */
QuerySource querySource; /* where did I come from? */
uint64 queryId; /* query identifier (can be set by plugins) */
bool canSetTag; /* do I set the command result tag? */
Node* utilityStmt; /* non-null if this is DECLARE CURSOR or a
* non-optimizable statement */
int resultRelation; /* rtable index of target relation for
* INSERT/UPDATE/DELETE/MERGE; 0 for SELECT */
bool hasAggs; /* has aggregates in tlist or havingQual */
bool hasWindowFuncs; /* has window functions in tlist */
bool hasSubLinks; /* has subquery SubLink */
bool hasDistinctOn; /* distinctClause is from DISTINCT ON */
bool hasRecursive; /* WITH RECURSIVE was specified */
bool hasModifyingCTE; /* has INSERT/UPDATE/DELETE in WITH */
bool hasForUpdate; /* FOR UPDATE or FOR SHARE was specified */
bool hasRowSecurity; /* rewriter has applied some RLS policy */
bool hasSynonyms; /* has synonym mapping in rtable */
List* cteList; /* WITH list (of CommonTableExpr's) */
List* rtable; /* list of range table entries */
FromExpr* jointree; /* table join tree (FROM and WHERE clauses) */
List* targetList; /* target list (of TargetEntry) */
List* starStart; /* Corresponding p_star_start in ParseState */
List* starEnd; /* Corresponding p_star_end in ParseState */
List* starOnly; /* Corresponding p_star_only in ParseState */
List* returningList; /* return-values list (of TargetEntry) */
List* groupClause; /* a list of SortGroupClause's */
List* groupingSets; /* a list of GroupingSet's if present */
Node* havingQual; /* qualifications applied to groups */
List* windowClause; /* a list of WindowClause's */
List* distinctClause; /* a list of SortGroupClause's */
List* sortClause; /* a list of SortGroupClause's */
Node* limitOffset; /* # of result tuples to skip (int8 expr) */
Node* limitCount; /* # of result tuples to return (int8 expr) */
List* rowMarks; /* a list of RowMarkClause's */
Node* setOperations; /* set-operation tree if this is top level of
* a UNION/INTERSECT/EXCEPT query */
List *constraintDeps; /* a list of pg_constraint OIDs that the query
* depends on to be semantically valid */
HintState* hintState;
#ifdef PGXC
/* need this info for PGXC Planner, may be temporary */
char* sql_statement; /* original query */
bool is_local; /* enforce query execution on local node
* this is used by EXECUTE DIRECT especially. */
bool has_to_save_cmd_id; /* true if the query is such an INSERT SELECT
* that inserts into a child by selecting
* from its parent OR a WITH query that
* updates a table in main query and inserts
* a row to the same table in WITH query */
bool vec_output; /* true if it's vec output. this flag is used in FQS planning */
TdTruncCastStatus tdTruncCastStatus; /* Auto truncation Cast added, only used for stmt in stored procedure or
prepare stmt. */
List* equalVars; /* vars appears in UPDATE/DELETE clause */
#endif
ParamListInfo boundParamsQ;
int mergeTarget_relation;
List* mergeSourceTargetList;
List* mergeActionList; /* list of actions for MERGE (only) */
Query* upsertQuery; /* insert query for INSERT ON DUPLICATE KEY UPDATE (only) */
UpsertExpr* upsertClause; /* DUPLICATE KEY UPDATE [NOTHING | ...] */
bool isRowTriggerShippable; /* true if all row triggers are shippable. */
bool use_star_targets; /* true if use * for targetlist. */
bool is_from_full_join_rewrite; /* true if the query is created when doing
* full join rewrite. If true, we should not
* do some expression processing.
* Please refer to subquery_planner.
*/
uint64 uniqueSQLId; /* used by unique sql id */
bool can_push;
bool unique_check; /* true if the subquery is generated by general
* sublink pullup, and scalar output is needed */
Oid* fixed_paramTypes; /* For plpy CTAS query. CTAS is a recursive call.CREATE query is the first rewrited.
* thd 2nd rewrited query is INSERT SELECT.whithout this attribute, DB will have
* an error that has no idea about $x when INSERT SELECT query is analyzed. */
int fixed_numParams;
} Query;
使用gdb跟踪查询语句:select id,col from a where id<100 order by id; 获得querytree结构
(gdb) p *query
$9 = {type = T_Query, commandType = CMD_SELECT, querySource = QSRC_ORIGINAL, queryId = 0,
canSetTag = true, utilityStmt = 0x0, resultRelation = 0, hasAggs = false,
hasWindowFuncs = false, hasTargetSRFs = false, hasSubLinks = false, hasDistinctOn = false,
hasRecursive = false, hasModifyingCTE = false, hasForUpdate = false, hasRowSecurity = false,
cteList = 0x0, rtable = 0x2329a68, jointree = 0x23e3028, targetList = 0x2329b70,
override = OVERRIDING_NOT_SET, onConflict = 0x0, returningList = 0x0, groupClause = 0x0,
groupingSets = 0x0, havingQual = 0x0, windowClause = 0x0, distinctClause = 0x0,
sortClause = 0x23e2ff8, limitOffset = 0x0, limitCount = 0x0, rowMarks = 0x0,
setOperations = 0x0, constraintDeps = 0x0, withCheckOptions = 0x0, stmt_location = 0,
stmt_len = 45}
(gdb)
可得querytree结构为:
- rewriter,重写查询树
以视图来举例:create view a_view as select id,col from a where id<100 order by id;
// 重写前rtable
p *((RangeTblEntry*)(((Query*)query)->rtable)->head->data->ptr_value)
$89 = {type = T_RangeTblEntry, rtekind = RTE_RELATION, relid = 16587, relkind = 118 'v',
tablesample = 0x0, subquery = 0x0, security_barrier = false, jointype = JOIN_INNER,
joinaliasvars = 0x0, functions = 0x0, funcordinality = false, tablefunc = 0x0,
values_lists = 0x0, ctename = 0x0, ctelevelsup = 0, self_reference = false, coltypes = 0x0,
coltypmods = 0x0, colcollations = 0x0, enrname = 0x0, enrtuples = 0, alias = 0x0,
eref = 0x2328f08, lateral = false, inh = true, inFromCl = true, requiredPerms = 2,
checkAsUser = 0, selectedCols = 0x23293e8, insertedCols = 0x0, updatedCols = 0x0,
securityQuals = 0x0}
// 重写后rtable,增加了subquery,select语句的查询树
(gdb) p *((RangeTblEntry*)((List*)((Query*)((List*)querytrees)->head->data->ptr_value)->rtable)->head->data->ptr_value)
$83 = {type = T_RangeTblEntry, rtekind = RTE_SUBQUERY, relid = 0, relkind = 118 'v',
tablesample = 0x0, subquery = 0x2328bd8, security_barrier = false, jointype = JOIN_INNER,
joinaliasvars = 0x0, functions = 0x0, funcordinality = false, tablefunc = 0x0,
values_lists = 0x0, ctename = 0x0, ctelevelsup = 0, self_reference = false, coltypes = 0x0,
coltypmods = 0x0, colcollations = 0x0, enrname = 0x0, enrtuples = 0, alias = 0x0,
eref = 0x2328f08, lateral = false, inh = false, inFromCl = true, requiredPerms = 0,
checkAsUser = 0, selectedCols = 0x0, insertedCols = 0x0, updatedCols = 0x0, securityQuals = 0x0}
(gdb) p *((RangeTblEntry*)((List*)((Query*)((List*)querytrees)->head->data->ptr_value)->rtable)->head->data->ptr_value)->subquery
$85 = {type = T_Query, commandType = CMD_SELECT, querySource = QSRC_ORIGINAL, queryId = 0,
canSetTag = true, utilityStmt = 0x0, resultRelation = 0, hasAggs = false,
hasWindowFuncs = false, hasTargetSRFs = false, hasSubLinks = false, hasDistinctOn = false,
hasRecursive = false, hasModifyingCTE = false, hasForUpdate = false, hasRowSecurity = false,
cteList = 0x0, rtable = 0x2329590, jointree = 0x245e508, targetList = 0x245e6e0,
override = OVERRIDING_NOT_SET, onConflict = 0x0, returningList = 0x0, groupClause = 0x0,
groupingSets = 0x0, havingQual = 0x0, windowClause = 0x0, distinctClause = 0x0,
sortClause = 0x245e8c0, limitOffset = 0x0, limitCount = 0x0, rowMarks = 0x0,
setOperations = 0x0, constraintDeps = 0x0, withCheckOptions = 0x0, stmt_location = -1,
stmt_len = -1}
(gdb)
- plantree结构和执行器
plantree是一系列plan nodes的集合,它连接到struct PlannedStmt的planTree成员。
使用gdb跟踪查询语句:select id,col from a where id<100 order by id; 获得plantree结构
(gdb) p *((PlannedStmt*)((List*)plantree_list)->head->data->ptr_value)
$93 = {type = T_PlannedStmt, commandType = CMD_SELECT, queryId = 0, hasReturning = false,
hasModifyingCTE = false, canSetTag = true, transientPlan = false, dependsOnRole = false,
parallelModeNeeded = false, jitFlags = 0, planTree = 0x2460080, rtable = 0x24603d0,
resultRelations = 0x0, nonleafResultRelations = 0x0, rootResultRelations = 0x0, subplans = 0x0,
rewindPlanIDs = 0x0, rowMarks = 0x0, relationOids = 0x2460420, invalItems = 0x0,
paramExecTypes = 0x0, utilityStmt = 0x0, stmt_location = 0, stmt_len = 45}
(gdb) p *((Sort*)((PlannedStmt*)((List*)plantree_list)->head->data->ptr_value)->planTree)
$95 = {plan = {type = T_Sort, startup_cost = 74.230245295839836, total_cost = 76.112745295839829,
plan_rows = 753, plan_width = 8, parallel_aware = false, parallel_safe = true,
plan_node_id = 0, targetlist = 0x24604a0, qual = 0x0, lefttree = 0x245ff90, righttree = 0x0,
initPlan = 0x0, extParam = 0x0, allParam = 0x0}, numCols = 1, sortColIdx = 0x2460020,
sortOperators = 0x2460038, collations = 0x2460050, nullsFirst = 0x2460068}
(gdb) p *((SeqScan*)((Sort*)((PlannedStmt*)((List*)plantree_list)->head->data->ptr_value)->planTree)->plan->lefttree)
$98 = {plan = {type = T_SeqScan, startup_cost = 0, total_cost = 38.25, plan_rows = 753,
plan_width = 8, parallel_aware = false, parallel_safe = true, plan_node_id = 1,
targetlist = 0x245fea0, qual = 0x245ff60, lefttree = 0x0, righttree = 0x0, initPlan = 0x0,
extParam = 0x0, allParam = 0x0}, scanrelid = 1}
(gdb) p *((OpExpr*)((List*)((SeqScan*)((Sort*)((PlannedStmt*)((List*)plantree_list)->head->data->ptr_value)->planTree)->plan->lefttree)->plan->qual)->head->data->ptr_value)
$101 = {xpr = {type = T_OpExpr}, opno = 97, opfuncid = 66, opresulttype = 16, opretset = false,
opcollid = 0, inputcollid = 0, args = 0x245eb18, location = 29}
可得plantree结构为:
输出的执行计划为:
postgres=# explain select id,col from a where id<100 order by id;
QUERY PLAN
----------------------------------------------------------
Sort (cost=74.23..76.11 rows=753 width=8)
Sort Key: id
-> Seq Scan on a (cost=0.00..38.25 rows=753 width=8)
Filter: (id < 100)
(4 rows)
执行器根据执行计划执行语句,对于示例中的执行计划,执行器自下而上执行(先顺序扫描表a获取id<100的行,再对得到的行进行排序后输出结果)。