MySQL从8.0.13开始支持functional index。Functional index类似于ORACLE的Function-Based Indexes。该索引可以根据将索引定义的表达式的值按照索引顺序存到索引里,进而减少表达式的计算,加速查询。

    下面我们看一下如何创建一个functional index:

    1. CREATE TABLE t1 (col1 INT, col2 INT, INDEX func_index ((ABS(col1))));
    2. CREATE INDEX idx1 ON t1 ((col1 + col2));
    3. CREATE INDEX idx2 ON t1 ((col1 + col2), (col1 - col2), col1);
    4. ALTER TABLE t1 ADD INDEX ((col1 * 40) DESC);

    接下来我们继续看一下functional index的效果:

    1. mysql> CREATE TABLE t1 (col1 INT, col2 INT);
    2. Query OK, 0 rows affected (0.13 sec)
    3. mysql> SELECT * FROM t1 WHERE col1+col2 > 10;
    4. Empty set (0.01 sec)
    5. mysql> EXPLAIN SELECT * FROM t1 WHERE col1+col2 > 10;
    6. +----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
    7. | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
    8. +----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
    9. | 1 | SIMPLE | t1 | NULL | ALL | NULL | NULL | NULL | NULL | 1 | 100.00 | Using where |
    10. +----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
    11. 1 row in set, 1 warning (0.01 sec)
    12. mysql> CREATE INDEX idx1 ON t1 ((col1 + col2));
    13. Query OK, 0 rows affected (0.14 sec)
    14. Records: 0 Duplicates: 0 Warnings: 0
    15. mysql> EXPLAIN SELECT * FROM t1 WHERE col1+col2 > 10;
    16. +----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+-------------+
    17. | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
    18. +----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+-------------+
    19. | 1 | SIMPLE | t1 | NULL | range | idx1 | idx1 | 9 | NULL | 1 | 100.00 | Using where |
    20. +----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+-------------+
    21. 1 row in set, 1 warning (0.00 sec)

    从上面的例子中我们可以看到查询中使用了functional索引 idx1来加速查询。

    MySQL的functinal index是利用generated column来辅助实现的,后面的章节中我们会详细的进行分析。所以对于创建functional index的一些限制可以参考:创建generated column 以及增加generated column

    下面我们从源码来看一下MySQL functional index的实现过程。

    create_index流程

    上面的流程图是MySQL创建functional index的一个基本流程。我们重点看一下add_functional_index_to_create_list这个函数的处理过程。

    1. /**
    2. Prepares a functional index by adding a hidden indexed generated column for the key part.
    3. A functional index is implemented as a hidden generated column over the
    4. expression specified in the index, and the hidden generated column is then indexed. This function adds a hidden generated column to the Create_list, and updates the key specification to point to this new column. The generated column is given a name that is a hash of the key name and the key part number.
    5. */
    6. static bool add_functional_index_to_create_list(THD *thd,
    7. Key_spec *key_spec,
    8. Alter_info *alter_info,
    9. Key_part_spec *kp,
    10. uint key_part_number,
    11. HA_CREATE_INFO *create_info) {
    12. // A functional index cannot be a primary key
    13. /* 这里限制了functional index 不能作为主键,因为它是个generated column */
    14. if (key_spec->type == KEYTYPE_PRIMARY) {
    15. my_error(ER_FUNCTIONAL_INDEX_PRIMARY_KEY, MYF(0));
    16. return true;
    17. }
    18. // If the key isn't given a name explicitly by the user, we must auto-generate
    19. // a name here. "Normal" indexes will be given a name in prepare_key(), but
    20. // that is too late for functional indexes since we want the hidden generated
    21. // column name to be based on the index name.
    22. // 生成一个默认的索引名称
    23. if (key_spec->name.str == nullptr) {
    24. std::string key_name;
    25. int count = 2;
    26. key_name.assign("functional_index");
    27. while (key_name_exists(alter_info->key_list, key_name, nullptr)) {
    28. key_name.assign("functional_index_");
    29. key_name.append(std::to_string(count++));
    30. }
    31. key_spec->name.length = key_name.size();
    32. key_spec->name.str = strmake_root(thd->stmt_arena->mem_root,
    33. key_name.c_str(), key_name.size());
    34. } else { if (key_name_exists(alter_info->key_list,
    35. {key_spec->name.str, key_spec->name.length},
    36. key_spec)) {
    37. my_error(ER_DUP_KEYNAME, MYF(0), key_spec->name.str);
    38. return true;
    39. }
    40. }
    41. // First we need to resolve the expression in the functional index so that we
    42. // know the correct collation, data type, length etc...
    43. ulong saved_privilege = thd->want_privilege;
    44. thd->want_privilege = SELECT_ACL;
    45. {
    46. // Create a scope guard so that we are guaranteed that the privileges are
    47. // set back to the original value.
    48. auto handler_guard = create_scope_guard(
    49. [thd, saved_privilege]() { thd->want_privilege = saved_privilege; });
    50. Functional_index_error_handler error_handler(
    51. {key_spec->name.str, key_spec->name.length}, thd);
    52. Item *expr = kp->get_expression();
    53. if (expr->type() == Item::FIELD_ITEM) {
    54. my_error(ER_FUNCTIONAL_INDEX_ON_FIELD, MYF(0));
    55. return true;
    56. }
    57. // 这里验证表达式的合法性,是否违反generated column的约束条件
    58. if (pre_validate_value_generator_expr(kp->get_expression(),
    59. key_spec->name.str, true)) {
    60. return true;
    61. }
    62. Replace_field_processor_arg replace_field_argument(
    63. thd, &alter_info->create_list, create_info, key_spec->name.str);
    64. if (expr->walk(&Item::replace_field_processor, Item::WALK_PREFIX,
    65. reinterpret_cast<uchar *>(&replace_field_argument))) {
    66. return true;
    67. }
    68. if (kp->resolve_expression(thd)) return true;
    69. }
    70. // 默认隐式列生成一个名字
    71. const char *field_name = make_functional_index_column_name(
    72. {key_spec->name.str, key_spec->name.length}, key_part_number,
    73. thd->stmt_arena->mem_root);
    74. Item *item = kp->get_expression();
    75. // Ensure that we aren't trying to index a field
    76. DBUG_ASSERT(item->type() != Item::FIELD_ITEM); TABLE tmp_table;
    77. TABLE_SHARE share;
    78. tmp_table.s = &share;
    79. init_tmp_table_share(thd, &share, "", 0, "", "", nullptr);
    80. tmp_table.s->db_create_options = 0;
    81. tmp_table.s->db_low_byte_first = false;
    82. tmp_table.set_not_started();
    83. // 生成generated column的创建信息
    84. Create_field *cr = generate_create_field(thd, item, &tmp_table);
    85. if (cr == nullptr) {
    86. return true; /* purecov: deadcode */
    87. }
    88. if (is_blob(cr->sql_type)) {
    89. my_error(ER_FUNCTIONAL_INDEX_ON_LOB, MYF(0));
    90. return true;
    91. }
    92. cr->field_name = field_name;
    93. cr->field = nullptr;
    94. cr->hidden = dd::Column::enum_hidden_type::HT_HIDDEN_SQL;
    95. cr->stored_in_db = false;
    96. Value_generator *gcol_info = new (thd->mem_root) Value_generator();
    97. gcol_info->expr_item = kp->get_expression();
    98. // 生成一个virtual generated column
    99. gcol_info->set_field_stored(false);
    100. gcol_info->set_field_type(cr->sql_type); cr->gcol_info = gcol_info;
    101. alter_info->create_list.push_back(cr);
    102. alter_info->flags |= Alter_info::ALTER_ADD_COLUMN;
    103. // 这里将KEY的索引列设置为隐式generated column
    104. kp->set_name_and_prefix_length(field_name, 0);
    105. return false;
    106. }

    函数的注释里面说的非常详细,functional index的创建过程依赖于generated column来做辅助。创建functional index的时候都要隐式的创建一个generated column,然后在该generated column上创建对应的索引。

    上面我们看到了源码中是如何创建一个functional index。那么接下来我们继续看一下MySQL是如何为查询寻找合适的functional index的。

    就拿上面的例子看一下调用堆栈:

    1. EXPLAIN SELECT * FROM t1 WHERE col1+col2 > 10;
    2. #0 substitute_gc (thd=0x2aab94000be0, select_lex=0x2aab94270298, where_cond=0x2aab94271ec8, group_list=0x0, order=0x0)
    3. #1 0x0000000003049283 in JOIN::optimize (this=0x2aab94272750)
    4. #2 0x0000000003165c32 in SELECT_LEX::optimize (this=0x2aab94270298, thd=0x2aab94000be0)
    5. #3 0x000000000316221c in Sql_cmd_dml::execute_inner (this=0x2aab94272078, thd=0x2aab94000be0)
    6. #4 0x00000000031614d3 in Sql_cmd_dml::execute (this=0x2aab94272078, thd=0x2aab94000be0)
    7. #5 0x00000000030a7396 in mysql_execute_command (thd=0x2aab94000be0, first_level=true)
    8. #6 0x00000000030ac74b in mysql_parse (thd=0x2aab94000be0, parser_state=0x2aab8c2462d0, force_primary_storage_engine=false)
    9. #7 0x0000000003095b0d in dispatch_command (thd=0x2aab94000be0, com_data=0x2aab8c246c40, command=COM_QUERY)
    10. #8 0x0000000003091d7d in do_command (thd=0x2aab94000be0)
    11. #9 0x00000000033d145b in handle_connection (arg=0xcb9cee0)
    12. #10 0x00000000066cd007 in pfs_spawn_thread (arg=0xca3bde0)
    13. #11 0x00002aaaaacd4aa1 in start_thread () from /lib64/libpthread.so.0
    14. #12 0x00002aaaabfb993d in clone () from /lib64/libc.so.6

    上面的堆栈可以看到优化器调用了substitute_gc这个函数,这个函数就可以将WHERE,GROUP_BY 以及ORDER BY中的相关表达式替换为隐式的generated column,进而可以让优化器来选择functional index。我们再来研究一下substitute_gc这个函数的源码。

    1. bool substitute_gc(THD *thd, SELECT_LEX *select_lex, Item *where_cond,
    2. ORDER *group_list, ORDER *order) {
    3. List<Field> indexed_gc;
    4. Opt_trace_context *const trace = &thd->opt_trace;
    5. Opt_trace_object trace_wrapper(trace);
    6. Opt_trace_object subst_gc(trace, "substitute_generated_columns");
    7. // Collect all GCs that are a part of a key
    8. // 这里要遍历所有的表来收集所有可以被替换的generated columns。后面的代码中会分析哪些表达式可以被替换
    9. for (TABLE_LIST *tl = select_lex->leaf_tables; tl; tl = tl->next_leaf) {
    10. if (tl->table->s->keys == 0) continue;
    11. for (uint i = 0; i < tl->table->s->fields; i++) {
    12. Field *fld = tl->table->field[i];
    13. // 这里判断只有在索引中的列并且generated column可以用来替换表达式才会作为候选的列。
    14. if (fld->is_gcol() &&
    15. !(fld->part_of_key.is_clear_all() &&
    16. fld->part_of_prefixkey.is_clear_all()) &&
    17. fld->gcol_info->expr_item->can_be_substituted_for_gc()) {
    18. // Don't check allowed keys here as conditions/group/order use
    19. // different keymaps for that.
    20. indexed_gc.push_back(fld);
    21. }
    22. }
    23. } // No GC in the tables used in the query
    24. if (indexed_gc.elements == 0) return false;
    25. if (where_cond) {
    26. // Item_func::compile will dereference this pointer, provide valid value.
    27. uchar i, *dummy = &i;
    28. /**
    29. 这里会利用generated column来替换where_cond里面对应的表达式。
    30. Item::gc_subst_analyzer 该虚函数定义了每一种Item是否需要进行generated column的替换过程
    31. Item::gc_subst_transformer 该函数定义了每一种可替换的Item如何利用generated column进行替换
    32. */
    33. where_cond->compile(&Item::gc_subst_analyzer, &dummy,
    34. &Item::gc_subst_transformer,
    35. (uchar *)&indexed_gc);
    36. subst_gc.add("resulting_condition", where_cond);
    37. }
    38. if (!(group_list || order)) return false;
    39. // Filter out GCs that do not have index usable for GROUP/ORDER
    40. Field *gc;
    41. List_iterator<Field> li(indexed_gc);
    42. while ((gc = li++)) {
    43. Key_map tkm = gc->part_of_key;
    44. // 这里判断generated column相关的索引是否与group-by 或者 order-by的列有交集,如果没有相关性,就忽略。
    45. tkm.intersect(group_list ? gc->table->keys_in_use_for_group_by
    46. : gc->table->keys_in_use_for_order_by);
    47. if (tkm.is_clear_all()) li.remove();
    48. }
    49. if (!indexed_gc.elements) return false;
    50. // Index could be used for ORDER only if there is no GROUP
    51. ORDER *list = group_list ? group_list : order;
    52. bool changed = false;
    53. for (ORDER *ord = list; ord; ord = ord->next) {
    54. li.rewind();
    55. // 这里判断group-by或者order-by的列是否是表达式或者函数来进行generated column替换。
    56. if (!(*ord->item)->can_be_substituted_for_gc()) continue;
    57. while ((gc = li++)) {
    58. Item_func *tmp = pointer_cast<Item_func *>(*ord->item);
    59. Item_field *field;
    60. // 这里会根据表达式与generated column->gcol_info->expr_item进行比较来获取匹配的generated column
    61. if ((field = get_gc_for_expr(&tmp, gc, gc->result_type()))) {
    62. changed = true;
    63. /* Add new field to field list. */
    64. ord->item = select_lex->add_hidden_item(field);
    65. break;
    66. }
    67. }
    68. }
    69. if (changed && trace->is_started()) {
    70. String str;
    71. SELECT_LEX::print_order(
    72. &str, list,
    73. enum_query_type(QT_TO_SYSTEM_CHARSET | QT_SHOW_SELECT_NUMBER |
    74. QT_NO_DEFAULT_DB));
    75. subst_gc.add_utf8(group_list ? "resulting_GROUP_BY" : "resulting_ORDER_BY",
    76. str.ptr(), str.length());
    77. }
    78. return changed;
    79. }

    综上所述,本篇文章主要从源码层面对MySQL 8.0 实现的Functional index进行了一下简要的分析。Functional index主要依赖于generated column,利用内部隐式的创建一个generated column来辅助创建functional index。代码层面也比较容易理解,希望该篇文章能够帮助广大读者了解MySQL functional index的实现原理。