Part 6 - The Cursor Abstraction

Part 5 - Persistence to Disk

Part 7 - Introduction to the B-Tree

This should be a shorter part than the last one. We’re just going to refactor a bit to make it easier to start the B-Tree implementation.

We’re going to add a Cursor object which represents a location in the table. Things you might want to do with cursors:

  • Create a cursor at the beginning of the table
  • Create a cursor at the end of the table
  • Access the row the cursor is pointing to
  • Advance the cursor to the next row
    Those are the behaviors we’re going to implement now. Later, we will also want to:

  • Delete the row pointed to by a cursor

  • Modify the row pointed to by a cursor
  • Search a table for a given ID, and create a cursor pointing to the row with that ID
    Without further ado, here’s the Cursor type:
  1. +struct Cursor_t {
  2. + Table* table;
  3. + uint32_t row_num;
  4. + bool end_of_table; // Indicates a position one past the last element
  5. +};
  6. +typedef struct Cursor_t Cursor;

Given our current table data structure, all you need to identify a location in a table is the row number.

A cursor also has a reference to the table it’s part of (so our cursor functions can take just the cursor as a parameter).

Finally, it has a boolean called end_of_table. This is so we can represent a position past the end of the table (which is somewhere we may want to insert a row).

table_start() and table_end() create new cursors:

  1. +Cursor* table_start(Table* table) {
  2. + Cursor* cursor = malloc(sizeof(Cursor));
  3. + cursor->table = table;
  4. + cursor->row_num = 0;
  5. + cursor->end_of_table = (table->num_rows == 0);
  6. +
  7. + return cursor;
  8. +}
  9. +
  10. +Cursor* table_end(Table* table) {
  11. + Cursor* cursor = malloc(sizeof(Cursor));
  12. + cursor->table = table;
  13. + cursor->row_num = table->num_rows;
  14. + cursor->end_of_table = true;
  15. +
  16. + return cursor;
  17. +}

Our row_slot() function will become cursor_value(), which returns a pointer to the position described by the cursor:

  1. -void* row_slot(Table* table, uint32_t row_num) {
  2. +void* cursor_value(Cursor* cursor) {
  3. + uint32_t row_num = cursor->row_num;
  4. uint32_t page_num = row_num / ROWS_PER_PAGE;
  5. - void* page = get_page(table->pager, page_num);
  6. + void* page = get_page(cursor->table->pager, page_num);
  7. uint32_t row_offset = row_num % ROWS_PER_PAGE;
  8. uint32_t byte_offset = row_offset * ROW_SIZE;
  9. return page + byte_offset;
  10. }

Advancing the cursor in our current table structure is as simple as incrementing the row number. This will be a bit more complicated in a B-tree.

  1. +void cursor_advance(Cursor* cursor) {
  2. + cursor->row_num += 1;
  3. + if (cursor->row_num >= cursor->table->num_rows) {
  4. + cursor->end_of_table = true;
  5. + }
  6. +}

Finally we can change our “virtual machine” methods to use the cursor abstraction. When inserting a row, we open a cursor at the end of table, write to that cursor location, then close the cursor.

  1. Row* row_to_insert = &(statement->row_to_insert);
  2. + Cursor* cursor = table_end(table);
  3. - serialize_row(row_to_insert, row_slot(table, table->num_rows));
  4. + serialize_row(row_to_insert, cursor_value(cursor));
  5. table->num_rows += 1;
  6. + free(cursor);
  7. +
  8. return EXECUTE_SUCCESS;
  9. }

When selecting all rows in the table, we open a cursor at the start of the table, print the row, then advance the cursor to the next row. Repeat until we’ve reached the end of the table.

  1. ExecuteResult execute_select(Statement* statement, Table* table) {
  2. + Cursor* cursor = table_start(table);
  3. +
  4. Row row;
  5. - for (uint32_t i = 0; i < table->num_rows; i++) {
  6. - deserialize_row(row_slot(table, i), &row);
  7. + while (!(cursor->end_of_table)) {
  8. + deserialize_row(cursor_value(cursor), &row);
  9. print_row(&row);
  10. + cursor_advance(cursor);
  11. }
  12. +
  13. + free(cursor);
  14. +
  15. return EXECUTE_SUCCESS;
  16. }

Alright, that’s it! Like I said, this was a shorter refactor that should help us as we rewrite our table data structure into a B-Tree. execute_select() and execute_insert() can interact with the table entirely through the cursor without assuming anything about how the table is stored.

Here’s the complete diff to this part:

  1. };
  2. typedef struct Table_t Table;
  3. +struct Cursor_t {
  4. + Table* table;
  5. + uint32_t row_num;
  6. + bool end_of_table; // Indicates a position one past the last element
  7. +};
  8. +typedef struct Cursor_t Cursor;
  9. +
  10. void print_row(Row* row) {
  11. printf("(%d, %s, %s)\n", row->id, row->username, row->email);
  12. }
  13. @@ -125,14 +132,40 @@ void* get_page(Pager* pager, uint32_t page_num) {
  14. return pager->pages[page_num];
  15. }
  16. -void* row_slot(Table* table, uint32_t row_num) {
  17. +Cursor* table_start(Table* table) {
  18. + Cursor* cursor = malloc(sizeof(Cursor));
  19. + cursor->table = table;
  20. + cursor->row_num = 0;
  21. + cursor->end_of_table = (table->num_rows == 0);
  22. +
  23. + return cursor;
  24. +}
  25. +
  26. +Cursor* table_end(Table* table) {
  27. + Cursor* cursor = malloc(sizeof(Cursor));
  28. + cursor->table = table;
  29. + cursor->row_num = table->num_rows;
  30. + cursor->end_of_table = true;
  31. +
  32. + return cursor;
  33. +}
  34. +
  35. +void* cursor_value(Cursor* cursor) {
  36. + uint32_t row_num = cursor->row_num;
  37. uint32_t page_num = row_num / ROWS_PER_PAGE;
  38. - void* page = get_page(table->pager, page_num);
  39. + void* page = get_page(cursor->table->pager, page_num);
  40. uint32_t row_offset = row_num % ROWS_PER_PAGE;
  41. uint32_t byte_offset = row_offset * ROW_SIZE;
  42. return page + byte_offset;
  43. }
  44. +void cursor_advance(Cursor* cursor) {
  45. + cursor->row_num += 1;
  46. + if (cursor->row_num >= cursor->table->num_rows) {
  47. + cursor->end_of_table = true;
  48. + }
  49. +}
  50. +
  51. Pager* pager_open(const char* filename) {
  52. int fd = open(filename,
  53. O_RDWR | // Read/Write mode
  54. @@ -315,19 +348,28 @@ ExecuteResult execute_insert(Statement* statement, Table* table) {
  55. }
  56. Row* row_to_insert = &(statement->row_to_insert);
  57. + Cursor* cursor = table_end(table);
  58. - serialize_row(row_to_insert, row_slot(table, table->num_rows));
  59. + serialize_row(row_to_insert, cursor_value(cursor));
  60. table->num_rows += 1;
  61. + free(cursor);
  62. +
  63. return EXECUTE_SUCCESS;
  64. }
  65. ExecuteResult execute_select(Statement* statement, Table* table) {
  66. + Cursor* cursor = table_start(table);
  67. +
  68. Row row;
  69. - for (uint32_t i = 0; i < table->num_rows; i++) {
  70. - deserialize_row(row_slot(table, i), &row);
  71. + while (!(cursor->end_of_table)) {
  72. + deserialize_row(cursor_value(cursor), &row);
  73. print_row(&row);
  74. + cursor_advance(cursor);
  75. }
  76. +
  77. + free(cursor);
  78. +
  79. return EXECUTE_SUCCESS;
  80. }

Part 5 - Persistence to Disk

Part 7 - Introduction to the B-Tree

原文: https://cstack.github.io/db_tutorial/parts/part6.html