btr0btr.c: (72d15bcc) · Commits · Software / OSDI20 Artifacts / mariadb

innobase/btr/btr0btr.c

+29 −32

Original line number	Diff line number	Diff line
		@@ -76,9 +76,6 @@ make them consecutive on disk if possible. From the other file segment
		we allocate pages for the non-leaf levels of the tree.
		*/

		/* If this many inserts occur sequentially, it affects page split */
		#define BTR_PAGE_SEQ_INSERT_LIMIT 5

		/******************************************************************
		Creates a new index page to the tree (not the root, and also not
		used in page reorganization). */
		@@ -1086,18 +1083,18 @@ btr_page_get_split_rec_to_left(
		page = btr_cur_get_page(cursor);
		insert_point = btr_cur_get_rec(cursor);

		if ((page_header_get_ptr(page, PAGE_LAST_INSERT)
		== page_rec_get_next(insert_point))
		&& (page_header_get_field(page, PAGE_DIRECTION) == PAGE_LEFT)
		&& ((page_header_get_field(page, PAGE_N_DIRECTION)
		>= BTR_PAGE_SEQ_INSERT_LIMIT)
		\|\| (page_header_get_field(page, PAGE_N_DIRECTION) + 1
		>= page_get_n_recs(page)))) {
		if (page_header_get_ptr(page, PAGE_LAST_INSERT)
		== page_rec_get_next(insert_point)) {

		infimum = page_get_infimum_rec(page);

		if ((infimum != insert_point)
		&& (page_rec_get_next(infimum) != insert_point)) {
		/* If the convergence is in the middle of a page, include also
		the record immediately before the new insert to the upper
		page. Otherwise, we could repeatedly move from page to page
		lots of records smaller than the convergence point. */

		if (infimum != insert_point
		&& page_rec_get_next(infimum) != insert_point) {

		*split_rec = insert_point;
		} else {
		@@ -1131,29 +1128,29 @@ btr_page_get_split_rec_to_right(
		page = btr_cur_get_page(cursor);
		insert_point = btr_cur_get_rec(cursor);

		if ((page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point)
		&& (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)
		&& ((page_header_get_field(page, PAGE_N_DIRECTION)
		>= BTR_PAGE_SEQ_INSERT_LIMIT)
		\|\| (page_header_get_field(page, PAGE_N_DIRECTION) + 1
		>= page_get_n_recs(page)))) {
		/* We use eager heuristics: if the new insert would be right after
		the previous insert on the same page, we assume that there is a
		pattern of sequential inserts here. */

		if (page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point) {

		supremum = page_get_supremum_rec(page);

		if ((page_rec_get_next(insert_point) != supremum)
		&& (page_rec_get_next(page_rec_get_next(insert_point))
		!= supremum)
		&& (page_rec_get_next(page_rec_get_next(
		page_rec_get_next(insert_point)))
		!= supremum)) {
		if (page_rec_get_next(insert_point) != supremum
		&& page_rec_get_next(page_rec_get_next(insert_point))
		!= supremum) {

		/* If there are >= 3 user records up from the insert
		point, split all but 2 off */
		/* If there are >= 2 user records up from the insert
		point, split all but 1 off. We want to keep one because
		then sequential inserts can use the adaptive hash
		index, as they can do the necessary checks of the right
		search position just by looking at the records on this
		page. */

		*split_rec = page_rec_get_next(page_rec_get_next(
		page_rec_get_next(insert_point)));
		*split_rec = page_rec_get_next(
		page_rec_get_next(insert_point));
		} else {
		/* Else split at inserted record */
		/* Else split at the new record to insert */
		*split_rec = NULL;
		}

innobase/btr/btr0cur.c

+20 −4

Original line number	Diff line number	Diff line
		@@ -2671,10 +2671,11 @@ btr_estimate_number_of_different_key_vals(

		btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);

		/* Count the number of different key values minus one
		for each prefix of the key on this index page: we subtract
		one because otherwise our algorithm would give a wrong
		estimate for an index where there is just one key value */
		/* Count the number of different key values for each prefix of
		the key on this index page. If the prefix does not determine
		the index record uniquely in te B-tree, then we subtract one
		because otherwise our algorithm would give a wrong estimate
		for an index where there is just one key value. */

		page = btr_cur_get_page(&cursor);

		@@ -2696,6 +2697,9 @@ btr_estimate_number_of_different_key_vals(
		&matched_bytes);

		for (j = matched_fields + 1; j <= n_cols; j++) {
		/* We add one if this index record has
		a different prefix from the previous */

		n_diff[j]++;
		}

		@@ -2705,6 +2709,18 @@ btr_estimate_number_of_different_key_vals(
		rec = page_rec_get_next(rec);
		}

		if (n_cols == dict_index_get_n_unique_in_tree(index)) {
		/* We add one because we know that the first record
		on the page certainly had a different prefix than the
		last record on the previous index page in the
		alphabetical order. Before this fix, if there was
		just one big record on each clustered index page, the
		algorithm grossly underestimated the number of rows
		in the table. */

		n_diff[n_cols]++;
		}

		total_external_size +=
		btr_rec_get_externally_stored_len(rec);
		mtr_commit(&mtr);