MySQL-Innodb-刷脏的具体执行

参数

  • innodb_lru_scan_depth
    调用栈
    批量刷脏会从lru和flush list中flush。
    innodb_lru_scan_depth影响着lru的遍历深度,遍历到的才会执行buf_flush_ready_for_replace与buf_flush_ready_for_flush。
    批量flush的时候会把lru中的非脏页刷掉吗?会的
    如果bufferpool满的话,free的逻辑是咋样的?文章后面有答案。
    会把未提交事务的脏页刷到硬盘上吗,比如大事务?
    验证了一下会的,另外跟事务大小没关系
    那么在脏页已刷新到硬盘、且做了 checkpoint 的情况下,故障恢复是怎么做的?先用 redo 前滚,再用 undo 回滚未提交的事务。那么对于已经 flush 到硬盘上的未提交修改如何回滚呢?因为 undo 是逻辑日志(记录的是反向操作而非物理页内容),所以同样可以据此回滚这些修改。
    批量刷脏如何保证日志先行?
buf_flush_write_block_low
----log_write_up_to(bpage->newest_modification, true);
pc_flush_slot
----buf_flush_LRU_list
--------buf_flush_do_batch(BUF_FLUSH_LRU)
------------buf_flush_batch
----------------buf_do_LRU_batch
--------------------buf_free_from_unzip_LRU_list_batch
--------------------buf_flush_LRU_list_batch
------------------------buf_flush_ready_for_replace=>buf_LRU_free_page
------------------------buf_flush_ready_for_flush=>buf_flush_page_and_try_neighbors
----------------------------buf_flush_page_and_try_neighbors
--------------------------------buf_flush_try_neighbors
------------------------------------buf_flush_page
----------------------------------------buf_flush_write_block_low
----buf_flush_do_batch(BUF_FLUSH_LIST)
--------buf_flush_batch
------------buf_do_flush_list_batch
-----------------buf_flush_page_and_try_neighbors
---------------------buf_flush_try_neighbors
-------------------------buf_flush_page
-----------------------------buf_flush_write_block_low
---------------------------------buf_dblwr_add_to_batch
--------buf_flush_end
------------buf_dblwr_flush_buffered_writes

双写缓存:目前的理解是 buf_dblwr_add_to_batch 会把 page 拷贝到内存中的 dblwr 缓冲区,如果缓冲区没有满就直接返回;满了就触发同步写 dblwr 区域(并 fsync),然后再异步把数据页写到各自的真实位置。
有可能本次 flush 的页特别少,写不满 dblwr 缓冲区,这种情况下会在 buf_flush_end 里再调用一次 buf_dblwr_flush_buffered_writes,把残留的页刷出去。

/********************************************************************//**
Posts a buffer page for writing as part of a batch flush. The page is
copied into the in-memory doublewrite buffer; the actual write to the
doublewrite area and to the datafile happens later, in
buf_dblwr_flush_buffered_writes(). If the doublewrite memory buffer is
full, calls buf_dblwr_flush_buffered_writes() and waits for free
space to appear. */
void
buf_dblwr_add_to_batch(
/*====================*/
    buf_page_t* bpage)  /*!< in: buffer block to write */
{
    /* The page must belong to a tablespace file (not e.g. a free
    block). */
    ut_a(buf_page_in_file(bpage));

try_again:
    mutex_enter(&buf_dblwr->mutex);

    /* first_free is the index of the next free slot in the dblwr
    buffer; it can never exceed the configured batch size. */
    ut_a(buf_dblwr->first_free <= srv_doublewrite_batch_size);

    if (buf_dblwr->batch_running) {

        /* A batch flush is already being written out: wait for it
        to finish, then retry. This is not nearly as bad as it
        looks. There is only one page_cleaner thread which does
        background flushing in batches, therefore it is unlikely to
        be a contention point. The only exception is when a user
        thread is forced to do a flush batch because of a sync
        checkpoint. */
        int64_t sig_count = os_event_reset(buf_dblwr->b_event);
        mutex_exit(&buf_dblwr->mutex);

        /* Sleep until b_event is signalled (or was signalled after
        the reset above -- sig_count prevents a lost wakeup). */
        os_event_wait_low(buf_dblwr->b_event, sig_count);
        goto try_again;
    }

    if (buf_dblwr->first_free == srv_doublewrite_batch_size) {
        /* Buffer is full: flush it synchronously to make room,
        then retry the reservation from the top. */
        mutex_exit(&(buf_dblwr->mutex));

        buf_dblwr_flush_buffered_writes();

        goto try_again;
    }

    /* p points at the slot in the contiguous write buffer where
    this page's image will be copied. */
    byte*   p = buf_dblwr->write_buf
        + univ_page_size.physical() * buf_dblwr->first_free;

    if (bpage->size.is_compressed()) {
        UNIV_MEM_ASSERT_RW(bpage->zip.data, bpage->size.physical());
        /* Copy the compressed page and clear the rest of the slot,
        so no stale bytes from a previous batch are written out. */

        memcpy(p, bpage->zip.data, bpage->size.physical());

        memset(p + bpage->size.physical(), 0x0,
               univ_page_size.physical() - bpage->size.physical());
    } else {
        /* Uncompressed pages must be fully-fledged file pages with
        an attached frame. */
        ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);

        UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
                   bpage->size.logical());

        memcpy(p, ((buf_block_t*) bpage)->frame, bpage->size.logical());
    }

    /* Remember which buffer-pool page occupies this slot, so the
    flush phase can later dispatch the write to its real location. */
    buf_dblwr->buf_block_arr[buf_dblwr->first_free] = bpage;

    buf_dblwr->first_free++;
    buf_dblwr->b_reserved++;

    /* While we hold the mutex and no batch is running, the number
    of reserved slots must equal the number of filled slots. */
    ut_ad(!buf_dblwr->batch_running);
    ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
    ut_ad(buf_dblwr->b_reserved <= srv_doublewrite_batch_size);

    if (buf_dblwr->first_free == srv_doublewrite_batch_size) {
        /* This page filled the last slot: kick off the batch write
        immediately instead of waiting for buf_flush_end(). */
        mutex_exit(&(buf_dblwr->mutex));

        buf_dblwr_flush_buffered_writes();

        return;
    }

    mutex_exit(&(buf_dblwr->mutex));
}

buf_dblwr_flush_buffered_writes

/* Flushes the pages accumulated in the doublewrite memory buffer:
first writes them sequentially into the doublewrite area of the system
tablespace and fsyncs it, then dispatches (asynchronous) writes of the
same pages to their real datafile locations. This ordering is what
makes torn-page recovery possible. Called by buf_flush_end() for a
partially-filled batch, or by buf_dblwr_add_to_batch() when the buffer
fills up. */
void
buf_dblwr_flush_buffered_writes(void)
/*=================================*/
{
    byte*       write_buf;
    ulint       first_free;
    ulint       len;

    if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
        /* Doublewrite is disabled or not yet created: just sync
        the data files directly. */
        buf_dblwr_sync_datafiles();
        return;
    }

    ut_ad(!srv_read_only_mode);

try_again:
    mutex_enter(&buf_dblwr->mutex);

    /* Write first to doublewrite buffer blocks. We use synchronous
    aio and thus know that file write has been completed when the
    control returns. */

    if (buf_dblwr->first_free == 0) {

        /* Nothing buffered: nothing to do for doublewrite. */
        mutex_exit(&buf_dblwr->mutex);

        /* Wake possible simulated aio thread as there could be
        system temporary tablespace pages active for flushing.
        Note: system temporary tablespace pages are not scheduled
        for doublewrite. */
        os_aio_simulated_wake_handler_threads();

        return;
    }

    if (buf_dblwr->batch_running) {
        /* Another thread is running the batch right now. Wait
        for it to finish, then re-check from the top (the buffer
        may be empty by then). */
        int64_t sig_count = os_event_reset(buf_dblwr->b_event);
        mutex_exit(&buf_dblwr->mutex);

        os_event_wait_low(buf_dblwr->b_event, sig_count);
        goto try_again;
    }

    ut_a(!buf_dblwr->batch_running);
    ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);

    /* Disallow anyone else to post to doublewrite buffer or to
    start another batch of flushing. */
    buf_dblwr->batch_running = true;
    /* Snapshot first_free now; see the long comment before the
    dispatch loop below for why the local copy is needed. */
    first_free = buf_dblwr->first_free;

    /* Now safe to release the mutex. Note that though no other
    thread is allowed to post to the doublewrite batch flushing
    but any threads working on single page flushes are allowed
    to proceed. */
    mutex_exit(&buf_dblwr->mutex);

    write_buf = buf_dblwr->write_buf;

    /* Debug pass: validate every buffered page image before it is
    written anywhere. len2 tracks the byte offset of slot i within
    write_buf. */
    for (ulint len2 = 0, i = 0;
         i < buf_dblwr->first_free;
         len2 += UNIV_PAGE_SIZE, i++) {

        const buf_block_t*  block;

        block = (buf_block_t*) buf_dblwr->buf_block_arr[i];

        if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
            || block->page.zip.data) {
            /* No simple validate for compressed
            pages exists. */
            continue;
        }

        /* Check that the actual page in the buffer pool is
        not corrupt and the LSN values are sane. */
        buf_dblwr_check_block(block);

        /* Check that the page as written to the doublewrite
        buffer has sane LSN values. */
        buf_dblwr_check_page_lsn(write_buf + len2);
    }

    /* Write out the first block of the doublewrite buffer
    (synchronous write; at most TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
    pages). */
    len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
             buf_dblwr->first_free) * UNIV_PAGE_SIZE;

    fil_io(IORequestWrite, true,
           page_id_t(TRX_SYS_SPACE, buf_dblwr->block1), univ_page_size,
           0, len, (void*) write_buf, NULL);

    if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
        /* No unwritten pages in the second block. */
        goto flush;
    }

    /* Write out the second block of the doublewrite buffer. */
    len = (buf_dblwr->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
           * UNIV_PAGE_SIZE;

    write_buf = buf_dblwr->write_buf
            + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;

    fil_io(IORequestWrite, true,
           page_id_t(TRX_SYS_SPACE, buf_dblwr->block2), univ_page_size,
           0, len, (void*) write_buf, NULL);

flush:
    /* increment the doublewrite flushed pages counter */
    srv_stats.dblwr_pages_written.add(buf_dblwr->first_free);
    srv_stats.dblwr_writes.inc();

    /* Now flush the doublewrite buffer data to disk. This fsync
    must complete before the datafile writes below are issued --
    otherwise a crash could tear a data page with no intact copy
    in the doublewrite area. */
    fil_flush(TRX_SYS_SPACE);

    /* We know that the writes have been flushed to disk now
    and in recovery we will find them in the doublewrite buffer
    blocks. Next do the writes to the intended positions. */

    /* Up to this point first_free and buf_dblwr->first_free are
    same because we have set the buf_dblwr->batch_running flag
    disallowing any other thread to post any request but we
    can't safely access buf_dblwr->first_free in the loop below.
    This is so because it is possible that after we are done with
    the last iteration and before we terminate the loop, the batch
    gets finished in the IO helper thread and another thread posts
    a new batch setting buf_dblwr->first_free to a higher value.
    If this happens and we are using buf_dblwr->first_free in the
    loop termination condition then we'll end up dispatching
    the same block twice from two different threads. */
    ut_ad(first_free == buf_dblwr->first_free);
    for (ulint i = 0; i < first_free; i++) {
        /* Asynchronously write each page to its real location in
        its datafile (second "sync" argument is false). */
        buf_dblwr_write_block_to_datafile(
            buf_dblwr->buf_block_arr[i], false);
    }

    /* Wake possible simulated aio thread to actually post the
    writes to the operating system. We don't flush the files
    at this point. We leave it to the IO helper thread to flush
    datafiles when the whole batch has been processed. */
    os_aio_simulated_wake_handler_threads();
}

答:如果bufferpool满的话,free的逻辑是咋样的?
看效果图,一直维持在1024。


Snipaste_2020-10-16_00-14-47.png

具体的代码,在buf_flush_LRU_list_batch函数里


image.png

https://developer.aliyun.com/article/41038

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。