Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-22110

InnoDB unnecessarily writes unmodified pages

    XMLWordPrintable

    Details

      Description

      At least since a change in MySQL 5.5.33, InnoDB appears to be invoking buf_flush_note_modification() on pages that were exclusively latched but not modified in a mini-transaction. I encountered this problem while debugging MDEV-22097 (which appears to be a separate problem).

      The unnecessary page writes should at least be hurting performance.

      Initially, I was suspecting that the writes might break crash recovery as well, but it looks like that it is not the case. When a page is added to the flush list and eventually written out, the FIL_PAGE_LSN field will be updated to the commit LSN of the mini-transaction (to the value of log_sys.lsn at the end of the mini-transaction).

      1. Let us assume that the latest real modification to the page was at LSN 10000.
      2. Then, a mini-transaction (LSN 15000) X-latches but does not modify the page.
      3. The page will be scheduled to be flushed with FIL_PAGE_LSN 15000. (Earlier, it might have been written with FIL_PAGE_LSN 10000 already.)
      4. The server is killed.
      5. Recovery will scan logs for the page since the latest checkpoint C.
      6. If C>=10000, there will be no log to apply for the page.
      7. If C<10000, we will find some records for the page, and we may have to read the page (but could also skip it thanks to MDEV-12699).
      8. If we read the page, then, if FIL_PAGE_LSN is at least 10000, we will not apply any log records. Only if neither the ‘extra’ page flush nor the necessary page flush took place, we will apply the log records.

      Here is a work-in-progress patch to expose the problem on 10.5:

      diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
      index e77df7c61d5..12bbc1a61a9 100644
      --- a/storage/innobase/mtr/mtr0mtr.cc
      +++ b/storage/innobase/mtr/mtr0mtr.cc
      @@ -294,50 +294,56 @@ struct DebugCheck {
       };
       #endif
       
      -/** Release a resource acquired by the mini-transaction. */
      -struct ReleaseBlocks {
      -	/** Release specific object */
      -	ReleaseBlocks(lsn_t start_lsn, lsn_t end_lsn)
      -		:
      -		m_end_lsn(end_lsn),
      -		m_start_lsn(start_lsn)
      -	{
      -		/* Do nothing */
      -	}
      -
      -	/** Add the modified page to the buffer flush list. */
      -	void add_dirty_page_to_flush_list(mtr_memo_slot_t* slot) const
      -	{
      -		ut_ad(m_end_lsn > 0);
      -		ut_ad(m_start_lsn > 0);
      -
      -		buf_block_t*	block;
      -
      -		block = reinterpret_cast<buf_block_t*>(slot->object);
      -
      -		buf_flush_note_modification(block, m_start_lsn, m_end_lsn);
      -	}
      -
      -	/** @return true always. */
      -	bool operator()(mtr_memo_slot_t* slot) const
      -	{
      -		if (slot->object != NULL) {
      -
      -			if (slot->type == MTR_MEMO_PAGE_X_FIX
      -			    || slot->type == MTR_MEMO_PAGE_SX_FIX) {
      -
      -				add_dirty_page_to_flush_list(slot);
      -			}
      -		}
      +/** Release page latches held by the mini-transaction. */
      +struct ReleaseBlocks
      +{
      +  const lsn_t start, end;
      +#ifdef UNIV_DEBUG
      +  const mtr_buf_t &memo;
       
      -		return(true);
      -	}
      +  ReleaseBlocks(lsn_t start, lsn_t end, const mtr_buf_t &memo) :
      +    start(start), end(end), memo(memo)
      +#else /* UNIV_DEBUG */
      +  ReleaseBlocks(lsn_t start, lsn_t end, const mtr_buf_t&) :
      +    start(start), end(end)
      +#endif /* UNIV_DEBUG */
      +  {
      +    ut_ad(start);
      +    ut_ad(end);
      +  }
       
      -	/** Mini-transaction REDO start LSN */
      -	lsn_t		m_end_lsn;
      +  /** @return true always */
      +  bool operator()(mtr_memo_slot_t* slot) const
      +  {
      +    if (!slot->object)
      +      return true;
      +    switch (slot->type) {
      +    case MTR_MEMO_PAGE_X_FIX:
      +    case MTR_MEMO_PAGE_SX_FIX:
      +      break;
      +    default:
      +      return true;
      +    }
       
      -	/** Mini-transaction REDO end LSN */
      -	lsn_t		m_start_lsn;
      +    buf_block_t *block= static_cast<buf_block_t*>(slot->object);
      +#ifdef UNIV_DEBUG
      +    /* We add to the flush list only blocks for which redo log records
      +    have been written, or blocks that have really been modified. */
      +    extern my_bool opt_bootstrap;
      +    if (opt_bootstrap && block->page.id == page_id_t(0, FSP_TRX_SYS_PAGE_NO))
      +      /* FIXME: On bootstrap, buf_dblwr_create() is X-latching the TRX_SYS page
      +      even though it is not writing redo log for it. */;
      +    else if (block->page.id.space() == SRV_TMP_SPACE_ID)
      +      /* modify() is not invoked on pages of temporary tablespaces */;
      +    else
      +    {
      +      Iterate<FindPage> iteration(FindPage(block->frame, MTR_MEMO_MODIFY));
      +      ut_ad(!memo.for_each_block_in_reverse(iteration));
      +    }
      +#endif /* UNIV_DEBUG */
      +    buf_flush_note_modification(block, start, end);
      +    return true;
      +  }
       };
       
       /** Write the block contents to the REDO log */
      @@ -414,7 +420,8 @@ void mtr_t::commit()
           log_mutex_exit();
       
           m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
      -                                     (ReleaseBlocks(start_lsn, m_commit_lsn)));
      +                                     (ReleaseBlocks(start_lsn, m_commit_lsn,
      +                                                    m_memo)));
           if (m_made_dirty)
             log_flush_order_mutex_exit();
       
      

      We might want to edit the slot->type when the X-latched or SX-latched page is actually modified for the first time, so that ReleaseBlocks will be able to skip the unnecessary call. Instead of traversing the mtr_t::m_memo, it might be more efficient to use a flag in buf_block_t for this purpose.

        Attachments

          Issue Links

            Activity

              People

              Assignee:
              marko Marko Mäkelä
              Reporter:
              marko Marko Mäkelä
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

                Dates

                Created:
                Updated:
                Resolved: