Skip to content

Commit 07e4c1b

Browse files
JIT: Cache start of cold section for reuse in 3-opt layout (#111365)
During cold code motion, if fgMoveColdBlocks finds a call-finally pair that begins with a cold block, it will try to move the entire pair, regardless of whether the tail is cold or not. If the tail isn't cold, 3-opt layout's detection of the last hot block may terminate early, causing the cold part of an EH region to also be considered for reordering. Because 3-opt expects each EH subregion being reordered to be contiguous, this split causes asserts. Caching the cold region start point in fgMoveColdBlocks means we don't need to find the first cold block in 3-opt, thus removing the potential of getting it wrong. While I was here, I decided to enhance fgMoveColdBlocks so that it can move cold call-finally pairs at the end of the hot main method body, and clean up 3-opt's initialization logic to reduce its allocation sizes. Fixes #111207.
1 parent 1dc213f commit 07e4c1b

1 file changed

Lines changed: 52 additions & 39 deletions

File tree

src/coreclr/jit/fgopt.cpp

Lines changed: 52 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -4828,7 +4828,7 @@ void Compiler::fgMoveColdBlocks()
48284828
}
48294829
};
48304830

4831-
BasicBlock* const lastMainBB = fgLastBBInMainFunction();
4831+
BasicBlock* lastMainBB = fgLastBBInMainFunction();
48324832
if (lastMainBB->IsFirst())
48334833
{
48344834
return;
@@ -4855,15 +4855,41 @@ void Compiler::fgMoveColdBlocks()
48554855

48564856
// We have moved all cold main blocks before lastMainBB to after lastMainBB.
48574857
// If lastMainBB itself is cold, move it to the end of the method to restore its relative ordering.
4858+
// But first, we can't move just the tail of a call-finally pair,
4859+
// so point lastMainBB to the pair's head, if necessary.
48584860
//
4859-
if (lastMainBB->isBBWeightCold(this) && !lastMainBB->isBBCallFinallyPairTail())
4861+
if (lastMainBB->isBBCallFinallyPairTail())
48604862
{
4863+
lastMainBB = lastMainBB->Prev();
4864+
}
4865+
4866+
BasicBlock* lastHotBB = nullptr;
4867+
if (lastMainBB->isBBWeightCold(this))
4868+
{
4869+
// lastMainBB is cold, so the block behind it (if there is one) is the last hot block
4870+
//
4871+
lastHotBB = lastMainBB->Prev();
4872+
4873+
// Move lastMainBB
4874+
//
48614875
BasicBlock* const newLastMainBB = fgLastBBInMainFunction();
48624876
if (lastMainBB != newLastMainBB)
48634877
{
48644878
moveBlock(lastMainBB, newLastMainBB);
48654879
}
48664880
}
4881+
else
4882+
{
4883+
// lastMainBB isn't cold, so it (or its call-finally pair tail) the last hot block
4884+
//
4885+
lastHotBB = lastMainBB->isBBCallFinallyPair() ? lastMainBB->Next() : lastMainBB;
4886+
}
4887+
4888+
// Save the beginning of the cold section for later.
4889+
// If lastHotBB is null, there isn't a hot section,
4890+
// so there's no point in differentiating between sections for layout purposes.
4891+
//
4892+
fgFirstColdBlock = (lastHotBB == nullptr) ? nullptr : lastHotBB->Next();
48674893
}
48684894

48694895
//-----------------------------------------------------------------------------
@@ -5136,10 +5162,8 @@ void Compiler::ThreeOptLayout::ConsiderEdge(FlowEdge* edge)
51365162
const unsigned srcPos = ordinals[srcBlk->bbPostorderNum];
51375163
const unsigned dstPos = ordinals[dstBlk->bbPostorderNum];
51385164

5139-
// Don't consider edges from outside the hot range.
5140-
// If 'srcBlk' has an ordinal of zero and it isn't the first block,
5141-
// it's not tracked by 'ordinals', so it's not in the hot section.
5142-
if ((srcPos == 0) && !srcBlk->IsFirst())
5165+
// Don't consider edges to or from outside the hot range (i.e. ordinal doesn't match 'blockOrder' position).
5166+
if ((srcBlk != blockOrder[srcPos]) || (dstBlk != blockOrder[dstPos]))
51435167
{
51445168
return;
51455169
}
@@ -5212,19 +5236,19 @@ void Compiler::ThreeOptLayout::AddNonFallthroughPreds(unsigned blockPos)
52125236
//
52135237
void Compiler::ThreeOptLayout::Run()
52145238
{
5215-
// Walk backwards through the main method body, looking for the last hot block.
52165239
// Since we moved all cold blocks to the end of the method already,
5217-
// we should have a span of hot blocks to consider reordering at the beginning of the method.
5218-
// While doing this, try to get as tight an upper bound for the number of hot blocks as possible.
5219-
// For methods without funclet regions, 'numBlocksUpperBound' is exact.
5220-
// Otherwise, it's off by the number of handler blocks.
5221-
BasicBlock* finalBlock;
5222-
unsigned numBlocksUpperBound = compiler->fgBBcount;
5223-
for (finalBlock = compiler->fgLastBBInMainFunction();
5224-
!finalBlock->IsFirst() && finalBlock->isBBWeightCold(compiler); finalBlock = finalBlock->Prev())
5225-
{
5226-
numBlocksUpperBound--;
5227-
}
5240+
// we should have a span of hot blocks to consider reordering at the beginning of the method
5241+
// (unless none of the blocks are cold relative to the rest of the method,
5242+
// in which case we will reorder the whole main method body).
5243+
BasicBlock* const finalBlock = (compiler->fgFirstColdBlock != nullptr) ? compiler->fgFirstColdBlock->Prev()
5244+
: compiler->fgLastBBInMainFunction();
5245+
5246+
// Reset cold section pointer, in case we decide to do hot/cold splitting later
5247+
compiler->fgFirstColdBlock = nullptr;
5248+
5249+
// We better have an end block for the hot section, and it better not be the start of a call-finally pair.
5250+
assert(finalBlock != nullptr);
5251+
assert(!finalBlock->isBBCallFinallyPair());
52285252

52295253
// For methods with fewer than three candidate blocks, we cannot partition anything
52305254
if (finalBlock->IsFirst() || finalBlock->Prev()->IsFirst())
@@ -5233,39 +5257,28 @@ void Compiler::ThreeOptLayout::Run()
52335257
return;
52345258
}
52355259

5236-
// If only the first block of a call-finally pair is hot, include the whole pair in the hot section anyway.
5237-
// This ensures the call-finally pair won't be split up when swapping partitions.
5238-
if (finalBlock->isBBCallFinallyPair())
5239-
{
5240-
finalBlock = finalBlock->Next();
5241-
numBlocksUpperBound++;
5242-
}
5243-
5260+
// Get an upper bound on the number of hot blocks without walking the whole block list.
5261+
// We will only consider blocks reachable via normal flow.
5262+
const unsigned numBlocksUpperBound = compiler->m_dfsTree->GetPostOrderCount();
52445263
assert(numBlocksUpperBound != 0);
5245-
blockOrder = new (compiler, CMK_BasicBlock) BasicBlock*[numBlocksUpperBound];
5246-
tempOrder = new (compiler, CMK_BasicBlock) BasicBlock*[numBlocksUpperBound];
5264+
blockOrder = new (compiler, CMK_BasicBlock) BasicBlock*[numBlocksUpperBound * 2];
5265+
tempOrder = (blockOrder + numBlocksUpperBound);
52475266

52485267
// Initialize the current block order.
52495268
// Note that we default-initialized 'ordinals' with zeros.
52505269
// Block reordering shouldn't change the method's entry point,
52515270
// so if a block has an ordinal of zero and it's not 'fgFirstBB',
5252-
// the block wasn't visited below, so it's not in the range of candidate blocks.
5253-
unsigned nextPostorderNum = compiler->m_dfsTree->GetPostOrderCount();
5271+
// the block wasn't visited below, meaning it's not in the range of candidate blocks.
52545272
for (BasicBlock* const block : compiler->Blocks(compiler->fgFirstBB, finalBlock))
52555273
{
5256-
assert(numCandidateBlocks < numBlocksUpperBound);
5257-
blockOrder[numCandidateBlocks] = tempOrder[numCandidateBlocks] = block;
5258-
5259-
// Unreachable blocks should have been pushed out of the candidate set of blocks.
5260-
// However, the entries of unreachable EH regions are left in-place to facilitate reestablishing contiguity,
5261-
// so it is possible for us to encounter unreachable blocks.
5262-
// When we do, assign them postorder numbers that can be used as keys into 'ordinals'.
52635274
if (!compiler->m_dfsTree->Contains(block))
52645275
{
5265-
assert(nextPostorderNum < compiler->fgBBcount);
5266-
block->bbPostorderNum = nextPostorderNum++;
5276+
continue;
52675277
}
52685278

5279+
assert(numCandidateBlocks < numBlocksUpperBound);
5280+
blockOrder[numCandidateBlocks] = tempOrder[numCandidateBlocks] = block;
5281+
52695282
assert(ordinals[block->bbPostorderNum] == 0);
52705283
ordinals[block->bbPostorderNum] = numCandidateBlocks++;
52715284

0 commit comments

Comments
 (0)