Skip to content

[CodeGen][NPM] Support CodeGenSCCOrder in pipeline#136818

Merged
optimisan merged 7 commits into
main from
users/optimisan/pb/codegenscc-order
Jul 9, 2025
Merged

[CodeGen][NPM] Support CodeGenSCCOrder in pipeline#136818
optimisan merged 7 commits into
main from
users/optimisan/pb/codegenscc-order

Conversation

@optimisan

@optimisan optimisan commented Apr 23, 2025

Copy link
Copy Markdown
Contributor

Wrap passes in the post-order CGSCC pass manager in the codegen pass builder.

I am adding the pipeline test in this PR, but it is not yet complete.

@optimisan optimisan changed the title [CodeGen][NPM] Support CodeGenSCCOrder in pipeline pb/codegenscc-order [CodeGen][NPM] Support CodeGenSCCOrder in pipeline Apr 23, 2025
@optimisan optimisan marked this pull request as ready for review April 23, 2025 06:41
@llvmbot

llvmbot commented Apr 23, 2025

Copy link
Copy Markdown
Member

@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (optimisan)

Changes

Wrap passes in the post-order CGSCC pass manager in the codegen pass builder.


Full diff: https://github.com/llvm/llvm-project/pull/136818.diff

3 Files Affected:

  • (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+73-16)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2)
  • (added) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+144)
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 61e377de0c424..ce67cdb6eb8ff 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -207,10 +208,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
   class AddIRPass {
   public:
     AddIRPass(ModulePassManager &MPM, const DerivedT &PB) : MPM(MPM), PB(PB) {}
-    ~AddIRPass() {
-      if (!FPM.isEmpty())
-        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-    }
+    ~AddIRPass() { flushFPMToMPM(); }
 
     template <typename PassT>
     void operator()(PassT &&Pass, StringRef Name = PassT::name()) {
@@ -228,16 +226,40 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
         FPM.addPass(std::forward<PassT>(Pass));
       } else {
         // Add Module Pass
-        if (!FPM.isEmpty()) {
-          MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-          FPM = FunctionPassManager();
-        }
-
+        flushFPMToMPM();
         MPM.addPass(std::forward<PassT>(Pass));
       }
     }
 
+    /// Setting this will add passes to the CGSCC pass manager.
+    void requireCGSCCOrder() {
+      if (PB.AddInCGSCCOrder)
+        return;
+      flushFPMToMPM();
+      PB.AddInCGSCCOrder = true;
+    }
+
+    /// Stop adding passes to the CGSCC pass manager.
+    /// Existing passes won't be removed.
+    void stopAddingInCGSCCOrder() {
+      if (!PB.AddInCGSCCOrder)
+        return;
+      flushFPMToMPM();
+      PB.AddInCGSCCOrder = false;
+    }
+
   private:
+    void flushFPMToMPM() {
+      if (!FPM.isEmpty()) {
+        if (PB.AddInCGSCCOrder) {
+          MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+              createCGSCCToFunctionPassAdaptor(std::move(FPM))));
+        } else {
+          MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+        }
+        FPM = FunctionPassManager();
+      }
+    }
     ModulePassManager &MPM;
     FunctionPassManager FPM;
     const DerivedT &PB;
@@ -254,7 +276,11 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
         FPM.addPass(
             createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
         FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>());
-        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+        if (this->PB.AddInCGSCCOrder) {
+          MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+              createCGSCCToFunctionPassAdaptor(std::move(FPM))));
+        } else
+          MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
       }
     }
 
@@ -273,12 +299,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
         MFPM.addPass(std::forward<PassT>(Pass));
       } else {
         // Add Module Pass
-        if (!MFPM.isEmpty()) {
-          MPM.addPass(createModuleToFunctionPassAdaptor(
-              createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
-          MFPM = MachineFunctionPassManager();
-        }
-
+        flushMFPMToMPM();
         MPM.addPass(std::forward<PassT>(Pass));
       }
 
@@ -286,7 +307,39 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
         C(Name, MFPM);
     }
 
+    /// Setting this will add passes to the CGSCC pass manager.
+    void requireCGSCCOrder() {
+      if (PB.AddInCGSCCOrder)
+        return;
+      flushMFPMToMPM();
+      PB.AddInCGSCCOrder = true;
+    }
+
+    /// Stop adding passes to the CGSCC pass manager.
+    /// Existing passes won't be removed.
+    void stopAddingInCGSCCOrder() {
+      if (!PB.AddInCGSCCOrder)
+        return;
+      flushMFPMToMPM();
+      PB.AddInCGSCCOrder = false;
+    }
+
   private:
+    void flushMFPMToMPM() {
+      if (!MFPM.isEmpty()) {
+        if (PB.AddInCGSCCOrder) {
+          MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+              createCGSCCToFunctionPassAdaptor(
+                  createFunctionToMachineFunctionPassAdaptor(
+                      std::move(MFPM)))));
+        } else {
+          MPM.addPass(createModuleToFunctionPassAdaptor(
+              createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
+        }
+        MFPM = MachineFunctionPassManager();
+      }
+    }
+
     ModulePassManager &MPM;
     MachineFunctionPassManager MFPM;
     const DerivedT &PB;
@@ -552,6 +605,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
   /// Helper variable for `-start-before/-start-after/-stop-before/-stop-after`
   mutable bool Started = true;
   mutable bool Stopped = true;
+  mutable bool AddInCGSCCOrder = false;
 };
 
 template <typename Derived, typename TargetMachineT>
@@ -810,6 +864,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addISelPrepare(
     AddIRPass &addPass) const {
   derived().addPreISel(addPass);
 
+  if (Opt.RequiresCodeGenSCCOrder)
+    addPass.requireCGSCCOrder();
+
   addPass(CallBrPreparePass());
   // Add both the safe stack and the stack protection passes: each of them will
   // only protect functions that have corresponding attributes.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b6cc5137d711a..c2b89fd8188f8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2067,6 +2067,8 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
   // being run on them, which causes crashes in the resource usage analysis).
   addPass(AMDGPULowerBufferFatPointersPass(TM));
 
+  addPass.requireCGSCCOrder();
+
   Base::addCodeGenPrepare(addPass);
 
   if (isPassEnabled(EnableLoadStoreVectorizer))
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
new file mode 100644
index 0000000000000..96a533a19c88a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -0,0 +1,144 @@
+; UNSUPPORTED: expensive_checks
+; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -disable-verify -print-pipeline-passes < %s 2>&1 \
+; RUN:   | tr ',' '\n' | FileCheck -check-prefix=GCN-O3 %s
+
+; REQUIRES: asserts
+
+; GCN-O3: require<MachineModuleAnalysis>
+; GCN-O3-NEXT: require<profile-summary>
+; GCN-O3-NEXT: require<collector-metadata>
+; GCN-O3-NEXT: pre-isel-intrinsic-lowering
+; GCN-O3-NEXT: function(expand-large-div-rem
+; GCN-O3-NEXT: expand-fp)
+; GCN-O3-NEXT: amdgpu-remove-incompatible-functions
+; GCN-O3-NEXT: amdgpu-printf-runtime-binding
+; GCN-O3-NEXT: amdgpu-lower-ctor-dtor
+; GCN-O3-NEXT: function(amdgpu-image-intrinsic-opt)
+; GCN-O3-NEXT: expand-variadics
+; GCN-O3-NEXT: amdgpu-always-inline
+; GCN-O3-NEXT: always-inline
+; GCN-O3-NEXT: amdgpu-export-kernel-runtime-handles
+; GCN-O3-NEXT: amdgpu-sw-lower-lds
+; GCN-O3-NEXT: amdgpu-lower-module-lds
+; GCN-O3-NEXT: function(infer-address-spaces
+; GCN-O3-NEXT: amdgpu-atomic-optimizer
+; GCN-O3-NEXT: atomic-expand
+; GCN-O3-NEXT: amdgpu-promote-alloca
+; GCN-O3-NEXT: separate-const-offset-from-gep<>
+; GCN-O3-NEXT: slsr
+; GCN-O3-NEXT: gvn<>
+; GCN-O3-NEXT: nary-reassociate
+; GCN-O3-NEXT: early-cse<>
+; GCN-O3-NEXT: amdgpu-codegenprepare
+; GCN-O3-NEXT: loop-mssa(loop-reduce)
+; GCN-O3-NEXT: mergeicmps
+; GCN-O3-NEXT: expand-memcmp
+; GCN-O3-NEXT: gc-lowering
+; GCN-O3-NEXT: lower-constant-intrinsics
+; GCN-O3-NEXT: UnreachableBlockElimPass
+; GCN-O3-NEXT: consthoist
+; GCN-O3-NEXT: ReplaceWithVeclib
+; GCN-O3-NEXT: partially-inline-libcalls
+; GCN-O3-NEXT: ee-instrument<post-inline>
+; GCN-O3-NEXT: scalarize-masked-mem-intrin
+; GCN-O3-NEXT: ExpandReductionsPass
+; GCN-O3-NEXT: gvn<>
+; GCN-O3-NEXT: amdgpu-lower-kernel-arguments)
+; GCN-O3-NEXT: amdgpu-lower-buffer-fat-pointers
+; GCN-O3-NEXT: cgscc(function(codegenprepare
+; GCN-O3-NEXT: load-store-vectorizer
+; GCN-O3-NEXT: lower-switch
+; GCN-O3-NEXT: lower-invoke
+; GCN-O3-NEXT: UnreachableBlockElimPass
+; GCN-O3-NEXT: flatten-cfg
+; GCN-O3-NEXT: sink
+; GCN-O3-NEXT: amdgpu-late-codegenprepare
+; GCN-O3-NEXT: amdgpu-unify-divergent-exit-nodes
+; GCN-O3-NEXT: fix-irreducible
+; GCN-O3-NEXT: unify-loop-exits
+; GCN-O3-NEXT: StructurizeCFGPass
+; GCN-O3-NEXT: amdgpu-annotate-uniform
+; GCN-O3-NEXT: si-annotate-control-flow
+; GCN-O3-NEXT: amdgpu-rewrite-undef-for-phi
+; GCN-O3-NEXT: lcssa))
+; GCN-O3-NEXT: amdgpu-perf-hint
+; GCN-O3-NEXT: cgscc(function(require<uniformity>
+; GCN-O3-NEXT: callbr-prepare
+; GCN-O3-NEXT: safe-stack
+; GCN-O3-NEXT: stack-protector))
+; GCN-O3-NEXT: cgscc(function(machine-function(amdgpu-isel
+; GCN-O3-NEXT: si-fix-sgpr-copies
+; GCN-O3-NEXT: si-i1-copies
+; GCN-O3-NEXT: finalize-isel
+; GCN-O3-NEXT: early-tailduplication
+; GCN-O3-NEXT: opt-phis
+; GCN-O3-NEXT: stack-coloring
+; GCN-O3-NEXT: localstackalloc
+; GCN-O3-NEXT: dead-mi-elimination
+; GCN-O3-NEXT: early-machinelicm
+; GCN-O3-NEXT: machine-cse
+; GCN-O3-NEXT: machine-sink
+; GCN-O3-NEXT: peephole-opt
+; GCN-O3-NEXT: dead-mi-elimination
+; GCN-O3-NEXT: si-fold-operands
+; GCN-O3-NEXT: gcn-dpp-combine
+; GCN-O3-NEXT: si-load-store-opt
+; GCN-O3-NEXT: si-peephole-sdwa
+; GCN-O3-NEXT: early-machinelicm
+; GCN-O3-NEXT: machine-cse
+; GCN-O3-NEXT: si-fold-operands
+; GCN-O3-NEXT: dead-mi-elimination
+; GCN-O3-NEXT: si-shrink-instructions
+; GCN-O3-NEXT: detect-dead-lanes
+; GCN-O3-NEXT: InitUndefPass
+; GCN-O3-NEXT: ProcessImplicitDefsPass
+; GCN-O3-NEXT: unreachable-mbb-elimination
+; GCN-O3-NEXT: require<live-vars>
+; GCN-O3-NEXT: require<machine-loops>
+; GCN-O3-NEXT: phi-node-elimination
+; GCN-O3-NEXT: two-address-instruction
+; GCN-O3-NEXT: register-coalescer
+; GCN-O3-NEXT: rename-independent-subregs
+; GCN-O3-NEXT: machine-scheduler
+; GCN-O3-NEXT: greedy<all>
+; GCN-O3-NEXT: amdgpu-nsa-reassign
+; GCN-O3-NEXT: VirtRegRewriterPass
+; GCN-O3-NEXT: stack-slot-coloring
+; GCN-O3-NEXT: machine-cp
+; GCN-O3-NEXT: machinelicm
+; GCN-O3-NEXT: si-fix-vgpr-copies
+; GCN-O3-NEXT: si-optimize-exec-masking
+; GCN-O3-NEXT: remove-redundant-debug-values
+; GCN-O3-NEXT: fixup-statepoint-caller-saved
+; GCN-O3-NEXT: PostRAMachineSinkingPass
+; GCN-O3-NEXT: ShrinkWrapPass
+; GCN-O3-NEXT: PrologEpilogInserterPass
+; GCN-O3-NEXT: branch-folder
+; GCN-O3-NEXT: tailduplication
+; GCN-O3-NEXT: machine-latecleanup
+; GCN-O3-NEXT: machine-cp
+; GCN-O3-NEXT: post-ra-pseudos
+; GCN-O3-NEXT: postmisched
+; GCN-O3-NEXT: block-placement
+; GCN-O3-NEXT: fentry-insert
+; GCN-O3-NEXT: xray-instrumentation
+; GCN-O3-NEXT: patchable-function
+; GCN-O3-NEXT: gcn-create-vopd
+; GCN-O3-NEXT: si-memory-legalizer
+; GCN-O3-NEXT: si-insert-waitcnts
+; GCN-O3-NEXT: si-late-branch-lowering
+; GCN-O3-NEXT: si-pre-emit-peephole
+; GCN-O3-NEXT: post-RA-hazard-rec
+; GCN-O3-NEXT: AMDGPUWaitSGPRHazardsPass
+; GCN-O3-NEXT: amdgpu-insert-delay-alu
+; GCN-O3-NEXT: branch-relaxation
+; GCN-O3-NEXT: remove-loads-into-fake-uses
+; GCN-O3-NEXT: live-debug-values
+; GCN-O3-NEXT: machine-sanmd
+; GCN-O3-NEXT: stack-frame-layout)
+; GCN-O3-NEXT: invalidate<machine-function-info>))
+
+
+define void @empty() {
+  ret void
+}

@arsenm arsenm requested a review from aeubanks April 23, 2025 08:17
Comment thread llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll Outdated
Comment thread llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll Outdated
Comment thread llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll Outdated
Comment thread llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll Outdated
Comment thread llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll Outdated
Comment thread llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll Outdated
Comment thread llvm/include/llvm/Passes/CodeGenPassBuilder.h Outdated
optimisan added 4 commits July 7, 2025 05:42
The pipeline is printed on a single line, so putting CHECK lines on separate
lines allows extra characters in between (and the test will not error out when
extra passes are present in the pipeline).
@optimisan optimisan force-pushed the users/optimisan/pb/codegenscc-order branch from a5389e1 to 032da65 Compare July 8, 2025 06:26

@cdevadas cdevadas left a comment

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@optimisan optimisan merged commit f786916 into main Jul 9, 2025
7 checks passed
@optimisan optimisan deleted the users/optimisan/pb/codegenscc-order branch July 9, 2025 06:02
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants