[CodeGen][NPM] Port PostRAMachineSinking to NPM#138497
Closed
optimisan wants to merge 1 commit into
Closed
Conversation
This was referenced May 5, 2025
Contributor
Author
|
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
bd51fd9 to
7b10c2c
Compare
742f16c to
72ba54e
Compare
Member
|
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-amdgpu Author: Akshat Oke (optimisan) ChangesPatch is 22.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138497.diff 9 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/MachineSink.h b/llvm/include/llvm/CodeGen/MachineSink.h
index 71bd7229b7598..eb9548dc82250 100644
--- a/llvm/include/llvm/CodeGen/MachineSink.h
+++ b/llvm/include/llvm/CodeGen/MachineSink.h
@@ -26,5 +26,16 @@ class MachineSinkingPass : public PassInfoMixin<MachineSinkingPass> {
function_ref<StringRef(StringRef)> MapClassName2PassName);
};
+class PostRAMachineSinkingPass
+ : public PassInfoMixin<PostRAMachineSinkingPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &);
+
+ MachineFunctionProperties getRequiredProperties() const {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_MACHINESINK_H
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 07dc86c6fccf2..e75f9c7a2cfe8 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -241,7 +241,7 @@ void initializePostDominatorTreeWrapperPassPass(PassRegistry &);
void initializePostInlineEntryExitInstrumenterPass(PassRegistry &);
void initializePostMachineSchedulerLegacyPass(PassRegistry &);
void initializePostRAHazardRecognizerLegacyPass(PassRegistry &);
-void initializePostRAMachineSinkingPass(PassRegistry &);
+void initializePostRAMachineSinkingLegacyPass(PassRegistry &);
void initializePostRASchedulerLegacyPass(PassRegistry &);
void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &);
void initializePrintFunctionPassWrapperPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 436b26852ce90..c6c00e8f25882 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -164,6 +164,7 @@ MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass())
MACHINE_FUNCTION_PASS("post-RA-hazard-rec", PostRAHazardRecognizerPass())
MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass(TM))
MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass(TM))
+MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass())
MACHINE_FUNCTION_PASS("post-ra-pseudos", ExpandPostRAPseudosPass())
MACHINE_FUNCTION_PASS("print", PrintMIRPass())
MACHINE_FUNCTION_PASS("print<livedebugvars>", LiveDebugVariablesPrinterPass(errs()))
@@ -315,7 +316,6 @@ DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter)
DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass)
DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass)
DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass)
-DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass)
DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass)
DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass)
DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index aa3591cb6be58..065fd4704ccfb 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -107,7 +107,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializePeepholeOptimizerLegacyPass(Registry);
initializePostMachineSchedulerLegacyPass(Registry);
initializePostRAHazardRecognizerLegacyPass(Registry);
- initializePostRAMachineSinkingPass(Registry);
+ initializePostRAMachineSinkingLegacyPass(Registry);
initializePostRASchedulerLegacyPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index aa2987b6710a3..be1a3ac125c65 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -2068,12 +2068,12 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
//===----------------------------------------------------------------------===//
namespace {
-class PostRAMachineSinking : public MachineFunctionPass {
+class PostRAMachineSinkingLegacy : public MachineFunctionPass {
public:
bool runOnMachineFunction(MachineFunction &MF) override;
static char ID;
- PostRAMachineSinking() : MachineFunctionPass(ID) {}
+ PostRAMachineSinkingLegacy() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "PostRA Machine Sink"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -2085,6 +2085,11 @@ class PostRAMachineSinking : public MachineFunctionPass {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
+};
+
+class PostRAMachineSinking {
+public:
+ bool run(MachineFunction &MF);
private:
/// Track which register units have been modified and used.
@@ -2103,10 +2108,10 @@ class PostRAMachineSinking : public MachineFunctionPass {
};
} // namespace
-char PostRAMachineSinking::ID = 0;
-char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;
+char PostRAMachineSinkingLegacy::ID = 0;
+char &llvm::PostRAMachineSinkingID = PostRAMachineSinkingLegacy::ID;
-INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
+INITIALIZE_PASS(PostRAMachineSinkingLegacy, "postra-machine-sink",
"PostRA Machine Sink", false, false)
static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, Register Reg,
@@ -2355,10 +2360,24 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
return Changed;
}
-bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
+bool PostRAMachineSinkingLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ return PostRAMachineSinking().run(MF);
+}
+
+PreservedAnalyses
+PostRAMachineSinkingPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ MFPropsModifier _(*this, MF);
+ if (!PostRAMachineSinking().run(MF))
+ return PreservedAnalyses::all();
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+bool PostRAMachineSinking::run(MachineFunction &MF) {
bool Changed = false;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
diff --git a/llvm/test/CodeGen/AArch64/bisect-post-ra-machine-sink.mir b/llvm/test/CodeGen/AArch64/bisect-post-ra-machine-sink.mir
index 15c1ccb0e609b..8c9bb88eb75c3 100644
--- a/llvm/test/CodeGen/AArch64/bisect-post-ra-machine-sink.mir
+++ b/llvm/test/CodeGen/AArch64/bisect-post-ra-machine-sink.mir
@@ -1,5 +1,6 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck -check-prefix=RUN-POSTRA %s
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=postra-machine-sink -opt-bisect-limit=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=BISECT-NO-RUN-POSTRA %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=postra-machine-sink -opt-bisect-limit=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=BISECT-NO-RUN-POSTRA %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 468e4208c510a..232d5b86ac035 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -9,9 +9,9 @@
; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
-; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
+; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
-; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
+; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-add...
[truncated]
|
arsenm
approved these changes
May 5, 2025
7b10c2c to
c7a0a3b
Compare
72ba54e to
15c1c3d
Compare
This was referenced May 6, 2025
c7a0a3b to
ff19035
Compare
15c1c3d to
41492e4
Compare
This was referenced May 12, 2025
ff19035 to
c644259
Compare
41492e4 to
aac1ecb
Compare
c644259 to
6ca0744
Compare
6ca0744 to
961d9a4
Compare
aac1ecb to
f39a691
Compare
Contributor
|
handled via #129690 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.

No description provided.