Skip to content

Dropping boxes with unsized tails generates a bunch of duplicate IR #152773

@scottmcm

Description

@scottmcm

Demo: https://rust.godbolt.org/z/no6Txcee5 (this is the pre-optimization IR; LLVM of course cleans it up, even at -O1)

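Specifically, notice that the IR below computes the size and alignment of the box's contents twice: once to produce `%size`, and then again, with an identical instruction sequence, to produce `%align`: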

define internal void @<alloc[eaa61ef137c44e19]::boxed::Box<example[2f01af99ac6d4a46]::Foo<dyn core[b32a8ec1c97161b7]::fmt::Debug>> as core[b32a8ec1c97161b7]::ops::drop::Drop>::drop(ptr noalias noundef align 8 dereferenceable(16) %self) unnamed_addr {
start:
  %0 = alloca [8 x i8], align 8
  %1 = alloca [8 x i8], align 8
  %ptr.0 = load ptr, ptr %self, align 8
  %2 = getelementptr inbounds i8, ptr %self, i64 8
  %ptr.1 = load ptr, ptr %2, align 8
  %3 = icmp ne ptr %ptr.0, null
  call void @llvm.assume(i1 %3)
  %4 = icmp ne ptr %ptr.0, null
  call void @llvm.assume(i1 %4)
  call void @llvm.lifetime.start.p0(ptr %1)
  %5 = getelementptr inbounds i8, ptr %ptr.1, i64 8
  %6 = load i64, ptr %5, align 8
  %7 = getelementptr inbounds i8, ptr %ptr.1, i64 16
  %8 = load i64, ptr %7, align 8
  %9 = icmp ugt i64 4, %8
  %10 = select i1 %9, i64 4, i64 %8
  %11 = add i64 4, %6
  %12 = sub i64 %10, 1
  %13 = add i64 %11, %12
  %14 = sub i64 0, %10
  %15 = and i64 %13, %14
  store i64 %15, ptr %1, align 8
  %size = load i64, ptr %1, align 8
  call void @llvm.lifetime.end.p0(ptr %1)
  call void @llvm.lifetime.start.p0(ptr %0)
  %16 = getelementptr inbounds i8, ptr %ptr.1, i64 8
  %17 = load i64, ptr %16, align 8
  %18 = getelementptr inbounds i8, ptr %ptr.1, i64 16
  %19 = load i64, ptr %18, align 8
  %20 = icmp ugt i64 4, %19
  %21 = select i1 %20, i64 4, i64 %19
  %22 = add i64 4, %17
  %23 = sub i64 %21, 1
  %24 = add i64 %22, %23
  %25 = sub i64 0, %21
  %26 = and i64 %24, %25
  store i64 %21, ptr %0, align 8
  %align = load i64, ptr %0, align 8
  call void @llvm.lifetime.end.p0(ptr %0)
  br label %bb7

bb7:
  %27 = sub i64 %align, 1
  %28 = icmp ule i64 %27, 9223372036854775807
  call void @llvm.assume(i1 %28)
  br label %bb9

bb9:
  %29 = icmp eq i64 %size, 0
  br i1 %29, label %bb3, label %bb1

bb3:
  ret void

bb1:
  %_7 = getelementptr inbounds i8, ptr %self, i64 16
  %30 = icmp ne ptr %ptr.0, null
  call void @llvm.assume(i1 %30)
  call void @<alloc[eaa61ef137c44e19]::alloc::Global as core[b32a8ec1c97161b7]::alloc::Allocator>::deallocate(ptr noalias noundef nonnull readonly align 1 captures(address, read_provenance) %_7, ptr noundef nonnull %ptr.0, i64 noundef %align, i64 noundef %size) #7
  br label %bb3

bb6:
  unreachable

bb8:
  unreachable
}

Metadata

Metadata

Assignees

Labels

- A-box — Area: Our favorite opsem complication
- A-codegen — Area: Code generation
- C-optimization — Category: An issue highlighting optimization opportunities or PRs implementing such
- I-heavy — Issue: Problems and improvements with respect to binary size of generated code.
- T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.

Type

No type
No fields configured for issues without a type.

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions