Unroll StringBuilder.Append for const string #85894
Conversation
|
Very nice use-case! One problem is that it might slightly regress the case when it's not inlined, as well as Mono. Another option is to tune the inliner - if it sees |
|
This seems to be enough to make it inlineable for const input (it gives the inliner an extra hint) |
|
You are right, we need to keep the small-size optimization; I figured it was only there for constant separators/newline, and I didn't think about Mono.
That's a cool trick! Unfortunately, it seems to make it inline pretty aggressively, even when the input is not constant. It causes the code size to increase a lot where it's not really necessary.
What's funny is that if I use the byte variant of Buffer.Memmove, it seems to hit a pretty good spot now: it inlines for a constant length, but not otherwise. I know very little about how the JIT works, so I'm out of my league here - but is it possible that an even better solution is to somehow make both variants of Buffer.Memmove have the same inlining "weight"? It feels a bit odd that they behave differently here. |
|
With current code: public static void Example(StringBuilder builder, string s)
{
builder.Append(s);
builder.Append("1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN");
builder.AppendLine();
}; Method Program:Example(System.Text.StringBuilder,System.String)
G_M17123_IG01: ;; offset=0000H
push rsi
sub rsp, 32
vzeroupper
mov rsi, rcx
;; size=11 bbWeight=1 PerfScore 2.50
G_M17123_IG02: ;; offset=000BH
cmp byte ptr [rsi], sil
test rdx, rdx
je SHORT G_M17123_IG04
;; size=8 bbWeight=1 PerfScore 4.25
G_M17123_IG03: ;; offset=0013H
mov r8d, dword ptr [rdx+08H]
add rdx, 12
mov rcx, rsi
call [System.Text.StringBuilder:Append(byref,int):this]
;; size=17 bbWeight=0.50 PerfScore 2.75
G_M17123_IG04: ;; offset=0024H
mov rdx, 0x1EB802028D8 ; '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'
add rdx, 12
mov rcx, gword ptr [rsi+08H]
mov r8d, dword ptr [rsi+18H]
lea eax, [r8+32H]
cmp dword ptr [rcx+08H], eax
jb SHORT G_M17123_IG06
;; size=31 bbWeight=1 PerfScore 9.00
G_M17123_IG05: ;; offset=0043H
movsxd rax, r8d
lea rcx, bword ptr [rcx+2*rax+10H]
vmovdqu ymm0, ymmword ptr [rdx]
vmovdqu ymm1, ymmword ptr [rdx+20H]
vmovdqu ymm2, ymmword ptr [rdx+40H]
vmovdqu xmm3, xmmword ptr [rdx+54H]
vmovdqu ymmword ptr [rcx], ymm0
vmovdqu ymmword ptr [rcx+20H], ymm1
vmovdqu ymmword ptr [rcx+40H], ymm2
vmovdqu xmmword ptr [rcx+54H], xmm3
add r8d, 50
mov dword ptr [rsi+18H], r8d
jmp SHORT G_M17123_IG07
;; size=56 bbWeight=0.50 PerfScore 15.75
G_M17123_IG06: ;; offset=007BH
mov rcx, rsi
mov r8d, 50
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=15 bbWeight=0.50 PerfScore 1.75
G_M17123_IG07: ;; offset=008AH
mov rdx, 0x1EB80202958 ; ' '
add rdx, 12
mov rcx, gword ptr [rsi+08H]
mov r8d, dword ptr [rsi+18H]
lea eax, [r8+02H]
cmp dword ptr [rcx+08H], eax
jb SHORT G_M17123_IG09
;; size=31 bbWeight=1 PerfScore 9.00
G_M17123_IG08: ;; offset=00A9H
movsxd rdx, r8d
lea rcx, bword ptr [rcx+2*rdx+10H]
mov word ptr [rcx], 13
mov word ptr [rcx+02H], 10
mov dword ptr [rsi+18H], eax
jmp SHORT G_M17123_IG10
;; size=24 bbWeight=0.50 PerfScore 3.12
G_M17123_IG09: ;; offset=00C1H
mov rcx, rsi
mov r8d, 2
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=15 bbWeight=0.50 PerfScore 1.75
G_M17123_IG10: ;; offset=00D0H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M17123_IG11: ;; offset=00D1H
add rsp, 32
pop rsi
ret
;; size=6 bbWeight=1 PerfScore 1.75
; Total bytes of code: 215 |
Works perfectly for this scenario, thanks! I'll post some examples here. I assume you will want to take it from here and close this?
Append non-const
public static void Append(StringBuilder builder, string s)
{
builder.Append(s);
}Before ; Method Program:Append(System.Text.StringBuilder,System.String)
G_M43621_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M43621_IG02: ;; offset=0004H
cmp byte ptr [rcx], cl
test rdx, rdx
je SHORT G_M43621_IG04
;; size=7 bbWeight=1 PerfScore 4.25
G_M43621_IG03: ;; offset=000BH
mov r8d, dword ptr [rdx+08H]
add rdx, 12
call [System.Text.StringBuilder:Append(byref,int):this]
;; size=14 bbWeight=0.49 PerfScore 2.57
G_M43621_IG04: ;; offset=0019H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M43621_IG05: ;; offset=001AH
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 31After ; Method Program:Append(System.Text.StringBuilder,System.String)
G_M43621_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M43621_IG02: ;; offset=0004H
cmp byte ptr [rcx], cl
test rdx, rdx
je SHORT G_M43621_IG04
;; size=7 bbWeight=1 PerfScore 4.25
G_M43621_IG03: ;; offset=000BH
mov r8d, dword ptr [rdx+08H]
add rdx, 12
call [System.Text.StringBuilder:Append(byref,int):this]
;; size=14 bbWeight=0.50 PerfScore 2.62
G_M43621_IG04: ;; offset=0019H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M43621_IG05: ;; offset=001AH
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 31
Append by index non-const
public static void AppendIndex(StringBuilder builder, string s)
{
builder.Append(s, 2, 16);
}Before ; Method Program:AppendIndex(System.Text.StringBuilder,System.String)
G_M19771_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M19771_IG02: ;; offset=0004H
mov r8d, 2
mov r9d, 16
cmp dword ptr [rcx], ecx
call [System.Text.StringBuilder:Append(System.String,int,int):System.Text.StringBuilder:this]
nop
;; size=21 bbWeight=1 PerfScore 6.75
G_M19771_IG03: ;; offset=0019H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 30After ; Method Program:AppendIndex(System.Text.StringBuilder,System.String)
G_M19771_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M19771_IG02: ;; offset=0004H
mov r8d, 2
mov r9d, 16
cmp dword ptr [rcx], ecx
call [System.Text.StringBuilder:Append(System.String,int,int):System.Text.StringBuilder:this]
nop
;; size=21 bbWeight=1 PerfScore 6.75
G_M19771_IG03: ;; offset=0019H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 30
AppendLine
public static void AppendLine(StringBuilder builder, string s)
{
builder.AppendLine(s);
}Before ; Method Program:AppendLine(System.Text.StringBuilder,System.String)
G_M48267_IG01: ;; offset=0000H
push rsi
sub rsp, 32
mov rsi, rcx
;; size=8 bbWeight=1 PerfScore 1.50
G_M48267_IG02: ;; offset=0008H
cmp byte ptr [rsi], sil
test rdx, rdx
je SHORT G_M48267_IG04
;; size=8 bbWeight=1 PerfScore 4.25
G_M48267_IG03: ;; offset=0010H
mov r8d, dword ptr [rdx+08H]
add rdx, 12
mov rcx, rsi
call [System.Text.StringBuilder:Append(byref,int):this]
;; size=17 bbWeight=0.49 PerfScore 2.70
G_M48267_IG04: ;; offset=0021H
mov rdx, 0x283002028D8 ; ' '
add rdx, 12
mov rcx, rsi
mov r8d, 2
call [System.Text.StringBuilder:Append(byref,int):this]
nop
;; size=30 bbWeight=1 PerfScore 4.25
G_M48267_IG05: ;; offset=003FH
add rsp, 32
pop rsi
ret
;; size=6 bbWeight=1 PerfScore 1.75
; Total bytes of code: 69After ; Method Program:AppendLine(System.Text.StringBuilder,System.String)
G_M48267_IG01: ;; offset=0000H
push rsi
sub rsp, 32
mov rsi, rcx
;; size=8 bbWeight=1 PerfScore 1.50
G_M48267_IG02: ;; offset=0008H
cmp byte ptr [rsi], sil
test rdx, rdx
je SHORT G_M48267_IG04
;; size=8 bbWeight=1 PerfScore 4.25
G_M48267_IG03: ;; offset=0010H
mov r8d, dword ptr [rdx+08H]
add rdx, 12
mov rcx, rsi
call [System.Text.StringBuilder:Append(byref,int):this]
;; size=17 bbWeight=0.50 PerfScore 2.75
G_M48267_IG04: ;; offset=0021H
mov rdx, 0x207003028D8 ; ' '
add rdx, 12
mov rcx, gword ptr [rsi+08H]
mov r8d, dword ptr [rsi+18H]
lea eax, [r8+02H]
cmp dword ptr [rcx+08H], eax
jb SHORT G_M48267_IG06
;; size=31 bbWeight=1 PerfScore 9.00
G_M48267_IG05: ;; offset=0040H
movsxd rdx, r8d
lea rcx, bword ptr [rcx+2*rdx+10H]
mov word ptr [rcx], 13
mov word ptr [rcx+02H], 10
mov dword ptr [rsi+18H], eax
jmp SHORT G_M48267_IG07
;; size=24 bbWeight=0.50 PerfScore 3.12
G_M48267_IG06: ;; offset=0058H
mov rcx, rsi
mov r8d, 2
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=15 bbWeight=0.50 PerfScore 1.75
G_M48267_IG07: ;; offset=0067H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M48267_IG08: ;; offset=0068H
add rsp, 32
pop rsi
ret
;; size=6 bbWeight=1 PerfScore 1.75
; Total bytes of code: 110
Append const
public static void AppendConst(StringBuilder builder)
{
builder.Append("1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN");
}Before ; Method Program:AppendConst(System.Text.StringBuilder)
G_M5650_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M5650_IG02: ;; offset=0004H
cmp byte ptr [rcx], cl
mov rdx, 0x14A003028D8 ; '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'
add rdx, 12
mov r8d, 50
call [System.Text.StringBuilder:Append(byref,int):this]
nop
;; size=29 bbWeight=1 PerfScore 7.00
G_M5650_IG03: ;; offset=0021H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 38After ; Method Program:AppendConst(System.Text.StringBuilder)
G_M5650_IG01: ;; offset=0000H
sub rsp, 40
vzeroupper
;; size=7 bbWeight=1 PerfScore 1.25
G_M5650_IG02: ;; offset=0007H
mov rdx, 0x20B002028D8 ; '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'
add rdx, 12
mov r8, gword ptr [rcx+08H]
mov eax, dword ptr [rcx+18H]
lea r9d, [rax+32H]
cmp dword ptr [r8+08H], r9d
jb SHORT G_M5650_IG04
;; size=31 bbWeight=1 PerfScore 9.00
G_M5650_IG03: ;; offset=0026H
movsxd r9, eax
lea r8, bword ptr [r8+2*r9+10H]
vmovdqu ymm0, ymmword ptr [rdx]
vmovdqu ymm1, ymmword ptr [rdx+20H]
vmovdqu ymm2, ymmword ptr [rdx+40H]
vmovdqu xmm3, xmmword ptr [rdx+54H]
vmovdqu ymmword ptr [r8], ymm0
vmovdqu ymmword ptr [r8+20H], ymm1
vmovdqu ymmword ptr [r8+40H], ymm2
vmovdqu xmmword ptr [r8+54H], xmm3
add eax, 50
mov dword ptr [rcx+18H], eax
jmp SHORT G_M5650_IG05
;; size=58 bbWeight=0.50 PerfScore 15.75
G_M5650_IG04: ;; offset=0060H
mov r8d, 50
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=12 bbWeight=0.50 PerfScore 1.62
G_M5650_IG05: ;; offset=006CH
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M5650_IG06: ;; offset=006DH
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 114
Append by index const
public static void AppendConstIndex(StringBuilder builder)
{
builder.Append("1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN", 2, 16);
}Before ; Method Program:AppendConstIndex(System.Text.StringBuilder)
G_M5612_IG01: ;; offset=0000H
sub rsp, 40
vzeroupper
;; size=7 bbWeight=1 PerfScore 1.25
G_M5612_IG02: ;; offset=0007H
mov rdx, 0x257002028D8 ; '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'
add rdx, 12
add rdx, 4
mov r8, gword ptr [rcx+08H]
mov eax, dword ptr [rcx+18H]
lea r9d, [rax+10H]
cmp dword ptr [r8+08H], r9d
jb SHORT G_M5612_IG04
;; size=35 bbWeight=1 PerfScore 9.25
G_M5612_IG03: ;; offset=002AH
movsxd r9, eax
lea r8, bword ptr [r8+2*r9+10H]
vmovdqu ymm0, ymmword ptr [rdx]
vmovdqu ymmword ptr [r8], ymm0
add eax, 16
mov dword ptr [rcx+18H], eax
jmp SHORT G_M5612_IG05
;; size=25 bbWeight=0.50 PerfScore 5.75
G_M5612_IG04: ;; offset=0043H
mov r8d, 16
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=12 bbWeight=0.50 PerfScore 1.62
G_M5612_IG05: ;; offset=004FH
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M5612_IG06: ;; offset=0050H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 85After ; Method Program:AppendConstIndex(System.Text.StringBuilder)
G_M5612_IG01: ;; offset=0000H
sub rsp, 40
vzeroupper
;; size=7 bbWeight=1 PerfScore 1.25
G_M5612_IG02: ;; offset=0007H
mov rdx, 0x18E802028D8 ; '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'
add rdx, 12
add rdx, 4
mov r8, gword ptr [rcx+08H]
mov eax, dword ptr [rcx+18H]
lea r9d, [rax+10H]
cmp dword ptr [r8+08H], r9d
jb SHORT G_M5612_IG04
;; size=35 bbWeight=1 PerfScore 9.25
G_M5612_IG03: ;; offset=002AH
movsxd r9, eax
lea r8, bword ptr [r8+2*r9+10H]
vmovdqu ymm0, ymmword ptr [rdx]
vmovdqu ymmword ptr [r8], ymm0
add eax, 16
mov dword ptr [rcx+18H], eax
jmp SHORT G_M5612_IG05
;; size=25 bbWeight=0.50 PerfScore 5.75
G_M5612_IG04: ;; offset=0043H
mov r8d, 16
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=12 bbWeight=0.50 PerfScore 1.62
G_M5612_IG05: ;; offset=004FH
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M5612_IG06: ;; offset=0050H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 85 |
Feel free to integrate into your PR so we can merge it |
EgorBo
left a comment
There was a problem hiding this comment.
Thanks for noticing the opportunity!
|
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch
Issue Details
Buffer.Memmove is now being unrolled for constant lengths. If we simplify Append(ref char, int) a bit, the JIT can inline it.
Const string example
public static void Example(StringBuilder stringBuilder)
{
stringBuilder.Append("1234567890abcdefghijklmnopqrstuvwxyzåäö");
}Before: ; Method Program:Example(System.Text.StringBuilder)
G_M35345_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M35345_IG02: ;; offset=0004H
cmp byte ptr [rcx], cl
mov rdx, 0x154802028D8 ; '1'
add rdx, 12
mov r8d, 39
call [System.Text.StringBuilder:Append(byref,int):this]
nop
;; size=29 bbWeight=1 PerfScore 7.00
G_M35345_IG03: ;; offset=0021H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 38After: ; Method Program:Example(System.Text.StringBuilder)
G_M35345_IG01: ;; offset=0000H
sub rsp, 40
vzeroupper
;; size=7 bbWeight=1 PerfScore 1.25
G_M35345_IG02: ;; offset=0007H
mov rdx, 0x18C802028D8 ; '1'
add rdx, 12
mov r8, gword ptr [rcx+08H]
mov eax, dword ptr [rcx+18H]
lea r9d, [rax+27H]
cmp dword ptr [r8+08H], r9d
jb SHORT G_M35345_IG04
;; size=31 bbWeight=1 PerfScore 9.00
G_M35345_IG03: ;; offset=0026H
movsxd r9, eax
lea r8, bword ptr [r8+2*r9+10H]
vmovdqu ymm0, ymmword ptr [rdx]
vmovdqu ymm1, ymmword ptr [rdx+20H]
vmovdqu xmm2, xmmword ptr [rdx+3EH]
vmovdqu ymmword ptr [r8], ymm0
vmovdqu ymmword ptr [r8+20H], ymm1
vmovdqu xmmword ptr [r8+3EH], xmm2
add eax, 39
mov dword ptr [rcx+18H], eax
jmp SHORT G_M35345_IG05
;; size=47 bbWeight=0.50 PerfScore 12.25
G_M35345_IG04: ;; offset=0055H
mov r8d, 39
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=12 bbWeight=0.50 PerfScore 1.62
G_M35345_IG05: ;; offset=0061H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M35345_IG06: ;; offset=0062H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 103
AppendLine example
public static void Example(StringBuilder stringBuilder)
{
stringBuilder.AppendLine();
}Before: ; Method Program:Example(System.Text.StringBuilder)
G_M35345_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M35345_IG02: ;; offset=0004H
cmp byte ptr [rcx], cl
mov rdx, 0x195802028D8 ; ' '
add rdx, 12
mov r8d, 2
call [System.Text.StringBuilder:Append(byref,int):this]
nop
;; size=29 bbWeight=1 PerfScore 7.00
G_M35345_IG03: ;; offset=0021H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 38After: ; Method Program:Example(System.Text.StringBuilder)
G_M35345_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M35345_IG02: ;; offset=0004H
mov rdx, 0x17C002028D8 ; ' '
add rdx, 12
mov r8, gword ptr [rcx+08H]
mov eax, dword ptr [rcx+18H]
lea r9d, [rax+02H]
cmp dword ptr [r8+08H], r9d
jb SHORT G_M35345_IG04
;; size=31 bbWeight=1 PerfScore 9.00
G_M35345_IG03: ;; offset=0023H
movsxd r9, eax
lea r8, bword ptr [r8+2*r9+10H]
mov r9d, dword ptr [rdx]
mov dword ptr [r8], r9d
add eax, 2
mov dword ptr [rcx+18H], eax
jmp SHORT G_M35345_IG05
;; size=22 bbWeight=0.50 PerfScore 3.75
G_M35345_IG04: ;; offset=0039H
mov r8d, 2
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=12 bbWeight=0.50 PerfScore 1.62
G_M35345_IG05: ;; offset=0045H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M35345_IG06: ;; offset=0046H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 75
@EgorBo can you please take a look at this?
|
|
@AndyAyersMS PTAL inliner change |
|
This apparently is regressing Fortunes benchmarks with middleware in aspnet. |
|
Thanks! The one that keeps giving. Makes sense since I couldn't repro the regression on the newer machines (aspnet-perf-lin). |
Buffer.Memmove is now being unrolled for constant lengths. If we simplify Append(ref char, int) a bit, the JIT can inline it.
As a result, StringBuilder.Append(string) and StringBuilder.AppendLine() can now be unrolled when called with constant input.
I didn't seem to need to add any AggressiveInlining attribute, so I left it out.
Const string example
Before:
After:
AppendLine example
Before:
After:
@EgorBo can you please take a look at this