1
0
mirror of https://github.com/DataDog/go-profiler-notes.git synced 2025-07-12 23:50:13 +02:00

stack-unwind-overhead

This commit is contained in:
Felix Geisendörfer
2021-04-04 14:39:53 +02:00
parent f8f6e88e3a
commit b393c68281
4 changed files with 695 additions and 0 deletions

View File

@ -0,0 +1,46 @@
# stack-unwind-overhead
This directory contains benchmarks to explore which factors impact stack unwinding in Go. It's informed by an analysis of the `gopclntab` unwinding implementation.
## Result 1: O(N) Stack Depth
The benchmark below shows that stack unwinding has O(N) complexity with regard to the number of call frames on the stack:
```
BenchmarkStackDepth/8-12 2208600 547.2 ns/op
BenchmarkStackDepth/16-12 1447922 810.8 ns/op
BenchmarkStackDepth/32-12 915291 1338 ns/op
BenchmarkStackDepth/64-12 488719 2366 ns/op
BenchmarkStackDepth/128-12 264735 4462 ns/op
BenchmarkStackDepth/256-12 137575 8643 ns/op
BenchmarkStackDepth/512-12 68355 17316 ns/op
BenchmarkStackDepth/1024-12 34710 34810 ns/op
```
## Result 2: O(N) Function Size
Perhaps surprisingly, stack unwinding is also O(N) with regard to the size of the generated machine code for the function:
```
BenchmarkFunctionSize/1-12 2562176 462.8 ns/op
BenchmarkFunctionSize/2-12 2509465 484.7 ns/op
BenchmarkFunctionSize/4-12 2356609 504.6 ns/op
BenchmarkFunctionSize/8-12 2095870 568.3 ns/op
BenchmarkFunctionSize/16-12 1778889 669.7 ns/op
BenchmarkFunctionSize/32-12 1396009 856.0 ns/op
BenchmarkFunctionSize/64-12 943807 1269 ns/op
BenchmarkFunctionSize/128-12 516487 2271 ns/op
BenchmarkFunctionSize/256-12 277821 4490 ns/op
```
## Disclaimer
YMMV. In particular, the cost attributed to function size also depends on the program counter at which the function is being unwound. All tests were done on my local machine:
```
go test -bench .
goos: darwin
goarch: amd64
pkg: github.com/felixge/go-profiler-notes/examples/stack-unwind-overhead
cpu: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
```

View File

@ -0,0 +1,3 @@
module github.com/felixge/go-profiler-notes/examples/stack-unwind-overhead
go 1.16

View File

@ -0,0 +1,18 @@
// Package main is a code generator that creates functions of different size.
package main
import (
"fmt"
"os"
)
// main delegates to run. When run reports an error, the error is printed to
// standard error and the process exits with status 1.
func main() {
	err := run()
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}
// run is the fallible entry point called by main. It is currently a stub: it
// performs no work and always reports success.
func run() error {
	// No steps are implemented yet, so there is nothing that can fail.
	var err error
	return err
}

View File

@ -0,0 +1,628 @@
package main
import (
"fmt"
"runtime"
"testing"
)
// BenchmarkStackDepth measures the impact of stack depth on the time it takes
// to create a stack trace. It runs one sub-benchmark per depth, doubling from
// 8 up to 1024 frames, and captures the stack with runtime.Callers.
func BenchmarkStackDepth(b *testing.B) {
	for d := 8; d <= 1024; d = d * 2 {
		b.Run(fmt.Sprintf("%d", d), func(b *testing.B) {
			// The buffer holds exactly d entries, so runtime.Callers reports d
			// whenever at least d frames are on the stack.
			callers := make([]uintptr, d)
			atStackDepth(d, func() {
				// Growing the stack to the target depth is setup work; keep
				// it out of the measured time.
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					if n := runtime.Callers(0, callers); n != d {
						b.Fatalf("got=%d want=%d", n, d)
					}
				}
			})
		})
	}
}
// atStackDepth calls fn once the stack, counted from atStackDepth's own frame
// outwards, is exactly depth frames deep. While the stack is shallower than
// that it recurses into itself to add another frame. It panics with
// "depth exceeded" if the stack is already deeper than depth.
func atStackDepth(depth int, fn func()) {
	// A few spare slots let runtime.Callers report a count above depth, which
	// is how an overly deep stack is detected.
	frames := make([]uintptr, depth+10)
	switch have := runtime.Callers(1, frames); {
	case have > depth:
		panic("depth exceeded")
	case have < depth:
		atStackDepth(depth, fn)
	default:
		fn()
	}
}
// BenchmarkFunctionSize measures how the amount of code in a function on the
// stack affects the time it takes to capture a stack trace. Each sub-benchmark
// is named after N and calls runtime.Callers from a callback invoked by
// dostuffN, whose body contains N calls to dostuff.
func BenchmarkFunctionSize(b *testing.B) {
	// Ordered table of sizes and the function that provides each one.
	cases := []struct {
		size int
		fn   func(func())
	}{
		{1, dostuff1},
		{2, dostuff2},
		{4, dostuff4},
		{8, dostuff8},
		{16, dostuff16},
		{32, dostuff32},
		{64, dostuff64},
		{128, dostuff128},
		{256, dostuff256},
	}
	callers := make([]uintptr, 32)
	for _, tc := range cases {
		tc := tc // per-iteration copy for the closure below
		b.Run(fmt.Sprintf("%d", tc.size), func(b *testing.B) {
			tc.fn(func() {
				// The dostuff calls that run before this callback are setup
				// work; keep them out of the measured time.
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					runtime.Callers(0, callers)
				}
			})
		})
	}
}
// dostuff fills a fresh map with two entries and returns its length. The
// result is not the point: the function is supposed to generate a bunch of
// machine code that will hopefully be inlined into its callers.
// NOTE(review): whether the compiler actually inlines it is not checked here;
// confirm with -gcflags=-m.
func dostuff() int {
m := map[int]string{}
m[0] = "foo"
m[100] = "bar"
return len(m)
}
// dostuff1 calls dostuff once and then invokes fn. BenchmarkFunctionSize uses
// it as its size-1 case.
func dostuff1(fn func()) {
dostuff()
fn()
}
// dostuff2 calls dostuff 2 times and then invokes fn. BenchmarkFunctionSize
// uses it as its size-2 case.
func dostuff2(fn func()) {
dostuff()
dostuff()
fn()
}
// dostuff4 calls dostuff 4 times and then invokes fn. BenchmarkFunctionSize
// uses it as its size-4 case.
func dostuff4(fn func()) {
dostuff()
dostuff()
dostuff()
dostuff()
fn()
}
// dostuff8 calls dostuff 8 times and then invokes fn. BenchmarkFunctionSize
// uses it as its size-8 case.
func dostuff8(fn func()) {
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
fn()
}
func dostuff16(fn func()) {
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
fn()
}
func dostuff32(fn func()) {
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
fn()
}
func dostuff64(fn func()) {
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
fn()
}
func dostuff128(fn func()) {
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
fn()
}
func dostuff256(fn func()) {
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
dostuff()
fn()
}