diff --git a/examples/stack-unwind-overhead/README.md b/examples/stack-unwind-overhead/README.md new file mode 100644 index 0000000..69a6f02 --- /dev/null +++ b/examples/stack-unwind-overhead/README.md @@ -0,0 +1,46 @@ +# stack-unwind-overhead + +This directory contains benchmarks to explore which factors impact stack unwinding in Go. It's informed by an analysis of the `gopclntab` unwinding implementation. + +## Result 1: O(N) Stack Depth + +The benchmark below shows that stack unwinding has O(N) complexity with regard to the number of call frames on the stack: + +``` +BenchmarkStackDepth/8-12 2208600 547.2 ns/op +BenchmarkStackDepth/16-12 1447922 810.8 ns/op +BenchmarkStackDepth/32-12 915291 1338 ns/op +BenchmarkStackDepth/64-12 488719 2366 ns/op +BenchmarkStackDepth/128-12 264735 4462 ns/op +BenchmarkStackDepth/256-12 137575 8643 ns/op +BenchmarkStackDepth/512-12 68355 17316 ns/op +BenchmarkStackDepth/1024-12 34710 34810 ns/op +``` + +## Result 2: O(N) Function Size + +Perhaps surprisingly, stack unwinding is also O(N) with regard to the size of the generated machine code for the function: + +``` +BenchmarkFunctionSize/1-12 2562176 462.8 ns/op +BenchmarkFunctionSize/2-12 2509465 484.7 ns/op +BenchmarkFunctionSize/4-12 2356609 504.6 ns/op +BenchmarkFunctionSize/8-12 2095870 568.3 ns/op +BenchmarkFunctionSize/16-12 1778889 669.7 ns/op +BenchmarkFunctionSize/32-12 1396009 856.0 ns/op +BenchmarkFunctionSize/64-12 943807 1269 ns/op +BenchmarkFunctionSize/128-12 516487 2271 ns/op +BenchmarkFunctionSize/256-12 277821 4490 ns/op +``` + +## Disclaimer + +YMMV; in particular, the function size result also depends on the program counter at which the function is being unwound. All tests were done on my local machine: + +``` +go test -bench .
+goos: darwin +goarch: amd64 +pkg: github.com/felixge/go-profiler-notes/examples/stack-unwind-overhead +cpu: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz +``` diff --git a/examples/stack-unwind-overhead/go.mod b/examples/stack-unwind-overhead/go.mod new file mode 100644 index 0000000..1838b50 --- /dev/null +++ b/examples/stack-unwind-overhead/go.mod @@ -0,0 +1,3 @@ +module github.com/felixge/go-profiler-notes/examples/stack-unwind-overhead + +go 1.16 diff --git a/examples/stack-unwind-overhead/main.go b/examples/stack-unwind-overhead/main.go new file mode 100644 index 0000000..d8dc63c --- /dev/null +++ b/examples/stack-unwind-overhead/main.go @@ -0,0 +1,18 @@ +// Package main is a stub for a code generator that creates functions of different size; run currently does nothing. +package main + +import ( + "fmt" + "os" +) + +func main() { + if err := run(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} + +func run() error { + return nil +} diff --git a/examples/stack-unwind-overhead/main_test.go b/examples/stack-unwind-overhead/main_test.go new file mode 100644 index 0000000..827888e --- /dev/null +++ b/examples/stack-unwind-overhead/main_test.go @@ -0,0 +1,628 @@ +package main + +import ( + "fmt" + "runtime" + "testing" +) + +// BenchmarkStackDepth times runtime.Callers at stack depths 8, 16, ..., 1024 and +// fails the sub-benchmark if the number of captured frames differs from the depth. +func BenchmarkStackDepth(b *testing.B) { + for d := 8; d <= 1024; d = d * 2 { + b.Run(fmt.Sprintf("%d", d), func(b *testing.B) { + callers := make([]uintptr, d) + atStackDepth(d, func() { + for i := 0; i < b.N; i++ { + if n := runtime.Callers(0, callers); n != d { + b.Fatalf("got=%d want=%d", n, d) + } + } + }) + }) + } +} + +// atStackDepth recurses until runtime.Callers reports exactly depth frames, then +// calls fn; it panics with "depth exceeded" if more than depth frames are reported.
+func atStackDepth(depth int, fn func()) { + pcs := make([]uintptr, depth+10) + n := runtime.Callers(1, pcs) + if n > depth { + panic("depth exceeded") + } else if n < depth { + atStackDepth(depth, fn) + return + } + + fn() +} + +func BenchmarkFunctionSize(b *testing.B) { + m := map[int]func(func()){ + 1: dostuff1, + 2: dostuff2, + 4: dostuff4, + 8: dostuff8, + 16: dostuff16, + 32: dostuff32, + 64: dostuff64, + 128: dostuff128, + 256: dostuff256, + } + + var callers = make([]uintptr, 32) + for s := 1; s <= 256; s = s * 2 { + b.Run(fmt.Sprintf("%d", s), func(b *testing.B) { + m[s](func() { + for i := 0; i < b.N; i++ { + runtime.Callers(0, callers) + } + }) + }) + } + + //b.Run("long", func(b *testing.B) { + //long(func() { + //for i := 0; i < b.N; i++ { + //runtime.Callers(0, callers) + //} + //}) + //}) +} + +// dostuff is supposed to generate a bunch of machine code that will hopefully +// be inlined into its callers. +func dostuff() int { + m := map[int]string{} + m[0] = "foo" + m[100] = "bar" + return len(m) +} + +func dostuff1(fn func()) { + dostuff() + fn() +} + +func dostuff2(fn func()) { + dostuff() + dostuff() + fn() +} + +func dostuff4(fn func()) { + dostuff() + dostuff() + dostuff() + dostuff() + fn() +} + +func dostuff8(fn func()) { + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + fn() +} + +func dostuff16(fn func()) { + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + fn() +} + +func dostuff32(fn func()) { + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + 
dostuff() + dostuff() + fn() +} + +func dostuff64(fn func()) { + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + fn() +} + +func dostuff128(fn func()) { + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + 
dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + fn() +} + +func dostuff256(fn func()) { + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + 
dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + dostuff() + fn() +}