diff --git a/examples/ch3-xx/add/add.go b/examples/ch3-xx/add/add.go
new file mode 100644
index 0000000..307c12e
--- /dev/null
+++ b/examples/ch3-xx/add/add.go
@@ -0,0 +1,19 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// Pure Go version; the compiler is able to inline these functions.
+
+package add
+
+// Add returns the sum of a and b.
+func Add(a, b int) int {
+	return a + b
+}
+
+// AddSlice stores a[i]+b[i] in dst[i] for every index i that is valid in all
+// three slices. Elements of dst beyond the shortest length are left untouched.
+func AddSlice(dst, a, b []int) {
+	for i := 0; i < len(dst) && i < len(a) && i < len(b); i++ {
+		dst[i] = a[i] + b[i]
+	}
+}
diff --git a/examples/ch3-xx/add/add_asm.go b/examples/ch3-xx/add/add_asm.go
new file mode 100644
index 0000000..054d6d6
--- /dev/null
+++ b/examples/ch3-xx/add/add_asm.go
@@ -0,0 +1,21 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// +build amd64
+
+// Assembly version; these functions cannot be inlined.
+
+package add
+
+// AsmAdd returns the sum of a and b. It is declared without a body here; the
+// implementation is the assembly routine ·AsmAdd.
+func AsmAdd(a, b int) int
+
+// AsmAddSlice has the same contract as AddSlice. It delegates to the Go
+// implementation until the assembly version (AsmAddSlice__todo) is complete.
+func AsmAddSlice(dst, a, b []int) {
+	AddSlice(dst, a, b)
+}
+
+// AsmAddSlice__todo is the work-in-progress assembly version of AddSlice.
+func AsmAddSlice__todo(dst, a, b []int)
diff --git a/examples/ch3-xx/add/add_asm_amd64.s b/examples/ch3-xx/add/add_asm_amd64.s
new file mode 100644
index 0000000..d7492d8
--- /dev/null
+++ b/examples/ch3-xx/add/add_asm_amd64.s
@@ -0,0 +1,23 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "textflag.h"
+
+// func AsmAdd(a, b int) int
+TEXT ·AsmAdd(SB), NOSPLIT, $0-24
+	MOVQ a+0(FP), AX // AX = a
+	MOVQ b+8(FP), BX // BX = b
+	ADDQ AX, BX // BX = a+b
+	MOVQ BX, ret+16(FP) // return a+b
+	RET
+
+// func AsmAddSlice__todo(dst, a, b []int) -- unfinished: only loads the arguments, then returns
+TEXT ·AsmAddSlice__todo(SB), NOSPLIT, $0-72
+	MOVQ dst+0(FP), AX // AX: first word of dst (its data pointer)
+	MOVQ a+24(FP), BX // BX: first word of a (its data pointer)
+	MOVQ b+48(FP), CX // CX: first word of b (its data pointer)
+	MOVQ dst_len+8(FP), DX // DX: len(dst)
+	MOVQ a_len+32(FP), R8 // R8: len(a)
+	MOVQ b_len+56(FP), R9 // R9: len(b)
+	// TODO: DX = min(DX,R8,R9); the element-wise add loop is not written yet
+	RET
diff --git a/examples/ch3-xx/add/add_asm_generic.go b/examples/ch3-xx/add/add_asm_generic.go
new file mode 100644
index 0000000..b096716
--- /dev/null
+++ b/examples/ch3-xx/add/add_asm_generic.go
@@ -0,0 +1,16 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// +build !amd64
+
+// On platforms without an assembly implementation, temporarily fall back to the Go version.
+
+package add
+
+func AsmAdd(a, b int) int {
+	return Add(a, b)
+}
+
+func AsmAddSlice(dst, a, b []int) {
+	AddSlice(dst, a, b)
+}
diff --git a/examples/ch3-xx/add/add_test.go b/examples/ch3-xx/add/add_test.go
new file mode 100644
index 0000000..b5cc477
--- /dev/null
+++ b/examples/ch3-xx/add/add_test.go
@@ -0,0 +1,97 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// go test -bench=.
+
+package add
+
+import (
+	"testing"
+)
+
+func TestAdd(t *testing.T) {
+	t.Run("go", func(t *testing.T) {
+		if x := Add(1, 2); x != 3 {
+			t.Fatalf("expect = %d, got = %d", 3, x)
+		}
+	})
+	t.Run("asm", func(t *testing.T) {
+		if x := AsmAdd(1, 2); x != 3 {
+			t.Fatalf("expect = %d, got = %d", 3, x)
+		}
+	})
+}
+
+func TestAddSlice(t *testing.T) {
+	a := []int{1, 2, 3, 4, 5}
+	b := []int{10, 20, 30, 40, 50, 60}
+
+	t.Run("go", func(t *testing.T) {
+		x := make([]int, len(a))
+		AddSlice(x, a, b)
+
+		for i := 0; i < len(x) && i < len(a) && i < len(b); i++ {
+			if x[i] != a[i]+b[i] {
+				t.Fatalf("expect = %d, got = %d", a[i]+b[i], x[i]) // expected value first, matching the format string
+			}
+		}
+	})
+
+	t.Run("asm", func(t *testing.T) {
+		x := make([]int, len(a))
+		AsmAddSlice(x, a, b)
+
+		for i := 0; i < len(x) && i < len(a) && i < len(b); i++ {
+			if x[i] != a[i]+b[i] {
+				t.Fatalf("expect = %d, got = %d", a[i]+b[i], x[i]) // expected value first, matching the format string
+			}
+		}
+	})
+}
+
+func BenchmarkAdd(b *testing.B) {
+	b.Run("go", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			Add(1, 2)
+		}
+	})
+	b.Run("asm", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			AsmAdd(1, 2)
+		}
+	})
+}
+
+func BenchmarkAddSlice(b *testing.B) {
+	s0 := make([]int, 10<<10)
+	s1 := make([]int, 10<<10)
+	dst := make([]int, 10<<10)
+
+	b.Run("len=10", func(b *testing.B) {
+		dst := dst[:10]
+		for i := 0; i < b.N; i++ {
+			AddSlice(dst, s0, s1)
+		}
+	})
+	b.Run("len=50", func(b *testing.B) {
+		dst := dst[:50]
+		for i := 0; i < b.N; i++ {
+			AddSlice(dst, s0, s1)
+			_ = dst
+		}
+	})
+	b.Run("len=100", func(b *testing.B) {
+		dst := dst[:100]
+		for i := 0; i < b.N; i++ {
+			AddSlice(dst, s0, s1)
+			_ = dst
+		}
+	})
+	b.Run("len=1000", func(b *testing.B) {
+		dst := dst[:1000]
+		for i := 0; i < b.N; i++ {
+			AddSlice(dst, s0, s1)
+			_ = dst
+		}
+	})
+}
diff --git a/examples/ch3-xx/add/runme.go b/examples/ch3-xx/add/runme.go
new file mode 100644
index 0000000..8a8f2c0
--- /dev/null
+++ b/examples/ch3-xx/add/runme.go
@@ -0,0 +1,14 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// +build ignore
+
+package main
+
+import (
+	. "."
+)
+
+func main() {
+	println("Add(1+2) =", Add(1, 2))
+}
diff --git a/examples/ch3-xx/binary_search/binary_search.go b/examples/ch3-xx/binary_search/binary_search.go
new file mode 100644
index 0000000..1d72aa4
--- /dev/null
+++ b/examples/ch3-xx/binary_search/binary_search.go
@@ -0,0 +1,8 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package bsearch
+
+// BinarySearch reports whether num is present in arr. arr must be sorted in
+// ascending order. The implementation is in binary_search_amd64.s.
+func BinarySearch(arr []int, num int) bool
diff --git a/examples/ch3-xx/binary_search/binary_search_amd64.s b/examples/ch3-xx/binary_search/binary_search_amd64.s
new file mode 100644
index 0000000..de3827e
--- /dev/null
+++ b/examples/ch3-xx/binary_search/binary_search_amd64.s
@@ -0,0 +1,40 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "textflag.h"
+
+// func BinarySearch(arr []int, num int) bool
+//
+// Reports whether num occurs in arr, which must be sorted ascending.
+// Searches the half-open index range [lo, hi), so an empty slice is never
+// dereferenced and no element at or beyond len(arr) is read.
+TEXT ·BinarySearch(SB), NOSPLIT, $0-33
+	MOVQ arr_base+0(FP), CX // CX = &arr[0]
+	MOVQ arr_len+8(FP), DX // DX = hi (exclusive), starts at len(arr)
+	MOVQ num+24(FP), R8 // R8 = num
+	XORQ BX, BX // BX = lo (inclusive), starts at 0
+
+loop:
+	CMPQ BX, DX
+	JGE not_found // lo >= hi: the range is empty
+	MOVQ DX, AX
+	SUBQ BX, AX
+	SHRQ $1, AX
+	ADDQ BX, AX // AX = mid = lo + (hi-lo)/2
+	CMPQ R8, (CX)(AX*8) // compare num with arr[mid]
+	JEQ found
+	JLT left
+	LEAQ 1(AX), BX // num > arr[mid]: lo = mid+1
+	JMP loop
+
+left:
+	MOVQ AX, DX // num < arr[mid]: hi = mid
+	JMP loop
+
+not_found:
+	MOVB $0, ret+32(FP) // the result is a one-byte bool
+	RET
+
+found:
+	MOVB $1, ret+32(FP)
+	RET
diff --git a/examples/ch3-xx/binary_search/binary_search_test.go b/examples/ch3-xx/binary_search/binary_search_test.go
new file mode 100644
index 0000000..504225c
--- /dev/null
+++ b/examples/ch3-xx/binary_search/binary_search_test.go
@@ -0,0 +1,26 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package bsearch
+
+import "testing"
+
+func TestBinarySearch(t *testing.T) {
+	data := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
+
+	if result := BinarySearch(data, 8); result != true {
+		t.Errorf("Expected true value for binary search.")
+	}
+
+	if result := BinarySearch(data, 1); result != true {
+		t.Errorf("Expected true value for binary search.")
+	}
+
+	if result := BinarySearch(data, 10); result != true {
+		t.Errorf("Expected true value for binary search.")
+	}
+
+	if result := BinarySearch(data, 12); result != false {
+		t.Errorf("Expected false value for binary search.")
+	}
+}
diff --git a/examples/ch3-xx/globalvar/asm_amd64.s b/examples/ch3-xx/globalvar/asm_amd64.s
new file mode 100644
index 0000000..b32a02e
--- /dev/null
+++ b/examples/ch3-xx/globalvar/asm_amd64.s
@@ -0,0 +1,26 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "textflag.h"
+
+// func GetPkgValue() int
+TEXT ·GetPkgValue(SB), NOSPLIT, $0-8
+	MOVQ ·gopkgValue(SB), AX
+	MOVQ AX, ret+0(FP)
+	RET
+
+// func GetPkgInfo() PkgInfo -- each field is stored with its own width so no write spills past the 24-byte result
+TEXT ·GetPkgInfo(SB), NOSPLIT, $0-24
+	MOVBLZX ·gInfo+0(SB), AX // .V0 byte
+	MOVB AX, ret_V0+0(FP)
+	MOVWLZX ·gInfo+2(SB), AX // .V1 uint16
+	MOVW AX, ret_V1+2(FP)
+	MOVLQZX ·gInfo+4(SB), AX // .V2 int32
+	MOVL AX, ret_V2+4(FP)
+	MOVQ ·gInfo+8(SB), AX // .V3 int64
+	MOVQ AX, ret_V3+8(FP)
+	MOVBLZX ·gInfo+(16+0)(SB), AX // .V4 bool
+	MOVB AX, ret_V4+(16+0)(FP)
+	MOVBLZX ·gInfo+(16+1)(SB), AX // .V5 bool
+	MOVB AX, ret_V5+(16+1)(FP)
+	RET
diff --git a/examples/ch3-xx/globalvar/globalvar.go b/examples/ch3-xx/globalvar/globalvar.go
new file mode 100644
index 0000000..65696c6
--- /dev/null
+++ b/examples/ch3-xx/globalvar/globalvar.go
@@ -0,0 +1,32 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// Accessing package-level variables defined in Go from assembly.
+
+package globalvar
+
+var gopkgValue int = 42 // read from assembly by GetPkgValue
+
+type PkgInfo struct {
+	V0 byte // read by the assembly at offset 0
+	V1 uint16 // offset 2
+	V2 int32 // offset 4
+	V3 int64 // offset 8
+	V4 bool // offset 16
+	V5 bool // offset 17
+}
+
+var gInfo PkgInfo // filled in by init, copied out by GetPkgInfo
+
+func init() {
+	gInfo.V0 = 101
+	gInfo.V1 = 102
+	gInfo.V2 = 103
+	gInfo.V3 = 104
+	gInfo.V4 = true
+	gInfo.V5 = false
+}
+
+func GetPkgValue() int // body in asm_amd64.s: returns gopkgValue
+
+func GetPkgInfo() PkgInfo // body in asm_amd64.s: returns a copy of gInfo
diff --git a/examples/ch3-xx/globalvar/runme.go b/examples/ch3-xx/globalvar/runme.go
new file mode 100644
index 0000000..db42bb5
--- /dev/null
+++ b/examples/ch3-xx/globalvar/runme.go
@@ -0,0 +1,17 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// +build ignore
+
+package main
+
+import (
+	"fmt"
+
+	. "."
+)
+
+func main() {
+	fmt.Println(GetPkgValue())
+	fmt.Println(GetPkgInfo())
+}
diff --git a/examples/ch3-xx/hello/hello.go b/examples/ch3-xx/hello/hello.go
new file mode 100644
index 0000000..ff96b94
--- /dev/null
+++ b/examples/ch3-xx/hello/hello.go
@@ -0,0 +1,10 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package hello
+
+var text = "你好, 世界, 包变量\n" // Go string printed by PrintHelloWorld_var
+
+func PrintHelloWorld() // prints the ASCII greeting stored in the assembly file
+func PrintHelloWorld_zh() // prints the UTF-8 greeting stored in the assembly file
+func PrintHelloWorld_var() // prints the Go variable text
diff --git a/examples/ch3-xx/hello/hello_amd64.s b/examples/ch3-xx/hello/hello_amd64.s
new file mode 100644
index 0000000..8276bdb
--- /dev/null
+++ b/examples/ch3-xx/hello/hello_amd64.s
@@ -0,0 +1,51 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "textflag.h"
+#include "funcdata.h"
+
+// "Hello World!\n" (13 bytes; the 16-byte symbol is zero-padded)
+DATA text<>+0(SB)/8,$"Hello Wo"
+DATA text<>+8(SB)/8,$"rld!\n"
+GLOBL text<>(SB),NOPTR,$16
+
+// utf8: "你好, 世界!\n"
+// hex: e4bda0e5a5bd2c20 e4b896e7958c210a
+// len: 16
+DATA text_zh<>+0(SB)/8,$"\xe4\xbd\xa0\xe5\xa5\xbd\x2c\x20"
+DATA text_zh<>+8(SB)/8,$"\xe4\xb8\x96\xe7\x95\x8c\x21\x0a"
+GLOBL text_zh<>(SB),NOPTR,$16
+
+// func PrintHelloWorld_var()
+TEXT ·PrintHelloWorld_var(SB), $16-0
+	NO_LOCAL_POINTERS
+	CALL runtime·printlock(SB)
+	MOVQ ·text+0(SB), AX // string data pointer
+	MOVQ AX, (SP)
+	MOVQ ·text+8(SB), AX // string length
+	MOVQ AX, 8(SP)
+	CALL runtime·printstring(SB)
+	CALL runtime·printunlock(SB)
+	RET
+
+// func PrintHelloWorld()
+TEXT ·PrintHelloWorld(SB), $16-0
+	NO_LOCAL_POINTERS
+	CALL runtime·printlock(SB)
+	MOVQ $text<>+0(SB), AX
+	MOVQ AX, (SP)
+	MOVQ $13, 8(SP) // len("Hello World!\n"); 16 would also print the three NUL padding bytes
+	CALL runtime·printstring(SB)
+	CALL runtime·printunlock(SB)
+	RET
+
+// func PrintHelloWorld_zh()
+TEXT ·PrintHelloWorld_zh(SB), $16-0
+	NO_LOCAL_POINTERS
+	CALL runtime·printlock(SB)
+	MOVQ $text_zh<>+0(SB), AX
+	MOVQ AX, (SP)
+	MOVQ $16, 8(SP) // the UTF-8 text is exactly 16 bytes
+	CALL runtime·printstring(SB)
+	CALL runtime·printunlock(SB)
+	RET
diff --git a/examples/ch3-xx/hello/runme.go b/examples/ch3-xx/hello/runme.go
new file mode 100644
index 0000000..4abd431
--- /dev/null
+++ b/examples/ch3-xx/hello/runme.go
@@ -0,0 +1,20 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// +build ignore
+
+package main
+
+import (
+	"fmt"
+
+	. "."
+)
+
+func main() {
+	s := "你好, 世界!\n"
+	fmt.Printf("%d: %x\n", len(s), s)
+	PrintHelloWorld()
+	PrintHelloWorld_zh()
+	PrintHelloWorld_var()
+}
diff --git a/examples/ch3-xx/ifelse/ifelse.go b/examples/ch3-xx/ifelse/ifelse.go
new file mode 100644
index 0000000..0a9a0e7
--- /dev/null
+++ b/examples/ch3-xx/ifelse/ifelse.go
@@ -0,0 +1,15 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// Go version; the compiler is able to inline it.
+
+package ifelse
+
+func If(ok bool, a, b int) int { // returns a when ok is true, otherwise b
+	if ok {
+		return a
+	}
+	return b
+}
+
+func AsmIf(ok bool, a, b int) int // same contract as If; the body is the assembly routine ·AsmIf
diff --git a/examples/ch3-xx/ifelse/ifelse_ams_amd64.s b/examples/ch3-xx/ifelse/ifelse_ams_amd64.s
new file mode 100644
index 0000000..c10b73f
--- /dev/null
+++ b/examples/ch3-xx/ifelse/ifelse_ams_amd64.s
@@ -0,0 +1,31 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "textflag.h"
+
+//
+// https://github.com/golang/go/issues/14288
+//
+// from rsc:
+// But expanding what I said yesterday just a bit:
+// never use MOVB or MOVW with a register destination,
+// since it's inefficient (it's a read-modify-write on the target register).
+// Instead use MOVL for reg->reg and use MOVBLZX or MOVWLZX for mem->reg;
+// those are pure writes on the target register.
+//
+// Therefore, when loading a bool argument into a register, MOVBLZX is recommended.
+// With plain MOVB, go test still passes,
+// but go run runme.go produces a wrong result.
+//
+
+// func AsmIf(ok bool, a, b int) int
+TEXT ·AsmIf(SB), NOSPLIT, $0-32
+	MOVBQZX ok+0(FP), AX // AX = ok, zero-extended from one byte
+	MOVQ a+8(FP), BX // BX = a
+	MOVQ b+16(FP), CX // CX = b
+	CMPQ AX, $0 // test ok
+	JEQ 3(PC) // if !ok, skip the next two instructions
+	MOVQ BX, ret+24(FP) // return a
+	RET
+	MOVQ CX, ret+24(FP) // return b
+	RET
diff --git a/examples/ch3-xx/ifelse/ifelse_test.go b/examples/ch3-xx/ifelse/ifelse_test.go
new file mode 100644
index 0000000..8309cba
--- /dev/null
+++ b/examples/ch3-xx/ifelse/ifelse_test.go
@@ -0,0 +1,32 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// go test -bench=.
+
+package ifelse
+
+import (
+	"testing"
+)
+
+func TestIf(t *testing.T) { // checks If and AsmIf for both values of ok
+	t.Run("go", func(t *testing.T) {
+		if x := If(true, 1, 2); x != 1 {
+			t.Fatalf("expect = %d, got = %d", 1, x)
+		}
+		if x := If(false, 1, 2); x != 2 {
+			t.Fatalf("expect = %d, got = %d", 2, x)
+		}
+	})
+	t.Run("asm", func(t *testing.T) {
+		if x := AsmIf(true, 1, 2); x != 1 {
+			t.Fatalf("expect = %d, got = %d", 1, x)
+		}
+		if x := AsmIf(false, 1, 2); x != 2 {
+			t.Fatalf("expect = %d, got = %d", 2, x)
+		}
+		if x := AsmIf(false, 2, 1); x != 1 {
+			t.Fatalf("expect = %d, got = %d", 1, x)
+		}
+	})
+}
diff --git a/examples/ch3-xx/ifelse/runme.go b/examples/ch3-xx/ifelse/runme.go
new file mode 100644
index 0000000..1920942
--- /dev/null
+++ b/examples/ch3-xx/ifelse/runme.go
@@ -0,0 +1,18 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// +build ignore
+
+package main
+
+import (
+	. "."
+)
+
+func main() {
+	println("If(true, 1, 2) =", If(true, 1, 2))
+	println("If(false, 1, 2) =", If(false, 1, 2))
+	println("AsmIf(true, 1, 2) =", AsmIf(true, 1, 2))
+	println("AsmIf(false, 1, 2) =", AsmIf(false, 1, 2))
+	println("AsmIf(false, 2, 1) =", AsmIf(false, 2, 1))
+}
diff --git a/examples/ch3-xx/instr/bench_test.go b/examples/ch3-xx/instr/bench_test.go
new file mode 100755
index 0000000..2ae33f5
--- /dev/null
+++ b/examples/ch3-xx/instr/bench_test.go
@@ -0,0 +1,22 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package instr
+
+import "testing"
+
+var g int64 // package-level variable that receives every benchmark result
+
+func BenchmarkSum(b *testing.B) { // times the pure Go Sum on a 10-element slice
+	ns := []int64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
+	for i := 0; i < b.N; i++ {
+		g = Sum(ns)
+	}
+}
+
+func BenchmarkSum2(b *testing.B) { // times the assembly Sum2 on the same input
+	ns := []int64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
+	for i := 0; i < b.N; i++ {
+		g = Sum2(ns)
+	}
+}
diff --git a/examples/ch3-xx/instr/instr.go b/examples/ch3-xx/instr/instr.go
new file mode 100755
index 0000000..2bbe396
--- /dev/null
+++ b/examples/ch3-xx/instr/instr.go
@@ -0,0 +1,26 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package instr
+
+func Add(n, m int64) int64 { // returns n + m
+	return n + m
+}
+
+func Add2(n, m int64) int64 // assembly counterpart of Add (instr_amd64.s)
+
+// BSF returns the index of the least significant set bit,
+// or -1 if the input contains no set bits.
+func BSF(n int64) int
+
+func BSF32(n int32) int32 // 32-bit variant of BSF; also returns -1 for zero input
+
+func Sum(s []int64) int64 { // returns the sum of all elements; 0 for an empty slice
+	var ss int64
+	for _, n := range s {
+		ss += n
+	}
+	return ss
+}
+
+func Sum2(s []int64) int64 // assembly counterpart of Sum (instr_amd64.s)
diff --git a/examples/ch3-xx/instr/instr_amd64.s b/examples/ch3-xx/instr/instr_amd64.s
new file mode 100755
index 0000000..bfac1f3
--- /dev/null
+++ b/examples/ch3-xx/instr/instr_amd64.s
@@ -0,0 +1,57 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "textflag.h"
+
+// func Add2(n, m int64) int64
+TEXT ·Add2(SB), NOSPLIT, $0-24
+	MOVQ n+0(FP), AX
+	MOVQ m+8(FP), BX
+	ADDQ AX, BX
+	MOVQ BX, ret+16(FP)
+	RET
+
+// func BSF(n int64) int
+TEXT ·BSF(SB), NOSPLIT, $0-16
+	BSFQ n+0(FP), AX // AX = index of the lowest set bit; ZF is set when n == 0
+	JEQ allZero
+	MOVQ AX, ret+8(FP)
+	RET
+
+allZero:
+	MOVQ $-1, ret+8(FP)
+	RET
+
+// func BSF32(n int32) int32
+TEXT ·BSF32(SB), NOSPLIT, $0-12
+	BSFL n+0(FP), AX // 32-bit scan; ZF is set when n == 0
+	JEQ allZero32
+	MOVL AX, ret+8(FP)
+	RET
+
+allZero32:
+	MOVL $-1, ret+8(FP)
+	RET
+
+// func Sum2(s []int64) int64
+TEXT ·Sum2(SB), NOSPLIT, $0-32
+	MOVQ $0, DX // DX = running sum
+	MOVQ s_base+0(FP), AX // AX = pointer to the current element
+	MOVQ s_len+8(FP), DI // DI = len(s)
+	MOVQ $0, CX // CX = index
+	CMPQ CX, DI
+	JGE Sum2End // empty slice: return 0
+
+Sum2Loop:
+	MOVQ (AX), R8 // scratch register; BP is left alone because it is the frame pointer
+	ADDQ R8, DX
+	ADDQ $8, AX
+	INCQ CX
+	CMPQ CX, DI
+	JL Sum2Loop
+
+Sum2End:
+	MOVQ DX, ret+24(FP)
+	RET
+
+// vim: set ft=txt:
diff --git a/examples/ch3-xx/loop/loop.go b/examples/ch3-xx/loop/loop.go
new file mode 100644
index 0000000..b5174f5
--- /dev/null
+++ b/examples/ch3-xx/loop/loop.go
@@ -0,0 +1,16 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// Go version; the compiler is able to inline it.
+
+package loop
+
+func LoopAdd(cnt, v0, step int) int { // returns v0 + cnt*step for cnt > 0, else v0
+	result := v0
+	for i := 0; i < cnt; i++ {
+		result += step
+	}
+	return result
+}
+
+func AsmLoopAdd(cnt, v0, step int) int // assembly counterpart of LoopAdd
diff --git a/examples/ch3-xx/loop/loop_asm_amd64.s b/examples/ch3-xx/loop/loop_asm_amd64.s
new file mode 100644
index 0000000..38e7f03
--- /dev/null
+++ b/examples/ch3-xx/loop/loop_asm_amd64.s
@@ -0,0 +1,21 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "textflag.h"
+
+// func AsmLoopAdd(cnt, v0, step int) int
+TEXT ·AsmLoopAdd(SB), NOSPLIT, $0-32
+	MOVQ cnt+0(FP), AX // AX = cnt (remaining iterations)
+	MOVQ v0+8(FP), BX // BX = v0 (running result)
+	MOVQ step+16(FP), CX // CX = step
+
+loop:
+	CMPQ AX, $0 // compare cnt with 0
+	JLE end // if cnt <= 0: goto end
+	DECQ AX // cnt--
+	ADDQ CX, BX // v0 += step
+	JMP loop // goto loop
+
+end:
+	MOVQ BX, ret+24(FP) // return v0
+	RET
diff --git a/examples/ch3-xx/loop/loop_test.go b/examples/ch3-xx/loop/loop_test.go
new file mode 100644
index 0000000..076042a
--- /dev/null
+++ b/examples/ch3-xx/loop/loop_test.go
@@ -0,0 +1,54 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// Run the tests and benchmarks with: go test -bench=.
+
+package loop
+
+import (
+	"testing"
+)
+
+func TestLoopAdd(t *testing.T) { // runs the same four cases against the Go and assembly versions
+	t.Run("go", func(t *testing.T) {
+		if x := LoopAdd(100, 0, 1); x != 100 {
+			t.Fatalf("expect = %d, got = %d", 100, x)
+		}
+		if x := LoopAdd(100, 0, 2); x != 200 {
+			t.Fatalf("expect = %d, got = %d", 200, x)
+		}
+		if x := LoopAdd(100, 0, -1); x != -100 {
+			t.Fatalf("expect = %d, got = %d", -100, x)
+		}
+		if x := LoopAdd(100, 50, 1); x != 150 {
+			t.Fatalf("expect = %d, got = %d", 150, x)
+		}
+	})
+	t.Run("asm", func(t *testing.T) {
+		if x := AsmLoopAdd(100, 0, 1); x != 100 {
+			t.Fatalf("expect = %d, got = %d", 100, x)
+		}
+		if x := AsmLoopAdd(100, 0, 2); x != 200 {
+			t.Fatalf("expect = %d, got = %d", 200, x)
+		}
+		if x := AsmLoopAdd(100, 0, -1); x != -100 {
+			t.Fatalf("expect = %d, got = %d", -100, x)
+		}
+		if x := AsmLoopAdd(100, 50, 1); x != 150 {
+			t.Fatalf("expect = %d, got = %d", 150, x)
+		}
+	})
+}
+
+func BenchmarkLoopAdd(b *testing.B) { // 1000 iterations per call, Go versus assembly
+	b.Run("go", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			LoopAdd(1000, 0, 1)
+		}
+	})
+	b.Run("asm", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			AsmLoopAdd(1000, 0, 1)
+		}
+	})
+}
diff --git a/examples/ch3-xx/loop/runme.go b/examples/ch3-xx/loop/runme.go
new file mode 100644
index 0000000..3505c60
--- /dev/null
+++ b/examples/ch3-xx/loop/runme.go
@@ -0,0 +1,17 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// +build ignore
+
+package main
+
+import (
+	. "."
+)
+
+func main() {
+	println("LoopAdd(100,0,1) =", LoopAdd(100, 0, 1))
+	println("LoopAdd(100,0,2) =", LoopAdd(100, 0, 2))
+	println("LoopAdd(100,200,-1) =", LoopAdd(100, 200, -1))
+	println("LoopAdd(100,0,-1) =", LoopAdd(100, 0, -1))
+}
diff --git a/examples/ch3-xx/min/min.go b/examples/ch3-xx/min/min.go
new file mode 100644
index 0000000..7faa628
--- /dev/null
+++ b/examples/ch3-xx/min/min.go
@@ -0,0 +1,31 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// Go version; the compiler is able to inline it.
+
+package min
+
+func Min(a, b int) int { // returns the smaller of a and b
+	if a < b {
+		return a
+	}
+	return b
+}
+
+//go:noinline
+func MinNoInline(a, b int) int { // same as Min, but inlining is disabled for benchmarking
+	if a < b {
+		return a
+	}
+	return b
+}
+
+func Max(a, b int) int { // returns the larger of a and b
+	if a > b {
+		return a
+	}
+	return b
+}
+
+func AsmMin(a, b int) int // assembly counterpart of Min
+func AsmMax(a, b int) int // assembly counterpart of Max
diff --git a/examples/ch3-xx/min/min_asm_amd64.s b/examples/ch3-xx/min/min_asm_amd64.s
new file mode 100644
index 0000000..60043a9
--- /dev/null
+++ b/examples/ch3-xx/min/min_asm_amd64.s
@@ -0,0 +1,26 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "textflag.h"
+
+// func AsmMin(a, b int) int
+TEXT ·AsmMin(SB), NOSPLIT, $0-24
+	MOVQ a+0(FP), AX // a
+	MOVQ b+8(FP), BX // b
+	CMPQ AX, BX // compare a, b
+	JGT 3(PC) // if a>b, skip 2 line
+	MOVQ AX, ret+16(FP) // return a
+	RET
+	MOVQ BX, ret+16(FP) // return b
+	RET
+
+// func AsmMax(a, b int) int
+TEXT ·AsmMax(SB), NOSPLIT, $0-24
+	MOVQ a+0(FP), AX // a
+	MOVQ b+8(FP), BX // b
+	CMPQ AX, BX // compare a, b
+	JLT 3(PC) // if a<b, skip 2 line
+	MOVQ AX, ret+16(FP) // return a
+	RET
+	MOVQ BX, ret+16(FP) // return b
+	RET
diff --git a/examples/ch3-xx/min/min_test.go b/examples/ch3-xx/min/min_test.go
new file mode 100644
--- /dev/null
+++ b/examples/ch3-xx/min/min_test.go
@@ -0,0 +1,65 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// go test -bench=.
+
+package min
+
+import (
+	"testing"
+)
+
+func TestMin(t *testing.T) { // checks Min and AsmMin with the smaller value in either position
+	t.Run("go", func(t *testing.T) {
+		if x := Min(1, 2); x != 1 {
+			t.Fatalf("expect = %d, got = %d", 1, x)
+		}
+		if x := Min(2, 1); x != 1 {
+			t.Fatalf("expect = %d, got = %d", 1, x)
+		}
+	})
+	t.Run("asm", func(t *testing.T) {
+		if x := AsmMin(1, 2); x != 1 {
+			t.Fatalf("expect = %d, got = %d", 1, x)
+		}
+		if x := AsmMin(2, 1); x != 1 {
+			t.Fatalf("expect = %d, got = %d", 1, x)
+		}
+	})
+}
+func TestMax(t *testing.T) { // checks Max and AsmMax with the larger value in either position
+	t.Run("go", func(t *testing.T) {
+		if x := Max(1, 2); x != 2 {
+			t.Fatalf("expect = %d, got = %d", 2, x)
+		}
+		if x := Max(2, 1); x != 2 {
+			t.Fatalf("expect = %d, got = %d", 2, x)
+		}
+	})
+	t.Run("asm", func(t *testing.T) {
+		if x := AsmMax(1, 2); x != 2 {
+			t.Fatalf("expect = %d, got = %d", 2, x)
+		}
+		if x := AsmMax(2, 1); x != 2 {
+			t.Fatalf("expect = %d, got = %d", 2, x)
+		}
+	})
+}
+
+func BenchmarkMin(b *testing.B) { // compares the inlinable Go, non-inlined Go, and assembly versions
+	b.Run("go", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			Min(1, 2)
+		}
+	})
+	b.Run("go.noinline", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			MinNoInline(1, 2)
+		}
+	})
+	b.Run("asm", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			AsmMin(1, 2)
+		}
+	})
+}
diff --git a/examples/ch3-xx/min/runme.go b/examples/ch3-xx/min/runme.go
new file mode 100644
index 0000000..66fc42e
--- /dev/null
+++ b/examples/ch3-xx/min/runme.go
@@ -0,0 +1,14 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// +build ignore
+
+package main
+
+import (
+	. "."
+)
+
+func main() {
+	println("Min(1,2) =", Min(1, 2))
+}
diff --git a/examples/ch3-xx/slice/runme.go b/examples/ch3-xx/slice/runme.go
new file mode 100644
index 0000000..8336e92
--- /dev/null
+++ b/examples/ch3-xx/slice/runme.go
@@ -0,0 +1,15 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// +build ignore
+
+package main
+
+import (
+	. "."
+)
+
+func main() {
+	println("SumIntSlice([]int{1,2,3}) =", SumIntSlice([]int{1, 2, 3}))
+	println("AsmSumIntSlice([]int{1,2,3}) =", AsmSumIntSlice([]int{1, 2, 3}))
+}
diff --git a/examples/ch3-xx/slice/slice.go b/examples/ch3-xx/slice/slice.go
new file mode 100644
index 0000000..f3cf545
--- /dev/null
+++ b/examples/ch3-xx/slice/slice.go
@@ -0,0 +1,35 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// Go version; the compiler is able to inline it.
+
+package slice
+
+func SumIntSlice(s []int) int { // returns the sum of all elements; 0 for an empty slice
+	var sum int
+	for _, v := range s {
+		sum += v
+	}
+	return sum
+}
+
+func SumFloat32Slice(s []float32) float32 { // float32 variant of SumIntSlice
+	var sum float32
+	for _, v := range s {
+		sum += v
+	}
+	return sum
+}
+
+func SumFloat64Slice(s []float64) float64 { // float64 variant of SumIntSlice
+	var sum float64
+	for _, v := range s {
+		sum += v
+	}
+	return sum
+}
+
+func AsmSumInt16Slice(v []int16) int16 // assembly: sums int16 elements with 16-bit adds
+
+func AsmSumIntSlice(s []int) int // assembly counterpart of SumIntSlice (counting loop)
+func AsmSumIntSliceV2(s []int) int // assembly counterpart of SumIntSlice (end-pointer loop)
diff --git a/examples/ch3-xx/slice/slice_asm_amd64.s b/examples/ch3-xx/slice/slice_asm_amd64.s
new file mode 100644
index 0000000..e1eb290
--- /dev/null
+++ b/examples/ch3-xx/slice/slice_asm_amd64.s
@@ -0,0 +1,59 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "textflag.h"
+
+// func AsmSumInt16Slice(v []int16) int16
+TEXT ·AsmSumInt16Slice(SB), NOSPLIT, $0-26
+	MOVQ v_base+0(FP), R8 // R8 = p, pointer to the current element
+	MOVQ v_len+8(FP), R9 // R9 = len(v)
+	SHLQ $1, R9 // R9 = len(v) * 2 (bytes per int16)
+	ADDQ R8, R9 // R9 = p_end, one past the last element
+	MOVQ $0, R10 // R10 = sum
+
+loop:
+	CMPQ R8, R9 // reached p_end?
+	JE end
+	ADDW (R8), R10 // sum += *p (16-bit add)
+	ADDQ $2, R8 // p++ (2 bytes per element)
+	JMP loop
+
+end:
+	MOVW R10, ret+24(FP) // return the low 16 bits of sum
+	RET
+
+// func AsmSumIntSlice(s []int) int
+TEXT ·AsmSumIntSlice(SB), NOSPLIT, $0-32
+	MOVQ s+0(FP), AX // p = &s[0]
+	MOVQ s_len+8(FP), BX // cnt = len(s)
+	MOVQ $0, CX // sum = 0
+
+loop:
+	CMPQ BX, $0 // compare cnt,0
+	JLE end // if cnt <= 0: goto end
+	DECQ BX // cnt--
+	ADDQ (AX), CX // sum += *p
+	ADDQ $8, AX // p++ (8 bytes per element)
+	JMP loop // goto loop
+
+end:
+	MOVQ CX, ret+24(FP) // return sum
+	RET
+
+// func AsmSumIntSliceV2(s []int) int
+TEXT ·AsmSumIntSliceV2(SB), NOSPLIT, $0-32
+	MOVQ s+0(FP), AX // p := &s[0]
+	MOVQ s_len+8(FP), BX // BX = len(s)
+	LEAQ 0(AX)(BX*8), BX // p_end := &s[len(s)]
+	MOVQ $0, CX // sum = 0
+
+loop:
+	CMPQ AX, BX // compare p,p_end
+	JGE end // if p >= p_end: goto end
+	ADDQ (AX), CX // sum += *p
+	ADDQ $8, AX // p++
+	JMP loop // goto loop
+
+end:
+	MOVQ CX, ret+24(FP) // return sum
+	RET
diff --git a/examples/ch3-xx/slice/slice_test.go b/examples/ch3-xx/slice/slice_test.go
new file mode 100644
index 0000000..eaeca23
--- /dev/null
+++ b/examples/ch3-xx/slice/slice_test.go
@@ -0,0 +1,82 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// go test -bench=.
+
+package slice
+
+import (
+	"testing"
+)
+
+func TestSumIntSlice(t *testing.T) { // checks the Go version and both assembly versions on {1,2,3}
+	t.Run("go", func(t *testing.T) {
+		if x := SumIntSlice([]int{1, 2, 3}); x != 6 {
+			t.Fatalf("expect = %d, got = %d", 6, x)
+		}
+	})
+	t.Run("asm", func(t *testing.T) {
+		if x := AsmSumIntSlice([]int{1, 2, 3}); x != 6 {
+			t.Fatalf("expect = %d, got = %d", 6, x)
+		}
+	})
+	t.Run("asm.v2", func(t *testing.T) {
+		if x := AsmSumIntSliceV2([]int{1, 2, 3}); x != 6 {
+			t.Fatalf("expect = %d, got = %d", 6, x)
+		}
+	})
+}
+
+func BenchmarkSumIntSlice(b *testing.B) { // three implementations at lengths 10, 100 and 1000
+	s10 := make([]int, 10)
+	s100 := make([]int, 100)
+	s1000 := make([]int, 1000)
+
+	b.Run("go/len=10", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			SumIntSlice(s10)
+		}
+	})
+	b.Run("asm/len=10", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			AsmSumIntSlice(s10)
+		}
+	})
+	b.Run("asm.v2/len=10", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			AsmSumIntSliceV2(s10)
+		}
+	})
+
+	b.Run("go/len=100", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			SumIntSlice(s100)
+		}
+	})
+	b.Run("asm/len=100", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			AsmSumIntSlice(s100)
+		}
+	})
+	b.Run("asm.v2/len=100", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			AsmSumIntSliceV2(s100)
+		}
+	})
+
+	b.Run("go/len=1000", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			SumIntSlice(s1000)
+		}
+	})
+	b.Run("asm/len=1000", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			AsmSumIntSlice(s1000)
+		}
+	})
+	b.Run("asm.v2/len=1000", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			AsmSumIntSliceV2(s1000)
+		}
+	})
+}
diff --git a/examples/ch3-xx/stackmap/stackmap.go b/examples/ch3-xx/stackmap/stackmap.go
new file mode 100755
index 0000000..82964c8
--- /dev/null
+++ b/examples/ch3-xx/stackmap/stackmap.go
@@ -0,0 +1,25 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package stackmap
+
+func X(b []byte) []byte // implemented in stackmap_amd64.s; the commented-out Go below is the reference version
+
+//func X(b []byte) []byte {
+//	if len(b) == cap(b) {
+//		b = growSlice(b)
+//	}
+//	b = b[:len(b)+1]
+//	b[len(b)-1] = 3
+//	return b
+//}
+
+func growSlice(b []byte) []byte { // returns a copy of b with capacity 10, or 2*cap(b) once cap(b) > 5
+	newCap := 10
+	if cap(b) > 5 {
+		newCap = cap(b) * 2
+	}
+	b1 := make([]byte, len(b), newCap)
+	copy(b1, b)
+	return b1
+}
diff --git a/examples/ch3-xx/stackmap/stackmap_amd64.s b/examples/ch3-xx/stackmap/stackmap_amd64.s
new file mode 100755
index 0000000..55cb134
--- /dev/null
+++ b/examples/ch3-xx/stackmap/stackmap_amd64.s
@@ -0,0 +1,48 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "funcdata.h"
+#include "textflag.h"
+
+// func X(b []byte) []byte
+TEXT ·X(SB), $48-48
+	// The 48-byte frame only holds growSlice's arguments and results, so X
+	// has no pointer-holding locals of its own. Declaring that gives the
+	// runtime the local stack map it needs for a frame that makes a call.
+	NO_LOCAL_POINTERS
+	MOVQ b_base+0(FP), BX
+	MOVQ b_len+8(FP), CX
+	MOVQ b_cap+16(FP), DX
+
+	CMPQ CX, DX
+	JL afterGrow
+
+	// Set up the growSlice call.
+	MOVQ BX, gs_base-48(SP)
+	MOVQ CX, gs_len-40(SP)
+	MOVQ DX, gs_cap-32(SP)
+
+	CALL ·growSlice(SB)
+
+	MOVQ gs_base-24(SP), BX
+	MOVQ gs_len-16(SP), CX
+	MOVQ gs_cap-8(SP), DX
+
+afterGrow:
+	// At this point, we have adequate capacity to increase len + 1 and the
+	// following register scheme:
+	// BX - b_base
+	// CX - b_len
+	// DX - b_cap
+
+	// Write base/cap results.
+	MOVQ BX, ret_base+24(FP)
+	MOVQ DX, ret_cap+40(FP)
+
+	// Write new element to b and increment the length.
+	LEAQ (BX)(CX*1), BX
+	MOVB $3, (BX)
+	ADDQ $1, CX
+	MOVQ CX, ret_len+32(FP)
+
+	RET
diff --git a/examples/ch3-xx/stackmap/stackmap_test.go b/examples/ch3-xx/stackmap/stackmap_test.go
new file mode 100755
index 0000000..4116132
--- /dev/null
+++ b/examples/ch3-xx/stackmap/stackmap_test.go
@@ -0,0 +1,47 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package stackmap
+
+import (
+	"bytes"
+	"testing"
+)
+
+func TestX(t *testing.T) {
+	b := make([]byte, 0, 3)
+
+	for _, want := range [][]byte{
+		mkSlice(3, 3),
+		mkSlice(3, 3, 3),
+		mkSlice(3, 3, 3, 3),
+		mkSlice(10, 3, 3, 3, 3),
+		mkSlice(10, 3, 3, 3, 3, 3),
+		mkSlice(10, 3, 3, 3, 3, 3, 3),
+		mkSlice(10, 3, 3, 3, 3, 3, 3, 3),
+		mkSlice(10, 3, 3, 3, 3, 3, 3, 3, 3),
+		mkSlice(10, 3, 3, 3, 3, 3, 3, 3, 3, 3),
+		mkSlice(10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
+		mkSlice(20, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
+	} {
+		b = X(b)
+		if !slicesEqual(b, want) {
+			t.Fatalf("got %v[cap=%d]; want %v[cap=%d]",
+				b, cap(b), want, cap(want))
+		}
+	}
+}
+
+func mkSlice(cap int, vs ...byte) []byte {
+	b1 := make([]byte, 0, cap)
+	for _, v := range vs {
+		b1 = append(b1, v)
+	}
+	return b1
+}
+func slicesEqual(b0, b1 []byte) bool {
+	if cap(b0) != cap(b1) {
+		return false
+	}
+	return bytes.Equal(b0, b1)
+}
diff --git a/examples/ch3-xx/sum/sum.go b/examples/ch3-xx/sum/sum.go
new file mode 100644
index 0000000..ddc536c
--- /dev/null
+++ b/examples/ch3-xx/sum/sum.go
@@ -0,0 +1,7 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package sum
+
+// Sum returns a + b. The implementation is in sum_amd64.s.
+func Sum(a int, b int) int
diff --git a/examples/ch3-xx/sum/sum_amd64.s b/examples/ch3-xx/sum/sum_amd64.s
new file mode 100644
index 0000000..e88c85e
--- /dev/null
+++ b/examples/ch3-xx/sum/sum_amd64.s
@@ -0,0 +1,12 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+#include "textflag.h"
+
+// func Sum(a int, b int) int
+TEXT ·Sum(SB), NOSPLIT, $0-24
+	MOVQ a+0(FP), BX // BX = a
+	MOVQ b+8(FP), CX // CX = b (BP is left alone: it is the frame pointer)
+	ADDQ CX, BX // BX = a+b
+	MOVQ BX, ret+16(FP) // return a+b
+	RET
diff --git a/examples/ch3-xx/sum/sum_test.go b/examples/ch3-xx/sum/sum_test.go
new file mode 100644
index 0000000..f8a4551
--- /dev/null
+++ b/examples/ch3-xx/sum/sum_test.go
@@ -0,0 +1,14 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package sum
+
+import "testing"
+
+func TestSum(t *testing.T) {
+	result := Sum(1, 1)
+
+	if result != 2 {
+		t.Errorf("%d does not equal 2", result)
+	}
+}
diff --git a/examples/ch3-xx/vector/sum_amd64.s b/examples/ch3-xx/vector/sum_amd64.s
new file mode 100644
index 0000000..caa74db
--- /dev/null
+++ b/examples/ch3-xx/vector/sum_amd64.s
@@ -0,0 +1,13 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+// func SumVec(vec1 []int32, vec2 []int32) [4]int32
+// Adds the first four int32 lanes; both slices must hold at least four elements (not checked).
+TEXT ·SumVec+0(SB), $0
+	MOVQ vec1+0(FP), BX // BX = &vec1[0]
+	MOVQ vec2+24(FP), CX // CX = &vec2[0]
+	MOVUPS (BX), X0 // unaligned 16-byte load of vec1[0:4]
+	MOVUPS (CX), X1 // unaligned 16-byte load of vec2[0:4]
+	PADDL X0, X1 // packed 32-bit integer add (ADDPS would treat the lanes as floats)
+	MOVUPS X1, result+48(FP)
+	RET
diff --git a/examples/ch3-xx/vector/vector.go b/examples/ch3-xx/vector/vector.go
new file mode 100644
index 0000000..705d591
--- /dev/null
+++ b/examples/ch3-xx/vector/vector.go
@@ -0,0 +1,11 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package vector
+
+// Find reports whether num occurs in vec. It is a linear scan implemented in vector_amd64.s.
+func Find(vec []int, num int) bool
+
+// SumVec returns the element-wise sum of the first four elements of vec1 and
+// vec2. Both slices must have at least four elements; this is not checked.
+func SumVec(vec1 []int32, vec2 []int32) [4]int32
diff --git a/examples/ch3-xx/vector/vector_amd64.s b/examples/ch3-xx/vector/vector_amd64.s
new file mode 100644
index 0000000..2698edc
--- /dev/null
+++ b/examples/ch3-xx/vector/vector_amd64.s
@@ -0,0 +1,28 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+TEXT ·Find+0(SB),$0
+	MOVQ $0, SI // SI = i, index of the element about to be checked
+	MOVQ vec+0(FP), BX // BX = &vec[0]
+	MOVQ vec_len+8(FP), CX // CX = len(vec)
+	MOVQ num+24(FP), DX // DX = num
+
+start:
+	CMPQ SI, CX
+	JGE notfound // i >= len(vec): stop before reading past the last element
+	CMPQ (BX), DX
+	JNE notequal
+	JE found
+
+found:
+	MOVB $1, ret+32(FP) // the result is a one-byte bool
+	RET
+
+notequal:
+	INCQ SI
+	LEAQ +8(BX), BX // advance to the next element
+	JMP start
+
+notfound:
+	MOVB $0, ret+32(FP)
+	RET
diff --git a/examples/ch3-xx/vector/vector_test.go b/examples/ch3-xx/vector/vector_test.go
new file mode 100644
index 0000000..97b6949
--- /dev/null
+++ b/examples/ch3-xx/vector/vector_test.go
@@ -0,0 +1,40 @@
+// Copyright © 2017 ChaiShushan .
+// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
+
+package vector
+
+import "testing"
+
+func TestFind(t *testing.T) {
+	vec := []int{1, 2, 3, 4, 5, 6, 7, 8}
+	if result := Find(vec, 5); result != true {
+		t.Errorf("Could not find number in vector, got: %v", result)
+	}
+
+	if result := Find(vec, 10); result != false {
+		t.Errorf("Returned true when false was expected")
+	}
+}
+
+func TestSum(t *testing.T) {
+	vec1 := []int32{1, 2, 3, 5}
+	vec2 := []int32{1, 2, 3, 5}
+
+	result := SumVec(vec1, vec2)
+
+	if result[0] != 2 {
+		t.Errorf("Expected 2, got %v, result was: %v", result[0], result)
+	}
+
+	if result[1] != 4 {
+		t.Errorf("Expected 4, got %v, result was: %v", result[1], result)
+	}
+
+	if result[2] != 6 {
+		t.Errorf("Expected 6, got %v, result was: %v", result[2], result)
+	}
+
+	if result[3] != 10 {
+		t.Errorf("Expected 10, got %v, result was: %v", result[3], result)
+	}
+}