diff --git a/go.mod b/go.mod index 63b90af8..c4fcaa4f 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/lestrrat-go/strftime v1.0.6 github.com/lithammer/shortuuid/v4 v4.0.0 github.com/mattn/go-isatty v0.0.17 - github.com/minio/minio-go/v7 v7.0.47 + github.com/minio/minio-go/v7 v7.0.50 github.com/prep/average v0.0.0-20200506183628-d26c465f48c3 github.com/prometheus/client_golang v1.14.0 github.com/shirou/gopsutil/v3 v3.22.11 @@ -31,8 +31,7 @@ require ( github.com/vektah/gqlparser/v2 v2.5.1 github.com/xeipuuv/gojsonschema v1.2.0 go.uber.org/zap v1.24.0 - golang.org/x/mod v0.7.0 - golang.org/x/net v0.7.0 + golang.org/x/mod v0.8.0 ) require ( @@ -58,8 +57,8 @@ require ( github.com/iancoleman/orderedmap v0.2.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.15.15 // indirect - github.com/klauspost/cpuid/v2 v2.2.3 // indirect + github.com/klauspost/compress v1.16.4 // indirect + github.com/klauspost/cpuid/v2 v2.2.4 // indirect github.com/labstack/gommon v0.4.0 // indirect github.com/leodido/go-urn v1.2.1 // indirect github.com/libdns/libdns v0.2.1 // indirect @@ -97,11 +96,12 @@ require ( go.uber.org/atomic v1.10.0 // indirect go.uber.org/goleak v1.1.12 // indirect go.uber.org/multierr v1.9.0 // indirect - golang.org/x/crypto v0.5.0 // indirect - golang.org/x/sys v0.5.0 // indirect - golang.org/x/text v0.7.0 // indirect + golang.org/x/crypto v0.7.0 // indirect + golang.org/x/net v0.8.0 // indirect + golang.org/x/sys v0.7.0 // indirect + golang.org/x/text v0.8.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.4.0 // indirect + golang.org/x/tools v0.6.0 // indirect google.golang.org/protobuf v1.28.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 5c44f7e1..6260c227 100644 --- a/go.sum +++ b/go.sum @@ -108,12 +108,12 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/kevinmbeaulieu/eq-go v1.0.0/go.mod h1:G3S8ajA56gKBZm4UB9AOyoOS37JO3roToPzKNM8dtdM= -github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw= -github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4= +github.com/klauspost/compress v1.16.4 h1:91KN02FnsOYhuunwU4ssRe8lc2JosWmizWa91B5v1PU= +github.com/klauspost/compress v1.16.4/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= -github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= +github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= @@ -163,8 +163,8 @@ github.com/miekg/dns v1.1.50 h1:DQUfb9uc6smULcREF09Uc+/Gd46YWqJd5DbpPE9xkcA= github.com/miekg/dns v1.1.50/go.mod h1:e3IlAVfNqAllflbibAZEWOXOQ+Ynzk/dDozDxY7XnME= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= -github.com/minio/minio-go/v7 v7.0.47 h1:sLiuCKGSIcn/MI6lREmTzX91DX/oRau4ia0j6e6eOSs= -github.com/minio/minio-go/v7 v7.0.47/go.mod h1:nCrRzjoSUQh8hgKKtu3Y708OLvRLtuASMg2/nvmbarw= +github.com/minio/minio-go/v7 v7.0.50 h1:4IL4V8m/kI90ZL6GupCARZVrBv8/XrcKcJhaJ3iz68k= +github.com/minio/minio-go/v7 v7.0.50/go.mod h1:IbbodHyjUAguneyucUaahv+VMNs/EOTV9du7A7/Z3HU= github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= github.com/mitchellh/mapstructure v1.3.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= @@ -287,14 +287,14 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= -golang.org/x/crypto v0.5.0 h1:U/0M97KRkSFvyD/3FSmdP5W5swImpNgle/EHFhOsQPE= -golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU= +golang.org/x/crypto v0.7.0 h1:AvwMYaRytfdeVt3u6mLaxYtErKYjxA2OXjJ1HHq6t3A= +golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.7.0 h1:LapD9S96VoQRhi/GrNTqeBJFrUjs5UHCAtTlgwA5oZA= -golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -309,8 +309,8 @@ golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su golang.org/x/net v0.0.0-20220630215102-69896b714898/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= -golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= -golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= +golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -341,8 +341,8 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -351,8 +351,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= +golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= @@ -366,8 +366,8 @@ golang.org/x/tools v0.1.6-0.20210726203631-07bc1bf47fb2/go.mod h1:o0xws9oXOQQZyj golang.org/x/tools v0.1.7/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo= golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.4.0 h1:7mTAgkunk3fr4GAloyyCasadO6h9zSsQZbwvcaIciV4= -golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= +golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/io/fs/disk.go b/io/fs/disk.go index f9a9aea5..3ecc5dd5 100644 --- a/io/fs/disk.go +++ b/io/fs/disk.go @@ -366,7 +366,7 @@ func (fs *diskFilesystem) WriteFileReader(path string, r io.Reader) (int64, bool } func (fs *diskFilesystem) WriteFile(path string, data []byte) (int64, bool, error) { - return fs.WriteFileReader(path, bytes.NewBuffer(data)) + return fs.WriteFileReader(path, bytes.NewReader(data)) } func (fs *diskFilesystem) WriteFileSafe(path string, data []byte) (int64, bool, error) { diff --git a/io/fs/mem.go b/io/fs/mem.go index dd7efd1a..769d5883 100644 --- a/io/fs/mem.go +++ b/io/fs/mem.go @@ -420,11 +420,11 @@ func (fs *memFilesystem) WriteFileReader(path string, r io.Reader) (int64, bool, } func (fs *memFilesystem) WriteFile(path string, data []byte) (int64, bool, error) { - return fs.WriteFileReader(path, bytes.NewBuffer(data)) + return fs.WriteFileReader(path, bytes.NewReader(data)) } func (fs *memFilesystem) WriteFileSafe(path string, data []byte) (int64, bool, error) { - return fs.WriteFileReader(path, bytes.NewBuffer(data)) + return fs.WriteFileReader(path, bytes.NewReader(data)) } func (fs *memFilesystem) Purge(size int64) int64 { diff --git a/io/fs/s3.go b/io/fs/s3.go index 20c64599..2707522b 100644 --- a/io/fs/s3.go +++ b/io/fs/s3.go @@ -302,6 +302,16 @@ func (fs *s3Filesystem) write(path string, r io.Reader) (int64, bool, error) { size = sizer.Size() } + disableMultipart := false + var partSize uint64 = 0 + if size == -1 { + partSize = (16 * 1024 * 1024) + } else { + if size < (32 * 1024 * 1024) { + disableMultipart = true + } + } + info, err := fs.client.PutObject(ctx, fs.bucket, path, r, size, minio.PutObjectOptions{ UserMetadata: map[string]string{}, UserTags: map[string]string{}, @@ -317,11 +327,11 @@ func (fs *s3Filesystem) write(path string, r io.Reader) (int64, bool, error) { NumThreads: 0, StorageClass: "", WebsiteRedirectLocation: "", - PartSize: 0, + PartSize: partSize, LegalHold: "", SendContentMd5: false, DisableContentSha256: false, - DisableMultipart: false, + DisableMultipart: disableMultipart, Internal: minio.AdvancedPutOptions{}, }) if err != nil { @@ -343,13 +353,11 @@ func (fs *s3Filesystem) WriteFileReader(path string, r io.Reader) (int64, bool, } func (fs *s3Filesystem) WriteFile(path string, data []byte) (int64, bool, error) { - rs := NewReadSizer(bytes.NewBuffer(data), int64(len(data))) - return fs.WriteFileReader(path, rs) + return fs.WriteFileReader(path, bytes.NewReader(data)) } func (fs *s3Filesystem) WriteFileSafe(path string, data []byte) (int64, bool, error) { - rs := NewReadSizer(bytes.NewBuffer(data), int64(len(data))) - return fs.WriteFileReader(path, rs) + return fs.WriteFileReader(path, bytes.NewReader(data)) } func (fs *s3Filesystem) Rename(src, dst string) error { diff --git a/vendor/github.com/klauspost/compress/s2/README.md b/vendor/github.com/klauspost/compress/s2/README.md index 1d80c42a..8284bb08 100644 --- a/vendor/github.com/klauspost/compress/s2/README.md +++ b/vendor/github.com/klauspost/compress/s2/README.md @@ -20,11 +20,12 @@ This is important, so you don't have to worry about spending CPU cycles on alrea * Concurrent stream compression * Faster decompression, even for Snappy compatible content * Concurrent Snappy/S2 stream decompression -* Ability to quickly skip forward in compressed stream +* Skip forward in compressed stream * Random seeking with indexes * Compatible with reading Snappy compressed content * Smaller block size overhead on incompressible blocks * Block concatenation +* Block Dictionary support * Uncompressed stream mode * Automatic stream size padding * Snappy compatible block compression @@ -594,6 +595,123 @@ Best... 10737418240 -> 4210602774 [39.21%]; 42.96s, 254.4MB/s Decompression speed should be around the same as using the 'better' compression mode. +## Dictionaries + +*Note: S2 dictionary compression is currently at an early implementation stage, with no assembly for +neither encoding nor decoding. Performance improvements can be expected in the future.* + +Adding dictionaries allow providing a custom dictionary that will serve as lookup in the beginning of blocks. + +The same dictionary *must* be used for both encoding and decoding. +S2 does not keep track of whether the same dictionary is used, +and using the wrong dictionary will most often not result in an error when decompressing. + +Blocks encoded *without* dictionaries can be decompressed seamlessly *with* a dictionary. +This means it is possible to switch from an encoding without dictionaries to an encoding with dictionaries +and treat the blocks similarly. + +Similar to [zStandard dictionaries](https://github.com/facebook/zstd#the-case-for-small-data-compression), +the same usage scenario applies to S2 dictionaries. + +> Training works if there is some correlation in a family of small data samples. The more data-specific a dictionary is, the more efficient it is (there is no universal dictionary). Hence, deploying one dictionary per type of data will provide the greatest benefits. Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm will gradually use previously decoded content to better compress the rest of the file. + +S2 further limits the dictionary to only be enabled on the first 64KB of a block. +This will remove any negative (speed) impacts of the dictionaries on bigger blocks. + +### Compression + +Using the [github_users_sample_set](https://github.com/facebook/zstd/releases/download/v1.1.3/github_users_sample_set.tar.zst) +and a 64KB dictionary trained with zStandard the following sizes can be achieved. + +| | Default | Better | Best | +|--------------------|------------------|------------------|-----------------------| +| Without Dictionary | 3362023 (44.92%) | 3083163 (41.19%) | 3057944 (40.86%) | +| With Dictionary | 921524 (12.31%) | 873154 (11.67%) | 785503 bytes (10.49%) | + +So for highly repetitive content, this case provides an almost 3x reduction in size. + +For less uniform data we will use the Go source code tree. +Compressing First 64KB of all `.go` files in `go/src`, Go 1.19.5, 8912 files, 51253563 bytes input: + +| | Default | Better | Best | +|--------------------|-------------------|-------------------|-------------------| +| Without Dictionary | 22955767 (44.79%) | 20189613 (39.39% | 19482828 (38.01%) | +| With Dictionary | 19654568 (38.35%) | 16289357 (31.78%) | 15184589 (29.63%) | +| Saving/file | 362 bytes | 428 bytes | 472 bytes | + + +### Creating Dictionaries + +There are no tools to create dictionaries in S2. +However, there are multiple ways to create a useful dictionary: + +#### Using a Sample File + +If your input is very uniform, you can just use a sample file as the dictionary. + +For example in the `github_users_sample_set` above, the average compression only goes up from +10.49% to 11.48% by using the first file as dictionary compared to using a dedicated dictionary. + +```Go + // Read a sample + sample, err := os.ReadFile("sample.json") + + // Create a dictionary. + dict := s2.MakeDict(sample, nil) + + // b := dict.Bytes() will provide a dictionary that can be saved + // and reloaded with s2.NewDict(b). + + // To encode: + encoded := dict.Encode(nil, file) + + // To decode: + decoded, err := dict.Decode(nil, file) +``` + +#### Using Zstandard + +Zstandard dictionaries can easily be converted to S2 dictionaries. + +This can be helpful to generate dictionaries for files that don't have a fixed structure. + + +Example, with training set files placed in `./training-set`: + +`λ zstd -r --train-fastcover training-set/* --maxdict=65536 -o name.dict` + +This will create a dictionary of 64KB, that can be converted to a dictionary like this: + +```Go + // Decode the Zstandard dictionary. + insp, err := zstd.InspectDictionary(zdict) + if err != nil { + panic(err) + } + + // We are only interested in the contents. + // Assume that files start with "// Copyright (c) 2023". + // Search for the longest match for that. + // This may save a few bytes. + dict := s2.MakeDict(insp.Content(), []byte("// Copyright (c) 2023")) + + // b := dict.Bytes() will provide a dictionary that can be saved + // and reloaded with s2.NewDict(b). + + // We can now encode using this dictionary + encodedWithDict := dict.Encode(nil, payload) + + // To decode content: + decoded, err := dict.Decode(nil, encodedWithDict) +``` + +It is recommended to save the dictionary returned by ` b:= dict.Bytes()`, since that will contain only the S2 dictionary. + +This dictionary can later be loaded using `s2.NewDict(b)`. The dictionary then no longer requires `zstd` to be initialized. + +Also note how `s2.MakeDict` allows you to search for a common starting sequence of your files. +This can be omitted, at the expense of a few bytes. + # Snappy Compatibility S2 now offers full compatibility with Snappy. @@ -929,6 +1047,72 @@ The first copy of a block cannot be a repeat offset and the offset is reset on e Default streaming block size is 1MB. +# Dictionary Encoding + +Adding dictionaries allow providing a custom dictionary that will serve as lookup in the beginning of blocks. + +A dictionary provides an initial repeat value that can be used to point to a common header. + +Other than that the dictionary contains values that can be used as back-references. + +Often used data should be placed at the *end* of the dictionary since offsets < 2048 bytes will be smaller. + +## Format + +Dictionary *content* must at least 16 bytes and less or equal to 64KiB (65536 bytes). + +Encoding: `[repeat value (uvarint)][dictionary content...]` + +Before the dictionary content, an unsigned base-128 (uvarint) encoded value specifying the initial repeat offset. +This value is an offset into the dictionary content and not a back-reference offset, +so setting this to 0 will make the repeat value point to the first value of the dictionary. + +The value must be less than the dictionary length-8 + +## Encoding + +From the decoder point of view the dictionary content is seen as preceding the encoded content. + +`[dictionary content][decoded output]` + +Backreferences to the dictionary are encoded as ordinary backreferences that have an offset before the start of the decoded block. + +Matches copying from the dictionary are **not** allowed to cross from the dictionary into the decoded data. +However, if a copy ends at the end of the dictionary the next repeat will point to the start of the decoded buffer, which is allowed. + +The first match can be a repeat value, which will use the repeat offset stored in the dictionary. + +When 64KB (65536 bytes) has been en/decoded it is no longer allowed to reference the dictionary, +neither by a copy nor repeat operations. +If the boundary is crossed while copying from the dictionary, the operation should complete, +but the next instruction is not allowed to reference the dictionary. + +Valid blocks encoded *without* a dictionary can be decoded with any dictionary. +There are no checks whether the supplied dictionary is the correct for a block. +Because of this there is no overhead by using a dictionary. + +## Example + +This is the dictionary content. Elements are separated by `[]`. + +Dictionary: `[0x0a][Yesterday 25 bananas were added to Benjamins brown bag]`. + +Initial repeat offset is set at 10, which is the letter `2`. + +Encoded `[LIT "10"][REPEAT len=10][LIT "hich"][MATCH off=50 len=6][MATCH off=31 len=6][MATCH off=61 len=10]` + +Decoded: `[10][ bananas w][hich][ were ][brown ][were added]` + +Output: `10 bananas which were brown were added` + + +## Streams + +For streams each block can use the dictionary. + +The dictionary cannot not currently be provided on the stream. + + # LICENSE This code is based on the [Snappy-Go](https://github.com/golang/snappy) implementation. diff --git a/vendor/github.com/klauspost/compress/s2/decode.go b/vendor/github.com/klauspost/compress/s2/decode.go index 00c5cc72..6c7feafc 100644 --- a/vendor/github.com/klauspost/compress/s2/decode.go +++ b/vendor/github.com/klauspost/compress/s2/decode.go @@ -9,11 +9,7 @@ import ( "encoding/binary" "errors" "fmt" - "io" - "io/ioutil" - "math" - "runtime" - "sync" + "strconv" ) var ( @@ -27,16 +23,6 @@ var ( ErrUnsupported = errors.New("s2: unsupported input") ) -// ErrCantSeek is returned if the stream cannot be seeked. -type ErrCantSeek struct { - Reason string -} - -// Error returns the error as string. -func (e ErrCantSeek) Error() string { - return fmt.Sprintf("s2: Can't seek because %s", e.Reason) -} - // DecodedLen returns the length of the decoded block. func DecodedLen(src []byte) (int, error) { v, _, err := decodedLen(src) @@ -83,968 +69,369 @@ func Decode(dst, src []byte) ([]byte, error) { return dst, nil } -// NewReader returns a new Reader that decompresses from r, using the framing -// format described at -// https://github.com/google/snappy/blob/master/framing_format.txt with S2 changes. -func NewReader(r io.Reader, opts ...ReaderOption) *Reader { - nr := Reader{ - r: r, - maxBlock: maxBlockSize, - } - for _, opt := range opts { - if err := opt(&nr); err != nil { - nr.err = err - return &nr - } - } - nr.maxBufSize = MaxEncodedLen(nr.maxBlock) + checksumSize - if nr.lazyBuf > 0 { - nr.buf = make([]byte, MaxEncodedLen(nr.lazyBuf)+checksumSize) - } else { - nr.buf = make([]byte, MaxEncodedLen(defaultBlockSize)+checksumSize) - } - nr.readHeader = nr.ignoreStreamID - nr.paramsOK = true - return &nr -} - -// ReaderOption is an option for creating a decoder. -type ReaderOption func(*Reader) error - -// ReaderMaxBlockSize allows to control allocations if the stream -// has been compressed with a smaller WriterBlockSize, or with the default 1MB. -// Blocks must be this size or smaller to decompress, -// otherwise the decoder will return ErrUnsupported. +// s2DecodeDict writes the decoding of src to dst. It assumes that the varint-encoded +// length of the decompressed bytes has already been read, and that len(dst) +// equals that length. // -// For streams compressed with Snappy this can safely be set to 64KB (64 << 10). -// -// Default is the maximum limit of 4MB. -func ReaderMaxBlockSize(blockSize int) ReaderOption { - return func(r *Reader) error { - if blockSize > maxBlockSize || blockSize <= 0 { - return errors.New("s2: block size too large. Must be <= 4MB and > 0") - } - if r.lazyBuf == 0 && blockSize < defaultBlockSize { - r.lazyBuf = blockSize - } - r.maxBlock = blockSize - return nil +// It returns 0 on success or a decodeErrCodeXxx error code on failure. +func s2DecodeDict(dst, src []byte, dict *Dict) int { + if dict == nil { + return s2Decode(dst, src) } -} + const debug = false + const debugErrs = debug -// ReaderAllocBlock allows to control upfront stream allocations -// and not allocate for frames bigger than this initially. -// If frames bigger than this is seen a bigger buffer will be allocated. -// -// Default is 1MB, which is default output size. -func ReaderAllocBlock(blockSize int) ReaderOption { - return func(r *Reader) error { - if blockSize > maxBlockSize || blockSize < 1024 { - return errors.New("s2: invalid ReaderAllocBlock. Must be <= 4MB and >= 1024") - } - r.lazyBuf = blockSize - return nil + if debug { + fmt.Println("Starting decode, dst len:", len(dst)) } -} + var d, s, length int + offset := len(dict.dict) - dict.repeat -// ReaderIgnoreStreamIdentifier will make the reader skip the expected -// stream identifier at the beginning of the stream. -// This can be used when serving a stream that has been forwarded to a specific point. -func ReaderIgnoreStreamIdentifier() ReaderOption { - return func(r *Reader) error { - r.ignoreStreamID = true - return nil - } -} - -// ReaderSkippableCB will register a callback for chuncks with the specified ID. -// ID must be a Reserved skippable chunks ID, 0x80-0xfd (inclusive). -// For each chunk with the ID, the callback is called with the content. -// Any returned non-nil error will abort decompression. -// Only one callback per ID is supported, latest sent will be used. -func ReaderSkippableCB(id uint8, fn func(r io.Reader) error) ReaderOption { - return func(r *Reader) error { - if id < 0x80 || id > 0xfd { - return fmt.Errorf("ReaderSkippableCB: Invalid id provided, must be 0x80-0xfd (inclusive)") - } - r.skippableCB[id] = fn - return nil - } -} - -// ReaderIgnoreCRC will make the reader skip CRC calculation and checks. -func ReaderIgnoreCRC() ReaderOption { - return func(r *Reader) error { - r.ignoreCRC = true - return nil - } -} - -// Reader is an io.Reader that can read Snappy-compressed bytes. -type Reader struct { - r io.Reader - err error - decoded []byte - buf []byte - skippableCB [0x80]func(r io.Reader) error - blockStart int64 // Uncompressed offset at start of current. - index *Index - - // decoded[i:j] contains decoded bytes that have not yet been passed on. - i, j int - // maximum block size allowed. - maxBlock int - // maximum expected buffer size. - maxBufSize int - // alloc a buffer this size if > 0. - lazyBuf int - readHeader bool - paramsOK bool - snappyFrame bool - ignoreStreamID bool - ignoreCRC bool -} - -// ensureBufferSize will ensure that the buffer can take at least n bytes. -// If false is returned the buffer exceeds maximum allowed size. -func (r *Reader) ensureBufferSize(n int) bool { - if n > r.maxBufSize { - r.err = ErrCorrupt - return false - } - if cap(r.buf) >= n { - return true - } - // Realloc buffer. - r.buf = make([]byte, n) - return true -} - -// Reset discards any buffered data, resets all state, and switches the Snappy -// reader to read from r. This permits reusing a Reader rather than allocating -// a new one. -func (r *Reader) Reset(reader io.Reader) { - if !r.paramsOK { - return - } - r.index = nil - r.r = reader - r.err = nil - r.i = 0 - r.j = 0 - r.blockStart = 0 - r.readHeader = r.ignoreStreamID -} - -func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { - if _, r.err = io.ReadFull(r.r, p); r.err != nil { - if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { - r.err = ErrCorrupt - } - return false - } - return true -} - -// skippable will skip n bytes. -// If the supplied reader supports seeking that is used. -// tmp is used as a temporary buffer for reading. -// The supplied slice does not need to be the size of the read. -func (r *Reader) skippable(tmp []byte, n int, allowEOF bool, id uint8) (ok bool) { - if id < 0x80 { - r.err = fmt.Errorf("interbal error: skippable id < 0x80") - return false - } - if fn := r.skippableCB[id-0x80]; fn != nil { - rd := io.LimitReader(r.r, int64(n)) - r.err = fn(rd) - if r.err != nil { - return false - } - _, r.err = io.CopyBuffer(ioutil.Discard, rd, tmp) - return r.err == nil - } - if rs, ok := r.r.(io.ReadSeeker); ok { - _, err := rs.Seek(int64(n), io.SeekCurrent) - if err == nil { - return true - } - if err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { - r.err = ErrCorrupt - return false - } - } - for n > 0 { - if n < len(tmp) { - tmp = tmp[:n] - } - if _, r.err = io.ReadFull(r.r, tmp); r.err != nil { - if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { - r.err = ErrCorrupt + // As long as we can read at least 5 bytes... + for s < len(src)-5 { + // Removing bounds checks is SLOWER, when if doing + // in := src[s:s+5] + // Checked on Go 1.18 + switch src[s] & 0x03 { + case tagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + x = uint32(src[s-1]) + case x == 61: + in := src[s : s+3] + x = uint32(in[1]) | uint32(in[2])<<8 + s += 3 + case x == 62: + in := src[s : s+4] + // Load as 32 bit and shift down. + x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + x >>= 8 + s += 4 + case x == 63: + in := src[s : s+5] + x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24 + s += 5 } - return false - } - n -= len(tmp) - } - return true -} - -// Read satisfies the io.Reader interface. -func (r *Reader) Read(p []byte) (int, error) { - if r.err != nil { - return 0, r.err - } - for { - if r.i < r.j { - n := copy(p, r.decoded[r.i:r.j]) - r.i += n - return n, nil - } - if !r.readFull(r.buf[:4], true) { - return 0, r.err - } - chunkType := r.buf[0] - if !r.readHeader { - if chunkType != chunkTypeStreamIdentifier { - r.err = ErrCorrupt - return 0, r.err + length = int(x) + 1 + if debug { + fmt.Println("literals, length:", length, "d-after:", d+length) } - r.readHeader = true - } - chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 - - // The chunk types are specified at - // https://github.com/google/snappy/blob/master/framing_format.txt - switch chunkType { - case chunkTypeCompressedData: - r.blockStart += int64(r.j) - // Section 4.2. Compressed data (chunk type 0x00). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return 0, r.err - } - if !r.ensureBufferSize(chunkLen) { - if r.err == nil { - r.err = ErrUnsupported + if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) { + if debugErrs { + fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s) } - return 0, r.err - } - buf := r.buf[:chunkLen] - if !r.readFull(buf, false) { - return 0, r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - buf = buf[checksumSize:] - - n, err := DecodedLen(buf) - if err != nil { - r.err = err - return 0, r.err - } - if r.snappyFrame && n > maxSnappyBlockSize { - r.err = ErrCorrupt - return 0, r.err + return decodeErrCodeCorrupt } - if n > len(r.decoded) { - if n > r.maxBlock { - r.err = ErrCorrupt - return 0, r.err - } - r.decoded = make([]byte, n) - } - if _, err := Decode(r.decoded, buf); err != nil { - r.err = err - return 0, r.err - } - if !r.ignoreCRC && crc(r.decoded[:n]) != checksum { - r.err = ErrCRC - return 0, r.err - } - r.i, r.j = 0, n + copy(dst[d:], src[s:s+length]) + d += length + s += length continue - case chunkTypeUncompressedData: - r.blockStart += int64(r.j) - // Section 4.3. Uncompressed data (chunk type 0x01). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return 0, r.err - } - if !r.ensureBufferSize(chunkLen) { - if r.err == nil { - r.err = ErrUnsupported + case tagCopy1: + s += 2 + toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + length = int(src[s-2]) >> 2 & 0x7 + if toffset == 0 { + if debug { + fmt.Print("(repeat) ") } - return 0, r.err - } - buf := r.buf[:checksumSize] - if !r.readFull(buf, false) { - return 0, r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - // Read directly into r.decoded instead of via r.buf. - n := chunkLen - checksumSize - if r.snappyFrame && n > maxSnappyBlockSize { - r.err = ErrCorrupt - return 0, r.err - } - if n > len(r.decoded) { - if n > r.maxBlock { - r.err = ErrCorrupt - return 0, r.err - } - r.decoded = make([]byte, n) - } - if !r.readFull(r.decoded[:n], false) { - return 0, r.err - } - if !r.ignoreCRC && crc(r.decoded[:n]) != checksum { - r.err = ErrCRC - return 0, r.err - } - r.i, r.j = 0, n - continue - - case chunkTypeStreamIdentifier: - // Section 4.1. Stream identifier (chunk type 0xff). - if chunkLen != len(magicBody) { - r.err = ErrCorrupt - return 0, r.err - } - if !r.readFull(r.buf[:len(magicBody)], false) { - return 0, r.err - } - if string(r.buf[:len(magicBody)]) != magicBody { - if string(r.buf[:len(magicBody)]) != magicBodySnappy { - r.err = ErrCorrupt - return 0, r.err - } else { - r.snappyFrame = true + // keep last offset + switch length { + case 5: + length = int(src[s]) + 4 + s += 1 + case 6: + in := src[s : s+2] + length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8) + s += 2 + case 7: + in := src[s : s+3] + length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16) + s += 3 + default: // 0-> 4 } } else { - r.snappyFrame = false + offset = toffset } - continue + length += 4 + case tagCopy2: + in := src[s : s+3] + offset = int(uint32(in[1]) | uint32(in[2])<<8) + length = 1 + int(in[0])>>2 + s += 3 + + case tagCopy4: + in := src[s : s+5] + offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24) + length = 1 + int(in[0])>>2 + s += 5 } - if chunkType <= 0x7f { - // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). - // fmt.Printf("ERR chunktype: 0x%x\n", chunkType) - r.err = ErrUnsupported - return 0, r.err - } - // Section 4.4 Padding (chunk type 0xfe). - // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). - if chunkLen > maxChunkSize { - // fmt.Printf("ERR chunkLen: 0x%x\n", chunkLen) - r.err = ErrUnsupported - return 0, r.err + if offset <= 0 || length > len(dst)-d { + if debugErrs { + fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d) + } + return decodeErrCodeCorrupt } - // fmt.Printf("skippable: ID: 0x%x, len: 0x%x\n", chunkType, chunkLen) - if !r.skippable(r.buf, chunkLen, false, chunkType) { - return 0, r.err - } - } -} - -// DecodeConcurrent will decode the full stream to w. -// This function should not be combined with reading, seeking or other operations. -// Up to 'concurrent' goroutines will be used. -// If <= 0, runtime.NumCPU will be used. -// On success the number of bytes decompressed nil and is returned. -// This is mainly intended for bigger streams. -func (r *Reader) DecodeConcurrent(w io.Writer, concurrent int) (written int64, err error) { - if r.i > 0 || r.j > 0 || r.blockStart > 0 { - return 0, errors.New("DecodeConcurrent called after ") - } - if concurrent <= 0 { - concurrent = runtime.NumCPU() - } - - // Write to output - var errMu sync.Mutex - var aErr error - setErr := func(e error) (ok bool) { - errMu.Lock() - defer errMu.Unlock() - if e == nil { - return aErr == nil - } - if aErr == nil { - aErr = e - } - return false - } - hasErr := func() (ok bool) { - errMu.Lock() - v := aErr != nil - errMu.Unlock() - return v - } - - var aWritten int64 - toRead := make(chan []byte, concurrent) - writtenBlocks := make(chan []byte, concurrent) - queue := make(chan chan []byte, concurrent) - reUse := make(chan chan []byte, concurrent) - for i := 0; i < concurrent; i++ { - toRead <- make([]byte, 0, r.maxBufSize) - writtenBlocks <- make([]byte, 0, r.maxBufSize) - reUse <- make(chan []byte, 1) - } - // Writer - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - for toWrite := range queue { - entry := <-toWrite - reUse <- toWrite - if hasErr() { - writtenBlocks <- entry - continue - } - n, err := w.Write(entry) - want := len(entry) - writtenBlocks <- entry - if err != nil { - setErr(err) - continue - } - if n != want { - setErr(io.ErrShortWrite) - continue - } - aWritten += int64(n) - } - }() - - // Reader - defer func() { - close(queue) - if r.err != nil { - err = r.err - setErr(r.err) - } - wg.Wait() - if err == nil { - err = aErr - } - written = aWritten - }() - - for !hasErr() { - if !r.readFull(r.buf[:4], true) { - if r.err == io.EOF { - r.err = nil - } - return 0, r.err - } - chunkType := r.buf[0] - if !r.readHeader { - if chunkType != chunkTypeStreamIdentifier { - r.err = ErrCorrupt - return 0, r.err - } - r.readHeader = true - } - chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 - - // The chunk types are specified at - // https://github.com/google/snappy/blob/master/framing_format.txt - switch chunkType { - case chunkTypeCompressedData: - r.blockStart += int64(r.j) - // Section 4.2. Compressed data (chunk type 0x00). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return 0, r.err - } - if chunkLen > r.maxBufSize { - r.err = ErrCorrupt - return 0, r.err - } - orgBuf := <-toRead - buf := orgBuf[:chunkLen] - - if !r.readFull(buf, false) { - return 0, r.err - } - - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - buf = buf[checksumSize:] - - n, err := DecodedLen(buf) - if err != nil { - r.err = err - return 0, r.err - } - if r.snappyFrame && n > maxSnappyBlockSize { - r.err = ErrCorrupt - return 0, r.err - } - - if n > r.maxBlock { - r.err = ErrCorrupt - return 0, r.err - } - wg.Add(1) - - decoded := <-writtenBlocks - entry := <-reUse - queue <- entry - go func() { - defer wg.Done() - decoded = decoded[:n] - _, err := Decode(decoded, buf) - toRead <- orgBuf - if err != nil { - writtenBlocks <- decoded - setErr(err) - return + // copy from dict + if d < offset { + if d > MaxDictSrcOffset { + if debugErrs { + fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length) } - if !r.ignoreCRC && crc(decoded) != checksum { - writtenBlocks <- decoded - setErr(ErrCRC) - return + return decodeErrCodeCorrupt + } + startOff := len(dict.dict) - offset + d + if startOff < 0 || startOff+length > len(dict.dict) { + if debugErrs { + fmt.Printf("offset (%d) + length (%d) bigger than dict (%d)\n", offset, length, len(dict.dict)) } - entry <- decoded - }() + return decodeErrCodeCorrupt + } + if debug { + fmt.Println("dict copy, length:", length, "offset:", offset, "d-after:", d+length, "dict start offset:", startOff) + } + copy(dst[d:d+length], dict.dict[startOff:]) + d += length + continue + } + + if debug { + fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length) + } + + // Copy from an earlier sub-slice of dst to a later sub-slice. + // If no overlap, use the built-in copy: + if offset > length { + copy(dst[d:d+length], dst[d-offset:]) + d += length + continue + } + + // Unlike the built-in copy function, this byte-by-byte copy always runs + // forwards, even if the slices overlap. Conceptually, this is: + // + // d += forwardCopy(dst[d:d+length], dst[d-offset:]) + // + // We align the slices into a and b and show the compiler they are the same size. + // This allows the loop to run without bounds checks. + a := dst[d : d+length] + b := dst[d-offset:] + b = b[:len(a)] + for i := range a { + a[i] = b[i] + } + d += length + } + + // Remaining with extra checks... + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + if debugErrs { + fmt.Println("src went oob") + } + return decodeErrCodeCorrupt + } + x = uint32(src[s-1]) + case x == 61: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + if debugErrs { + fmt.Println("src went oob") + } + return decodeErrCodeCorrupt + } + x = uint32(src[s-2]) | uint32(src[s-1])<<8 + case x == 62: + s += 4 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + if debugErrs { + fmt.Println("src went oob") + } + return decodeErrCodeCorrupt + } + x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + case x == 63: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + if debugErrs { + fmt.Println("src went oob") + } + return decodeErrCodeCorrupt + } + x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + length = int(x) + 1 + if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) { + if debugErrs { + fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s) + } + return decodeErrCodeCorrupt + } + if debug { + fmt.Println("literals, length:", length, "d-after:", d+length) + } + + copy(dst[d:], src[s:s+length]) + d += length + s += length continue - case chunkTypeUncompressedData: - - // Section 4.3. Uncompressed data (chunk type 0x01). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return 0, r.err + case tagCopy1: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + if debugErrs { + fmt.Println("src went oob") + } + return decodeErrCodeCorrupt } - if chunkLen > r.maxBufSize { - r.err = ErrCorrupt - return 0, r.err - } - // Grab write buffer - orgBuf := <-writtenBlocks - buf := orgBuf[:checksumSize] - if !r.readFull(buf, false) { - return 0, r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - // Read content. - n := chunkLen - checksumSize - - if r.snappyFrame && n > maxSnappyBlockSize { - r.err = ErrCorrupt - return 0, r.err - } - if n > r.maxBlock { - r.err = ErrCorrupt - return 0, r.err - } - // Read uncompressed - buf = orgBuf[:n] - if !r.readFull(buf, false) { - return 0, r.err - } - - if !r.ignoreCRC && crc(buf) != checksum { - r.err = ErrCRC - return 0, r.err - } - entry := <-reUse - queue <- entry - entry <- buf - continue - - case chunkTypeStreamIdentifier: - // Section 4.1. Stream identifier (chunk type 0xff). - if chunkLen != len(magicBody) { - r.err = ErrCorrupt - return 0, r.err - } - if !r.readFull(r.buf[:len(magicBody)], false) { - return 0, r.err - } - if string(r.buf[:len(magicBody)]) != magicBody { - if string(r.buf[:len(magicBody)]) != magicBodySnappy { - r.err = ErrCorrupt - return 0, r.err - } else { - r.snappyFrame = true + length = int(src[s-2]) >> 2 & 0x7 + toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + if toffset == 0 { + if debug { + fmt.Print("(repeat) ") + } + // keep last offset + switch length { + case 5: + s += 1 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + if debugErrs { + fmt.Println("src went oob") + } + return decodeErrCodeCorrupt + } + length = int(uint32(src[s-1])) + 4 + case 6: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + if debugErrs { + fmt.Println("src went oob") + } + return decodeErrCodeCorrupt + } + length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8) + case 7: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + if debugErrs { + fmt.Println("src went oob") + } + return decodeErrCodeCorrupt + } + length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16) + default: // 0-> 4 } } else { - r.snappyFrame = false + offset = toffset } + length += 4 + case tagCopy2: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + if debugErrs { + fmt.Println("src went oob") + } + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + + case tagCopy4: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + if debugErrs { + fmt.Println("src went oob") + } + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-5])>>2 + offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + } + + if offset <= 0 || length > len(dst)-d { + if debugErrs { + fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d) + } + return decodeErrCodeCorrupt + } + + // copy from dict + if d < offset { + if d > MaxDictSrcOffset { + if debugErrs { + fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length) + } + return decodeErrCodeCorrupt + } + rOff := len(dict.dict) - (offset - d) + if debug { + fmt.Println("starting dict entry from dict offset", len(dict.dict)-rOff) + } + if rOff+length > len(dict.dict) { + if debugErrs { + fmt.Println("err: END offset", rOff+length, "bigger than dict", len(dict.dict), "dict offset:", rOff, "length:", length) + } + return decodeErrCodeCorrupt + } + if rOff < 0 { + if debugErrs { + fmt.Println("err: START offset", rOff, "less than 0", len(dict.dict), "dict offset:", rOff, "length:", length) + } + return decodeErrCodeCorrupt + } + copy(dst[d:d+length], dict.dict[rOff:]) + d += length continue } - if chunkType <= 0x7f { - // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). - // fmt.Printf("ERR chunktype: 0x%x\n", chunkType) - r.err = ErrUnsupported - return 0, r.err - } - // Section 4.4 Padding (chunk type 0xfe). - // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). - if chunkLen > maxChunkSize { - // fmt.Printf("ERR chunkLen: 0x%x\n", chunkLen) - r.err = ErrUnsupported - return 0, r.err + if debug { + fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length) } - // fmt.Printf("skippable: ID: 0x%x, len: 0x%x\n", chunkType, chunkLen) - if !r.skippable(r.buf, chunkLen, false, chunkType) { - return 0, r.err - } - } - return 0, r.err -} - -// Skip will skip n bytes forward in the decompressed output. -// For larger skips this consumes less CPU and is faster than reading output and discarding it. -// CRC is not checked on skipped blocks. -// io.ErrUnexpectedEOF is returned if the stream ends before all bytes have been skipped. -// If a decoding error is encountered subsequent calls to Read will also fail. -func (r *Reader) Skip(n int64) error { - if n < 0 { - return errors.New("attempted negative skip") - } - if r.err != nil { - return r.err - } - - for n > 0 { - if r.i < r.j { - // Skip in buffer. - // decoded[i:j] contains decoded bytes that have not yet been passed on. - left := int64(r.j - r.i) - if left >= n { - tmp := int64(r.i) + n - if tmp > math.MaxInt32 { - return errors.New("s2: internal overflow in skip") - } - r.i = int(tmp) - return nil - } - n -= int64(r.j - r.i) - r.i = r.j - } - - // Buffer empty; read blocks until we have content. - if !r.readFull(r.buf[:4], true) { - if r.err == io.EOF { - r.err = io.ErrUnexpectedEOF - } - return r.err - } - chunkType := r.buf[0] - if !r.readHeader { - if chunkType != chunkTypeStreamIdentifier { - r.err = ErrCorrupt - return r.err - } - r.readHeader = true - } - chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 - - // The chunk types are specified at - // https://github.com/google/snappy/blob/master/framing_format.txt - switch chunkType { - case chunkTypeCompressedData: - r.blockStart += int64(r.j) - // Section 4.2. Compressed data (chunk type 0x00). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return r.err - } - if !r.ensureBufferSize(chunkLen) { - if r.err == nil { - r.err = ErrUnsupported - } - return r.err - } - buf := r.buf[:chunkLen] - if !r.readFull(buf, false) { - return r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - buf = buf[checksumSize:] - - dLen, err := DecodedLen(buf) - if err != nil { - r.err = err - return r.err - } - if dLen > r.maxBlock { - r.err = ErrCorrupt - return r.err - } - // Check if destination is within this block - if int64(dLen) > n { - if len(r.decoded) < dLen { - r.decoded = make([]byte, dLen) - } - if _, err := Decode(r.decoded, buf); err != nil { - r.err = err - return r.err - } - if crc(r.decoded[:dLen]) != checksum { - r.err = ErrCorrupt - return r.err - } - } else { - // Skip block completely - n -= int64(dLen) - r.blockStart += int64(dLen) - dLen = 0 - } - r.i, r.j = 0, dLen - continue - case chunkTypeUncompressedData: - r.blockStart += int64(r.j) - // Section 4.3. Uncompressed data (chunk type 0x01). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return r.err - } - if !r.ensureBufferSize(chunkLen) { - if r.err != nil { - r.err = ErrUnsupported - } - return r.err - } - buf := r.buf[:checksumSize] - if !r.readFull(buf, false) { - return r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - // Read directly into r.decoded instead of via r.buf. - n2 := chunkLen - checksumSize - if n2 > len(r.decoded) { - if n2 > r.maxBlock { - r.err = ErrCorrupt - return r.err - } - r.decoded = make([]byte, n2) - } - if !r.readFull(r.decoded[:n2], false) { - return r.err - } - if int64(n2) < n { - if crc(r.decoded[:n2]) != checksum { - r.err = ErrCorrupt - return r.err - } - } - r.i, r.j = 0, n2 - continue - case chunkTypeStreamIdentifier: - // Section 4.1. Stream identifier (chunk type 0xff). - if chunkLen != len(magicBody) { - r.err = ErrCorrupt - return r.err - } - if !r.readFull(r.buf[:len(magicBody)], false) { - return r.err - } - if string(r.buf[:len(magicBody)]) != magicBody { - if string(r.buf[:len(magicBody)]) != magicBodySnappy { - r.err = ErrCorrupt - return r.err - } - } - + // Copy from an earlier sub-slice of dst to a later sub-slice. + // If no overlap, use the built-in copy: + if offset > length { + copy(dst[d:d+length], dst[d-offset:]) + d += length continue } - if chunkType <= 0x7f { - // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). - r.err = ErrUnsupported - return r.err - } - if chunkLen > maxChunkSize { - r.err = ErrUnsupported - return r.err - } - // Section 4.4 Padding (chunk type 0xfe). - // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). - if !r.skippable(r.buf, chunkLen, false, chunkType) { - return r.err + // Unlike the built-in copy function, this byte-by-byte copy always runs + // forwards, even if the slices overlap. Conceptually, this is: + // + // d += forwardCopy(dst[d:d+length], dst[d-offset:]) + // + // We align the slices into a and b and show the compiler they are the same size. + // This allows the loop to run without bounds checks. + a := dst[d : d+length] + b := dst[d-offset:] + b = b[:len(a)] + for i := range a { + a[i] = b[i] } + d += length } - return nil -} - -// ReadSeeker provides random or forward seeking in compressed content. -// See Reader.ReadSeeker -type ReadSeeker struct { - *Reader -} - -// ReadSeeker will return an io.ReadSeeker compatible version of the reader. -// If 'random' is specified the returned io.Seeker can be used for -// random seeking, otherwise only forward seeking is supported. -// Enabling random seeking requires the original input to support -// the io.Seeker interface. -// A custom index can be specified which will be used if supplied. -// When using a custom index, it will not be read from the input stream. -// The returned ReadSeeker contains a shallow reference to the existing Reader, -// meaning changes performed to one is reflected in the other. -func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) { - // Read index if provided. - if len(index) != 0 { - if r.index == nil { - r.index = &Index{} - } - if _, err := r.index.Load(index); err != nil { - return nil, ErrCantSeek{Reason: "loading index returned: " + err.Error()} - } - } - - // Check if input is seekable - rs, ok := r.r.(io.ReadSeeker) - if !ok { - if !random { - return &ReadSeeker{Reader: r}, nil - } - return nil, ErrCantSeek{Reason: "input stream isn't seekable"} - } - - if r.index != nil { - // Seekable and index, ok... - return &ReadSeeker{Reader: r}, nil - } - - // Load from stream. - r.index = &Index{} - - // Read current position. - pos, err := rs.Seek(0, io.SeekCurrent) - if err != nil { - return nil, ErrCantSeek{Reason: "seeking input returned: " + err.Error()} - } - err = r.index.LoadStream(rs) - if err != nil { - if err == ErrUnsupported { - // If we don't require random seeking, reset input and return. - if !random { - _, err = rs.Seek(pos, io.SeekStart) - if err != nil { - return nil, ErrCantSeek{Reason: "resetting stream returned: " + err.Error()} - } - r.index = nil - return &ReadSeeker{Reader: r}, nil - } - return nil, ErrCantSeek{Reason: "input stream does not contain an index"} - } - return nil, ErrCantSeek{Reason: "reading index returned: " + err.Error()} - } - - // reset position. - _, err = rs.Seek(pos, io.SeekStart) - if err != nil { - return nil, ErrCantSeek{Reason: "seeking input returned: " + err.Error()} - } - return &ReadSeeker{Reader: r}, nil -} - -// Seek allows seeking in compressed data. -func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) { - if r.err != nil { - if !errors.Is(r.err, io.EOF) { - return 0, r.err - } - // Reset on EOF - r.err = nil - } - if offset == 0 && whence == io.SeekCurrent { - return r.blockStart + int64(r.i), nil - } - if !r.readHeader { - // Make sure we read the header. - _, r.err = r.Read([]byte{}) - } - rs, ok := r.r.(io.ReadSeeker) - if r.index == nil || !ok { - if whence == io.SeekCurrent && offset >= 0 { - err := r.Skip(offset) - return r.blockStart + int64(r.i), err - } - if whence == io.SeekStart && offset >= r.blockStart+int64(r.i) { - err := r.Skip(offset - r.blockStart - int64(r.i)) - return r.blockStart + int64(r.i), err - } - return 0, ErrUnsupported - - } - - switch whence { - case io.SeekCurrent: - offset += r.blockStart + int64(r.i) - case io.SeekEnd: - if offset > 0 { - return 0, errors.New("seek after end of file") - } - offset = r.index.TotalUncompressed + offset - } - - if offset < 0 { - return 0, errors.New("seek before start of file") - } - - c, u, err := r.index.Find(offset) - if err != nil { - return r.blockStart + int64(r.i), err - } - - // Seek to next block - _, err = rs.Seek(c, io.SeekStart) - if err != nil { - return 0, err - } - - r.i = r.j // Remove rest of current block. - if u < offset { - // Forward inside block - return offset, r.Skip(offset - u) - } - return offset, nil -} - -// ReadByte satisfies the io.ByteReader interface. -func (r *Reader) ReadByte() (byte, error) { - if r.err != nil { - return 0, r.err - } - if r.i < r.j { - c := r.decoded[r.i] - r.i++ - return c, nil - } - var tmp [1]byte - for i := 0; i < 10; i++ { - n, err := r.Read(tmp[:]) - if err != nil { - return 0, err - } - if n == 1 { - return tmp[0], nil - } - } - return 0, io.ErrNoProgress -} - -// SkippableCB will register a callback for chunks with the specified ID. -// ID must be a Reserved skippable chunks ID, 0x80-0xfe (inclusive). -// For each chunk with the ID, the callback is called with the content. -// Any returned non-nil error will abort decompression. -// Only one callback per ID is supported, latest sent will be used. -// Sending a nil function will disable previous callbacks. -func (r *Reader) SkippableCB(id uint8, fn func(r io.Reader) error) error { - if id < 0x80 || id > chunkTypePadding { - return fmt.Errorf("ReaderSkippableCB: Invalid id provided, must be 0x80-0xfe (inclusive)") - } - r.skippableCB[id] = fn - return nil + + if d != len(dst) { + if debugErrs { + fmt.Println("wanted length", len(dst), "got", d) + } + return decodeErrCodeCorrupt + } + return 0 } diff --git a/vendor/github.com/klauspost/compress/s2/decode_other.go b/vendor/github.com/klauspost/compress/s2/decode_other.go index 11300c3a..2cb55c2c 100644 --- a/vendor/github.com/klauspost/compress/s2/decode_other.go +++ b/vendor/github.com/klauspost/compress/s2/decode_other.go @@ -57,6 +57,9 @@ func s2Decode(dst, src []byte) int { } length = int(x) + 1 if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) { + if debug { + fmt.Println("corrupt: lit size", length) + } return decodeErrCodeCorrupt } if debug { @@ -109,6 +112,10 @@ func s2Decode(dst, src []byte) int { } if offset <= 0 || d < offset || length > len(dst)-d { + if debug { + fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d) + } + return decodeErrCodeCorrupt } @@ -175,6 +182,9 @@ func s2Decode(dst, src []byte) int { } length = int(x) + 1 if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) { + if debug { + fmt.Println("corrupt: lit size", length) + } return decodeErrCodeCorrupt } if debug { @@ -241,6 +251,9 @@ func s2Decode(dst, src []byte) int { } if offset <= 0 || d < offset || length > len(dst)-d { + if debug { + fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d) + } return decodeErrCodeCorrupt } diff --git a/vendor/github.com/klauspost/compress/s2/dict.go b/vendor/github.com/klauspost/compress/s2/dict.go new file mode 100644 index 00000000..24f7ce80 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/dict.go @@ -0,0 +1,331 @@ +// Copyright (c) 2022+ Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s2 + +import ( + "bytes" + "encoding/binary" + "sync" +) + +const ( + // MinDictSize is the minimum dictionary size when repeat has been read. + MinDictSize = 16 + + // MaxDictSize is the maximum dictionary size when repeat has been read. + MaxDictSize = 65536 + + // MaxDictSrcOffset is the maximum offset where a dictionary entry can start. + MaxDictSrcOffset = 65535 +) + +// Dict contains a dictionary that can be used for encoding and decoding s2 +type Dict struct { + dict []byte + repeat int // Repeat as index of dict + + fast, better, best sync.Once + fastTable *[1 << 14]uint16 + + betterTableShort *[1 << 14]uint16 + betterTableLong *[1 << 17]uint16 + + bestTableShort *[1 << 16]uint32 + bestTableLong *[1 << 19]uint32 +} + +// NewDict will read a dictionary. +// It will return nil if the dictionary is invalid. +func NewDict(dict []byte) *Dict { + if len(dict) == 0 { + return nil + } + var d Dict + // Repeat is the first value of the dict + r, n := binary.Uvarint(dict) + if n <= 0 { + return nil + } + dict = dict[n:] + d.dict = dict + if cap(d.dict) < len(d.dict)+16 { + d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...) + } + if len(dict) < MinDictSize || len(dict) > MaxDictSize { + return nil + } + d.repeat = int(r) + if d.repeat > len(dict) { + return nil + } + return &d +} + +// Bytes will return a serialized version of the dictionary. +// The output can be sent to NewDict. +func (d *Dict) Bytes() []byte { + dst := make([]byte, binary.MaxVarintLen16+len(d.dict)) + return append(dst[:binary.PutUvarint(dst, uint64(d.repeat))], d.dict...) +} + +// MakeDict will create a dictionary. +// 'data' must be at least MinDictSize. +// If data is longer than MaxDictSize only the last MaxDictSize bytes will be used. +// If searchStart is set the start repeat value will be set to the last +// match of this content. +// If no matches are found, it will attempt to find shorter matches. +// This content should match the typical start of a block. +// If at least 4 bytes cannot be matched, repeat is set to start of block. +func MakeDict(data []byte, searchStart []byte) *Dict { + if len(data) == 0 { + return nil + } + if len(data) > MaxDictSize { + data = data[len(data)-MaxDictSize:] + } + var d Dict + dict := data + d.dict = dict + if cap(d.dict) < len(d.dict)+16 { + d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...) + } + if len(dict) < MinDictSize { + return nil + } + + // Find the longest match possible, last entry if multiple. + for s := len(searchStart); s > 4; s-- { + if idx := bytes.LastIndex(data, searchStart[:s]); idx >= 0 && idx <= len(data)-8 { + d.repeat = idx + break + } + } + + return &d +} + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +// +// The blocks will require the same amount of memory to decode as encoding, +// and does not make for concurrent decoding. +// Also note that blocks do not contain CRC information, so corruption may be undetected. +// +// If you need to encode larger amounts of data, consider using +// the streaming interface which gives all of these features. +func (d *Dict) Encode(dst, src []byte) []byte { + if n := MaxEncodedLen(len(src)); n < 0 { + panic(ErrTooLarge) + } else if cap(dst) < n { + dst = make([]byte, n) + } else { + dst = dst[:n] + } + + // The block starts with the varint-encoded length of the decompressed bytes. + dstP := binary.PutUvarint(dst, uint64(len(src))) + + if len(src) == 0 { + return dst[:dstP] + } + if len(src) < minNonLiteralBlockSize { + dstP += emitLiteral(dst[dstP:], src) + return dst[:dstP] + } + n := encodeBlockDictGo(dst[dstP:], src, d) + if n > 0 { + dstP += n + return dst[:dstP] + } + // Not compressible + dstP += emitLiteral(dst[dstP:], src) + return dst[:dstP] +} + +// EncodeBetter returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// EncodeBetter compresses better than Encode but typically with a +// 10-40% speed decrease on both compression and decompression. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +// +// The blocks will require the same amount of memory to decode as encoding, +// and does not make for concurrent decoding. +// Also note that blocks do not contain CRC information, so corruption may be undetected. +// +// If you need to encode larger amounts of data, consider using +// the streaming interface which gives all of these features. +func (d *Dict) EncodeBetter(dst, src []byte) []byte { + if n := MaxEncodedLen(len(src)); n < 0 { + panic(ErrTooLarge) + } else if len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + dstP := binary.PutUvarint(dst, uint64(len(src))) + + if len(src) == 0 { + return dst[:dstP] + } + if len(src) < minNonLiteralBlockSize { + dstP += emitLiteral(dst[dstP:], src) + return dst[:dstP] + } + n := encodeBlockBetterDict(dst[dstP:], src, d) + if n > 0 { + dstP += n + return dst[:dstP] + } + // Not compressible + dstP += emitLiteral(dst[dstP:], src) + return dst[:dstP] +} + +// EncodeBest returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// EncodeBest compresses as good as reasonably possible but with a +// big speed decrease. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +// +// The blocks will require the same amount of memory to decode as encoding, +// and does not make for concurrent decoding. +// Also note that blocks do not contain CRC information, so corruption may be undetected. +// +// If you need to encode larger amounts of data, consider using +// the streaming interface which gives all of these features. +func (d *Dict) EncodeBest(dst, src []byte) []byte { + if n := MaxEncodedLen(len(src)); n < 0 { + panic(ErrTooLarge) + } else if len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + dstP := binary.PutUvarint(dst, uint64(len(src))) + + if len(src) == 0 { + return dst[:dstP] + } + if len(src) < minNonLiteralBlockSize { + dstP += emitLiteral(dst[dstP:], src) + return dst[:dstP] + } + n := encodeBlockBest(dst[dstP:], src, d) + if n > 0 { + dstP += n + return dst[:dstP] + } + // Not compressible + dstP += emitLiteral(dst[dstP:], src) + return dst[:dstP] +} + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +func (d *Dict) Decode(dst, src []byte) ([]byte, error) { + dLen, s, err := decodedLen(src) + if err != nil { + return nil, err + } + if dLen <= cap(dst) { + dst = dst[:dLen] + } else { + dst = make([]byte, dLen) + } + if s2DecodeDict(dst, src[s:], d) != 0 { + return nil, ErrCorrupt + } + return dst, nil +} + +func (d *Dict) initFast() { + d.fast.Do(func() { + const ( + tableBits = 14 + maxTableSize = 1 << tableBits + ) + + var table [maxTableSize]uint16 + // We stop so any entry of length 8 can always be read. + for i := 0; i < len(d.dict)-8-2; i += 3 { + x0 := load64(d.dict, i) + h0 := hash6(x0, tableBits) + h1 := hash6(x0>>8, tableBits) + h2 := hash6(x0>>16, tableBits) + table[h0] = uint16(i) + table[h1] = uint16(i + 1) + table[h2] = uint16(i + 2) + } + d.fastTable = &table + }) +} + +func (d *Dict) initBetter() { + d.better.Do(func() { + const ( + // Long hash matches. + lTableBits = 17 + maxLTableSize = 1 << lTableBits + + // Short hash matches. + sTableBits = 14 + maxSTableSize = 1 << sTableBits + ) + + var lTable [maxLTableSize]uint16 + var sTable [maxSTableSize]uint16 + + // We stop so any entry of length 8 can always be read. + for i := 0; i < len(d.dict)-8; i++ { + cv := load64(d.dict, i) + lTable[hash7(cv, lTableBits)] = uint16(i) + sTable[hash4(cv, sTableBits)] = uint16(i) + } + d.betterTableShort = &sTable + d.betterTableLong = &lTable + }) +} + +func (d *Dict) initBest() { + d.best.Do(func() { + const ( + // Long hash matches. + lTableBits = 19 + maxLTableSize = 1 << lTableBits + + // Short hash matches. + sTableBits = 16 + maxSTableSize = 1 << sTableBits + ) + + var lTable [maxLTableSize]uint32 + var sTable [maxSTableSize]uint32 + + // We stop so any entry of length 8 can always be read. + for i := 0; i < len(d.dict)-8; i++ { + cv := load64(d.dict, i) + hashL := hash8(cv, lTableBits) + hashS := hash4(cv, sTableBits) + candidateL := lTable[hashL] + candidateS := sTable[hashS] + lTable[hashL] = uint32(i) | candidateL<<16 + sTable[hashS] = uint32(i) | candidateS<<16 + } + d.bestTableShort = &sTable + d.bestTableLong = &lTable + }) +} diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go index 1aefabf3..e6c23102 100644 --- a/vendor/github.com/klauspost/compress/s2/encode.go +++ b/vendor/github.com/klauspost/compress/s2/encode.go @@ -6,15 +6,9 @@ package s2 import ( - "crypto/rand" "encoding/binary" - "errors" - "fmt" - "io" "math" "math/bits" - "runtime" - "sync" ) // Encode returns the encoded form of src. The returned slice may be a sub- @@ -58,6 +52,32 @@ func Encode(dst, src []byte) []byte { return dst[:d] } +// EstimateBlockSize will perform a very fast compression +// without outputting the result and return the compressed output size. +// The function returns -1 if no improvement could be achieved. +// Using actual compression will most often produce better compression than the estimate. +func EstimateBlockSize(src []byte) (d int) { + if len(src) < 6 || int64(len(src)) > 0xffffffff { + return -1 + } + if len(src) <= 1024 { + d = calcBlockSizeSmall(src) + } else { + d = calcBlockSize(src) + } + + if d == 0 { + return -1 + } + // Size of the varint encoded block size. + d += (bits.Len64(uint64(len(src))) + 7) / 7 + + if d >= len(src) { + return -1 + } + return d +} + // EncodeBetter returns the encoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire encoded block. // Otherwise, a newly allocated slice will be returned. @@ -132,7 +152,7 @@ func EncodeBest(dst, src []byte) []byte { d += emitLiteral(dst[d:], src) return dst[:d] } - n := encodeBlockBest(dst[d:], src) + n := encodeBlockBest(dst[d:], src, nil) if n > 0 { d += n return dst[:d] @@ -329,9 +349,12 @@ const inputMargin = 8 // will be accepted by the encoder. const minNonLiteralBlockSize = 32 +const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits) + // MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size. // Blocks this big are highly discouraged, though. -const MaxBlockSize = math.MaxUint32 - binary.MaxVarintLen32 - 5 +// Half the size on 32 bit systems. +const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5 // MaxEncodedLen returns the maximum length of a snappy block, given its // uncompressed length. @@ -340,7 +363,14 @@ const MaxBlockSize = math.MaxUint32 - binary.MaxVarintLen32 - 5 // 32 bit platforms will have lower thresholds for rejecting big content. func MaxEncodedLen(srcLen int) int { n := uint64(srcLen) - if n > 0xffffffff { + if intReduction == 1 { + // 32 bits + if n > math.MaxInt32 { + // Also includes negative. + return -1 + } + } else if n > 0xffffffff { + // 64 bits // Also includes negative. return -1 } @@ -349,993 +379,15 @@ func MaxEncodedLen(srcLen int) int { // Add maximum size of encoding block as literals. n += uint64(literalExtraSize(int64(srcLen))) - if n > 0xffffffff { + if intReduction == 1 { + // 32 bits + if n > math.MaxInt32 { + return -1 + } + } else if n > 0xffffffff { + // 64 bits + // Also includes negative. return -1 } return int(n) } - -var errClosed = errors.New("s2: Writer is closed") - -// NewWriter returns a new Writer that compresses to w, using the -// framing format described at -// https://github.com/google/snappy/blob/master/framing_format.txt -// -// Users must call Close to guarantee all data has been forwarded to -// the underlying io.Writer and that resources are released. -// They may also call Flush zero or more times before calling Close. -func NewWriter(w io.Writer, opts ...WriterOption) *Writer { - w2 := Writer{ - blockSize: defaultBlockSize, - concurrency: runtime.GOMAXPROCS(0), - randSrc: rand.Reader, - level: levelFast, - } - for _, opt := range opts { - if err := opt(&w2); err != nil { - w2.errState = err - return &w2 - } - } - w2.obufLen = obufHeaderLen + MaxEncodedLen(w2.blockSize) - w2.paramsOK = true - w2.ibuf = make([]byte, 0, w2.blockSize) - w2.buffers.New = func() interface{} { - return make([]byte, w2.obufLen) - } - w2.Reset(w) - return &w2 -} - -// Writer is an io.Writer that can write Snappy-compressed bytes. -type Writer struct { - errMu sync.Mutex - errState error - - // ibuf is a buffer for the incoming (uncompressed) bytes. - ibuf []byte - - blockSize int - obufLen int - concurrency int - written int64 - uncompWritten int64 // Bytes sent to compression - output chan chan result - buffers sync.Pool - pad int - - writer io.Writer - randSrc io.Reader - writerWg sync.WaitGroup - index Index - - // wroteStreamHeader is whether we have written the stream header. - wroteStreamHeader bool - paramsOK bool - snappy bool - flushOnWrite bool - appendIndex bool - level uint8 -} - -const ( - levelUncompressed = iota + 1 - levelFast - levelBetter - levelBest -) - -type result struct { - b []byte - // Uncompressed start offset - startOffset int64 -} - -// err returns the previously set error. -// If no error has been set it is set to err if not nil. -func (w *Writer) err(err error) error { - w.errMu.Lock() - errSet := w.errState - if errSet == nil && err != nil { - w.errState = err - errSet = err - } - w.errMu.Unlock() - return errSet -} - -// Reset discards the writer's state and switches the Snappy writer to write to w. -// This permits reusing a Writer rather than allocating a new one. -func (w *Writer) Reset(writer io.Writer) { - if !w.paramsOK { - return - } - // Close previous writer, if any. - if w.output != nil { - close(w.output) - w.writerWg.Wait() - w.output = nil - } - w.errState = nil - w.ibuf = w.ibuf[:0] - w.wroteStreamHeader = false - w.written = 0 - w.writer = writer - w.uncompWritten = 0 - w.index.reset(w.blockSize) - - // If we didn't get a writer, stop here. - if writer == nil { - return - } - // If no concurrency requested, don't spin up writer goroutine. - if w.concurrency == 1 { - return - } - - toWrite := make(chan chan result, w.concurrency) - w.output = toWrite - w.writerWg.Add(1) - - // Start a writer goroutine that will write all output in order. - go func() { - defer w.writerWg.Done() - - // Get a queued write. - for write := range toWrite { - // Wait for the data to be available. - input := <-write - in := input.b - if len(in) > 0 { - if w.err(nil) == nil { - // Don't expose data from previous buffers. - toWrite := in[:len(in):len(in)] - // Write to output. - n, err := writer.Write(toWrite) - if err == nil && n != len(toWrite) { - err = io.ErrShortBuffer - } - _ = w.err(err) - w.err(w.index.add(w.written, input.startOffset)) - w.written += int64(n) - } - } - if cap(in) >= w.obufLen { - w.buffers.Put(in) - } - // close the incoming write request. - // This can be used for synchronizing flushes. - close(write) - } - }() -} - -// Write satisfies the io.Writer interface. -func (w *Writer) Write(p []byte) (nRet int, errRet error) { - if err := w.err(nil); err != nil { - return 0, err - } - if w.flushOnWrite { - return w.write(p) - } - // If we exceed the input buffer size, start writing - for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err(nil) == nil { - var n int - if len(w.ibuf) == 0 { - // Large write, empty buffer. - // Write directly from p to avoid copy. - n, _ = w.write(p) - } else { - n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) - w.ibuf = w.ibuf[:len(w.ibuf)+n] - w.write(w.ibuf) - w.ibuf = w.ibuf[:0] - } - nRet += n - p = p[n:] - } - if err := w.err(nil); err != nil { - return nRet, err - } - // p should always be able to fit into w.ibuf now. - n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) - w.ibuf = w.ibuf[:len(w.ibuf)+n] - nRet += n - return nRet, nil -} - -// ReadFrom implements the io.ReaderFrom interface. -// Using this is typically more efficient since it avoids a memory copy. -// ReadFrom reads data from r until EOF or error. -// The return value n is the number of bytes read. -// Any error except io.EOF encountered during the read is also returned. -func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) { - if err := w.err(nil); err != nil { - return 0, err - } - if len(w.ibuf) > 0 { - err := w.Flush() - if err != nil { - return 0, err - } - } - if br, ok := r.(byter); ok { - buf := br.Bytes() - if err := w.EncodeBuffer(buf); err != nil { - return 0, err - } - return int64(len(buf)), w.Flush() - } - for { - inbuf := w.buffers.Get().([]byte)[:w.blockSize+obufHeaderLen] - n2, err := io.ReadFull(r, inbuf[obufHeaderLen:]) - if err != nil { - if err == io.ErrUnexpectedEOF { - err = io.EOF - } - if err != io.EOF { - return n, w.err(err) - } - } - if n2 == 0 { - break - } - n += int64(n2) - err2 := w.writeFull(inbuf[:n2+obufHeaderLen]) - if w.err(err2) != nil { - break - } - - if err != nil { - // We got EOF and wrote everything - break - } - } - - return n, w.err(nil) -} - -// AddSkippableBlock will add a skippable block to the stream. -// The ID must be 0x80-0xfe (inclusive). -// Length of the skippable block must be <= 16777215 bytes. -func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) { - if err := w.err(nil); err != nil { - return err - } - if len(data) == 0 { - return nil - } - if id < 0x80 || id > chunkTypePadding { - return fmt.Errorf("invalid skippable block id %x", id) - } - if len(data) > maxChunkSize { - return fmt.Errorf("skippable block excessed maximum size") - } - var header [4]byte - chunkLen := 4 + len(data) - header[0] = id - header[1] = uint8(chunkLen >> 0) - header[2] = uint8(chunkLen >> 8) - header[3] = uint8(chunkLen >> 16) - if w.concurrency == 1 { - write := func(b []byte) error { - n, err := w.writer.Write(b) - if err = w.err(err); err != nil { - return err - } - if n != len(data) { - return w.err(io.ErrShortWrite) - } - w.written += int64(n) - return w.err(nil) - } - if !w.wroteStreamHeader { - w.wroteStreamHeader = true - if w.snappy { - if err := write([]byte(magicChunkSnappy)); err != nil { - return err - } - } else { - if err := write([]byte(magicChunk)); err != nil { - return err - } - } - } - if err := write(header[:]); err != nil { - return err - } - if err := write(data); err != nil { - return err - } - } - - // Create output... - if !w.wroteStreamHeader { - w.wroteStreamHeader = true - hWriter := make(chan result) - w.output <- hWriter - if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} - } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} - } - } - - // Copy input. - inbuf := w.buffers.Get().([]byte)[:4] - copy(inbuf, header[:]) - inbuf = append(inbuf, data...) - - output := make(chan result, 1) - // Queue output. - w.output <- output - output <- result{startOffset: w.uncompWritten, b: inbuf} - - return nil -} - -// EncodeBuffer will add a buffer to the stream. -// This is the fastest way to encode a stream, -// but the input buffer cannot be written to by the caller -// until Flush or Close has been called when concurrency != 1. -// -// If you cannot control that, use the regular Write function. -// -// Note that input is not buffered. -// This means that each write will result in discrete blocks being created. -// For buffered writes, use the regular Write function. -func (w *Writer) EncodeBuffer(buf []byte) (err error) { - if err := w.err(nil); err != nil { - return err - } - - if w.flushOnWrite { - _, err := w.write(buf) - return err - } - // Flush queued data first. - if len(w.ibuf) > 0 { - err := w.Flush() - if err != nil { - return err - } - } - if w.concurrency == 1 { - _, err := w.writeSync(buf) - return err - } - - // Spawn goroutine and write block to output channel. - if !w.wroteStreamHeader { - w.wroteStreamHeader = true - hWriter := make(chan result) - w.output <- hWriter - if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} - } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} - } - } - - for len(buf) > 0 { - // Cut input. - uncompressed := buf - if len(uncompressed) > w.blockSize { - uncompressed = uncompressed[:w.blockSize] - } - buf = buf[len(uncompressed):] - // Get an output buffer. - obuf := w.buffers.Get().([]byte)[:len(uncompressed)+obufHeaderLen] - output := make(chan result) - // Queue output now, so we keep order. - w.output <- output - res := result{ - startOffset: w.uncompWritten, - } - w.uncompWritten += int64(len(uncompressed)) - go func() { - checksum := crc(uncompressed) - - // Set to uncompressed. - chunkType := uint8(chunkTypeUncompressedData) - chunkLen := 4 + len(uncompressed) - - // Attempt compressing. - n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed))) - n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed) - - // Check if we should use this, or store as uncompressed instead. - if n2 > 0 { - chunkType = uint8(chunkTypeCompressedData) - chunkLen = 4 + n + n2 - obuf = obuf[:obufHeaderLen+n+n2] - } else { - // copy uncompressed - copy(obuf[obufHeaderLen:], uncompressed) - } - - // Fill in the per-chunk header that comes before the body. - obuf[0] = chunkType - obuf[1] = uint8(chunkLen >> 0) - obuf[2] = uint8(chunkLen >> 8) - obuf[3] = uint8(chunkLen >> 16) - obuf[4] = uint8(checksum >> 0) - obuf[5] = uint8(checksum >> 8) - obuf[6] = uint8(checksum >> 16) - obuf[7] = uint8(checksum >> 24) - - // Queue final output. - res.b = obuf - output <- res - }() - } - return nil -} - -func (w *Writer) encodeBlock(obuf, uncompressed []byte) int { - if w.snappy { - switch w.level { - case levelFast: - return encodeBlockSnappy(obuf, uncompressed) - case levelBetter: - return encodeBlockBetterSnappy(obuf, uncompressed) - case levelBest: - return encodeBlockBestSnappy(obuf, uncompressed) - } - return 0 - } - switch w.level { - case levelFast: - return encodeBlock(obuf, uncompressed) - case levelBetter: - return encodeBlockBetter(obuf, uncompressed) - case levelBest: - return encodeBlockBest(obuf, uncompressed) - } - return 0 -} - -func (w *Writer) write(p []byte) (nRet int, errRet error) { - if err := w.err(nil); err != nil { - return 0, err - } - if w.concurrency == 1 { - return w.writeSync(p) - } - - // Spawn goroutine and write block to output channel. - for len(p) > 0 { - if !w.wroteStreamHeader { - w.wroteStreamHeader = true - hWriter := make(chan result) - w.output <- hWriter - if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} - } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} - } - } - - var uncompressed []byte - if len(p) > w.blockSize { - uncompressed, p = p[:w.blockSize], p[w.blockSize:] - } else { - uncompressed, p = p, nil - } - - // Copy input. - // If the block is incompressible, this is used for the result. - inbuf := w.buffers.Get().([]byte)[:len(uncompressed)+obufHeaderLen] - obuf := w.buffers.Get().([]byte)[:w.obufLen] - copy(inbuf[obufHeaderLen:], uncompressed) - uncompressed = inbuf[obufHeaderLen:] - - output := make(chan result) - // Queue output now, so we keep order. - w.output <- output - res := result{ - startOffset: w.uncompWritten, - } - w.uncompWritten += int64(len(uncompressed)) - - go func() { - checksum := crc(uncompressed) - - // Set to uncompressed. - chunkType := uint8(chunkTypeUncompressedData) - chunkLen := 4 + len(uncompressed) - - // Attempt compressing. - n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed))) - n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed) - - // Check if we should use this, or store as uncompressed instead. - if n2 > 0 { - chunkType = uint8(chunkTypeCompressedData) - chunkLen = 4 + n + n2 - obuf = obuf[:obufHeaderLen+n+n2] - } else { - // Use input as output. - obuf, inbuf = inbuf, obuf - } - - // Fill in the per-chunk header that comes before the body. - obuf[0] = chunkType - obuf[1] = uint8(chunkLen >> 0) - obuf[2] = uint8(chunkLen >> 8) - obuf[3] = uint8(chunkLen >> 16) - obuf[4] = uint8(checksum >> 0) - obuf[5] = uint8(checksum >> 8) - obuf[6] = uint8(checksum >> 16) - obuf[7] = uint8(checksum >> 24) - - // Queue final output. - res.b = obuf - output <- res - - // Put unused buffer back in pool. - w.buffers.Put(inbuf) - }() - nRet += len(uncompressed) - } - return nRet, nil -} - -// writeFull is a special version of write that will always write the full buffer. -// Data to be compressed should start at offset obufHeaderLen and fill the remainder of the buffer. -// The data will be written as a single block. -// The caller is not allowed to use inbuf after this function has been called. -func (w *Writer) writeFull(inbuf []byte) (errRet error) { - if err := w.err(nil); err != nil { - return err - } - - if w.concurrency == 1 { - _, err := w.writeSync(inbuf[obufHeaderLen:]) - return err - } - - // Spawn goroutine and write block to output channel. - if !w.wroteStreamHeader { - w.wroteStreamHeader = true - hWriter := make(chan result) - w.output <- hWriter - if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} - } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} - } - } - - // Get an output buffer. - obuf := w.buffers.Get().([]byte)[:w.obufLen] - uncompressed := inbuf[obufHeaderLen:] - - output := make(chan result) - // Queue output now, so we keep order. - w.output <- output - res := result{ - startOffset: w.uncompWritten, - } - w.uncompWritten += int64(len(uncompressed)) - - go func() { - checksum := crc(uncompressed) - - // Set to uncompressed. - chunkType := uint8(chunkTypeUncompressedData) - chunkLen := 4 + len(uncompressed) - - // Attempt compressing. - n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed))) - n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed) - - // Check if we should use this, or store as uncompressed instead. - if n2 > 0 { - chunkType = uint8(chunkTypeCompressedData) - chunkLen = 4 + n + n2 - obuf = obuf[:obufHeaderLen+n+n2] - } else { - // Use input as output. - obuf, inbuf = inbuf, obuf - } - - // Fill in the per-chunk header that comes before the body. - obuf[0] = chunkType - obuf[1] = uint8(chunkLen >> 0) - obuf[2] = uint8(chunkLen >> 8) - obuf[3] = uint8(chunkLen >> 16) - obuf[4] = uint8(checksum >> 0) - obuf[5] = uint8(checksum >> 8) - obuf[6] = uint8(checksum >> 16) - obuf[7] = uint8(checksum >> 24) - - // Queue final output. - res.b = obuf - output <- res - - // Put unused buffer back in pool. - w.buffers.Put(inbuf) - }() - return nil -} - -func (w *Writer) writeSync(p []byte) (nRet int, errRet error) { - if err := w.err(nil); err != nil { - return 0, err - } - if !w.wroteStreamHeader { - w.wroteStreamHeader = true - var n int - var err error - if w.snappy { - n, err = w.writer.Write([]byte(magicChunkSnappy)) - } else { - n, err = w.writer.Write([]byte(magicChunk)) - } - if err != nil { - return 0, w.err(err) - } - if n != len(magicChunk) { - return 0, w.err(io.ErrShortWrite) - } - w.written += int64(n) - } - - for len(p) > 0 { - var uncompressed []byte - if len(p) > w.blockSize { - uncompressed, p = p[:w.blockSize], p[w.blockSize:] - } else { - uncompressed, p = p, nil - } - - obuf := w.buffers.Get().([]byte)[:w.obufLen] - checksum := crc(uncompressed) - - // Set to uncompressed. - chunkType := uint8(chunkTypeUncompressedData) - chunkLen := 4 + len(uncompressed) - - // Attempt compressing. - n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed))) - n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed) - - if n2 > 0 { - chunkType = uint8(chunkTypeCompressedData) - chunkLen = 4 + n + n2 - obuf = obuf[:obufHeaderLen+n+n2] - } else { - obuf = obuf[:8] - } - - // Fill in the per-chunk header that comes before the body. - obuf[0] = chunkType - obuf[1] = uint8(chunkLen >> 0) - obuf[2] = uint8(chunkLen >> 8) - obuf[3] = uint8(chunkLen >> 16) - obuf[4] = uint8(checksum >> 0) - obuf[5] = uint8(checksum >> 8) - obuf[6] = uint8(checksum >> 16) - obuf[7] = uint8(checksum >> 24) - - n, err := w.writer.Write(obuf) - if err != nil { - return 0, w.err(err) - } - if n != len(obuf) { - return 0, w.err(io.ErrShortWrite) - } - w.err(w.index.add(w.written, w.uncompWritten)) - w.written += int64(n) - w.uncompWritten += int64(len(uncompressed)) - - if chunkType == chunkTypeUncompressedData { - // Write uncompressed data. - n, err := w.writer.Write(uncompressed) - if err != nil { - return 0, w.err(err) - } - if n != len(uncompressed) { - return 0, w.err(io.ErrShortWrite) - } - w.written += int64(n) - } - w.buffers.Put(obuf) - // Queue final output. - nRet += len(uncompressed) - } - return nRet, nil -} - -// Flush flushes the Writer to its underlying io.Writer. -// This does not apply padding. -func (w *Writer) Flush() error { - if err := w.err(nil); err != nil { - return err - } - - // Queue any data still in input buffer. - if len(w.ibuf) != 0 { - if !w.wroteStreamHeader { - _, err := w.writeSync(w.ibuf) - w.ibuf = w.ibuf[:0] - return w.err(err) - } else { - _, err := w.write(w.ibuf) - w.ibuf = w.ibuf[:0] - err = w.err(err) - if err != nil { - return err - } - } - } - if w.output == nil { - return w.err(nil) - } - - // Send empty buffer - res := make(chan result) - w.output <- res - // Block until this has been picked up. - res <- result{b: nil, startOffset: w.uncompWritten} - // When it is closed, we have flushed. - <-res - return w.err(nil) -} - -// Close calls Flush and then closes the Writer. -// Calling Close multiple times is ok, -// but calling CloseIndex after this will make it not return the index. -func (w *Writer) Close() error { - _, err := w.closeIndex(w.appendIndex) - return err -} - -// CloseIndex calls Close and returns an index on first call. -// This is not required if you are only adding index to a stream. -func (w *Writer) CloseIndex() ([]byte, error) { - return w.closeIndex(true) -} - -func (w *Writer) closeIndex(idx bool) ([]byte, error) { - err := w.Flush() - if w.output != nil { - close(w.output) - w.writerWg.Wait() - w.output = nil - } - - var index []byte - if w.err(nil) == nil && w.writer != nil { - // Create index. - if idx { - compSize := int64(-1) - if w.pad <= 1 { - compSize = w.written - } - index = w.index.appendTo(w.ibuf[:0], w.uncompWritten, compSize) - // Count as written for padding. - if w.appendIndex { - w.written += int64(len(index)) - } - } - - if w.pad > 1 { - tmp := w.ibuf[:0] - if len(index) > 0 { - // Allocate another buffer. - tmp = w.buffers.Get().([]byte)[:0] - defer w.buffers.Put(tmp) - } - add := calcSkippableFrame(w.written, int64(w.pad)) - frame, err := skippableFrame(tmp, add, w.randSrc) - if err = w.err(err); err != nil { - return nil, err - } - n, err2 := w.writer.Write(frame) - if err2 == nil && n != len(frame) { - err2 = io.ErrShortWrite - } - _ = w.err(err2) - } - if len(index) > 0 && w.appendIndex { - n, err2 := w.writer.Write(index) - if err2 == nil && n != len(index) { - err2 = io.ErrShortWrite - } - _ = w.err(err2) - } - } - err = w.err(errClosed) - if err == errClosed { - return index, nil - } - return nil, err -} - -// calcSkippableFrame will return a total size to be added for written -// to be divisible by multiple. -// The value will always be > skippableFrameHeader. -// The function will panic if written < 0 or wantMultiple <= 0. -func calcSkippableFrame(written, wantMultiple int64) int { - if wantMultiple <= 0 { - panic("wantMultiple <= 0") - } - if written < 0 { - panic("written < 0") - } - leftOver := written % wantMultiple - if leftOver == 0 { - return 0 - } - toAdd := wantMultiple - leftOver - for toAdd < skippableFrameHeader { - toAdd += wantMultiple - } - return int(toAdd) -} - -// skippableFrame will add a skippable frame with a total size of bytes. -// total should be >= skippableFrameHeader and < maxBlockSize + skippableFrameHeader -func skippableFrame(dst []byte, total int, r io.Reader) ([]byte, error) { - if total == 0 { - return dst, nil - } - if total < skippableFrameHeader { - return dst, fmt.Errorf("s2: requested skippable frame (%d) < 4", total) - } - if int64(total) >= maxBlockSize+skippableFrameHeader { - return dst, fmt.Errorf("s2: requested skippable frame (%d) >= max 1<<24", total) - } - // Chunk type 0xfe "Section 4.4 Padding (chunk type 0xfe)" - dst = append(dst, chunkTypePadding) - f := uint32(total - skippableFrameHeader) - // Add chunk length. - dst = append(dst, uint8(f), uint8(f>>8), uint8(f>>16)) - // Add data - start := len(dst) - dst = append(dst, make([]byte, f)...) - _, err := io.ReadFull(r, dst[start:]) - return dst, err -} - -// WriterOption is an option for creating a encoder. -type WriterOption func(*Writer) error - -// WriterConcurrency will set the concurrency, -// meaning the maximum number of decoders to run concurrently. -// The value supplied must be at least 1. -// By default this will be set to GOMAXPROCS. -func WriterConcurrency(n int) WriterOption { - return func(w *Writer) error { - if n <= 0 { - return errors.New("concurrency must be at least 1") - } - w.concurrency = n - return nil - } -} - -// WriterAddIndex will append an index to the end of a stream -// when it is closed. -func WriterAddIndex() WriterOption { - return func(w *Writer) error { - w.appendIndex = true - return nil - } -} - -// WriterBetterCompression will enable better compression. -// EncodeBetter compresses better than Encode but typically with a -// 10-40% speed decrease on both compression and decompression. -func WriterBetterCompression() WriterOption { - return func(w *Writer) error { - w.level = levelBetter - return nil - } -} - -// WriterBestCompression will enable better compression. -// EncodeBetter compresses better than Encode but typically with a -// big speed decrease on compression. -func WriterBestCompression() WriterOption { - return func(w *Writer) error { - w.level = levelBest - return nil - } -} - -// WriterUncompressed will bypass compression. -// The stream will be written as uncompressed blocks only. -// If concurrency is > 1 CRC and output will still be done async. -func WriterUncompressed() WriterOption { - return func(w *Writer) error { - w.level = levelUncompressed - return nil - } -} - -// WriterBlockSize allows to override the default block size. -// Blocks will be this size or smaller. -// Minimum size is 4KB and and maximum size is 4MB. -// -// Bigger blocks may give bigger throughput on systems with many cores, -// and will increase compression slightly, but it will limit the possible -// concurrency for smaller payloads for both encoding and decoding. -// Default block size is 1MB. -// -// When writing Snappy compatible output using WriterSnappyCompat, -// the maximum block size is 64KB. -func WriterBlockSize(n int) WriterOption { - return func(w *Writer) error { - if w.snappy && n > maxSnappyBlockSize || n < minBlockSize { - return errors.New("s2: block size too large. Must be <= 64K and >=4KB on for snappy compatible output") - } - if n > maxBlockSize || n < minBlockSize { - return errors.New("s2: block size too large. Must be <= 4MB and >=4KB") - } - w.blockSize = n - return nil - } -} - -// WriterPadding will add padding to all output so the size will be a multiple of n. -// This can be used to obfuscate the exact output size or make blocks of a certain size. -// The contents will be a skippable frame, so it will be invisible by the decoder. -// n must be > 0 and <= 4MB. -// The padded area will be filled with data from crypto/rand.Reader. -// The padding will be applied whenever Close is called on the writer. -func WriterPadding(n int) WriterOption { - return func(w *Writer) error { - if n <= 0 { - return fmt.Errorf("s2: padding must be at least 1") - } - // No need to waste our time. - if n == 1 { - w.pad = 0 - } - if n > maxBlockSize { - return fmt.Errorf("s2: padding must less than 4MB") - } - w.pad = n - return nil - } -} - -// WriterPaddingSrc will get random data for padding from the supplied source. -// By default crypto/rand is used. -func WriterPaddingSrc(reader io.Reader) WriterOption { - return func(w *Writer) error { - w.randSrc = reader - return nil - } -} - -// WriterSnappyCompat will write snappy compatible output. -// The output can be decompressed using either snappy or s2. -// If block size is more than 64KB it is set to that. -func WriterSnappyCompat() WriterOption { - return func(w *Writer) error { - w.snappy = true - if w.blockSize > 64<<10 { - // We choose 8 bytes less than 64K, since that will make literal emits slightly more effective. - // And allows us to skip some size checks. - w.blockSize = (64 << 10) - 8 - } - return nil - } -} - -// WriterFlushOnWrite will compress blocks on each call to the Write function. -// -// This is quite inefficient as blocks size will depend on the write size. -// -// Use WriterConcurrency(1) to also make sure that output is flushed. -// When Write calls return, otherwise they will be written when compression is done. -func WriterFlushOnWrite() WriterOption { - return func(w *Writer) error { - w.flushOnWrite = true - return nil - } -} diff --git a/vendor/github.com/klauspost/compress/s2/encode_all.go b/vendor/github.com/klauspost/compress/s2/encode_all.go index 54c71d3b..11657f09 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_all.go +++ b/vendor/github.com/klauspost/compress/s2/encode_all.go @@ -8,6 +8,7 @@ package s2 import ( "bytes" "encoding/binary" + "fmt" "math/bits" ) @@ -455,3 +456,594 @@ emitRemainder: } return d } + +// encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. +// +// It also assumes that: +// +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) { + // Initialize the hash table. + const ( + tableBits = 14 + maxTableSize = 1 << tableBits + maxAhead = 8 // maximum bytes ahead without checking sLimit + + debug = false + ) + dict.initFast() + + var table [maxTableSize]uint32 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + if sLimit > MaxDictSrcOffset-maxAhead { + sLimit = MaxDictSrcOffset - maxAhead + } + + // Bail if we can't compress to at least this. + dstLimit := len(src) - len(src)>>5 - 5 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form can start with a dict entry (copy or repeat). + s := 0 + + // Convert dict repeat to offset + repeat := len(dict.dict) - dict.repeat + cv := load64(src, 0) + + // While in dict +searchDict: + for { + // Next src position to check + nextS := s + (s-nextEmit)>>6 + 4 + hash0 := hash6(cv, tableBits) + hash1 := hash6(cv>>8, tableBits) + if nextS > sLimit { + if debug { + fmt.Println("slimit reached", s, nextS) + } + break searchDict + } + candidateDict := int(dict.fastTable[hash0]) + candidateDict2 := int(dict.fastTable[hash1]) + candidate2 := int(table[hash1]) + candidate := int(table[hash0]) + table[hash0] = uint32(s) + table[hash1] = uint32(s + 1) + hash2 := hash6(cv>>16, tableBits) + + // Check repeat at offset checkRep. + const checkRep = 1 + + if repeat > s { + candidate := len(dict.dict) - repeat + s + if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) { + // Extend back + base := s + for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; { + i-- + base-- + } + d += emitLiteral(dst[d:], src[nextEmit:base]) + if debug && nextEmit != base { + fmt.Println("emitted ", base-nextEmit, "literals") + } + s += 4 + candidate += 4 + for candidate < len(dict.dict)-8 && s <= len(src)-8 { + if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + d += emitRepeat(dst[d:], repeat, s-base) + if debug { + fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s) + } + nextEmit = s + if s >= sLimit { + break searchDict + } + cv = load64(src, s) + continue + } + } else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { + base := s + checkRep + // Extend back + for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { + i-- + base-- + } + d += emitLiteral(dst[d:], src[nextEmit:base]) + if debug && nextEmit != base { + fmt.Println("emitted ", base-nextEmit, "literals") + } + + // Extend forward + candidate := s - repeat + 4 + checkRep + s += 4 + checkRep + for s <= sLimit { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + if debug { + // Validate match. + if s <= candidate { + panic("s <= candidate") + } + a := src[base:s] + b := src[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + + if nextEmit > 0 { + // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. + d += emitRepeat(dst[d:], repeat, s-base) + } else { + // First match, cannot be repeat. + d += emitCopy(dst[d:], repeat, s-base) + } + + nextEmit = s + if s >= sLimit { + break searchDict + } + if debug { + fmt.Println("emitted reg repeat", s-base, "s:", s) + } + cv = load64(src, s) + continue searchDict + } + if s == 0 { + cv = load64(src, nextS) + s = nextS + continue searchDict + } + // Start with table. These matches will always be closer. + if uint32(cv) == load32(src, candidate) { + goto emitMatch + } + candidate = int(table[hash2]) + if uint32(cv>>8) == load32(src, candidate2) { + table[hash2] = uint32(s + 2) + candidate = candidate2 + s++ + goto emitMatch + } + + // Check dict. Dicts have longer offsets, so we want longer matches. + if cv == load64(dict.dict, candidateDict) { + table[hash2] = uint32(s + 2) + goto emitDict + } + + candidateDict = int(dict.fastTable[hash2]) + // Check if upper 7 bytes match + if candidateDict2 >= 1 { + if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) { + table[hash2] = uint32(s + 2) + candidateDict = candidateDict2 + s++ + goto emitDict + } + } + + table[hash2] = uint32(s + 2) + if uint32(cv>>16) == load32(src, candidate) { + s += 2 + goto emitMatch + } + if candidateDict >= 2 { + // Check if upper 6 bytes match + if cv^load64(dict.dict, candidateDict-2) < (1 << 16) { + s += 2 + goto emitDict + } + } + + cv = load64(src, nextS) + s = nextS + continue searchDict + + emitDict: + { + if debug { + if load32(dict.dict, candidateDict) != load32(src, s) { + panic("dict emit mismatch") + } + } + // Extend backwards. + // The top bytes will be rechecked to get the full match. + for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] { + candidateDict-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + d += emitLiteral(dst[d:], src[nextEmit:s]) + if debug && nextEmit != s { + fmt.Println("emitted ", s-nextEmit, "literals") + } + { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + repeat = s + (len(dict.dict)) - candidateDict + + // Extend the 4-byte match as long as possible. + s += 4 + candidateDict += 4 + for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 { + if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidateDict += 8 + } + + // Matches longer than 64 are split. + if s <= sLimit || s-base < 8 { + d += emitCopy(dst[d:], repeat, s-base) + } else { + // Split to ensure we don't start a copy within next block + d += emitCopy(dst[d:], repeat, 4) + d += emitRepeat(dst[d:], repeat, s-base-4) + } + if false { + // Validate match. + if s <= candidate { + panic("s <= candidate") + } + a := src[base:s] + b := dict.dict[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + if debug { + fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s) + } + nextEmit = s + if s >= sLimit { + break searchDict + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + + // Index and continue loop to try new candidate. + x := load64(src, s-2) + m2Hash := hash6(x, tableBits) + currHash := hash6(x>>8, tableBits) + candidate = int(table[currHash]) + table[m2Hash] = uint32(s - 2) + table[currHash] = uint32(s - 1) + cv = load64(src, s) + } + continue + } + emitMatch: + + // Extend backwards. + // The top bytes will be rechecked to get the full match. + for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { + candidate-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + d += emitLiteral(dst[d:], src[nextEmit:s]) + if debug && nextEmit != s { + fmt.Println("emitted ", s-nextEmit, "literals") + } + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + repeat = base - candidate + + // Extend the 4-byte match as long as possible. + s += 4 + candidate += 4 + for s <= len(src)-8 { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + + d += emitCopy(dst[d:], repeat, s-base) + if debug { + // Validate match. + if s <= candidate { + panic("s <= candidate") + } + a := src[base:s] + b := src[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + if debug { + fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s) + } + nextEmit = s + if s >= sLimit { + break searchDict + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + // Check for an immediate match, otherwise start search at s+1 + x := load64(src, s-2) + m2Hash := hash6(x, tableBits) + currHash := hash6(x>>16, tableBits) + candidate = int(table[currHash]) + table[m2Hash] = uint32(s - 2) + table[currHash] = uint32(s) + if debug && s == candidate { + panic("s == candidate") + } + if uint32(x>>16) != load32(src, candidate) { + cv = load64(src, s+1) + s++ + break + } + } + } + + // Search without dict: + if repeat > s { + repeat = 0 + } + + // No more dict + sLimit = len(src) - inputMargin + if s >= sLimit { + goto emitRemainder + } + if debug { + fmt.Println("non-dict matching at", s, "repeat:", repeat) + } + cv = load64(src, s) + if debug { + fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s) + } + for { + candidate := 0 + for { + // Next src position to check + nextS := s + (s-nextEmit)>>6 + 4 + if nextS > sLimit { + goto emitRemainder + } + hash0 := hash6(cv, tableBits) + hash1 := hash6(cv>>8, tableBits) + candidate = int(table[hash0]) + candidate2 := int(table[hash1]) + table[hash0] = uint32(s) + table[hash1] = uint32(s + 1) + hash2 := hash6(cv>>16, tableBits) + + // Check repeat at offset checkRep. + const checkRep = 1 + if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { + base := s + checkRep + // Extend back + for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { + i-- + base-- + } + d += emitLiteral(dst[d:], src[nextEmit:base]) + if debug && nextEmit != base { + fmt.Println("emitted ", base-nextEmit, "literals") + } + // Extend forward + candidate := s - repeat + 4 + checkRep + s += 4 + checkRep + for s <= sLimit { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + if debug { + // Validate match. + if s <= candidate { + panic("s <= candidate") + } + a := src[base:s] + b := src[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + if nextEmit > 0 { + // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. + d += emitRepeat(dst[d:], repeat, s-base) + } else { + // First match, cannot be repeat. + d += emitCopy(dst[d:], repeat, s-base) + } + if debug { + fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s) + } + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + cv = load64(src, s) + continue + } + + if uint32(cv) == load32(src, candidate) { + break + } + candidate = int(table[hash2]) + if uint32(cv>>8) == load32(src, candidate2) { + table[hash2] = uint32(s + 2) + candidate = candidate2 + s++ + break + } + table[hash2] = uint32(s + 2) + if uint32(cv>>16) == load32(src, candidate) { + s += 2 + break + } + + cv = load64(src, nextS) + s = nextS + } + + // Extend backwards. + // The top bytes will be rechecked to get the full match. + for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { + candidate-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + d += emitLiteral(dst[d:], src[nextEmit:s]) + if debug && nextEmit != s { + fmt.Println("emitted ", s-nextEmit, "literals") + } + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + repeat = base - candidate + + // Extend the 4-byte match as long as possible. + s += 4 + candidate += 4 + for s <= len(src)-8 { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + + d += emitCopy(dst[d:], repeat, s-base) + if debug { + // Validate match. + if s <= candidate { + panic("s <= candidate") + } + a := src[base:s] + b := src[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + if debug { + fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s) + } + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + // Check for an immediate match, otherwise start search at s+1 + x := load64(src, s-2) + m2Hash := hash6(x, tableBits) + currHash := hash6(x>>16, tableBits) + candidate = int(table[currHash]) + table[m2Hash] = uint32(s - 2) + table[currHash] = uint32(s) + if debug && s == candidate { + panic("s == candidate") + } + if uint32(x>>16) != load32(src, candidate) { + cv = load64(src, s+1) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + // Bail if we exceed the maximum size. + if d+len(src)-nextEmit > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:]) + if debug && nextEmit != s { + fmt.Println("emitted ", len(src)-nextEmit, "literals") + } + } + return d +} diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go index 6b93daa5..ebc332ad 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go @@ -3,6 +3,8 @@ package s2 +const hasAmd64Asm = true + // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. diff --git a/vendor/github.com/klauspost/compress/s2/encode_best.go b/vendor/github.com/klauspost/compress/s2/encode_best.go index 1b7ea394..1d13e869 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_best.go +++ b/vendor/github.com/klauspost/compress/s2/encode_best.go @@ -7,6 +7,7 @@ package s2 import ( "fmt" + "math" "math/bits" ) @@ -18,7 +19,7 @@ import ( // // len(dst) >= MaxEncodedLen(len(src)) && // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize -func encodeBlockBest(dst, src []byte) (d int) { +func encodeBlockBest(dst, src []byte, dict *Dict) (d int) { // Initialize the hash tables. const ( // Long hash matches. @@ -30,6 +31,8 @@ func encodeBlockBest(dst, src []byte) (d int) { maxSTableSize = 1 << sTableBits inputMargin = 8 + 2 + + debug = false ) // sLimit is when to stop looking for offset/length copies. The inputMargin @@ -39,6 +42,10 @@ func encodeBlockBest(dst, src []byte) (d int) { if len(src) < minNonLiteralBlockSize { return 0 } + sLimitDict := len(src) - inputMargin + if sLimitDict > MaxDictSrcOffset-inputMargin { + sLimitDict = MaxDictSrcOffset - inputMargin + } var lTable [maxLTableSize]uint64 var sTable [maxSTableSize]uint64 @@ -52,10 +59,15 @@ func encodeBlockBest(dst, src []byte) (d int) { // The encoded form must start with a literal, as there are no previous // bytes to copy, so we start looking for hash matches at s == 1. s := 1 + repeat := 1 + if dict != nil { + dict.initBest() + s = 0 + repeat = len(dict.dict) - dict.repeat + } cv := load64(src, s) // We search for a repeat at -1, but don't output repeats when nextEmit == 0 - repeat := 1 const lowbitMask = 0xffffffff getCur := func(x uint64) int { return int(x & lowbitMask) @@ -67,11 +79,11 @@ func encodeBlockBest(dst, src []byte) (d int) { for { type match struct { - offset int - s int - length int - score int - rep bool + offset int + s int + length int + score int + rep, dict bool } var best match for { @@ -85,6 +97,12 @@ func encodeBlockBest(dst, src []byte) (d int) { if nextS > sLimit { goto emitRemainder } + if dict != nil && s >= MaxDictSrcOffset { + dict = nil + if repeat > s { + repeat = math.MinInt32 + } + } hashL := hash8(cv, lTableBits) hashS := hash4(cv, sTableBits) candidateL := lTable[hashL] @@ -114,7 +132,15 @@ func encodeBlockBest(dst, src []byte) (d int) { } m := match{offset: offset, s: s, length: 4 + offset, rep: rep} s += 4 - for s <= sLimit { + for s < len(src) { + if len(src)-s < 8 { + if src[s] == src[m.length] { + m.length++ + s++ + continue + } + break + } if diff := load64(src, s) ^ load64(src, m.length); diff != 0 { m.length += bits.TrailingZeros64(diff) >> 3 break @@ -130,6 +156,62 @@ func encodeBlockBest(dst, src []byte) (d int) { } return m } + matchDict := func(candidate, s int, first uint32, rep bool) match { + // Calculate offset as if in continuous array with s + offset := -len(dict.dict) + candidate + if best.length != 0 && best.s-best.offset == s-offset && !rep { + // Don't retest if we have the same offset. + return match{offset: offset, s: s} + } + + if load32(dict.dict, candidate) != first { + return match{offset: offset, s: s} + } + m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true} + s += 4 + if !rep { + for s < sLimitDict && m.length < len(dict.dict) { + if len(src)-s < 8 || len(dict.dict)-m.length < 8 { + if src[s] == dict.dict[m.length] { + m.length++ + s++ + continue + } + break + } + if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 { + m.length += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + m.length += 8 + } + } else { + for s < len(src) && m.length < len(dict.dict) { + if len(src)-s < 8 || len(dict.dict)-m.length < 8 { + if src[s] == dict.dict[m.length] { + m.length++ + s++ + continue + } + break + } + if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 { + m.length += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + m.length += 8 + } + } + m.length -= candidate + m.score = score(m) + if m.score <= -m.s { + // Eliminate if no savings, we might find a better one. + m.length = 0 + } + return m + } bestOf := func(a, b match) match { if b.length == 0 { @@ -146,35 +228,82 @@ func encodeBlockBest(dst, src []byte) (d int) { return b } - best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false)) - best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false)) - best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false)) - + if s > 0 { + best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false)) + best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false)) + best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false)) + } + if dict != nil { + candidateL := dict.bestTableLong[hashL] + candidateS := dict.bestTableShort[hashS] + best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) + best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false)) + best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) + best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false)) + } { - best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true)) + if (dict == nil || repeat <= s) && repeat > 0 { + best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true)) + } else if s-repeat < -4 && dict != nil { + candidate := len(dict.dict) - (repeat - s) + best = bestOf(best, matchDict(candidate, s, uint32(cv), true)) + candidate++ + best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true)) + } + if best.length > 0 { + hashS := hash4(cv>>8, sTableBits) // s+1 - nextShort := sTable[hash4(cv>>8, sTableBits)] + nextShort := sTable[hashS] s := s + 1 cv := load64(src, s) - nextLong := lTable[hash8(cv, lTableBits)] + hashL := hash8(cv, lTableBits) + nextLong := lTable[hashL] best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false)) best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false)) best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false)) best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) - // Repeat at + 2 - best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true)) + + // Dict at + 1 + if dict != nil { + candidateL := dict.bestTableLong[hashL] + candidateS := dict.bestTableShort[hashS] + + best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) + best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) + } // s+2 if true { - nextShort = sTable[hash4(cv>>8, sTableBits)] + hashS := hash4(cv>>8, sTableBits) + + nextShort = sTable[hashS] s++ cv = load64(src, s) - nextLong = lTable[hash8(cv, lTableBits)] + hashL := hash8(cv, lTableBits) + nextLong = lTable[hashL] + + if (dict == nil || repeat <= s) && repeat > 0 { + // Repeat at + 2 + best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true)) + } else if repeat-s > 4 && dict != nil { + candidate := len(dict.dict) - (repeat - s) + best = bestOf(best, matchDict(candidate, s, uint32(cv), true)) + } best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false)) best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false)) best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false)) best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) + + // Dict at +2 + // Very small gain + if dict != nil { + candidateL := dict.bestTableLong[hashL] + candidateS := dict.bestTableShort[hashS] + + best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) + best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) + } } // Search for a match at best match end, see if that is better. // Allow some bytes at the beginning to mismatch. @@ -227,7 +356,7 @@ func encodeBlockBest(dst, src []byte) (d int) { // Extend backwards, not needed for repeats... s = best.s - if !best.rep { + if !best.rep && !best.dict { for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] { best.offset-- best.length++ @@ -244,7 +373,6 @@ func encodeBlockBest(dst, src []byte) (d int) { base := s offset := s - best.offset - s += best.length if offset > 65535 && s-base <= 5 && !best.rep { @@ -256,16 +384,28 @@ func encodeBlockBest(dst, src []byte) (d int) { cv = load64(src, s) continue } + if debug && nextEmit != base { + fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base) + } d += emitLiteral(dst[d:], src[nextEmit:base]) if best.rep { - if nextEmit > 0 { + if nextEmit > 0 || best.dict { + if debug { + fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) + } // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. d += emitRepeat(dst[d:], offset, best.length) } else { - // First match, cannot be repeat. + // First match without dict cannot be a repeat. + if debug { + fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) + } d += emitCopy(dst[d:], offset, best.length) } } else { + if debug { + fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) + } d += emitCopy(dst[d:], offset, best.length) } repeat = offset @@ -296,6 +436,9 @@ emitRemainder: if d+len(src)-nextEmit > dstLimit { return 0 } + if debug && nextEmit != s { + fmt.Println("emitted ", len(src)-nextEmit, "literals") + } d += emitLiteral(dst[d:], src[nextEmit:]) } return d @@ -642,7 +785,6 @@ func emitRepeatSize(offset, length int) int { left := 0 if length > maxRepeat { left = length - maxRepeat + 4 - length = maxRepeat - 4 } if left > 0 { return 5 + emitRepeatSize(offset, left) diff --git a/vendor/github.com/klauspost/compress/s2/encode_better.go b/vendor/github.com/klauspost/compress/s2/encode_better.go index 3b66ba42..f46adb41 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_better.go +++ b/vendor/github.com/klauspost/compress/s2/encode_better.go @@ -6,6 +6,8 @@ package s2 import ( + "bytes" + "fmt" "math/bits" ) @@ -476,3 +478,623 @@ emitRemainder: } return d } + +// encodeBlockBetterDict encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. +// +// It also assumes that: +// +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlockBetterDict(dst, src []byte, dict *Dict) (d int) { + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + // Initialize the hash tables. + const ( + // Long hash matches. + lTableBits = 17 + maxLTableSize = 1 << lTableBits + + // Short hash matches. + sTableBits = 14 + maxSTableSize = 1 << sTableBits + + maxAhead = 8 // maximum bytes ahead without checking sLimit + + debug = false + ) + + sLimit := len(src) - inputMargin + if sLimit > MaxDictSrcOffset-maxAhead { + sLimit = MaxDictSrcOffset - maxAhead + } + if len(src) < minNonLiteralBlockSize { + return 0 + } + + dict.initBetter() + + var lTable [maxLTableSize]uint32 + var sTable [maxSTableSize]uint32 + + // Bail if we can't compress to at least this. + dstLimit := len(src) - len(src)>>5 - 6 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 0 + cv := load64(src, s) + + // We initialize repeat to 0, so we never match on first attempt + repeat := len(dict.dict) - dict.repeat + + // While in dict +searchDict: + for { + candidateL := 0 + nextS := 0 + for { + // Next src position to check + nextS = s + (s-nextEmit)>>7 + 1 + if nextS > sLimit { + break searchDict + } + hashL := hash7(cv, lTableBits) + hashS := hash4(cv, sTableBits) + candidateL = int(lTable[hashL]) + candidateS := int(sTable[hashS]) + dictL := int(dict.betterTableLong[hashL]) + dictS := int(dict.betterTableShort[hashS]) + lTable[hashL] = uint32(s) + sTable[hashS] = uint32(s) + + valLong := load64(src, candidateL) + valShort := load64(src, candidateS) + + // If long matches at least 8 bytes, use that. + if s != 0 { + if cv == valLong { + goto emitMatch + } + if cv == valShort { + candidateL = candidateS + goto emitMatch + } + } + + // Check dict repeat. + if repeat >= s+4 { + candidate := len(dict.dict) - repeat + s + if candidate > 0 && uint32(cv) == load32(dict.dict, candidate) { + // Extend back + base := s + for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; { + i-- + base-- + } + d += emitLiteral(dst[d:], src[nextEmit:base]) + if debug && nextEmit != base { + fmt.Println("emitted ", base-nextEmit, "literals") + } + s += 4 + candidate += 4 + for candidate < len(dict.dict)-8 && s <= len(src)-8 { + if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + d += emitRepeat(dst[d:], repeat, s-base) + if debug { + fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s) + } + nextEmit = s + if s >= sLimit { + break searchDict + } + cv = load64(src, s) + // Index in-between + index0 := base + 1 + index1 := s - 2 + + cv = load64(src, s) + for index0 < index1 { + cv0 := load64(src, index0) + cv1 := load64(src, index1) + lTable[hash7(cv0, lTableBits)] = uint32(index0) + sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) + + lTable[hash7(cv1, lTableBits)] = uint32(index1) + sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) + index0 += 2 + index1 -= 2 + } + continue + } + } + // Don't try to find match at s==0 + if s == 0 { + cv = load64(src, nextS) + s = nextS + continue + } + + // Long likely matches 7, so take that. + if uint32(cv) == uint32(valLong) { + goto emitMatch + } + + // Long dict... + if uint32(cv) == load32(dict.dict, dictL) { + candidateL = dictL + goto emitDict + } + + // Check our short candidate + if uint32(cv) == uint32(valShort) { + // Try a long candidate at s+1 + hashL = hash7(cv>>8, lTableBits) + candidateL = int(lTable[hashL]) + lTable[hashL] = uint32(s + 1) + if uint32(cv>>8) == load32(src, candidateL) { + s++ + goto emitMatch + } + // Use our short candidate. + candidateL = candidateS + goto emitMatch + } + if uint32(cv) == load32(dict.dict, dictS) { + // Try a long candidate at s+1 + hashL = hash7(cv>>8, lTableBits) + candidateL = int(lTable[hashL]) + lTable[hashL] = uint32(s + 1) + if uint32(cv>>8) == load32(src, candidateL) { + s++ + goto emitMatch + } + candidateL = dictS + goto emitDict + } + cv = load64(src, nextS) + s = nextS + } + emitDict: + { + if debug { + if load32(dict.dict, candidateL) != load32(src, s) { + panic("dict emit mismatch") + } + } + // Extend backwards. + // The top bytes will be rechecked to get the full match. + for candidateL > 0 && s > nextEmit && dict.dict[candidateL-1] == src[s-1] { + candidateL-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + d += emitLiteral(dst[d:], src[nextEmit:s]) + if debug && nextEmit != s { + fmt.Println("emitted ", s-nextEmit, "literals") + } + { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + offset := s + (len(dict.dict)) - candidateL + + // Extend the 4-byte match as long as possible. + s += 4 + candidateL += 4 + for s <= len(src)-8 && len(dict.dict)-candidateL >= 8 { + if diff := load64(src, s) ^ load64(dict.dict, candidateL); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidateL += 8 + } + + if repeat == offset { + if debug { + fmt.Println("emitted dict repeat, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL) + } + d += emitRepeat(dst[d:], offset, s-base) + } else { + if debug { + fmt.Println("emitted dict copy, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL) + } + // Matches longer than 64 are split. + if s <= sLimit || s-base < 8 { + d += emitCopy(dst[d:], offset, s-base) + } else { + // Split to ensure we don't start a copy within next block. + d += emitCopy(dst[d:], offset, 4) + d += emitRepeat(dst[d:], offset, s-base-4) + } + repeat = offset + } + if false { + // Validate match. + if s <= candidateL { + panic("s <= candidate") + } + a := src[base:s] + b := dict.dict[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + + nextEmit = s + if s >= sLimit { + break searchDict + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + + // Index short & long + index0 := base + 1 + index1 := s - 2 + + cv0 := load64(src, index0) + cv1 := load64(src, index1) + lTable[hash7(cv0, lTableBits)] = uint32(index0) + sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) + + lTable[hash7(cv1, lTableBits)] = uint32(index1) + sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) + index0 += 1 + index1 -= 1 + cv = load64(src, s) + + // index every second long in between. + for index0 < index1 { + lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0) + lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1) + index0 += 2 + index1 -= 2 + } + } + continue + } + emitMatch: + + // Extend backwards + for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] { + candidateL-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + base := s + offset := base - candidateL + + // Extend the 4-byte match as long as possible. + s += 4 + candidateL += 4 + for s < len(src) { + if len(src)-s < 8 { + if src[s] == src[candidateL] { + s++ + candidateL++ + continue + } + break + } + if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidateL += 8 + } + + if offset > 65535 && s-base <= 5 && repeat != offset { + // Bail if the match is equal or worse to the encoding. + s = nextS + 1 + if s >= sLimit { + goto emitRemainder + } + cv = load64(src, s) + continue + } + + d += emitLiteral(dst[d:], src[nextEmit:base]) + if debug && nextEmit != s { + fmt.Println("emitted ", s-nextEmit, "literals") + } + if repeat == offset { + if debug { + fmt.Println("emitted match repeat, length", s-base, "offset:", offset, "s:", s) + } + d += emitRepeat(dst[d:], offset, s-base) + } else { + if debug { + fmt.Println("emitted match copy, length", s-base, "offset:", offset, "s:", s) + } + d += emitCopy(dst[d:], offset, s-base) + repeat = offset + } + + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + + // Index short & long + index0 := base + 1 + index1 := s - 2 + + cv0 := load64(src, index0) + cv1 := load64(src, index1) + lTable[hash7(cv0, lTableBits)] = uint32(index0) + sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) + + lTable[hash7(cv1, lTableBits)] = uint32(index1) + sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) + index0 += 1 + index1 -= 1 + cv = load64(src, s) + + // index every second long in between. + for index0 < index1 { + lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0) + lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1) + index0 += 2 + index1 -= 2 + } + } + + // Search without dict: + if repeat > s { + repeat = 0 + } + + // No more dict + sLimit = len(src) - inputMargin + if s >= sLimit { + goto emitRemainder + } + cv = load64(src, s) + if debug { + fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s) + } + for { + candidateL := 0 + nextS := 0 + for { + // Next src position to check + nextS = s + (s-nextEmit)>>7 + 1 + if nextS > sLimit { + goto emitRemainder + } + hashL := hash7(cv, lTableBits) + hashS := hash4(cv, sTableBits) + candidateL = int(lTable[hashL]) + candidateS := int(sTable[hashS]) + lTable[hashL] = uint32(s) + sTable[hashS] = uint32(s) + + valLong := load64(src, candidateL) + valShort := load64(src, candidateS) + + // If long matches at least 8 bytes, use that. + if cv == valLong { + break + } + if cv == valShort { + candidateL = candidateS + break + } + + // Check repeat at offset checkRep. + const checkRep = 1 + // Minimum length of a repeat. Tested with various values. + // While 4-5 offers improvements in some, 6 reduces + // regressions significantly. + const wantRepeatBytes = 6 + const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep) + if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask { + base := s + checkRep + // Extend back + for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { + i-- + base-- + } + d += emitLiteral(dst[d:], src[nextEmit:base]) + + // Extend forward + candidate := s - repeat + wantRepeatBytes + checkRep + s += wantRepeatBytes + checkRep + for s < len(src) { + if len(src)-s < 8 { + if src[s] == src[candidate] { + s++ + candidate++ + continue + } + break + } + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. + d += emitRepeat(dst[d:], repeat, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + // Index in-between + index0 := base + 1 + index1 := s - 2 + + cv = load64(src, s) + for index0 < index1 { + cv0 := load64(src, index0) + cv1 := load64(src, index1) + lTable[hash7(cv0, lTableBits)] = uint32(index0) + sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) + + lTable[hash7(cv1, lTableBits)] = uint32(index1) + sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) + index0 += 2 + index1 -= 2 + } + + cv = load64(src, s) + continue + } + + // Long likely matches 7, so take that. + if uint32(cv) == uint32(valLong) { + break + } + + // Check our short candidate + if uint32(cv) == uint32(valShort) { + // Try a long candidate at s+1 + hashL = hash7(cv>>8, lTableBits) + candidateL = int(lTable[hashL]) + lTable[hashL] = uint32(s + 1) + if uint32(cv>>8) == load32(src, candidateL) { + s++ + break + } + // Use our short candidate. + candidateL = candidateS + break + } + + cv = load64(src, nextS) + s = nextS + } + + // Extend backwards + for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] { + candidateL-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + base := s + offset := base - candidateL + + // Extend the 4-byte match as long as possible. + s += 4 + candidateL += 4 + for s < len(src) { + if len(src)-s < 8 { + if src[s] == src[candidateL] { + s++ + candidateL++ + continue + } + break + } + if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidateL += 8 + } + + if offset > 65535 && s-base <= 5 && repeat != offset { + // Bail if the match is equal or worse to the encoding. + s = nextS + 1 + if s >= sLimit { + goto emitRemainder + } + cv = load64(src, s) + continue + } + + d += emitLiteral(dst[d:], src[nextEmit:base]) + if repeat == offset { + d += emitRepeat(dst[d:], offset, s-base) + } else { + d += emitCopy(dst[d:], offset, s-base) + repeat = offset + } + + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + + // Index short & long + index0 := base + 1 + index1 := s - 2 + + cv0 := load64(src, index0) + cv1 := load64(src, index1) + lTable[hash7(cv0, lTableBits)] = uint32(index0) + sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) + + lTable[hash7(cv1, lTableBits)] = uint32(index1) + sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) + index0 += 1 + index1 -= 1 + cv = load64(src, s) + + // index every second long in between. + for index0 < index1 { + lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0) + lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1) + index0 += 2 + index1 -= 2 + } + } + +emitRemainder: + if nextEmit < len(src) { + // Bail if we exceed the maximum size. + if d+len(src)-nextEmit > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index db08fc35..0d39c7b0 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -4,9 +4,12 @@ package s2 import ( + "bytes" "math/bits" ) +const hasAmd64Asm = false + // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -312,3 +315,413 @@ func matchLen(a []byte, b []byte) int { } return len(a) + checked } + +func calcBlockSize(src []byte) (d int) { + // Initialize the hash table. + const ( + tableBits = 13 + maxTableSize = 1 << tableBits + ) + + var table [maxTableSize]uint32 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // Bail if we can't compress to at least this. + dstLimit := len(src) - len(src)>>5 - 5 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + cv := load64(src, s) + + // We search for a repeat at -1, but don't output repeats when nextEmit == 0 + repeat := 1 + + for { + candidate := 0 + for { + // Next src position to check + nextS := s + (s-nextEmit)>>6 + 4 + if nextS > sLimit { + goto emitRemainder + } + hash0 := hash6(cv, tableBits) + hash1 := hash6(cv>>8, tableBits) + candidate = int(table[hash0]) + candidate2 := int(table[hash1]) + table[hash0] = uint32(s) + table[hash1] = uint32(s + 1) + hash2 := hash6(cv>>16, tableBits) + + // Check repeat at offset checkRep. + const checkRep = 1 + if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { + base := s + checkRep + // Extend back + for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { + i-- + base-- + } + d += emitLiteralSize(src[nextEmit:base]) + + // Extend forward + candidate := s - repeat + 4 + checkRep + s += 4 + checkRep + for s <= sLimit { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + + d += emitCopyNoRepeatSize(repeat, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + cv = load64(src, s) + continue + } + + if uint32(cv) == load32(src, candidate) { + break + } + candidate = int(table[hash2]) + if uint32(cv>>8) == load32(src, candidate2) { + table[hash2] = uint32(s + 2) + candidate = candidate2 + s++ + break + } + table[hash2] = uint32(s + 2) + if uint32(cv>>16) == load32(src, candidate) { + s += 2 + break + } + + cv = load64(src, nextS) + s = nextS + } + + // Extend backwards + for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { + candidate-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + d += emitLiteralSize(src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + repeat = base - candidate + + // Extend the 4-byte match as long as possible. + s += 4 + candidate += 4 + for s <= len(src)-8 { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + + d += emitCopyNoRepeatSize(repeat, s-base) + if false { + // Validate match. + a := src[base:s] + b := src[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + // Check for an immediate match, otherwise start search at s+1 + x := load64(src, s-2) + m2Hash := hash6(x, tableBits) + currHash := hash6(x>>16, tableBits) + candidate = int(table[currHash]) + table[m2Hash] = uint32(s - 2) + table[currHash] = uint32(s) + if uint32(x>>16) != load32(src, candidate) { + cv = load64(src, s+1) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + // Bail if we exceed the maximum size. + if d+len(src)-nextEmit > dstLimit { + return 0 + } + d += emitLiteralSize(src[nextEmit:]) + } + return d +} + +func calcBlockSizeSmall(src []byte) (d int) { + // Initialize the hash table. + const ( + tableBits = 9 + maxTableSize = 1 << tableBits + ) + + var table [maxTableSize]uint32 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // Bail if we can't compress to at least this. + dstLimit := len(src) - len(src)>>5 - 5 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + cv := load64(src, s) + + // We search for a repeat at -1, but don't output repeats when nextEmit == 0 + repeat := 1 + + for { + candidate := 0 + for { + // Next src position to check + nextS := s + (s-nextEmit)>>6 + 4 + if nextS > sLimit { + goto emitRemainder + } + hash0 := hash6(cv, tableBits) + hash1 := hash6(cv>>8, tableBits) + candidate = int(table[hash0]) + candidate2 := int(table[hash1]) + table[hash0] = uint32(s) + table[hash1] = uint32(s + 1) + hash2 := hash6(cv>>16, tableBits) + + // Check repeat at offset checkRep. + const checkRep = 1 + if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { + base := s + checkRep + // Extend back + for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { + i-- + base-- + } + d += emitLiteralSize(src[nextEmit:base]) + + // Extend forward + candidate := s - repeat + 4 + checkRep + s += 4 + checkRep + for s <= sLimit { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + + d += emitCopyNoRepeatSize(repeat, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + cv = load64(src, s) + continue + } + + if uint32(cv) == load32(src, candidate) { + break + } + candidate = int(table[hash2]) + if uint32(cv>>8) == load32(src, candidate2) { + table[hash2] = uint32(s + 2) + candidate = candidate2 + s++ + break + } + table[hash2] = uint32(s + 2) + if uint32(cv>>16) == load32(src, candidate) { + s += 2 + break + } + + cv = load64(src, nextS) + s = nextS + } + + // Extend backwards + for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { + candidate-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + d += emitLiteralSize(src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + repeat = base - candidate + + // Extend the 4-byte match as long as possible. + s += 4 + candidate += 4 + for s <= len(src)-8 { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + + d += emitCopyNoRepeatSize(repeat, s-base) + if false { + // Validate match. + a := src[base:s] + b := src[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + // Check for an immediate match, otherwise start search at s+1 + x := load64(src, s-2) + m2Hash := hash6(x, tableBits) + currHash := hash6(x>>16, tableBits) + candidate = int(table[currHash]) + table[m2Hash] = uint32(s - 2) + table[currHash] = uint32(s) + if uint32(x>>16) != load32(src, candidate) { + cv = load64(src, s+1) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + // Bail if we exceed the maximum size. + if d+len(src)-nextEmit > dstLimit { + return 0 + } + d += emitLiteralSize(src[nextEmit:]) + } + return d +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +// +// It assumes that: +// +// dst is long enough to hold the encoded bytes +// 0 <= len(lit) && len(lit) <= math.MaxUint32 +func emitLiteralSize(lit []byte) int { + if len(lit) == 0 { + return 0 + } + switch { + case len(lit) <= 60: + return len(lit) + 1 + case len(lit) <= 1<<8: + return len(lit) + 2 + case len(lit) <= 1<<16: + return len(lit) + 3 + case len(lit) <= 1<<24: + return len(lit) + 4 + default: + return len(lit) + 5 + } +} + +func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { + panic("cvtLZ4BlockAsm should be unreachable") +} + +func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { + panic("cvtLZ4BlockSnappyAsm should be unreachable") +} + +func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { + panic("cvtLZ4sBlockAsm should be unreachable") +} + +func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { + panic("cvtLZ4sBlockSnappyAsm should be unreachable") +} diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go index 7e00bac3..297e4150 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go @@ -146,6 +146,20 @@ func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int //go:noescape func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +// calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst. +// Maximum input 4294967295 bytes. +// It assumes that the varint-encoded length of the decompressed bytes has already been written. +// +//go:noescape +func calcBlockSize(src []byte) int + +// calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst. +// Maximum input 1024 bytes. +// It assumes that the varint-encoded length of the decompressed bytes has already been written. +// +//go:noescape +func calcBlockSizeSmall(src []byte) int + // emitLiteral writes a literal chunk and returns the number of bytes written. // // It assumes that: @@ -192,3 +206,23 @@ func emitCopyNoRepeat(dst []byte, offset int, length int) int // //go:noescape func matchLen(a []byte, b []byte) int + +// cvtLZ4Block converts an LZ4 block to S2 +// +//go:noescape +func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) + +// cvtLZ4sBlock converts an LZ4s block to S2 +// +//go:noescape +func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) + +// cvtLZ4Block converts an LZ4 block to Snappy +// +//go:noescape +func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) + +// cvtLZ4sBlock converts an LZ4s block to Snappy +// +//go:noescape +func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 81a487d6..9222d179 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -36,8 +36,8 @@ zero_loop_encodeBlockAsm: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -47,781 +47,173 @@ zero_loop_encodeBlockAsm: MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x06, SI - LEAL 4(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeBlockAsm - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ DI, R10 - MOVQ DI, R11 - SHRQ $0x08, R11 + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x06, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeBlockAsm + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 SHLQ $0x10, R10 - IMULQ R9, R10 + IMULQ R8, R10 SHRQ $0x32, R10 - SHLQ $0x10, R11 - IMULQ R9, R11 - SHRQ $0x32, R11 - MOVL 24(SP)(R10*4), SI - MOVL 24(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - LEAL 1(CX), R10 - MOVL R10, 24(SP)(R11*4) - MOVQ DI, R10 - SHRQ $0x10, R10 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x32, R10 - MOVL CX, R9 - SUBL 16(SP), R9 - MOVL 1(DX)(R9*1), R11 - MOVQ DI, R9 - SHRQ $0x08, R9 - CMPL R9, R11 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 + MOVL CX, R8 + SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm - LEAL 1(CX), DI - MOVL 12(SP), R8 - MOVL DI, SI - SUBL 16(SP), SI + LEAL 1(CX), SI + MOVL 12(SP), DI + MOVL SI, BX + SUBL 16(SP), BX JZ repeat_extend_back_end_encodeBlockAsm repeat_extend_back_loop_encodeBlockAsm: - CMPL DI, R8 - JLE repeat_extend_back_end_encodeBlockAsm - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(DI*1), R9 - CMPB BL, R9 + CMPL SI, DI + JBE repeat_extend_back_end_encodeBlockAsm + MOVB -1(DX)(BX*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 JNE repeat_extend_back_end_encodeBlockAsm - LEAL -1(DI), DI - DECL SI + LEAL -1(SI), SI + DECL BX JNZ repeat_extend_back_loop_encodeBlockAsm repeat_extend_back_end_encodeBlockAsm: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_repeat_emit_encodeBlockAsm - CMPL SI, $0x00000100 - JLT two_bytes_repeat_emit_encodeBlockAsm - CMPL SI, $0x00010000 - JLT three_bytes_repeat_emit_encodeBlockAsm - CMPL SI, $0x01000000 - JLT four_bytes_repeat_emit_encodeBlockAsm + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_encodeBlockAsm + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_encodeBlockAsm + CMPL BX, $0x00010000 + JB three_bytes_repeat_emit_encodeBlockAsm + CMPL BX, $0x01000000 + JB four_bytes_repeat_emit_encodeBlockAsm MOVB $0xfc, (AX) - MOVL SI, 1(AX) + MOVL BX, 1(AX) ADDQ $0x05, AX JMP memmove_long_repeat_emit_encodeBlockAsm four_bytes_repeat_emit_encodeBlockAsm: - MOVL SI, R11 - SHRL $0x10, R11 + MOVL BX, R10 + SHRL $0x10, R10 MOVB $0xf8, (AX) - MOVW SI, 1(AX) - MOVB R11, 3(AX) + MOVW BX, 1(AX) + MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_repeat_emit_encodeBlockAsm three_bytes_repeat_emit_encodeBlockAsm: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm two_bytes_repeat_emit_encodeBlockAsm: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_repeat_emit_encodeBlockAsm + CMPL BX, $0x40 + JB memmove_repeat_emit_encodeBlockAsm JMP memmove_long_repeat_emit_encodeBlockAsm one_byte_repeat_emit_encodeBlockAsm: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8: - MOVQ (R10), R11 - MOVQ R11, (AX) + MOVQ (R9), R10 + MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeBlockAsm: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm memmove_long_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R12 - SHRQ $0x05, R12 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R13 - SUBQ R11, R13 - DECQ R12 - JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R10)(R13*1), R11 - LEAQ -32(AX)(R13*1), R14 - -emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) - ADDQ $0x20, R14 - ADDQ $0x20, R11 - ADDQ $0x20, R13 - DECQ R12 - JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back - -emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R10)(R13*1), X4 - MOVOU -16(R10)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R9, R13 - JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX - -emit_literal_done_repeat_emit_encodeBlockAsm: - ADDL $0x05, CX - MOVL CX, SI - SUBL 16(SP), SI - MOVQ src_len+32(FP), R9 - SUBL CX, R9 - LEAQ (DX)(CX*1), R10 - LEAQ (DX)(SI*1), SI - - // matchLen - XORL R12, R12 - CMPL R9, $0x08 - JL matchlen_match4_repeat_extend_encodeBlockAsm - -matchlen_loopback_repeat_extend_encodeBlockAsm: - MOVQ (R10)(R12*1), R11 - XORQ (SI)(R12*1), R11 - TESTQ R11, R11 - JZ matchlen_loop_repeat_extend_encodeBlockAsm - -#ifdef GOAMD64_v3 - TZCNTQ R11, R11 - -#else - BSFQ R11, R11 - -#endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 - JMP repeat_extend_forward_end_encodeBlockAsm - -matchlen_loop_repeat_extend_encodeBlockAsm: - LEAL -8(R9), R9 - LEAL 8(R12), R12 - CMPL R9, $0x08 - JGE matchlen_loopback_repeat_extend_encodeBlockAsm - JZ repeat_extend_forward_end_encodeBlockAsm - -matchlen_match4_repeat_extend_encodeBlockAsm: - CMPL R9, $0x04 - JL matchlen_match2_repeat_extend_encodeBlockAsm - MOVL (R10)(R12*1), R11 - CMPL (SI)(R12*1), R11 - JNE matchlen_match2_repeat_extend_encodeBlockAsm - SUBL $0x04, R9 - LEAL 4(R12), R12 - -matchlen_match2_repeat_extend_encodeBlockAsm: - CMPL R9, $0x02 - JL matchlen_match1_repeat_extend_encodeBlockAsm - MOVW (R10)(R12*1), R11 - CMPW (SI)(R12*1), R11 - JNE matchlen_match1_repeat_extend_encodeBlockAsm - SUBL $0x02, R9 - LEAL 2(R12), R12 - -matchlen_match1_repeat_extend_encodeBlockAsm: - CMPL R9, $0x01 - JL repeat_extend_forward_end_encodeBlockAsm - MOVB (R10)(R12*1), R11 - CMPB (SI)(R12*1), R11 - JNE repeat_extend_forward_end_encodeBlockAsm - LEAL 1(R12), R12 - -repeat_extend_forward_end_encodeBlockAsm: - ADDL R12, CX - MOVL CX, SI - SUBL DI, SI - MOVL 16(SP), DI - TESTL R8, R8 - JZ repeat_as_copy_encodeBlockAsm - - // emitRepeat -emit_repeat_again_match_repeat_encodeBlockAsm: - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_match_repeat_encodeBlockAsm - CMPL R8, $0x0c - JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm - CMPL DI, $0x00000800 - JLT repeat_two_offset_match_repeat_encodeBlockAsm - -cant_repeat_two_offset_match_repeat_encodeBlockAsm: - CMPL SI, $0x00000104 - JLT repeat_three_match_repeat_encodeBlockAsm - CMPL SI, $0x00010100 - JLT repeat_four_match_repeat_encodeBlockAsm - CMPL SI, $0x0100ffff - JLT repeat_five_match_repeat_encodeBlockAsm - LEAL -16842747(SI), SI - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_match_repeat_encodeBlockAsm - -repeat_five_match_repeat_encodeBlockAsm: - LEAL -65536(SI), SI - MOVL SI, DI - MOVW $0x001d, (AX) - MOVW SI, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_four_match_repeat_encodeBlockAsm: - LEAL -256(SI), SI - MOVW $0x0019, (AX) - MOVW SI, 2(AX) - ADDQ $0x04, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_three_match_repeat_encodeBlockAsm: - LEAL -4(SI), SI - MOVW $0x0015, (AX) - MOVB SI, 2(AX) - ADDQ $0x03, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_two_match_repeat_encodeBlockAsm: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_two_offset_match_repeat_encodeBlockAsm: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_as_copy_encodeBlockAsm: - // emitCopy - CMPL DI, $0x00010000 - JL two_byte_offset_repeat_as_copy_encodeBlockAsm - -four_bytes_loop_back_repeat_as_copy_encodeBlockAsm: - CMPL SI, $0x40 - JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(SI), SI - ADDQ $0x05, AX - CMPL SI, $0x04 - JL four_bytes_remain_repeat_as_copy_encodeBlockAsm - - // emitRepeat -emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL R8, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL DI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy - -cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL SI, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL SI, $0x0100ffff - JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy - LEAL -16842747(SI), SI - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy - -repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -65536(SI), SI - MOVL SI, DI - MOVW $0x001d, (AX) - MOVW SI, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -256(SI), SI - MOVW $0x0019, (AX) - MOVW SI, 2(AX) - ADDQ $0x04, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -4(SI), SI - MOVW $0x0015, (AX) - MOVB SI, 2(AX) - ADDQ $0x03, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm - JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm - -four_bytes_remain_repeat_as_copy_encodeBlockAsm: - TESTL SI, SI - JZ repeat_end_emit_encodeBlockAsm - MOVB $0x03, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm - -two_byte_offset_repeat_as_copy_encodeBlockAsm: - CMPL SI, $0x40 - JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm - CMPL DI, $0x00000800 - JAE long_offset_short_repeat_as_copy_encodeBlockAsm - MOVL $0x00000001, R8 - LEAL 16(R8), R8 - MOVB DI, 1(AX) - MOVL DI, R9 - SHRL $0x08, R9 - SHLL $0x05, R9 - ORL R9, R8 - MOVB R8, (AX) - ADDQ $0x02, AX - SUBL $0x08, SI - - // emitRepeat - LEAL -4(SI), SI - JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - -emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL R8, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL DI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - -cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x0100ffff - JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(SI), SI - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - -repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(SI), SI - MOVL SI, DI - MOVW $0x001d, (AX) - MOVW SI, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(SI), SI - MOVW $0x0019, (AX) - MOVW SI, 2(AX) - ADDQ $0x04, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(SI), SI - MOVW $0x0015, (AX) - MOVB SI, 2(AX) - ADDQ $0x03, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm - -long_offset_short_repeat_as_copy_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(SI), SI - ADDQ $0x03, AX - - // emitRepeat -emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL R8, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL DI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short - -cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL SI, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL SI, $0x0100ffff - JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short - LEAL -16842747(SI), SI - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short - -repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -65536(SI), SI - MOVL SI, DI - MOVW $0x001d, (AX) - MOVW SI, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -256(SI), SI - MOVW $0x0019, (AX) - MOVW SI, 2(AX) - ADDQ $0x04, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -4(SI), SI - MOVW $0x0015, (AX) - MOVB SI, 2(AX) - ADDQ $0x03, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm - -repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm - JMP two_byte_offset_repeat_as_copy_encodeBlockAsm - -two_byte_offset_short_repeat_as_copy_encodeBlockAsm: - CMPL SI, $0x0c - JGE emit_copy_three_repeat_as_copy_encodeBlockAsm - CMPL DI, $0x00000800 - JGE emit_copy_three_repeat_as_copy_encodeBlockAsm - MOVB $0x01, BL - LEAL -16(BX)(SI*4), SI - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm - -emit_copy_three_repeat_as_copy_encodeBlockAsm: - MOVB $0x02, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX - -repeat_end_emit_encodeBlockAsm: - MOVL CX, 12(SP) - JMP search_loop_encodeBlockAsm - -no_repeat_found_encodeBlockAsm: - CMPL (DX)(SI*1), DI - JEQ candidate_match_encodeBlockAsm - SHRQ $0x08, DI - MOVL 24(SP)(R10*4), SI - LEAL 2(CX), R9 - CMPL (DX)(R8*1), DI - JEQ candidate2_match_encodeBlockAsm - MOVL R9, 24(SP)(R10*4) - SHRQ $0x08, DI - CMPL (DX)(SI*1), DI - JEQ candidate3_match_encodeBlockAsm - MOVL 20(SP), CX - JMP search_loop_encodeBlockAsm - -candidate3_match_encodeBlockAsm: - ADDL $0x02, CX - JMP candidate_match_encodeBlockAsm - -candidate2_match_encodeBlockAsm: - MOVL R9, 24(SP)(R10*4) - INCL CX - MOVL R8, SI - -candidate_match_encodeBlockAsm: - MOVL 12(SP), DI - TESTL SI, SI - JZ match_extend_back_end_encodeBlockAsm - -match_extend_back_loop_encodeBlockAsm: - CMPL CX, DI - JLE match_extend_back_end_encodeBlockAsm - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 - JNE match_extend_back_end_encodeBlockAsm - LEAL -1(CX), CX - DECL SI - JZ match_extend_back_end_encodeBlockAsm - JMP match_extend_back_loop_encodeBlockAsm - -match_extend_back_end_encodeBlockAsm: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 5(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) - RET - -match_dst_size_check_encodeBlockAsm: - MOVL CX, DI - MOVL 12(SP), R8 - CMPL R8, DI - JEQ emit_literal_done_match_emit_encodeBlockAsm - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(R8*1), DI - SUBL R8, R9 - LEAL -1(R9), R8 - CMPL R8, $0x3c - JLT one_byte_match_emit_encodeBlockAsm - CMPL R8, $0x00000100 - JLT two_bytes_match_emit_encodeBlockAsm - CMPL R8, $0x00010000 - JLT three_bytes_match_emit_encodeBlockAsm - CMPL R8, $0x01000000 - JLT four_bytes_match_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL R8, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_match_emit_encodeBlockAsm - -four_bytes_match_emit_encodeBlockAsm: - MOVL R8, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW R8, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_match_emit_encodeBlockAsm - -three_bytes_match_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW R8, 1(AX) - ADDQ $0x03, AX - JMP memmove_long_match_emit_encodeBlockAsm - -two_bytes_match_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB R8, 1(AX) - ADDQ $0x02, AX - CMPL R8, $0x40 - JL memmove_match_emit_encodeBlockAsm - JMP memmove_long_match_emit_encodeBlockAsm - -one_byte_match_emit_encodeBlockAsm: - SHLB $0x02, R8 - MOVB R8, (AX) - ADDQ $0x01, AX - -memmove_match_emit_encodeBlockAsm: - LEAQ (AX)(R9*1), R8 - - // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8 - CMPQ R9, $0x10 - JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R9, $0x20 - JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 - JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 - -emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8: - MOVQ (DI), R10 - MOVQ R10, (AX) - JMP memmove_end_copy_match_emit_encodeBlockAsm - -emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (DI), R10 - MOVQ -8(DI)(R9*1), DI - MOVQ R10, (AX) - MOVQ DI, -8(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeBlockAsm - -emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (DI), X0 - MOVOU -16(DI)(R9*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeBlockAsm - -emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - -memmove_end_copy_match_emit_encodeBlockAsm: - MOVQ R8, AX - JMP emit_literal_done_match_emit_encodeBlockAsm - -memmove_long_match_emit_encodeBlockAsm: - LEAQ (AX)(R9*1), R8 - - // genMemMoveLong - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVQ R9, R11 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 - JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(DI)(R12*1), R10 + JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 + LEAQ -32(R9)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 -emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: +emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) @@ -830,192 +222,254 @@ emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 - JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back + JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back -emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(DI)(R12*1), X4 - MOVOU -16(DI)(R12*1), X5 +emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32: + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 - CMPQ R9, R12 - JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 + CMPQ R8, R12 + JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ R8, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX -emit_literal_done_match_emit_encodeBlockAsm: -match_nolit_loop_encodeBlockAsm: - MOVL CX, DI - SUBL SI, DI - MOVL DI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(SI*1), SI +emit_literal_done_repeat_emit_encodeBlockAsm: + ADDL $0x05, CX + MOVL CX, BX + SUBL 16(SP), BX + MOVQ src_len+32(FP), R8 + SUBL CX, R8 + LEAQ (DX)(CX*1), R9 + LEAQ (DX)(BX*1), BX // matchLen - XORL R10, R10 - CMPL DI, $0x08 - JL matchlen_match4_match_nolit_encodeBlockAsm + XORL R11, R11 + CMPL R8, $0x08 + JB matchlen_match4_repeat_extend_encodeBlockAsm -matchlen_loopback_match_nolit_encodeBlockAsm: - MOVQ (R8)(R10*1), R9 - XORQ (SI)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_match_nolit_encodeBlockAsm +matchlen_loopback_repeat_extend_encodeBlockAsm: + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + TESTQ R10, R10 + JZ matchlen_loop_repeat_extend_encodeBlockAsm #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 - JMP match_nolit_end_encodeBlockAsm + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm -matchlen_loop_match_nolit_encodeBlockAsm: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JGE matchlen_loopback_match_nolit_encodeBlockAsm - JZ match_nolit_end_encodeBlockAsm +matchlen_loop_repeat_extend_encodeBlockAsm: + LEAL -8(R8), R8 + LEAL 8(R11), R11 + CMPL R8, $0x08 + JAE matchlen_loopback_repeat_extend_encodeBlockAsm + JZ repeat_extend_forward_end_encodeBlockAsm -matchlen_match4_match_nolit_encodeBlockAsm: - CMPL DI, $0x04 - JL matchlen_match2_match_nolit_encodeBlockAsm - MOVL (R8)(R10*1), R9 - CMPL (SI)(R10*1), R9 - JNE matchlen_match2_match_nolit_encodeBlockAsm - SUBL $0x04, DI - LEAL 4(R10), R10 +matchlen_match4_repeat_extend_encodeBlockAsm: + CMPL R8, $0x04 + JB matchlen_match2_repeat_extend_encodeBlockAsm + MOVL (R9)(R11*1), R10 + CMPL (BX)(R11*1), R10 + JNE matchlen_match2_repeat_extend_encodeBlockAsm + SUBL $0x04, R8 + LEAL 4(R11), R11 -matchlen_match2_match_nolit_encodeBlockAsm: - CMPL DI, $0x02 - JL matchlen_match1_match_nolit_encodeBlockAsm - MOVW (R8)(R10*1), R9 - CMPW (SI)(R10*1), R9 - JNE matchlen_match1_match_nolit_encodeBlockAsm - SUBL $0x02, DI - LEAL 2(R10), R10 +matchlen_match2_repeat_extend_encodeBlockAsm: + CMPL R8, $0x02 + JB matchlen_match1_repeat_extend_encodeBlockAsm + MOVW (R9)(R11*1), R10 + CMPW (BX)(R11*1), R10 + JNE matchlen_match1_repeat_extend_encodeBlockAsm + SUBL $0x02, R8 + LEAL 2(R11), R11 -matchlen_match1_match_nolit_encodeBlockAsm: - CMPL DI, $0x01 - JL match_nolit_end_encodeBlockAsm - MOVB (R8)(R10*1), R9 - CMPB (SI)(R10*1), R9 - JNE match_nolit_end_encodeBlockAsm - LEAL 1(R10), R10 +matchlen_match1_repeat_extend_encodeBlockAsm: + CMPL R8, $0x01 + JB repeat_extend_forward_end_encodeBlockAsm + MOVB (R9)(R11*1), R10 + CMPB (BX)(R11*1), R10 + JNE repeat_extend_forward_end_encodeBlockAsm + LEAL 1(R11), R11 -match_nolit_end_encodeBlockAsm: - ADDL R10, CX - MOVL 16(SP), SI - ADDL $0x04, R10 - MOVL CX, 12(SP) - - // emitCopy - CMPL SI, $0x00010000 - JL two_byte_offset_match_nolit_encodeBlockAsm - -four_bytes_loop_back_match_nolit_encodeBlockAsm: - CMPL R10, $0x40 - JLE four_bytes_remain_match_nolit_encodeBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(R10), R10 - ADDQ $0x05, AX - CMPL R10, $0x04 - JL four_bytes_remain_match_nolit_encodeBlockAsm +repeat_extend_forward_end_encodeBlockAsm: + ADDL R11, CX + MOVL CX, BX + SUBL SI, BX + MOVL 16(SP), SI + TESTL DI, DI + JZ repeat_as_copy_encodeBlockAsm // emitRepeat -emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: - MOVL R10, DI - LEAL -4(R10), R10 +emit_repeat_again_match_repeat_encodeBlockAsm: + MOVL BX, DI + LEAL -4(BX), BX CMPL DI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy + JBE repeat_two_match_repeat_encodeBlockAsm CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy + JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm CMPL SI, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy + JB repeat_two_offset_match_repeat_encodeBlockAsm -cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy - CMPL R10, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy - CMPL R10, $0x0100ffff - JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy - LEAL -16842747(R10), R10 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) +cant_repeat_two_offset_match_repeat_encodeBlockAsm: + CMPL BX, $0x00000104 + JB repeat_three_match_repeat_encodeBlockAsm + CMPL BX, $0x00010100 + JB repeat_four_match_repeat_encodeBlockAsm + CMPL BX, $0x0100ffff + JB repeat_five_match_repeat_encodeBlockAsm + LEAL -16842747(BX), BX + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX - JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy + JMP emit_repeat_again_match_repeat_encodeBlockAsm -repeat_five_match_nolit_encodeBlockAsm_emit_copy: - LEAL -65536(R10), R10 - MOVL R10, SI +repeat_five_match_repeat_encodeBlockAsm: + LEAL -65536(BX), BX + MOVL BX, SI MOVW $0x001d, (AX) - MOVW R10, 2(AX) + MOVW BX, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm + JMP repeat_end_emit_encodeBlockAsm -repeat_four_match_nolit_encodeBlockAsm_emit_copy: - LEAL -256(R10), R10 +repeat_four_match_repeat_encodeBlockAsm: + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm + JMP repeat_end_emit_encodeBlockAsm -repeat_three_match_nolit_encodeBlockAsm_emit_copy: - LEAL -4(R10), R10 +repeat_three_match_repeat_encodeBlockAsm: + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm + JMP repeat_end_emit_encodeBlockAsm -repeat_two_match_nolit_encodeBlockAsm_emit_copy: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) +repeat_two_match_repeat_encodeBlockAsm: + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm + JMP repeat_end_emit_encodeBlockAsm -repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: +repeat_two_offset_match_repeat_encodeBlockAsm: XORQ DI, DI - LEAL 1(DI)(R10*4), R10 + LEAL 1(DI)(BX*4), BX MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm - JMP four_bytes_loop_back_match_nolit_encodeBlockAsm + JMP repeat_end_emit_encodeBlockAsm -four_bytes_remain_match_nolit_encodeBlockAsm: - TESTL R10, R10 - JZ match_nolit_emitcopy_end_encodeBlockAsm - MOVB $0x03, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) +repeat_as_copy_encodeBlockAsm: + // emitCopy + CMPL SI, $0x00010000 + JB two_byte_offset_repeat_as_copy_encodeBlockAsm + CMPL BX, $0x40 + JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm + MOVB $0xff, (AX) + MOVL SI, 1(AX) + LEAL -64(BX), BX + ADDQ $0x05, AX + CMPL BX, $0x04 + JB four_bytes_remain_repeat_as_copy_encodeBlockAsm + + // emitRepeat +emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy + CMPL DI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy + CMPL SI, $0x00000800 + JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy + +cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy + CMPL BX, $0x00010100 + JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy + CMPL BX, $0x0100ffff + JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy + LEAL -16842747(BX), BX + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy + +repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: + LEAL -65536(BX), BX + MOVL BX, SI + MOVW $0x001d, (AX) + MOVW BX, 2(AX) + SARL $0x10, SI + MOVB SI, 4(AX) + ADDQ $0x05, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: + LEAL -256(BX), BX + MOVW $0x0019, (AX) + MOVW BX, 2(AX) + ADDQ $0x04, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: + LEAL -4(BX), BX + MOVW $0x0015, (AX) + MOVB BL, 2(AX) + ADDQ $0x03, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeBlockAsm + +four_bytes_remain_repeat_as_copy_encodeBlockAsm: + TESTL BX, BX + JZ repeat_end_emit_encodeBlockAsm + XORL DI, DI + LEAL -1(DI)(BX*4), BX + MOVB BL, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm + JMP repeat_end_emit_encodeBlockAsm -two_byte_offset_match_nolit_encodeBlockAsm: - CMPL R10, $0x40 - JLE two_byte_offset_short_match_nolit_encodeBlockAsm +two_byte_offset_repeat_as_copy_encodeBlockAsm: + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm CMPL SI, $0x00000800 - JAE long_offset_short_match_nolit_encodeBlockAsm + JAE long_offset_short_repeat_as_copy_encodeBlockAsm MOVL $0x00000001, DI LEAL 16(DI), DI MOVB SI, 1(AX) @@ -1025,200 +479,731 @@ two_byte_offset_match_nolit_encodeBlockAsm: ORL R8, DI MOVB DI, (AX) ADDQ $0x02, AX - SUBL $0x08, R10 + SUBL $0x08, BX // emitRepeat - LEAL -4(R10), R10 + LEAL -4(BX), BX + JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b + +emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b + CMPL DI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b + CMPL SI, $0x00000800 + JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b + +cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b + CMPL BX, $0x00010100 + JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b + CMPL BX, $0x0100ffff + JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b + LEAL -16842747(BX), BX + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b + +repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: + LEAL -65536(BX), BX + MOVL BX, SI + MOVW $0x001d, (AX) + MOVW BX, 2(AX) + SARL $0x10, SI + MOVB SI, 4(AX) + ADDQ $0x05, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: + LEAL -256(BX), BX + MOVW $0x0019, (AX) + MOVW BX, 2(AX) + ADDQ $0x04, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: + LEAL -4(BX), BX + MOVW $0x0015, (AX) + MOVB BL, 2(AX) + ADDQ $0x03, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeBlockAsm + +long_offset_short_repeat_as_copy_encodeBlockAsm: + MOVB $0xee, (AX) + MOVW SI, 1(AX) + LEAL -60(BX), BX + ADDQ $0x03, AX + + // emitRepeat +emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short + CMPL DI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short + CMPL SI, $0x00000800 + JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short + +cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short + CMPL BX, $0x00010100 + JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short + CMPL BX, $0x0100ffff + JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short + LEAL -16842747(BX), BX + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short + +repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: + LEAL -65536(BX), BX + MOVL BX, SI + MOVW $0x001d, (AX) + MOVW BX, 2(AX) + SARL $0x10, SI + MOVB SI, 4(AX) + ADDQ $0x05, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: + LEAL -256(BX), BX + MOVW $0x0019, (AX) + MOVW BX, 2(AX) + ADDQ $0x04, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: + LEAL -4(BX), BX + MOVW $0x0015, (AX) + MOVB BL, 2(AX) + ADDQ $0x03, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeBlockAsm + +repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeBlockAsm + +two_byte_offset_short_repeat_as_copy_encodeBlockAsm: + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_encodeBlockAsm + CMPL SI, $0x00000800 + JAE emit_copy_three_repeat_as_copy_encodeBlockAsm + LEAL -15(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeBlockAsm + +emit_copy_three_repeat_as_copy_encodeBlockAsm: + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW SI, 1(AX) + ADDQ $0x03, AX + +repeat_end_emit_encodeBlockAsm: + MOVL CX, 12(SP) + JMP search_loop_encodeBlockAsm + +no_repeat_found_encodeBlockAsm: + CMPL (DX)(BX*1), SI + JEQ candidate_match_encodeBlockAsm + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI + JEQ candidate2_match_encodeBlockAsm + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI + JEQ candidate3_match_encodeBlockAsm + MOVL 20(SP), CX + JMP search_loop_encodeBlockAsm + +candidate3_match_encodeBlockAsm: + ADDL $0x02, CX + JMP candidate_match_encodeBlockAsm + +candidate2_match_encodeBlockAsm: + MOVL R8, 24(SP)(R9*4) + INCL CX + MOVL DI, BX + +candidate_match_encodeBlockAsm: + MOVL 12(SP), SI + TESTL BX, BX + JZ match_extend_back_end_encodeBlockAsm + +match_extend_back_loop_encodeBlockAsm: + CMPL CX, SI + JBE match_extend_back_end_encodeBlockAsm + MOVB -1(DX)(BX*1), DI + MOVB -1(DX)(CX*1), R8 + CMPB DI, R8 + JNE match_extend_back_end_encodeBlockAsm + LEAL -1(CX), CX + DECL BX + JZ match_extend_back_end_encodeBlockAsm + JMP match_extend_back_loop_encodeBlockAsm + +match_extend_back_end_encodeBlockAsm: + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 5(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeBlockAsm + MOVQ $0x00000000, ret+48(FP) + RET + +match_dst_size_check_encodeBlockAsm: + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI + JEQ emit_literal_done_match_emit_encodeBlockAsm + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), DI + CMPL DI, $0x3c + JB one_byte_match_emit_encodeBlockAsm + CMPL DI, $0x00000100 + JB two_bytes_match_emit_encodeBlockAsm + CMPL DI, $0x00010000 + JB three_bytes_match_emit_encodeBlockAsm + CMPL DI, $0x01000000 + JB four_bytes_match_emit_encodeBlockAsm + MOVB $0xfc, (AX) + MOVL DI, 1(AX) + ADDQ $0x05, AX + JMP memmove_long_match_emit_encodeBlockAsm + +four_bytes_match_emit_encodeBlockAsm: + MOVL DI, R9 + SHRL $0x10, R9 + MOVB $0xf8, (AX) + MOVW DI, 1(AX) + MOVB R9, 3(AX) + ADDQ $0x04, AX + JMP memmove_long_match_emit_encodeBlockAsm + +three_bytes_match_emit_encodeBlockAsm: + MOVB $0xf4, (AX) + MOVW DI, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_match_emit_encodeBlockAsm + +two_bytes_match_emit_encodeBlockAsm: + MOVB $0xf0, (AX) + MOVB DI, 1(AX) + ADDQ $0x02, AX + CMPL DI, $0x40 + JB memmove_match_emit_encodeBlockAsm + JMP memmove_long_match_emit_encodeBlockAsm + +one_byte_match_emit_encodeBlockAsm: + SHLB $0x02, DI + MOVB DI, (AX) + ADDQ $0x01, AX + +memmove_match_emit_encodeBlockAsm: + LEAQ (AX)(R8*1), DI + + // genMemMoveShort + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8 + CMPQ R8, $0x10 + JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16 + CMPQ R8, $0x20 + JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 + JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 + +emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8: + MOVQ (SI), R9 + MOVQ R9, (AX) + JMP memmove_end_copy_match_emit_encodeBlockAsm + +emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16: + MOVQ (SI), R9 + MOVQ -8(SI)(R8*1), SI + MOVQ R9, (AX) + MOVQ SI, -8(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeBlockAsm + +emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: + MOVOU (SI), X0 + MOVOU -16(SI)(R8*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeBlockAsm + +emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + +memmove_end_copy_match_emit_encodeBlockAsm: + MOVQ DI, AX + JMP emit_literal_done_match_emit_encodeBlockAsm + +memmove_long_match_emit_encodeBlockAsm: + LEAQ (AX)(R8*1), DI + + // genMemMoveLong + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVQ R8, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 + JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 + LEAQ -32(SI)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 + +emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) + ADDQ $0x20, R12 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 + JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back + +emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32: + MOVOU -32(SI)(R11*1), X4 + MOVOU -16(SI)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ R8, R11 + JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ DI, AX + +emit_literal_done_match_emit_encodeBlockAsm: +match_nolit_loop_encodeBlockAsm: + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) + ADDL $0x04, CX + ADDL $0x04, BX + MOVQ src_len+32(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX + + // matchLen + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_encodeBlockAsm + +matchlen_loopback_match_nolit_encodeBlockAsm: + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 + JZ matchlen_loop_match_nolit_encodeBlockAsm + +#ifdef GOAMD64_v3 + TZCNTQ R8, R8 + +#else + BSFQ R8, R8 + +#endif + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 + JMP match_nolit_end_encodeBlockAsm + +matchlen_loop_match_nolit_encodeBlockAsm: + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_encodeBlockAsm + JZ match_nolit_end_encodeBlockAsm + +matchlen_match4_match_nolit_encodeBlockAsm: + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_encodeBlockAsm + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 + JNE matchlen_match2_match_nolit_encodeBlockAsm + SUBL $0x04, SI + LEAL 4(R9), R9 + +matchlen_match2_match_nolit_encodeBlockAsm: + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_encodeBlockAsm + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 + JNE matchlen_match1_match_nolit_encodeBlockAsm + SUBL $0x02, SI + LEAL 2(R9), R9 + +matchlen_match1_match_nolit_encodeBlockAsm: + CMPL SI, $0x01 + JB match_nolit_end_encodeBlockAsm + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 + JNE match_nolit_end_encodeBlockAsm + LEAL 1(R9), R9 + +match_nolit_end_encodeBlockAsm: + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 + MOVL CX, 12(SP) + + // emitCopy + CMPL BX, $0x00010000 + JB two_byte_offset_match_nolit_encodeBlockAsm + CMPL R9, $0x40 + JBE four_bytes_remain_match_nolit_encodeBlockAsm + MOVB $0xff, (AX) + MOVL BX, 1(AX) + LEAL -64(R9), R9 + ADDQ $0x05, AX + CMPL R9, $0x04 + JB four_bytes_remain_match_nolit_encodeBlockAsm + + // emitRepeat +emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: + MOVL R9, SI + LEAL -4(R9), R9 + CMPL SI, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy + CMPL BX, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy + +cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm_emit_copy + CMPL R9, $0x00010100 + JB repeat_four_match_nolit_encodeBlockAsm_emit_copy + CMPL R9, $0x0100ffff + JB repeat_five_match_nolit_encodeBlockAsm_emit_copy + LEAL -16842747(R9), R9 + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy + +repeat_five_match_nolit_encodeBlockAsm_emit_copy: + LEAL -65536(R9), R9 + MOVL R9, BX + MOVW $0x001d, (AX) + MOVW R9, 2(AX) + SARL $0x10, BX + MOVB BL, 4(AX) + ADDQ $0x05, AX + JMP match_nolit_emitcopy_end_encodeBlockAsm + +repeat_four_match_nolit_encodeBlockAsm_emit_copy: + LEAL -256(R9), R9 + MOVW $0x0019, (AX) + MOVW R9, 2(AX) + ADDQ $0x04, AX + JMP match_nolit_emitcopy_end_encodeBlockAsm + +repeat_three_match_nolit_encodeBlockAsm_emit_copy: + LEAL -4(R9), R9 + MOVW $0x0015, (AX) + MOVB R9, 2(AX) + ADDQ $0x03, AX + JMP match_nolit_emitcopy_end_encodeBlockAsm + +repeat_two_match_nolit_encodeBlockAsm_emit_copy: + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) + ADDQ $0x02, AX + JMP match_nolit_emitcopy_end_encodeBlockAsm + +repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) + ADDQ $0x02, AX + JMP match_nolit_emitcopy_end_encodeBlockAsm + +four_bytes_remain_match_nolit_encodeBlockAsm: + TESTL R9, R9 + JZ match_nolit_emitcopy_end_encodeBlockAsm + XORL SI, SI + LEAL -1(SI)(R9*4), R9 + MOVB R9, (AX) + MOVL BX, 1(AX) + ADDQ $0x05, AX + JMP match_nolit_emitcopy_end_encodeBlockAsm + +two_byte_offset_match_nolit_encodeBlockAsm: + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_encodeBlockAsm + CMPL BX, $0x00000800 + JAE long_offset_short_match_nolit_encodeBlockAsm + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB BL, 1(AX) + MOVL BX, DI + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (AX) + ADDQ $0x02, AX + SUBL $0x08, R9 + + // emitRepeat + LEAL -4(R9), R9 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b: - MOVL R10, DI - LEAL -4(R10), R10 - CMPL DI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b + MOVL R9, SI + LEAL -4(R9), R9 + CMPL SI, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b + CMPL BX, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R10, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R10, $0x0100ffff - JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(R10), R10 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b + CMPL R9, $0x00010100 + JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b + CMPL R9, $0x0100ffff + JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b + LEAL -16842747(R9), R9 + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(R10), R10 - MOVL R10, SI + LEAL -65536(R9), R9 + MOVL R9, BX MOVW $0x001d, (AX) - MOVW R10, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) + MOVW R9, 2(AX) + SARL $0x10, BX + MOVB BL, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(R10), R10 + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm long_offset_short_match_nolit_encodeBlockAsm: MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(R10), R10 + MOVW BX, 1(AX) + LEAL -60(R9), R9 ADDQ $0x03, AX // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short: - MOVL R10, DI - LEAL -4(R10), R10 - CMPL DI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short + MOVL R9, SI + LEAL -4(R9), R9 + CMPL SI, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short + CMPL BX, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R10, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R10, $0x0100ffff - JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short - LEAL -16842747(R10), R10 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short + CMPL R9, $0x00010100 + JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short + CMPL R9, $0x0100ffff + JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short + LEAL -16842747(R9), R9 + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -65536(R10), R10 - MOVL R10, SI + LEAL -65536(R9), R9 + MOVL R9, BX MOVW $0x001d, (AX) - MOVW R10, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) + MOVW R9, 2(AX) + SARL $0x10, BX + MOVB BL, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -256(R10), R10 + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm - JMP two_byte_offset_match_nolit_encodeBlockAsm two_byte_offset_short_match_nolit_encodeBlockAsm: - CMPL R10, $0x0c - JGE emit_copy_three_match_nolit_encodeBlockAsm - CMPL SI, $0x00000800 - JGE emit_copy_three_match_nolit_encodeBlockAsm - MOVB $0x01, BL - LEAL -16(BX)(R10*4), R10 - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_encodeBlockAsm + CMPL BX, $0x00000800 + JAE emit_copy_three_match_nolit_encodeBlockAsm + LEAL -15(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm emit_copy_three_match_nolit_encodeBlockAsm: - MOVB $0x02, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVW SI, 1(AX) + LEAL -2(SI), SI + MOVB SI, (AX) + MOVW BX, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm: CMPL CX, 8(SP) - JGE emit_remainder_encodeBlockAsm - MOVQ -2(DX)(CX*1), DI + JAE emit_remainder_encodeBlockAsm + MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeBlockAsm + JB match_nolit_dst_ok_encodeBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ DI, R8 - SHRQ $0x10, DI - MOVQ DI, SI - SHLQ $0x10, R8 - IMULQ R9, R8 - SHRQ $0x32, R8 - SHLQ $0x10, SI - IMULQ R9, SI - SHRQ $0x32, SI - LEAL -2(CX), R9 - LEAQ 24(SP)(SI*4), R10 - MOVL (R10), SI - MOVL R9, 24(SP)(R8*4) - MOVL CX, (R10) - CMPL (DX)(SI*1), DI + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x10, DI + IMULQ R8, DI + SHRQ $0x32, DI + SHLQ $0x10, BX + IMULQ R8, BX + SHRQ $0x32, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI JEQ match_nolit_loop_encodeBlockAsm INCL CX JMP search_loop_encodeBlockAsm @@ -1228,7 +1213,7 @@ emit_remainder_encodeBlockAsm: SUBL 12(SP), CX LEAQ 5(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeBlockAsm + JB emit_remainder_ok_encodeBlockAsm MOVQ $0x00000000, ret+48(FP) RET @@ -1243,13 +1228,13 @@ emit_remainder_ok_encodeBlockAsm: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeBlockAsm + JB one_byte_emit_remainder_encodeBlockAsm CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeBlockAsm + JB two_bytes_emit_remainder_encodeBlockAsm CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeBlockAsm + JB three_bytes_emit_remainder_encodeBlockAsm CMPL DX, $0x01000000 - JLT four_bytes_emit_remainder_encodeBlockAsm + JB four_bytes_emit_remainder_encodeBlockAsm MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX @@ -1275,7 +1260,7 @@ two_bytes_emit_remainder_encodeBlockAsm: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeBlockAsm + JB memmove_emit_remainder_encodeBlockAsm JMP memmove_long_emit_remainder_encodeBlockAsm one_byte_emit_remainder_encodeBlockAsm: @@ -1422,8 +1407,8 @@ zero_loop_encodeBlockAsm4MB: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -1433,555 +1418,551 @@ zero_loop_encodeBlockAsm4MB: MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x06, SI - LEAL 4(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeBlockAsm4MB - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ DI, R10 - MOVQ DI, R11 - SHRQ $0x08, R11 + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x06, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeBlockAsm4MB + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 SHLQ $0x10, R10 - IMULQ R9, R10 + IMULQ R8, R10 SHRQ $0x32, R10 - SHLQ $0x10, R11 - IMULQ R9, R11 - SHRQ $0x32, R11 - MOVL 24(SP)(R10*4), SI - MOVL 24(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - LEAL 1(CX), R10 - MOVL R10, 24(SP)(R11*4) - MOVQ DI, R10 - SHRQ $0x10, R10 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x32, R10 - MOVL CX, R9 - SUBL 16(SP), R9 - MOVL 1(DX)(R9*1), R11 - MOVQ DI, R9 - SHRQ $0x08, R9 - CMPL R9, R11 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 + MOVL CX, R8 + SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm4MB - LEAL 1(CX), DI - MOVL 12(SP), R8 - MOVL DI, SI - SUBL 16(SP), SI + LEAL 1(CX), SI + MOVL 12(SP), DI + MOVL SI, BX + SUBL 16(SP), BX JZ repeat_extend_back_end_encodeBlockAsm4MB repeat_extend_back_loop_encodeBlockAsm4MB: - CMPL DI, R8 - JLE repeat_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(DI*1), R9 - CMPB BL, R9 + CMPL SI, DI + JBE repeat_extend_back_end_encodeBlockAsm4MB + MOVB -1(DX)(BX*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 JNE repeat_extend_back_end_encodeBlockAsm4MB - LEAL -1(DI), DI - DECL SI + LEAL -1(SI), SI + DECL BX JNZ repeat_extend_back_loop_encodeBlockAsm4MB repeat_extend_back_end_encodeBlockAsm4MB: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_repeat_emit_encodeBlockAsm4MB - CMPL SI, $0x00000100 - JLT two_bytes_repeat_emit_encodeBlockAsm4MB - CMPL SI, $0x00010000 - JLT three_bytes_repeat_emit_encodeBlockAsm4MB - MOVL SI, R11 - SHRL $0x10, R11 + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_encodeBlockAsm4MB + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_encodeBlockAsm4MB + CMPL BX, $0x00010000 + JB three_bytes_repeat_emit_encodeBlockAsm4MB + MOVL BX, R10 + SHRL $0x10, R10 MOVB $0xf8, (AX) - MOVW SI, 1(AX) - MOVB R11, 3(AX) + MOVW BX, 1(AX) + MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_repeat_emit_encodeBlockAsm4MB three_bytes_repeat_emit_encodeBlockAsm4MB: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm4MB two_bytes_repeat_emit_encodeBlockAsm4MB: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_repeat_emit_encodeBlockAsm4MB + CMPL BX, $0x40 + JB memmove_repeat_emit_encodeBlockAsm4MB JMP memmove_long_repeat_emit_encodeBlockAsm4MB one_byte_repeat_emit_encodeBlockAsm4MB: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (R10), R11 - MOVQ R11, (AX) + MOVQ (R9), R10 + MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeBlockAsm4MB: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB memmove_long_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R12 - SHRQ $0x05, R12 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R13 - SUBQ R11, R13 - DECQ R12 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R10)(R13*1), R11 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) - ADDQ $0x20, R14 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) ADDQ $0x20, R13 - DECQ R12 + ADDQ $0x20, R10 + ADDQ $0x20, R12 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R10)(R13*1), X4 - MOVOU -16(R10)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R9, R13 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(AX)(R12*1) + MOVOA X5, -16(AX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_repeat_emit_encodeBlockAsm4MB: ADDL $0x05, CX - MOVL CX, SI - SUBL 16(SP), SI - MOVQ src_len+32(FP), R9 - SUBL CX, R9 - LEAQ (DX)(CX*1), R10 - LEAQ (DX)(SI*1), SI + MOVL CX, BX + SUBL 16(SP), BX + MOVQ src_len+32(FP), R8 + SUBL CX, R8 + LEAQ (DX)(CX*1), R9 + LEAQ (DX)(BX*1), BX // matchLen - XORL R12, R12 - CMPL R9, $0x08 - JL matchlen_match4_repeat_extend_encodeBlockAsm4MB + XORL R11, R11 + CMPL R8, $0x08 + JB matchlen_match4_repeat_extend_encodeBlockAsm4MB matchlen_loopback_repeat_extend_encodeBlockAsm4MB: - MOVQ (R10)(R12*1), R11 - XORQ (SI)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_repeat_extend_encodeBlockAsm4MB #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_loop_repeat_extend_encodeBlockAsm4MB: - LEAL -8(R9), R9 - LEAL 8(R12), R12 - CMPL R9, $0x08 - JGE matchlen_loopback_repeat_extend_encodeBlockAsm4MB + LEAL -8(R8), R8 + LEAL 8(R11), R11 + CMPL R8, $0x08 + JAE matchlen_loopback_repeat_extend_encodeBlockAsm4MB JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match4_repeat_extend_encodeBlockAsm4MB: - CMPL R9, $0x04 - JL matchlen_match2_repeat_extend_encodeBlockAsm4MB - MOVL (R10)(R12*1), R11 - CMPL (SI)(R12*1), R11 + CMPL R8, $0x04 + JB matchlen_match2_repeat_extend_encodeBlockAsm4MB + MOVL (R9)(R11*1), R10 + CMPL (BX)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB - SUBL $0x04, R9 - LEAL 4(R12), R12 + SUBL $0x04, R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeBlockAsm4MB: - CMPL R9, $0x02 - JL matchlen_match1_repeat_extend_encodeBlockAsm4MB - MOVW (R10)(R12*1), R11 - CMPW (SI)(R12*1), R11 + CMPL R8, $0x02 + JB matchlen_match1_repeat_extend_encodeBlockAsm4MB + MOVW (R9)(R11*1), R10 + CMPW (BX)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB - SUBL $0x02, R9 - LEAL 2(R12), R12 + SUBL $0x02, R8 + LEAL 2(R11), R11 matchlen_match1_repeat_extend_encodeBlockAsm4MB: - CMPL R9, $0x01 - JL repeat_extend_forward_end_encodeBlockAsm4MB - MOVB (R10)(R12*1), R11 - CMPB (SI)(R12*1), R11 + CMPL R8, $0x01 + JB repeat_extend_forward_end_encodeBlockAsm4MB + MOVB (R9)(R11*1), R10 + CMPB (BX)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm4MB - LEAL 1(R12), R12 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeBlockAsm4MB: - ADDL R12, CX - MOVL CX, SI - SUBL DI, SI - MOVL 16(SP), DI - TESTL R8, R8 + ADDL R11, CX + MOVL CX, BX + SUBL SI, BX + MOVL 16(SP), SI + TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_match_repeat_encodeBlockAsm4MB - CMPL R8, $0x0c - JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB - CMPL DI, $0x00000800 - JLT repeat_two_offset_match_repeat_encodeBlockAsm4MB + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_match_repeat_encodeBlockAsm4MB + CMPL DI, $0x0c + JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB + CMPL SI, $0x00000800 + JB repeat_two_offset_match_repeat_encodeBlockAsm4MB cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB: - CMPL SI, $0x00000104 - JLT repeat_three_match_repeat_encodeBlockAsm4MB - CMPL SI, $0x00010100 - JLT repeat_four_match_repeat_encodeBlockAsm4MB - LEAL -65536(SI), SI - MOVL SI, DI + CMPL BX, $0x00000104 + JB repeat_three_match_repeat_encodeBlockAsm4MB + CMPL BX, $0x00010100 + JB repeat_four_match_repeat_encodeBlockAsm4MB + LEAL -65536(BX), BX + MOVL BX, SI MOVW $0x001d, (AX) - MOVW SI, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) + MOVW BX, 2(AX) + SARL $0x10, SI + MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_match_repeat_encodeBlockAsm4MB: - LEAL -256(SI), SI + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_match_repeat_encodeBlockAsm4MB: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_match_repeat_encodeBlockAsm4MB: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_match_repeat_encodeBlockAsm4MB: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_as_copy_encodeBlockAsm4MB: // emitCopy - CMPL DI, $0x00010000 - JL two_byte_offset_repeat_as_copy_encodeBlockAsm4MB - -four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB: - CMPL SI, $0x40 - JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB + CMPL SI, $0x00010000 + JB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB + CMPL BX, $0x40 + JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(SI), SI + MOVL SI, 1(AX) + LEAL -64(BX), BX ADDQ $0x05, AX - CMPL SI, $0x04 - JL four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB + CMPL BX, $0x04 + JB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL R8, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL DI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy + CMPL DI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy + CMPL SI, $0x00000800 + JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy - LEAL -65536(SI), SI - MOVL SI, DI + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy + CMPL BX, $0x00010100 + JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy + LEAL -65536(BX), BX + MOVL BX, SI MOVW $0x001d, (AX) - MOVW SI, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) + MOVW BX, 2(AX) + SARL $0x10, SI + MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -256(SI), SI + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB - JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB: - TESTL SI, SI + TESTL BX, BX JZ repeat_end_emit_encodeBlockAsm4MB - MOVB $0x03, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVL DI, 1(AX) + XORL DI, DI + LEAL -1(DI)(BX*4), BX + MOVB BL, (AX) + MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB: - CMPL SI, $0x40 - JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB - CMPL DI, $0x00000800 + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB + CMPL SI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm4MB - MOVL $0x00000001, R8 - LEAL 16(R8), R8 - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, R8 - MOVB R8, (AX) + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) ADDQ $0x02, AX - SUBL $0x08, SI + SUBL $0x08, BX // emitRepeat - LEAL -4(SI), SI + LEAL -4(BX), BX JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL R8, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b + CMPL DI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b + CMPL SI, $0x00000800 + JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(SI), SI - MOVL SI, DI + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b + CMPL BX, $0x00010100 + JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b + LEAL -65536(BX), BX + MOVL BX, SI MOVW $0x001d, (AX) - MOVW SI, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) + MOVW BX, 2(AX) + SARL $0x10, SI + MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(SI), SI + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB long_offset_short_repeat_as_copy_encodeBlockAsm4MB: MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(SI), SI + MOVW SI, 1(AX) + LEAL -60(BX), BX ADDQ $0x03, AX // emitRepeat - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL R8, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL DI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short + CMPL DI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short + CMPL SI, $0x00000800 + JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(SI), SI - MOVL SI, DI + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short + CMPL BX, $0x00010100 + JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short + LEAL -65536(BX), BX + MOVL BX, SI MOVW $0x001d, (AX) - MOVW SI, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) + MOVW BX, 2(AX) + SARL $0x10, SI + MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(SI), SI + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB - JMP two_byte_offset_repeat_as_copy_encodeBlockAsm4MB two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB: - CMPL SI, $0x0c - JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - CMPL DI, $0x00000800 - JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - MOVB $0x01, BL - LEAL -16(BX)(SI*4), SI - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB + CMPL SI, $0x00000800 + JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB + LEAL -15(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB emit_copy_three_repeat_as_copy_encodeBlockAsm4MB: - MOVB $0x02, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVW DI, 1(AX) + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm4MB: @@ -1989,16 +1970,16 @@ repeat_end_emit_encodeBlockAsm4MB: JMP search_loop_encodeBlockAsm4MB no_repeat_found_encodeBlockAsm4MB: - CMPL (DX)(SI*1), DI + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeBlockAsm4MB - SHRQ $0x08, DI - MOVL 24(SP)(R10*4), SI - LEAL 2(CX), R9 - CMPL (DX)(R8*1), DI + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm4MB - MOVL R9, 24(SP)(R10*4) - SHRQ $0x08, DI - CMPL (DX)(SI*1), DI + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI JEQ candidate3_match_encodeBlockAsm4MB MOVL 20(SP), CX JMP search_loop_encodeBlockAsm4MB @@ -2008,506 +1989,502 @@ candidate3_match_encodeBlockAsm4MB: JMP candidate_match_encodeBlockAsm4MB candidate2_match_encodeBlockAsm4MB: - MOVL R9, 24(SP)(R10*4) + MOVL R8, 24(SP)(R9*4) INCL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeBlockAsm4MB: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm4MB match_extend_back_loop_encodeBlockAsm4MB: - CMPL CX, DI - JLE match_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeBlockAsm4MB + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeBlockAsm4MB LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeBlockAsm4MB JMP match_extend_back_loop_encodeBlockAsm4MB match_extend_back_end_encodeBlockAsm4MB: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 4(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeBlockAsm4MB + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 4(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm4MB: - MOVL CX, DI - MOVL 12(SP), R8 - CMPL R8, DI + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm4MB + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), DI + CMPL DI, $0x3c + JB one_byte_match_emit_encodeBlockAsm4MB + CMPL DI, $0x00000100 + JB two_bytes_match_emit_encodeBlockAsm4MB + CMPL DI, $0x00010000 + JB three_bytes_match_emit_encodeBlockAsm4MB MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(R8*1), DI - SUBL R8, R9 - LEAL -1(R9), R8 - CMPL R8, $0x3c - JLT one_byte_match_emit_encodeBlockAsm4MB - CMPL R8, $0x00000100 - JLT two_bytes_match_emit_encodeBlockAsm4MB - CMPL R8, $0x00010000 - JLT three_bytes_match_emit_encodeBlockAsm4MB - MOVL R8, R10 - SHRL $0x10, R10 + SHRL $0x10, R9 MOVB $0xf8, (AX) - MOVW R8, 1(AX) - MOVB R10, 3(AX) + MOVW DI, 1(AX) + MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_encodeBlockAsm4MB three_bytes_match_emit_encodeBlockAsm4MB: MOVB $0xf4, (AX) - MOVW R8, 1(AX) + MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm4MB two_bytes_match_emit_encodeBlockAsm4MB: MOVB $0xf0, (AX) - MOVB R8, 1(AX) + MOVB DI, 1(AX) ADDQ $0x02, AX - CMPL R8, $0x40 - JL memmove_match_emit_encodeBlockAsm4MB + CMPL DI, $0x40 + JB memmove_match_emit_encodeBlockAsm4MB JMP memmove_long_match_emit_encodeBlockAsm4MB one_byte_match_emit_encodeBlockAsm4MB: - SHLB $0x02, R8 - MOVB R8, (AX) + SHLB $0x02, DI + MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R9*1), R8 + LEAQ (AX)(R8*1), DI // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (DI), R10 - MOVQ R10, (AX) + MOVQ (SI), R9 + MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (DI), R10 - MOVQ -8(DI)(R9*1), DI - MOVQ R10, (AX) - MOVQ DI, -8(AX)(R9*1) + MOVQ (SI), R9 + MOVQ -8(SI)(R8*1), SI + MOVQ R9, (AX) + MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (DI), X0 - MOVOU -16(DI)(R9*1), X1 + MOVOU (SI), X0 + MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBlockAsm4MB: - MOVQ R8, AX + MOVQ DI, AX JMP emit_literal_done_match_emit_encodeBlockAsm4MB memmove_long_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R9*1), R8 + LEAQ (AX)(R8*1), DI // genMemMoveLong - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVQ R9, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVQ R8, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(DI)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(SI)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) ADDQ $0x20, R12 - DECQ R11 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(DI)(R12*1), X4 - MOVOU -16(DI)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R9, R12 + MOVOU -32(SI)(R11*1), X4 + MOVOU -16(SI)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ R8, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm4MB: match_nolit_loop_encodeBlockAsm4MB: - MOVL CX, DI - SUBL SI, DI - MOVL DI, 16(SP) + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(SI*1), SI + ADDL $0x04, BX + MOVQ src_len+32(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX // matchLen - XORL R10, R10 - CMPL DI, $0x08 - JL matchlen_match4_match_nolit_encodeBlockAsm4MB + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_encodeBlockAsm4MB matchlen_loopback_match_nolit_encodeBlockAsm4MB: - MOVQ (R8)(R10*1), R9 - XORQ (SI)(R10*1), R9 - TESTQ R9, R9 + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm4MB #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R8, R8 #else - BSFQ R9, R9 + BSFQ R8, R8 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm4MB matchlen_loop_match_nolit_encodeBlockAsm4MB: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JGE matchlen_loopback_match_nolit_encodeBlockAsm4MB + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_encodeBlockAsm4MB JZ match_nolit_end_encodeBlockAsm4MB matchlen_match4_match_nolit_encodeBlockAsm4MB: - CMPL DI, $0x04 - JL matchlen_match2_match_nolit_encodeBlockAsm4MB - MOVL (R8)(R10*1), R9 - CMPL (SI)(R10*1), R9 + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_encodeBlockAsm4MB + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeBlockAsm4MB - SUBL $0x04, DI - LEAL 4(R10), R10 + SUBL $0x04, SI + LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeBlockAsm4MB: - CMPL DI, $0x02 - JL matchlen_match1_match_nolit_encodeBlockAsm4MB - MOVW (R8)(R10*1), R9 - CMPW (SI)(R10*1), R9 + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_encodeBlockAsm4MB + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeBlockAsm4MB - SUBL $0x02, DI - LEAL 2(R10), R10 + SUBL $0x02, SI + LEAL 2(R9), R9 matchlen_match1_match_nolit_encodeBlockAsm4MB: - CMPL DI, $0x01 - JL match_nolit_end_encodeBlockAsm4MB - MOVB (R8)(R10*1), R9 - CMPB (SI)(R10*1), R9 + CMPL SI, $0x01 + JB match_nolit_end_encodeBlockAsm4MB + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm4MB - LEAL 1(R10), R10 + LEAL 1(R9), R9 match_nolit_end_encodeBlockAsm4MB: - ADDL R10, CX - MOVL 16(SP), SI - ADDL $0x04, R10 + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy - CMPL SI, $0x00010000 - JL two_byte_offset_match_nolit_encodeBlockAsm4MB - -four_bytes_loop_back_match_nolit_encodeBlockAsm4MB: - CMPL R10, $0x40 - JLE four_bytes_remain_match_nolit_encodeBlockAsm4MB + CMPL BX, $0x00010000 + JB two_byte_offset_match_nolit_encodeBlockAsm4MB + CMPL R9, $0x40 + JBE four_bytes_remain_match_nolit_encodeBlockAsm4MB MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(R10), R10 + MOVL BX, 1(AX) + LEAL -64(R9), R9 ADDQ $0x05, AX - CMPL R10, $0x04 - JL four_bytes_remain_match_nolit_encodeBlockAsm4MB + CMPL R9, $0x04 + JB four_bytes_remain_match_nolit_encodeBlockAsm4MB // emitRepeat - MOVL R10, DI - LEAL -4(R10), R10 - CMPL DI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy + MOVL R9, SI + LEAL -4(R9), R9 + CMPL SI, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy + CMPL BX, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL R10, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy - LEAL -65536(R10), R10 - MOVL R10, SI + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy + CMPL R9, $0x00010100 + JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy + LEAL -65536(R9), R9 + MOVL R9, BX MOVW $0x001d, (AX) - MOVW R10, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) + MOVW R9, 2(AX) + SARL $0x10, BX + MOVB BL, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -256(R10), R10 + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB - JMP four_bytes_loop_back_match_nolit_encodeBlockAsm4MB four_bytes_remain_match_nolit_encodeBlockAsm4MB: - TESTL R10, R10 + TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeBlockAsm4MB - MOVB $0x03, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVL SI, 1(AX) + XORL SI, SI + LEAL -1(SI)(R9*4), R9 + MOVB R9, (AX) + MOVL BX, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_match_nolit_encodeBlockAsm4MB: - CMPL R10, $0x40 - JLE two_byte_offset_short_match_nolit_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_encodeBlockAsm4MB + CMPL BX, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm4MB - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX - SUBL $0x08, R10 + SUBL $0x08, R9 // emitRepeat - LEAL -4(R10), R10 + LEAL -4(R9), R9 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - MOVL R10, DI - LEAL -4(R10), R10 - CMPL DI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b + MOVL R9, SI + LEAL -4(R9), R9 + CMPL SI, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b + CMPL BX, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL R10, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R10), R10 - MOVL R10, SI + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b + CMPL R9, $0x00010100 + JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b + LEAL -65536(R9), R9 + MOVL R9, BX MOVW $0x001d, (AX) - MOVW R10, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) + MOVW R9, 2(AX) + SARL $0x10, BX + MOVB BL, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R10), R10 + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB long_offset_short_match_nolit_encodeBlockAsm4MB: MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(R10), R10 + MOVW BX, 1(AX) + LEAL -60(R9), R9 ADDQ $0x03, AX // emitRepeat - MOVL R10, DI - LEAL -4(R10), R10 - CMPL DI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short + MOVL R9, SI + LEAL -4(R9), R9 + CMPL SI, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short + CMPL BX, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL R10, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(R10), R10 - MOVL R10, SI + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short + CMPL R9, $0x00010100 + JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short + LEAL -65536(R9), R9 + MOVL R9, BX MOVW $0x001d, (AX) - MOVW R10, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) + MOVW R9, 2(AX) + SARL $0x10, BX + MOVB BL, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(R10), R10 + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB - JMP two_byte_offset_match_nolit_encodeBlockAsm4MB two_byte_offset_short_match_nolit_encodeBlockAsm4MB: - CMPL R10, $0x0c - JGE emit_copy_three_match_nolit_encodeBlockAsm4MB - CMPL SI, $0x00000800 - JGE emit_copy_three_match_nolit_encodeBlockAsm4MB - MOVB $0x01, BL - LEAL -16(BX)(R10*4), R10 - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_encodeBlockAsm4MB + CMPL BX, $0x00000800 + JAE emit_copy_three_match_nolit_encodeBlockAsm4MB + LEAL -15(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB emit_copy_three_match_nolit_encodeBlockAsm4MB: - MOVB $0x02, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVW SI, 1(AX) + LEAL -2(SI), SI + MOVB SI, (AX) + MOVW BX, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm4MB: CMPL CX, 8(SP) - JGE emit_remainder_encodeBlockAsm4MB - MOVQ -2(DX)(CX*1), DI + JAE emit_remainder_encodeBlockAsm4MB + MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeBlockAsm4MB + JB match_nolit_dst_ok_encodeBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm4MB: - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ DI, R8 - SHRQ $0x10, DI - MOVQ DI, SI - SHLQ $0x10, R8 - IMULQ R9, R8 - SHRQ $0x32, R8 - SHLQ $0x10, SI - IMULQ R9, SI - SHRQ $0x32, SI - LEAL -2(CX), R9 - LEAQ 24(SP)(SI*4), R10 - MOVL (R10), SI - MOVL R9, 24(SP)(R8*4) - MOVL CX, (R10) - CMPL (DX)(SI*1), DI + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x10, DI + IMULQ R8, DI + SHRQ $0x32, DI + SHLQ $0x10, BX + IMULQ R8, BX + SHRQ $0x32, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI JEQ match_nolit_loop_encodeBlockAsm4MB INCL CX JMP search_loop_encodeBlockAsm4MB @@ -2517,7 +2494,7 @@ emit_remainder_encodeBlockAsm4MB: SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeBlockAsm4MB + JB emit_remainder_ok_encodeBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET @@ -2532,11 +2509,11 @@ emit_remainder_ok_encodeBlockAsm4MB: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeBlockAsm4MB + JB one_byte_emit_remainder_encodeBlockAsm4MB CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeBlockAsm4MB + JB two_bytes_emit_remainder_encodeBlockAsm4MB CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeBlockAsm4MB + JB three_bytes_emit_remainder_encodeBlockAsm4MB MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) @@ -2556,7 +2533,7 @@ two_bytes_emit_remainder_encodeBlockAsm4MB: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeBlockAsm4MB + JB memmove_emit_remainder_encodeBlockAsm4MB JMP memmove_long_emit_remainder_encodeBlockAsm4MB one_byte_emit_remainder_encodeBlockAsm4MB: @@ -2703,8 +2680,8 @@ zero_loop_encodeBlockAsm12B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -2714,428 +2691,429 @@ zero_loop_encodeBlockAsm12B: MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x05, SI - LEAL 4(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeBlockAsm12B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R9 - MOVQ DI, R10 - MOVQ DI, R11 - SHRQ $0x08, R11 + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x05, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeBlockAsm12B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x18, R9 + IMULQ R8, R9 + SHRQ $0x34, R9 SHLQ $0x18, R10 - IMULQ R9, R10 + IMULQ R8, R10 SHRQ $0x34, R10 - SHLQ $0x18, R11 - IMULQ R9, R11 - SHRQ $0x34, R11 - MOVL 24(SP)(R10*4), SI - MOVL 24(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - LEAL 1(CX), R10 - MOVL R10, 24(SP)(R11*4) - MOVQ DI, R10 - SHRQ $0x10, R10 - SHLQ $0x18, R10 - IMULQ R9, R10 - SHRQ $0x34, R10 - MOVL CX, R9 - SUBL 16(SP), R9 - MOVL 1(DX)(R9*1), R11 - MOVQ DI, R9 - SHRQ $0x08, R9 - CMPL R9, R11 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x18, R9 + IMULQ R8, R9 + SHRQ $0x34, R9 + MOVL CX, R8 + SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm12B - LEAL 1(CX), DI - MOVL 12(SP), R8 - MOVL DI, SI - SUBL 16(SP), SI + LEAL 1(CX), SI + MOVL 12(SP), DI + MOVL SI, BX + SUBL 16(SP), BX JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: - CMPL DI, R8 - JLE repeat_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(DI*1), R9 - CMPB BL, R9 + CMPL SI, DI + JBE repeat_extend_back_end_encodeBlockAsm12B + MOVB -1(DX)(BX*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 JNE repeat_extend_back_end_encodeBlockAsm12B - LEAL -1(DI), DI - DECL SI + LEAL -1(SI), SI + DECL BX JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_repeat_emit_encodeBlockAsm12B - CMPL SI, $0x00000100 - JLT two_bytes_repeat_emit_encodeBlockAsm12B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_encodeBlockAsm12B + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_encodeBlockAsm12B + JB three_bytes_repeat_emit_encodeBlockAsm12B + +three_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_repeat_emit_encodeBlockAsm12B + CMPL BX, $0x40 + JB memmove_repeat_emit_encodeBlockAsm12B JMP memmove_long_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (R10), R11 - MOVQ R11, (AX) + MOVQ (R9), R10 + MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeBlockAsm12B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm12B memmove_long_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R12 - SHRQ $0x05, R12 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R13 - SUBQ R11, R13 - DECQ R12 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R13*1), R11 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) - ADDQ $0x20, R14 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) ADDQ $0x20, R13 - DECQ R12 + ADDQ $0x20, R10 + ADDQ $0x20, R12 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R13*1), X4 - MOVOU -16(R10)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R9, R13 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(AX)(R12*1) + MOVOA X5, -16(AX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_repeat_emit_encodeBlockAsm12B: ADDL $0x05, CX - MOVL CX, SI - SUBL 16(SP), SI - MOVQ src_len+32(FP), R9 - SUBL CX, R9 - LEAQ (DX)(CX*1), R10 - LEAQ (DX)(SI*1), SI + MOVL CX, BX + SUBL 16(SP), BX + MOVQ src_len+32(FP), R8 + SUBL CX, R8 + LEAQ (DX)(CX*1), R9 + LEAQ (DX)(BX*1), BX // matchLen - XORL R12, R12 - CMPL R9, $0x08 - JL matchlen_match4_repeat_extend_encodeBlockAsm12B + XORL R11, R11 + CMPL R8, $0x08 + JB matchlen_match4_repeat_extend_encodeBlockAsm12B matchlen_loopback_repeat_extend_encodeBlockAsm12B: - MOVQ (R10)(R12*1), R11 - XORQ (SI)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_repeat_extend_encodeBlockAsm12B #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_loop_repeat_extend_encodeBlockAsm12B: - LEAL -8(R9), R9 - LEAL 8(R12), R12 - CMPL R9, $0x08 - JGE matchlen_loopback_repeat_extend_encodeBlockAsm12B + LEAL -8(R8), R8 + LEAL 8(R11), R11 + CMPL R8, $0x08 + JAE matchlen_loopback_repeat_extend_encodeBlockAsm12B JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_match4_repeat_extend_encodeBlockAsm12B: - CMPL R9, $0x04 - JL matchlen_match2_repeat_extend_encodeBlockAsm12B - MOVL (R10)(R12*1), R11 - CMPL (SI)(R12*1), R11 + CMPL R8, $0x04 + JB matchlen_match2_repeat_extend_encodeBlockAsm12B + MOVL (R9)(R11*1), R10 + CMPL (BX)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeBlockAsm12B - SUBL $0x04, R9 - LEAL 4(R12), R12 + SUBL $0x04, R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeBlockAsm12B: - CMPL R9, $0x02 - JL matchlen_match1_repeat_extend_encodeBlockAsm12B - MOVW (R10)(R12*1), R11 - CMPW (SI)(R12*1), R11 + CMPL R8, $0x02 + JB matchlen_match1_repeat_extend_encodeBlockAsm12B + MOVW (R9)(R11*1), R10 + CMPW (BX)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeBlockAsm12B - SUBL $0x02, R9 - LEAL 2(R12), R12 + SUBL $0x02, R8 + LEAL 2(R11), R11 matchlen_match1_repeat_extend_encodeBlockAsm12B: - CMPL R9, $0x01 - JL repeat_extend_forward_end_encodeBlockAsm12B - MOVB (R10)(R12*1), R11 - CMPB (SI)(R12*1), R11 + CMPL R8, $0x01 + JB repeat_extend_forward_end_encodeBlockAsm12B + MOVB (R9)(R11*1), R10 + CMPB (BX)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm12B - LEAL 1(R12), R12 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeBlockAsm12B: - ADDL R12, CX - MOVL CX, SI - SUBL DI, SI - MOVL 16(SP), DI - TESTL R8, R8 + ADDL R11, CX + MOVL CX, BX + SUBL SI, BX + MOVL 16(SP), SI + TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm12B // emitRepeat - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_match_repeat_encodeBlockAsm12B - CMPL R8, $0x0c - JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B - CMPL DI, $0x00000800 - JLT repeat_two_offset_match_repeat_encodeBlockAsm12B + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_match_repeat_encodeBlockAsm12B + CMPL DI, $0x0c + JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B + CMPL SI, $0x00000800 + JB repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: - CMPL SI, $0x00000104 - JLT repeat_three_match_repeat_encodeBlockAsm12B - LEAL -256(SI), SI + CMPL BX, $0x00000104 + JB repeat_three_match_repeat_encodeBlockAsm12B + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: // emitCopy -two_byte_offset_repeat_as_copy_encodeBlockAsm12B: - CMPL SI, $0x40 - JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B - CMPL DI, $0x00000800 + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B + CMPL SI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm12B - MOVL $0x00000001, R8 - LEAL 16(R8), R8 - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, R8 - MOVB R8, (AX) + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) ADDQ $0x02, AX - SUBL $0x08, SI + SUBL $0x08, BX // emitRepeat - LEAL -4(SI), SI + LEAL -4(BX), BX JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL R8, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b + CMPL DI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b + CMPL SI, $0x00000800 + JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(SI), SI + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B long_offset_short_repeat_as_copy_encodeBlockAsm12B: MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(SI), SI + MOVW SI, 1(AX) + LEAL -60(BX), BX ADDQ $0x03, AX // emitRepeat - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL R8, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL DI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short + CMPL DI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short + CMPL SI, $0x00000800 + JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - LEAL -256(SI), SI + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B - JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: - CMPL SI, $0x0c - JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - CMPL DI, $0x00000800 - JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - MOVB $0x01, BL - LEAL -16(BX)(SI*4), SI - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B + CMPL SI, $0x00000800 + JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B + LEAL -15(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: - MOVB $0x02, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVW DI, 1(AX) + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm12B: @@ -3143,16 +3121,16 @@ repeat_end_emit_encodeBlockAsm12B: JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: - CMPL (DX)(SI*1), DI + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeBlockAsm12B - SHRQ $0x08, DI - MOVL 24(SP)(R10*4), SI - LEAL 2(CX), R9 - CMPL (DX)(R8*1), DI + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm12B - MOVL R9, 24(SP)(R10*4) - SHRQ $0x08, DI - CMPL (DX)(SI*1), DI + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI JEQ candidate3_match_encodeBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeBlockAsm12B @@ -3162,391 +3140,392 @@ candidate3_match_encodeBlockAsm12B: JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: - MOVL R9, 24(SP)(R10*4) + MOVL R8, 24(SP)(R9*4) INCL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeBlockAsm12B: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: - CMPL CX, DI - JLE match_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeBlockAsm12B + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeBlockAsm12B LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeBlockAsm12B + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm12B: - MOVL CX, DI - MOVL 12(SP), R8 - CMPL R8, DI + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm12B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(R8*1), DI - SUBL R8, R9 - LEAL -1(R9), R8 - CMPL R8, $0x3c - JLT one_byte_match_emit_encodeBlockAsm12B - CMPL R8, $0x00000100 - JLT two_bytes_match_emit_encodeBlockAsm12B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), DI + CMPL DI, $0x3c + JB one_byte_match_emit_encodeBlockAsm12B + CMPL DI, $0x00000100 + JB two_bytes_match_emit_encodeBlockAsm12B + JB three_bytes_match_emit_encodeBlockAsm12B + +three_bytes_match_emit_encodeBlockAsm12B: MOVB $0xf4, (AX) - MOVW R8, 1(AX) + MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: MOVB $0xf0, (AX) - MOVB R8, 1(AX) + MOVB DI, 1(AX) ADDQ $0x02, AX - CMPL R8, $0x40 - JL memmove_match_emit_encodeBlockAsm12B + CMPL DI, $0x40 + JB memmove_match_emit_encodeBlockAsm12B JMP memmove_long_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: - SHLB $0x02, R8 - MOVB R8, (AX) + SHLB $0x02, DI + MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R9*1), R8 + LEAQ (AX)(R8*1), DI // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (DI), R10 - MOVQ R10, (AX) + MOVQ (SI), R9 + MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (DI), R10 - MOVQ -8(DI)(R9*1), DI - MOVQ R10, (AX) - MOVQ DI, -8(AX)(R9*1) + MOVQ (SI), R9 + MOVQ -8(SI)(R8*1), SI + MOVQ R9, (AX) + MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (DI), X0 - MOVOU -16(DI)(R9*1), X1 + MOVOU (SI), X0 + MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBlockAsm12B: - MOVQ R8, AX + MOVQ DI, AX JMP emit_literal_done_match_emit_encodeBlockAsm12B memmove_long_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R9*1), R8 + LEAQ (AX)(R8*1), DI // genMemMoveLong - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVQ R9, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVQ R8, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(DI)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(SI)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) ADDQ $0x20, R12 - DECQ R11 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(DI)(R12*1), X4 - MOVOU -16(DI)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R9, R12 + MOVOU -32(SI)(R11*1), X4 + MOVOU -16(SI)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ R8, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: - MOVL CX, DI - SUBL SI, DI - MOVL DI, 16(SP) + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(SI*1), SI + ADDL $0x04, BX + MOVQ src_len+32(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX // matchLen - XORL R10, R10 - CMPL DI, $0x08 - JL matchlen_match4_match_nolit_encodeBlockAsm12B + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_encodeBlockAsm12B matchlen_loopback_match_nolit_encodeBlockAsm12B: - MOVQ (R8)(R10*1), R9 - XORQ (SI)(R10*1), R9 - TESTQ R9, R9 + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm12B #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R8, R8 #else - BSFQ R9, R9 + BSFQ R8, R8 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm12B matchlen_loop_match_nolit_encodeBlockAsm12B: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JGE matchlen_loopback_match_nolit_encodeBlockAsm12B + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_encodeBlockAsm12B JZ match_nolit_end_encodeBlockAsm12B matchlen_match4_match_nolit_encodeBlockAsm12B: - CMPL DI, $0x04 - JL matchlen_match2_match_nolit_encodeBlockAsm12B - MOVL (R8)(R10*1), R9 - CMPL (SI)(R10*1), R9 + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_encodeBlockAsm12B + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeBlockAsm12B - SUBL $0x04, DI - LEAL 4(R10), R10 + SUBL $0x04, SI + LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeBlockAsm12B: - CMPL DI, $0x02 - JL matchlen_match1_match_nolit_encodeBlockAsm12B - MOVW (R8)(R10*1), R9 - CMPW (SI)(R10*1), R9 + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_encodeBlockAsm12B + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeBlockAsm12B - SUBL $0x02, DI - LEAL 2(R10), R10 + SUBL $0x02, SI + LEAL 2(R9), R9 matchlen_match1_match_nolit_encodeBlockAsm12B: - CMPL DI, $0x01 - JL match_nolit_end_encodeBlockAsm12B - MOVB (R8)(R10*1), R9 - CMPB (SI)(R10*1), R9 + CMPL SI, $0x01 + JB match_nolit_end_encodeBlockAsm12B + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm12B - LEAL 1(R10), R10 + LEAL 1(R9), R9 match_nolit_end_encodeBlockAsm12B: - ADDL R10, CX - MOVL 16(SP), SI - ADDL $0x04, R10 + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy -two_byte_offset_match_nolit_encodeBlockAsm12B: - CMPL R10, $0x40 - JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_encodeBlockAsm12B + CMPL BX, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm12B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX - SUBL $0x08, R10 + SUBL $0x08, R9 // emitRepeat - LEAL -4(R10), R10 + LEAL -4(R9), R9 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - MOVL R10, DI - LEAL -4(R10), R10 - CMPL DI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b + MOVL R9, SI + LEAL -4(R9), R9 + CMPL SI, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b + CMPL BX, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(R10), R10 + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B long_offset_short_match_nolit_encodeBlockAsm12B: MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(R10), R10 + MOVW BX, 1(AX) + LEAL -60(R9), R9 ADDQ $0x03, AX // emitRepeat - MOVL R10, DI - LEAL -4(R10), R10 - CMPL DI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short + MOVL R9, SI + LEAL -4(R9), R9 + CMPL SI, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short + CMPL BX, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short - LEAL -256(R10), R10 + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B - JMP two_byte_offset_match_nolit_encodeBlockAsm12B two_byte_offset_short_match_nolit_encodeBlockAsm12B: - CMPL R10, $0x0c - JGE emit_copy_three_match_nolit_encodeBlockAsm12B - CMPL SI, $0x00000800 - JGE emit_copy_three_match_nolit_encodeBlockAsm12B - MOVB $0x01, BL - LEAL -16(BX)(R10*4), R10 - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_encodeBlockAsm12B + CMPL BX, $0x00000800 + JAE emit_copy_three_match_nolit_encodeBlockAsm12B + LEAL -15(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: - MOVB $0x02, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVW SI, 1(AX) + LEAL -2(SI), SI + MOVB SI, (AX) + MOVW BX, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm12B: CMPL CX, 8(SP) - JGE emit_remainder_encodeBlockAsm12B - MOVQ -2(DX)(CX*1), DI + JAE emit_remainder_encodeBlockAsm12B + MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeBlockAsm12B + JB match_nolit_dst_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R9 - MOVQ DI, R8 - SHRQ $0x10, DI - MOVQ DI, SI - SHLQ $0x18, R8 - IMULQ R9, R8 - SHRQ $0x34, R8 - SHLQ $0x18, SI - IMULQ R9, SI - SHRQ $0x34, SI - LEAL -2(CX), R9 - LEAQ 24(SP)(SI*4), R10 - MOVL (R10), SI - MOVL R9, 24(SP)(R8*4) - MOVL CX, (R10) - CMPL (DX)(SI*1), DI + MOVQ $0x000000cf1bbcdcbb, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x18, DI + IMULQ R8, DI + SHRQ $0x34, DI + SHLQ $0x18, BX + IMULQ R8, BX + SHRQ $0x34, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI JEQ match_nolit_loop_encodeBlockAsm12B INCL CX JMP search_loop_encodeBlockAsm12B @@ -3556,7 +3535,7 @@ emit_remainder_encodeBlockAsm12B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeBlockAsm12B + JB emit_remainder_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET @@ -3571,9 +3550,12 @@ emit_remainder_ok_encodeBlockAsm12B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeBlockAsm12B + JB one_byte_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeBlockAsm12B + JB two_bytes_emit_remainder_encodeBlockAsm12B + JB three_bytes_emit_remainder_encodeBlockAsm12B + +three_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -3584,7 +3566,7 @@ two_bytes_emit_remainder_encodeBlockAsm12B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeBlockAsm12B + JB memmove_emit_remainder_encodeBlockAsm12B JMP memmove_long_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: @@ -3731,8 +3713,8 @@ zero_loop_encodeBlockAsm10B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -3742,428 +3724,429 @@ zero_loop_encodeBlockAsm10B: MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x05, SI - LEAL 4(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeBlockAsm10B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x9e3779b1, R9 - MOVQ DI, R10 - MOVQ DI, R11 - SHRQ $0x08, R11 + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x05, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeBlockAsm10B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x9e3779b1, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x20, R9 + IMULQ R8, R9 + SHRQ $0x36, R9 SHLQ $0x20, R10 - IMULQ R9, R10 + IMULQ R8, R10 SHRQ $0x36, R10 - SHLQ $0x20, R11 - IMULQ R9, R11 - SHRQ $0x36, R11 - MOVL 24(SP)(R10*4), SI - MOVL 24(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - LEAL 1(CX), R10 - MOVL R10, 24(SP)(R11*4) - MOVQ DI, R10 - SHRQ $0x10, R10 - SHLQ $0x20, R10 - IMULQ R9, R10 - SHRQ $0x36, R10 - MOVL CX, R9 - SUBL 16(SP), R9 - MOVL 1(DX)(R9*1), R11 - MOVQ DI, R9 - SHRQ $0x08, R9 - CMPL R9, R11 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x20, R9 + IMULQ R8, R9 + SHRQ $0x36, R9 + MOVL CX, R8 + SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm10B - LEAL 1(CX), DI - MOVL 12(SP), R8 - MOVL DI, SI - SUBL 16(SP), SI + LEAL 1(CX), SI + MOVL 12(SP), DI + MOVL SI, BX + SUBL 16(SP), BX JZ repeat_extend_back_end_encodeBlockAsm10B repeat_extend_back_loop_encodeBlockAsm10B: - CMPL DI, R8 - JLE repeat_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(DI*1), R9 - CMPB BL, R9 + CMPL SI, DI + JBE repeat_extend_back_end_encodeBlockAsm10B + MOVB -1(DX)(BX*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 JNE repeat_extend_back_end_encodeBlockAsm10B - LEAL -1(DI), DI - DECL SI + LEAL -1(SI), SI + DECL BX JNZ repeat_extend_back_loop_encodeBlockAsm10B repeat_extend_back_end_encodeBlockAsm10B: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_repeat_emit_encodeBlockAsm10B - CMPL SI, $0x00000100 - JLT two_bytes_repeat_emit_encodeBlockAsm10B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_encodeBlockAsm10B + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_encodeBlockAsm10B + JB three_bytes_repeat_emit_encodeBlockAsm10B + +three_bytes_repeat_emit_encodeBlockAsm10B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm10B two_bytes_repeat_emit_encodeBlockAsm10B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_repeat_emit_encodeBlockAsm10B + CMPL BX, $0x40 + JB memmove_repeat_emit_encodeBlockAsm10B JMP memmove_long_repeat_emit_encodeBlockAsm10B one_byte_repeat_emit_encodeBlockAsm10B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (R10), R11 - MOVQ R11, (AX) + MOVQ (R9), R10 + MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeBlockAsm10B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm10B memmove_long_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R12 - SHRQ $0x05, R12 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R13 - SUBQ R11, R13 - DECQ R12 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R13*1), R11 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) - ADDQ $0x20, R14 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) ADDQ $0x20, R13 - DECQ R12 + ADDQ $0x20, R10 + ADDQ $0x20, R12 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R13*1), X4 - MOVOU -16(R10)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R9, R13 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(AX)(R12*1) + MOVOA X5, -16(AX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_repeat_emit_encodeBlockAsm10B: ADDL $0x05, CX - MOVL CX, SI - SUBL 16(SP), SI - MOVQ src_len+32(FP), R9 - SUBL CX, R9 - LEAQ (DX)(CX*1), R10 - LEAQ (DX)(SI*1), SI + MOVL CX, BX + SUBL 16(SP), BX + MOVQ src_len+32(FP), R8 + SUBL CX, R8 + LEAQ (DX)(CX*1), R9 + LEAQ (DX)(BX*1), BX // matchLen - XORL R12, R12 - CMPL R9, $0x08 - JL matchlen_match4_repeat_extend_encodeBlockAsm10B + XORL R11, R11 + CMPL R8, $0x08 + JB matchlen_match4_repeat_extend_encodeBlockAsm10B matchlen_loopback_repeat_extend_encodeBlockAsm10B: - MOVQ (R10)(R12*1), R11 - XORQ (SI)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_repeat_extend_encodeBlockAsm10B #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_loop_repeat_extend_encodeBlockAsm10B: - LEAL -8(R9), R9 - LEAL 8(R12), R12 - CMPL R9, $0x08 - JGE matchlen_loopback_repeat_extend_encodeBlockAsm10B + LEAL -8(R8), R8 + LEAL 8(R11), R11 + CMPL R8, $0x08 + JAE matchlen_loopback_repeat_extend_encodeBlockAsm10B JZ repeat_extend_forward_end_encodeBlockAsm10B matchlen_match4_repeat_extend_encodeBlockAsm10B: - CMPL R9, $0x04 - JL matchlen_match2_repeat_extend_encodeBlockAsm10B - MOVL (R10)(R12*1), R11 - CMPL (SI)(R12*1), R11 + CMPL R8, $0x04 + JB matchlen_match2_repeat_extend_encodeBlockAsm10B + MOVL (R9)(R11*1), R10 + CMPL (BX)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeBlockAsm10B - SUBL $0x04, R9 - LEAL 4(R12), R12 + SUBL $0x04, R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeBlockAsm10B: - CMPL R9, $0x02 - JL matchlen_match1_repeat_extend_encodeBlockAsm10B - MOVW (R10)(R12*1), R11 - CMPW (SI)(R12*1), R11 + CMPL R8, $0x02 + JB matchlen_match1_repeat_extend_encodeBlockAsm10B + MOVW (R9)(R11*1), R10 + CMPW (BX)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeBlockAsm10B - SUBL $0x02, R9 - LEAL 2(R12), R12 + SUBL $0x02, R8 + LEAL 2(R11), R11 matchlen_match1_repeat_extend_encodeBlockAsm10B: - CMPL R9, $0x01 - JL repeat_extend_forward_end_encodeBlockAsm10B - MOVB (R10)(R12*1), R11 - CMPB (SI)(R12*1), R11 + CMPL R8, $0x01 + JB repeat_extend_forward_end_encodeBlockAsm10B + MOVB (R9)(R11*1), R10 + CMPB (BX)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm10B - LEAL 1(R12), R12 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeBlockAsm10B: - ADDL R12, CX - MOVL CX, SI - SUBL DI, SI - MOVL 16(SP), DI - TESTL R8, R8 + ADDL R11, CX + MOVL CX, BX + SUBL SI, BX + MOVL 16(SP), SI + TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm10B // emitRepeat - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_match_repeat_encodeBlockAsm10B - CMPL R8, $0x0c - JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B - CMPL DI, $0x00000800 - JLT repeat_two_offset_match_repeat_encodeBlockAsm10B + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_match_repeat_encodeBlockAsm10B + CMPL DI, $0x0c + JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B + CMPL SI, $0x00000800 + JB repeat_two_offset_match_repeat_encodeBlockAsm10B cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: - CMPL SI, $0x00000104 - JLT repeat_three_match_repeat_encodeBlockAsm10B - LEAL -256(SI), SI + CMPL BX, $0x00000104 + JB repeat_three_match_repeat_encodeBlockAsm10B + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_match_repeat_encodeBlockAsm10B: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_match_repeat_encodeBlockAsm10B: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_match_repeat_encodeBlockAsm10B: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_as_copy_encodeBlockAsm10B: // emitCopy -two_byte_offset_repeat_as_copy_encodeBlockAsm10B: - CMPL SI, $0x40 - JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B - CMPL DI, $0x00000800 + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B + CMPL SI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm10B - MOVL $0x00000001, R8 - LEAL 16(R8), R8 - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, R8 - MOVB R8, (AX) + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) ADDQ $0x02, AX - SUBL $0x08, SI + SUBL $0x08, BX // emitRepeat - LEAL -4(SI), SI + LEAL -4(BX), BX JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL R8, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b + CMPL DI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b + CMPL SI, $0x00000800 + JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(SI), SI + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B long_offset_short_repeat_as_copy_encodeBlockAsm10B: MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(SI), SI + MOVW SI, 1(AX) + LEAL -60(BX), BX ADDQ $0x03, AX // emitRepeat - MOVL SI, R8 - LEAL -4(SI), SI - CMPL R8, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL R8, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL DI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short + MOVL BX, DI + LEAL -4(BX), BX + CMPL DI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short + CMPL DI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short + CMPL SI, $0x00000800 + JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - LEAL -256(SI), SI + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B - JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: - CMPL SI, $0x0c - JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - CMPL DI, $0x00000800 - JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - MOVB $0x01, BL - LEAL -16(BX)(SI*4), SI - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B + CMPL SI, $0x00000800 + JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B + LEAL -15(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B emit_copy_three_repeat_as_copy_encodeBlockAsm10B: - MOVB $0x02, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVW DI, 1(AX) + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm10B: @@ -4171,16 +4154,16 @@ repeat_end_emit_encodeBlockAsm10B: JMP search_loop_encodeBlockAsm10B no_repeat_found_encodeBlockAsm10B: - CMPL (DX)(SI*1), DI + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeBlockAsm10B - SHRQ $0x08, DI - MOVL 24(SP)(R10*4), SI - LEAL 2(CX), R9 - CMPL (DX)(R8*1), DI + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm10B - MOVL R9, 24(SP)(R10*4) - SHRQ $0x08, DI - CMPL (DX)(SI*1), DI + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI JEQ candidate3_match_encodeBlockAsm10B MOVL 20(SP), CX JMP search_loop_encodeBlockAsm10B @@ -4190,391 +4173,392 @@ candidate3_match_encodeBlockAsm10B: JMP candidate_match_encodeBlockAsm10B candidate2_match_encodeBlockAsm10B: - MOVL R9, 24(SP)(R10*4) + MOVL R8, 24(SP)(R9*4) INCL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeBlockAsm10B: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm10B match_extend_back_loop_encodeBlockAsm10B: - CMPL CX, DI - JLE match_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeBlockAsm10B + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeBlockAsm10B LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeBlockAsm10B JMP match_extend_back_loop_encodeBlockAsm10B match_extend_back_end_encodeBlockAsm10B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeBlockAsm10B + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm10B: - MOVL CX, DI - MOVL 12(SP), R8 - CMPL R8, DI + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm10B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(R8*1), DI - SUBL R8, R9 - LEAL -1(R9), R8 - CMPL R8, $0x3c - JLT one_byte_match_emit_encodeBlockAsm10B - CMPL R8, $0x00000100 - JLT two_bytes_match_emit_encodeBlockAsm10B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), DI + CMPL DI, $0x3c + JB one_byte_match_emit_encodeBlockAsm10B + CMPL DI, $0x00000100 + JB two_bytes_match_emit_encodeBlockAsm10B + JB three_bytes_match_emit_encodeBlockAsm10B + +three_bytes_match_emit_encodeBlockAsm10B: MOVB $0xf4, (AX) - MOVW R8, 1(AX) + MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm10B two_bytes_match_emit_encodeBlockAsm10B: MOVB $0xf0, (AX) - MOVB R8, 1(AX) + MOVB DI, 1(AX) ADDQ $0x02, AX - CMPL R8, $0x40 - JL memmove_match_emit_encodeBlockAsm10B + CMPL DI, $0x40 + JB memmove_match_emit_encodeBlockAsm10B JMP memmove_long_match_emit_encodeBlockAsm10B one_byte_match_emit_encodeBlockAsm10B: - SHLB $0x02, R8 - MOVB R8, (AX) + SHLB $0x02, DI + MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R9*1), R8 + LEAQ (AX)(R8*1), DI // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (DI), R10 - MOVQ R10, (AX) + MOVQ (SI), R9 + MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (DI), R10 - MOVQ -8(DI)(R9*1), DI - MOVQ R10, (AX) - MOVQ DI, -8(AX)(R9*1) + MOVQ (SI), R9 + MOVQ -8(SI)(R8*1), SI + MOVQ R9, (AX) + MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (DI), X0 - MOVOU -16(DI)(R9*1), X1 + MOVOU (SI), X0 + MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBlockAsm10B: - MOVQ R8, AX + MOVQ DI, AX JMP emit_literal_done_match_emit_encodeBlockAsm10B memmove_long_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R9*1), R8 + LEAQ (AX)(R8*1), DI // genMemMoveLong - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVQ R9, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVQ R8, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(DI)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(SI)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) ADDQ $0x20, R12 - DECQ R11 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(DI)(R12*1), X4 - MOVOU -16(DI)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R9, R12 + MOVOU -32(SI)(R11*1), X4 + MOVOU -16(SI)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ R8, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm10B: match_nolit_loop_encodeBlockAsm10B: - MOVL CX, DI - SUBL SI, DI - MOVL DI, 16(SP) + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(SI*1), SI + ADDL $0x04, BX + MOVQ src_len+32(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX // matchLen - XORL R10, R10 - CMPL DI, $0x08 - JL matchlen_match4_match_nolit_encodeBlockAsm10B + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_encodeBlockAsm10B matchlen_loopback_match_nolit_encodeBlockAsm10B: - MOVQ (R8)(R10*1), R9 - XORQ (SI)(R10*1), R9 - TESTQ R9, R9 + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm10B #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R8, R8 #else - BSFQ R9, R9 + BSFQ R8, R8 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm10B matchlen_loop_match_nolit_encodeBlockAsm10B: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JGE matchlen_loopback_match_nolit_encodeBlockAsm10B + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_encodeBlockAsm10B JZ match_nolit_end_encodeBlockAsm10B matchlen_match4_match_nolit_encodeBlockAsm10B: - CMPL DI, $0x04 - JL matchlen_match2_match_nolit_encodeBlockAsm10B - MOVL (R8)(R10*1), R9 - CMPL (SI)(R10*1), R9 + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_encodeBlockAsm10B + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeBlockAsm10B - SUBL $0x04, DI - LEAL 4(R10), R10 + SUBL $0x04, SI + LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeBlockAsm10B: - CMPL DI, $0x02 - JL matchlen_match1_match_nolit_encodeBlockAsm10B - MOVW (R8)(R10*1), R9 - CMPW (SI)(R10*1), R9 + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_encodeBlockAsm10B + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeBlockAsm10B - SUBL $0x02, DI - LEAL 2(R10), R10 + SUBL $0x02, SI + LEAL 2(R9), R9 matchlen_match1_match_nolit_encodeBlockAsm10B: - CMPL DI, $0x01 - JL match_nolit_end_encodeBlockAsm10B - MOVB (R8)(R10*1), R9 - CMPB (SI)(R10*1), R9 + CMPL SI, $0x01 + JB match_nolit_end_encodeBlockAsm10B + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm10B - LEAL 1(R10), R10 + LEAL 1(R9), R9 match_nolit_end_encodeBlockAsm10B: - ADDL R10, CX - MOVL 16(SP), SI - ADDL $0x04, R10 + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy -two_byte_offset_match_nolit_encodeBlockAsm10B: - CMPL R10, $0x40 - JLE two_byte_offset_short_match_nolit_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_encodeBlockAsm10B + CMPL BX, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm10B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX - SUBL $0x08, R10 + SUBL $0x08, R9 // emitRepeat - LEAL -4(R10), R10 + LEAL -4(R9), R9 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - MOVL R10, DI - LEAL -4(R10), R10 - CMPL DI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b + MOVL R9, SI + LEAL -4(R9), R9 + CMPL SI, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b + CMPL BX, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(R10), R10 + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B long_offset_short_match_nolit_encodeBlockAsm10B: MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(R10), R10 + MOVW BX, 1(AX) + LEAL -60(R9), R9 ADDQ $0x03, AX // emitRepeat - MOVL R10, DI - LEAL -4(R10), R10 - CMPL DI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short + MOVL R9, SI + LEAL -4(R9), R9 + CMPL SI, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short + CMPL BX, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short - LEAL -256(R10), R10 + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B - JMP two_byte_offset_match_nolit_encodeBlockAsm10B two_byte_offset_short_match_nolit_encodeBlockAsm10B: - CMPL R10, $0x0c - JGE emit_copy_three_match_nolit_encodeBlockAsm10B - CMPL SI, $0x00000800 - JGE emit_copy_three_match_nolit_encodeBlockAsm10B - MOVB $0x01, BL - LEAL -16(BX)(R10*4), R10 - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_encodeBlockAsm10B + CMPL BX, $0x00000800 + JAE emit_copy_three_match_nolit_encodeBlockAsm10B + LEAL -15(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B emit_copy_three_match_nolit_encodeBlockAsm10B: - MOVB $0x02, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVW SI, 1(AX) + LEAL -2(SI), SI + MOVB SI, (AX) + MOVW BX, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm10B: CMPL CX, 8(SP) - JGE emit_remainder_encodeBlockAsm10B - MOVQ -2(DX)(CX*1), DI + JAE emit_remainder_encodeBlockAsm10B + MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeBlockAsm10B + JB match_nolit_dst_ok_encodeBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm10B: - MOVQ $0x9e3779b1, R9 - MOVQ DI, R8 - SHRQ $0x10, DI - MOVQ DI, SI - SHLQ $0x20, R8 - IMULQ R9, R8 - SHRQ $0x36, R8 - SHLQ $0x20, SI - IMULQ R9, SI - SHRQ $0x36, SI - LEAL -2(CX), R9 - LEAQ 24(SP)(SI*4), R10 - MOVL (R10), SI - MOVL R9, 24(SP)(R8*4) - MOVL CX, (R10) - CMPL (DX)(SI*1), DI + MOVQ $0x9e3779b1, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x20, DI + IMULQ R8, DI + SHRQ $0x36, DI + SHLQ $0x20, BX + IMULQ R8, BX + SHRQ $0x36, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI JEQ match_nolit_loop_encodeBlockAsm10B INCL CX JMP search_loop_encodeBlockAsm10B @@ -4584,7 +4568,7 @@ emit_remainder_encodeBlockAsm10B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeBlockAsm10B + JB emit_remainder_ok_encodeBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET @@ -4599,9 +4583,12 @@ emit_remainder_ok_encodeBlockAsm10B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeBlockAsm10B + JB one_byte_emit_remainder_encodeBlockAsm10B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeBlockAsm10B + JB two_bytes_emit_remainder_encodeBlockAsm10B + JB three_bytes_emit_remainder_encodeBlockAsm10B + +three_bytes_emit_remainder_encodeBlockAsm10B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -4612,7 +4599,7 @@ two_bytes_emit_remainder_encodeBlockAsm10B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeBlockAsm10B + JB memmove_emit_remainder_encodeBlockAsm10B JMP memmove_long_emit_remainder_encodeBlockAsm10B one_byte_emit_remainder_encodeBlockAsm10B: @@ -4759,8 +4746,8 @@ zero_loop_encodeBlockAsm8B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -4770,414 +4757,415 @@ zero_loop_encodeBlockAsm8B: MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x04, SI - LEAL 4(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeBlockAsm8B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x9e3779b1, R9 - MOVQ DI, R10 - MOVQ DI, R11 - SHRQ $0x08, R11 + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x04, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeBlockAsm8B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x9e3779b1, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x20, R9 + IMULQ R8, R9 + SHRQ $0x38, R9 SHLQ $0x20, R10 - IMULQ R9, R10 + IMULQ R8, R10 SHRQ $0x38, R10 - SHLQ $0x20, R11 - IMULQ R9, R11 - SHRQ $0x38, R11 - MOVL 24(SP)(R10*4), SI - MOVL 24(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - LEAL 1(CX), R10 - MOVL R10, 24(SP)(R11*4) - MOVQ DI, R10 - SHRQ $0x10, R10 - SHLQ $0x20, R10 - IMULQ R9, R10 - SHRQ $0x38, R10 - MOVL CX, R9 - SUBL 16(SP), R9 - MOVL 1(DX)(R9*1), R11 - MOVQ DI, R9 - SHRQ $0x08, R9 - CMPL R9, R11 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x20, R9 + IMULQ R8, R9 + SHRQ $0x38, R9 + MOVL CX, R8 + SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm8B - LEAL 1(CX), DI - MOVL 12(SP), R8 - MOVL DI, SI - SUBL 16(SP), SI + LEAL 1(CX), SI + MOVL 12(SP), DI + MOVL SI, BX + SUBL 16(SP), BX JZ repeat_extend_back_end_encodeBlockAsm8B repeat_extend_back_loop_encodeBlockAsm8B: - CMPL DI, R8 - JLE repeat_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(DI*1), R9 - CMPB BL, R9 + CMPL SI, DI + JBE repeat_extend_back_end_encodeBlockAsm8B + MOVB -1(DX)(BX*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 JNE repeat_extend_back_end_encodeBlockAsm8B - LEAL -1(DI), DI - DECL SI + LEAL -1(SI), SI + DECL BX JNZ repeat_extend_back_loop_encodeBlockAsm8B repeat_extend_back_end_encodeBlockAsm8B: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_repeat_emit_encodeBlockAsm8B - CMPL SI, $0x00000100 - JLT two_bytes_repeat_emit_encodeBlockAsm8B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_encodeBlockAsm8B + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_encodeBlockAsm8B + JB three_bytes_repeat_emit_encodeBlockAsm8B + +three_bytes_repeat_emit_encodeBlockAsm8B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm8B two_bytes_repeat_emit_encodeBlockAsm8B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_repeat_emit_encodeBlockAsm8B + CMPL BX, $0x40 + JB memmove_repeat_emit_encodeBlockAsm8B JMP memmove_long_repeat_emit_encodeBlockAsm8B one_byte_repeat_emit_encodeBlockAsm8B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (R10), R11 - MOVQ R11, (AX) + MOVQ (R9), R10 + MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeBlockAsm8B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm8B memmove_long_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R12 - SHRQ $0x05, R12 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R13 - SUBQ R11, R13 - DECQ R12 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R13*1), R11 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) - ADDQ $0x20, R14 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) ADDQ $0x20, R13 - DECQ R12 + ADDQ $0x20, R10 + ADDQ $0x20, R12 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R13*1), X4 - MOVOU -16(R10)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R9, R13 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(AX)(R12*1) + MOVOA X5, -16(AX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_repeat_emit_encodeBlockAsm8B: ADDL $0x05, CX - MOVL CX, SI - SUBL 16(SP), SI - MOVQ src_len+32(FP), R9 - SUBL CX, R9 - LEAQ (DX)(CX*1), R10 - LEAQ (DX)(SI*1), SI + MOVL CX, BX + SUBL 16(SP), BX + MOVQ src_len+32(FP), R8 + SUBL CX, R8 + LEAQ (DX)(CX*1), R9 + LEAQ (DX)(BX*1), BX // matchLen - XORL R12, R12 - CMPL R9, $0x08 - JL matchlen_match4_repeat_extend_encodeBlockAsm8B + XORL R11, R11 + CMPL R8, $0x08 + JB matchlen_match4_repeat_extend_encodeBlockAsm8B matchlen_loopback_repeat_extend_encodeBlockAsm8B: - MOVQ (R10)(R12*1), R11 - XORQ (SI)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_repeat_extend_encodeBlockAsm8B #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_loop_repeat_extend_encodeBlockAsm8B: - LEAL -8(R9), R9 - LEAL 8(R12), R12 - CMPL R9, $0x08 - JGE matchlen_loopback_repeat_extend_encodeBlockAsm8B + LEAL -8(R8), R8 + LEAL 8(R11), R11 + CMPL R8, $0x08 + JAE matchlen_loopback_repeat_extend_encodeBlockAsm8B JZ repeat_extend_forward_end_encodeBlockAsm8B matchlen_match4_repeat_extend_encodeBlockAsm8B: - CMPL R9, $0x04 - JL matchlen_match2_repeat_extend_encodeBlockAsm8B - MOVL (R10)(R12*1), R11 - CMPL (SI)(R12*1), R11 + CMPL R8, $0x04 + JB matchlen_match2_repeat_extend_encodeBlockAsm8B + MOVL (R9)(R11*1), R10 + CMPL (BX)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeBlockAsm8B - SUBL $0x04, R9 - LEAL 4(R12), R12 + SUBL $0x04, R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeBlockAsm8B: - CMPL R9, $0x02 - JL matchlen_match1_repeat_extend_encodeBlockAsm8B - MOVW (R10)(R12*1), R11 - CMPW (SI)(R12*1), R11 + CMPL R8, $0x02 + JB matchlen_match1_repeat_extend_encodeBlockAsm8B + MOVW (R9)(R11*1), R10 + CMPW (BX)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeBlockAsm8B - SUBL $0x02, R9 - LEAL 2(R12), R12 + SUBL $0x02, R8 + LEAL 2(R11), R11 matchlen_match1_repeat_extend_encodeBlockAsm8B: - CMPL R9, $0x01 - JL repeat_extend_forward_end_encodeBlockAsm8B - MOVB (R10)(R12*1), R11 - CMPB (SI)(R12*1), R11 + CMPL R8, $0x01 + JB repeat_extend_forward_end_encodeBlockAsm8B + MOVB (R9)(R11*1), R10 + CMPB (BX)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm8B - LEAL 1(R12), R12 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeBlockAsm8B: - ADDL R12, CX - MOVL CX, SI - SUBL DI, SI - MOVL 16(SP), DI - TESTL R8, R8 + ADDL R11, CX + MOVL CX, BX + SUBL SI, BX + MOVL 16(SP), SI + TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm8B // emitRepeat - MOVL SI, DI - LEAL -4(SI), SI - CMPL DI, $0x08 - JLE repeat_two_match_repeat_encodeBlockAsm8B - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B + MOVL BX, SI + LEAL -4(BX), BX + CMPL SI, $0x08 + JBE repeat_two_match_repeat_encodeBlockAsm8B + CMPL SI, $0x0c + JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B cant_repeat_two_offset_match_repeat_encodeBlockAsm8B: - CMPL SI, $0x00000104 - JLT repeat_three_match_repeat_encodeBlockAsm8B - LEAL -256(SI), SI + CMPL BX, $0x00000104 + JB repeat_three_match_repeat_encodeBlockAsm8B + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_match_repeat_encodeBlockAsm8B: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_match_repeat_encodeBlockAsm8B: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_as_copy_encodeBlockAsm8B: // emitCopy -two_byte_offset_repeat_as_copy_encodeBlockAsm8B: - CMPL SI, $0x40 - JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B - CMPL DI, $0x00000800 + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B + CMPL SI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm8B - MOVL $0x00000001, R8 - LEAL 16(R8), R8 - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, R8 - MOVB R8, (AX) + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) ADDQ $0x02, AX - SUBL $0x08, SI + SUBL $0x08, BX // emitRepeat - LEAL -4(SI), SI + LEAL -4(BX), BX JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - MOVL SI, DI - LEAL -4(SI), SI - CMPL DI, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - CMPL DI, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b + MOVL BX, SI + LEAL -4(BX), BX + CMPL SI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b + CMPL SI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(SI), SI + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B long_offset_short_repeat_as_copy_encodeBlockAsm8B: MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(SI), SI + MOVW SI, 1(AX) + LEAL -60(BX), BX ADDQ $0x03, AX // emitRepeat - MOVL SI, DI - LEAL -4(SI), SI - CMPL DI, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL DI, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short + MOVL BX, SI + LEAL -4(BX), BX + CMPL SI, $0x08 + JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short + CMPL SI, $0x0c + JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - CMPL SI, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - LEAL -256(SI), SI + CMPL BX, $0x00000104 + JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short + LEAL -256(BX), BX MOVW $0x0019, (AX) - MOVW SI, 2(AX) + MOVW BX, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - LEAL -4(SI), SI + LEAL -4(BX), BX MOVW $0x0015, (AX) - MOVB SI, 2(AX) + MOVB BL, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, SI - ORL $0x01, SI - MOVW SI, (AX) + SHLL $0x02, BX + ORL $0x01, BX + MOVW BX, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B - XORQ R8, R8 - LEAL 1(R8)(SI*4), SI - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + XORQ DI, DI + LEAL 1(DI)(BX*4), BX + MOVB SI, 1(AX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B - JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B: - CMPL SI, $0x0c - JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B - MOVB $0x01, BL - LEAL -16(BX)(SI*4), SI - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_encodeBlockAsm8B + LEAL -15(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B emit_copy_three_repeat_as_copy_encodeBlockAsm8B: - MOVB $0x02, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVW DI, 1(AX) + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm8B: @@ -5185,16 +5173,16 @@ repeat_end_emit_encodeBlockAsm8B: JMP search_loop_encodeBlockAsm8B no_repeat_found_encodeBlockAsm8B: - CMPL (DX)(SI*1), DI + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeBlockAsm8B - SHRQ $0x08, DI - MOVL 24(SP)(R10*4), SI - LEAL 2(CX), R9 - CMPL (DX)(R8*1), DI + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm8B - MOVL R9, 24(SP)(R10*4) - SHRQ $0x08, DI - CMPL (DX)(SI*1), DI + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI JEQ candidate3_match_encodeBlockAsm8B MOVL 20(SP), CX JMP search_loop_encodeBlockAsm8B @@ -5204,381 +5192,382 @@ candidate3_match_encodeBlockAsm8B: JMP candidate_match_encodeBlockAsm8B candidate2_match_encodeBlockAsm8B: - MOVL R9, 24(SP)(R10*4) + MOVL R8, 24(SP)(R9*4) INCL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeBlockAsm8B: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm8B match_extend_back_loop_encodeBlockAsm8B: - CMPL CX, DI - JLE match_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeBlockAsm8B + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeBlockAsm8B LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeBlockAsm8B JMP match_extend_back_loop_encodeBlockAsm8B match_extend_back_end_encodeBlockAsm8B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeBlockAsm8B + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm8B: - MOVL CX, DI - MOVL 12(SP), R8 - CMPL R8, DI + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm8B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(R8*1), DI - SUBL R8, R9 - LEAL -1(R9), R8 - CMPL R8, $0x3c - JLT one_byte_match_emit_encodeBlockAsm8B - CMPL R8, $0x00000100 - JLT two_bytes_match_emit_encodeBlockAsm8B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), DI + CMPL DI, $0x3c + JB one_byte_match_emit_encodeBlockAsm8B + CMPL DI, $0x00000100 + JB two_bytes_match_emit_encodeBlockAsm8B + JB three_bytes_match_emit_encodeBlockAsm8B + +three_bytes_match_emit_encodeBlockAsm8B: MOVB $0xf4, (AX) - MOVW R8, 1(AX) + MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm8B two_bytes_match_emit_encodeBlockAsm8B: MOVB $0xf0, (AX) - MOVB R8, 1(AX) + MOVB DI, 1(AX) ADDQ $0x02, AX - CMPL R8, $0x40 - JL memmove_match_emit_encodeBlockAsm8B + CMPL DI, $0x40 + JB memmove_match_emit_encodeBlockAsm8B JMP memmove_long_match_emit_encodeBlockAsm8B one_byte_match_emit_encodeBlockAsm8B: - SHLB $0x02, R8 - MOVB R8, (AX) + SHLB $0x02, DI + MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R9*1), R8 + LEAQ (AX)(R8*1), DI // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (DI), R10 - MOVQ R10, (AX) + MOVQ (SI), R9 + MOVQ R9, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (DI), R10 - MOVQ -8(DI)(R9*1), DI - MOVQ R10, (AX) - MOVQ DI, -8(AX)(R9*1) + MOVQ (SI), R9 + MOVQ -8(SI)(R8*1), SI + MOVQ R9, (AX) + MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (DI), X0 - MOVOU -16(DI)(R9*1), X1 + MOVOU (SI), X0 + MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBlockAsm8B: - MOVQ R8, AX + MOVQ DI, AX JMP emit_literal_done_match_emit_encodeBlockAsm8B memmove_long_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R9*1), R8 + LEAQ (AX)(R8*1), DI // genMemMoveLong - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVQ R9, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVQ R8, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(DI)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(SI)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) ADDQ $0x20, R12 - DECQ R11 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(DI)(R12*1), X4 - MOVOU -16(DI)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R9, R12 + MOVOU -32(SI)(R11*1), X4 + MOVOU -16(SI)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ R8, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm8B: match_nolit_loop_encodeBlockAsm8B: - MOVL CX, DI - SUBL SI, DI - MOVL DI, 16(SP) + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(SI*1), SI + ADDL $0x04, BX + MOVQ src_len+32(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX // matchLen - XORL R10, R10 - CMPL DI, $0x08 - JL matchlen_match4_match_nolit_encodeBlockAsm8B + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_encodeBlockAsm8B matchlen_loopback_match_nolit_encodeBlockAsm8B: - MOVQ (R8)(R10*1), R9 - XORQ (SI)(R10*1), R9 - TESTQ R9, R9 + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm8B #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R8, R8 #else - BSFQ R9, R9 + BSFQ R8, R8 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm8B matchlen_loop_match_nolit_encodeBlockAsm8B: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JGE matchlen_loopback_match_nolit_encodeBlockAsm8B + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_encodeBlockAsm8B JZ match_nolit_end_encodeBlockAsm8B matchlen_match4_match_nolit_encodeBlockAsm8B: - CMPL DI, $0x04 - JL matchlen_match2_match_nolit_encodeBlockAsm8B - MOVL (R8)(R10*1), R9 - CMPL (SI)(R10*1), R9 + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_encodeBlockAsm8B + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeBlockAsm8B - SUBL $0x04, DI - LEAL 4(R10), R10 + SUBL $0x04, SI + LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeBlockAsm8B: - CMPL DI, $0x02 - JL matchlen_match1_match_nolit_encodeBlockAsm8B - MOVW (R8)(R10*1), R9 - CMPW (SI)(R10*1), R9 + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_encodeBlockAsm8B + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeBlockAsm8B - SUBL $0x02, DI - LEAL 2(R10), R10 + SUBL $0x02, SI + LEAL 2(R9), R9 matchlen_match1_match_nolit_encodeBlockAsm8B: - CMPL DI, $0x01 - JL match_nolit_end_encodeBlockAsm8B - MOVB (R8)(R10*1), R9 - CMPB (SI)(R10*1), R9 + CMPL SI, $0x01 + JB match_nolit_end_encodeBlockAsm8B + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm8B - LEAL 1(R10), R10 + LEAL 1(R9), R9 match_nolit_end_encodeBlockAsm8B: - ADDL R10, CX - MOVL 16(SP), SI - ADDL $0x04, R10 + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy -two_byte_offset_match_nolit_encodeBlockAsm8B: - CMPL R10, $0x40 - JLE two_byte_offset_short_match_nolit_encodeBlockAsm8B - CMPL SI, $0x00000800 + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_encodeBlockAsm8B + CMPL BX, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm8B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX - SUBL $0x08, R10 + SUBL $0x08, R9 // emitRepeat - LEAL -4(R10), R10 + LEAL -4(R9), R9 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - MOVL R10, SI - LEAL -4(R10), R10 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b + MOVL R9, BX + LEAL -4(R9), R9 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(R10), R10 + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B long_offset_short_match_nolit_encodeBlockAsm8B: MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(R10), R10 + MOVW BX, 1(AX) + LEAL -60(R9), R9 ADDQ $0x03, AX // emitRepeat - MOVL R10, SI - LEAL -4(R10), R10 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short + MOVL R9, BX + LEAL -4(R9), R9 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: - CMPL R10, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short - LEAL -256(R10), R10 + CMPL R9, $0x00000104 + JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short + LEAL -256(R9), R9 MOVW $0x0019, (AX) - MOVW R10, 2(AX) + MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short: - LEAL -4(R10), R10 + LEAL -4(R9), R9 MOVW $0x0015, (AX) - MOVB R10, 2(AX) + MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, R10 - ORL $0x01, R10 - MOVW R10, (AX) + SHLL $0x02, R9 + ORL $0x01, R9 + MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(R10*4), R10 - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + XORQ SI, SI + LEAL 1(SI)(R9*4), R9 + MOVB BL, 1(AX) + SARL $0x08, BX + SHLL $0x05, BX + ORL BX, R9 + MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - JMP two_byte_offset_match_nolit_encodeBlockAsm8B two_byte_offset_short_match_nolit_encodeBlockAsm8B: - CMPL R10, $0x0c - JGE emit_copy_three_match_nolit_encodeBlockAsm8B - MOVB $0x01, BL - LEAL -16(BX)(R10*4), R10 - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_encodeBlockAsm8B + LEAL -15(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B emit_copy_three_match_nolit_encodeBlockAsm8B: - MOVB $0x02, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVW SI, 1(AX) + LEAL -2(SI), SI + MOVB SI, (AX) + MOVW BX, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm8B: CMPL CX, 8(SP) - JGE emit_remainder_encodeBlockAsm8B - MOVQ -2(DX)(CX*1), DI + JAE emit_remainder_encodeBlockAsm8B + MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeBlockAsm8B + JB match_nolit_dst_ok_encodeBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm8B: - MOVQ $0x9e3779b1, R9 - MOVQ DI, R8 - SHRQ $0x10, DI - MOVQ DI, SI - SHLQ $0x20, R8 - IMULQ R9, R8 - SHRQ $0x38, R8 - SHLQ $0x20, SI - IMULQ R9, SI - SHRQ $0x38, SI - LEAL -2(CX), R9 - LEAQ 24(SP)(SI*4), R10 - MOVL (R10), SI - MOVL R9, 24(SP)(R8*4) - MOVL CX, (R10) - CMPL (DX)(SI*1), DI + MOVQ $0x9e3779b1, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x20, DI + IMULQ R8, DI + SHRQ $0x38, DI + SHLQ $0x20, BX + IMULQ R8, BX + SHRQ $0x38, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI JEQ match_nolit_loop_encodeBlockAsm8B INCL CX JMP search_loop_encodeBlockAsm8B @@ -5588,7 +5577,7 @@ emit_remainder_encodeBlockAsm8B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeBlockAsm8B + JB emit_remainder_ok_encodeBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET @@ -5603,9 +5592,12 @@ emit_remainder_ok_encodeBlockAsm8B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeBlockAsm8B + JB one_byte_emit_remainder_encodeBlockAsm8B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeBlockAsm8B + JB two_bytes_emit_remainder_encodeBlockAsm8B + JB three_bytes_emit_remainder_encodeBlockAsm8B + +three_bytes_emit_remainder_encodeBlockAsm8B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -5616,7 +5608,7 @@ two_bytes_emit_remainder_encodeBlockAsm8B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeBlockAsm8B + JB memmove_emit_remainder_encodeBlockAsm8B JMP memmove_long_emit_remainder_encodeBlockAsm8B one_byte_emit_remainder_encodeBlockAsm8B: @@ -5763,8 +5755,8 @@ zero_loop_encodeBetterBlockAsm: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -6(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -5774,873 +5766,865 @@ zero_loop_encodeBetterBlockAsm: MOVQ src_base+24(FP), DX search_loop_encodeBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x07, SI - CMPL SI, $0x63 - JLE check_maxskip_ok_encodeBetterBlockAsm - LEAL 100(CX), SI + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x07, BX + CMPL BX, $0x63 + JBE check_maxskip_ok_encodeBetterBlockAsm + LEAL 100(CX), BX JMP check_maxskip_cont_encodeBetterBlockAsm check_maxskip_ok_encodeBetterBlockAsm: - LEAL 1(CX)(SI*1), SI + LEAL 1(CX)(BX*1), BX check_maxskip_cont_encodeBetterBlockAsm: - CMPL SI, 8(SP) - JGE emit_remainder_encodeBetterBlockAsm - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R9 - MOVQ $0x9e3779b1, SI - MOVQ DI, R10 - MOVQ DI, R11 - SHLQ $0x08, R10 - IMULQ R9, R10 - SHRQ $0x2f, R10 - SHLQ $0x20, R11 - IMULQ SI, R11 - SHRQ $0x32, R11 - MOVL 24(SP)(R10*4), SI - MOVL 524312(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - MOVL CX, 524312(SP)(R11*4) - MOVQ (DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - CMPQ R10, DI + CMPL BX, 8(SP) + JAE emit_remainder_encodeBetterBlockAsm + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R8 + MOVQ $0x9e3779b1, BX + MOVQ SI, R9 + MOVQ SI, R10 + SHLQ $0x08, R9 + IMULQ R8, R9 + SHRQ $0x2f, R9 + SHLQ $0x20, R10 + IMULQ BX, R10 + SHRQ $0x32, R10 + MOVL 24(SP)(R9*4), BX + MOVL 524312(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + MOVL CX, 524312(SP)(R10*4) + MOVQ (DX)(BX*1), R9 + MOVQ (DX)(DI*1), R10 + CMPQ R9, SI JEQ candidate_match_encodeBetterBlockAsm - CMPQ R11, DI + CMPQ R10, SI JNE no_short_found_encodeBetterBlockAsm - MOVL R8, SI + MOVL DI, BX JMP candidate_match_encodeBetterBlockAsm no_short_found_encodeBetterBlockAsm: - CMPL R10, DI + CMPL R9, SI JEQ candidate_match_encodeBetterBlockAsm - CMPL R11, DI + CMPL R10, SI JEQ candidateS_match_encodeBetterBlockAsm MOVL 20(SP), CX JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: - SHRQ $0x08, DI - MOVQ DI, R10 - SHLQ $0x08, R10 - IMULQ R9, R10 - SHRQ $0x2f, R10 - MOVL 24(SP)(R10*4), SI + SHRQ $0x08, SI + MOVQ SI, R9 + SHLQ $0x08, R9 + IMULQ R8, R9 + SHRQ $0x2f, R9 + MOVL 24(SP)(R9*4), BX INCL CX - MOVL CX, 24(SP)(R10*4) - CMPL (DX)(SI*1), DI + MOVL CX, 24(SP)(R9*4) + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeBetterBlockAsm DECL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeBetterBlockAsm: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeBetterBlockAsm match_extend_back_loop_encodeBetterBlockAsm: - CMPL CX, DI - JLE match_extend_back_end_encodeBetterBlockAsm - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeBetterBlockAsm + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeBetterBlockAsm LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeBetterBlockAsm JMP match_extend_back_loop_encodeBetterBlockAsm match_extend_back_end_encodeBetterBlockAsm: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 5(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeBetterBlockAsm + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 5(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm: - MOVL CX, DI + MOVL CX, SI ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), R10 + ADDL $0x04, BX + MOVQ src_len+32(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), R9 // matchLen - XORL R12, R12 - CMPL R8, $0x08 - JL matchlen_match4_match_nolit_encodeBetterBlockAsm + XORL R11, R11 + CMPL DI, $0x08 + JB matchlen_match4_match_nolit_encodeBetterBlockAsm matchlen_loopback_match_nolit_encodeBetterBlockAsm: - MOVQ (R9)(R12*1), R11 - XORQ (R10)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm matchlen_loop_match_nolit_encodeBetterBlockAsm: - LEAL -8(R8), R8 - LEAL 8(R12), R12 - CMPL R8, $0x08 - JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm + LEAL -8(DI), DI + LEAL 8(R11), R11 + CMPL DI, $0x08 + JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm JZ match_nolit_end_encodeBetterBlockAsm matchlen_match4_match_nolit_encodeBetterBlockAsm: - CMPL R8, $0x04 - JL matchlen_match2_match_nolit_encodeBetterBlockAsm - MOVL (R9)(R12*1), R11 - CMPL (R10)(R12*1), R11 + CMPL DI, $0x04 + JB matchlen_match2_match_nolit_encodeBetterBlockAsm + MOVL (R8)(R11*1), R10 + CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm - SUBL $0x04, R8 - LEAL 4(R12), R12 + SUBL $0x04, DI + LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeBetterBlockAsm: - CMPL R8, $0x02 - JL matchlen_match1_match_nolit_encodeBetterBlockAsm - MOVW (R9)(R12*1), R11 - CMPW (R10)(R12*1), R11 + CMPL DI, $0x02 + JB matchlen_match1_match_nolit_encodeBetterBlockAsm + MOVW (R8)(R11*1), R10 + CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm - SUBL $0x02, R8 - LEAL 2(R12), R12 + SUBL $0x02, DI + LEAL 2(R11), R11 matchlen_match1_match_nolit_encodeBetterBlockAsm: - CMPL R8, $0x01 - JL match_nolit_end_encodeBetterBlockAsm - MOVB (R9)(R12*1), R11 - CMPB (R10)(R12*1), R11 + CMPL DI, $0x01 + JB match_nolit_end_encodeBetterBlockAsm + MOVB (R8)(R11*1), R10 + CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm - LEAL 1(R12), R12 + LEAL 1(R11), R11 match_nolit_end_encodeBetterBlockAsm: - MOVL CX, R8 - SUBL SI, R8 + MOVL CX, DI + SUBL BX, DI // Check if repeat - CMPL 16(SP), R8 + CMPL 16(SP), DI JEQ match_is_repeat_encodeBetterBlockAsm - CMPL R12, $0x01 - JG match_length_ok_encodeBetterBlockAsm - CMPL R8, $0x0000ffff - JLE match_length_ok_encodeBetterBlockAsm + CMPL R11, $0x01 + JA match_length_ok_encodeBetterBlockAsm + CMPL DI, $0x0000ffff + JBE match_length_ok_encodeBetterBlockAsm MOVL 20(SP), CX INCL CX JMP search_loop_encodeBetterBlockAsm match_length_ok_encodeBetterBlockAsm: - MOVL R8, 16(SP) - MOVL 12(SP), SI - CMPL SI, DI + MOVL DI, 16(SP) + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_encodeBetterBlockAsm - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_encodeBetterBlockAsm - CMPL SI, $0x00010000 - JLT three_bytes_match_emit_encodeBetterBlockAsm - CMPL SI, $0x01000000 - JLT four_bytes_match_emit_encodeBetterBlockAsm + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_encodeBetterBlockAsm + CMPL BX, $0x00000100 + JB two_bytes_match_emit_encodeBetterBlockAsm + CMPL BX, $0x00010000 + JB three_bytes_match_emit_encodeBetterBlockAsm + CMPL BX, $0x01000000 + JB four_bytes_match_emit_encodeBetterBlockAsm MOVB $0xfc, (AX) - MOVL SI, 1(AX) + MOVL BX, 1(AX) ADDQ $0x05, AX JMP memmove_long_match_emit_encodeBetterBlockAsm four_bytes_match_emit_encodeBetterBlockAsm: - MOVL SI, R11 - SHRL $0x10, R11 + MOVL BX, R10 + SHRL $0x10, R10 MOVB $0xf8, (AX) - MOVW SI, 1(AX) - MOVB R11, 3(AX) + MOVW BX, 1(AX) + MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_encodeBetterBlockAsm three_bytes_match_emit_encodeBetterBlockAsm: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm two_bytes_match_emit_encodeBetterBlockAsm: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_encodeBetterBlockAsm + CMPL BX, $0x40 + JB memmove_match_emit_encodeBetterBlockAsm JMP memmove_long_match_emit_encodeBetterBlockAsm one_byte_match_emit_encodeBetterBlockAsm: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x04 - JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4 - CMPQ R9, $0x08 + CMPQ R8, $0x04 + JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R9, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4: - MOVL (R10), R11 - MOVL R11, (AX) + MOVL (R9), R10 + MOVL R10, (AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R10), R11 - MOVL -4(R10)(R9*1), R10 - MOVL R11, (AX) - MOVL R10, -4(AX)(R9*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (AX) + MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBetterBlockAsm: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_encodeBetterBlockAsm memmove_long_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_encodeBetterBlockAsm: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy - CMPL R8, $0x00010000 - JL two_byte_offset_match_nolit_encodeBetterBlockAsm - -four_bytes_loop_back_match_nolit_encodeBetterBlockAsm: - CMPL R12, $0x40 - JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm + CMPL DI, $0x00010000 + JB two_byte_offset_match_nolit_encodeBetterBlockAsm + CMPL R11, $0x40 + JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm MOVB $0xff, (AX) - MOVL R8, 1(AX) - LEAL -64(R12), R12 + MOVL DI, 1(AX) + LEAL -64(R11), R11 ADDQ $0x05, AX - CMPL R12, $0x04 - JL four_bytes_remain_match_nolit_encodeBetterBlockAsm + CMPL R11, $0x04 + JB four_bytes_remain_match_nolit_encodeBetterBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R12, $0x00010100 - JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R12, $0x0100ffff - JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy - LEAL -16842747(R12), R12 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy + CMPL R11, $0x00010100 + JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy + CMPL R11, $0x0100ffff + JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy + LEAL -16842747(R11), R11 + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -65536(R12), R12 - MOVL R12, R8 + LEAL -65536(R11), R11 + MOVL R11, DI MOVW $0x001d, (AX) - MOVW R12, 2(AX) - SARL $0x10, R8 - MOVB R8, 4(AX) + MOVW R11, 2(AX) + SARL $0x10, DI + MOVB DI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -256(R12), R12 + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm - JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm four_bytes_remain_match_nolit_encodeBetterBlockAsm: - TESTL R12, R12 + TESTL R11, R11 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm - MOVB $0x03, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVL R8, 1(AX) + XORL BX, BX + LEAL -1(BX)(R11*4), R11 + MOVB R11, (AX) + MOVL DI, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_match_nolit_encodeBetterBlockAsm: - CMPL R12, $0x40 - JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm - CMPL R8, $0x00000800 + CMPL R11, $0x40 + JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm + CMPL DI, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB R8, 1(AX) - MOVL R8, R9 - SHRL $0x08, R9 - SHLL $0x05, R9 - ORL R9, SI - MOVB SI, (AX) + MOVL $0x00000001, BX + LEAL 16(BX), BX + MOVB DI, 1(AX) + MOVL DI, R8 + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, BX + MOVB BL, (AX) ADDQ $0x02, AX - SUBL $0x08, R12 + SUBL $0x08, R11 // emitRepeat - LEAL -4(R12), R12 + LEAL -4(R11), R11 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R12, $0x00010100 - JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R12, $0x0100ffff - JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - LEAL -16842747(R12), R12 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b + CMPL R11, $0x00010100 + JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b + CMPL R11, $0x0100ffff + JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b + LEAL -16842747(R11), R11 + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -65536(R12), R12 - MOVL R12, R8 + LEAL -65536(R11), R11 + MOVL R11, DI MOVW $0x001d, (AX) - MOVW R12, 2(AX) - SARL $0x10, R8 - MOVB R8, 4(AX) + MOVW R11, 2(AX) + SARL $0x10, DI + MOVB DI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -256(R12), R12 + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm long_offset_short_match_nolit_encodeBetterBlockAsm: MOVB $0xee, (AX) - MOVW R8, 1(AX) - LEAL -60(R12), R12 + MOVW DI, 1(AX) + LEAL -60(R11), R11 ADDQ $0x03, AX // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R12, $0x00010100 - JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R12, $0x0100ffff - JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short - LEAL -16842747(R12), R12 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short + CMPL R11, $0x00010100 + JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short + CMPL R11, $0x0100ffff + JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short + LEAL -16842747(R11), R11 + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -65536(R12), R12 - MOVL R12, R8 + LEAL -65536(R11), R11 + MOVL R11, DI MOVW $0x001d, (AX) - MOVW R12, 2(AX) - SARL $0x10, R8 - MOVB R8, 4(AX) + MOVW R11, 2(AX) + SARL $0x10, DI + MOVB DI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -256(R12), R12 + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm - JMP two_byte_offset_match_nolit_encodeBetterBlockAsm two_byte_offset_short_match_nolit_encodeBetterBlockAsm: - CMPL R12, $0x0c - JGE emit_copy_three_match_nolit_encodeBetterBlockAsm - CMPL R8, $0x00000800 - JGE emit_copy_three_match_nolit_encodeBetterBlockAsm - MOVB $0x01, BL - LEAL -16(BX)(R12*4), R12 - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + MOVL R11, BX + SHLL $0x02, BX + CMPL R11, $0x0c + JAE emit_copy_three_match_nolit_encodeBetterBlockAsm + CMPL DI, $0x00000800 + JAE emit_copy_three_match_nolit_encodeBetterBlockAsm + LEAL -15(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm emit_copy_three_match_nolit_encodeBetterBlockAsm: - MOVB $0x02, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVW R8, 1(AX) + LEAL -2(BX), BX + MOVB BL, (AX) + MOVW DI, 1(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm match_is_repeat_encodeBetterBlockAsm: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_repeat_encodeBetterBlockAsm - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL SI, $0x00010000 - JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL SI, $0x01000000 - JLT four_bytes_match_emit_repeat_encodeBetterBlockAsm + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_repeat_encodeBetterBlockAsm + CMPL BX, $0x00000100 + JB two_bytes_match_emit_repeat_encodeBetterBlockAsm + CMPL BX, $0x00010000 + JB three_bytes_match_emit_repeat_encodeBetterBlockAsm + CMPL BX, $0x01000000 + JB four_bytes_match_emit_repeat_encodeBetterBlockAsm MOVB $0xfc, (AX) - MOVL SI, 1(AX) + MOVL BX, 1(AX) ADDQ $0x05, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm four_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVL SI, R11 - SHRL $0x10, R11 + MOVL BX, R10 + SHRL $0x10, R10 MOVB $0xf8, (AX) - MOVW SI, 1(AX) - MOVB R11, 3(AX) + MOVW BX, 1(AX) + MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm three_bytes_match_emit_repeat_encodeBetterBlockAsm: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm two_bytes_match_emit_repeat_encodeBetterBlockAsm: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_repeat_encodeBetterBlockAsm + CMPL BX, $0x40 + JB memmove_match_emit_repeat_encodeBetterBlockAsm JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm one_byte_match_emit_repeat_encodeBetterBlockAsm: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x04 - JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4 - CMPQ R9, $0x08 + CMPQ R8, $0x04 + JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R9, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4: - MOVL (R10), R11 - MOVL R11, (AX) + MOVL (R9), R10 + MOVL R10, (AX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R10), R11 - MOVL -4(R10)(R9*1), R10 - MOVL R11, (AX) - MOVL R10, -4(AX)(R9*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (AX) + MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm memmove_long_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitRepeat emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm - CMPL R12, $0x00010100 - JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm - CMPL R12, $0x0100ffff - JLT repeat_five_match_nolit_repeat_encodeBetterBlockAsm - LEAL -16842747(R12), R12 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm + CMPL R11, $0x00010100 + JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm + CMPL R11, $0x0100ffff + JB repeat_five_match_nolit_repeat_encodeBetterBlockAsm + LEAL -16842747(R11), R11 + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm repeat_five_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -65536(R12), R12 - MOVL R12, R8 + LEAL -65536(R11), R11 + MOVL R11, DI MOVW $0x001d, (AX) - MOVW R12, 2(AX) - SARL $0x10, R8 - MOVB R8, 4(AX) + MOVW R11, 2(AX) + SARL $0x10, DI + MOVB DI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -256(R12), R12 + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_repeat_encodeBetterBlockAsm: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm: CMPL CX, 8(SP) - JGE emit_remainder_encodeBetterBlockAsm + JAE emit_remainder_encodeBetterBlockAsm CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeBetterBlockAsm + JB match_nolit_dst_ok_encodeBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, SI - MOVQ $0x9e3779b1, R8 - LEAQ 1(DI), DI - LEAQ -2(CX), R9 - MOVQ (DX)(DI*1), R10 - MOVQ 1(DX)(DI*1), R11 - MOVQ (DX)(R9*1), R12 - MOVQ 1(DX)(R9*1), R13 - SHLQ $0x08, R10 - IMULQ SI, R10 - SHRQ $0x2f, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R12 - IMULQ SI, R12 - SHRQ $0x2f, R12 - SHLQ $0x20, R13 - IMULQ R8, R13 - SHRQ $0x32, R13 - LEAQ 1(DI), R8 - LEAQ 1(R9), R14 - MOVL DI, 24(SP)(R10*4) - MOVL R9, 24(SP)(R12*4) - MOVL R8, 524312(SP)(R11*4) - MOVL R14, 524312(SP)(R13*4) - ADDQ $0x01, DI - SUBQ $0x01, R9 + MOVQ $0x00cf1bbcdcbfa563, BX + MOVQ $0x9e3779b1, DI + LEAQ 1(SI), SI + LEAQ -2(CX), R8 + MOVQ (DX)(SI*1), R9 + MOVQ 1(DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + MOVQ 1(DX)(R8*1), R12 + SHLQ $0x08, R9 + IMULQ BX, R9 + SHRQ $0x2f, R9 + SHLQ $0x20, R10 + IMULQ DI, R10 + SHRQ $0x32, R10 + SHLQ $0x08, R11 + IMULQ BX, R11 + SHRQ $0x2f, R11 + SHLQ $0x20, R12 + IMULQ DI, R12 + SHRQ $0x32, R12 + LEAQ 1(SI), DI + LEAQ 1(R8), R13 + MOVL SI, 24(SP)(R9*4) + MOVL R8, 24(SP)(R11*4) + MOVL DI, 524312(SP)(R10*4) + MOVL R13, 524312(SP)(R12*4) + ADDQ $0x01, SI + SUBQ $0x01, R8 index_loop_encodeBetterBlockAsm: - CMPQ DI, R9 + CMPQ SI, R8 JAE search_loop_encodeBetterBlockAsm - MOVQ (DX)(DI*1), R8 - MOVQ (DX)(R9*1), R10 - SHLQ $0x08, R8 - IMULQ SI, R8 - SHRQ $0x2f, R8 - SHLQ $0x08, R10 - IMULQ SI, R10 - SHRQ $0x2f, R10 - MOVL DI, 24(SP)(R8*4) - MOVL R9, 24(SP)(R10*4) - ADDQ $0x02, DI - SUBQ $0x02, R9 + MOVQ (DX)(SI*1), DI + MOVQ (DX)(R8*1), R9 + SHLQ $0x08, DI + IMULQ BX, DI + SHRQ $0x2f, DI + SHLQ $0x08, R9 + IMULQ BX, R9 + SHRQ $0x2f, R9 + MOVL SI, 24(SP)(DI*4) + MOVL R8, 24(SP)(R9*4) + ADDQ $0x02, SI + SUBQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: @@ -6648,7 +6632,7 @@ emit_remainder_encodeBetterBlockAsm: SUBL 12(SP), CX LEAQ 5(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeBetterBlockAsm + JB emit_remainder_ok_encodeBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET @@ -6663,13 +6647,13 @@ emit_remainder_ok_encodeBetterBlockAsm: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeBetterBlockAsm + JB one_byte_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeBetterBlockAsm + JB two_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeBetterBlockAsm + JB three_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x01000000 - JLT four_bytes_emit_remainder_encodeBetterBlockAsm + JB four_bytes_emit_remainder_encodeBetterBlockAsm MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX @@ -6695,7 +6679,7 @@ two_bytes_emit_remainder_encodeBetterBlockAsm: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeBetterBlockAsm + JB memmove_emit_remainder_encodeBetterBlockAsm JMP memmove_long_emit_remainder_encodeBetterBlockAsm one_byte_emit_remainder_encodeBetterBlockAsm: @@ -6842,8 +6826,8 @@ zero_loop_encodeBetterBlockAsm4MB: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -6(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -6853,811 +6837,807 @@ zero_loop_encodeBetterBlockAsm4MB: MOVQ src_base+24(FP), DX search_loop_encodeBetterBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x07, SI - CMPL SI, $0x63 - JLE check_maxskip_ok_encodeBetterBlockAsm4MB - LEAL 100(CX), SI + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x07, BX + CMPL BX, $0x63 + JBE check_maxskip_ok_encodeBetterBlockAsm4MB + LEAL 100(CX), BX JMP check_maxskip_cont_encodeBetterBlockAsm4MB check_maxskip_ok_encodeBetterBlockAsm4MB: - LEAL 1(CX)(SI*1), SI + LEAL 1(CX)(BX*1), BX check_maxskip_cont_encodeBetterBlockAsm4MB: - CMPL SI, 8(SP) - JGE emit_remainder_encodeBetterBlockAsm4MB - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R9 - MOVQ $0x9e3779b1, SI - MOVQ DI, R10 - MOVQ DI, R11 - SHLQ $0x08, R10 - IMULQ R9, R10 - SHRQ $0x2f, R10 - SHLQ $0x20, R11 - IMULQ SI, R11 - SHRQ $0x32, R11 - MOVL 24(SP)(R10*4), SI - MOVL 524312(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - MOVL CX, 524312(SP)(R11*4) - MOVQ (DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - CMPQ R10, DI + CMPL BX, 8(SP) + JAE emit_remainder_encodeBetterBlockAsm4MB + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R8 + MOVQ $0x9e3779b1, BX + MOVQ SI, R9 + MOVQ SI, R10 + SHLQ $0x08, R9 + IMULQ R8, R9 + SHRQ $0x2f, R9 + SHLQ $0x20, R10 + IMULQ BX, R10 + SHRQ $0x32, R10 + MOVL 24(SP)(R9*4), BX + MOVL 524312(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + MOVL CX, 524312(SP)(R10*4) + MOVQ (DX)(BX*1), R9 + MOVQ (DX)(DI*1), R10 + CMPQ R9, SI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPQ R11, DI + CMPQ R10, SI JNE no_short_found_encodeBetterBlockAsm4MB - MOVL R8, SI + MOVL DI, BX JMP candidate_match_encodeBetterBlockAsm4MB no_short_found_encodeBetterBlockAsm4MB: - CMPL R10, DI + CMPL R9, SI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPL R11, DI + CMPL R10, SI JEQ candidateS_match_encodeBetterBlockAsm4MB MOVL 20(SP), CX JMP search_loop_encodeBetterBlockAsm4MB candidateS_match_encodeBetterBlockAsm4MB: - SHRQ $0x08, DI - MOVQ DI, R10 - SHLQ $0x08, R10 - IMULQ R9, R10 - SHRQ $0x2f, R10 - MOVL 24(SP)(R10*4), SI + SHRQ $0x08, SI + MOVQ SI, R9 + SHLQ $0x08, R9 + IMULQ R8, R9 + SHRQ $0x2f, R9 + MOVL 24(SP)(R9*4), BX INCL CX - MOVL CX, 24(SP)(R10*4) - CMPL (DX)(SI*1), DI + MOVL CX, 24(SP)(R9*4) + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeBetterBlockAsm4MB DECL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeBetterBlockAsm4MB: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeBetterBlockAsm4MB match_extend_back_loop_encodeBetterBlockAsm4MB: - CMPL CX, DI - JLE match_extend_back_end_encodeBetterBlockAsm4MB - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeBetterBlockAsm4MB + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeBetterBlockAsm4MB LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeBetterBlockAsm4MB JMP match_extend_back_loop_encodeBetterBlockAsm4MB match_extend_back_end_encodeBetterBlockAsm4MB: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 4(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeBetterBlockAsm4MB + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 4(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeBetterBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm4MB: - MOVL CX, DI + MOVL CX, SI ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), R10 + ADDL $0x04, BX + MOVQ src_len+32(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), R9 // matchLen - XORL R12, R12 - CMPL R8, $0x08 - JL matchlen_match4_match_nolit_encodeBetterBlockAsm4MB + XORL R11, R11 + CMPL DI, $0x08 + JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB: - MOVQ (R9)(R12*1), R11 - XORQ (R10)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_loop_match_nolit_encodeBetterBlockAsm4MB: - LEAL -8(R8), R8 - LEAL 8(R12), R12 - CMPL R8, $0x08 - JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB + LEAL -8(DI), DI + LEAL 8(R11), R11 + CMPL DI, $0x08 + JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB JZ match_nolit_end_encodeBetterBlockAsm4MB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: - CMPL R8, $0x04 - JL matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - MOVL (R9)(R12*1), R11 - CMPL (R10)(R12*1), R11 + CMPL DI, $0x04 + JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB + MOVL (R8)(R11*1), R10 + CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - SUBL $0x04, R8 - LEAL 4(R12), R12 + SUBL $0x04, DI + LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeBetterBlockAsm4MB: - CMPL R8, $0x02 - JL matchlen_match1_match_nolit_encodeBetterBlockAsm4MB - MOVW (R9)(R12*1), R11 - CMPW (R10)(R12*1), R11 + CMPL DI, $0x02 + JB matchlen_match1_match_nolit_encodeBetterBlockAsm4MB + MOVW (R8)(R11*1), R10 + CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB - SUBL $0x02, R8 - LEAL 2(R12), R12 + SUBL $0x02, DI + LEAL 2(R11), R11 matchlen_match1_match_nolit_encodeBetterBlockAsm4MB: - CMPL R8, $0x01 - JL match_nolit_end_encodeBetterBlockAsm4MB - MOVB (R9)(R12*1), R11 - CMPB (R10)(R12*1), R11 + CMPL DI, $0x01 + JB match_nolit_end_encodeBetterBlockAsm4MB + MOVB (R8)(R11*1), R10 + CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm4MB - LEAL 1(R12), R12 + LEAL 1(R11), R11 match_nolit_end_encodeBetterBlockAsm4MB: - MOVL CX, R8 - SUBL SI, R8 + MOVL CX, DI + SUBL BX, DI // Check if repeat - CMPL 16(SP), R8 + CMPL 16(SP), DI JEQ match_is_repeat_encodeBetterBlockAsm4MB - CMPL R12, $0x01 - JG match_length_ok_encodeBetterBlockAsm4MB - CMPL R8, $0x0000ffff - JLE match_length_ok_encodeBetterBlockAsm4MB + CMPL R11, $0x01 + JA match_length_ok_encodeBetterBlockAsm4MB + CMPL DI, $0x0000ffff + JBE match_length_ok_encodeBetterBlockAsm4MB MOVL 20(SP), CX INCL CX JMP search_loop_encodeBetterBlockAsm4MB match_length_ok_encodeBetterBlockAsm4MB: - MOVL R8, 16(SP) - MOVL 12(SP), SI - CMPL SI, DI + MOVL DI, 16(SP) + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_encodeBetterBlockAsm4MB - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_encodeBetterBlockAsm4MB - CMPL SI, $0x00010000 - JLT three_bytes_match_emit_encodeBetterBlockAsm4MB - MOVL SI, R11 - SHRL $0x10, R11 + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_encodeBetterBlockAsm4MB + CMPL BX, $0x00000100 + JB two_bytes_match_emit_encodeBetterBlockAsm4MB + CMPL BX, $0x00010000 + JB three_bytes_match_emit_encodeBetterBlockAsm4MB + MOVL BX, R10 + SHRL $0x10, R10 MOVB $0xf8, (AX) - MOVW SI, 1(AX) - MOVB R11, 3(AX) + MOVW BX, 1(AX) + MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB three_bytes_match_emit_encodeBetterBlockAsm4MB: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB two_bytes_match_emit_encodeBetterBlockAsm4MB: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_encodeBetterBlockAsm4MB + CMPL BX, $0x40 + JB memmove_match_emit_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_encodeBetterBlockAsm4MB one_byte_match_emit_encodeBetterBlockAsm4MB: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x04 - JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R9, $0x08 + CMPQ R8, $0x04 + JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R9, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R10), R11 - MOVL R11, (AX) + MOVL (R9), R10 + MOVL R10, (AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R10), R11 - MOVL -4(R10)(R9*1), R10 - MOVL R11, (AX) - MOVL R10, -4(AX)(R9*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (AX) + MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBetterBlockAsm4MB: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB memmove_long_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_encodeBetterBlockAsm4MB: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy - CMPL R8, $0x00010000 - JL two_byte_offset_match_nolit_encodeBetterBlockAsm4MB - -four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB: - CMPL R12, $0x40 - JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB + CMPL DI, $0x00010000 + JB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB + CMPL R11, $0x40 + JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB MOVB $0xff, (AX) - MOVL R8, 1(AX) - LEAL -64(R12), R12 + MOVL DI, 1(AX) + LEAL -64(R11), R11 ADDQ $0x05, AX - CMPL R12, $0x04 - JL four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB + CMPL R11, $0x04 + JB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB // emitRepeat - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL R12, $0x00010100 - JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy - LEAL -65536(R12), R12 - MOVL R12, R8 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy + CMPL R11, $0x00010100 + JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy + LEAL -65536(R11), R11 + MOVL R11, DI MOVW $0x001d, (AX) - MOVW R12, 2(AX) - SARL $0x10, R8 - MOVB R8, 4(AX) + MOVW R11, 2(AX) + SARL $0x10, DI + MOVB DI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -256(R12), R12 + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB - JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB: - TESTL R12, R12 + TESTL R11, R11 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB - MOVB $0x03, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVL R8, 1(AX) + XORL BX, BX + LEAL -1(BX)(R11*4), R11 + MOVB R11, (AX) + MOVL DI, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB: - CMPL R12, $0x40 - JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB - CMPL R8, $0x00000800 + CMPL R11, $0x40 + JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB + CMPL DI, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm4MB - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, SI - MOVB SI, (AX) + MOVL $0x00000001, BX + LEAL 16(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX - SUBL $0x08, R12 + SUBL $0x08, R11 // emitRepeat - LEAL -4(R12), R12 + LEAL -4(R11), R11 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL R12, $0x00010100 - JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R12), R12 - MOVL R12, R8 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b + CMPL R11, $0x00010100 + JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b + LEAL -65536(R11), R11 + MOVL R11, DI MOVW $0x001d, (AX) - MOVW R12, 2(AX) - SARL $0x10, R8 - MOVB R8, 4(AX) + MOVW R11, 2(AX) + SARL $0x10, DI + MOVB DI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R12), R12 + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB long_offset_short_match_nolit_encodeBetterBlockAsm4MB: MOVB $0xee, (AX) - MOVW R8, 1(AX) - LEAL -60(R12), R12 + MOVW DI, 1(AX) + LEAL -60(R11), R11 ADDQ $0x03, AX // emitRepeat - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL R12, $0x00010100 - JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - LEAL -65536(R12), R12 - MOVL R12, R8 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short + CMPL R11, $0x00010100 + JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short + LEAL -65536(R11), R11 + MOVL R11, DI MOVW $0x001d, (AX) - MOVW R12, 2(AX) - SARL $0x10, R8 - MOVB R8, 4(AX) + MOVW R11, 2(AX) + SARL $0x10, DI + MOVB DI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -256(R12), R12 + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB - JMP two_byte_offset_match_nolit_encodeBetterBlockAsm4MB two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB: - CMPL R12, $0x0c - JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - CMPL R8, $0x00000800 - JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - MOVB $0x01, BL - LEAL -16(BX)(R12*4), R12 - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + MOVL R11, BX + SHLL $0x02, BX + CMPL R11, $0x0c + JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB + CMPL DI, $0x00000800 + JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB + LEAL -15(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB emit_copy_three_match_nolit_encodeBetterBlockAsm4MB: - MOVB $0x02, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVW R8, 1(AX) + LEAL -2(BX), BX + MOVB BL, (AX) + MOVW DI, 1(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB match_is_repeat_encodeBetterBlockAsm4MB: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL SI, $0x00010000 - JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL SI, R11 - SHRL $0x10, R11 + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB + CMPL BX, $0x00000100 + JB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB + CMPL BX, $0x00010000 + JB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB + MOVL BX, R10 + SHRL $0x10, R10 MOVB $0xf8, (AX) - MOVW SI, 1(AX) - MOVB R11, 3(AX) + MOVW BX, 1(AX) + MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_repeat_encodeBetterBlockAsm4MB + CMPL BX, $0x40 + JB memmove_match_emit_repeat_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x04 - JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R9, $0x08 + CMPQ R8, $0x04 + JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R9, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R10), R11 - MOVL R11, (AX) + MOVL (R9), R10 + MOVL R10, (AX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R10), R11 - MOVL -4(R10)(R9*1), R10 - MOVL R11, (AX) - MOVL R10, -4(AX)(R9*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (AX) + MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitRepeat - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL R12, $0x00010100 - JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB - LEAL -65536(R12), R12 - MOVL R12, R8 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB + CMPL R11, $0x00010100 + JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB + LEAL -65536(R11), R11 + MOVL R11, DI MOVW $0x001d, (AX) - MOVW R12, 2(AX) - SARL $0x10, R8 - MOVB R8, 4(AX) + MOVW R11, 2(AX) + SARL $0x10, DI + MOVB DI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -256(R12), R12 + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm4MB: CMPL CX, 8(SP) - JGE emit_remainder_encodeBetterBlockAsm4MB + JAE emit_remainder_encodeBetterBlockAsm4MB CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeBetterBlockAsm4MB + JB match_nolit_dst_ok_encodeBetterBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm4MB: - MOVQ $0x00cf1bbcdcbfa563, SI - MOVQ $0x9e3779b1, R8 - LEAQ 1(DI), DI - LEAQ -2(CX), R9 - MOVQ (DX)(DI*1), R10 - MOVQ 1(DX)(DI*1), R11 - MOVQ (DX)(R9*1), R12 - MOVQ 1(DX)(R9*1), R13 - SHLQ $0x08, R10 - IMULQ SI, R10 - SHRQ $0x2f, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R12 - IMULQ SI, R12 - SHRQ $0x2f, R12 - SHLQ $0x20, R13 - IMULQ R8, R13 - SHRQ $0x32, R13 - LEAQ 1(DI), R8 - LEAQ 1(R9), R14 - MOVL DI, 24(SP)(R10*4) - MOVL R9, 24(SP)(R12*4) - MOVL R8, 524312(SP)(R11*4) - MOVL R14, 524312(SP)(R13*4) - ADDQ $0x01, DI - SUBQ $0x01, R9 + MOVQ $0x00cf1bbcdcbfa563, BX + MOVQ $0x9e3779b1, DI + LEAQ 1(SI), SI + LEAQ -2(CX), R8 + MOVQ (DX)(SI*1), R9 + MOVQ 1(DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + MOVQ 1(DX)(R8*1), R12 + SHLQ $0x08, R9 + IMULQ BX, R9 + SHRQ $0x2f, R9 + SHLQ $0x20, R10 + IMULQ DI, R10 + SHRQ $0x32, R10 + SHLQ $0x08, R11 + IMULQ BX, R11 + SHRQ $0x2f, R11 + SHLQ $0x20, R12 + IMULQ DI, R12 + SHRQ $0x32, R12 + LEAQ 1(SI), DI + LEAQ 1(R8), R13 + MOVL SI, 24(SP)(R9*4) + MOVL R8, 24(SP)(R11*4) + MOVL DI, 524312(SP)(R10*4) + MOVL R13, 524312(SP)(R12*4) + ADDQ $0x01, SI + SUBQ $0x01, R8 index_loop_encodeBetterBlockAsm4MB: - CMPQ DI, R9 + CMPQ SI, R8 JAE search_loop_encodeBetterBlockAsm4MB - MOVQ (DX)(DI*1), R8 - MOVQ (DX)(R9*1), R10 - SHLQ $0x08, R8 - IMULQ SI, R8 - SHRQ $0x2f, R8 - SHLQ $0x08, R10 - IMULQ SI, R10 - SHRQ $0x2f, R10 - MOVL DI, 24(SP)(R8*4) - MOVL R9, 24(SP)(R10*4) - ADDQ $0x02, DI - SUBQ $0x02, R9 + MOVQ (DX)(SI*1), DI + MOVQ (DX)(R8*1), R9 + SHLQ $0x08, DI + IMULQ BX, DI + SHRQ $0x2f, DI + SHLQ $0x08, R9 + IMULQ BX, R9 + SHRQ $0x2f, R9 + MOVL SI, 24(SP)(DI*4) + MOVL R8, 24(SP)(R9*4) + ADDQ $0x02, SI + SUBQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm4MB emit_remainder_encodeBetterBlockAsm4MB: @@ -7665,7 +7645,7 @@ emit_remainder_encodeBetterBlockAsm4MB: SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeBetterBlockAsm4MB + JB emit_remainder_ok_encodeBetterBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET @@ -7680,11 +7660,11 @@ emit_remainder_ok_encodeBetterBlockAsm4MB: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeBetterBlockAsm4MB + JB one_byte_emit_remainder_encodeBetterBlockAsm4MB CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeBetterBlockAsm4MB + JB two_bytes_emit_remainder_encodeBetterBlockAsm4MB CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeBetterBlockAsm4MB + JB three_bytes_emit_remainder_encodeBetterBlockAsm4MB MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) @@ -7704,7 +7684,7 @@ two_bytes_emit_remainder_encodeBetterBlockAsm4MB: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeBetterBlockAsm4MB + JB memmove_emit_remainder_encodeBetterBlockAsm4MB JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB one_byte_emit_remainder_encodeBetterBlockAsm4MB: @@ -7851,8 +7831,8 @@ zero_loop_encodeBetterBlockAsm12B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -6(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -7862,656 +7842,660 @@ zero_loop_encodeBetterBlockAsm12B: MOVQ src_base+24(FP), DX search_loop_encodeBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x06, SI - LEAL 1(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeBetterBlockAsm12B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ $0x9e3779b1, SI - MOVQ DI, R10 - MOVQ DI, R11 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x32, R10 - SHLQ $0x20, R11 - IMULQ SI, R11 - SHRQ $0x34, R11 - MOVL 24(SP)(R10*4), SI - MOVL 65560(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - MOVL CX, 65560(SP)(R11*4) - MOVQ (DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - CMPQ R10, DI + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x06, BX + LEAL 1(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeBetterBlockAsm12B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ $0x9e3779b1, BX + MOVQ SI, R9 + MOVQ SI, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 + SHLQ $0x20, R10 + IMULQ BX, R10 + SHRQ $0x34, R10 + MOVL 24(SP)(R9*4), BX + MOVL 65560(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + MOVL CX, 65560(SP)(R10*4) + MOVQ (DX)(BX*1), R9 + MOVQ (DX)(DI*1), R10 + CMPQ R9, SI JEQ candidate_match_encodeBetterBlockAsm12B - CMPQ R11, DI + CMPQ R10, SI JNE no_short_found_encodeBetterBlockAsm12B - MOVL R8, SI + MOVL DI, BX JMP candidate_match_encodeBetterBlockAsm12B no_short_found_encodeBetterBlockAsm12B: - CMPL R10, DI + CMPL R9, SI JEQ candidate_match_encodeBetterBlockAsm12B - CMPL R11, DI + CMPL R10, SI JEQ candidateS_match_encodeBetterBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeBetterBlockAsm12B candidateS_match_encodeBetterBlockAsm12B: - SHRQ $0x08, DI - MOVQ DI, R10 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R10*4), SI + SHRQ $0x08, SI + MOVQ SI, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 + MOVL 24(SP)(R9*4), BX INCL CX - MOVL CX, 24(SP)(R10*4) - CMPL (DX)(SI*1), DI + MOVL CX, 24(SP)(R9*4) + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeBetterBlockAsm12B DECL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeBetterBlockAsm12B: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeBetterBlockAsm12B match_extend_back_loop_encodeBetterBlockAsm12B: - CMPL CX, DI - JLE match_extend_back_end_encodeBetterBlockAsm12B - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeBetterBlockAsm12B + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeBetterBlockAsm12B LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeBetterBlockAsm12B JMP match_extend_back_loop_encodeBetterBlockAsm12B match_extend_back_end_encodeBetterBlockAsm12B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeBetterBlockAsm12B + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm12B: - MOVL CX, DI + MOVL CX, SI ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), R10 + ADDL $0x04, BX + MOVQ src_len+32(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), R9 // matchLen - XORL R12, R12 - CMPL R8, $0x08 - JL matchlen_match4_match_nolit_encodeBetterBlockAsm12B + XORL R11, R11 + CMPL DI, $0x08 + JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: - MOVQ (R9)(R12*1), R11 - XORQ (R10)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm12B #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_loop_match_nolit_encodeBetterBlockAsm12B: - LEAL -8(R8), R8 - LEAL 8(R12), R12 - CMPL R8, $0x08 - JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B + LEAL -8(DI), DI + LEAL 8(R11), R11 + CMPL DI, $0x08 + JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_match4_match_nolit_encodeBetterBlockAsm12B: - CMPL R8, $0x04 - JL matchlen_match2_match_nolit_encodeBetterBlockAsm12B - MOVL (R9)(R12*1), R11 - CMPL (R10)(R12*1), R11 + CMPL DI, $0x04 + JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B + MOVL (R8)(R11*1), R10 + CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B - SUBL $0x04, R8 - LEAL 4(R12), R12 + SUBL $0x04, DI + LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeBetterBlockAsm12B: - CMPL R8, $0x02 - JL matchlen_match1_match_nolit_encodeBetterBlockAsm12B - MOVW (R9)(R12*1), R11 - CMPW (R10)(R12*1), R11 + CMPL DI, $0x02 + JB matchlen_match1_match_nolit_encodeBetterBlockAsm12B + MOVW (R8)(R11*1), R10 + CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B - SUBL $0x02, R8 - LEAL 2(R12), R12 + SUBL $0x02, DI + LEAL 2(R11), R11 matchlen_match1_match_nolit_encodeBetterBlockAsm12B: - CMPL R8, $0x01 - JL match_nolit_end_encodeBetterBlockAsm12B - MOVB (R9)(R12*1), R11 - CMPB (R10)(R12*1), R11 + CMPL DI, $0x01 + JB match_nolit_end_encodeBetterBlockAsm12B + MOVB (R8)(R11*1), R10 + CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm12B - LEAL 1(R12), R12 + LEAL 1(R11), R11 match_nolit_end_encodeBetterBlockAsm12B: - MOVL CX, R8 - SUBL SI, R8 + MOVL CX, DI + SUBL BX, DI // Check if repeat - CMPL 16(SP), R8 + CMPL 16(SP), DI JEQ match_is_repeat_encodeBetterBlockAsm12B - MOVL R8, 16(SP) - MOVL 12(SP), SI - CMPL SI, DI + MOVL DI, 16(SP) + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_encodeBetterBlockAsm12B - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_encodeBetterBlockAsm12B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_encodeBetterBlockAsm12B + CMPL BX, $0x00000100 + JB two_bytes_match_emit_encodeBetterBlockAsm12B + JB three_bytes_match_emit_encodeBetterBlockAsm12B + +three_bytes_match_emit_encodeBetterBlockAsm12B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm12B two_bytes_match_emit_encodeBetterBlockAsm12B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_encodeBetterBlockAsm12B + CMPL BX, $0x40 + JB memmove_match_emit_encodeBetterBlockAsm12B JMP memmove_long_match_emit_encodeBetterBlockAsm12B one_byte_match_emit_encodeBetterBlockAsm12B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x04 - JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R9, $0x08 + CMPQ R8, $0x04 + JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R9, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R10), R11 - MOVL R11, (AX) + MOVL (R9), R10 + MOVL R10, (AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R10), R11 - MOVL -4(R10)(R9*1), R10 - MOVL R11, (AX) - MOVL R10, -4(AX)(R9*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (AX) + MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBetterBlockAsm12B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B memmove_long_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_encodeBetterBlockAsm12B: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy -two_byte_offset_match_nolit_encodeBetterBlockAsm12B: - CMPL R12, $0x40 - JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B - CMPL R8, $0x00000800 + CMPL R11, $0x40 + JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B + CMPL DI, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm12B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, SI - MOVB SI, (AX) + MOVL $0x00000001, BX + LEAL 16(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX - SUBL $0x08, R12 + SUBL $0x08, R11 // emitRepeat - LEAL -4(R12), R12 + LEAL -4(R11), R11 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - LEAL -256(R12), R12 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B long_offset_short_match_nolit_encodeBetterBlockAsm12B: MOVB $0xee, (AX) - MOVW R8, 1(AX) - LEAL -60(R12), R12 + MOVW DI, 1(AX) + LEAL -60(R11), R11 ADDQ $0x03, AX // emitRepeat - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - LEAL -256(R12), R12 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B - JMP two_byte_offset_match_nolit_encodeBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: - CMPL R12, $0x0c - JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - CMPL R8, $0x00000800 - JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - MOVB $0x01, BL - LEAL -16(BX)(R12*4), R12 - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + MOVL R11, BX + SHLL $0x02, BX + CMPL R11, $0x0c + JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B + CMPL DI, $0x00000800 + JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B + LEAL -15(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B emit_copy_three_match_nolit_encodeBetterBlockAsm12B: - MOVB $0x02, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVW R8, 1(AX) + LEAL -2(BX), BX + MOVB BL, (AX) + MOVW DI, 1(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B match_is_repeat_encodeBetterBlockAsm12B: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_repeat_encodeBetterBlockAsm12B - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm12B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_repeat_encodeBetterBlockAsm12B + CMPL BX, $0x00000100 + JB two_bytes_match_emit_repeat_encodeBetterBlockAsm12B + JB three_bytes_match_emit_repeat_encodeBetterBlockAsm12B + +three_bytes_match_emit_repeat_encodeBetterBlockAsm12B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_repeat_encodeBetterBlockAsm12B + CMPL BX, $0x40 + JB memmove_match_emit_repeat_encodeBetterBlockAsm12B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B one_byte_match_emit_repeat_encodeBetterBlockAsm12B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x04 - JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R9, $0x08 + CMPQ R8, $0x04 + JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R9, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R10), R11 - MOVL R11, (AX) + MOVL (R9), R10 + MOVL R10, (AX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R10), R11 - MOVL -4(R10)(R9*1), R10 - MOVL R11, (AX) - MOVL R10, -4(AX)(R9*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (AX) + MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitRepeat - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B - LEAL -256(R12), R12 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm12B: CMPL CX, 8(SP) - JGE emit_remainder_encodeBetterBlockAsm12B + JAE emit_remainder_encodeBetterBlockAsm12B CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeBetterBlockAsm12B + JB match_nolit_dst_ok_encodeBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, SI - MOVQ $0x9e3779b1, R8 - LEAQ 1(DI), DI - LEAQ -2(CX), R9 - MOVQ (DX)(DI*1), R10 - MOVQ 1(DX)(DI*1), R11 - MOVQ (DX)(R9*1), R12 - MOVQ 1(DX)(R9*1), R13 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x32, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x34, R11 - SHLQ $0x10, R12 - IMULQ SI, R12 - SHRQ $0x32, R12 - SHLQ $0x20, R13 - IMULQ R8, R13 - SHRQ $0x34, R13 - LEAQ 1(DI), R8 - LEAQ 1(R9), R14 - MOVL DI, 24(SP)(R10*4) - MOVL R9, 24(SP)(R12*4) - MOVL R8, 65560(SP)(R11*4) - MOVL R14, 65560(SP)(R13*4) - ADDQ $0x01, DI - SUBQ $0x01, R9 + MOVQ $0x0000cf1bbcdcbf9b, BX + MOVQ $0x9e3779b1, DI + LEAQ 1(SI), SI + LEAQ -2(CX), R8 + MOVQ (DX)(SI*1), R9 + MOVQ 1(DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + MOVQ 1(DX)(R8*1), R12 + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x32, R9 + SHLQ $0x20, R10 + IMULQ DI, R10 + SHRQ $0x34, R10 + SHLQ $0x10, R11 + IMULQ BX, R11 + SHRQ $0x32, R11 + SHLQ $0x20, R12 + IMULQ DI, R12 + SHRQ $0x34, R12 + LEAQ 1(SI), DI + LEAQ 1(R8), R13 + MOVL SI, 24(SP)(R9*4) + MOVL R8, 24(SP)(R11*4) + MOVL DI, 65560(SP)(R10*4) + MOVL R13, 65560(SP)(R12*4) + ADDQ $0x01, SI + SUBQ $0x01, R8 index_loop_encodeBetterBlockAsm12B: - CMPQ DI, R9 + CMPQ SI, R8 JAE search_loop_encodeBetterBlockAsm12B - MOVQ (DX)(DI*1), R8 - MOVQ (DX)(R9*1), R10 - SHLQ $0x10, R8 - IMULQ SI, R8 - SHRQ $0x32, R8 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x32, R10 - MOVL DI, 24(SP)(R8*4) - MOVL R9, 24(SP)(R10*4) - ADDQ $0x02, DI - SUBQ $0x02, R9 + MOVQ (DX)(SI*1), DI + MOVQ (DX)(R8*1), R9 + SHLQ $0x10, DI + IMULQ BX, DI + SHRQ $0x32, DI + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x32, R9 + MOVL SI, 24(SP)(DI*4) + MOVL R8, 24(SP)(R9*4) + ADDQ $0x02, SI + SUBQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: @@ -8519,7 +8503,7 @@ emit_remainder_encodeBetterBlockAsm12B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeBetterBlockAsm12B + JB emit_remainder_ok_encodeBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET @@ -8534,9 +8518,12 @@ emit_remainder_ok_encodeBetterBlockAsm12B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeBetterBlockAsm12B + JB one_byte_emit_remainder_encodeBetterBlockAsm12B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeBetterBlockAsm12B + JB two_bytes_emit_remainder_encodeBetterBlockAsm12B + JB three_bytes_emit_remainder_encodeBetterBlockAsm12B + +three_bytes_emit_remainder_encodeBetterBlockAsm12B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -8547,7 +8534,7 @@ two_bytes_emit_remainder_encodeBetterBlockAsm12B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeBetterBlockAsm12B + JB memmove_emit_remainder_encodeBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B one_byte_emit_remainder_encodeBetterBlockAsm12B: @@ -8694,8 +8681,8 @@ zero_loop_encodeBetterBlockAsm10B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -6(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -8705,656 +8692,660 @@ zero_loop_encodeBetterBlockAsm10B: MOVQ src_base+24(FP), DX search_loop_encodeBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x05, SI - LEAL 1(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeBetterBlockAsm10B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ $0x9e3779b1, SI - MOVQ DI, R10 - MOVQ DI, R11 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x34, R10 - SHLQ $0x20, R11 - IMULQ SI, R11 - SHRQ $0x36, R11 - MOVL 24(SP)(R10*4), SI - MOVL 16408(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - MOVL CX, 16408(SP)(R11*4) - MOVQ (DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - CMPQ R10, DI + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x05, BX + LEAL 1(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeBetterBlockAsm10B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ $0x9e3779b1, BX + MOVQ SI, R9 + MOVQ SI, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x34, R9 + SHLQ $0x20, R10 + IMULQ BX, R10 + SHRQ $0x36, R10 + MOVL 24(SP)(R9*4), BX + MOVL 16408(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + MOVL CX, 16408(SP)(R10*4) + MOVQ (DX)(BX*1), R9 + MOVQ (DX)(DI*1), R10 + CMPQ R9, SI JEQ candidate_match_encodeBetterBlockAsm10B - CMPQ R11, DI + CMPQ R10, SI JNE no_short_found_encodeBetterBlockAsm10B - MOVL R8, SI + MOVL DI, BX JMP candidate_match_encodeBetterBlockAsm10B no_short_found_encodeBetterBlockAsm10B: - CMPL R10, DI + CMPL R9, SI JEQ candidate_match_encodeBetterBlockAsm10B - CMPL R11, DI + CMPL R10, SI JEQ candidateS_match_encodeBetterBlockAsm10B MOVL 20(SP), CX JMP search_loop_encodeBetterBlockAsm10B candidateS_match_encodeBetterBlockAsm10B: - SHRQ $0x08, DI - MOVQ DI, R10 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R10*4), SI + SHRQ $0x08, SI + MOVQ SI, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x34, R9 + MOVL 24(SP)(R9*4), BX INCL CX - MOVL CX, 24(SP)(R10*4) - CMPL (DX)(SI*1), DI + MOVL CX, 24(SP)(R9*4) + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeBetterBlockAsm10B DECL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeBetterBlockAsm10B: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeBetterBlockAsm10B match_extend_back_loop_encodeBetterBlockAsm10B: - CMPL CX, DI - JLE match_extend_back_end_encodeBetterBlockAsm10B - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeBetterBlockAsm10B + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeBetterBlockAsm10B LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeBetterBlockAsm10B JMP match_extend_back_loop_encodeBetterBlockAsm10B match_extend_back_end_encodeBetterBlockAsm10B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeBetterBlockAsm10B + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeBetterBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm10B: - MOVL CX, DI + MOVL CX, SI ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), R10 + ADDL $0x04, BX + MOVQ src_len+32(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), R9 // matchLen - XORL R12, R12 - CMPL R8, $0x08 - JL matchlen_match4_match_nolit_encodeBetterBlockAsm10B + XORL R11, R11 + CMPL DI, $0x08 + JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: - MOVQ (R9)(R12*1), R11 - XORQ (R10)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm10B #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_loop_match_nolit_encodeBetterBlockAsm10B: - LEAL -8(R8), R8 - LEAL 8(R12), R12 - CMPL R8, $0x08 - JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B + LEAL -8(DI), DI + LEAL 8(R11), R11 + CMPL DI, $0x08 + JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B JZ match_nolit_end_encodeBetterBlockAsm10B matchlen_match4_match_nolit_encodeBetterBlockAsm10B: - CMPL R8, $0x04 - JL matchlen_match2_match_nolit_encodeBetterBlockAsm10B - MOVL (R9)(R12*1), R11 - CMPL (R10)(R12*1), R11 + CMPL DI, $0x04 + JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B + MOVL (R8)(R11*1), R10 + CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B - SUBL $0x04, R8 - LEAL 4(R12), R12 + SUBL $0x04, DI + LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeBetterBlockAsm10B: - CMPL R8, $0x02 - JL matchlen_match1_match_nolit_encodeBetterBlockAsm10B - MOVW (R9)(R12*1), R11 - CMPW (R10)(R12*1), R11 + CMPL DI, $0x02 + JB matchlen_match1_match_nolit_encodeBetterBlockAsm10B + MOVW (R8)(R11*1), R10 + CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B - SUBL $0x02, R8 - LEAL 2(R12), R12 + SUBL $0x02, DI + LEAL 2(R11), R11 matchlen_match1_match_nolit_encodeBetterBlockAsm10B: - CMPL R8, $0x01 - JL match_nolit_end_encodeBetterBlockAsm10B - MOVB (R9)(R12*1), R11 - CMPB (R10)(R12*1), R11 + CMPL DI, $0x01 + JB match_nolit_end_encodeBetterBlockAsm10B + MOVB (R8)(R11*1), R10 + CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm10B - LEAL 1(R12), R12 + LEAL 1(R11), R11 match_nolit_end_encodeBetterBlockAsm10B: - MOVL CX, R8 - SUBL SI, R8 + MOVL CX, DI + SUBL BX, DI // Check if repeat - CMPL 16(SP), R8 + CMPL 16(SP), DI JEQ match_is_repeat_encodeBetterBlockAsm10B - MOVL R8, 16(SP) - MOVL 12(SP), SI - CMPL SI, DI + MOVL DI, 16(SP) + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_encodeBetterBlockAsm10B - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_encodeBetterBlockAsm10B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_encodeBetterBlockAsm10B + CMPL BX, $0x00000100 + JB two_bytes_match_emit_encodeBetterBlockAsm10B + JB three_bytes_match_emit_encodeBetterBlockAsm10B + +three_bytes_match_emit_encodeBetterBlockAsm10B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm10B two_bytes_match_emit_encodeBetterBlockAsm10B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_encodeBetterBlockAsm10B + CMPL BX, $0x40 + JB memmove_match_emit_encodeBetterBlockAsm10B JMP memmove_long_match_emit_encodeBetterBlockAsm10B one_byte_match_emit_encodeBetterBlockAsm10B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x04 - JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R9, $0x08 + CMPQ R8, $0x04 + JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R9, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R10), R11 - MOVL R11, (AX) + MOVL (R9), R10 + MOVL R10, (AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R10), R11 - MOVL -4(R10)(R9*1), R10 - MOVL R11, (AX) - MOVL R10, -4(AX)(R9*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (AX) + MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBetterBlockAsm10B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B memmove_long_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_encodeBetterBlockAsm10B: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy -two_byte_offset_match_nolit_encodeBetterBlockAsm10B: - CMPL R12, $0x40 - JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B - CMPL R8, $0x00000800 + CMPL R11, $0x40 + JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B + CMPL DI, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm10B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, SI - MOVB SI, (AX) + MOVL $0x00000001, BX + LEAL 16(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX - SUBL $0x08, R12 + SUBL $0x08, R11 // emitRepeat - LEAL -4(R12), R12 + LEAL -4(R11), R11 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - LEAL -256(R12), R12 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B long_offset_short_match_nolit_encodeBetterBlockAsm10B: MOVB $0xee, (AX) - MOVW R8, 1(AX) - LEAL -60(R12), R12 + MOVW DI, 1(AX) + LEAL -60(R11), R11 ADDQ $0x03, AX // emitRepeat - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - LEAL -256(R12), R12 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B - JMP two_byte_offset_match_nolit_encodeBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B: - CMPL R12, $0x0c - JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - CMPL R8, $0x00000800 - JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - MOVB $0x01, BL - LEAL -16(BX)(R12*4), R12 - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + MOVL R11, BX + SHLL $0x02, BX + CMPL R11, $0x0c + JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B + CMPL DI, $0x00000800 + JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B + LEAL -15(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B emit_copy_three_match_nolit_encodeBetterBlockAsm10B: - MOVB $0x02, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVW R8, 1(AX) + LEAL -2(BX), BX + MOVB BL, (AX) + MOVW DI, 1(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B match_is_repeat_encodeBetterBlockAsm10B: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_repeat_encodeBetterBlockAsm10B - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm10B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_repeat_encodeBetterBlockAsm10B + CMPL BX, $0x00000100 + JB two_bytes_match_emit_repeat_encodeBetterBlockAsm10B + JB three_bytes_match_emit_repeat_encodeBetterBlockAsm10B + +three_bytes_match_emit_repeat_encodeBetterBlockAsm10B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B two_bytes_match_emit_repeat_encodeBetterBlockAsm10B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_repeat_encodeBetterBlockAsm10B + CMPL BX, $0x40 + JB memmove_match_emit_repeat_encodeBetterBlockAsm10B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B one_byte_match_emit_repeat_encodeBetterBlockAsm10B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x04 - JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R9, $0x08 + CMPQ R8, $0x04 + JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R9, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R10), R11 - MOVL R11, (AX) + MOVL (R9), R10 + MOVL R10, (AX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R10), R11 - MOVL -4(R10)(R9*1), R10 - MOVL R11, (AX) - MOVL R10, -4(AX)(R9*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (AX) + MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B memmove_long_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitRepeat - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL R8, $0x00000800 - JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B + CMPL DI, $0x00000800 + JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B - LEAL -256(R12), R12 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm10B: CMPL CX, 8(SP) - JGE emit_remainder_encodeBetterBlockAsm10B + JAE emit_remainder_encodeBetterBlockAsm10B CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeBetterBlockAsm10B + JB match_nolit_dst_ok_encodeBetterBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, SI - MOVQ $0x9e3779b1, R8 - LEAQ 1(DI), DI - LEAQ -2(CX), R9 - MOVQ (DX)(DI*1), R10 - MOVQ 1(DX)(DI*1), R11 - MOVQ (DX)(R9*1), R12 - MOVQ 1(DX)(R9*1), R13 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x34, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x36, R11 - SHLQ $0x10, R12 - IMULQ SI, R12 - SHRQ $0x34, R12 - SHLQ $0x20, R13 - IMULQ R8, R13 - SHRQ $0x36, R13 - LEAQ 1(DI), R8 - LEAQ 1(R9), R14 - MOVL DI, 24(SP)(R10*4) - MOVL R9, 24(SP)(R12*4) - MOVL R8, 16408(SP)(R11*4) - MOVL R14, 16408(SP)(R13*4) - ADDQ $0x01, DI - SUBQ $0x01, R9 + MOVQ $0x0000cf1bbcdcbf9b, BX + MOVQ $0x9e3779b1, DI + LEAQ 1(SI), SI + LEAQ -2(CX), R8 + MOVQ (DX)(SI*1), R9 + MOVQ 1(DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + MOVQ 1(DX)(R8*1), R12 + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x34, R9 + SHLQ $0x20, R10 + IMULQ DI, R10 + SHRQ $0x36, R10 + SHLQ $0x10, R11 + IMULQ BX, R11 + SHRQ $0x34, R11 + SHLQ $0x20, R12 + IMULQ DI, R12 + SHRQ $0x36, R12 + LEAQ 1(SI), DI + LEAQ 1(R8), R13 + MOVL SI, 24(SP)(R9*4) + MOVL R8, 24(SP)(R11*4) + MOVL DI, 16408(SP)(R10*4) + MOVL R13, 16408(SP)(R12*4) + ADDQ $0x01, SI + SUBQ $0x01, R8 index_loop_encodeBetterBlockAsm10B: - CMPQ DI, R9 + CMPQ SI, R8 JAE search_loop_encodeBetterBlockAsm10B - MOVQ (DX)(DI*1), R8 - MOVQ (DX)(R9*1), R10 - SHLQ $0x10, R8 - IMULQ SI, R8 - SHRQ $0x34, R8 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x34, R10 - MOVL DI, 24(SP)(R8*4) - MOVL R9, 24(SP)(R10*4) - ADDQ $0x02, DI - SUBQ $0x02, R9 + MOVQ (DX)(SI*1), DI + MOVQ (DX)(R8*1), R9 + SHLQ $0x10, DI + IMULQ BX, DI + SHRQ $0x34, DI + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x34, R9 + MOVL SI, 24(SP)(DI*4) + MOVL R8, 24(SP)(R9*4) + ADDQ $0x02, SI + SUBQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: @@ -9362,7 +9353,7 @@ emit_remainder_encodeBetterBlockAsm10B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeBetterBlockAsm10B + JB emit_remainder_ok_encodeBetterBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET @@ -9377,9 +9368,12 @@ emit_remainder_ok_encodeBetterBlockAsm10B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeBetterBlockAsm10B + JB one_byte_emit_remainder_encodeBetterBlockAsm10B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeBetterBlockAsm10B + JB two_bytes_emit_remainder_encodeBetterBlockAsm10B + JB three_bytes_emit_remainder_encodeBetterBlockAsm10B + +three_bytes_emit_remainder_encodeBetterBlockAsm10B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -9390,7 +9384,7 @@ two_bytes_emit_remainder_encodeBetterBlockAsm10B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeBetterBlockAsm10B + JB memmove_emit_remainder_encodeBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B one_byte_emit_remainder_encodeBetterBlockAsm10B: @@ -9537,8 +9531,8 @@ zero_loop_encodeBetterBlockAsm8B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -6(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -9548,488 +9542,231 @@ zero_loop_encodeBetterBlockAsm8B: MOVQ src_base+24(FP), DX search_loop_encodeBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x04, SI - LEAL 1(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeBetterBlockAsm8B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ $0x9e3779b1, SI - MOVQ DI, R10 - MOVQ DI, R11 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x36, R10 - SHLQ $0x20, R11 - IMULQ SI, R11 - SHRQ $0x38, R11 - MOVL 24(SP)(R10*4), SI - MOVL 4120(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - MOVL CX, 4120(SP)(R11*4) - MOVQ (DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - CMPQ R10, DI + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x04, BX + LEAL 1(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeBetterBlockAsm8B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ $0x9e3779b1, BX + MOVQ SI, R9 + MOVQ SI, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x36, R9 + SHLQ $0x20, R10 + IMULQ BX, R10 + SHRQ $0x38, R10 + MOVL 24(SP)(R9*4), BX + MOVL 4120(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + MOVL CX, 4120(SP)(R10*4) + MOVQ (DX)(BX*1), R9 + MOVQ (DX)(DI*1), R10 + CMPQ R9, SI JEQ candidate_match_encodeBetterBlockAsm8B - CMPQ R11, DI + CMPQ R10, SI JNE no_short_found_encodeBetterBlockAsm8B - MOVL R8, SI + MOVL DI, BX JMP candidate_match_encodeBetterBlockAsm8B no_short_found_encodeBetterBlockAsm8B: - CMPL R10, DI + CMPL R9, SI JEQ candidate_match_encodeBetterBlockAsm8B - CMPL R11, DI + CMPL R10, SI JEQ candidateS_match_encodeBetterBlockAsm8B MOVL 20(SP), CX JMP search_loop_encodeBetterBlockAsm8B candidateS_match_encodeBetterBlockAsm8B: - SHRQ $0x08, DI - MOVQ DI, R10 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R10*4), SI + SHRQ $0x08, SI + MOVQ SI, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x36, R9 + MOVL 24(SP)(R9*4), BX INCL CX - MOVL CX, 24(SP)(R10*4) - CMPL (DX)(SI*1), DI + MOVL CX, 24(SP)(R9*4) + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeBetterBlockAsm8B DECL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeBetterBlockAsm8B: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeBetterBlockAsm8B match_extend_back_loop_encodeBetterBlockAsm8B: - CMPL CX, DI - JLE match_extend_back_end_encodeBetterBlockAsm8B - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeBetterBlockAsm8B + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeBetterBlockAsm8B LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeBetterBlockAsm8B JMP match_extend_back_loop_encodeBetterBlockAsm8B match_extend_back_end_encodeBetterBlockAsm8B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeBetterBlockAsm8B + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeBetterBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm8B: - MOVL CX, DI + MOVL CX, SI ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), R10 + ADDL $0x04, BX + MOVQ src_len+32(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), R9 // matchLen - XORL R12, R12 - CMPL R8, $0x08 - JL matchlen_match4_match_nolit_encodeBetterBlockAsm8B + XORL R11, R11 + CMPL DI, $0x08 + JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: - MOVQ (R9)(R12*1), R11 - XORQ (R10)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm8B #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_loop_match_nolit_encodeBetterBlockAsm8B: - LEAL -8(R8), R8 - LEAL 8(R12), R12 - CMPL R8, $0x08 - JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B + LEAL -8(DI), DI + LEAL 8(R11), R11 + CMPL DI, $0x08 + JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B JZ match_nolit_end_encodeBetterBlockAsm8B matchlen_match4_match_nolit_encodeBetterBlockAsm8B: - CMPL R8, $0x04 - JL matchlen_match2_match_nolit_encodeBetterBlockAsm8B - MOVL (R9)(R12*1), R11 - CMPL (R10)(R12*1), R11 + CMPL DI, $0x04 + JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B + MOVL (R8)(R11*1), R10 + CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B - SUBL $0x04, R8 - LEAL 4(R12), R12 + SUBL $0x04, DI + LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeBetterBlockAsm8B: - CMPL R8, $0x02 - JL matchlen_match1_match_nolit_encodeBetterBlockAsm8B - MOVW (R9)(R12*1), R11 - CMPW (R10)(R12*1), R11 + CMPL DI, $0x02 + JB matchlen_match1_match_nolit_encodeBetterBlockAsm8B + MOVW (R8)(R11*1), R10 + CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B - SUBL $0x02, R8 - LEAL 2(R12), R12 + SUBL $0x02, DI + LEAL 2(R11), R11 matchlen_match1_match_nolit_encodeBetterBlockAsm8B: - CMPL R8, $0x01 - JL match_nolit_end_encodeBetterBlockAsm8B - MOVB (R9)(R12*1), R11 - CMPB (R10)(R12*1), R11 + CMPL DI, $0x01 + JB match_nolit_end_encodeBetterBlockAsm8B + MOVB (R8)(R11*1), R10 + CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm8B - LEAL 1(R12), R12 + LEAL 1(R11), R11 match_nolit_end_encodeBetterBlockAsm8B: - MOVL CX, R8 - SUBL SI, R8 + MOVL CX, DI + SUBL BX, DI // Check if repeat - CMPL 16(SP), R8 + CMPL 16(SP), DI JEQ match_is_repeat_encodeBetterBlockAsm8B - MOVL R8, 16(SP) - MOVL 12(SP), SI - CMPL SI, DI + MOVL DI, 16(SP) + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_encodeBetterBlockAsm8B - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_encodeBetterBlockAsm8B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_encodeBetterBlockAsm8B + CMPL BX, $0x00000100 + JB two_bytes_match_emit_encodeBetterBlockAsm8B + JB three_bytes_match_emit_encodeBetterBlockAsm8B + +three_bytes_match_emit_encodeBetterBlockAsm8B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm8B two_bytes_match_emit_encodeBetterBlockAsm8B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_encodeBetterBlockAsm8B + CMPL BX, $0x40 + JB memmove_match_emit_encodeBetterBlockAsm8B JMP memmove_long_match_emit_encodeBetterBlockAsm8B one_byte_match_emit_encodeBetterBlockAsm8B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x04 - JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ R9, $0x08 + CMPQ R8, $0x04 + JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ R9, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R10), R11 - MOVL R11, (AX) + MOVL (R9), R10 + MOVL R10, (AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R10), R11 - MOVL -4(R10)(R9*1), R10 - MOVL R11, (AX) - MOVL R10, -4(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B - -emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B - -emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B - -emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - -memmove_end_copy_match_emit_encodeBetterBlockAsm8B: - MOVQ SI, AX - JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B - -memmove_long_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R9*1), SI - - // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 - JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 - -emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 - ADDQ $0x20, R14 - DECQ R13 - JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 - JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX - -emit_literal_done_match_emit_encodeBetterBlockAsm8B: - ADDL R12, CX - ADDL $0x04, R12 - MOVL CX, 12(SP) - - // emitCopy -two_byte_offset_match_nolit_encodeBetterBlockAsm8B: - CMPL R12, $0x40 - JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B - CMPL R8, $0x00000800 - JAE long_offset_short_match_nolit_encodeBetterBlockAsm8B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R12 - - // emitRepeat - LEAL -4(R12), R12 - JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - -cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - LEAL -256(R12), R12 - MOVW $0x0019, (AX) - MOVW R12, 2(AX) - ADDQ $0x04, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - -repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - LEAL -4(R12), R12 - MOVW $0x0015, (AX) - MOVB R12, 2(AX) - ADDQ $0x03, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - -repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - -long_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVB $0xee, (AX) - MOVW R8, 1(AX) - LEAL -60(R12), R12 - ADDQ $0x03, AX - - // emitRepeat - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - -cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - LEAL -256(R12), R12 - MOVW $0x0019, (AX) - MOVW R12, 2(AX) - ADDQ $0x04, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - -repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - LEAL -4(R12), R12 - MOVW $0x0015, (AX) - MOVB R12, 2(AX) - ADDQ $0x03, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - -repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - JMP two_byte_offset_match_nolit_encodeBetterBlockAsm8B - -two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: - CMPL R12, $0x0c - JGE emit_copy_three_match_nolit_encodeBetterBlockAsm8B - MOVB $0x01, BL - LEAL -16(BX)(R12*4), R12 - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - -emit_copy_three_match_nolit_encodeBetterBlockAsm8B: - MOVB $0x02, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVW R8, 1(AX) - ADDQ $0x03, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - -match_is_repeat_encodeBetterBlockAsm8B: - MOVL 12(SP), SI - CMPL SI, DI - JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B - MOVL DI, R8 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R9 - SUBL SI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_repeat_encodeBetterBlockAsm8B - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm8B - MOVB $0xf4, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX - JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B - -two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB SI, 1(AX) - ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_repeat_encodeBetterBlockAsm8B - JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B - -one_byte_match_emit_repeat_encodeBetterBlockAsm8B: - SHLB $0x02, SI - MOVB SI, (AX) - ADDQ $0x01, AX - -memmove_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), SI - - // genMemMoveShort - CMPQ R8, $0x04 - JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ R8, $0x08 - JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ R8, $0x10 - JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 - JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 - JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 - -emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) - JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B - -emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) - JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B + JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B -emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: +emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) - JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B + JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B -emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: +emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) - JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B + JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B -emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: +emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 @@ -10039,30 +9776,30 @@ emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) -memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: - MOVQ SI, AX - JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B +memmove_end_copy_match_emit_encodeBetterBlockAsm8B: + MOVQ BX, AX + JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B -memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), SI +memmove_long_match_emit_encodeBetterBlockAsm8B: + LEAQ (AX)(R8*1), BX // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 + MOVQ R8, R12 + SHRQ $0x05, R12 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R13 SUBQ R10, R13 - DECQ R11 - JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 + DECQ R12 + JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(R9)(R13*1), R10 LEAQ -32(AX)(R13*1), R14 -emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: +emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R14) @@ -10070,120 +9807,381 @@ emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_bac ADDQ $0x20, R14 ADDQ $0x20, R10 ADDQ $0x20, R13 - DECQ R11 - JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back + DECQ R12 + JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back -emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: +emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(R9)(R13*1), X4 MOVOU -16(R9)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R8, R13 - JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 + JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) - MOVQ SI, AX + MOVQ BX, AX + +emit_literal_done_match_emit_encodeBetterBlockAsm8B: + ADDL R11, CX + ADDL $0x04, R11 + MOVL CX, 12(SP) + + // emitCopy + CMPL R11, $0x40 + JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B + CMPL DI, $0x00000800 + JAE long_offset_short_match_nolit_encodeBetterBlockAsm8B + MOVL $0x00000001, BX + LEAL 16(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) + ADDQ $0x02, AX + SUBL $0x08, R11 + + // emitRepeat + LEAL -4(R11), R11 + JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b + +cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b + LEAL -256(R11), R11 + MOVW $0x0019, (AX) + MOVW R11, 2(AX) + ADDQ $0x04, AX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + +repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: + LEAL -4(R11), R11 + MOVW $0x0015, (AX) + MOVB R11, 2(AX) + ADDQ $0x03, AX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + +repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) + ADDQ $0x02, AX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) + ADDQ $0x02, AX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + +long_offset_short_match_nolit_encodeBetterBlockAsm8B: + MOVB $0xee, (AX) + MOVW DI, 1(AX) + LEAL -60(R11), R11 + ADDQ $0x03, AX + + // emitRepeat + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short + +cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short + LEAL -256(R11), R11 + MOVW $0x0019, (AX) + MOVW R11, 2(AX) + ADDQ $0x04, AX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + +repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: + LEAL -4(R11), R11 + MOVW $0x0015, (AX) + MOVB R11, 2(AX) + ADDQ $0x03, AX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + +repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) + ADDQ $0x02, AX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) + ADDQ $0x02, AX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + +two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: + MOVL R11, BX + SHLL $0x02, BX + CMPL R11, $0x0c + JAE emit_copy_three_match_nolit_encodeBetterBlockAsm8B + LEAL -15(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) + ADDQ $0x02, AX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + +emit_copy_three_match_nolit_encodeBetterBlockAsm8B: + LEAL -2(BX), BX + MOVB BL, (AX) + MOVW DI, 1(AX) + ADDQ $0x03, AX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + +match_is_repeat_encodeBetterBlockAsm8B: + MOVL 12(SP), BX + CMPL BX, SI + JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B + MOVL SI, DI + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R8 + SUBL BX, DI + LEAL -1(DI), BX + CMPL BX, $0x3c + JB one_byte_match_emit_repeat_encodeBetterBlockAsm8B + CMPL BX, $0x00000100 + JB two_bytes_match_emit_repeat_encodeBetterBlockAsm8B + JB three_bytes_match_emit_repeat_encodeBetterBlockAsm8B + +three_bytes_match_emit_repeat_encodeBetterBlockAsm8B: + MOVB $0xf4, (AX) + MOVW BX, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B + +two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: + MOVB $0xf0, (AX) + MOVB BL, 1(AX) + ADDQ $0x02, AX + CMPL BX, $0x40 + JB memmove_match_emit_repeat_encodeBetterBlockAsm8B + JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B + +one_byte_match_emit_repeat_encodeBetterBlockAsm8B: + SHLB $0x02, BL + MOVB BL, (AX) + ADDQ $0x01, AX + +memmove_match_emit_repeat_encodeBetterBlockAsm8B: + LEAQ (AX)(DI*1), BX + + // genMemMoveShort + CMPQ DI, $0x04 + JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4 + CMPQ DI, $0x08 + JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 + CMPQ DI, $0x10 + JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 + CMPQ DI, $0x20 + JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 + JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 + +emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: + MOVL (R8), R9 + MOVL R9, (AX) + JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B + +emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: + MOVL (R8), R9 + MOVL -4(R8)(DI*1), R8 + MOVL R9, (AX) + MOVL R8, -4(AX)(DI*1) + JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B + +emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: + MOVQ (R8), R9 + MOVQ -8(R8)(DI*1), R8 + MOVQ R9, (AX) + MOVQ R8, -8(AX)(DI*1) + JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B + +emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: + MOVOU (R8), X0 + MOVOU -16(R8)(DI*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(DI*1) + JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B + +emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) + +memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: + MOVQ BX, AX + JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B + +memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: + LEAQ (AX)(DI*1), BX + + // genMemMoveLong + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 + MOVQ DI, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R12 + SUBQ R9, R12 + DECQ R10 + JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 + LEAQ -32(R8)(R12*1), R9 + LEAQ -32(AX)(R12*1), R13 + +emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R9 + ADDQ $0x20, R12 + DECQ R10 + JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back + +emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: + MOVOU -32(R8)(R12*1), X4 + MOVOU -16(R8)(R12*1), X5 + MOVOA X4, -32(AX)(R12*1) + MOVOA X5, -16(AX)(R12*1) + ADDQ $0x20, R12 + CMPQ DI, R12 + JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) + MOVQ BX, AX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitRepeat - MOVL R12, SI - LEAL -4(R12), R12 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B + MOVL R11, BX + LEAL -4(R11), R11 + CMPL BX, $0x08 + JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B + CMPL BX, $0x0c + JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B: - CMPL R12, $0x00000104 - JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B - LEAL -256(R12), R12 + CMPL R11, $0x00000104 + JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B + LEAL -256(R11), R11 MOVW $0x0019, (AX) - MOVW R12, 2(AX) + MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B: - LEAL -4(R12), R12 + LEAL -4(R11), R11 MOVW $0x0015, (AX) - MOVB R12, 2(AX) + MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: - SHLL $0x02, R12 - ORL $0x01, R12 - MOVW R12, (AX) + SHLL $0x02, R11 + ORL $0x01, R11 + MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R12*4), R12 - MOVB R8, 1(AX) - SARL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + XORQ BX, BX + LEAL 1(BX)(R11*4), R11 + MOVB DI, 1(AX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, R11 + MOVB R11, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm8B: CMPL CX, 8(SP) - JGE emit_remainder_encodeBetterBlockAsm8B + JAE emit_remainder_encodeBetterBlockAsm8B CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeBetterBlockAsm8B + JB match_nolit_dst_ok_encodeBetterBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, SI - MOVQ $0x9e3779b1, R8 - LEAQ 1(DI), DI - LEAQ -2(CX), R9 - MOVQ (DX)(DI*1), R10 - MOVQ 1(DX)(DI*1), R11 - MOVQ (DX)(R9*1), R12 - MOVQ 1(DX)(R9*1), R13 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x36, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x38, R11 - SHLQ $0x10, R12 - IMULQ SI, R12 - SHRQ $0x36, R12 - SHLQ $0x20, R13 - IMULQ R8, R13 - SHRQ $0x38, R13 - LEAQ 1(DI), R8 - LEAQ 1(R9), R14 - MOVL DI, 24(SP)(R10*4) - MOVL R9, 24(SP)(R12*4) - MOVL R8, 4120(SP)(R11*4) - MOVL R14, 4120(SP)(R13*4) - ADDQ $0x01, DI - SUBQ $0x01, R9 + MOVQ $0x0000cf1bbcdcbf9b, BX + MOVQ $0x9e3779b1, DI + LEAQ 1(SI), SI + LEAQ -2(CX), R8 + MOVQ (DX)(SI*1), R9 + MOVQ 1(DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + MOVQ 1(DX)(R8*1), R12 + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x36, R9 + SHLQ $0x20, R10 + IMULQ DI, R10 + SHRQ $0x38, R10 + SHLQ $0x10, R11 + IMULQ BX, R11 + SHRQ $0x36, R11 + SHLQ $0x20, R12 + IMULQ DI, R12 + SHRQ $0x38, R12 + LEAQ 1(SI), DI + LEAQ 1(R8), R13 + MOVL SI, 24(SP)(R9*4) + MOVL R8, 24(SP)(R11*4) + MOVL DI, 4120(SP)(R10*4) + MOVL R13, 4120(SP)(R12*4) + ADDQ $0x01, SI + SUBQ $0x01, R8 index_loop_encodeBetterBlockAsm8B: - CMPQ DI, R9 + CMPQ SI, R8 JAE search_loop_encodeBetterBlockAsm8B - MOVQ (DX)(DI*1), R8 - MOVQ (DX)(R9*1), R10 - SHLQ $0x10, R8 - IMULQ SI, R8 - SHRQ $0x36, R8 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x36, R10 - MOVL DI, 24(SP)(R8*4) - MOVL R9, 24(SP)(R10*4) - ADDQ $0x02, DI - SUBQ $0x02, R9 + MOVQ (DX)(SI*1), DI + MOVQ (DX)(R8*1), R9 + SHLQ $0x10, DI + IMULQ BX, DI + SHRQ $0x36, DI + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x36, R9 + MOVL SI, 24(SP)(DI*4) + MOVL R8, 24(SP)(R9*4) + ADDQ $0x02, SI + SUBQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: @@ -10191,7 +10189,7 @@ emit_remainder_encodeBetterBlockAsm8B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeBetterBlockAsm8B + JB emit_remainder_ok_encodeBetterBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET @@ -10206,9 +10204,12 @@ emit_remainder_ok_encodeBetterBlockAsm8B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeBetterBlockAsm8B + JB one_byte_emit_remainder_encodeBetterBlockAsm8B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeBetterBlockAsm8B + JB two_bytes_emit_remainder_encodeBetterBlockAsm8B + JB three_bytes_emit_remainder_encodeBetterBlockAsm8B + +three_bytes_emit_remainder_encodeBetterBlockAsm8B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -10219,7 +10220,7 @@ two_bytes_emit_remainder_encodeBetterBlockAsm8B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeBetterBlockAsm8B + JB memmove_emit_remainder_encodeBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B one_byte_emit_remainder_encodeBetterBlockAsm8B: @@ -10366,8 +10367,8 @@ zero_loop_encodeSnappyBlockAsm: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -10377,539 +10378,216 @@ zero_loop_encodeSnappyBlockAsm: MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x06, SI - LEAL 4(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeSnappyBlockAsm - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ DI, R10 - MOVQ DI, R11 - SHRQ $0x08, R11 + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x06, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeSnappyBlockAsm + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 SHLQ $0x10, R10 - IMULQ R9, R10 + IMULQ R8, R10 SHRQ $0x32, R10 - SHLQ $0x10, R11 - IMULQ R9, R11 - SHRQ $0x32, R11 - MOVL 24(SP)(R10*4), SI - MOVL 24(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - LEAL 1(CX), R10 - MOVL R10, 24(SP)(R11*4) - MOVQ DI, R10 - SHRQ $0x10, R10 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x32, R10 - MOVL CX, R9 - SUBL 16(SP), R9 - MOVL 1(DX)(R9*1), R11 - MOVQ DI, R9 - SHRQ $0x08, R9 - CMPL R9, R11 - JNE no_repeat_found_encodeSnappyBlockAsm - LEAL 1(CX), DI - MOVL 12(SP), SI - MOVL DI, R8 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 + MOVL CX, R8 SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 + JNE no_repeat_found_encodeSnappyBlockAsm + LEAL 1(CX), SI + MOVL 12(SP), BX + MOVL SI, DI + SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm repeat_extend_back_loop_encodeSnappyBlockAsm: - CMPL DI, SI - JLE repeat_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(R8*1), BL - MOVB -1(DX)(DI*1), R9 - CMPB BL, R9 + CMPL SI, BX + JBE repeat_extend_back_end_encodeSnappyBlockAsm + MOVB -1(DX)(DI*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 JNE repeat_extend_back_end_encodeSnappyBlockAsm - LEAL -1(DI), DI - DECL R8 + LEAL -1(SI), SI + DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm - MOVL DI, R8 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R9 - SUBL SI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c - JLT one_byte_repeat_emit_encodeSnappyBlockAsm - CMPL SI, $0x00000100 - JLT two_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL SI, $0x00010000 - JLT three_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL SI, $0x01000000 - JLT four_bytes_repeat_emit_encodeSnappyBlockAsm + MOVL SI, DI + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R8 + SUBL BX, DI + LEAL -1(DI), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_encodeSnappyBlockAsm + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_encodeSnappyBlockAsm + CMPL BX, $0x00010000 + JB three_bytes_repeat_emit_encodeSnappyBlockAsm + CMPL BX, $0x01000000 + JB four_bytes_repeat_emit_encodeSnappyBlockAsm MOVB $0xfc, (AX) - MOVL SI, 1(AX) + MOVL BX, 1(AX) ADDQ $0x05, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm four_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVL SI, R10 - SHRL $0x10, R10 + MOVL BX, R9 + SHRL $0x10, R9 MOVB $0xf8, (AX) - MOVW SI, 1(AX) - MOVB R10, 3(AX) + MOVW BX, 1(AX) + MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm three_bytes_repeat_emit_encodeSnappyBlockAsm: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm two_bytes_repeat_emit_encodeSnappyBlockAsm: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_repeat_emit_encodeSnappyBlockAsm + CMPL BX, $0x40 + JB memmove_repeat_emit_encodeSnappyBlockAsm JMP memmove_long_repeat_emit_encodeSnappyBlockAsm one_byte_repeat_emit_encodeSnappyBlockAsm: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), SI + LEAQ (AX)(DI*1), BX // genMemMoveShort - CMPQ R8, $0x08 - JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ DI, $0x08 + JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8 + CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R8), R9 + MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R8), R9 + MOVQ -8(R8)(DI*1), R8 + MOVQ R9, (AX) + MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 + MOVOU (R8), X0 + MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm memmove_long_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), SI + LEAQ (AX)(DI*1), BX // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 + MOVQ DI, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R8)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) ADDQ $0x20, R12 - DECQ R11 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R8)(R11*1), X4 + MOVOU -16(R8)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) + MOVQ BX, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm: ADDL $0x05, CX - MOVL CX, SI - SUBL 16(SP), SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), SI - - // matchLen - XORL R11, R11 - CMPL R8, $0x08 - JL matchlen_match4_repeat_extend_encodeSnappyBlockAsm - -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: - MOVQ (R9)(R11*1), R10 - XORQ (SI)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm - -#ifdef GOAMD64_v3 - TZCNTQ R10, R10 - -#else - BSFQ R10, R10 - -#endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 - JMP repeat_extend_forward_end_encodeSnappyBlockAsm - -matchlen_loop_repeat_extend_encodeSnappyBlockAsm: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm - JZ repeat_extend_forward_end_encodeSnappyBlockAsm - -matchlen_match4_repeat_extend_encodeSnappyBlockAsm: - CMPL R8, $0x04 - JL matchlen_match2_repeat_extend_encodeSnappyBlockAsm - MOVL (R9)(R11*1), R10 - CMPL (SI)(R11*1), R10 - JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm - SUBL $0x04, R8 - LEAL 4(R11), R11 - -matchlen_match2_repeat_extend_encodeSnappyBlockAsm: - CMPL R8, $0x02 - JL matchlen_match1_repeat_extend_encodeSnappyBlockAsm - MOVW (R9)(R11*1), R10 - CMPW (SI)(R11*1), R10 - JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm - SUBL $0x02, R8 - LEAL 2(R11), R11 - -matchlen_match1_repeat_extend_encodeSnappyBlockAsm: - CMPL R8, $0x01 - JL repeat_extend_forward_end_encodeSnappyBlockAsm - MOVB (R9)(R11*1), R10 - CMPB (SI)(R11*1), R10 - JNE repeat_extend_forward_end_encodeSnappyBlockAsm - LEAL 1(R11), R11 - -repeat_extend_forward_end_encodeSnappyBlockAsm: - ADDL R11, CX - MOVL CX, SI - SUBL DI, SI - MOVL 16(SP), DI - - // emitCopy - CMPL DI, $0x00010000 - JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm - -four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: - CMPL SI, $0x40 - JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(SI), SI - ADDQ $0x05, AX - CMPL SI, $0x04 - JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm - JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm - -four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: - TESTL SI, SI - JZ repeat_end_emit_encodeSnappyBlockAsm - MOVB $0x03, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeSnappyBlockAsm - -two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: - CMPL SI, $0x40 - JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(SI), SI - ADDQ $0x03, AX - JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm - -two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: - CMPL SI, $0x0c - JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - CMPL DI, $0x00000800 - JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0x01, BL - LEAL -16(BX)(SI*4), SI - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeSnappyBlockAsm - -emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: - MOVB $0x02, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX - -repeat_end_emit_encodeSnappyBlockAsm: - MOVL CX, 12(SP) - JMP search_loop_encodeSnappyBlockAsm - -no_repeat_found_encodeSnappyBlockAsm: - CMPL (DX)(SI*1), DI - JEQ candidate_match_encodeSnappyBlockAsm - SHRQ $0x08, DI - MOVL 24(SP)(R10*4), SI - LEAL 2(CX), R9 - CMPL (DX)(R8*1), DI - JEQ candidate2_match_encodeSnappyBlockAsm - MOVL R9, 24(SP)(R10*4) - SHRQ $0x08, DI - CMPL (DX)(SI*1), DI - JEQ candidate3_match_encodeSnappyBlockAsm - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBlockAsm - -candidate3_match_encodeSnappyBlockAsm: - ADDL $0x02, CX - JMP candidate_match_encodeSnappyBlockAsm - -candidate2_match_encodeSnappyBlockAsm: - MOVL R9, 24(SP)(R10*4) - INCL CX - MOVL R8, SI - -candidate_match_encodeSnappyBlockAsm: - MOVL 12(SP), DI - TESTL SI, SI - JZ match_extend_back_end_encodeSnappyBlockAsm - -match_extend_back_loop_encodeSnappyBlockAsm: - CMPL CX, DI - JLE match_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 - JNE match_extend_back_end_encodeSnappyBlockAsm - LEAL -1(CX), CX - DECL SI - JZ match_extend_back_end_encodeSnappyBlockAsm - JMP match_extend_back_loop_encodeSnappyBlockAsm - -match_extend_back_end_encodeSnappyBlockAsm: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 5(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) - RET - -match_dst_size_check_encodeSnappyBlockAsm: - MOVL CX, DI - MOVL 12(SP), R8 - CMPL R8, DI - JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(R8*1), DI - SUBL R8, R9 - LEAL -1(R9), R8 - CMPL R8, $0x3c - JLT one_byte_match_emit_encodeSnappyBlockAsm - CMPL R8, $0x00000100 - JLT two_bytes_match_emit_encodeSnappyBlockAsm - CMPL R8, $0x00010000 - JLT three_bytes_match_emit_encodeSnappyBlockAsm - CMPL R8, $0x01000000 - JLT four_bytes_match_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL R8, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm - -four_bytes_match_emit_encodeSnappyBlockAsm: - MOVL R8, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW R8, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm - -three_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW R8, 1(AX) - ADDQ $0x03, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm - -two_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB R8, 1(AX) - ADDQ $0x02, AX - CMPL R8, $0x40 - JL memmove_match_emit_encodeSnappyBlockAsm - JMP memmove_long_match_emit_encodeSnappyBlockAsm - -one_byte_match_emit_encodeSnappyBlockAsm: - SHLB $0x02, R8 - MOVB R8, (AX) - ADDQ $0x01, AX - -memmove_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R9*1), R8 - - // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ R9, $0x10 - JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ R9, $0x20 - JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 - JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (DI), R10 - MOVQ R10, (AX) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (DI), R10 - MOVQ -8(DI)(R9*1), DI - MOVQ R10, (AX) - MOVQ DI, -8(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (DI), X0 - MOVOU -16(DI)(R9*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - -memmove_end_copy_match_emit_encodeSnappyBlockAsm: - MOVQ R8, AX - JMP emit_literal_done_match_emit_encodeSnappyBlockAsm - -memmove_long_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R9*1), R8 - - // genMemMoveLong - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVQ R9, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 - JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(DI)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 - -emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 - JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(DI)(R12*1), X4 - MOVOU -16(DI)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R9, R12 - JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ R8, AX - -emit_literal_done_match_emit_encodeSnappyBlockAsm: -match_nolit_loop_encodeSnappyBlockAsm: - MOVL CX, DI - SUBL SI, DI - MOVL DI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, SI + MOVL CX, BX + SUBL 16(SP), BX MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 - LEAQ (DX)(SI*1), SI + LEAQ (DX)(BX*1), BX // matchLen XORL R10, R10 CMPL DI, $0x08 - JL matchlen_match4_match_nolit_encodeSnappyBlockAsm + JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm -matchlen_loopback_match_nolit_encodeSnappyBlockAsm: +matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: MOVQ (R8)(R10*1), R9 - XORQ (SI)(R10*1), R9 + XORQ (BX)(R10*1), R9 TESTQ R9, R9 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm + JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -10920,129 +10598,452 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm: #endif SARQ $0x03, R9 LEAL (R10)(R9*1), R10 - JMP match_nolit_end_encodeSnappyBlockAsm + JMP repeat_extend_forward_end_encodeSnappyBlockAsm -matchlen_loop_match_nolit_encodeSnappyBlockAsm: +matchlen_loop_repeat_extend_encodeSnappyBlockAsm: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 - JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm - JZ match_nolit_end_encodeSnappyBlockAsm + JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm + JZ repeat_extend_forward_end_encodeSnappyBlockAsm -matchlen_match4_match_nolit_encodeSnappyBlockAsm: +matchlen_match4_repeat_extend_encodeSnappyBlockAsm: CMPL DI, $0x04 - JL matchlen_match2_match_nolit_encodeSnappyBlockAsm + JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm MOVL (R8)(R10*1), R9 - CMPL (SI)(R10*1), R9 - JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm + CMPL (BX)(R10*1), R9 + JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm SUBL $0x04, DI LEAL 4(R10), R10 -matchlen_match2_match_nolit_encodeSnappyBlockAsm: +matchlen_match2_repeat_extend_encodeSnappyBlockAsm: CMPL DI, $0x02 - JL matchlen_match1_match_nolit_encodeSnappyBlockAsm + JB matchlen_match1_repeat_extend_encodeSnappyBlockAsm MOVW (R8)(R10*1), R9 - CMPW (SI)(R10*1), R9 - JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm + CMPW (BX)(R10*1), R9 + JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm SUBL $0x02, DI LEAL 2(R10), R10 -matchlen_match1_match_nolit_encodeSnappyBlockAsm: +matchlen_match1_repeat_extend_encodeSnappyBlockAsm: CMPL DI, $0x01 - JL match_nolit_end_encodeSnappyBlockAsm + JB repeat_extend_forward_end_encodeSnappyBlockAsm MOVB (R8)(R10*1), R9 - CMPB (SI)(R10*1), R9 - JNE match_nolit_end_encodeSnappyBlockAsm + CMPB (BX)(R10*1), R9 + JNE repeat_extend_forward_end_encodeSnappyBlockAsm LEAL 1(R10), R10 -match_nolit_end_encodeSnappyBlockAsm: +repeat_extend_forward_end_encodeSnappyBlockAsm: ADDL R10, CX + MOVL CX, BX + SUBL SI, BX MOVL 16(SP), SI - ADDL $0x04, R10 - MOVL CX, 12(SP) // emitCopy CMPL SI, $0x00010000 - JL two_byte_offset_match_nolit_encodeSnappyBlockAsm + JB two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm -four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: - CMPL R10, $0x40 - JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm +four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: + CMPL BX, $0x40 + JBE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm MOVB $0xff, (AX) MOVL SI, 1(AX) - LEAL -64(R10), R10 + LEAL -64(BX), BX ADDQ $0x05, AX - CMPL R10, $0x04 - JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm + CMPL BX, $0x04 + JB four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm + JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm + +four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: + TESTL BX, BX + JZ repeat_end_emit_encodeSnappyBlockAsm + XORL DI, DI + LEAL -1(DI)(BX*4), BX + MOVB BL, (AX) + MOVL SI, 1(AX) + ADDQ $0x05, AX + JMP repeat_end_emit_encodeSnappyBlockAsm + +two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm + MOVB $0xee, (AX) + MOVW SI, 1(AX) + LEAL -60(BX), BX + ADDQ $0x03, AX + JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm + +two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm + CMPL SI, $0x00000800 + JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm + LEAL -15(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeSnappyBlockAsm + +emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW SI, 1(AX) + ADDQ $0x03, AX + +repeat_end_emit_encodeSnappyBlockAsm: + MOVL CX, 12(SP) + JMP search_loop_encodeSnappyBlockAsm + +no_repeat_found_encodeSnappyBlockAsm: + CMPL (DX)(BX*1), SI + JEQ candidate_match_encodeSnappyBlockAsm + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI + JEQ candidate2_match_encodeSnappyBlockAsm + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI + JEQ candidate3_match_encodeSnappyBlockAsm + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBlockAsm + +candidate3_match_encodeSnappyBlockAsm: + ADDL $0x02, CX + JMP candidate_match_encodeSnappyBlockAsm + +candidate2_match_encodeSnappyBlockAsm: + MOVL R8, 24(SP)(R9*4) + INCL CX + MOVL DI, BX + +candidate_match_encodeSnappyBlockAsm: + MOVL 12(SP), SI + TESTL BX, BX + JZ match_extend_back_end_encodeSnappyBlockAsm + +match_extend_back_loop_encodeSnappyBlockAsm: + CMPL CX, SI + JBE match_extend_back_end_encodeSnappyBlockAsm + MOVB -1(DX)(BX*1), DI + MOVB -1(DX)(CX*1), R8 + CMPB DI, R8 + JNE match_extend_back_end_encodeSnappyBlockAsm + LEAL -1(CX), CX + DECL BX + JZ match_extend_back_end_encodeSnappyBlockAsm + JMP match_extend_back_loop_encodeSnappyBlockAsm + +match_extend_back_end_encodeSnappyBlockAsm: + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 5(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeSnappyBlockAsm + MOVQ $0x00000000, ret+48(FP) + RET + +match_dst_size_check_encodeSnappyBlockAsm: + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI + JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), DI + CMPL DI, $0x3c + JB one_byte_match_emit_encodeSnappyBlockAsm + CMPL DI, $0x00000100 + JB two_bytes_match_emit_encodeSnappyBlockAsm + CMPL DI, $0x00010000 + JB three_bytes_match_emit_encodeSnappyBlockAsm + CMPL DI, $0x01000000 + JB four_bytes_match_emit_encodeSnappyBlockAsm + MOVB $0xfc, (AX) + MOVL DI, 1(AX) + ADDQ $0x05, AX + JMP memmove_long_match_emit_encodeSnappyBlockAsm + +four_bytes_match_emit_encodeSnappyBlockAsm: + MOVL DI, R9 + SHRL $0x10, R9 + MOVB $0xf8, (AX) + MOVW DI, 1(AX) + MOVB R9, 3(AX) + ADDQ $0x04, AX + JMP memmove_long_match_emit_encodeSnappyBlockAsm + +three_bytes_match_emit_encodeSnappyBlockAsm: + MOVB $0xf4, (AX) + MOVW DI, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_match_emit_encodeSnappyBlockAsm + +two_bytes_match_emit_encodeSnappyBlockAsm: + MOVB $0xf0, (AX) + MOVB DI, 1(AX) + ADDQ $0x02, AX + CMPL DI, $0x40 + JB memmove_match_emit_encodeSnappyBlockAsm + JMP memmove_long_match_emit_encodeSnappyBlockAsm + +one_byte_match_emit_encodeSnappyBlockAsm: + SHLB $0x02, DI + MOVB DI, (AX) + ADDQ $0x01, AX + +memmove_match_emit_encodeSnappyBlockAsm: + LEAQ (AX)(R8*1), DI + + // genMemMoveShort + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8 + CMPQ R8, $0x10 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 + CMPQ R8, $0x20 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 + JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8: + MOVQ (SI), R9 + MOVQ R9, (AX) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: + MOVQ (SI), R9 + MOVQ -8(SI)(R8*1), SI + MOVQ R9, (AX) + MOVQ SI, -8(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: + MOVOU (SI), X0 + MOVOU -16(SI)(R8*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + +memmove_end_copy_match_emit_encodeSnappyBlockAsm: + MOVQ DI, AX + JMP emit_literal_done_match_emit_encodeSnappyBlockAsm + +memmove_long_match_emit_encodeSnappyBlockAsm: + LEAQ (AX)(R8*1), DI + + // genMemMoveLong + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVQ R8, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 + JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 + LEAQ -32(SI)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 + +emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) + ADDQ $0x20, R12 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 + JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back + +emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: + MOVOU -32(SI)(R11*1), X4 + MOVOU -16(SI)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ R8, R11 + JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ DI, AX + +emit_literal_done_match_emit_encodeSnappyBlockAsm: +match_nolit_loop_encodeSnappyBlockAsm: + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) + ADDL $0x04, CX + ADDL $0x04, BX + MOVQ src_len+32(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX + + // matchLen + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_encodeSnappyBlockAsm + +matchlen_loopback_match_nolit_encodeSnappyBlockAsm: + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 + JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm + +#ifdef GOAMD64_v3 + TZCNTQ R8, R8 + +#else + BSFQ R8, R8 + +#endif + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm + +matchlen_loop_match_nolit_encodeSnappyBlockAsm: + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm + JZ match_nolit_end_encodeSnappyBlockAsm + +matchlen_match4_match_nolit_encodeSnappyBlockAsm: + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_encodeSnappyBlockAsm + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 + JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm + SUBL $0x04, SI + LEAL 4(R9), R9 + +matchlen_match2_match_nolit_encodeSnappyBlockAsm: + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_encodeSnappyBlockAsm + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 + JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm + SUBL $0x02, SI + LEAL 2(R9), R9 + +matchlen_match1_match_nolit_encodeSnappyBlockAsm: + CMPL SI, $0x01 + JB match_nolit_end_encodeSnappyBlockAsm + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 + JNE match_nolit_end_encodeSnappyBlockAsm + LEAL 1(R9), R9 + +match_nolit_end_encodeSnappyBlockAsm: + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 + MOVL CX, 12(SP) + + // emitCopy + CMPL BX, $0x00010000 + JB two_byte_offset_match_nolit_encodeSnappyBlockAsm + +four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: + CMPL R9, $0x40 + JBE four_bytes_remain_match_nolit_encodeSnappyBlockAsm + MOVB $0xff, (AX) + MOVL BX, 1(AX) + LEAL -64(R9), R9 + ADDQ $0x05, AX + CMPL R9, $0x04 + JB four_bytes_remain_match_nolit_encodeSnappyBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm four_bytes_remain_match_nolit_encodeSnappyBlockAsm: - TESTL R10, R10 + TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm - MOVB $0x03, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVL SI, 1(AX) + XORL SI, SI + LEAL -1(SI)(R9*4), R9 + MOVB R9, (AX) + MOVL BX, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm two_byte_offset_match_nolit_encodeSnappyBlockAsm: - CMPL R10, $0x40 - JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(R10), R10 + MOVW BX, 1(AX) + LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: - CMPL R10, $0x0c - JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm - CMPL SI, $0x00000800 - JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm - MOVB $0x01, BL - LEAL -16(BX)(R10*4), R10 - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm + CMPL BX, $0x00000800 + JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm + LEAL -15(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm emit_copy_three_match_nolit_encodeSnappyBlockAsm: - MOVB $0x02, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVW SI, 1(AX) + LEAL -2(SI), SI + MOVB SI, (AX) + MOVW BX, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm: CMPL CX, 8(SP) - JGE emit_remainder_encodeSnappyBlockAsm - MOVQ -2(DX)(CX*1), DI + JAE emit_remainder_encodeSnappyBlockAsm + MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeSnappyBlockAsm + JB match_nolit_dst_ok_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ DI, R8 - SHRQ $0x10, DI - MOVQ DI, SI - SHLQ $0x10, R8 - IMULQ R9, R8 - SHRQ $0x32, R8 - SHLQ $0x10, SI - IMULQ R9, SI - SHRQ $0x32, SI - LEAL -2(CX), R9 - LEAQ 24(SP)(SI*4), R10 - MOVL (R10), SI - MOVL R9, 24(SP)(R8*4) - MOVL CX, (R10) - CMPL (DX)(SI*1), DI + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x10, DI + IMULQ R8, DI + SHRQ $0x32, DI + SHLQ $0x10, BX + IMULQ R8, BX + SHRQ $0x32, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm INCL CX JMP search_loop_encodeSnappyBlockAsm @@ -11052,7 +11053,7 @@ emit_remainder_encodeSnappyBlockAsm: SUBL 12(SP), CX LEAQ 5(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeSnappyBlockAsm + JB emit_remainder_ok_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET @@ -11067,13 +11068,13 @@ emit_remainder_ok_encodeSnappyBlockAsm: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeSnappyBlockAsm + JB one_byte_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeSnappyBlockAsm + JB two_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeSnappyBlockAsm + JB three_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x01000000 - JLT four_bytes_emit_remainder_encodeSnappyBlockAsm + JB four_bytes_emit_remainder_encodeSnappyBlockAsm MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX @@ -11099,7 +11100,7 @@ two_bytes_emit_remainder_encodeSnappyBlockAsm: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeSnappyBlockAsm + JB memmove_emit_remainder_encodeSnappyBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBlockAsm one_byte_emit_remainder_encodeSnappyBlockAsm: @@ -11246,8 +11247,8 @@ zero_loop_encodeSnappyBlockAsm64K: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -11257,477 +11258,200 @@ zero_loop_encodeSnappyBlockAsm64K: MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x06, SI - LEAL 4(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeSnappyBlockAsm64K - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ DI, R10 - MOVQ DI, R11 - SHRQ $0x08, R11 + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x06, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeSnappyBlockAsm64K + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 SHLQ $0x10, R10 - IMULQ R9, R10 + IMULQ R8, R10 SHRQ $0x32, R10 - SHLQ $0x10, R11 - IMULQ R9, R11 - SHRQ $0x32, R11 - MOVL 24(SP)(R10*4), SI - MOVL 24(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - LEAL 1(CX), R10 - MOVL R10, 24(SP)(R11*4) - MOVQ DI, R10 - SHRQ $0x10, R10 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x32, R10 - MOVL CX, R9 - SUBL 16(SP), R9 - MOVL 1(DX)(R9*1), R11 - MOVQ DI, R9 - SHRQ $0x08, R9 - CMPL R9, R11 - JNE no_repeat_found_encodeSnappyBlockAsm64K - LEAL 1(CX), DI - MOVL 12(SP), SI - MOVL DI, R8 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 + MOVL CX, R8 SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 + JNE no_repeat_found_encodeSnappyBlockAsm64K + LEAL 1(CX), SI + MOVL 12(SP), BX + MOVL SI, DI + SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm64K repeat_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL DI, SI - JLE repeat_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(R8*1), BL - MOVB -1(DX)(DI*1), R9 - CMPB BL, R9 + CMPL SI, BX + JBE repeat_extend_back_end_encodeSnappyBlockAsm64K + MOVB -1(DX)(DI*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 JNE repeat_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(DI), DI - DECL R8 + LEAL -1(SI), SI + DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K repeat_extend_back_end_encodeSnappyBlockAsm64K: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K - MOVL DI, R8 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R9 - SUBL SI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c - JLT one_byte_repeat_emit_encodeSnappyBlockAsm64K - CMPL SI, $0x00000100 - JLT two_bytes_repeat_emit_encodeSnappyBlockAsm64K + MOVL SI, DI + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R8 + SUBL BX, DI + LEAL -1(DI), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_encodeSnappyBlockAsm64K + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_encodeSnappyBlockAsm64K + JB three_bytes_repeat_emit_encodeSnappyBlockAsm64K + +three_bytes_repeat_emit_encodeSnappyBlockAsm64K: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K two_bytes_repeat_emit_encodeSnappyBlockAsm64K: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_repeat_emit_encodeSnappyBlockAsm64K + CMPL BX, $0x40 + JB memmove_repeat_emit_encodeSnappyBlockAsm64K JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K one_byte_repeat_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), SI + LEAQ (AX)(DI*1), BX // genMemMoveShort - CMPQ R8, $0x08 - JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ DI, $0x08 + JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8 + CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R8), R9 + MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R8), R9 + MOVQ -8(R8)(DI*1), R8 + MOVQ R9, (AX) + MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 + MOVOU (R8), X0 + MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K memmove_long_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), SI + LEAQ (AX)(DI*1), BX // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 + MOVQ DI, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R8)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) ADDQ $0x20, R12 - DECQ R11 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R8)(R11*1), X4 + MOVOU -16(R8)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) + MOVQ BX, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K: ADDL $0x05, CX - MOVL CX, SI - SUBL 16(SP), SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), SI - - // matchLen - XORL R11, R11 - CMPL R8, $0x08 - JL matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K - -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K: - MOVQ (R9)(R11*1), R10 - XORQ (SI)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K - -#ifdef GOAMD64_v3 - TZCNTQ R10, R10 - -#else - BSFQ R10, R10 - -#endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 - JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K - -matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K - JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K - -matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: - CMPL R8, $0x04 - JL matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - MOVL (R9)(R11*1), R10 - CMPL (SI)(R11*1), R10 - JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - SUBL $0x04, R8 - LEAL 4(R11), R11 - -matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K: - CMPL R8, $0x02 - JL matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K - MOVW (R9)(R11*1), R10 - CMPW (SI)(R11*1), R10 - JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K - SUBL $0x02, R8 - LEAL 2(R11), R11 - -matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K: - CMPL R8, $0x01 - JL repeat_extend_forward_end_encodeSnappyBlockAsm64K - MOVB (R9)(R11*1), R10 - CMPB (SI)(R11*1), R10 - JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K - LEAL 1(R11), R11 - -repeat_extend_forward_end_encodeSnappyBlockAsm64K: - ADDL R11, CX - MOVL CX, SI - SUBL DI, SI - MOVL 16(SP), DI - - // emitCopy -two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K: - CMPL SI, $0x40 - JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(SI), SI - ADDQ $0x03, AX - JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K - -two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K: - CMPL SI, $0x0c - JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - CMPL DI, $0x00000800 - JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - MOVB $0x01, BL - LEAL -16(BX)(SI*4), SI - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeSnappyBlockAsm64K - -emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K: - MOVB $0x02, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX - -repeat_end_emit_encodeSnappyBlockAsm64K: - MOVL CX, 12(SP) - JMP search_loop_encodeSnappyBlockAsm64K - -no_repeat_found_encodeSnappyBlockAsm64K: - CMPL (DX)(SI*1), DI - JEQ candidate_match_encodeSnappyBlockAsm64K - SHRQ $0x08, DI - MOVL 24(SP)(R10*4), SI - LEAL 2(CX), R9 - CMPL (DX)(R8*1), DI - JEQ candidate2_match_encodeSnappyBlockAsm64K - MOVL R9, 24(SP)(R10*4) - SHRQ $0x08, DI - CMPL (DX)(SI*1), DI - JEQ candidate3_match_encodeSnappyBlockAsm64K - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBlockAsm64K - -candidate3_match_encodeSnappyBlockAsm64K: - ADDL $0x02, CX - JMP candidate_match_encodeSnappyBlockAsm64K - -candidate2_match_encodeSnappyBlockAsm64K: - MOVL R9, 24(SP)(R10*4) - INCL CX - MOVL R8, SI - -candidate_match_encodeSnappyBlockAsm64K: - MOVL 12(SP), DI - TESTL SI, SI - JZ match_extend_back_end_encodeSnappyBlockAsm64K - -match_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL CX, DI - JLE match_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 - JNE match_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(CX), CX - DECL SI - JZ match_extend_back_end_encodeSnappyBlockAsm64K - JMP match_extend_back_loop_encodeSnappyBlockAsm64K - -match_extend_back_end_encodeSnappyBlockAsm64K: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) - RET - -match_dst_size_check_encodeSnappyBlockAsm64K: - MOVL CX, DI - MOVL 12(SP), R8 - CMPL R8, DI - JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(R8*1), DI - SUBL R8, R9 - LEAL -1(R9), R8 - CMPL R8, $0x3c - JLT one_byte_match_emit_encodeSnappyBlockAsm64K - CMPL R8, $0x00000100 - JLT two_bytes_match_emit_encodeSnappyBlockAsm64K - MOVB $0xf4, (AX) - MOVW R8, 1(AX) - ADDQ $0x03, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm64K - -two_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB R8, 1(AX) - ADDQ $0x02, AX - CMPL R8, $0x40 - JL memmove_match_emit_encodeSnappyBlockAsm64K - JMP memmove_long_match_emit_encodeSnappyBlockAsm64K - -one_byte_match_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, R8 - MOVB R8, (AX) - ADDQ $0x01, AX - -memmove_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R9*1), R8 - - // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ R9, $0x10 - JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ R9, $0x20 - JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 - JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (DI), R10 - MOVQ R10, (AX) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (DI), R10 - MOVQ -8(DI)(R9*1), DI - MOVQ R10, (AX) - MOVQ DI, -8(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (DI), X0 - MOVOU -16(DI)(R9*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - -memmove_end_copy_match_emit_encodeSnappyBlockAsm64K: - MOVQ R8, AX - JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K - -memmove_long_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R9*1), R8 - - // genMemMoveLong - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVQ R9, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 - JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(DI)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 - -emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 - JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(DI)(R12*1), X4 - MOVOU -16(DI)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R9, R12 - JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ R8, AX - -emit_literal_done_match_emit_encodeSnappyBlockAsm64K: -match_nolit_loop_encodeSnappyBlockAsm64K: - MOVL CX, DI - SUBL SI, DI - MOVL DI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, SI + MOVL CX, BX + SUBL 16(SP), BX MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 - LEAQ (DX)(SI*1), SI + LEAQ (DX)(BX*1), BX // matchLen XORL R10, R10 CMPL DI, $0x08 - JL matchlen_match4_match_nolit_encodeSnappyBlockAsm64K + JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K -matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: +matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K: MOVQ (R8)(R10*1), R9 - XORQ (SI)(R10*1), R9 + XORQ (BX)(R10*1), R9 TESTQ R9, R9 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm64K + JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -11738,105 +11462,388 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: #endif SARQ $0x03, R9 LEAL (R10)(R9*1), R10 - JMP match_nolit_end_encodeSnappyBlockAsm64K + JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K -matchlen_loop_match_nolit_encodeSnappyBlockAsm64K: +matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 - JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K - JZ match_nolit_end_encodeSnappyBlockAsm64K + JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K + JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K -matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: +matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: CMPL DI, $0x04 - JL matchlen_match2_match_nolit_encodeSnappyBlockAsm64K + JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K MOVL (R8)(R10*1), R9 - CMPL (SI)(R10*1), R9 - JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K + CMPL (BX)(R10*1), R9 + JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K SUBL $0x04, DI LEAL 4(R10), R10 -matchlen_match2_match_nolit_encodeSnappyBlockAsm64K: +matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K: CMPL DI, $0x02 - JL matchlen_match1_match_nolit_encodeSnappyBlockAsm64K + JB matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K MOVW (R8)(R10*1), R9 - CMPW (SI)(R10*1), R9 - JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K + CMPW (BX)(R10*1), R9 + JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K SUBL $0x02, DI LEAL 2(R10), R10 -matchlen_match1_match_nolit_encodeSnappyBlockAsm64K: +matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K: CMPL DI, $0x01 - JL match_nolit_end_encodeSnappyBlockAsm64K + JB repeat_extend_forward_end_encodeSnappyBlockAsm64K MOVB (R8)(R10*1), R9 - CMPB (SI)(R10*1), R9 - JNE match_nolit_end_encodeSnappyBlockAsm64K + CMPB (BX)(R10*1), R9 + JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K LEAL 1(R10), R10 -match_nolit_end_encodeSnappyBlockAsm64K: +repeat_extend_forward_end_encodeSnappyBlockAsm64K: ADDL R10, CX + MOVL CX, BX + SUBL SI, BX MOVL 16(SP), SI - ADDL $0x04, R10 + + // emitCopy +two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K: + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K + MOVB $0xee, (AX) + MOVW SI, 1(AX) + LEAL -60(BX), BX + ADDQ $0x03, AX + JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K + +two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K: + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K + CMPL SI, $0x00000800 + JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K + LEAL -15(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeSnappyBlockAsm64K + +emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K: + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW SI, 1(AX) + ADDQ $0x03, AX + +repeat_end_emit_encodeSnappyBlockAsm64K: + MOVL CX, 12(SP) + JMP search_loop_encodeSnappyBlockAsm64K + +no_repeat_found_encodeSnappyBlockAsm64K: + CMPL (DX)(BX*1), SI + JEQ candidate_match_encodeSnappyBlockAsm64K + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI + JEQ candidate2_match_encodeSnappyBlockAsm64K + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI + JEQ candidate3_match_encodeSnappyBlockAsm64K + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBlockAsm64K + +candidate3_match_encodeSnappyBlockAsm64K: + ADDL $0x02, CX + JMP candidate_match_encodeSnappyBlockAsm64K + +candidate2_match_encodeSnappyBlockAsm64K: + MOVL R8, 24(SP)(R9*4) + INCL CX + MOVL DI, BX + +candidate_match_encodeSnappyBlockAsm64K: + MOVL 12(SP), SI + TESTL BX, BX + JZ match_extend_back_end_encodeSnappyBlockAsm64K + +match_extend_back_loop_encodeSnappyBlockAsm64K: + CMPL CX, SI + JBE match_extend_back_end_encodeSnappyBlockAsm64K + MOVB -1(DX)(BX*1), DI + MOVB -1(DX)(CX*1), R8 + CMPB DI, R8 + JNE match_extend_back_end_encodeSnappyBlockAsm64K + LEAL -1(CX), CX + DECL BX + JZ match_extend_back_end_encodeSnappyBlockAsm64K + JMP match_extend_back_loop_encodeSnappyBlockAsm64K + +match_extend_back_end_encodeSnappyBlockAsm64K: + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeSnappyBlockAsm64K + MOVQ $0x00000000, ret+48(FP) + RET + +match_dst_size_check_encodeSnappyBlockAsm64K: + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI + JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), DI + CMPL DI, $0x3c + JB one_byte_match_emit_encodeSnappyBlockAsm64K + CMPL DI, $0x00000100 + JB two_bytes_match_emit_encodeSnappyBlockAsm64K + JB three_bytes_match_emit_encodeSnappyBlockAsm64K + +three_bytes_match_emit_encodeSnappyBlockAsm64K: + MOVB $0xf4, (AX) + MOVW DI, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_match_emit_encodeSnappyBlockAsm64K + +two_bytes_match_emit_encodeSnappyBlockAsm64K: + MOVB $0xf0, (AX) + MOVB DI, 1(AX) + ADDQ $0x02, AX + CMPL DI, $0x40 + JB memmove_match_emit_encodeSnappyBlockAsm64K + JMP memmove_long_match_emit_encodeSnappyBlockAsm64K + +one_byte_match_emit_encodeSnappyBlockAsm64K: + SHLB $0x02, DI + MOVB DI, (AX) + ADDQ $0x01, AX + +memmove_match_emit_encodeSnappyBlockAsm64K: + LEAQ (AX)(R8*1), DI + + // genMemMoveShort + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8 + CMPQ R8, $0x10 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 + CMPQ R8, $0x20 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 + JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8: + MOVQ (SI), R9 + MOVQ R9, (AX) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: + MOVQ (SI), R9 + MOVQ -8(SI)(R8*1), SI + MOVQ R9, (AX) + MOVQ SI, -8(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: + MOVOU (SI), X0 + MOVOU -16(SI)(R8*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + +memmove_end_copy_match_emit_encodeSnappyBlockAsm64K: + MOVQ DI, AX + JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K + +memmove_long_match_emit_encodeSnappyBlockAsm64K: + LEAQ (AX)(R8*1), DI + + // genMemMoveLong + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVQ R8, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 + JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 + LEAQ -32(SI)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 + +emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) + ADDQ $0x20, R12 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 + JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back + +emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: + MOVOU -32(SI)(R11*1), X4 + MOVOU -16(SI)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ R8, R11 + JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ DI, AX + +emit_literal_done_match_emit_encodeSnappyBlockAsm64K: +match_nolit_loop_encodeSnappyBlockAsm64K: + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) + ADDL $0x04, CX + ADDL $0x04, BX + MOVQ src_len+32(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX + + // matchLen + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K + +matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 + JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm64K + +#ifdef GOAMD64_v3 + TZCNTQ R8, R8 + +#else + BSFQ R8, R8 + +#endif + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm64K + +matchlen_loop_match_nolit_encodeSnappyBlockAsm64K: + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K + JZ match_nolit_end_encodeSnappyBlockAsm64K + +matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 + JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K + SUBL $0x04, SI + LEAL 4(R9), R9 + +matchlen_match2_match_nolit_encodeSnappyBlockAsm64K: + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_encodeSnappyBlockAsm64K + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 + JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K + SUBL $0x02, SI + LEAL 2(R9), R9 + +matchlen_match1_match_nolit_encodeSnappyBlockAsm64K: + CMPL SI, $0x01 + JB match_nolit_end_encodeSnappyBlockAsm64K + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 + JNE match_nolit_end_encodeSnappyBlockAsm64K + LEAL 1(R9), R9 + +match_nolit_end_encodeSnappyBlockAsm64K: + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm64K: - CMPL R10, $0x40 - JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(R10), R10 + MOVW BX, 1(AX) + LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K: - CMPL R10, $0x0c - JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - CMPL SI, $0x00000800 - JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - MOVB $0x01, BL - LEAL -16(BX)(R10*4), R10 - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K + CMPL BX, $0x00000800 + JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K + LEAL -15(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBlockAsm64K: - MOVB $0x02, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVW SI, 1(AX) + LEAL -2(SI), SI + MOVB SI, (AX) + MOVW BX, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm64K: CMPL CX, 8(SP) - JGE emit_remainder_encodeSnappyBlockAsm64K - MOVQ -2(DX)(CX*1), DI + JAE emit_remainder_encodeSnappyBlockAsm64K + MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeSnappyBlockAsm64K + JB match_nolit_dst_ok_encodeSnappyBlockAsm64K MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm64K: - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ DI, R8 - SHRQ $0x10, DI - MOVQ DI, SI - SHLQ $0x10, R8 - IMULQ R9, R8 - SHRQ $0x32, R8 - SHLQ $0x10, SI - IMULQ R9, SI - SHRQ $0x32, SI - LEAL -2(CX), R9 - LEAQ 24(SP)(SI*4), R10 - MOVL (R10), SI - MOVL R9, 24(SP)(R8*4) - MOVL CX, (R10) - CMPL (DX)(SI*1), DI + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x10, DI + IMULQ R8, DI + SHRQ $0x32, DI + SHLQ $0x10, BX + IMULQ R8, BX + SHRQ $0x32, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm64K INCL CX JMP search_loop_encodeSnappyBlockAsm64K @@ -11846,7 +11853,7 @@ emit_remainder_encodeSnappyBlockAsm64K: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeSnappyBlockAsm64K + JB emit_remainder_ok_encodeSnappyBlockAsm64K MOVQ $0x00000000, ret+48(FP) RET @@ -11861,9 +11868,12 @@ emit_remainder_ok_encodeSnappyBlockAsm64K: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeSnappyBlockAsm64K + JB one_byte_emit_remainder_encodeSnappyBlockAsm64K CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeSnappyBlockAsm64K + JB two_bytes_emit_remainder_encodeSnappyBlockAsm64K + JB three_bytes_emit_remainder_encodeSnappyBlockAsm64K + +three_bytes_emit_remainder_encodeSnappyBlockAsm64K: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -11874,7 +11884,7 @@ two_bytes_emit_remainder_encodeSnappyBlockAsm64K: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeSnappyBlockAsm64K + JB memmove_emit_remainder_encodeSnappyBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K one_byte_emit_remainder_encodeSnappyBlockAsm64K: @@ -12021,8 +12031,8 @@ zero_loop_encodeSnappyBlockAsm12B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -12032,477 +12042,200 @@ zero_loop_encodeSnappyBlockAsm12B: MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x05, SI - LEAL 4(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeSnappyBlockAsm12B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R9 - MOVQ DI, R10 - MOVQ DI, R11 - SHRQ $0x08, R11 + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x05, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeSnappyBlockAsm12B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x18, R9 + IMULQ R8, R9 + SHRQ $0x34, R9 SHLQ $0x18, R10 - IMULQ R9, R10 + IMULQ R8, R10 SHRQ $0x34, R10 - SHLQ $0x18, R11 - IMULQ R9, R11 - SHRQ $0x34, R11 - MOVL 24(SP)(R10*4), SI - MOVL 24(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - LEAL 1(CX), R10 - MOVL R10, 24(SP)(R11*4) - MOVQ DI, R10 - SHRQ $0x10, R10 - SHLQ $0x18, R10 - IMULQ R9, R10 - SHRQ $0x34, R10 - MOVL CX, R9 - SUBL 16(SP), R9 - MOVL 1(DX)(R9*1), R11 - MOVQ DI, R9 - SHRQ $0x08, R9 - CMPL R9, R11 - JNE no_repeat_found_encodeSnappyBlockAsm12B - LEAL 1(CX), DI - MOVL 12(SP), SI - MOVL DI, R8 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x18, R9 + IMULQ R8, R9 + SHRQ $0x34, R9 + MOVL CX, R8 SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 + JNE no_repeat_found_encodeSnappyBlockAsm12B + LEAL 1(CX), SI + MOVL 12(SP), BX + MOVL SI, DI + SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm12B repeat_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL DI, SI - JLE repeat_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(R8*1), BL - MOVB -1(DX)(DI*1), R9 - CMPB BL, R9 + CMPL SI, BX + JBE repeat_extend_back_end_encodeSnappyBlockAsm12B + MOVB -1(DX)(DI*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(DI), DI - DECL R8 + LEAL -1(SI), SI + DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B repeat_extend_back_end_encodeSnappyBlockAsm12B: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B - MOVL DI, R8 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R9 - SUBL SI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c - JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B - CMPL SI, $0x00000100 - JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B + MOVL SI, DI + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R8 + SUBL BX, DI + LEAL -1(DI), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_encodeSnappyBlockAsm12B + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_encodeSnappyBlockAsm12B + JB three_bytes_repeat_emit_encodeSnappyBlockAsm12B + +three_bytes_repeat_emit_encodeSnappyBlockAsm12B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B two_bytes_repeat_emit_encodeSnappyBlockAsm12B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_repeat_emit_encodeSnappyBlockAsm12B + CMPL BX, $0x40 + JB memmove_repeat_emit_encodeSnappyBlockAsm12B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B one_byte_repeat_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), SI + LEAQ (AX)(DI*1), BX // genMemMoveShort - CMPQ R8, $0x08 - JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ DI, $0x08 + JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 + CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R8), R9 + MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R8), R9 + MOVQ -8(R8)(DI*1), R8 + MOVQ R9, (AX) + MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 + MOVOU (R8), X0 + MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B memmove_long_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), SI + LEAQ (AX)(DI*1), BX // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 + MOVQ DI, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R8)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) ADDQ $0x20, R12 - DECQ R11 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R8)(R11*1), X4 + MOVOU -16(R8)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) + MOVQ BX, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: ADDL $0x05, CX - MOVL CX, SI - SUBL 16(SP), SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), SI - - // matchLen - XORL R11, R11 - CMPL R8, $0x08 - JL matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B - -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: - MOVQ (R9)(R11*1), R10 - XORQ (SI)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B - -#ifdef GOAMD64_v3 - TZCNTQ R10, R10 - -#else - BSFQ R10, R10 - -#endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 - JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B - -matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B - JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B - -matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: - CMPL R8, $0x04 - JL matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - MOVL (R9)(R11*1), R10 - CMPL (SI)(R11*1), R10 - JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - SUBL $0x04, R8 - LEAL 4(R11), R11 - -matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B: - CMPL R8, $0x02 - JL matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B - MOVW (R9)(R11*1), R10 - CMPW (SI)(R11*1), R10 - JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B - SUBL $0x02, R8 - LEAL 2(R11), R11 - -matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B: - CMPL R8, $0x01 - JL repeat_extend_forward_end_encodeSnappyBlockAsm12B - MOVB (R9)(R11*1), R10 - CMPB (SI)(R11*1), R10 - JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B - LEAL 1(R11), R11 - -repeat_extend_forward_end_encodeSnappyBlockAsm12B: - ADDL R11, CX - MOVL CX, SI - SUBL DI, SI - MOVL 16(SP), DI - - // emitCopy -two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: - CMPL SI, $0x40 - JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(SI), SI - ADDQ $0x03, AX - JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B - -two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: - CMPL SI, $0x0c - JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - CMPL DI, $0x00000800 - JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - MOVB $0x01, BL - LEAL -16(BX)(SI*4), SI - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeSnappyBlockAsm12B - -emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: - MOVB $0x02, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX - -repeat_end_emit_encodeSnappyBlockAsm12B: - MOVL CX, 12(SP) - JMP search_loop_encodeSnappyBlockAsm12B - -no_repeat_found_encodeSnappyBlockAsm12B: - CMPL (DX)(SI*1), DI - JEQ candidate_match_encodeSnappyBlockAsm12B - SHRQ $0x08, DI - MOVL 24(SP)(R10*4), SI - LEAL 2(CX), R9 - CMPL (DX)(R8*1), DI - JEQ candidate2_match_encodeSnappyBlockAsm12B - MOVL R9, 24(SP)(R10*4) - SHRQ $0x08, DI - CMPL (DX)(SI*1), DI - JEQ candidate3_match_encodeSnappyBlockAsm12B - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBlockAsm12B - -candidate3_match_encodeSnappyBlockAsm12B: - ADDL $0x02, CX - JMP candidate_match_encodeSnappyBlockAsm12B - -candidate2_match_encodeSnappyBlockAsm12B: - MOVL R9, 24(SP)(R10*4) - INCL CX - MOVL R8, SI - -candidate_match_encodeSnappyBlockAsm12B: - MOVL 12(SP), DI - TESTL SI, SI - JZ match_extend_back_end_encodeSnappyBlockAsm12B - -match_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL CX, DI - JLE match_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 - JNE match_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(CX), CX - DECL SI - JZ match_extend_back_end_encodeSnappyBlockAsm12B - JMP match_extend_back_loop_encodeSnappyBlockAsm12B - -match_extend_back_end_encodeSnappyBlockAsm12B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) - RET - -match_dst_size_check_encodeSnappyBlockAsm12B: - MOVL CX, DI - MOVL 12(SP), R8 - CMPL R8, DI - JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(R8*1), DI - SUBL R8, R9 - LEAL -1(R9), R8 - CMPL R8, $0x3c - JLT one_byte_match_emit_encodeSnappyBlockAsm12B - CMPL R8, $0x00000100 - JLT two_bytes_match_emit_encodeSnappyBlockAsm12B - MOVB $0xf4, (AX) - MOVW R8, 1(AX) - ADDQ $0x03, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm12B - -two_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB R8, 1(AX) - ADDQ $0x02, AX - CMPL R8, $0x40 - JL memmove_match_emit_encodeSnappyBlockAsm12B - JMP memmove_long_match_emit_encodeSnappyBlockAsm12B - -one_byte_match_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, R8 - MOVB R8, (AX) - ADDQ $0x01, AX - -memmove_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R9*1), R8 - - // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ R9, $0x10 - JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ R9, $0x20 - JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 - JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (DI), R10 - MOVQ R10, (AX) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (DI), R10 - MOVQ -8(DI)(R9*1), DI - MOVQ R10, (AX) - MOVQ DI, -8(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (DI), X0 - MOVOU -16(DI)(R9*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - -memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: - MOVQ R8, AX - JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B - -memmove_long_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R9*1), R8 - - // genMemMoveLong - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVQ R9, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 - JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(DI)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 - -emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 - JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(DI)(R12*1), X4 - MOVOU -16(DI)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R9, R12 - JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ R8, AX - -emit_literal_done_match_emit_encodeSnappyBlockAsm12B: -match_nolit_loop_encodeSnappyBlockAsm12B: - MOVL CX, DI - SUBL SI, DI - MOVL DI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, SI + MOVL CX, BX + SUBL 16(SP), BX MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 - LEAQ (DX)(SI*1), SI + LEAQ (DX)(BX*1), BX // matchLen XORL R10, R10 CMPL DI, $0x08 - JL matchlen_match4_match_nolit_encodeSnappyBlockAsm12B + JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B -matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: +matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: MOVQ (R8)(R10*1), R9 - XORQ (SI)(R10*1), R9 + XORQ (BX)(R10*1), R9 TESTQ R9, R9 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B + JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -12513,105 +12246,388 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: #endif SARQ $0x03, R9 LEAL (R10)(R9*1), R10 - JMP match_nolit_end_encodeSnappyBlockAsm12B + JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B -matchlen_loop_match_nolit_encodeSnappyBlockAsm12B: +matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 - JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B - JZ match_nolit_end_encodeSnappyBlockAsm12B + JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B + JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B -matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: +matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: CMPL DI, $0x04 - JL matchlen_match2_match_nolit_encodeSnappyBlockAsm12B + JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B MOVL (R8)(R10*1), R9 - CMPL (SI)(R10*1), R9 - JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B + CMPL (BX)(R10*1), R9 + JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B SUBL $0x04, DI LEAL 4(R10), R10 -matchlen_match2_match_nolit_encodeSnappyBlockAsm12B: +matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B: CMPL DI, $0x02 - JL matchlen_match1_match_nolit_encodeSnappyBlockAsm12B + JB matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B MOVW (R8)(R10*1), R9 - CMPW (SI)(R10*1), R9 - JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B + CMPW (BX)(R10*1), R9 + JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B SUBL $0x02, DI LEAL 2(R10), R10 -matchlen_match1_match_nolit_encodeSnappyBlockAsm12B: +matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B: CMPL DI, $0x01 - JL match_nolit_end_encodeSnappyBlockAsm12B + JB repeat_extend_forward_end_encodeSnappyBlockAsm12B MOVB (R8)(R10*1), R9 - CMPB (SI)(R10*1), R9 - JNE match_nolit_end_encodeSnappyBlockAsm12B + CMPB (BX)(R10*1), R9 + JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B LEAL 1(R10), R10 -match_nolit_end_encodeSnappyBlockAsm12B: +repeat_extend_forward_end_encodeSnappyBlockAsm12B: ADDL R10, CX + MOVL CX, BX + SUBL SI, BX MOVL 16(SP), SI - ADDL $0x04, R10 + + // emitCopy +two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B + MOVB $0xee, (AX) + MOVW SI, 1(AX) + LEAL -60(BX), BX + ADDQ $0x03, AX + JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B + +two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B + CMPL SI, $0x00000800 + JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B + LEAL -15(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeSnappyBlockAsm12B + +emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW SI, 1(AX) + ADDQ $0x03, AX + +repeat_end_emit_encodeSnappyBlockAsm12B: + MOVL CX, 12(SP) + JMP search_loop_encodeSnappyBlockAsm12B + +no_repeat_found_encodeSnappyBlockAsm12B: + CMPL (DX)(BX*1), SI + JEQ candidate_match_encodeSnappyBlockAsm12B + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI + JEQ candidate2_match_encodeSnappyBlockAsm12B + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI + JEQ candidate3_match_encodeSnappyBlockAsm12B + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBlockAsm12B + +candidate3_match_encodeSnappyBlockAsm12B: + ADDL $0x02, CX + JMP candidate_match_encodeSnappyBlockAsm12B + +candidate2_match_encodeSnappyBlockAsm12B: + MOVL R8, 24(SP)(R9*4) + INCL CX + MOVL DI, BX + +candidate_match_encodeSnappyBlockAsm12B: + MOVL 12(SP), SI + TESTL BX, BX + JZ match_extend_back_end_encodeSnappyBlockAsm12B + +match_extend_back_loop_encodeSnappyBlockAsm12B: + CMPL CX, SI + JBE match_extend_back_end_encodeSnappyBlockAsm12B + MOVB -1(DX)(BX*1), DI + MOVB -1(DX)(CX*1), R8 + CMPB DI, R8 + JNE match_extend_back_end_encodeSnappyBlockAsm12B + LEAL -1(CX), CX + DECL BX + JZ match_extend_back_end_encodeSnappyBlockAsm12B + JMP match_extend_back_loop_encodeSnappyBlockAsm12B + +match_extend_back_end_encodeSnappyBlockAsm12B: + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeSnappyBlockAsm12B + MOVQ $0x00000000, ret+48(FP) + RET + +match_dst_size_check_encodeSnappyBlockAsm12B: + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI + JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), DI + CMPL DI, $0x3c + JB one_byte_match_emit_encodeSnappyBlockAsm12B + CMPL DI, $0x00000100 + JB two_bytes_match_emit_encodeSnappyBlockAsm12B + JB three_bytes_match_emit_encodeSnappyBlockAsm12B + +three_bytes_match_emit_encodeSnappyBlockAsm12B: + MOVB $0xf4, (AX) + MOVW DI, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_match_emit_encodeSnappyBlockAsm12B + +two_bytes_match_emit_encodeSnappyBlockAsm12B: + MOVB $0xf0, (AX) + MOVB DI, 1(AX) + ADDQ $0x02, AX + CMPL DI, $0x40 + JB memmove_match_emit_encodeSnappyBlockAsm12B + JMP memmove_long_match_emit_encodeSnappyBlockAsm12B + +one_byte_match_emit_encodeSnappyBlockAsm12B: + SHLB $0x02, DI + MOVB DI, (AX) + ADDQ $0x01, AX + +memmove_match_emit_encodeSnappyBlockAsm12B: + LEAQ (AX)(R8*1), DI + + // genMemMoveShort + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 + CMPQ R8, $0x10 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 + CMPQ R8, $0x20 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 + JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: + MOVQ (SI), R9 + MOVQ R9, (AX) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: + MOVQ (SI), R9 + MOVQ -8(SI)(R8*1), SI + MOVQ R9, (AX) + MOVQ SI, -8(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: + MOVOU (SI), X0 + MOVOU -16(SI)(R8*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + +memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: + MOVQ DI, AX + JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B + +memmove_long_match_emit_encodeSnappyBlockAsm12B: + LEAQ (AX)(R8*1), DI + + // genMemMoveLong + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVQ R8, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 + JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 + LEAQ -32(SI)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 + +emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) + ADDQ $0x20, R12 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 + JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back + +emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: + MOVOU -32(SI)(R11*1), X4 + MOVOU -16(SI)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ R8, R11 + JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ DI, AX + +emit_literal_done_match_emit_encodeSnappyBlockAsm12B: +match_nolit_loop_encodeSnappyBlockAsm12B: + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) + ADDL $0x04, CX + ADDL $0x04, BX + MOVQ src_len+32(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX + + // matchLen + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B + +matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 + JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B + +#ifdef GOAMD64_v3 + TZCNTQ R8, R8 + +#else + BSFQ R8, R8 + +#endif + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm12B + +matchlen_loop_match_nolit_encodeSnappyBlockAsm12B: + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B + JZ match_nolit_end_encodeSnappyBlockAsm12B + +matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 + JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B + SUBL $0x04, SI + LEAL 4(R9), R9 + +matchlen_match2_match_nolit_encodeSnappyBlockAsm12B: + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_encodeSnappyBlockAsm12B + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 + JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B + SUBL $0x02, SI + LEAL 2(R9), R9 + +matchlen_match1_match_nolit_encodeSnappyBlockAsm12B: + CMPL SI, $0x01 + JB match_nolit_end_encodeSnappyBlockAsm12B + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 + JNE match_nolit_end_encodeSnappyBlockAsm12B + LEAL 1(R9), R9 + +match_nolit_end_encodeSnappyBlockAsm12B: + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: - CMPL R10, $0x40 - JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(R10), R10 + MOVW BX, 1(AX) + LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: - CMPL R10, $0x0c - JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - CMPL SI, $0x00000800 - JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - MOVB $0x01, BL - LEAL -16(BX)(R10*4), R10 - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B + CMPL BX, $0x00000800 + JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B + LEAL -15(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: - MOVB $0x02, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVW SI, 1(AX) + LEAL -2(SI), SI + MOVB SI, (AX) + MOVW BX, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: CMPL CX, 8(SP) - JGE emit_remainder_encodeSnappyBlockAsm12B - MOVQ -2(DX)(CX*1), DI + JAE emit_remainder_encodeSnappyBlockAsm12B + MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeSnappyBlockAsm12B + JB match_nolit_dst_ok_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R9 - MOVQ DI, R8 - SHRQ $0x10, DI - MOVQ DI, SI - SHLQ $0x18, R8 - IMULQ R9, R8 - SHRQ $0x34, R8 - SHLQ $0x18, SI - IMULQ R9, SI - SHRQ $0x34, SI - LEAL -2(CX), R9 - LEAQ 24(SP)(SI*4), R10 - MOVL (R10), SI - MOVL R9, 24(SP)(R8*4) - MOVL CX, (R10) - CMPL (DX)(SI*1), DI + MOVQ $0x000000cf1bbcdcbb, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x18, DI + IMULQ R8, DI + SHRQ $0x34, DI + SHLQ $0x18, BX + IMULQ R8, BX + SHRQ $0x34, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm12B INCL CX JMP search_loop_encodeSnappyBlockAsm12B @@ -12621,7 +12637,7 @@ emit_remainder_encodeSnappyBlockAsm12B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeSnappyBlockAsm12B + JB emit_remainder_ok_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET @@ -12636,9 +12652,12 @@ emit_remainder_ok_encodeSnappyBlockAsm12B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B + JB one_byte_emit_remainder_encodeSnappyBlockAsm12B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B + JB two_bytes_emit_remainder_encodeSnappyBlockAsm12B + JB three_bytes_emit_remainder_encodeSnappyBlockAsm12B + +three_bytes_emit_remainder_encodeSnappyBlockAsm12B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -12649,7 +12668,7 @@ two_bytes_emit_remainder_encodeSnappyBlockAsm12B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeSnappyBlockAsm12B + JB memmove_emit_remainder_encodeSnappyBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B one_byte_emit_remainder_encodeSnappyBlockAsm12B: @@ -12796,8 +12815,8 @@ zero_loop_encodeSnappyBlockAsm10B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -12807,477 +12826,200 @@ zero_loop_encodeSnappyBlockAsm10B: MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x05, SI - LEAL 4(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeSnappyBlockAsm10B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x9e3779b1, R9 - MOVQ DI, R10 - MOVQ DI, R11 - SHRQ $0x08, R11 + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x05, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeSnappyBlockAsm10B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x9e3779b1, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x20, R9 + IMULQ R8, R9 + SHRQ $0x36, R9 SHLQ $0x20, R10 - IMULQ R9, R10 + IMULQ R8, R10 SHRQ $0x36, R10 - SHLQ $0x20, R11 - IMULQ R9, R11 - SHRQ $0x36, R11 - MOVL 24(SP)(R10*4), SI - MOVL 24(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - LEAL 1(CX), R10 - MOVL R10, 24(SP)(R11*4) - MOVQ DI, R10 - SHRQ $0x10, R10 - SHLQ $0x20, R10 - IMULQ R9, R10 - SHRQ $0x36, R10 - MOVL CX, R9 - SUBL 16(SP), R9 - MOVL 1(DX)(R9*1), R11 - MOVQ DI, R9 - SHRQ $0x08, R9 - CMPL R9, R11 - JNE no_repeat_found_encodeSnappyBlockAsm10B - LEAL 1(CX), DI - MOVL 12(SP), SI - MOVL DI, R8 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x20, R9 + IMULQ R8, R9 + SHRQ $0x36, R9 + MOVL CX, R8 SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 + JNE no_repeat_found_encodeSnappyBlockAsm10B + LEAL 1(CX), SI + MOVL 12(SP), BX + MOVL SI, DI + SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm10B repeat_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL DI, SI - JLE repeat_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(R8*1), BL - MOVB -1(DX)(DI*1), R9 - CMPB BL, R9 + CMPL SI, BX + JBE repeat_extend_back_end_encodeSnappyBlockAsm10B + MOVB -1(DX)(DI*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(DI), DI - DECL R8 + LEAL -1(SI), SI + DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B repeat_extend_back_end_encodeSnappyBlockAsm10B: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B - MOVL DI, R8 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R9 - SUBL SI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c - JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B - CMPL SI, $0x00000100 - JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10B + MOVL SI, DI + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R8 + SUBL BX, DI + LEAL -1(DI), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_encodeSnappyBlockAsm10B + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_encodeSnappyBlockAsm10B + JB three_bytes_repeat_emit_encodeSnappyBlockAsm10B + +three_bytes_repeat_emit_encodeSnappyBlockAsm10B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B two_bytes_repeat_emit_encodeSnappyBlockAsm10B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_repeat_emit_encodeSnappyBlockAsm10B + CMPL BX, $0x40 + JB memmove_repeat_emit_encodeSnappyBlockAsm10B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B one_byte_repeat_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), SI + LEAQ (AX)(DI*1), BX // genMemMoveShort - CMPQ R8, $0x08 - JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ DI, $0x08 + JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8 + CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R8), R9 + MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R8), R9 + MOVQ -8(R8)(DI*1), R8 + MOVQ R9, (AX) + MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 + MOVOU (R8), X0 + MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B memmove_long_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), SI + LEAQ (AX)(DI*1), BX // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 + MOVQ DI, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R8)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) ADDQ $0x20, R12 - DECQ R11 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R8)(R11*1), X4 + MOVOU -16(R8)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) + MOVQ BX, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: ADDL $0x05, CX - MOVL CX, SI - SUBL 16(SP), SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), SI - - // matchLen - XORL R11, R11 - CMPL R8, $0x08 - JL matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B - -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: - MOVQ (R9)(R11*1), R10 - XORQ (SI)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B - -#ifdef GOAMD64_v3 - TZCNTQ R10, R10 - -#else - BSFQ R10, R10 - -#endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 - JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B - -matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B - JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B - -matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: - CMPL R8, $0x04 - JL matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - MOVL (R9)(R11*1), R10 - CMPL (SI)(R11*1), R10 - JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - SUBL $0x04, R8 - LEAL 4(R11), R11 - -matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B: - CMPL R8, $0x02 - JL matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B - MOVW (R9)(R11*1), R10 - CMPW (SI)(R11*1), R10 - JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B - SUBL $0x02, R8 - LEAL 2(R11), R11 - -matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B: - CMPL R8, $0x01 - JL repeat_extend_forward_end_encodeSnappyBlockAsm10B - MOVB (R9)(R11*1), R10 - CMPB (SI)(R11*1), R10 - JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B - LEAL 1(R11), R11 - -repeat_extend_forward_end_encodeSnappyBlockAsm10B: - ADDL R11, CX - MOVL CX, SI - SUBL DI, SI - MOVL 16(SP), DI - - // emitCopy -two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: - CMPL SI, $0x40 - JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(SI), SI - ADDQ $0x03, AX - JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B - -two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: - CMPL SI, $0x0c - JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - CMPL DI, $0x00000800 - JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - MOVB $0x01, BL - LEAL -16(BX)(SI*4), SI - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeSnappyBlockAsm10B - -emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: - MOVB $0x02, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX - -repeat_end_emit_encodeSnappyBlockAsm10B: - MOVL CX, 12(SP) - JMP search_loop_encodeSnappyBlockAsm10B - -no_repeat_found_encodeSnappyBlockAsm10B: - CMPL (DX)(SI*1), DI - JEQ candidate_match_encodeSnappyBlockAsm10B - SHRQ $0x08, DI - MOVL 24(SP)(R10*4), SI - LEAL 2(CX), R9 - CMPL (DX)(R8*1), DI - JEQ candidate2_match_encodeSnappyBlockAsm10B - MOVL R9, 24(SP)(R10*4) - SHRQ $0x08, DI - CMPL (DX)(SI*1), DI - JEQ candidate3_match_encodeSnappyBlockAsm10B - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBlockAsm10B - -candidate3_match_encodeSnappyBlockAsm10B: - ADDL $0x02, CX - JMP candidate_match_encodeSnappyBlockAsm10B - -candidate2_match_encodeSnappyBlockAsm10B: - MOVL R9, 24(SP)(R10*4) - INCL CX - MOVL R8, SI - -candidate_match_encodeSnappyBlockAsm10B: - MOVL 12(SP), DI - TESTL SI, SI - JZ match_extend_back_end_encodeSnappyBlockAsm10B - -match_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL CX, DI - JLE match_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 - JNE match_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(CX), CX - DECL SI - JZ match_extend_back_end_encodeSnappyBlockAsm10B - JMP match_extend_back_loop_encodeSnappyBlockAsm10B - -match_extend_back_end_encodeSnappyBlockAsm10B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) - RET - -match_dst_size_check_encodeSnappyBlockAsm10B: - MOVL CX, DI - MOVL 12(SP), R8 - CMPL R8, DI - JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(R8*1), DI - SUBL R8, R9 - LEAL -1(R9), R8 - CMPL R8, $0x3c - JLT one_byte_match_emit_encodeSnappyBlockAsm10B - CMPL R8, $0x00000100 - JLT two_bytes_match_emit_encodeSnappyBlockAsm10B - MOVB $0xf4, (AX) - MOVW R8, 1(AX) - ADDQ $0x03, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm10B - -two_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB R8, 1(AX) - ADDQ $0x02, AX - CMPL R8, $0x40 - JL memmove_match_emit_encodeSnappyBlockAsm10B - JMP memmove_long_match_emit_encodeSnappyBlockAsm10B - -one_byte_match_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, R8 - MOVB R8, (AX) - ADDQ $0x01, AX - -memmove_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R9*1), R8 - - // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ R9, $0x10 - JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ R9, $0x20 - JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 - JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (DI), R10 - MOVQ R10, (AX) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (DI), R10 - MOVQ -8(DI)(R9*1), DI - MOVQ R10, (AX) - MOVQ DI, -8(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (DI), X0 - MOVOU -16(DI)(R9*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - -memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: - MOVQ R8, AX - JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B - -memmove_long_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R9*1), R8 - - // genMemMoveLong - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVQ R9, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 - JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(DI)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 - -emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 - JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(DI)(R12*1), X4 - MOVOU -16(DI)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R9, R12 - JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ R8, AX - -emit_literal_done_match_emit_encodeSnappyBlockAsm10B: -match_nolit_loop_encodeSnappyBlockAsm10B: - MOVL CX, DI - SUBL SI, DI - MOVL DI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, SI + MOVL CX, BX + SUBL 16(SP), BX MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 - LEAQ (DX)(SI*1), SI + LEAQ (DX)(BX*1), BX // matchLen XORL R10, R10 CMPL DI, $0x08 - JL matchlen_match4_match_nolit_encodeSnappyBlockAsm10B + JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B -matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: +matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: MOVQ (R8)(R10*1), R9 - XORQ (SI)(R10*1), R9 + XORQ (BX)(R10*1), R9 TESTQ R9, R9 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B + JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -13288,105 +13030,388 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: #endif SARQ $0x03, R9 LEAL (R10)(R9*1), R10 - JMP match_nolit_end_encodeSnappyBlockAsm10B + JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B -matchlen_loop_match_nolit_encodeSnappyBlockAsm10B: +matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 - JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B - JZ match_nolit_end_encodeSnappyBlockAsm10B + JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B + JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B -matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: +matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: CMPL DI, $0x04 - JL matchlen_match2_match_nolit_encodeSnappyBlockAsm10B + JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B MOVL (R8)(R10*1), R9 - CMPL (SI)(R10*1), R9 - JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B + CMPL (BX)(R10*1), R9 + JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B SUBL $0x04, DI LEAL 4(R10), R10 -matchlen_match2_match_nolit_encodeSnappyBlockAsm10B: +matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B: CMPL DI, $0x02 - JL matchlen_match1_match_nolit_encodeSnappyBlockAsm10B + JB matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B MOVW (R8)(R10*1), R9 - CMPW (SI)(R10*1), R9 - JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B + CMPW (BX)(R10*1), R9 + JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B SUBL $0x02, DI LEAL 2(R10), R10 -matchlen_match1_match_nolit_encodeSnappyBlockAsm10B: +matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B: CMPL DI, $0x01 - JL match_nolit_end_encodeSnappyBlockAsm10B + JB repeat_extend_forward_end_encodeSnappyBlockAsm10B MOVB (R8)(R10*1), R9 - CMPB (SI)(R10*1), R9 - JNE match_nolit_end_encodeSnappyBlockAsm10B + CMPB (BX)(R10*1), R9 + JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B LEAL 1(R10), R10 -match_nolit_end_encodeSnappyBlockAsm10B: +repeat_extend_forward_end_encodeSnappyBlockAsm10B: ADDL R10, CX + MOVL CX, BX + SUBL SI, BX MOVL 16(SP), SI - ADDL $0x04, R10 + + // emitCopy +two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B + MOVB $0xee, (AX) + MOVW SI, 1(AX) + LEAL -60(BX), BX + ADDQ $0x03, AX + JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B + +two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B + CMPL SI, $0x00000800 + JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B + LEAL -15(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeSnappyBlockAsm10B + +emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW SI, 1(AX) + ADDQ $0x03, AX + +repeat_end_emit_encodeSnappyBlockAsm10B: + MOVL CX, 12(SP) + JMP search_loop_encodeSnappyBlockAsm10B + +no_repeat_found_encodeSnappyBlockAsm10B: + CMPL (DX)(BX*1), SI + JEQ candidate_match_encodeSnappyBlockAsm10B + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI + JEQ candidate2_match_encodeSnappyBlockAsm10B + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI + JEQ candidate3_match_encodeSnappyBlockAsm10B + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBlockAsm10B + +candidate3_match_encodeSnappyBlockAsm10B: + ADDL $0x02, CX + JMP candidate_match_encodeSnappyBlockAsm10B + +candidate2_match_encodeSnappyBlockAsm10B: + MOVL R8, 24(SP)(R9*4) + INCL CX + MOVL DI, BX + +candidate_match_encodeSnappyBlockAsm10B: + MOVL 12(SP), SI + TESTL BX, BX + JZ match_extend_back_end_encodeSnappyBlockAsm10B + +match_extend_back_loop_encodeSnappyBlockAsm10B: + CMPL CX, SI + JBE match_extend_back_end_encodeSnappyBlockAsm10B + MOVB -1(DX)(BX*1), DI + MOVB -1(DX)(CX*1), R8 + CMPB DI, R8 + JNE match_extend_back_end_encodeSnappyBlockAsm10B + LEAL -1(CX), CX + DECL BX + JZ match_extend_back_end_encodeSnappyBlockAsm10B + JMP match_extend_back_loop_encodeSnappyBlockAsm10B + +match_extend_back_end_encodeSnappyBlockAsm10B: + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeSnappyBlockAsm10B + MOVQ $0x00000000, ret+48(FP) + RET + +match_dst_size_check_encodeSnappyBlockAsm10B: + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI + JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), DI + CMPL DI, $0x3c + JB one_byte_match_emit_encodeSnappyBlockAsm10B + CMPL DI, $0x00000100 + JB two_bytes_match_emit_encodeSnappyBlockAsm10B + JB three_bytes_match_emit_encodeSnappyBlockAsm10B + +three_bytes_match_emit_encodeSnappyBlockAsm10B: + MOVB $0xf4, (AX) + MOVW DI, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_match_emit_encodeSnappyBlockAsm10B + +two_bytes_match_emit_encodeSnappyBlockAsm10B: + MOVB $0xf0, (AX) + MOVB DI, 1(AX) + ADDQ $0x02, AX + CMPL DI, $0x40 + JB memmove_match_emit_encodeSnappyBlockAsm10B + JMP memmove_long_match_emit_encodeSnappyBlockAsm10B + +one_byte_match_emit_encodeSnappyBlockAsm10B: + SHLB $0x02, DI + MOVB DI, (AX) + ADDQ $0x01, AX + +memmove_match_emit_encodeSnappyBlockAsm10B: + LEAQ (AX)(R8*1), DI + + // genMemMoveShort + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8 + CMPQ R8, $0x10 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 + CMPQ R8, $0x20 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 + JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8: + MOVQ (SI), R9 + MOVQ R9, (AX) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: + MOVQ (SI), R9 + MOVQ -8(SI)(R8*1), SI + MOVQ R9, (AX) + MOVQ SI, -8(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: + MOVOU (SI), X0 + MOVOU -16(SI)(R8*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + +memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: + MOVQ DI, AX + JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B + +memmove_long_match_emit_encodeSnappyBlockAsm10B: + LEAQ (AX)(R8*1), DI + + // genMemMoveLong + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVQ R8, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 + JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 + LEAQ -32(SI)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 + +emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) + ADDQ $0x20, R12 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 + JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back + +emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: + MOVOU -32(SI)(R11*1), X4 + MOVOU -16(SI)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ R8, R11 + JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ DI, AX + +emit_literal_done_match_emit_encodeSnappyBlockAsm10B: +match_nolit_loop_encodeSnappyBlockAsm10B: + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) + ADDL $0x04, CX + ADDL $0x04, BX + MOVQ src_len+32(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX + + // matchLen + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B + +matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 + JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B + +#ifdef GOAMD64_v3 + TZCNTQ R8, R8 + +#else + BSFQ R8, R8 + +#endif + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm10B + +matchlen_loop_match_nolit_encodeSnappyBlockAsm10B: + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B + JZ match_nolit_end_encodeSnappyBlockAsm10B + +matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 + JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B + SUBL $0x04, SI + LEAL 4(R9), R9 + +matchlen_match2_match_nolit_encodeSnappyBlockAsm10B: + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_encodeSnappyBlockAsm10B + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 + JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B + SUBL $0x02, SI + LEAL 2(R9), R9 + +matchlen_match1_match_nolit_encodeSnappyBlockAsm10B: + CMPL SI, $0x01 + JB match_nolit_end_encodeSnappyBlockAsm10B + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 + JNE match_nolit_end_encodeSnappyBlockAsm10B + LEAL 1(R9), R9 + +match_nolit_end_encodeSnappyBlockAsm10B: + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: - CMPL R10, $0x40 - JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(R10), R10 + MOVW BX, 1(AX) + LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: - CMPL R10, $0x0c - JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - CMPL SI, $0x00000800 - JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - MOVB $0x01, BL - LEAL -16(BX)(R10*4), R10 - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B + CMPL BX, $0x00000800 + JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B + LEAL -15(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: - MOVB $0x02, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVW SI, 1(AX) + LEAL -2(SI), SI + MOVB SI, (AX) + MOVW BX, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: CMPL CX, 8(SP) - JGE emit_remainder_encodeSnappyBlockAsm10B - MOVQ -2(DX)(CX*1), DI + JAE emit_remainder_encodeSnappyBlockAsm10B + MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeSnappyBlockAsm10B + JB match_nolit_dst_ok_encodeSnappyBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm10B: - MOVQ $0x9e3779b1, R9 - MOVQ DI, R8 - SHRQ $0x10, DI - MOVQ DI, SI - SHLQ $0x20, R8 - IMULQ R9, R8 - SHRQ $0x36, R8 - SHLQ $0x20, SI - IMULQ R9, SI - SHRQ $0x36, SI - LEAL -2(CX), R9 - LEAQ 24(SP)(SI*4), R10 - MOVL (R10), SI - MOVL R9, 24(SP)(R8*4) - MOVL CX, (R10) - CMPL (DX)(SI*1), DI + MOVQ $0x9e3779b1, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x20, DI + IMULQ R8, DI + SHRQ $0x36, DI + SHLQ $0x20, BX + IMULQ R8, BX + SHRQ $0x36, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm10B INCL CX JMP search_loop_encodeSnappyBlockAsm10B @@ -13396,7 +13421,7 @@ emit_remainder_encodeSnappyBlockAsm10B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeSnappyBlockAsm10B + JB emit_remainder_ok_encodeSnappyBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET @@ -13411,9 +13436,12 @@ emit_remainder_ok_encodeSnappyBlockAsm10B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B + JB one_byte_emit_remainder_encodeSnappyBlockAsm10B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B + JB two_bytes_emit_remainder_encodeSnappyBlockAsm10B + JB three_bytes_emit_remainder_encodeSnappyBlockAsm10B + +three_bytes_emit_remainder_encodeSnappyBlockAsm10B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -13424,7 +13452,7 @@ two_bytes_emit_remainder_encodeSnappyBlockAsm10B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeSnappyBlockAsm10B + JB memmove_emit_remainder_encodeSnappyBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B one_byte_emit_remainder_encodeSnappyBlockAsm10B: @@ -13571,8 +13599,8 @@ zero_loop_encodeSnappyBlockAsm8B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -13582,475 +13610,200 @@ zero_loop_encodeSnappyBlockAsm8B: MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x04, SI - LEAL 4(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeSnappyBlockAsm8B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x9e3779b1, R9 - MOVQ DI, R10 - MOVQ DI, R11 - SHRQ $0x08, R11 + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x04, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeSnappyBlockAsm8B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x9e3779b1, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x20, R9 + IMULQ R8, R9 + SHRQ $0x38, R9 SHLQ $0x20, R10 - IMULQ R9, R10 + IMULQ R8, R10 SHRQ $0x38, R10 - SHLQ $0x20, R11 - IMULQ R9, R11 - SHRQ $0x38, R11 - MOVL 24(SP)(R10*4), SI - MOVL 24(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - LEAL 1(CX), R10 - MOVL R10, 24(SP)(R11*4) - MOVQ DI, R10 - SHRQ $0x10, R10 - SHLQ $0x20, R10 - IMULQ R9, R10 - SHRQ $0x38, R10 - MOVL CX, R9 - SUBL 16(SP), R9 - MOVL 1(DX)(R9*1), R11 - MOVQ DI, R9 - SHRQ $0x08, R9 - CMPL R9, R11 - JNE no_repeat_found_encodeSnappyBlockAsm8B - LEAL 1(CX), DI - MOVL 12(SP), SI - MOVL DI, R8 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x20, R9 + IMULQ R8, R9 + SHRQ $0x38, R9 + MOVL CX, R8 SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 + JNE no_repeat_found_encodeSnappyBlockAsm8B + LEAL 1(CX), SI + MOVL 12(SP), BX + MOVL SI, DI + SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm8B repeat_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL DI, SI - JLE repeat_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(R8*1), BL - MOVB -1(DX)(DI*1), R9 - CMPB BL, R9 + CMPL SI, BX + JBE repeat_extend_back_end_encodeSnappyBlockAsm8B + MOVB -1(DX)(DI*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 JNE repeat_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(DI), DI - DECL R8 + LEAL -1(SI), SI + DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B repeat_extend_back_end_encodeSnappyBlockAsm8B: - MOVL 12(SP), SI - CMPL SI, DI + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B - MOVL DI, R8 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R9 - SUBL SI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c - JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B - CMPL SI, $0x00000100 - JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B + MOVL SI, DI + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R8 + SUBL BX, DI + LEAL -1(DI), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_encodeSnappyBlockAsm8B + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_encodeSnappyBlockAsm8B + JB three_bytes_repeat_emit_encodeSnappyBlockAsm8B + +three_bytes_repeat_emit_encodeSnappyBlockAsm8B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B two_bytes_repeat_emit_encodeSnappyBlockAsm8B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_repeat_emit_encodeSnappyBlockAsm8B + CMPL BX, $0x40 + JB memmove_repeat_emit_encodeSnappyBlockAsm8B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B one_byte_repeat_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), SI + LEAQ (AX)(DI*1), BX // genMemMoveShort - CMPQ R8, $0x08 - JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ DI, $0x08 + JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8 + CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R8), R9 + MOVQ R9, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R8), R9 + MOVQ -8(R8)(DI*1), R8 + MOVQ R9, (AX) + MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 + MOVOU (R8), X0 + MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B memmove_long_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), SI + LEAQ (AX)(DI*1), BX // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R8), X0 + MOVOU 16(R8), X1 + MOVOU -32(R8)(DI*1), X2 + MOVOU -16(R8)(DI*1), X3 + MOVQ DI, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R8)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) ADDQ $0x20, R12 - DECQ R11 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R8)(R11*1), X4 + MOVOU -16(R8)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(DI*1) + MOVOU X3, -16(AX)(DI*1) + MOVQ BX, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: ADDL $0x05, CX - MOVL CX, SI - SUBL 16(SP), SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), SI - - // matchLen - XORL R11, R11 - CMPL R8, $0x08 - JL matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B - -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: - MOVQ (R9)(R11*1), R10 - XORQ (SI)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B - -#ifdef GOAMD64_v3 - TZCNTQ R10, R10 - -#else - BSFQ R10, R10 - -#endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 - JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B - -matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B - JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B - -matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: - CMPL R8, $0x04 - JL matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - MOVL (R9)(R11*1), R10 - CMPL (SI)(R11*1), R10 - JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - SUBL $0x04, R8 - LEAL 4(R11), R11 - -matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B: - CMPL R8, $0x02 - JL matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B - MOVW (R9)(R11*1), R10 - CMPW (SI)(R11*1), R10 - JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B - SUBL $0x02, R8 - LEAL 2(R11), R11 - -matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B: - CMPL R8, $0x01 - JL repeat_extend_forward_end_encodeSnappyBlockAsm8B - MOVB (R9)(R11*1), R10 - CMPB (SI)(R11*1), R10 - JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B - LEAL 1(R11), R11 - -repeat_extend_forward_end_encodeSnappyBlockAsm8B: - ADDL R11, CX - MOVL CX, SI - SUBL DI, SI - MOVL 16(SP), DI - - // emitCopy -two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: - CMPL SI, $0x40 - JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(SI), SI - ADDQ $0x03, AX - JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B - -two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: - CMPL SI, $0x0c - JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B - MOVB $0x01, BL - LEAL -16(BX)(SI*4), SI - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeSnappyBlockAsm8B - -emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B: - MOVB $0x02, BL - LEAL -4(BX)(SI*4), SI - MOVB SI, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX - -repeat_end_emit_encodeSnappyBlockAsm8B: - MOVL CX, 12(SP) - JMP search_loop_encodeSnappyBlockAsm8B - -no_repeat_found_encodeSnappyBlockAsm8B: - CMPL (DX)(SI*1), DI - JEQ candidate_match_encodeSnappyBlockAsm8B - SHRQ $0x08, DI - MOVL 24(SP)(R10*4), SI - LEAL 2(CX), R9 - CMPL (DX)(R8*1), DI - JEQ candidate2_match_encodeSnappyBlockAsm8B - MOVL R9, 24(SP)(R10*4) - SHRQ $0x08, DI - CMPL (DX)(SI*1), DI - JEQ candidate3_match_encodeSnappyBlockAsm8B - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBlockAsm8B - -candidate3_match_encodeSnappyBlockAsm8B: - ADDL $0x02, CX - JMP candidate_match_encodeSnappyBlockAsm8B - -candidate2_match_encodeSnappyBlockAsm8B: - MOVL R9, 24(SP)(R10*4) - INCL CX - MOVL R8, SI - -candidate_match_encodeSnappyBlockAsm8B: - MOVL 12(SP), DI - TESTL SI, SI - JZ match_extend_back_end_encodeSnappyBlockAsm8B - -match_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL CX, DI - JLE match_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(SI*1), BL - MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 - JNE match_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(CX), CX - DECL SI - JZ match_extend_back_end_encodeSnappyBlockAsm8B - JMP match_extend_back_loop_encodeSnappyBlockAsm8B - -match_extend_back_end_encodeSnappyBlockAsm8B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) - RET - -match_dst_size_check_encodeSnappyBlockAsm8B: - MOVL CX, DI - MOVL 12(SP), R8 - CMPL R8, DI - JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(R8*1), DI - SUBL R8, R9 - LEAL -1(R9), R8 - CMPL R8, $0x3c - JLT one_byte_match_emit_encodeSnappyBlockAsm8B - CMPL R8, $0x00000100 - JLT two_bytes_match_emit_encodeSnappyBlockAsm8B - MOVB $0xf4, (AX) - MOVW R8, 1(AX) - ADDQ $0x03, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm8B - -two_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB R8, 1(AX) - ADDQ $0x02, AX - CMPL R8, $0x40 - JL memmove_match_emit_encodeSnappyBlockAsm8B - JMP memmove_long_match_emit_encodeSnappyBlockAsm8B - -one_byte_match_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, R8 - MOVB R8, (AX) - ADDQ $0x01, AX - -memmove_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R9*1), R8 - - // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ R9, $0x10 - JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ R9, $0x20 - JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 - JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (DI), R10 - MOVQ R10, (AX) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (DI), R10 - MOVQ -8(DI)(R9*1), DI - MOVQ R10, (AX) - MOVQ DI, -8(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (DI), X0 - MOVOU -16(DI)(R9*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) - JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B - -emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - -memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: - MOVQ R8, AX - JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B - -memmove_long_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R9*1), R8 - - // genMemMoveLong - MOVOU (DI), X0 - MOVOU 16(DI), X1 - MOVOU -32(DI)(R9*1), X2 - MOVOU -16(DI)(R9*1), X3 - MOVQ R9, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 - JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(DI)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 - -emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) - ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 - JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(DI)(R12*1), X4 - MOVOU -16(DI)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R9, R12 - JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ R8, AX - -emit_literal_done_match_emit_encodeSnappyBlockAsm8B: -match_nolit_loop_encodeSnappyBlockAsm8B: - MOVL CX, DI - SUBL SI, DI - MOVL DI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, SI + MOVL CX, BX + SUBL 16(SP), BX MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 - LEAQ (DX)(SI*1), SI + LEAQ (DX)(BX*1), BX // matchLen XORL R10, R10 CMPL DI, $0x08 - JL matchlen_match4_match_nolit_encodeSnappyBlockAsm8B + JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B -matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: +matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: MOVQ (R8)(R10*1), R9 - XORQ (SI)(R10*1), R9 + XORQ (BX)(R10*1), R9 TESTQ R9, R9 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B + JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -14061,103 +13814,384 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: #endif SARQ $0x03, R9 LEAL (R10)(R9*1), R10 - JMP match_nolit_end_encodeSnappyBlockAsm8B + JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B -matchlen_loop_match_nolit_encodeSnappyBlockAsm8B: +matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 - JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B - JZ match_nolit_end_encodeSnappyBlockAsm8B + JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B + JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B -matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: +matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: CMPL DI, $0x04 - JL matchlen_match2_match_nolit_encodeSnappyBlockAsm8B + JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B MOVL (R8)(R10*1), R9 - CMPL (SI)(R10*1), R9 - JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B + CMPL (BX)(R10*1), R9 + JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B SUBL $0x04, DI LEAL 4(R10), R10 -matchlen_match2_match_nolit_encodeSnappyBlockAsm8B: +matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B: CMPL DI, $0x02 - JL matchlen_match1_match_nolit_encodeSnappyBlockAsm8B + JB matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B MOVW (R8)(R10*1), R9 - CMPW (SI)(R10*1), R9 - JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B + CMPW (BX)(R10*1), R9 + JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B SUBL $0x02, DI LEAL 2(R10), R10 -matchlen_match1_match_nolit_encodeSnappyBlockAsm8B: +matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B: CMPL DI, $0x01 - JL match_nolit_end_encodeSnappyBlockAsm8B + JB repeat_extend_forward_end_encodeSnappyBlockAsm8B MOVB (R8)(R10*1), R9 - CMPB (SI)(R10*1), R9 - JNE match_nolit_end_encodeSnappyBlockAsm8B + CMPB (BX)(R10*1), R9 + JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B LEAL 1(R10), R10 -match_nolit_end_encodeSnappyBlockAsm8B: +repeat_extend_forward_end_encodeSnappyBlockAsm8B: ADDL R10, CX + MOVL CX, BX + SUBL SI, BX MOVL 16(SP), SI - ADDL $0x04, R10 + + // emitCopy +two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B + MOVB $0xee, (AX) + MOVW SI, 1(AX) + LEAL -60(BX), BX + ADDQ $0x03, AX + JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B + +two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B + LEAL -15(DI), DI + MOVB SI, 1(AX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (AX) + ADDQ $0x02, AX + JMP repeat_end_emit_encodeSnappyBlockAsm8B + +emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B: + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW SI, 1(AX) + ADDQ $0x03, AX + +repeat_end_emit_encodeSnappyBlockAsm8B: + MOVL CX, 12(SP) + JMP search_loop_encodeSnappyBlockAsm8B + +no_repeat_found_encodeSnappyBlockAsm8B: + CMPL (DX)(BX*1), SI + JEQ candidate_match_encodeSnappyBlockAsm8B + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI + JEQ candidate2_match_encodeSnappyBlockAsm8B + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI + JEQ candidate3_match_encodeSnappyBlockAsm8B + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBlockAsm8B + +candidate3_match_encodeSnappyBlockAsm8B: + ADDL $0x02, CX + JMP candidate_match_encodeSnappyBlockAsm8B + +candidate2_match_encodeSnappyBlockAsm8B: + MOVL R8, 24(SP)(R9*4) + INCL CX + MOVL DI, BX + +candidate_match_encodeSnappyBlockAsm8B: + MOVL 12(SP), SI + TESTL BX, BX + JZ match_extend_back_end_encodeSnappyBlockAsm8B + +match_extend_back_loop_encodeSnappyBlockAsm8B: + CMPL CX, SI + JBE match_extend_back_end_encodeSnappyBlockAsm8B + MOVB -1(DX)(BX*1), DI + MOVB -1(DX)(CX*1), R8 + CMPB DI, R8 + JNE match_extend_back_end_encodeSnappyBlockAsm8B + LEAL -1(CX), CX + DECL BX + JZ match_extend_back_end_encodeSnappyBlockAsm8B + JMP match_extend_back_loop_encodeSnappyBlockAsm8B + +match_extend_back_end_encodeSnappyBlockAsm8B: + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeSnappyBlockAsm8B + MOVQ $0x00000000, ret+48(FP) + RET + +match_dst_size_check_encodeSnappyBlockAsm8B: + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI + JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), DI + CMPL DI, $0x3c + JB one_byte_match_emit_encodeSnappyBlockAsm8B + CMPL DI, $0x00000100 + JB two_bytes_match_emit_encodeSnappyBlockAsm8B + JB three_bytes_match_emit_encodeSnappyBlockAsm8B + +three_bytes_match_emit_encodeSnappyBlockAsm8B: + MOVB $0xf4, (AX) + MOVW DI, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_match_emit_encodeSnappyBlockAsm8B + +two_bytes_match_emit_encodeSnappyBlockAsm8B: + MOVB $0xf0, (AX) + MOVB DI, 1(AX) + ADDQ $0x02, AX + CMPL DI, $0x40 + JB memmove_match_emit_encodeSnappyBlockAsm8B + JMP memmove_long_match_emit_encodeSnappyBlockAsm8B + +one_byte_match_emit_encodeSnappyBlockAsm8B: + SHLB $0x02, DI + MOVB DI, (AX) + ADDQ $0x01, AX + +memmove_match_emit_encodeSnappyBlockAsm8B: + LEAQ (AX)(R8*1), DI + + // genMemMoveShort + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8 + CMPQ R8, $0x10 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 + CMPQ R8, $0x20 + JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 + JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8: + MOVQ (SI), R9 + MOVQ R9, (AX) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: + MOVQ (SI), R9 + MOVQ -8(SI)(R8*1), SI + MOVQ R9, (AX) + MOVQ SI, -8(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: + MOVOU (SI), X0 + MOVOU -16(SI)(R8*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R8*1) + JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B + +emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + +memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: + MOVQ DI, AX + JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B + +memmove_long_match_emit_encodeSnappyBlockAsm8B: + LEAQ (AX)(R8*1), DI + + // genMemMoveLong + MOVOU (SI), X0 + MOVOU 16(SI), X1 + MOVOU -32(SI)(R8*1), X2 + MOVOU -16(SI)(R8*1), X3 + MOVQ R8, R10 + SHRQ $0x05, R10 + MOVQ AX, R9 + ANDL $0x0000001f, R9 + MOVQ $0x00000040, R11 + SUBQ R9, R11 + DECQ R10 + JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 + LEAQ -32(SI)(R11*1), R9 + LEAQ -32(AX)(R11*1), R12 + +emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: + MOVOU (R9), X4 + MOVOU 16(R9), X5 + MOVOA X4, (R12) + MOVOA X5, 16(R12) + ADDQ $0x20, R12 + ADDQ $0x20, R9 + ADDQ $0x20, R11 + DECQ R10 + JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back + +emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: + MOVOU -32(SI)(R11*1), X4 + MOVOU -16(SI)(R11*1), X5 + MOVOA X4, -32(AX)(R11*1) + MOVOA X5, -16(AX)(R11*1) + ADDQ $0x20, R11 + CMPQ R8, R11 + JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ DI, AX + +emit_literal_done_match_emit_encodeSnappyBlockAsm8B: +match_nolit_loop_encodeSnappyBlockAsm8B: + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) + ADDL $0x04, CX + ADDL $0x04, BX + MOVQ src_len+32(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX + + // matchLen + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B + +matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 + JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B + +#ifdef GOAMD64_v3 + TZCNTQ R8, R8 + +#else + BSFQ R8, R8 + +#endif + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm8B + +matchlen_loop_match_nolit_encodeSnappyBlockAsm8B: + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B + JZ match_nolit_end_encodeSnappyBlockAsm8B + +matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 + JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B + SUBL $0x04, SI + LEAL 4(R9), R9 + +matchlen_match2_match_nolit_encodeSnappyBlockAsm8B: + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_encodeSnappyBlockAsm8B + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 + JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B + SUBL $0x02, SI + LEAL 2(R9), R9 + +matchlen_match1_match_nolit_encodeSnappyBlockAsm8B: + CMPL SI, $0x01 + JB match_nolit_end_encodeSnappyBlockAsm8B + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 + JNE match_nolit_end_encodeSnappyBlockAsm8B + LEAL 1(R9), R9 + +match_nolit_end_encodeSnappyBlockAsm8B: + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: - CMPL R10, $0x40 - JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(R10), R10 + MOVW BX, 1(AX) + LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: - CMPL R10, $0x0c - JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B - MOVB $0x01, BL - LEAL -16(BX)(R10*4), R10 - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, R10 - MOVB R10, (AX) + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B + LEAL -15(SI), SI + MOVB BL, 1(AX) + SHRL $0x08, BX + SHLL $0x05, BX + ORL BX, SI + MOVB SI, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBlockAsm8B: - MOVB $0x02, BL - LEAL -4(BX)(R10*4), R10 - MOVB R10, (AX) - MOVW SI, 1(AX) + LEAL -2(SI), SI + MOVB SI, (AX) + MOVW BX, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm8B: CMPL CX, 8(SP) - JGE emit_remainder_encodeSnappyBlockAsm8B - MOVQ -2(DX)(CX*1), DI + JAE emit_remainder_encodeSnappyBlockAsm8B + MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeSnappyBlockAsm8B + JB match_nolit_dst_ok_encodeSnappyBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8B: - MOVQ $0x9e3779b1, R9 - MOVQ DI, R8 - SHRQ $0x10, DI - MOVQ DI, SI - SHLQ $0x20, R8 - IMULQ R9, R8 - SHRQ $0x38, R8 - SHLQ $0x20, SI - IMULQ R9, SI - SHRQ $0x38, SI - LEAL -2(CX), R9 - LEAQ 24(SP)(SI*4), R10 - MOVL (R10), SI - MOVL R9, 24(SP)(R8*4) - MOVL CX, (R10) - CMPL (DX)(SI*1), DI + MOVQ $0x9e3779b1, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x20, DI + IMULQ R8, DI + SHRQ $0x38, DI + SHLQ $0x20, BX + IMULQ R8, BX + SHRQ $0x38, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm8B INCL CX JMP search_loop_encodeSnappyBlockAsm8B @@ -14167,7 +14201,7 @@ emit_remainder_encodeSnappyBlockAsm8B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeSnappyBlockAsm8B + JB emit_remainder_ok_encodeSnappyBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET @@ -14182,9 +14216,12 @@ emit_remainder_ok_encodeSnappyBlockAsm8B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B + JB one_byte_emit_remainder_encodeSnappyBlockAsm8B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B + JB two_bytes_emit_remainder_encodeSnappyBlockAsm8B + JB three_bytes_emit_remainder_encodeSnappyBlockAsm8B + +three_bytes_emit_remainder_encodeSnappyBlockAsm8B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -14195,7 +14232,7 @@ two_bytes_emit_remainder_encodeSnappyBlockAsm8B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeSnappyBlockAsm8B + JB memmove_emit_remainder_encodeSnappyBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B one_byte_emit_remainder_encodeSnappyBlockAsm8B: @@ -14342,8 +14379,8 @@ zero_loop_encodeSnappyBetterBlockAsm: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -14353,424 +14390,424 @@ zero_loop_encodeSnappyBetterBlockAsm: MOVQ src_base+24(FP), DX search_loop_encodeSnappyBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x07, SI - CMPL SI, $0x63 - JLE check_maxskip_ok_encodeSnappyBetterBlockAsm - LEAL 100(CX), SI + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x07, BX + CMPL BX, $0x63 + JBE check_maxskip_ok_encodeSnappyBetterBlockAsm + LEAL 100(CX), BX JMP check_maxskip_cont_encodeSnappyBetterBlockAsm check_maxskip_ok_encodeSnappyBetterBlockAsm: - LEAL 1(CX)(SI*1), SI + LEAL 1(CX)(BX*1), BX check_maxskip_cont_encodeSnappyBetterBlockAsm: - CMPL SI, 8(SP) - JGE emit_remainder_encodeSnappyBetterBlockAsm - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R9 - MOVQ $0x9e3779b1, SI - MOVQ DI, R10 - MOVQ DI, R11 - SHLQ $0x08, R10 - IMULQ R9, R10 - SHRQ $0x2f, R10 - SHLQ $0x20, R11 - IMULQ SI, R11 - SHRQ $0x32, R11 - MOVL 24(SP)(R10*4), SI - MOVL 524312(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - MOVL CX, 524312(SP)(R11*4) - MOVQ (DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - CMPQ R10, DI + CMPL BX, 8(SP) + JAE emit_remainder_encodeSnappyBetterBlockAsm + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R8 + MOVQ $0x9e3779b1, BX + MOVQ SI, R9 + MOVQ SI, R10 + SHLQ $0x08, R9 + IMULQ R8, R9 + SHRQ $0x2f, R9 + SHLQ $0x20, R10 + IMULQ BX, R10 + SHRQ $0x32, R10 + MOVL 24(SP)(R9*4), BX + MOVL 524312(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + MOVL CX, 524312(SP)(R10*4) + MOVQ (DX)(BX*1), R9 + MOVQ (DX)(DI*1), R10 + CMPQ R9, SI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPQ R11, DI + CMPQ R10, SI JNE no_short_found_encodeSnappyBetterBlockAsm - MOVL R8, SI + MOVL DI, BX JMP candidate_match_encodeSnappyBetterBlockAsm no_short_found_encodeSnappyBetterBlockAsm: - CMPL R10, DI + CMPL R9, SI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPL R11, DI + CMPL R10, SI JEQ candidateS_match_encodeSnappyBetterBlockAsm MOVL 20(SP), CX JMP search_loop_encodeSnappyBetterBlockAsm candidateS_match_encodeSnappyBetterBlockAsm: - SHRQ $0x08, DI - MOVQ DI, R10 - SHLQ $0x08, R10 - IMULQ R9, R10 - SHRQ $0x2f, R10 - MOVL 24(SP)(R10*4), SI + SHRQ $0x08, SI + MOVQ SI, R9 + SHLQ $0x08, R9 + IMULQ R8, R9 + SHRQ $0x2f, R9 + MOVL 24(SP)(R9*4), BX INCL CX - MOVL CX, 24(SP)(R10*4) - CMPL (DX)(SI*1), DI + MOVL CX, 24(SP)(R9*4) + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeSnappyBetterBlockAsm DECL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeSnappyBetterBlockAsm: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeSnappyBetterBlockAsm match_extend_back_loop_encodeSnappyBetterBlockAsm: - CMPL CX, DI - JLE match_extend_back_end_encodeSnappyBetterBlockAsm - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeSnappyBetterBlockAsm + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeSnappyBetterBlockAsm LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeSnappyBetterBlockAsm JMP match_extend_back_loop_encodeSnappyBetterBlockAsm match_extend_back_end_encodeSnappyBetterBlockAsm: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 5(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeSnappyBetterBlockAsm + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 5(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeSnappyBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm: - MOVL CX, DI + MOVL CX, SI ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), R10 + ADDL $0x04, BX + MOVQ src_len+32(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), R9 // matchLen - XORL R12, R12 - CMPL R8, $0x08 - JL matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm + XORL R11, R11 + CMPL DI, $0x08 + JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm: - MOVQ (R9)(R12*1), R11 - XORQ (R10)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm: - LEAL -8(R8), R8 - LEAL 8(R12), R12 - CMPL R8, $0x08 - JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm + LEAL -8(DI), DI + LEAL 8(R11), R11 + CMPL DI, $0x08 + JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm JZ match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R8, $0x04 - JL matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - MOVL (R9)(R12*1), R11 - CMPL (R10)(R12*1), R11 + CMPL DI, $0x04 + JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm + MOVL (R8)(R11*1), R10 + CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - SUBL $0x04, R8 - LEAL 4(R12), R12 + SUBL $0x04, DI + LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R8, $0x02 - JL matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm - MOVW (R9)(R12*1), R11 - CMPW (R10)(R12*1), R11 + CMPL DI, $0x02 + JB matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm + MOVW (R8)(R11*1), R10 + CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm - SUBL $0x02, R8 - LEAL 2(R12), R12 + SUBL $0x02, DI + LEAL 2(R11), R11 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R8, $0x01 - JL match_nolit_end_encodeSnappyBetterBlockAsm - MOVB (R9)(R12*1), R11 - CMPB (R10)(R12*1), R11 + CMPL DI, $0x01 + JB match_nolit_end_encodeSnappyBetterBlockAsm + MOVB (R8)(R11*1), R10 + CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeSnappyBetterBlockAsm - LEAL 1(R12), R12 + LEAL 1(R11), R11 match_nolit_end_encodeSnappyBetterBlockAsm: - MOVL CX, R8 - SUBL SI, R8 + MOVL CX, DI + SUBL BX, DI // Check if repeat - CMPL R12, $0x01 - JG match_length_ok_encodeSnappyBetterBlockAsm - CMPL R8, $0x0000ffff - JLE match_length_ok_encodeSnappyBetterBlockAsm + CMPL R11, $0x01 + JA match_length_ok_encodeSnappyBetterBlockAsm + CMPL DI, $0x0000ffff + JBE match_length_ok_encodeSnappyBetterBlockAsm MOVL 20(SP), CX INCL CX JMP search_loop_encodeSnappyBetterBlockAsm match_length_ok_encodeSnappyBetterBlockAsm: - MOVL R8, 16(SP) - MOVL 12(SP), SI - CMPL SI, DI + MOVL DI, 16(SP) + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_encodeSnappyBetterBlockAsm - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL SI, $0x00010000 - JLT three_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL SI, $0x01000000 - JLT four_bytes_match_emit_encodeSnappyBetterBlockAsm + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_encodeSnappyBetterBlockAsm + CMPL BX, $0x00000100 + JB two_bytes_match_emit_encodeSnappyBetterBlockAsm + CMPL BX, $0x00010000 + JB three_bytes_match_emit_encodeSnappyBetterBlockAsm + CMPL BX, $0x01000000 + JB four_bytes_match_emit_encodeSnappyBetterBlockAsm MOVB $0xfc, (AX) - MOVL SI, 1(AX) + MOVL BX, 1(AX) ADDQ $0x05, AX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm four_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVL SI, R11 - SHRL $0x10, R11 + MOVL BX, R10 + SHRL $0x10, R10 MOVB $0xf8, (AX) - MOVW SI, 1(AX) - MOVB R11, 3(AX) + MOVW BX, 1(AX) + MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm three_bytes_match_emit_encodeSnappyBetterBlockAsm: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm two_bytes_match_emit_encodeSnappyBetterBlockAsm: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_encodeSnappyBetterBlockAsm + CMPL BX, $0x40 + JB memmove_match_emit_encodeSnappyBetterBlockAsm JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm one_byte_match_emit_encodeSnappyBetterBlockAsm: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8: - MOVQ (R10), R11 - MOVQ R11, (AX) + MOVQ (R9), R10 + MOVQ R10, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm memmove_long_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy - CMPL R8, $0x00010000 - JL two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm + CMPL DI, $0x00010000 + JB two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R12, $0x40 - JLE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm + CMPL R11, $0x40 + JBE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm MOVB $0xff, (AX) - MOVL R8, 1(AX) - LEAL -64(R12), R12 + MOVL DI, 1(AX) + LEAL -64(R11), R11 ADDQ $0x05, AX - CMPL R12, $0x04 - JL four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm + CMPL R11, $0x04 + JB four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm: - TESTL R12, R12 + TESTL R11, R11 JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm - MOVB $0x03, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVL R8, 1(AX) + XORL BX, BX + LEAL -1(BX)(R11*4), R11 + MOVB R11, (AX) + MOVL DI, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R12, $0x40 - JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm + CMPL R11, $0x40 + JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm MOVB $0xee, (AX) - MOVW R8, 1(AX) - LEAL -60(R12), R12 + MOVW DI, 1(AX) + LEAL -60(R11), R11 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R12, $0x0c - JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - CMPL R8, $0x00000800 - JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0x01, BL - LEAL -16(BX)(R12*4), R12 - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + MOVL R11, BX + SHLL $0x02, BX + CMPL R11, $0x0c + JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm + CMPL DI, $0x00000800 + JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm + LEAL -15(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm: - MOVB $0x02, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVW R8, 1(AX) + LEAL -2(BX), BX + MOVB BL, (AX) + MOVW DI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm: CMPL CX, 8(SP) - JGE emit_remainder_encodeSnappyBetterBlockAsm + JAE emit_remainder_encodeSnappyBetterBlockAsm CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm + JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, SI - MOVQ $0x9e3779b1, R8 - LEAQ 1(DI), DI - LEAQ -2(CX), R9 - MOVQ (DX)(DI*1), R10 - MOVQ 1(DX)(DI*1), R11 - MOVQ (DX)(R9*1), R12 - MOVQ 1(DX)(R9*1), R13 - SHLQ $0x08, R10 - IMULQ SI, R10 - SHRQ $0x2f, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R12 - IMULQ SI, R12 - SHRQ $0x2f, R12 - SHLQ $0x20, R13 - IMULQ R8, R13 - SHRQ $0x32, R13 - LEAQ 1(DI), R8 - LEAQ 1(R9), R14 - MOVL DI, 24(SP)(R10*4) - MOVL R9, 24(SP)(R12*4) - MOVL R8, 524312(SP)(R11*4) - MOVL R14, 524312(SP)(R13*4) - ADDQ $0x01, DI - SUBQ $0x01, R9 + MOVQ $0x00cf1bbcdcbfa563, BX + MOVQ $0x9e3779b1, DI + LEAQ 1(SI), SI + LEAQ -2(CX), R8 + MOVQ (DX)(SI*1), R9 + MOVQ 1(DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + MOVQ 1(DX)(R8*1), R12 + SHLQ $0x08, R9 + IMULQ BX, R9 + SHRQ $0x2f, R9 + SHLQ $0x20, R10 + IMULQ DI, R10 + SHRQ $0x32, R10 + SHLQ $0x08, R11 + IMULQ BX, R11 + SHRQ $0x2f, R11 + SHLQ $0x20, R12 + IMULQ DI, R12 + SHRQ $0x32, R12 + LEAQ 1(SI), DI + LEAQ 1(R8), R13 + MOVL SI, 24(SP)(R9*4) + MOVL R8, 24(SP)(R11*4) + MOVL DI, 524312(SP)(R10*4) + MOVL R13, 524312(SP)(R12*4) + ADDQ $0x01, SI + SUBQ $0x01, R8 index_loop_encodeSnappyBetterBlockAsm: - CMPQ DI, R9 + CMPQ SI, R8 JAE search_loop_encodeSnappyBetterBlockAsm - MOVQ (DX)(DI*1), R8 - MOVQ (DX)(R9*1), R10 - SHLQ $0x08, R8 - IMULQ SI, R8 - SHRQ $0x2f, R8 - SHLQ $0x08, R10 - IMULQ SI, R10 - SHRQ $0x2f, R10 - MOVL DI, 24(SP)(R8*4) - MOVL R9, 24(SP)(R10*4) - ADDQ $0x02, DI - SUBQ $0x02, R9 + MOVQ (DX)(SI*1), DI + MOVQ (DX)(R8*1), R9 + SHLQ $0x08, DI + IMULQ BX, DI + SHRQ $0x2f, DI + SHLQ $0x08, R9 + IMULQ BX, R9 + SHRQ $0x2f, R9 + MOVL SI, 24(SP)(DI*4) + MOVL R8, 24(SP)(R9*4) + ADDQ $0x02, SI + SUBQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm emit_remainder_encodeSnappyBetterBlockAsm: @@ -14778,7 +14815,7 @@ emit_remainder_encodeSnappyBetterBlockAsm: SUBL 12(SP), CX LEAQ 5(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeSnappyBetterBlockAsm + JB emit_remainder_ok_encodeSnappyBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET @@ -14793,13 +14830,13 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm + JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm + JB two_bytes_emit_remainder_encodeSnappyBetterBlockAsm CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeSnappyBetterBlockAsm + JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm CMPL DX, $0x01000000 - JLT four_bytes_emit_remainder_encodeSnappyBetterBlockAsm + JB four_bytes_emit_remainder_encodeSnappyBetterBlockAsm MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX @@ -14825,7 +14862,7 @@ two_bytes_emit_remainder_encodeSnappyBetterBlockAsm: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeSnappyBetterBlockAsm + JB memmove_emit_remainder_encodeSnappyBetterBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm one_byte_emit_remainder_encodeSnappyBetterBlockAsm: @@ -14972,8 +15009,8 @@ zero_loop_encodeSnappyBetterBlockAsm64K: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -14983,364 +15020,367 @@ zero_loop_encodeSnappyBetterBlockAsm64K: MOVQ src_base+24(FP), DX search_loop_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x07, SI - LEAL 1(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R9 - MOVQ $0x9e3779b1, SI - MOVQ DI, R10 - MOVQ DI, R11 - SHLQ $0x08, R10 - IMULQ R9, R10 - SHRQ $0x30, R10 - SHLQ $0x20, R11 - IMULQ SI, R11 - SHRQ $0x32, R11 - MOVL 24(SP)(R10*4), SI - MOVL 262168(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - MOVL CX, 262168(SP)(R11*4) - MOVQ (DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - CMPQ R10, DI + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x07, BX + LEAL 1(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeSnappyBetterBlockAsm64K + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R8 + MOVQ $0x9e3779b1, BX + MOVQ SI, R9 + MOVQ SI, R10 + SHLQ $0x08, R9 + IMULQ R8, R9 + SHRQ $0x30, R9 + SHLQ $0x20, R10 + IMULQ BX, R10 + SHRQ $0x32, R10 + MOVL 24(SP)(R9*4), BX + MOVL 262168(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + MOVL CX, 262168(SP)(R10*4) + MOVQ (DX)(BX*1), R9 + MOVQ (DX)(DI*1), R10 + CMPQ R9, SI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPQ R11, DI + CMPQ R10, SI JNE no_short_found_encodeSnappyBetterBlockAsm64K - MOVL R8, SI + MOVL DI, BX JMP candidate_match_encodeSnappyBetterBlockAsm64K no_short_found_encodeSnappyBetterBlockAsm64K: - CMPL R10, DI + CMPL R9, SI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPL R11, DI + CMPL R10, SI JEQ candidateS_match_encodeSnappyBetterBlockAsm64K MOVL 20(SP), CX JMP search_loop_encodeSnappyBetterBlockAsm64K candidateS_match_encodeSnappyBetterBlockAsm64K: - SHRQ $0x08, DI - MOVQ DI, R10 - SHLQ $0x08, R10 - IMULQ R9, R10 - SHRQ $0x30, R10 - MOVL 24(SP)(R10*4), SI + SHRQ $0x08, SI + MOVQ SI, R9 + SHLQ $0x08, R9 + IMULQ R8, R9 + SHRQ $0x30, R9 + MOVL 24(SP)(R9*4), BX INCL CX - MOVL CX, 24(SP)(R10*4) - CMPL (DX)(SI*1), DI + MOVL CX, 24(SP)(R9*4) + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeSnappyBetterBlockAsm64K DECL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeSnappyBetterBlockAsm64K: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K match_extend_back_loop_encodeSnappyBetterBlockAsm64K: - CMPL CX, DI - JLE match_extend_back_end_encodeSnappyBetterBlockAsm64K - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeSnappyBetterBlockAsm64K + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K match_extend_back_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeSnappyBetterBlockAsm64K + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeSnappyBetterBlockAsm64K MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm64K: - MOVL CX, DI + MOVL CX, SI ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), R10 + ADDL $0x04, BX + MOVQ src_len+32(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), R9 // matchLen - XORL R12, R12 - CMPL R8, $0x08 - JL matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K + XORL R11, R11 + CMPL DI, $0x08 + JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVQ (R9)(R12*1), R11 - XORQ (R10)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K: - LEAL -8(R8), R8 - LEAL 8(R12), R12 - CMPL R8, $0x08 - JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K + LEAL -8(DI), DI + LEAL 8(R11), R11 + CMPL DI, $0x08 + JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K JZ match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R8, $0x04 - JL matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - MOVL (R9)(R12*1), R11 - CMPL (R10)(R12*1), R11 + CMPL DI, $0x04 + JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K + MOVL (R8)(R11*1), R10 + CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - SUBL $0x04, R8 - LEAL 4(R12), R12 + SUBL $0x04, DI + LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R8, $0x02 - JL matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K - MOVW (R9)(R12*1), R11 - CMPW (R10)(R12*1), R11 + CMPL DI, $0x02 + JB matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K + MOVW (R8)(R11*1), R10 + CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K - SUBL $0x02, R8 - LEAL 2(R12), R12 + SUBL $0x02, DI + LEAL 2(R11), R11 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R8, $0x01 - JL match_nolit_end_encodeSnappyBetterBlockAsm64K - MOVB (R9)(R12*1), R11 - CMPB (R10)(R12*1), R11 + CMPL DI, $0x01 + JB match_nolit_end_encodeSnappyBetterBlockAsm64K + MOVB (R8)(R11*1), R10 + CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeSnappyBetterBlockAsm64K - LEAL 1(R12), R12 + LEAL 1(R11), R11 match_nolit_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, R8 - SUBL SI, R8 + MOVL CX, DI + SUBL BX, DI // Check if repeat - MOVL R8, 16(SP) - MOVL 12(SP), SI - CMPL SI, DI + MOVL DI, 16(SP) + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_encodeSnappyBetterBlockAsm64K - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm64K + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_encodeSnappyBetterBlockAsm64K + CMPL BX, $0x00000100 + JB two_bytes_match_emit_encodeSnappyBetterBlockAsm64K + JB three_bytes_match_emit_encodeSnappyBetterBlockAsm64K + +three_bytes_match_emit_encodeSnappyBetterBlockAsm64K: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K two_bytes_match_emit_encodeSnappyBetterBlockAsm64K: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_encodeSnappyBetterBlockAsm64K + CMPL BX, $0x40 + JB memmove_match_emit_encodeSnappyBetterBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K one_byte_match_emit_encodeSnappyBetterBlockAsm64K: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8: - MOVQ (R10), R11 - MOVQ R11, (AX) + MOVQ (R9), R10 + MOVQ R10, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K memmove_long_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R12, $0x40 - JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K + CMPL R11, $0x40 + JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K MOVB $0xee, (AX) - MOVW R8, 1(AX) - LEAL -60(R12), R12 + MOVW DI, 1(AX) + LEAL -60(R11), R11 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R12, $0x0c - JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - CMPL R8, $0x00000800 - JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - MOVB $0x01, BL - LEAL -16(BX)(R12*4), R12 - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + MOVL R11, BX + SHLL $0x02, BX + CMPL R11, $0x0c + JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K + CMPL DI, $0x00000800 + JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K + LEAL -15(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVB $0x02, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVW R8, 1(AX) + LEAL -2(BX), BX + MOVB BL, (AX) + MOVW DI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K: CMPL CX, 8(SP) - JGE emit_remainder_encodeSnappyBetterBlockAsm64K + JAE emit_remainder_encodeSnappyBetterBlockAsm64K CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K + JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: - MOVQ $0x00cf1bbcdcbfa563, SI - MOVQ $0x9e3779b1, R8 - LEAQ 1(DI), DI - LEAQ -2(CX), R9 - MOVQ (DX)(DI*1), R10 - MOVQ 1(DX)(DI*1), R11 - MOVQ (DX)(R9*1), R12 - MOVQ 1(DX)(R9*1), R13 - SHLQ $0x08, R10 - IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R12 - IMULQ SI, R12 - SHRQ $0x30, R12 - SHLQ $0x20, R13 - IMULQ R8, R13 - SHRQ $0x32, R13 - LEAQ 1(DI), R8 - LEAQ 1(R9), R14 - MOVL DI, 24(SP)(R10*4) - MOVL R9, 24(SP)(R12*4) - MOVL R8, 262168(SP)(R11*4) - MOVL R14, 262168(SP)(R13*4) - ADDQ $0x01, DI - SUBQ $0x01, R9 + MOVQ $0x00cf1bbcdcbfa563, BX + MOVQ $0x9e3779b1, DI + LEAQ 1(SI), SI + LEAQ -2(CX), R8 + MOVQ (DX)(SI*1), R9 + MOVQ 1(DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + MOVQ 1(DX)(R8*1), R12 + SHLQ $0x08, R9 + IMULQ BX, R9 + SHRQ $0x30, R9 + SHLQ $0x20, R10 + IMULQ DI, R10 + SHRQ $0x32, R10 + SHLQ $0x08, R11 + IMULQ BX, R11 + SHRQ $0x30, R11 + SHLQ $0x20, R12 + IMULQ DI, R12 + SHRQ $0x32, R12 + LEAQ 1(SI), DI + LEAQ 1(R8), R13 + MOVL SI, 24(SP)(R9*4) + MOVL R8, 24(SP)(R11*4) + MOVL DI, 262168(SP)(R10*4) + MOVL R13, 262168(SP)(R12*4) + ADDQ $0x01, SI + SUBQ $0x01, R8 index_loop_encodeSnappyBetterBlockAsm64K: - CMPQ DI, R9 + CMPQ SI, R8 JAE search_loop_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(DI*1), R8 - MOVQ (DX)(R9*1), R10 - SHLQ $0x08, R8 - IMULQ SI, R8 - SHRQ $0x30, R8 - SHLQ $0x08, R10 - IMULQ SI, R10 - SHRQ $0x30, R10 - MOVL DI, 24(SP)(R8*4) - MOVL R9, 24(SP)(R10*4) - ADDQ $0x02, DI - SUBQ $0x02, R9 + MOVQ (DX)(SI*1), DI + MOVQ (DX)(R8*1), R9 + SHLQ $0x08, DI + IMULQ BX, DI + SHRQ $0x30, DI + SHLQ $0x08, R9 + IMULQ BX, R9 + SHRQ $0x30, R9 + MOVL SI, 24(SP)(DI*4) + MOVL R8, 24(SP)(R9*4) + ADDQ $0x02, SI + SUBQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm64K emit_remainder_encodeSnappyBetterBlockAsm64K: @@ -15348,7 +15388,7 @@ emit_remainder_encodeSnappyBetterBlockAsm64K: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeSnappyBetterBlockAsm64K + JB emit_remainder_ok_encodeSnappyBetterBlockAsm64K MOVQ $0x00000000, ret+48(FP) RET @@ -15363,9 +15403,12 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm64K: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K + JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K + JB two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K + JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K + +three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -15376,7 +15419,7 @@ two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeSnappyBetterBlockAsm64K + JB memmove_emit_remainder_encodeSnappyBetterBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K: @@ -15523,8 +15566,8 @@ zero_loop_encodeSnappyBetterBlockAsm12B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -15534,364 +15577,367 @@ zero_loop_encodeSnappyBetterBlockAsm12B: MOVQ src_base+24(FP), DX search_loop_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x06, SI - LEAL 1(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ $0x9e3779b1, SI - MOVQ DI, R10 - MOVQ DI, R11 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x32, R10 - SHLQ $0x20, R11 - IMULQ SI, R11 - SHRQ $0x34, R11 - MOVL 24(SP)(R10*4), SI - MOVL 65560(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - MOVL CX, 65560(SP)(R11*4) - MOVQ (DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - CMPQ R10, DI + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x06, BX + LEAL 1(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeSnappyBetterBlockAsm12B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ $0x9e3779b1, BX + MOVQ SI, R9 + MOVQ SI, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 + SHLQ $0x20, R10 + IMULQ BX, R10 + SHRQ $0x34, R10 + MOVL 24(SP)(R9*4), BX + MOVL 65560(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + MOVL CX, 65560(SP)(R10*4) + MOVQ (DX)(BX*1), R9 + MOVQ (DX)(DI*1), R10 + CMPQ R9, SI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPQ R11, DI + CMPQ R10, SI JNE no_short_found_encodeSnappyBetterBlockAsm12B - MOVL R8, SI + MOVL DI, BX JMP candidate_match_encodeSnappyBetterBlockAsm12B no_short_found_encodeSnappyBetterBlockAsm12B: - CMPL R10, DI + CMPL R9, SI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPL R11, DI + CMPL R10, SI JEQ candidateS_match_encodeSnappyBetterBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeSnappyBetterBlockAsm12B candidateS_match_encodeSnappyBetterBlockAsm12B: - SHRQ $0x08, DI - MOVQ DI, R10 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R10*4), SI + SHRQ $0x08, SI + MOVQ SI, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x32, R9 + MOVL 24(SP)(R9*4), BX INCL CX - MOVL CX, 24(SP)(R10*4) - CMPL (DX)(SI*1), DI + MOVL CX, 24(SP)(R9*4) + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeSnappyBetterBlockAsm12B DECL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeSnappyBetterBlockAsm12B: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B match_extend_back_loop_encodeSnappyBetterBlockAsm12B: - CMPL CX, DI - JLE match_extend_back_end_encodeSnappyBetterBlockAsm12B - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeSnappyBetterBlockAsm12B + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B match_extend_back_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeSnappyBetterBlockAsm12B + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeSnappyBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm12B: - MOVL CX, DI + MOVL CX, SI ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), R10 + ADDL $0x04, BX + MOVQ src_len+32(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), R9 // matchLen - XORL R12, R12 - CMPL R8, $0x08 - JL matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B + XORL R11, R11 + CMPL DI, $0x08 + JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVQ (R9)(R12*1), R11 - XORQ (R10)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B: - LEAL -8(R8), R8 - LEAL 8(R12), R12 - CMPL R8, $0x08 - JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B + LEAL -8(DI), DI + LEAL 8(R11), R11 + CMPL DI, $0x08 + JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B JZ match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R8, $0x04 - JL matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - MOVL (R9)(R12*1), R11 - CMPL (R10)(R12*1), R11 + CMPL DI, $0x04 + JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B + MOVL (R8)(R11*1), R10 + CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - SUBL $0x04, R8 - LEAL 4(R12), R12 + SUBL $0x04, DI + LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R8, $0x02 - JL matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B - MOVW (R9)(R12*1), R11 - CMPW (R10)(R12*1), R11 + CMPL DI, $0x02 + JB matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B + MOVW (R8)(R11*1), R10 + CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B - SUBL $0x02, R8 - LEAL 2(R12), R12 + SUBL $0x02, DI + LEAL 2(R11), R11 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R8, $0x01 - JL match_nolit_end_encodeSnappyBetterBlockAsm12B - MOVB (R9)(R12*1), R11 - CMPB (R10)(R12*1), R11 + CMPL DI, $0x01 + JB match_nolit_end_encodeSnappyBetterBlockAsm12B + MOVB (R8)(R11*1), R10 + CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeSnappyBetterBlockAsm12B - LEAL 1(R12), R12 + LEAL 1(R11), R11 match_nolit_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, R8 - SUBL SI, R8 + MOVL CX, DI + SUBL BX, DI // Check if repeat - MOVL R8, 16(SP) - MOVL 12(SP), SI - CMPL SI, DI + MOVL DI, 16(SP) + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_encodeSnappyBetterBlockAsm12B - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm12B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_encodeSnappyBetterBlockAsm12B + CMPL BX, $0x00000100 + JB two_bytes_match_emit_encodeSnappyBetterBlockAsm12B + JB three_bytes_match_emit_encodeSnappyBetterBlockAsm12B + +three_bytes_match_emit_encodeSnappyBetterBlockAsm12B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_encodeSnappyBetterBlockAsm12B + CMPL BX, $0x40 + JB memmove_match_emit_encodeSnappyBetterBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B one_byte_match_emit_encodeSnappyBetterBlockAsm12B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: - MOVQ (R10), R11 - MOVQ R11, (AX) + MOVQ (R9), R10 + MOVQ R10, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B memmove_long_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R12, $0x40 - JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B + CMPL R11, $0x40 + JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B MOVB $0xee, (AX) - MOVW R8, 1(AX) - LEAL -60(R12), R12 + MOVW DI, 1(AX) + LEAL -60(R11), R11 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R12, $0x0c - JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - CMPL R8, $0x00000800 - JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - MOVB $0x01, BL - LEAL -16(BX)(R12*4), R12 - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + MOVL R11, BX + SHLL $0x02, BX + CMPL R11, $0x0c + JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B + CMPL DI, $0x00000800 + JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B + LEAL -15(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVB $0x02, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVW R8, 1(AX) + LEAL -2(BX), BX + MOVB BL, (AX) + MOVW DI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: CMPL CX, 8(SP) - JGE emit_remainder_encodeSnappyBetterBlockAsm12B + JAE emit_remainder_encodeSnappyBetterBlockAsm12B CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B + JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, SI - MOVQ $0x9e3779b1, R8 - LEAQ 1(DI), DI - LEAQ -2(CX), R9 - MOVQ (DX)(DI*1), R10 - MOVQ 1(DX)(DI*1), R11 - MOVQ (DX)(R9*1), R12 - MOVQ 1(DX)(R9*1), R13 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x32, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x34, R11 - SHLQ $0x10, R12 - IMULQ SI, R12 - SHRQ $0x32, R12 - SHLQ $0x20, R13 - IMULQ R8, R13 - SHRQ $0x34, R13 - LEAQ 1(DI), R8 - LEAQ 1(R9), R14 - MOVL DI, 24(SP)(R10*4) - MOVL R9, 24(SP)(R12*4) - MOVL R8, 65560(SP)(R11*4) - MOVL R14, 65560(SP)(R13*4) - ADDQ $0x01, DI - SUBQ $0x01, R9 + MOVQ $0x0000cf1bbcdcbf9b, BX + MOVQ $0x9e3779b1, DI + LEAQ 1(SI), SI + LEAQ -2(CX), R8 + MOVQ (DX)(SI*1), R9 + MOVQ 1(DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + MOVQ 1(DX)(R8*1), R12 + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x32, R9 + SHLQ $0x20, R10 + IMULQ DI, R10 + SHRQ $0x34, R10 + SHLQ $0x10, R11 + IMULQ BX, R11 + SHRQ $0x32, R11 + SHLQ $0x20, R12 + IMULQ DI, R12 + SHRQ $0x34, R12 + LEAQ 1(SI), DI + LEAQ 1(R8), R13 + MOVL SI, 24(SP)(R9*4) + MOVL R8, 24(SP)(R11*4) + MOVL DI, 65560(SP)(R10*4) + MOVL R13, 65560(SP)(R12*4) + ADDQ $0x01, SI + SUBQ $0x01, R8 index_loop_encodeSnappyBetterBlockAsm12B: - CMPQ DI, R9 + CMPQ SI, R8 JAE search_loop_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(DI*1), R8 - MOVQ (DX)(R9*1), R10 - SHLQ $0x10, R8 - IMULQ SI, R8 - SHRQ $0x32, R8 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x32, R10 - MOVL DI, 24(SP)(R8*4) - MOVL R9, 24(SP)(R10*4) - ADDQ $0x02, DI - SUBQ $0x02, R9 + MOVQ (DX)(SI*1), DI + MOVQ (DX)(R8*1), R9 + SHLQ $0x10, DI + IMULQ BX, DI + SHRQ $0x32, DI + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x32, R9 + MOVL SI, 24(SP)(DI*4) + MOVL R8, 24(SP)(R9*4) + ADDQ $0x02, SI + SUBQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: @@ -15899,7 +15945,7 @@ emit_remainder_encodeSnappyBetterBlockAsm12B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeSnappyBetterBlockAsm12B + JB emit_remainder_ok_encodeSnappyBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET @@ -15914,9 +15960,12 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm12B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B + JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B + JB two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B + JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B + +three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -15927,7 +15976,7 @@ two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeSnappyBetterBlockAsm12B + JB memmove_emit_remainder_encodeSnappyBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: @@ -16074,8 +16123,8 @@ zero_loop_encodeSnappyBetterBlockAsm10B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -16085,364 +16134,367 @@ zero_loop_encodeSnappyBetterBlockAsm10B: MOVQ src_base+24(FP), DX search_loop_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x05, SI - LEAL 1(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ $0x9e3779b1, SI - MOVQ DI, R10 - MOVQ DI, R11 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x34, R10 - SHLQ $0x20, R11 - IMULQ SI, R11 - SHRQ $0x36, R11 - MOVL 24(SP)(R10*4), SI - MOVL 16408(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - MOVL CX, 16408(SP)(R11*4) - MOVQ (DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - CMPQ R10, DI + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x05, BX + LEAL 1(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeSnappyBetterBlockAsm10B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ $0x9e3779b1, BX + MOVQ SI, R9 + MOVQ SI, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x34, R9 + SHLQ $0x20, R10 + IMULQ BX, R10 + SHRQ $0x36, R10 + MOVL 24(SP)(R9*4), BX + MOVL 16408(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + MOVL CX, 16408(SP)(R10*4) + MOVQ (DX)(BX*1), R9 + MOVQ (DX)(DI*1), R10 + CMPQ R9, SI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPQ R11, DI + CMPQ R10, SI JNE no_short_found_encodeSnappyBetterBlockAsm10B - MOVL R8, SI + MOVL DI, BX JMP candidate_match_encodeSnappyBetterBlockAsm10B no_short_found_encodeSnappyBetterBlockAsm10B: - CMPL R10, DI + CMPL R9, SI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPL R11, DI + CMPL R10, SI JEQ candidateS_match_encodeSnappyBetterBlockAsm10B MOVL 20(SP), CX JMP search_loop_encodeSnappyBetterBlockAsm10B candidateS_match_encodeSnappyBetterBlockAsm10B: - SHRQ $0x08, DI - MOVQ DI, R10 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R10*4), SI + SHRQ $0x08, SI + MOVQ SI, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x34, R9 + MOVL 24(SP)(R9*4), BX INCL CX - MOVL CX, 24(SP)(R10*4) - CMPL (DX)(SI*1), DI + MOVL CX, 24(SP)(R9*4) + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeSnappyBetterBlockAsm10B DECL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeSnappyBetterBlockAsm10B: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B match_extend_back_loop_encodeSnappyBetterBlockAsm10B: - CMPL CX, DI - JLE match_extend_back_end_encodeSnappyBetterBlockAsm10B - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeSnappyBetterBlockAsm10B + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B match_extend_back_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeSnappyBetterBlockAsm10B + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeSnappyBetterBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm10B: - MOVL CX, DI + MOVL CX, SI ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), R10 + ADDL $0x04, BX + MOVQ src_len+32(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), R9 // matchLen - XORL R12, R12 - CMPL R8, $0x08 - JL matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B + XORL R11, R11 + CMPL DI, $0x08 + JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVQ (R9)(R12*1), R11 - XORQ (R10)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B: - LEAL -8(R8), R8 - LEAL 8(R12), R12 - CMPL R8, $0x08 - JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B + LEAL -8(DI), DI + LEAL 8(R11), R11 + CMPL DI, $0x08 + JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B JZ match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R8, $0x04 - JL matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - MOVL (R9)(R12*1), R11 - CMPL (R10)(R12*1), R11 + CMPL DI, $0x04 + JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B + MOVL (R8)(R11*1), R10 + CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - SUBL $0x04, R8 - LEAL 4(R12), R12 + SUBL $0x04, DI + LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R8, $0x02 - JL matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B - MOVW (R9)(R12*1), R11 - CMPW (R10)(R12*1), R11 + CMPL DI, $0x02 + JB matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B + MOVW (R8)(R11*1), R10 + CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B - SUBL $0x02, R8 - LEAL 2(R12), R12 + SUBL $0x02, DI + LEAL 2(R11), R11 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R8, $0x01 - JL match_nolit_end_encodeSnappyBetterBlockAsm10B - MOVB (R9)(R12*1), R11 - CMPB (R10)(R12*1), R11 + CMPL DI, $0x01 + JB match_nolit_end_encodeSnappyBetterBlockAsm10B + MOVB (R8)(R11*1), R10 + CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeSnappyBetterBlockAsm10B - LEAL 1(R12), R12 + LEAL 1(R11), R11 match_nolit_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, R8 - SUBL SI, R8 + MOVL CX, DI + SUBL BX, DI // Check if repeat - MOVL R8, 16(SP) - MOVL 12(SP), SI - CMPL SI, DI + MOVL DI, 16(SP) + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_encodeSnappyBetterBlockAsm10B - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm10B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_encodeSnappyBetterBlockAsm10B + CMPL BX, $0x00000100 + JB two_bytes_match_emit_encodeSnappyBetterBlockAsm10B + JB three_bytes_match_emit_encodeSnappyBetterBlockAsm10B + +three_bytes_match_emit_encodeSnappyBetterBlockAsm10B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B two_bytes_match_emit_encodeSnappyBetterBlockAsm10B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_encodeSnappyBetterBlockAsm10B + CMPL BX, $0x40 + JB memmove_match_emit_encodeSnappyBetterBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B one_byte_match_emit_encodeSnappyBetterBlockAsm10B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8: - MOVQ (R10), R11 - MOVQ R11, (AX) + MOVQ (R9), R10 + MOVQ R10, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B memmove_long_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R12, $0x40 - JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B + CMPL R11, $0x40 + JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B MOVB $0xee, (AX) - MOVW R8, 1(AX) - LEAL -60(R12), R12 + MOVW DI, 1(AX) + LEAL -60(R11), R11 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R12, $0x0c - JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - CMPL R8, $0x00000800 - JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - MOVB $0x01, BL - LEAL -16(BX)(R12*4), R12 - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + MOVL R11, BX + SHLL $0x02, BX + CMPL R11, $0x0c + JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B + CMPL DI, $0x00000800 + JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B + LEAL -15(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVB $0x02, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVW R8, 1(AX) + LEAL -2(BX), BX + MOVB BL, (AX) + MOVW DI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: CMPL CX, 8(SP) - JGE emit_remainder_encodeSnappyBetterBlockAsm10B + JAE emit_remainder_encodeSnappyBetterBlockAsm10B CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B + JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, SI - MOVQ $0x9e3779b1, R8 - LEAQ 1(DI), DI - LEAQ -2(CX), R9 - MOVQ (DX)(DI*1), R10 - MOVQ 1(DX)(DI*1), R11 - MOVQ (DX)(R9*1), R12 - MOVQ 1(DX)(R9*1), R13 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x34, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x36, R11 - SHLQ $0x10, R12 - IMULQ SI, R12 - SHRQ $0x34, R12 - SHLQ $0x20, R13 - IMULQ R8, R13 - SHRQ $0x36, R13 - LEAQ 1(DI), R8 - LEAQ 1(R9), R14 - MOVL DI, 24(SP)(R10*4) - MOVL R9, 24(SP)(R12*4) - MOVL R8, 16408(SP)(R11*4) - MOVL R14, 16408(SP)(R13*4) - ADDQ $0x01, DI - SUBQ $0x01, R9 + MOVQ $0x0000cf1bbcdcbf9b, BX + MOVQ $0x9e3779b1, DI + LEAQ 1(SI), SI + LEAQ -2(CX), R8 + MOVQ (DX)(SI*1), R9 + MOVQ 1(DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + MOVQ 1(DX)(R8*1), R12 + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x34, R9 + SHLQ $0x20, R10 + IMULQ DI, R10 + SHRQ $0x36, R10 + SHLQ $0x10, R11 + IMULQ BX, R11 + SHRQ $0x34, R11 + SHLQ $0x20, R12 + IMULQ DI, R12 + SHRQ $0x36, R12 + LEAQ 1(SI), DI + LEAQ 1(R8), R13 + MOVL SI, 24(SP)(R9*4) + MOVL R8, 24(SP)(R11*4) + MOVL DI, 16408(SP)(R10*4) + MOVL R13, 16408(SP)(R12*4) + ADDQ $0x01, SI + SUBQ $0x01, R8 index_loop_encodeSnappyBetterBlockAsm10B: - CMPQ DI, R9 + CMPQ SI, R8 JAE search_loop_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(DI*1), R8 - MOVQ (DX)(R9*1), R10 - SHLQ $0x10, R8 - IMULQ SI, R8 - SHRQ $0x34, R8 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x34, R10 - MOVL DI, 24(SP)(R8*4) - MOVL R9, 24(SP)(R10*4) - ADDQ $0x02, DI - SUBQ $0x02, R9 + MOVQ (DX)(SI*1), DI + MOVQ (DX)(R8*1), R9 + SHLQ $0x10, DI + IMULQ BX, DI + SHRQ $0x34, DI + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x34, R9 + MOVL SI, 24(SP)(DI*4) + MOVL R8, 24(SP)(R9*4) + ADDQ $0x02, SI + SUBQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm10B emit_remainder_encodeSnappyBetterBlockAsm10B: @@ -16450,7 +16502,7 @@ emit_remainder_encodeSnappyBetterBlockAsm10B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeSnappyBetterBlockAsm10B + JB emit_remainder_ok_encodeSnappyBetterBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET @@ -16465,9 +16517,12 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm10B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B + JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B + JB two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B + JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B + +three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -16478,7 +16533,7 @@ two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeSnappyBetterBlockAsm10B + JB memmove_emit_remainder_encodeSnappyBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B: @@ -16625,8 +16680,8 @@ zero_loop_encodeSnappyBetterBlockAsm8B: MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX - LEAQ -8(CX), SI - MOVL SI, 8(SP) + LEAQ -8(CX), BX + MOVL BX, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX @@ -16636,362 +16691,365 @@ zero_loop_encodeSnappyBetterBlockAsm8B: MOVQ src_base+24(FP), DX search_loop_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - SHRL $0x04, SI - LEAL 1(CX)(SI*1), SI - CMPL SI, 8(SP) - JGE emit_remainder_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(CX*1), DI - MOVL SI, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R9 - MOVQ $0x9e3779b1, SI - MOVQ DI, R10 - MOVQ DI, R11 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x36, R10 - SHLQ $0x20, R11 - IMULQ SI, R11 - SHRQ $0x38, R11 - MOVL 24(SP)(R10*4), SI - MOVL 4120(SP)(R11*4), R8 - MOVL CX, 24(SP)(R10*4) - MOVL CX, 4120(SP)(R11*4) - MOVQ (DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - CMPQ R10, DI + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x04, BX + LEAL 1(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_encodeSnappyBetterBlockAsm8B + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ $0x9e3779b1, BX + MOVQ SI, R9 + MOVQ SI, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x36, R9 + SHLQ $0x20, R10 + IMULQ BX, R10 + SHRQ $0x38, R10 + MOVL 24(SP)(R9*4), BX + MOVL 4120(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + MOVL CX, 4120(SP)(R10*4) + MOVQ (DX)(BX*1), R9 + MOVQ (DX)(DI*1), R10 + CMPQ R9, SI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPQ R11, DI + CMPQ R10, SI JNE no_short_found_encodeSnappyBetterBlockAsm8B - MOVL R8, SI + MOVL DI, BX JMP candidate_match_encodeSnappyBetterBlockAsm8B no_short_found_encodeSnappyBetterBlockAsm8B: - CMPL R10, DI + CMPL R9, SI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPL R11, DI + CMPL R10, SI JEQ candidateS_match_encodeSnappyBetterBlockAsm8B MOVL 20(SP), CX JMP search_loop_encodeSnappyBetterBlockAsm8B candidateS_match_encodeSnappyBetterBlockAsm8B: - SHRQ $0x08, DI - MOVQ DI, R10 - SHLQ $0x10, R10 - IMULQ R9, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R10*4), SI + SHRQ $0x08, SI + MOVQ SI, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x36, R9 + MOVL 24(SP)(R9*4), BX INCL CX - MOVL CX, 24(SP)(R10*4) - CMPL (DX)(SI*1), DI + MOVL CX, 24(SP)(R9*4) + CMPL (DX)(BX*1), SI JEQ candidate_match_encodeSnappyBetterBlockAsm8B DECL CX - MOVL R8, SI + MOVL DI, BX candidate_match_encodeSnappyBetterBlockAsm8B: - MOVL 12(SP), DI - TESTL SI, SI + MOVL 12(SP), SI + TESTL BX, BX JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B match_extend_back_loop_encodeSnappyBetterBlockAsm8B: - CMPL CX, DI - JLE match_extend_back_end_encodeSnappyBetterBlockAsm8B - MOVB -1(DX)(SI*1), BL + CMPL CX, SI + JBE match_extend_back_end_encodeSnappyBetterBlockAsm8B + MOVB -1(DX)(BX*1), DI MOVB -1(DX)(CX*1), R8 - CMPB BL, R8 + CMPB DI, R8 JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B LEAL -1(CX), CX - DECL SI + DECL BX JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B match_extend_back_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, DI - SUBL 12(SP), DI - LEAQ 3(AX)(DI*1), DI - CMPQ DI, (SP) - JL match_dst_size_check_encodeSnappyBetterBlockAsm8B + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_encodeSnappyBetterBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm8B: - MOVL CX, DI + MOVL CX, SI ADDL $0x04, CX - ADDL $0x04, SI - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(SI*1), R10 + ADDL $0x04, BX + MOVQ src_len+32(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), R9 // matchLen - XORL R12, R12 - CMPL R8, $0x08 - JL matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B + XORL R11, R11 + CMPL DI, $0x08 + JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVQ (R9)(R12*1), R11 - XORQ (R10)(R12*1), R11 - TESTQ R11, R11 + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R10, R10 #else - BSFQ R11, R11 + BSFQ R10, R10 #endif - SARQ $0x03, R11 - LEAL (R12)(R11*1), R12 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B: - LEAL -8(R8), R8 - LEAL 8(R12), R12 - CMPL R8, $0x08 - JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -8(DI), DI + LEAL 8(R11), R11 + CMPL DI, $0x08 + JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B JZ match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R8, $0x04 - JL matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - MOVL (R9)(R12*1), R11 - CMPL (R10)(R12*1), R11 + CMPL DI, $0x04 + JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B + MOVL (R8)(R11*1), R10 + CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - SUBL $0x04, R8 - LEAL 4(R12), R12 + SUBL $0x04, DI + LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R8, $0x02 - JL matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B - MOVW (R9)(R12*1), R11 - CMPW (R10)(R12*1), R11 + CMPL DI, $0x02 + JB matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B + MOVW (R8)(R11*1), R10 + CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B - SUBL $0x02, R8 - LEAL 2(R12), R12 + SUBL $0x02, DI + LEAL 2(R11), R11 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R8, $0x01 - JL match_nolit_end_encodeSnappyBetterBlockAsm8B - MOVB (R9)(R12*1), R11 - CMPB (R10)(R12*1), R11 + CMPL DI, $0x01 + JB match_nolit_end_encodeSnappyBetterBlockAsm8B + MOVB (R8)(R11*1), R10 + CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeSnappyBetterBlockAsm8B - LEAL 1(R12), R12 + LEAL 1(R11), R11 match_nolit_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, R8 - SUBL SI, R8 + MOVL CX, DI + SUBL BX, DI // Check if repeat - MOVL R8, 16(SP) - MOVL 12(SP), SI - CMPL SI, DI + MOVL DI, 16(SP) + MOVL 12(SP), BX + CMPL BX, SI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B - MOVL DI, R9 - MOVL DI, 12(SP) - LEAQ (DX)(SI*1), R10 - SUBL SI, R9 - LEAL -1(R9), SI - CMPL SI, $0x3c - JLT one_byte_match_emit_encodeSnappyBetterBlockAsm8B - CMPL SI, $0x00000100 - JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm8B + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R9 + SUBL BX, R8 + LEAL -1(R8), BX + CMPL BX, $0x3c + JB one_byte_match_emit_encodeSnappyBetterBlockAsm8B + CMPL BX, $0x00000100 + JB two_bytes_match_emit_encodeSnappyBetterBlockAsm8B + JB three_bytes_match_emit_encodeSnappyBetterBlockAsm8B + +three_bytes_match_emit_encodeSnappyBetterBlockAsm8B: MOVB $0xf4, (AX) - MOVW SI, 1(AX) + MOVW BX, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B two_bytes_match_emit_encodeSnappyBetterBlockAsm8B: MOVB $0xf0, (AX) - MOVB SI, 1(AX) + MOVB BL, 1(AX) ADDQ $0x02, AX - CMPL SI, $0x40 - JL memmove_match_emit_encodeSnappyBetterBlockAsm8B + CMPL BX, $0x40 + JB memmove_match_emit_encodeSnappyBetterBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B one_byte_match_emit_encodeSnappyBetterBlockAsm8B: - SHLB $0x02, SI - MOVB SI, (AX) + SHLB $0x02, BL + MOVB BL, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveShort - CMPQ R9, $0x08 - JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8 - CMPQ R9, $0x10 + CMPQ R8, $0x08 + JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16 - CMPQ R9, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8: - MOVQ (R10), R11 - MOVQ R11, (AX) + MOVQ (R9), R10 + MOVQ R10, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R10), R11 - MOVQ -8(R10)(R9*1), R10 - MOVQ R11, (AX) - MOVQ R10, -8(AX)(R9*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (AX) + MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R10), X0 - MOVOU -16(R10)(R9*1), X1 + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) - MOVOU X1, -16(AX)(R9*1) + MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B: - MOVQ SI, AX + MOVQ BX, AX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B memmove_long_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R9*1), SI + LEAQ (AX)(R8*1), BX // genMemMoveLong - MOVOU (R10), X0 - MOVOU 16(R10), X1 - MOVOU -32(R10)(R9*1), X2 - MOVOU -16(R10)(R9*1), X3 - MOVQ R9, R13 - SHRQ $0x05, R13 - MOVQ AX, R11 - ANDL $0x0000001f, R11 - MOVQ $0x00000040, R14 - SUBQ R11, R14 - DECQ R13 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R12 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R10)(R14*1), R11 - LEAQ -32(AX)(R14*1), R15 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R11), X4 - MOVOU 16(R11), X5 - MOVOA X4, (R15) - MOVOA X5, 16(R15) - ADDQ $0x20, R15 - ADDQ $0x20, R11 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) ADDQ $0x20, R14 - DECQ R13 + ADDQ $0x20, R10 + ADDQ $0x20, R13 + DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R10)(R14*1), X4 - MOVOU -16(R10)(R14*1), X5 - MOVOA X4, -32(AX)(R14*1) - MOVOA X5, -16(AX)(R14*1) - ADDQ $0x20, R14 - CMPQ R9, R14 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R9*1) - MOVOU X3, -16(AX)(R9*1) - MOVQ SI, AX + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ BX, AX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B: - ADDL R12, CX - ADDL $0x04, R12 + ADDL R11, CX + ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R12, $0x40 - JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B + CMPL R11, $0x40 + JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B MOVB $0xee, (AX) - MOVW R8, 1(AX) - LEAL -60(R12), R12 + MOVW DI, 1(AX) + LEAL -60(R11), R11 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R12, $0x0c - JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B - MOVB $0x01, BL - LEAL -16(BX)(R12*4), R12 - MOVB R8, 1(AX) - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, R12 - MOVB R12, (AX) + MOVL R11, BX + SHLL $0x02, BX + CMPL R11, $0x0c + JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -15(BX), BX + MOVB DI, 1(AX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, BX + MOVB BL, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVB $0x02, BL - LEAL -4(BX)(R12*4), R12 - MOVB R12, (AX) - MOVW R8, 1(AX) + LEAL -2(BX), BX + MOVB BL, (AX) + MOVW DI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B: CMPL CX, 8(SP) - JGE emit_remainder_encodeSnappyBetterBlockAsm8B + JAE emit_remainder_encodeSnappyBetterBlockAsm8B CMPQ AX, (SP) - JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B + JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, SI - MOVQ $0x9e3779b1, R8 - LEAQ 1(DI), DI - LEAQ -2(CX), R9 - MOVQ (DX)(DI*1), R10 - MOVQ 1(DX)(DI*1), R11 - MOVQ (DX)(R9*1), R12 - MOVQ 1(DX)(R9*1), R13 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x36, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x38, R11 - SHLQ $0x10, R12 - IMULQ SI, R12 - SHRQ $0x36, R12 - SHLQ $0x20, R13 - IMULQ R8, R13 - SHRQ $0x38, R13 - LEAQ 1(DI), R8 - LEAQ 1(R9), R14 - MOVL DI, 24(SP)(R10*4) - MOVL R9, 24(SP)(R12*4) - MOVL R8, 4120(SP)(R11*4) - MOVL R14, 4120(SP)(R13*4) - ADDQ $0x01, DI - SUBQ $0x01, R9 + MOVQ $0x0000cf1bbcdcbf9b, BX + MOVQ $0x9e3779b1, DI + LEAQ 1(SI), SI + LEAQ -2(CX), R8 + MOVQ (DX)(SI*1), R9 + MOVQ 1(DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + MOVQ 1(DX)(R8*1), R12 + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x36, R9 + SHLQ $0x20, R10 + IMULQ DI, R10 + SHRQ $0x38, R10 + SHLQ $0x10, R11 + IMULQ BX, R11 + SHRQ $0x36, R11 + SHLQ $0x20, R12 + IMULQ DI, R12 + SHRQ $0x38, R12 + LEAQ 1(SI), DI + LEAQ 1(R8), R13 + MOVL SI, 24(SP)(R9*4) + MOVL R8, 24(SP)(R11*4) + MOVL DI, 4120(SP)(R10*4) + MOVL R13, 4120(SP)(R12*4) + ADDQ $0x01, SI + SUBQ $0x01, R8 index_loop_encodeSnappyBetterBlockAsm8B: - CMPQ DI, R9 + CMPQ SI, R8 JAE search_loop_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(DI*1), R8 - MOVQ (DX)(R9*1), R10 - SHLQ $0x10, R8 - IMULQ SI, R8 - SHRQ $0x36, R8 - SHLQ $0x10, R10 - IMULQ SI, R10 - SHRQ $0x36, R10 - MOVL DI, 24(SP)(R8*4) - MOVL R9, 24(SP)(R10*4) - ADDQ $0x02, DI - SUBQ $0x02, R9 + MOVQ (DX)(SI*1), DI + MOVQ (DX)(R8*1), R9 + SHLQ $0x10, DI + IMULQ BX, DI + SHRQ $0x36, DI + SHLQ $0x10, R9 + IMULQ BX, R9 + SHRQ $0x36, R9 + MOVL SI, 24(SP)(DI*4) + MOVL R8, 24(SP)(R9*4) + ADDQ $0x02, SI + SUBQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm8B emit_remainder_encodeSnappyBetterBlockAsm8B: @@ -16999,7 +17057,7 @@ emit_remainder_encodeSnappyBetterBlockAsm8B: SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) - JL emit_remainder_ok_encodeSnappyBetterBlockAsm8B + JB emit_remainder_ok_encodeSnappyBetterBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET @@ -17014,9 +17072,12 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm8B: SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c - JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B + JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B CMPL DX, $0x00000100 - JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B + JB two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B + JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B + +three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -17027,7 +17088,7 @@ two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 - JL memmove_emit_remainder_encodeSnappyBetterBlockAsm8B + JB memmove_emit_remainder_encodeSnappyBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B: @@ -17151,6 +17212,1017 @@ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B: MOVQ AX, ret+48(FP) RET +// func calcBlockSize(src []byte) int +// Requires: BMI, SSE2 +TEXT ·calcBlockSize(SB), $32792-32 + XORQ AX, AX + MOVQ $0x00000100, CX + LEAQ 24(SP), DX + PXOR X0, X0 + +zero_loop_calcBlockSize: + MOVOU X0, (DX) + MOVOU X0, 16(DX) + MOVOU X0, 32(DX) + MOVOU X0, 48(DX) + MOVOU X0, 64(DX) + MOVOU X0, 80(DX) + MOVOU X0, 96(DX) + MOVOU X0, 112(DX) + ADDQ $0x80, DX + DECQ CX + JNZ zero_loop_calcBlockSize + MOVL $0x00000000, 12(SP) + MOVQ src_len+8(FP), CX + LEAQ -9(CX), DX + LEAQ -8(CX), BX + MOVL BX, 8(SP) + SHRQ $0x05, CX + SUBL CX, DX + LEAQ (AX)(DX*1), DX + MOVQ DX, (SP) + MOVL $0x00000001, CX + MOVL CX, 16(SP) + MOVQ src_base+0(FP), DX + +search_loop_calcBlockSize: + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x05, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_calcBlockSize + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x33, R9 + SHLQ $0x10, R10 + IMULQ R8, R10 + SHRQ $0x33, R10 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x10, R9 + IMULQ R8, R9 + SHRQ $0x33, R9 + MOVL CX, R8 + SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 + JNE no_repeat_found_calcBlockSize + LEAL 1(CX), SI + MOVL 12(SP), BX + MOVL SI, DI + SUBL 16(SP), DI + JZ repeat_extend_back_end_calcBlockSize + +repeat_extend_back_loop_calcBlockSize: + CMPL SI, BX + JBE repeat_extend_back_end_calcBlockSize + MOVB -1(DX)(DI*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 + JNE repeat_extend_back_end_calcBlockSize + LEAL -1(SI), SI + DECL DI + JNZ repeat_extend_back_loop_calcBlockSize + +repeat_extend_back_end_calcBlockSize: + MOVL 12(SP), BX + CMPL BX, SI + JEQ emit_literal_done_repeat_emit_calcBlockSize + MOVL SI, DI + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R8 + SUBL BX, DI + LEAL -1(DI), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_calcBlockSize + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_calcBlockSize + CMPL BX, $0x00010000 + JB three_bytes_repeat_emit_calcBlockSize + CMPL BX, $0x01000000 + JB four_bytes_repeat_emit_calcBlockSize + ADDQ $0x05, AX + JMP memmove_long_repeat_emit_calcBlockSize + +four_bytes_repeat_emit_calcBlockSize: + ADDQ $0x04, AX + JMP memmove_long_repeat_emit_calcBlockSize + +three_bytes_repeat_emit_calcBlockSize: + ADDQ $0x03, AX + JMP memmove_long_repeat_emit_calcBlockSize + +two_bytes_repeat_emit_calcBlockSize: + ADDQ $0x02, AX + CMPL BX, $0x40 + JB memmove_repeat_emit_calcBlockSize + JMP memmove_long_repeat_emit_calcBlockSize + +one_byte_repeat_emit_calcBlockSize: + ADDQ $0x01, AX + +memmove_repeat_emit_calcBlockSize: + LEAQ (AX)(DI*1), AX + JMP emit_literal_done_repeat_emit_calcBlockSize + +memmove_long_repeat_emit_calcBlockSize: + LEAQ (AX)(DI*1), AX + +emit_literal_done_repeat_emit_calcBlockSize: + ADDL $0x05, CX + MOVL CX, BX + SUBL 16(SP), BX + MOVQ src_len+8(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), BX + + // matchLen + XORL R10, R10 + CMPL DI, $0x08 + JB matchlen_match4_repeat_extend_calcBlockSize + +matchlen_loopback_repeat_extend_calcBlockSize: + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + TESTQ R9, R9 + JZ matchlen_loop_repeat_extend_calcBlockSize + +#ifdef GOAMD64_v3 + TZCNTQ R9, R9 + +#else + BSFQ R9, R9 + +#endif + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 + JMP repeat_extend_forward_end_calcBlockSize + +matchlen_loop_repeat_extend_calcBlockSize: + LEAL -8(DI), DI + LEAL 8(R10), R10 + CMPL DI, $0x08 + JAE matchlen_loopback_repeat_extend_calcBlockSize + JZ repeat_extend_forward_end_calcBlockSize + +matchlen_match4_repeat_extend_calcBlockSize: + CMPL DI, $0x04 + JB matchlen_match2_repeat_extend_calcBlockSize + MOVL (R8)(R10*1), R9 + CMPL (BX)(R10*1), R9 + JNE matchlen_match2_repeat_extend_calcBlockSize + SUBL $0x04, DI + LEAL 4(R10), R10 + +matchlen_match2_repeat_extend_calcBlockSize: + CMPL DI, $0x02 + JB matchlen_match1_repeat_extend_calcBlockSize + MOVW (R8)(R10*1), R9 + CMPW (BX)(R10*1), R9 + JNE matchlen_match1_repeat_extend_calcBlockSize + SUBL $0x02, DI + LEAL 2(R10), R10 + +matchlen_match1_repeat_extend_calcBlockSize: + CMPL DI, $0x01 + JB repeat_extend_forward_end_calcBlockSize + MOVB (R8)(R10*1), R9 + CMPB (BX)(R10*1), R9 + JNE repeat_extend_forward_end_calcBlockSize + LEAL 1(R10), R10 + +repeat_extend_forward_end_calcBlockSize: + ADDL R10, CX + MOVL CX, BX + SUBL SI, BX + MOVL 16(SP), SI + + // emitCopy + CMPL SI, $0x00010000 + JB two_byte_offset_repeat_as_copy_calcBlockSize + +four_bytes_loop_back_repeat_as_copy_calcBlockSize: + CMPL BX, $0x40 + JBE four_bytes_remain_repeat_as_copy_calcBlockSize + LEAL -64(BX), BX + ADDQ $0x05, AX + CMPL BX, $0x04 + JB four_bytes_remain_repeat_as_copy_calcBlockSize + JMP four_bytes_loop_back_repeat_as_copy_calcBlockSize + +four_bytes_remain_repeat_as_copy_calcBlockSize: + TESTL BX, BX + JZ repeat_end_emit_calcBlockSize + XORL BX, BX + ADDQ $0x05, AX + JMP repeat_end_emit_calcBlockSize + +two_byte_offset_repeat_as_copy_calcBlockSize: + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_calcBlockSize + LEAL -60(BX), BX + ADDQ $0x03, AX + JMP two_byte_offset_repeat_as_copy_calcBlockSize + +two_byte_offset_short_repeat_as_copy_calcBlockSize: + MOVL BX, DI + SHLL $0x02, DI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_calcBlockSize + CMPL SI, $0x00000800 + JAE emit_copy_three_repeat_as_copy_calcBlockSize + ADDQ $0x02, AX + JMP repeat_end_emit_calcBlockSize + +emit_copy_three_repeat_as_copy_calcBlockSize: + ADDQ $0x03, AX + +repeat_end_emit_calcBlockSize: + MOVL CX, 12(SP) + JMP search_loop_calcBlockSize + +no_repeat_found_calcBlockSize: + CMPL (DX)(BX*1), SI + JEQ candidate_match_calcBlockSize + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI + JEQ candidate2_match_calcBlockSize + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI + JEQ candidate3_match_calcBlockSize + MOVL 20(SP), CX + JMP search_loop_calcBlockSize + +candidate3_match_calcBlockSize: + ADDL $0x02, CX + JMP candidate_match_calcBlockSize + +candidate2_match_calcBlockSize: + MOVL R8, 24(SP)(R9*4) + INCL CX + MOVL DI, BX + +candidate_match_calcBlockSize: + MOVL 12(SP), SI + TESTL BX, BX + JZ match_extend_back_end_calcBlockSize + +match_extend_back_loop_calcBlockSize: + CMPL CX, SI + JBE match_extend_back_end_calcBlockSize + MOVB -1(DX)(BX*1), DI + MOVB -1(DX)(CX*1), R8 + CMPB DI, R8 + JNE match_extend_back_end_calcBlockSize + LEAL -1(CX), CX + DECL BX + JZ match_extend_back_end_calcBlockSize + JMP match_extend_back_loop_calcBlockSize + +match_extend_back_end_calcBlockSize: + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 5(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_calcBlockSize + MOVQ $0x00000000, ret+24(FP) + RET + +match_dst_size_check_calcBlockSize: + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI + JEQ emit_literal_done_match_emit_calcBlockSize + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c + JB one_byte_match_emit_calcBlockSize + CMPL SI, $0x00000100 + JB two_bytes_match_emit_calcBlockSize + CMPL SI, $0x00010000 + JB three_bytes_match_emit_calcBlockSize + CMPL SI, $0x01000000 + JB four_bytes_match_emit_calcBlockSize + ADDQ $0x05, AX + JMP memmove_long_match_emit_calcBlockSize + +four_bytes_match_emit_calcBlockSize: + ADDQ $0x04, AX + JMP memmove_long_match_emit_calcBlockSize + +three_bytes_match_emit_calcBlockSize: + ADDQ $0x03, AX + JMP memmove_long_match_emit_calcBlockSize + +two_bytes_match_emit_calcBlockSize: + ADDQ $0x02, AX + CMPL SI, $0x40 + JB memmove_match_emit_calcBlockSize + JMP memmove_long_match_emit_calcBlockSize + +one_byte_match_emit_calcBlockSize: + ADDQ $0x01, AX + +memmove_match_emit_calcBlockSize: + LEAQ (AX)(R8*1), AX + JMP emit_literal_done_match_emit_calcBlockSize + +memmove_long_match_emit_calcBlockSize: + LEAQ (AX)(R8*1), AX + +emit_literal_done_match_emit_calcBlockSize: +match_nolit_loop_calcBlockSize: + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) + ADDL $0x04, CX + ADDL $0x04, BX + MOVQ src_len+8(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX + + // matchLen + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_calcBlockSize + +matchlen_loopback_match_nolit_calcBlockSize: + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 + JZ matchlen_loop_match_nolit_calcBlockSize + +#ifdef GOAMD64_v3 + TZCNTQ R8, R8 + +#else + BSFQ R8, R8 + +#endif + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 + JMP match_nolit_end_calcBlockSize + +matchlen_loop_match_nolit_calcBlockSize: + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_calcBlockSize + JZ match_nolit_end_calcBlockSize + +matchlen_match4_match_nolit_calcBlockSize: + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_calcBlockSize + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 + JNE matchlen_match2_match_nolit_calcBlockSize + SUBL $0x04, SI + LEAL 4(R9), R9 + +matchlen_match2_match_nolit_calcBlockSize: + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_calcBlockSize + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 + JNE matchlen_match1_match_nolit_calcBlockSize + SUBL $0x02, SI + LEAL 2(R9), R9 + +matchlen_match1_match_nolit_calcBlockSize: + CMPL SI, $0x01 + JB match_nolit_end_calcBlockSize + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 + JNE match_nolit_end_calcBlockSize + LEAL 1(R9), R9 + +match_nolit_end_calcBlockSize: + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 + MOVL CX, 12(SP) + + // emitCopy + CMPL BX, $0x00010000 + JB two_byte_offset_match_nolit_calcBlockSize + +four_bytes_loop_back_match_nolit_calcBlockSize: + CMPL R9, $0x40 + JBE four_bytes_remain_match_nolit_calcBlockSize + LEAL -64(R9), R9 + ADDQ $0x05, AX + CMPL R9, $0x04 + JB four_bytes_remain_match_nolit_calcBlockSize + JMP four_bytes_loop_back_match_nolit_calcBlockSize + +four_bytes_remain_match_nolit_calcBlockSize: + TESTL R9, R9 + JZ match_nolit_emitcopy_end_calcBlockSize + XORL BX, BX + ADDQ $0x05, AX + JMP match_nolit_emitcopy_end_calcBlockSize + +two_byte_offset_match_nolit_calcBlockSize: + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_calcBlockSize + LEAL -60(R9), R9 + ADDQ $0x03, AX + JMP two_byte_offset_match_nolit_calcBlockSize + +two_byte_offset_short_match_nolit_calcBlockSize: + MOVL R9, SI + SHLL $0x02, SI + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_calcBlockSize + CMPL BX, $0x00000800 + JAE emit_copy_three_match_nolit_calcBlockSize + ADDQ $0x02, AX + JMP match_nolit_emitcopy_end_calcBlockSize + +emit_copy_three_match_nolit_calcBlockSize: + ADDQ $0x03, AX + +match_nolit_emitcopy_end_calcBlockSize: + CMPL CX, 8(SP) + JAE emit_remainder_calcBlockSize + MOVQ -2(DX)(CX*1), SI + CMPQ AX, (SP) + JB match_nolit_dst_ok_calcBlockSize + MOVQ $0x00000000, ret+24(FP) + RET + +match_nolit_dst_ok_calcBlockSize: + MOVQ $0x0000cf1bbcdcbf9b, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x10, DI + IMULQ R8, DI + SHRQ $0x33, DI + SHLQ $0x10, BX + IMULQ R8, BX + SHRQ $0x33, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI + JEQ match_nolit_loop_calcBlockSize + INCL CX + JMP search_loop_calcBlockSize + +emit_remainder_calcBlockSize: + MOVQ src_len+8(FP), CX + SUBL 12(SP), CX + LEAQ 5(AX)(CX*1), CX + CMPQ CX, (SP) + JB emit_remainder_ok_calcBlockSize + MOVQ $0x00000000, ret+24(FP) + RET + +emit_remainder_ok_calcBlockSize: + MOVQ src_len+8(FP), CX + MOVL 12(SP), BX + CMPL BX, CX + JEQ emit_literal_done_emit_remainder_calcBlockSize + MOVL CX, SI + MOVL CX, 12(SP) + LEAQ (DX)(BX*1), CX + SUBL BX, SI + LEAL -1(SI), CX + CMPL CX, $0x3c + JB one_byte_emit_remainder_calcBlockSize + CMPL CX, $0x00000100 + JB two_bytes_emit_remainder_calcBlockSize + CMPL CX, $0x00010000 + JB three_bytes_emit_remainder_calcBlockSize + CMPL CX, $0x01000000 + JB four_bytes_emit_remainder_calcBlockSize + ADDQ $0x05, AX + JMP memmove_long_emit_remainder_calcBlockSize + +four_bytes_emit_remainder_calcBlockSize: + ADDQ $0x04, AX + JMP memmove_long_emit_remainder_calcBlockSize + +three_bytes_emit_remainder_calcBlockSize: + ADDQ $0x03, AX + JMP memmove_long_emit_remainder_calcBlockSize + +two_bytes_emit_remainder_calcBlockSize: + ADDQ $0x02, AX + CMPL CX, $0x40 + JB memmove_emit_remainder_calcBlockSize + JMP memmove_long_emit_remainder_calcBlockSize + +one_byte_emit_remainder_calcBlockSize: + ADDQ $0x01, AX + +memmove_emit_remainder_calcBlockSize: + LEAQ (AX)(SI*1), AX + JMP emit_literal_done_emit_remainder_calcBlockSize + +memmove_long_emit_remainder_calcBlockSize: + LEAQ (AX)(SI*1), AX + +emit_literal_done_emit_remainder_calcBlockSize: + MOVQ AX, ret+24(FP) + RET + +// func calcBlockSizeSmall(src []byte) int +// Requires: BMI, SSE2 +TEXT ·calcBlockSizeSmall(SB), $2072-32 + XORQ AX, AX + MOVQ $0x00000010, CX + LEAQ 24(SP), DX + PXOR X0, X0 + +zero_loop_calcBlockSizeSmall: + MOVOU X0, (DX) + MOVOU X0, 16(DX) + MOVOU X0, 32(DX) + MOVOU X0, 48(DX) + MOVOU X0, 64(DX) + MOVOU X0, 80(DX) + MOVOU X0, 96(DX) + MOVOU X0, 112(DX) + ADDQ $0x80, DX + DECQ CX + JNZ zero_loop_calcBlockSizeSmall + MOVL $0x00000000, 12(SP) + MOVQ src_len+8(FP), CX + LEAQ -9(CX), DX + LEAQ -8(CX), BX + MOVL BX, 8(SP) + SHRQ $0x05, CX + SUBL CX, DX + LEAQ (AX)(DX*1), DX + MOVQ DX, (SP) + MOVL $0x00000001, CX + MOVL CX, 16(SP) + MOVQ src_base+0(FP), DX + +search_loop_calcBlockSizeSmall: + MOVL CX, BX + SUBL 12(SP), BX + SHRL $0x04, BX + LEAL 4(CX)(BX*1), BX + CMPL BX, 8(SP) + JAE emit_remainder_calcBlockSizeSmall + MOVQ (DX)(CX*1), SI + MOVL BX, 20(SP) + MOVQ $0x9e3779b1, R8 + MOVQ SI, R9 + MOVQ SI, R10 + SHRQ $0x08, R10 + SHLQ $0x20, R9 + IMULQ R8, R9 + SHRQ $0x37, R9 + SHLQ $0x20, R10 + IMULQ R8, R10 + SHRQ $0x37, R10 + MOVL 24(SP)(R9*4), BX + MOVL 24(SP)(R10*4), DI + MOVL CX, 24(SP)(R9*4) + LEAL 1(CX), R9 + MOVL R9, 24(SP)(R10*4) + MOVQ SI, R9 + SHRQ $0x10, R9 + SHLQ $0x20, R9 + IMULQ R8, R9 + SHRQ $0x37, R9 + MOVL CX, R8 + SUBL 16(SP), R8 + MOVL 1(DX)(R8*1), R10 + MOVQ SI, R8 + SHRQ $0x08, R8 + CMPL R8, R10 + JNE no_repeat_found_calcBlockSizeSmall + LEAL 1(CX), SI + MOVL 12(SP), BX + MOVL SI, DI + SUBL 16(SP), DI + JZ repeat_extend_back_end_calcBlockSizeSmall + +repeat_extend_back_loop_calcBlockSizeSmall: + CMPL SI, BX + JBE repeat_extend_back_end_calcBlockSizeSmall + MOVB -1(DX)(DI*1), R8 + MOVB -1(DX)(SI*1), R9 + CMPB R8, R9 + JNE repeat_extend_back_end_calcBlockSizeSmall + LEAL -1(SI), SI + DECL DI + JNZ repeat_extend_back_loop_calcBlockSizeSmall + +repeat_extend_back_end_calcBlockSizeSmall: + MOVL 12(SP), BX + CMPL BX, SI + JEQ emit_literal_done_repeat_emit_calcBlockSizeSmall + MOVL SI, DI + MOVL SI, 12(SP) + LEAQ (DX)(BX*1), R8 + SUBL BX, DI + LEAL -1(DI), BX + CMPL BX, $0x3c + JB one_byte_repeat_emit_calcBlockSizeSmall + CMPL BX, $0x00000100 + JB two_bytes_repeat_emit_calcBlockSizeSmall + JB three_bytes_repeat_emit_calcBlockSizeSmall + +three_bytes_repeat_emit_calcBlockSizeSmall: + ADDQ $0x03, AX + JMP memmove_long_repeat_emit_calcBlockSizeSmall + +two_bytes_repeat_emit_calcBlockSizeSmall: + ADDQ $0x02, AX + CMPL BX, $0x40 + JB memmove_repeat_emit_calcBlockSizeSmall + JMP memmove_long_repeat_emit_calcBlockSizeSmall + +one_byte_repeat_emit_calcBlockSizeSmall: + ADDQ $0x01, AX + +memmove_repeat_emit_calcBlockSizeSmall: + LEAQ (AX)(DI*1), AX + JMP emit_literal_done_repeat_emit_calcBlockSizeSmall + +memmove_long_repeat_emit_calcBlockSizeSmall: + LEAQ (AX)(DI*1), AX + +emit_literal_done_repeat_emit_calcBlockSizeSmall: + ADDL $0x05, CX + MOVL CX, BX + SUBL 16(SP), BX + MOVQ src_len+8(FP), DI + SUBL CX, DI + LEAQ (DX)(CX*1), R8 + LEAQ (DX)(BX*1), BX + + // matchLen + XORL R10, R10 + CMPL DI, $0x08 + JB matchlen_match4_repeat_extend_calcBlockSizeSmall + +matchlen_loopback_repeat_extend_calcBlockSizeSmall: + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + TESTQ R9, R9 + JZ matchlen_loop_repeat_extend_calcBlockSizeSmall + +#ifdef GOAMD64_v3 + TZCNTQ R9, R9 + +#else + BSFQ R9, R9 + +#endif + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 + JMP repeat_extend_forward_end_calcBlockSizeSmall + +matchlen_loop_repeat_extend_calcBlockSizeSmall: + LEAL -8(DI), DI + LEAL 8(R10), R10 + CMPL DI, $0x08 + JAE matchlen_loopback_repeat_extend_calcBlockSizeSmall + JZ repeat_extend_forward_end_calcBlockSizeSmall + +matchlen_match4_repeat_extend_calcBlockSizeSmall: + CMPL DI, $0x04 + JB matchlen_match2_repeat_extend_calcBlockSizeSmall + MOVL (R8)(R10*1), R9 + CMPL (BX)(R10*1), R9 + JNE matchlen_match2_repeat_extend_calcBlockSizeSmall + SUBL $0x04, DI + LEAL 4(R10), R10 + +matchlen_match2_repeat_extend_calcBlockSizeSmall: + CMPL DI, $0x02 + JB matchlen_match1_repeat_extend_calcBlockSizeSmall + MOVW (R8)(R10*1), R9 + CMPW (BX)(R10*1), R9 + JNE matchlen_match1_repeat_extend_calcBlockSizeSmall + SUBL $0x02, DI + LEAL 2(R10), R10 + +matchlen_match1_repeat_extend_calcBlockSizeSmall: + CMPL DI, $0x01 + JB repeat_extend_forward_end_calcBlockSizeSmall + MOVB (R8)(R10*1), R9 + CMPB (BX)(R10*1), R9 + JNE repeat_extend_forward_end_calcBlockSizeSmall + LEAL 1(R10), R10 + +repeat_extend_forward_end_calcBlockSizeSmall: + ADDL R10, CX + MOVL CX, BX + SUBL SI, BX + MOVL 16(SP), SI + + // emitCopy +two_byte_offset_repeat_as_copy_calcBlockSizeSmall: + CMPL BX, $0x40 + JBE two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall + LEAL -60(BX), BX + ADDQ $0x03, AX + JMP two_byte_offset_repeat_as_copy_calcBlockSizeSmall + +two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall: + MOVL BX, SI + SHLL $0x02, SI + CMPL BX, $0x0c + JAE emit_copy_three_repeat_as_copy_calcBlockSizeSmall + ADDQ $0x02, AX + JMP repeat_end_emit_calcBlockSizeSmall + +emit_copy_three_repeat_as_copy_calcBlockSizeSmall: + ADDQ $0x03, AX + +repeat_end_emit_calcBlockSizeSmall: + MOVL CX, 12(SP) + JMP search_loop_calcBlockSizeSmall + +no_repeat_found_calcBlockSizeSmall: + CMPL (DX)(BX*1), SI + JEQ candidate_match_calcBlockSizeSmall + SHRQ $0x08, SI + MOVL 24(SP)(R9*4), BX + LEAL 2(CX), R8 + CMPL (DX)(DI*1), SI + JEQ candidate2_match_calcBlockSizeSmall + MOVL R8, 24(SP)(R9*4) + SHRQ $0x08, SI + CMPL (DX)(BX*1), SI + JEQ candidate3_match_calcBlockSizeSmall + MOVL 20(SP), CX + JMP search_loop_calcBlockSizeSmall + +candidate3_match_calcBlockSizeSmall: + ADDL $0x02, CX + JMP candidate_match_calcBlockSizeSmall + +candidate2_match_calcBlockSizeSmall: + MOVL R8, 24(SP)(R9*4) + INCL CX + MOVL DI, BX + +candidate_match_calcBlockSizeSmall: + MOVL 12(SP), SI + TESTL BX, BX + JZ match_extend_back_end_calcBlockSizeSmall + +match_extend_back_loop_calcBlockSizeSmall: + CMPL CX, SI + JBE match_extend_back_end_calcBlockSizeSmall + MOVB -1(DX)(BX*1), DI + MOVB -1(DX)(CX*1), R8 + CMPB DI, R8 + JNE match_extend_back_end_calcBlockSizeSmall + LEAL -1(CX), CX + DECL BX + JZ match_extend_back_end_calcBlockSizeSmall + JMP match_extend_back_loop_calcBlockSizeSmall + +match_extend_back_end_calcBlockSizeSmall: + MOVL CX, SI + SUBL 12(SP), SI + LEAQ 3(AX)(SI*1), SI + CMPQ SI, (SP) + JB match_dst_size_check_calcBlockSizeSmall + MOVQ $0x00000000, ret+24(FP) + RET + +match_dst_size_check_calcBlockSizeSmall: + MOVL CX, SI + MOVL 12(SP), DI + CMPL DI, SI + JEQ emit_literal_done_match_emit_calcBlockSizeSmall + MOVL SI, R8 + MOVL SI, 12(SP) + LEAQ (DX)(DI*1), SI + SUBL DI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c + JB one_byte_match_emit_calcBlockSizeSmall + CMPL SI, $0x00000100 + JB two_bytes_match_emit_calcBlockSizeSmall + JB three_bytes_match_emit_calcBlockSizeSmall + +three_bytes_match_emit_calcBlockSizeSmall: + ADDQ $0x03, AX + JMP memmove_long_match_emit_calcBlockSizeSmall + +two_bytes_match_emit_calcBlockSizeSmall: + ADDQ $0x02, AX + CMPL SI, $0x40 + JB memmove_match_emit_calcBlockSizeSmall + JMP memmove_long_match_emit_calcBlockSizeSmall + +one_byte_match_emit_calcBlockSizeSmall: + ADDQ $0x01, AX + +memmove_match_emit_calcBlockSizeSmall: + LEAQ (AX)(R8*1), AX + JMP emit_literal_done_match_emit_calcBlockSizeSmall + +memmove_long_match_emit_calcBlockSizeSmall: + LEAQ (AX)(R8*1), AX + +emit_literal_done_match_emit_calcBlockSizeSmall: +match_nolit_loop_calcBlockSizeSmall: + MOVL CX, SI + SUBL BX, SI + MOVL SI, 16(SP) + ADDL $0x04, CX + ADDL $0x04, BX + MOVQ src_len+8(FP), SI + SUBL CX, SI + LEAQ (DX)(CX*1), DI + LEAQ (DX)(BX*1), BX + + // matchLen + XORL R9, R9 + CMPL SI, $0x08 + JB matchlen_match4_match_nolit_calcBlockSizeSmall + +matchlen_loopback_match_nolit_calcBlockSizeSmall: + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + TESTQ R8, R8 + JZ matchlen_loop_match_nolit_calcBlockSizeSmall + +#ifdef GOAMD64_v3 + TZCNTQ R8, R8 + +#else + BSFQ R8, R8 + +#endif + SARQ $0x03, R8 + LEAL (R9)(R8*1), R9 + JMP match_nolit_end_calcBlockSizeSmall + +matchlen_loop_match_nolit_calcBlockSizeSmall: + LEAL -8(SI), SI + LEAL 8(R9), R9 + CMPL SI, $0x08 + JAE matchlen_loopback_match_nolit_calcBlockSizeSmall + JZ match_nolit_end_calcBlockSizeSmall + +matchlen_match4_match_nolit_calcBlockSizeSmall: + CMPL SI, $0x04 + JB matchlen_match2_match_nolit_calcBlockSizeSmall + MOVL (DI)(R9*1), R8 + CMPL (BX)(R9*1), R8 + JNE matchlen_match2_match_nolit_calcBlockSizeSmall + SUBL $0x04, SI + LEAL 4(R9), R9 + +matchlen_match2_match_nolit_calcBlockSizeSmall: + CMPL SI, $0x02 + JB matchlen_match1_match_nolit_calcBlockSizeSmall + MOVW (DI)(R9*1), R8 + CMPW (BX)(R9*1), R8 + JNE matchlen_match1_match_nolit_calcBlockSizeSmall + SUBL $0x02, SI + LEAL 2(R9), R9 + +matchlen_match1_match_nolit_calcBlockSizeSmall: + CMPL SI, $0x01 + JB match_nolit_end_calcBlockSizeSmall + MOVB (DI)(R9*1), R8 + CMPB (BX)(R9*1), R8 + JNE match_nolit_end_calcBlockSizeSmall + LEAL 1(R9), R9 + +match_nolit_end_calcBlockSizeSmall: + ADDL R9, CX + MOVL 16(SP), BX + ADDL $0x04, R9 + MOVL CX, 12(SP) + + // emitCopy +two_byte_offset_match_nolit_calcBlockSizeSmall: + CMPL R9, $0x40 + JBE two_byte_offset_short_match_nolit_calcBlockSizeSmall + LEAL -60(R9), R9 + ADDQ $0x03, AX + JMP two_byte_offset_match_nolit_calcBlockSizeSmall + +two_byte_offset_short_match_nolit_calcBlockSizeSmall: + MOVL R9, BX + SHLL $0x02, BX + CMPL R9, $0x0c + JAE emit_copy_three_match_nolit_calcBlockSizeSmall + ADDQ $0x02, AX + JMP match_nolit_emitcopy_end_calcBlockSizeSmall + +emit_copy_three_match_nolit_calcBlockSizeSmall: + ADDQ $0x03, AX + +match_nolit_emitcopy_end_calcBlockSizeSmall: + CMPL CX, 8(SP) + JAE emit_remainder_calcBlockSizeSmall + MOVQ -2(DX)(CX*1), SI + CMPQ AX, (SP) + JB match_nolit_dst_ok_calcBlockSizeSmall + MOVQ $0x00000000, ret+24(FP) + RET + +match_nolit_dst_ok_calcBlockSizeSmall: + MOVQ $0x9e3779b1, R8 + MOVQ SI, DI + SHRQ $0x10, SI + MOVQ SI, BX + SHLQ $0x20, DI + IMULQ R8, DI + SHRQ $0x37, DI + SHLQ $0x20, BX + IMULQ R8, BX + SHRQ $0x37, BX + LEAL -2(CX), R8 + LEAQ 24(SP)(BX*4), R9 + MOVL (R9), BX + MOVL R8, 24(SP)(DI*4) + MOVL CX, (R9) + CMPL (DX)(BX*1), SI + JEQ match_nolit_loop_calcBlockSizeSmall + INCL CX + JMP search_loop_calcBlockSizeSmall + +emit_remainder_calcBlockSizeSmall: + MOVQ src_len+8(FP), CX + SUBL 12(SP), CX + LEAQ 3(AX)(CX*1), CX + CMPQ CX, (SP) + JB emit_remainder_ok_calcBlockSizeSmall + MOVQ $0x00000000, ret+24(FP) + RET + +emit_remainder_ok_calcBlockSizeSmall: + MOVQ src_len+8(FP), CX + MOVL 12(SP), BX + CMPL BX, CX + JEQ emit_literal_done_emit_remainder_calcBlockSizeSmall + MOVL CX, SI + MOVL CX, 12(SP) + LEAQ (DX)(BX*1), CX + SUBL BX, SI + LEAL -1(SI), CX + CMPL CX, $0x3c + JB one_byte_emit_remainder_calcBlockSizeSmall + CMPL CX, $0x00000100 + JB two_bytes_emit_remainder_calcBlockSizeSmall + JB three_bytes_emit_remainder_calcBlockSizeSmall + +three_bytes_emit_remainder_calcBlockSizeSmall: + ADDQ $0x03, AX + JMP memmove_long_emit_remainder_calcBlockSizeSmall + +two_bytes_emit_remainder_calcBlockSizeSmall: + ADDQ $0x02, AX + CMPL CX, $0x40 + JB memmove_emit_remainder_calcBlockSizeSmall + JMP memmove_long_emit_remainder_calcBlockSizeSmall + +one_byte_emit_remainder_calcBlockSizeSmall: + ADDQ $0x01, AX + +memmove_emit_remainder_calcBlockSizeSmall: + LEAQ (AX)(SI*1), AX + JMP emit_literal_done_emit_remainder_calcBlockSizeSmall + +memmove_long_emit_remainder_calcBlockSizeSmall: + LEAQ (AX)(SI*1), AX + +emit_literal_done_emit_remainder_calcBlockSizeSmall: + MOVQ AX, ret+24(FP) + RET + // func emitLiteral(dst []byte, lit []byte) int // Requires: SSE2 TEXT ·emitLiteral(SB), NOSPLIT, $0-56 @@ -17162,13 +18234,13 @@ TEXT ·emitLiteral(SB), NOSPLIT, $0-56 MOVL DX, BX LEAL -1(DX), SI CMPL SI, $0x3c - JLT one_byte_standalone + JB one_byte_standalone CMPL SI, $0x00000100 - JLT two_bytes_standalone + JB two_bytes_standalone CMPL SI, $0x00010000 - JLT three_bytes_standalone + JB three_bytes_standalone CMPL SI, $0x01000000 - JLT four_bytes_standalone + JB four_bytes_standalone MOVB $0xfc, (AX) MOVL SI, 1(AX) ADDQ $0x05, BX @@ -17198,7 +18270,7 @@ two_bytes_standalone: ADDQ $0x02, BX ADDQ $0x02, AX CMPL SI, $0x40 - JL memmove_standalone + JB memmove_standalone JMP memmove_long_standalone one_byte_standalone: @@ -17329,22 +18401,21 @@ emit_repeat_again_standalone: MOVL DX, SI LEAL -4(DX), DX CMPL SI, $0x08 - JLE repeat_two_standalone + JBE repeat_two_standalone CMPL SI, $0x0c - JGE cant_repeat_two_offset_standalone + JAE cant_repeat_two_offset_standalone CMPL CX, $0x00000800 - JLT repeat_two_offset_standalone + JB repeat_two_offset_standalone cant_repeat_two_offset_standalone: CMPL DX, $0x00000104 - JLT repeat_three_standalone + JB repeat_three_standalone CMPL DX, $0x00010100 - JLT repeat_four_standalone + JB repeat_four_standalone CMPL DX, $0x0100ffff - JLT repeat_five_standalone + JB repeat_five_standalone LEAL -16842747(DX), DX - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX ADDQ $0x05, BX @@ -17409,40 +18480,37 @@ TEXT ·emitCopy(SB), NOSPLIT, $0-48 // emitCopy CMPL CX, $0x00010000 - JL two_byte_offset_standalone - -four_bytes_loop_back_standalone: + JB two_byte_offset_standalone CMPL DX, $0x40 - JLE four_bytes_remain_standalone + JBE four_bytes_remain_standalone MOVB $0xff, (AX) MOVL CX, 1(AX) LEAL -64(DX), DX ADDQ $0x05, BX ADDQ $0x05, AX CMPL DX, $0x04 - JL four_bytes_remain_standalone + JB four_bytes_remain_standalone // emitRepeat emit_repeat_again_standalone_emit_copy: MOVL DX, SI LEAL -4(DX), DX CMPL SI, $0x08 - JLE repeat_two_standalone_emit_copy + JBE repeat_two_standalone_emit_copy CMPL SI, $0x0c - JGE cant_repeat_two_offset_standalone_emit_copy + JAE cant_repeat_two_offset_standalone_emit_copy CMPL CX, $0x00000800 - JLT repeat_two_offset_standalone_emit_copy + JB repeat_two_offset_standalone_emit_copy cant_repeat_two_offset_standalone_emit_copy: CMPL DX, $0x00000104 - JLT repeat_three_standalone_emit_copy + JB repeat_three_standalone_emit_copy CMPL DX, $0x00010100 - JLT repeat_four_standalone_emit_copy + JB repeat_four_standalone_emit_copy CMPL DX, $0x0100ffff - JLT repeat_five_standalone_emit_copy + JB repeat_five_standalone_emit_copy LEAL -16842747(DX), DX - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX ADDQ $0x05, BX @@ -17494,13 +18562,12 @@ repeat_two_offset_standalone_emit_copy: ADDQ $0x02, BX ADDQ $0x02, AX JMP gen_emit_copy_end - JMP four_bytes_loop_back_standalone four_bytes_remain_standalone: TESTL DX, DX JZ gen_emit_copy_end - MOVB $0x03, SI - LEAL -4(SI)(DX*4), DX + XORL SI, SI + LEAL -1(SI)(DX*4), DX MOVB DL, (AX) MOVL CX, 1(AX) ADDQ $0x05, BX @@ -17509,7 +18576,7 @@ four_bytes_remain_standalone: two_byte_offset_standalone: CMPL DX, $0x40 - JLE two_byte_offset_short_standalone + JBE two_byte_offset_short_standalone CMPL CX, $0x00000800 JAE long_offset_short_standalone MOVL $0x00000001, SI @@ -17532,22 +18599,21 @@ emit_repeat_again_standalone_emit_copy_short_2b: MOVL DX, SI LEAL -4(DX), DX CMPL SI, $0x08 - JLE repeat_two_standalone_emit_copy_short_2b + JBE repeat_two_standalone_emit_copy_short_2b CMPL SI, $0x0c - JGE cant_repeat_two_offset_standalone_emit_copy_short_2b + JAE cant_repeat_two_offset_standalone_emit_copy_short_2b CMPL CX, $0x00000800 - JLT repeat_two_offset_standalone_emit_copy_short_2b + JB repeat_two_offset_standalone_emit_copy_short_2b cant_repeat_two_offset_standalone_emit_copy_short_2b: CMPL DX, $0x00000104 - JLT repeat_three_standalone_emit_copy_short_2b + JB repeat_three_standalone_emit_copy_short_2b CMPL DX, $0x00010100 - JLT repeat_four_standalone_emit_copy_short_2b + JB repeat_four_standalone_emit_copy_short_2b CMPL DX, $0x0100ffff - JLT repeat_five_standalone_emit_copy_short_2b + JB repeat_five_standalone_emit_copy_short_2b LEAL -16842747(DX), DX - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX ADDQ $0x05, BX @@ -17612,22 +18678,21 @@ emit_repeat_again_standalone_emit_copy_short: MOVL DX, SI LEAL -4(DX), DX CMPL SI, $0x08 - JLE repeat_two_standalone_emit_copy_short + JBE repeat_two_standalone_emit_copy_short CMPL SI, $0x0c - JGE cant_repeat_two_offset_standalone_emit_copy_short + JAE cant_repeat_two_offset_standalone_emit_copy_short CMPL CX, $0x00000800 - JLT repeat_two_offset_standalone_emit_copy_short + JB repeat_two_offset_standalone_emit_copy_short cant_repeat_two_offset_standalone_emit_copy_short: CMPL DX, $0x00000104 - JLT repeat_three_standalone_emit_copy_short + JB repeat_three_standalone_emit_copy_short CMPL DX, $0x00010100 - JLT repeat_four_standalone_emit_copy_short + JB repeat_four_standalone_emit_copy_short CMPL DX, $0x0100ffff - JLT repeat_five_standalone_emit_copy_short + JB repeat_five_standalone_emit_copy_short LEAL -16842747(DX), DX - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) + MOVL $0xfffb001d, (AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX ADDQ $0x05, BX @@ -17679,28 +18744,27 @@ repeat_two_offset_standalone_emit_copy_short: ADDQ $0x02, BX ADDQ $0x02, AX JMP gen_emit_copy_end - JMP two_byte_offset_standalone two_byte_offset_short_standalone: + MOVL DX, SI + SHLL $0x02, SI CMPL DX, $0x0c - JGE emit_copy_three_standalone + JAE emit_copy_three_standalone CMPL CX, $0x00000800 - JGE emit_copy_three_standalone - MOVB $0x01, SI - LEAL -16(SI)(DX*4), DX + JAE emit_copy_three_standalone + LEAL -15(SI), SI MOVB CL, 1(AX) SHRL $0x08, CX SHLL $0x05, CX - ORL CX, DX - MOVB DL, (AX) + ORL CX, SI + MOVB SI, (AX) ADDQ $0x02, BX ADDQ $0x02, AX JMP gen_emit_copy_end emit_copy_three_standalone: - MOVB $0x02, SI - LEAL -4(SI)(DX*4), DX - MOVB DL, (AX) + LEAL -2(SI), SI + MOVB SI, (AX) MOVW CX, 1(AX) ADDQ $0x03, BX ADDQ $0x03, AX @@ -17718,25 +18782,25 @@ TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48 // emitCopy CMPL CX, $0x00010000 - JL two_byte_offset_standalone_snappy + JB two_byte_offset_standalone_snappy four_bytes_loop_back_standalone_snappy: CMPL DX, $0x40 - JLE four_bytes_remain_standalone_snappy + JBE four_bytes_remain_standalone_snappy MOVB $0xff, (AX) MOVL CX, 1(AX) LEAL -64(DX), DX ADDQ $0x05, BX ADDQ $0x05, AX CMPL DX, $0x04 - JL four_bytes_remain_standalone_snappy + JB four_bytes_remain_standalone_snappy JMP four_bytes_loop_back_standalone_snappy four_bytes_remain_standalone_snappy: TESTL DX, DX JZ gen_emit_copy_end_snappy - MOVB $0x03, SI - LEAL -4(SI)(DX*4), DX + XORL SI, SI + LEAL -1(SI)(DX*4), DX MOVB DL, (AX) MOVL CX, 1(AX) ADDQ $0x05, BX @@ -17745,7 +18809,7 @@ four_bytes_remain_standalone_snappy: two_byte_offset_standalone_snappy: CMPL DX, $0x40 - JLE two_byte_offset_short_standalone_snappy + JBE two_byte_offset_short_standalone_snappy MOVB $0xee, (AX) MOVW CX, 1(AX) LEAL -60(DX), DX @@ -17754,25 +18818,25 @@ two_byte_offset_standalone_snappy: JMP two_byte_offset_standalone_snappy two_byte_offset_short_standalone_snappy: + MOVL DX, SI + SHLL $0x02, SI CMPL DX, $0x0c - JGE emit_copy_three_standalone_snappy + JAE emit_copy_three_standalone_snappy CMPL CX, $0x00000800 - JGE emit_copy_three_standalone_snappy - MOVB $0x01, SI - LEAL -16(SI)(DX*4), DX + JAE emit_copy_three_standalone_snappy + LEAL -15(SI), SI MOVB CL, 1(AX) SHRL $0x08, CX SHLL $0x05, CX - ORL CX, DX - MOVB DL, (AX) + ORL CX, SI + MOVB SI, (AX) ADDQ $0x02, BX ADDQ $0x02, AX JMP gen_emit_copy_end_snappy emit_copy_three_standalone_snappy: - MOVB $0x02, SI - LEAL -4(SI)(DX*4), DX - MOVB DL, (AX) + LEAL -2(SI), SI + MOVB SI, (AX) MOVW CX, 1(AX) ADDQ $0x03, BX ADDQ $0x03, AX @@ -17791,7 +18855,7 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 // matchLen XORL SI, SI CMPL DX, $0x08 - JL matchlen_match4_standalone + JB matchlen_match4_standalone matchlen_loopback_standalone: MOVQ (AX)(SI*1), BX @@ -17814,12 +18878,12 @@ matchlen_loop_standalone: LEAL -8(DX), DX LEAL 8(SI), SI CMPL DX, $0x08 - JGE matchlen_loopback_standalone + JAE matchlen_loopback_standalone JZ gen_match_len_end matchlen_match4_standalone: CMPL DX, $0x04 - JL matchlen_match2_standalone + JB matchlen_match2_standalone MOVL (AX)(SI*1), BX CMPL (CX)(SI*1), BX JNE matchlen_match2_standalone @@ -17828,7 +18892,7 @@ matchlen_match4_standalone: matchlen_match2_standalone: CMPL DX, $0x02 - JL matchlen_match1_standalone + JB matchlen_match1_standalone MOVW (AX)(SI*1), BX CMPW (CX)(SI*1), BX JNE matchlen_match1_standalone @@ -17837,7 +18901,7 @@ matchlen_match2_standalone: matchlen_match1_standalone: CMPL DX, $0x01 - JL gen_match_len_end + JB gen_match_len_end MOVB (AX)(SI*1), BL CMPB (CX)(SI*1), BL JNE gen_match_len_end @@ -17846,3 +18910,1505 @@ matchlen_match1_standalone: gen_match_len_end: MOVQ SI, ret+48(FP) RET + +// func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) +// Requires: SSE2 +TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64 + XORQ SI, SI + MOVQ dst_base+0(FP), AX + MOVQ dst_len+8(FP), CX + MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), BX + LEAQ (DX)(BX*1), BX + LEAQ -10(AX)(CX*1), CX + XORQ DI, DI + +lz4_s2_loop: + CMPQ DX, BX + JAE lz4_s2_corrupt + CMPQ AX, CX + JAE lz4_s2_dstfull + MOVBQZX (DX), R8 + MOVQ R8, R9 + MOVQ R8, R10 + SHRQ $0x04, R9 + ANDQ $0x0f, R10 + CMPQ R8, $0xf0 + JB lz4_s2_ll_end + +lz4_s2_ll_loop: + INCQ DX + CMPQ DX, BX + JAE lz4_s2_corrupt + MOVBQZX (DX), R8 + ADDQ R8, R9 + CMPQ R8, $0xff + JEQ lz4_s2_ll_loop + +lz4_s2_ll_end: + LEAQ (DX)(R9*1), R8 + ADDQ $0x04, R10 + CMPQ R8, BX + JAE lz4_s2_corrupt + INCQ DX + INCQ R8 + TESTQ R9, R9 + JZ lz4_s2_lits_done + LEAQ (AX)(R9*1), R11 + CMPQ R11, CX + JAE lz4_s2_dstfull + ADDQ R9, SI + LEAL -1(R9), R11 + CMPL R11, $0x3c + JB one_byte_lz4_s2 + CMPL R11, $0x00000100 + JB two_bytes_lz4_s2 + CMPL R11, $0x00010000 + JB three_bytes_lz4_s2 + CMPL R11, $0x01000000 + JB four_bytes_lz4_s2 + MOVB $0xfc, (AX) + MOVL R11, 1(AX) + ADDQ $0x05, AX + JMP memmove_long_lz4_s2 + +four_bytes_lz4_s2: + MOVL R11, R12 + SHRL $0x10, R12 + MOVB $0xf8, (AX) + MOVW R11, 1(AX) + MOVB R12, 3(AX) + ADDQ $0x04, AX + JMP memmove_long_lz4_s2 + +three_bytes_lz4_s2: + MOVB $0xf4, (AX) + MOVW R11, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_lz4_s2 + +two_bytes_lz4_s2: + MOVB $0xf0, (AX) + MOVB R11, 1(AX) + ADDQ $0x02, AX + CMPL R11, $0x40 + JB memmove_lz4_s2 + JMP memmove_long_lz4_s2 + +one_byte_lz4_s2: + SHLB $0x02, R11 + MOVB R11, (AX) + ADDQ $0x01, AX + +memmove_lz4_s2: + LEAQ (AX)(R9*1), R11 + + // genMemMoveShort + CMPQ R9, $0x08 + JBE emit_lit_memmove_lz4_s2_memmove_move_8 + CMPQ R9, $0x10 + JBE emit_lit_memmove_lz4_s2_memmove_move_8through16 + CMPQ R9, $0x20 + JBE emit_lit_memmove_lz4_s2_memmove_move_17through32 + JMP emit_lit_memmove_lz4_s2_memmove_move_33through64 + +emit_lit_memmove_lz4_s2_memmove_move_8: + MOVQ (DX), R12 + MOVQ R12, (AX) + JMP memmove_end_copy_lz4_s2 + +emit_lit_memmove_lz4_s2_memmove_move_8through16: + MOVQ (DX), R12 + MOVQ -8(DX)(R9*1), DX + MOVQ R12, (AX) + MOVQ DX, -8(AX)(R9*1) + JMP memmove_end_copy_lz4_s2 + +emit_lit_memmove_lz4_s2_memmove_move_17through32: + MOVOU (DX), X0 + MOVOU -16(DX)(R9*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R9*1) + JMP memmove_end_copy_lz4_s2 + +emit_lit_memmove_lz4_s2_memmove_move_33through64: + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R9*1), X2 + MOVOU -16(DX)(R9*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R9*1) + MOVOU X3, -16(AX)(R9*1) + +memmove_end_copy_lz4_s2: + MOVQ R11, AX + JMP lz4_s2_lits_emit_done + +memmove_long_lz4_s2: + LEAQ (AX)(R9*1), R11 + + // genMemMoveLong + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R9*1), X2 + MOVOU -16(DX)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ AX, R12 + ANDL $0x0000001f, R12 + MOVQ $0x00000040, R14 + SUBQ R12, R14 + DECQ R13 + JA emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32 + LEAQ -32(DX)(R14*1), R12 + LEAQ -32(AX)(R14*1), R15 + +emit_lit_memmove_long_lz4_s2large_big_loop_back: + MOVOU (R12), X4 + MOVOU 16(R12), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R12 + ADDQ $0x20, R14 + DECQ R13 + JNA emit_lit_memmove_long_lz4_s2large_big_loop_back + +emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32: + MOVOU -32(DX)(R14*1), X4 + MOVOU -16(DX)(R14*1), X5 + MOVOA X4, -32(AX)(R14*1) + MOVOA X5, -16(AX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 + JAE emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R9*1) + MOVOU X3, -16(AX)(R9*1) + MOVQ R11, AX + +lz4_s2_lits_emit_done: + MOVQ R8, DX + +lz4_s2_lits_done: + CMPQ DX, BX + JNE lz4_s2_match + CMPQ R10, $0x04 + JEQ lz4_s2_done + JMP lz4_s2_corrupt + +lz4_s2_match: + LEAQ 2(DX), R8 + CMPQ R8, BX + JAE lz4_s2_corrupt + MOVWQZX (DX), R9 + MOVQ R8, DX + TESTQ R9, R9 + JZ lz4_s2_corrupt + CMPQ R9, SI + JA lz4_s2_corrupt + CMPQ R10, $0x13 + JNE lz4_s2_ml_done + +lz4_s2_ml_loop: + MOVBQZX (DX), R8 + INCQ DX + ADDQ R8, R10 + CMPQ DX, BX + JAE lz4_s2_corrupt + CMPQ R8, $0xff + JEQ lz4_s2_ml_loop + +lz4_s2_ml_done: + ADDQ R10, SI + CMPQ R9, DI + JNE lz4_s2_docopy + + // emitRepeat +emit_repeat_again_lz4_s2: + MOVL R10, R8 + LEAL -4(R10), R10 + CMPL R8, $0x08 + JBE repeat_two_lz4_s2 + CMPL R8, $0x0c + JAE cant_repeat_two_offset_lz4_s2 + CMPL R9, $0x00000800 + JB repeat_two_offset_lz4_s2 + +cant_repeat_two_offset_lz4_s2: + CMPL R10, $0x00000104 + JB repeat_three_lz4_s2 + CMPL R10, $0x00010100 + JB repeat_four_lz4_s2 + CMPL R10, $0x0100ffff + JB repeat_five_lz4_s2 + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_lz4_s2 + +repeat_five_lz4_s2: + LEAL -65536(R10), R10 + MOVL R10, R9 + MOVW $0x001d, (AX) + MOVW R10, 2(AX) + SARL $0x10, R9 + MOVB R9, 4(AX) + ADDQ $0x05, AX + JMP lz4_s2_loop + +repeat_four_lz4_s2: + LEAL -256(R10), R10 + MOVW $0x0019, (AX) + MOVW R10, 2(AX) + ADDQ $0x04, AX + JMP lz4_s2_loop + +repeat_three_lz4_s2: + LEAL -4(R10), R10 + MOVW $0x0015, (AX) + MOVB R10, 2(AX) + ADDQ $0x03, AX + JMP lz4_s2_loop + +repeat_two_lz4_s2: + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (AX) + ADDQ $0x02, AX + JMP lz4_s2_loop + +repeat_two_offset_lz4_s2: + XORQ R8, R8 + LEAL 1(R8)(R10*4), R10 + MOVB R9, 1(AX) + SARL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R10 + MOVB R10, (AX) + ADDQ $0x02, AX + JMP lz4_s2_loop + +lz4_s2_docopy: + MOVQ R9, DI + + // emitCopy + CMPL R10, $0x40 + JBE two_byte_offset_short_lz4_s2 + CMPL R9, $0x00000800 + JAE long_offset_short_lz4_s2 + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB R9, 1(AX) + MOVL R9, R11 + SHRL $0x08, R11 + SHLL $0x05, R11 + ORL R11, R8 + MOVB R8, (AX) + ADDQ $0x02, AX + SUBL $0x08, R10 + + // emitRepeat + LEAL -4(R10), R10 + JMP cant_repeat_two_offset_lz4_s2_emit_copy_short_2b + +emit_repeat_again_lz4_s2_emit_copy_short_2b: + MOVL R10, R8 + LEAL -4(R10), R10 + CMPL R8, $0x08 + JBE repeat_two_lz4_s2_emit_copy_short_2b + CMPL R8, $0x0c + JAE cant_repeat_two_offset_lz4_s2_emit_copy_short_2b + CMPL R9, $0x00000800 + JB repeat_two_offset_lz4_s2_emit_copy_short_2b + +cant_repeat_two_offset_lz4_s2_emit_copy_short_2b: + CMPL R10, $0x00000104 + JB repeat_three_lz4_s2_emit_copy_short_2b + CMPL R10, $0x00010100 + JB repeat_four_lz4_s2_emit_copy_short_2b + CMPL R10, $0x0100ffff + JB repeat_five_lz4_s2_emit_copy_short_2b + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_lz4_s2_emit_copy_short_2b + +repeat_five_lz4_s2_emit_copy_short_2b: + LEAL -65536(R10), R10 + MOVL R10, R9 + MOVW $0x001d, (AX) + MOVW R10, 2(AX) + SARL $0x10, R9 + MOVB R9, 4(AX) + ADDQ $0x05, AX + JMP lz4_s2_loop + +repeat_four_lz4_s2_emit_copy_short_2b: + LEAL -256(R10), R10 + MOVW $0x0019, (AX) + MOVW R10, 2(AX) + ADDQ $0x04, AX + JMP lz4_s2_loop + +repeat_three_lz4_s2_emit_copy_short_2b: + LEAL -4(R10), R10 + MOVW $0x0015, (AX) + MOVB R10, 2(AX) + ADDQ $0x03, AX + JMP lz4_s2_loop + +repeat_two_lz4_s2_emit_copy_short_2b: + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (AX) + ADDQ $0x02, AX + JMP lz4_s2_loop + +repeat_two_offset_lz4_s2_emit_copy_short_2b: + XORQ R8, R8 + LEAL 1(R8)(R10*4), R10 + MOVB R9, 1(AX) + SARL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R10 + MOVB R10, (AX) + ADDQ $0x02, AX + JMP lz4_s2_loop + +long_offset_short_lz4_s2: + MOVB $0xee, (AX) + MOVW R9, 1(AX) + LEAL -60(R10), R10 + ADDQ $0x03, AX + + // emitRepeat +emit_repeat_again_lz4_s2_emit_copy_short: + MOVL R10, R8 + LEAL -4(R10), R10 + CMPL R8, $0x08 + JBE repeat_two_lz4_s2_emit_copy_short + CMPL R8, $0x0c + JAE cant_repeat_two_offset_lz4_s2_emit_copy_short + CMPL R9, $0x00000800 + JB repeat_two_offset_lz4_s2_emit_copy_short + +cant_repeat_two_offset_lz4_s2_emit_copy_short: + CMPL R10, $0x00000104 + JB repeat_three_lz4_s2_emit_copy_short + CMPL R10, $0x00010100 + JB repeat_four_lz4_s2_emit_copy_short + CMPL R10, $0x0100ffff + JB repeat_five_lz4_s2_emit_copy_short + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_lz4_s2_emit_copy_short + +repeat_five_lz4_s2_emit_copy_short: + LEAL -65536(R10), R10 + MOVL R10, R9 + MOVW $0x001d, (AX) + MOVW R10, 2(AX) + SARL $0x10, R9 + MOVB R9, 4(AX) + ADDQ $0x05, AX + JMP lz4_s2_loop + +repeat_four_lz4_s2_emit_copy_short: + LEAL -256(R10), R10 + MOVW $0x0019, (AX) + MOVW R10, 2(AX) + ADDQ $0x04, AX + JMP lz4_s2_loop + +repeat_three_lz4_s2_emit_copy_short: + LEAL -4(R10), R10 + MOVW $0x0015, (AX) + MOVB R10, 2(AX) + ADDQ $0x03, AX + JMP lz4_s2_loop + +repeat_two_lz4_s2_emit_copy_short: + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (AX) + ADDQ $0x02, AX + JMP lz4_s2_loop + +repeat_two_offset_lz4_s2_emit_copy_short: + XORQ R8, R8 + LEAL 1(R8)(R10*4), R10 + MOVB R9, 1(AX) + SARL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R10 + MOVB R10, (AX) + ADDQ $0x02, AX + JMP lz4_s2_loop + +two_byte_offset_short_lz4_s2: + MOVL R10, R8 + SHLL $0x02, R8 + CMPL R10, $0x0c + JAE emit_copy_three_lz4_s2 + CMPL R9, $0x00000800 + JAE emit_copy_three_lz4_s2 + LEAL -15(R8), R8 + MOVB R9, 1(AX) + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R8 + MOVB R8, (AX) + ADDQ $0x02, AX + JMP lz4_s2_loop + +emit_copy_three_lz4_s2: + LEAL -2(R8), R8 + MOVB R8, (AX) + MOVW R9, 1(AX) + ADDQ $0x03, AX + JMP lz4_s2_loop + +lz4_s2_done: + MOVQ dst_base+0(FP), CX + SUBQ CX, AX + MOVQ SI, uncompressed+48(FP) + MOVQ AX, dstUsed+56(FP) + RET + +lz4_s2_corrupt: + XORQ AX, AX + LEAQ -1(AX), SI + MOVQ SI, uncompressed+48(FP) + RET + +lz4_s2_dstfull: + XORQ AX, AX + LEAQ -2(AX), SI + MOVQ SI, uncompressed+48(FP) + RET + +// func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) +// Requires: SSE2 +TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64 + XORQ SI, SI + MOVQ dst_base+0(FP), AX + MOVQ dst_len+8(FP), CX + MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), BX + LEAQ (DX)(BX*1), BX + LEAQ -10(AX)(CX*1), CX + XORQ DI, DI + +lz4s_s2_loop: + CMPQ DX, BX + JAE lz4s_s2_corrupt + CMPQ AX, CX + JAE lz4s_s2_dstfull + MOVBQZX (DX), R8 + MOVQ R8, R9 + MOVQ R8, R10 + SHRQ $0x04, R9 + ANDQ $0x0f, R10 + CMPQ R8, $0xf0 + JB lz4s_s2_ll_end + +lz4s_s2_ll_loop: + INCQ DX + CMPQ DX, BX + JAE lz4s_s2_corrupt + MOVBQZX (DX), R8 + ADDQ R8, R9 + CMPQ R8, $0xff + JEQ lz4s_s2_ll_loop + +lz4s_s2_ll_end: + LEAQ (DX)(R9*1), R8 + ADDQ $0x03, R10 + CMPQ R8, BX + JAE lz4s_s2_corrupt + INCQ DX + INCQ R8 + TESTQ R9, R9 + JZ lz4s_s2_lits_done + LEAQ (AX)(R9*1), R11 + CMPQ R11, CX + JAE lz4s_s2_dstfull + ADDQ R9, SI + LEAL -1(R9), R11 + CMPL R11, $0x3c + JB one_byte_lz4s_s2 + CMPL R11, $0x00000100 + JB two_bytes_lz4s_s2 + CMPL R11, $0x00010000 + JB three_bytes_lz4s_s2 + CMPL R11, $0x01000000 + JB four_bytes_lz4s_s2 + MOVB $0xfc, (AX) + MOVL R11, 1(AX) + ADDQ $0x05, AX + JMP memmove_long_lz4s_s2 + +four_bytes_lz4s_s2: + MOVL R11, R12 + SHRL $0x10, R12 + MOVB $0xf8, (AX) + MOVW R11, 1(AX) + MOVB R12, 3(AX) + ADDQ $0x04, AX + JMP memmove_long_lz4s_s2 + +three_bytes_lz4s_s2: + MOVB $0xf4, (AX) + MOVW R11, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_lz4s_s2 + +two_bytes_lz4s_s2: + MOVB $0xf0, (AX) + MOVB R11, 1(AX) + ADDQ $0x02, AX + CMPL R11, $0x40 + JB memmove_lz4s_s2 + JMP memmove_long_lz4s_s2 + +one_byte_lz4s_s2: + SHLB $0x02, R11 + MOVB R11, (AX) + ADDQ $0x01, AX + +memmove_lz4s_s2: + LEAQ (AX)(R9*1), R11 + + // genMemMoveShort + CMPQ R9, $0x08 + JBE emit_lit_memmove_lz4s_s2_memmove_move_8 + CMPQ R9, $0x10 + JBE emit_lit_memmove_lz4s_s2_memmove_move_8through16 + CMPQ R9, $0x20 + JBE emit_lit_memmove_lz4s_s2_memmove_move_17through32 + JMP emit_lit_memmove_lz4s_s2_memmove_move_33through64 + +emit_lit_memmove_lz4s_s2_memmove_move_8: + MOVQ (DX), R12 + MOVQ R12, (AX) + JMP memmove_end_copy_lz4s_s2 + +emit_lit_memmove_lz4s_s2_memmove_move_8through16: + MOVQ (DX), R12 + MOVQ -8(DX)(R9*1), DX + MOVQ R12, (AX) + MOVQ DX, -8(AX)(R9*1) + JMP memmove_end_copy_lz4s_s2 + +emit_lit_memmove_lz4s_s2_memmove_move_17through32: + MOVOU (DX), X0 + MOVOU -16(DX)(R9*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R9*1) + JMP memmove_end_copy_lz4s_s2 + +emit_lit_memmove_lz4s_s2_memmove_move_33through64: + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R9*1), X2 + MOVOU -16(DX)(R9*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R9*1) + MOVOU X3, -16(AX)(R9*1) + +memmove_end_copy_lz4s_s2: + MOVQ R11, AX + JMP lz4s_s2_lits_emit_done + +memmove_long_lz4s_s2: + LEAQ (AX)(R9*1), R11 + + // genMemMoveLong + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R9*1), X2 + MOVOU -16(DX)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ AX, R12 + ANDL $0x0000001f, R12 + MOVQ $0x00000040, R14 + SUBQ R12, R14 + DECQ R13 + JA emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32 + LEAQ -32(DX)(R14*1), R12 + LEAQ -32(AX)(R14*1), R15 + +emit_lit_memmove_long_lz4s_s2large_big_loop_back: + MOVOU (R12), X4 + MOVOU 16(R12), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R12 + ADDQ $0x20, R14 + DECQ R13 + JNA emit_lit_memmove_long_lz4s_s2large_big_loop_back + +emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32: + MOVOU -32(DX)(R14*1), X4 + MOVOU -16(DX)(R14*1), X5 + MOVOA X4, -32(AX)(R14*1) + MOVOA X5, -16(AX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 + JAE emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R9*1) + MOVOU X3, -16(AX)(R9*1) + MOVQ R11, AX + +lz4s_s2_lits_emit_done: + MOVQ R8, DX + +lz4s_s2_lits_done: + CMPQ DX, BX + JNE lz4s_s2_match + CMPQ R10, $0x03 + JEQ lz4s_s2_done + JMP lz4s_s2_corrupt + +lz4s_s2_match: + CMPQ R10, $0x03 + JEQ lz4s_s2_loop + LEAQ 2(DX), R8 + CMPQ R8, BX + JAE lz4s_s2_corrupt + MOVWQZX (DX), R9 + MOVQ R8, DX + TESTQ R9, R9 + JZ lz4s_s2_corrupt + CMPQ R9, SI + JA lz4s_s2_corrupt + CMPQ R10, $0x12 + JNE lz4s_s2_ml_done + +lz4s_s2_ml_loop: + MOVBQZX (DX), R8 + INCQ DX + ADDQ R8, R10 + CMPQ DX, BX + JAE lz4s_s2_corrupt + CMPQ R8, $0xff + JEQ lz4s_s2_ml_loop + +lz4s_s2_ml_done: + ADDQ R10, SI + CMPQ R9, DI + JNE lz4s_s2_docopy + + // emitRepeat +emit_repeat_again_lz4_s2: + MOVL R10, R8 + LEAL -4(R10), R10 + CMPL R8, $0x08 + JBE repeat_two_lz4_s2 + CMPL R8, $0x0c + JAE cant_repeat_two_offset_lz4_s2 + CMPL R9, $0x00000800 + JB repeat_two_offset_lz4_s2 + +cant_repeat_two_offset_lz4_s2: + CMPL R10, $0x00000104 + JB repeat_three_lz4_s2 + CMPL R10, $0x00010100 + JB repeat_four_lz4_s2 + CMPL R10, $0x0100ffff + JB repeat_five_lz4_s2 + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_lz4_s2 + +repeat_five_lz4_s2: + LEAL -65536(R10), R10 + MOVL R10, R9 + MOVW $0x001d, (AX) + MOVW R10, 2(AX) + SARL $0x10, R9 + MOVB R9, 4(AX) + ADDQ $0x05, AX + JMP lz4s_s2_loop + +repeat_four_lz4_s2: + LEAL -256(R10), R10 + MOVW $0x0019, (AX) + MOVW R10, 2(AX) + ADDQ $0x04, AX + JMP lz4s_s2_loop + +repeat_three_lz4_s2: + LEAL -4(R10), R10 + MOVW $0x0015, (AX) + MOVB R10, 2(AX) + ADDQ $0x03, AX + JMP lz4s_s2_loop + +repeat_two_lz4_s2: + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +repeat_two_offset_lz4_s2: + XORQ R8, R8 + LEAL 1(R8)(R10*4), R10 + MOVB R9, 1(AX) + SARL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R10 + MOVB R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +lz4s_s2_docopy: + MOVQ R9, DI + + // emitCopy + CMPL R10, $0x40 + JBE two_byte_offset_short_lz4_s2 + CMPL R9, $0x00000800 + JAE long_offset_short_lz4_s2 + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB R9, 1(AX) + MOVL R9, R11 + SHRL $0x08, R11 + SHLL $0x05, R11 + ORL R11, R8 + MOVB R8, (AX) + ADDQ $0x02, AX + SUBL $0x08, R10 + + // emitRepeat + LEAL -4(R10), R10 + JMP cant_repeat_two_offset_lz4_s2_emit_copy_short_2b + +emit_repeat_again_lz4_s2_emit_copy_short_2b: + MOVL R10, R8 + LEAL -4(R10), R10 + CMPL R8, $0x08 + JBE repeat_two_lz4_s2_emit_copy_short_2b + CMPL R8, $0x0c + JAE cant_repeat_two_offset_lz4_s2_emit_copy_short_2b + CMPL R9, $0x00000800 + JB repeat_two_offset_lz4_s2_emit_copy_short_2b + +cant_repeat_two_offset_lz4_s2_emit_copy_short_2b: + CMPL R10, $0x00000104 + JB repeat_three_lz4_s2_emit_copy_short_2b + CMPL R10, $0x00010100 + JB repeat_four_lz4_s2_emit_copy_short_2b + CMPL R10, $0x0100ffff + JB repeat_five_lz4_s2_emit_copy_short_2b + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_lz4_s2_emit_copy_short_2b + +repeat_five_lz4_s2_emit_copy_short_2b: + LEAL -65536(R10), R10 + MOVL R10, R9 + MOVW $0x001d, (AX) + MOVW R10, 2(AX) + SARL $0x10, R9 + MOVB R9, 4(AX) + ADDQ $0x05, AX + JMP lz4s_s2_loop + +repeat_four_lz4_s2_emit_copy_short_2b: + LEAL -256(R10), R10 + MOVW $0x0019, (AX) + MOVW R10, 2(AX) + ADDQ $0x04, AX + JMP lz4s_s2_loop + +repeat_three_lz4_s2_emit_copy_short_2b: + LEAL -4(R10), R10 + MOVW $0x0015, (AX) + MOVB R10, 2(AX) + ADDQ $0x03, AX + JMP lz4s_s2_loop + +repeat_two_lz4_s2_emit_copy_short_2b: + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +repeat_two_offset_lz4_s2_emit_copy_short_2b: + XORQ R8, R8 + LEAL 1(R8)(R10*4), R10 + MOVB R9, 1(AX) + SARL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R10 + MOVB R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +long_offset_short_lz4_s2: + MOVB $0xee, (AX) + MOVW R9, 1(AX) + LEAL -60(R10), R10 + ADDQ $0x03, AX + + // emitRepeat +emit_repeat_again_lz4_s2_emit_copy_short: + MOVL R10, R8 + LEAL -4(R10), R10 + CMPL R8, $0x08 + JBE repeat_two_lz4_s2_emit_copy_short + CMPL R8, $0x0c + JAE cant_repeat_two_offset_lz4_s2_emit_copy_short + CMPL R9, $0x00000800 + JB repeat_two_offset_lz4_s2_emit_copy_short + +cant_repeat_two_offset_lz4_s2_emit_copy_short: + CMPL R10, $0x00000104 + JB repeat_three_lz4_s2_emit_copy_short + CMPL R10, $0x00010100 + JB repeat_four_lz4_s2_emit_copy_short + CMPL R10, $0x0100ffff + JB repeat_five_lz4_s2_emit_copy_short + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_lz4_s2_emit_copy_short + +repeat_five_lz4_s2_emit_copy_short: + LEAL -65536(R10), R10 + MOVL R10, R9 + MOVW $0x001d, (AX) + MOVW R10, 2(AX) + SARL $0x10, R9 + MOVB R9, 4(AX) + ADDQ $0x05, AX + JMP lz4s_s2_loop + +repeat_four_lz4_s2_emit_copy_short: + LEAL -256(R10), R10 + MOVW $0x0019, (AX) + MOVW R10, 2(AX) + ADDQ $0x04, AX + JMP lz4s_s2_loop + +repeat_three_lz4_s2_emit_copy_short: + LEAL -4(R10), R10 + MOVW $0x0015, (AX) + MOVB R10, 2(AX) + ADDQ $0x03, AX + JMP lz4s_s2_loop + +repeat_two_lz4_s2_emit_copy_short: + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +repeat_two_offset_lz4_s2_emit_copy_short: + XORQ R8, R8 + LEAL 1(R8)(R10*4), R10 + MOVB R9, 1(AX) + SARL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R10 + MOVB R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +two_byte_offset_short_lz4_s2: + MOVL R10, R8 + SHLL $0x02, R8 + CMPL R10, $0x0c + JAE emit_copy_three_lz4_s2 + CMPL R9, $0x00000800 + JAE emit_copy_three_lz4_s2 + LEAL -15(R8), R8 + MOVB R9, 1(AX) + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R8 + MOVB R8, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +emit_copy_three_lz4_s2: + LEAL -2(R8), R8 + MOVB R8, (AX) + MOVW R9, 1(AX) + ADDQ $0x03, AX + JMP lz4s_s2_loop + +lz4s_s2_done: + MOVQ dst_base+0(FP), CX + SUBQ CX, AX + MOVQ SI, uncompressed+48(FP) + MOVQ AX, dstUsed+56(FP) + RET + +lz4s_s2_corrupt: + XORQ AX, AX + LEAQ -1(AX), SI + MOVQ SI, uncompressed+48(FP) + RET + +lz4s_s2_dstfull: + XORQ AX, AX + LEAQ -2(AX), SI + MOVQ SI, uncompressed+48(FP) + RET + +// func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) +// Requires: SSE2 +TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64 + XORQ SI, SI + MOVQ dst_base+0(FP), AX + MOVQ dst_len+8(FP), CX + MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), BX + LEAQ (DX)(BX*1), BX + LEAQ -10(AX)(CX*1), CX + +lz4_snappy_loop: + CMPQ DX, BX + JAE lz4_snappy_corrupt + CMPQ AX, CX + JAE lz4_snappy_dstfull + MOVBQZX (DX), DI + MOVQ DI, R8 + MOVQ DI, R9 + SHRQ $0x04, R8 + ANDQ $0x0f, R9 + CMPQ DI, $0xf0 + JB lz4_snappy_ll_end + +lz4_snappy_ll_loop: + INCQ DX + CMPQ DX, BX + JAE lz4_snappy_corrupt + MOVBQZX (DX), DI + ADDQ DI, R8 + CMPQ DI, $0xff + JEQ lz4_snappy_ll_loop + +lz4_snappy_ll_end: + LEAQ (DX)(R8*1), DI + ADDQ $0x04, R9 + CMPQ DI, BX + JAE lz4_snappy_corrupt + INCQ DX + INCQ DI + TESTQ R8, R8 + JZ lz4_snappy_lits_done + LEAQ (AX)(R8*1), R10 + CMPQ R10, CX + JAE lz4_snappy_dstfull + ADDQ R8, SI + LEAL -1(R8), R10 + CMPL R10, $0x3c + JB one_byte_lz4_snappy + CMPL R10, $0x00000100 + JB two_bytes_lz4_snappy + CMPL R10, $0x00010000 + JB three_bytes_lz4_snappy + CMPL R10, $0x01000000 + JB four_bytes_lz4_snappy + MOVB $0xfc, (AX) + MOVL R10, 1(AX) + ADDQ $0x05, AX + JMP memmove_long_lz4_snappy + +four_bytes_lz4_snappy: + MOVL R10, R11 + SHRL $0x10, R11 + MOVB $0xf8, (AX) + MOVW R10, 1(AX) + MOVB R11, 3(AX) + ADDQ $0x04, AX + JMP memmove_long_lz4_snappy + +three_bytes_lz4_snappy: + MOVB $0xf4, (AX) + MOVW R10, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_lz4_snappy + +two_bytes_lz4_snappy: + MOVB $0xf0, (AX) + MOVB R10, 1(AX) + ADDQ $0x02, AX + CMPL R10, $0x40 + JB memmove_lz4_snappy + JMP memmove_long_lz4_snappy + +one_byte_lz4_snappy: + SHLB $0x02, R10 + MOVB R10, (AX) + ADDQ $0x01, AX + +memmove_lz4_snappy: + LEAQ (AX)(R8*1), R10 + + // genMemMoveShort + CMPQ R8, $0x08 + JBE emit_lit_memmove_lz4_snappy_memmove_move_8 + CMPQ R8, $0x10 + JBE emit_lit_memmove_lz4_snappy_memmove_move_8through16 + CMPQ R8, $0x20 + JBE emit_lit_memmove_lz4_snappy_memmove_move_17through32 + JMP emit_lit_memmove_lz4_snappy_memmove_move_33through64 + +emit_lit_memmove_lz4_snappy_memmove_move_8: + MOVQ (DX), R11 + MOVQ R11, (AX) + JMP memmove_end_copy_lz4_snappy + +emit_lit_memmove_lz4_snappy_memmove_move_8through16: + MOVQ (DX), R11 + MOVQ -8(DX)(R8*1), DX + MOVQ R11, (AX) + MOVQ DX, -8(AX)(R8*1) + JMP memmove_end_copy_lz4_snappy + +emit_lit_memmove_lz4_snappy_memmove_move_17through32: + MOVOU (DX), X0 + MOVOU -16(DX)(R8*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R8*1) + JMP memmove_end_copy_lz4_snappy + +emit_lit_memmove_lz4_snappy_memmove_move_33through64: + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R8*1), X2 + MOVOU -16(DX)(R8*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + +memmove_end_copy_lz4_snappy: + MOVQ R10, AX + JMP lz4_snappy_lits_emit_done + +memmove_long_lz4_snappy: + LEAQ (AX)(R8*1), R10 + + // genMemMoveLong + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R8*1), X2 + MOVOU -16(DX)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 + JA emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32 + LEAQ -32(DX)(R13*1), R11 + LEAQ -32(AX)(R13*1), R14 + +emit_lit_memmove_long_lz4_snappylarge_big_loop_back: + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 + ADDQ $0x20, R13 + DECQ R12 + JNA emit_lit_memmove_long_lz4_snappylarge_big_loop_back + +emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32: + MOVOU -32(DX)(R13*1), X4 + MOVOU -16(DX)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 + JAE emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ R10, AX + +lz4_snappy_lits_emit_done: + MOVQ DI, DX + +lz4_snappy_lits_done: + CMPQ DX, BX + JNE lz4_snappy_match + CMPQ R9, $0x04 + JEQ lz4_snappy_done + JMP lz4_snappy_corrupt + +lz4_snappy_match: + LEAQ 2(DX), DI + CMPQ DI, BX + JAE lz4_snappy_corrupt + MOVWQZX (DX), R8 + MOVQ DI, DX + TESTQ R8, R8 + JZ lz4_snappy_corrupt + CMPQ R8, SI + JA lz4_snappy_corrupt + CMPQ R9, $0x13 + JNE lz4_snappy_ml_done + +lz4_snappy_ml_loop: + MOVBQZX (DX), DI + INCQ DX + ADDQ DI, R9 + CMPQ DX, BX + JAE lz4_snappy_corrupt + CMPQ DI, $0xff + JEQ lz4_snappy_ml_loop + +lz4_snappy_ml_done: + ADDQ R9, SI + + // emitCopy +two_byte_offset_lz4_s2: + CMPL R9, $0x40 + JBE two_byte_offset_short_lz4_s2 + MOVB $0xee, (AX) + MOVW R8, 1(AX) + LEAL -60(R9), R9 + ADDQ $0x03, AX + CMPQ AX, CX + JAE lz4_snappy_loop + JMP two_byte_offset_lz4_s2 + +two_byte_offset_short_lz4_s2: + MOVL R9, DI + SHLL $0x02, DI + CMPL R9, $0x0c + JAE emit_copy_three_lz4_s2 + CMPL R8, $0x00000800 + JAE emit_copy_three_lz4_s2 + LEAL -15(DI), DI + MOVB R8, 1(AX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, DI + MOVB DI, (AX) + ADDQ $0x02, AX + JMP lz4_snappy_loop + +emit_copy_three_lz4_s2: + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW R8, 1(AX) + ADDQ $0x03, AX + JMP lz4_snappy_loop + +lz4_snappy_done: + MOVQ dst_base+0(FP), CX + SUBQ CX, AX + MOVQ SI, uncompressed+48(FP) + MOVQ AX, dstUsed+56(FP) + RET + +lz4_snappy_corrupt: + XORQ AX, AX + LEAQ -1(AX), SI + MOVQ SI, uncompressed+48(FP) + RET + +lz4_snappy_dstfull: + XORQ AX, AX + LEAQ -2(AX), SI + MOVQ SI, uncompressed+48(FP) + RET + +// func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) +// Requires: SSE2 +TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64 + XORQ SI, SI + MOVQ dst_base+0(FP), AX + MOVQ dst_len+8(FP), CX + MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), BX + LEAQ (DX)(BX*1), BX + LEAQ -10(AX)(CX*1), CX + +lz4s_snappy_loop: + CMPQ DX, BX + JAE lz4s_snappy_corrupt + CMPQ AX, CX + JAE lz4s_snappy_dstfull + MOVBQZX (DX), DI + MOVQ DI, R8 + MOVQ DI, R9 + SHRQ $0x04, R8 + ANDQ $0x0f, R9 + CMPQ DI, $0xf0 + JB lz4s_snappy_ll_end + +lz4s_snappy_ll_loop: + INCQ DX + CMPQ DX, BX + JAE lz4s_snappy_corrupt + MOVBQZX (DX), DI + ADDQ DI, R8 + CMPQ DI, $0xff + JEQ lz4s_snappy_ll_loop + +lz4s_snappy_ll_end: + LEAQ (DX)(R8*1), DI + ADDQ $0x03, R9 + CMPQ DI, BX + JAE lz4s_snappy_corrupt + INCQ DX + INCQ DI + TESTQ R8, R8 + JZ lz4s_snappy_lits_done + LEAQ (AX)(R8*1), R10 + CMPQ R10, CX + JAE lz4s_snappy_dstfull + ADDQ R8, SI + LEAL -1(R8), R10 + CMPL R10, $0x3c + JB one_byte_lz4s_snappy + CMPL R10, $0x00000100 + JB two_bytes_lz4s_snappy + CMPL R10, $0x00010000 + JB three_bytes_lz4s_snappy + CMPL R10, $0x01000000 + JB four_bytes_lz4s_snappy + MOVB $0xfc, (AX) + MOVL R10, 1(AX) + ADDQ $0x05, AX + JMP memmove_long_lz4s_snappy + +four_bytes_lz4s_snappy: + MOVL R10, R11 + SHRL $0x10, R11 + MOVB $0xf8, (AX) + MOVW R10, 1(AX) + MOVB R11, 3(AX) + ADDQ $0x04, AX + JMP memmove_long_lz4s_snappy + +three_bytes_lz4s_snappy: + MOVB $0xf4, (AX) + MOVW R10, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_lz4s_snappy + +two_bytes_lz4s_snappy: + MOVB $0xf0, (AX) + MOVB R10, 1(AX) + ADDQ $0x02, AX + CMPL R10, $0x40 + JB memmove_lz4s_snappy + JMP memmove_long_lz4s_snappy + +one_byte_lz4s_snappy: + SHLB $0x02, R10 + MOVB R10, (AX) + ADDQ $0x01, AX + +memmove_lz4s_snappy: + LEAQ (AX)(R8*1), R10 + + // genMemMoveShort + CMPQ R8, $0x08 + JBE emit_lit_memmove_lz4s_snappy_memmove_move_8 + CMPQ R8, $0x10 + JBE emit_lit_memmove_lz4s_snappy_memmove_move_8through16 + CMPQ R8, $0x20 + JBE emit_lit_memmove_lz4s_snappy_memmove_move_17through32 + JMP emit_lit_memmove_lz4s_snappy_memmove_move_33through64 + +emit_lit_memmove_lz4s_snappy_memmove_move_8: + MOVQ (DX), R11 + MOVQ R11, (AX) + JMP memmove_end_copy_lz4s_snappy + +emit_lit_memmove_lz4s_snappy_memmove_move_8through16: + MOVQ (DX), R11 + MOVQ -8(DX)(R8*1), DX + MOVQ R11, (AX) + MOVQ DX, -8(AX)(R8*1) + JMP memmove_end_copy_lz4s_snappy + +emit_lit_memmove_lz4s_snappy_memmove_move_17through32: + MOVOU (DX), X0 + MOVOU -16(DX)(R8*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R8*1) + JMP memmove_end_copy_lz4s_snappy + +emit_lit_memmove_lz4s_snappy_memmove_move_33through64: + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R8*1), X2 + MOVOU -16(DX)(R8*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + +memmove_end_copy_lz4s_snappy: + MOVQ R10, AX + JMP lz4s_snappy_lits_emit_done + +memmove_long_lz4s_snappy: + LEAQ (AX)(R8*1), R10 + + // genMemMoveLong + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R8*1), X2 + MOVOU -16(DX)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 + JA emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32 + LEAQ -32(DX)(R13*1), R11 + LEAQ -32(AX)(R13*1), R14 + +emit_lit_memmove_long_lz4s_snappylarge_big_loop_back: + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 + ADDQ $0x20, R13 + DECQ R12 + JNA emit_lit_memmove_long_lz4s_snappylarge_big_loop_back + +emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32: + MOVOU -32(DX)(R13*1), X4 + MOVOU -16(DX)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 + JAE emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ R10, AX + +lz4s_snappy_lits_emit_done: + MOVQ DI, DX + +lz4s_snappy_lits_done: + CMPQ DX, BX + JNE lz4s_snappy_match + CMPQ R9, $0x03 + JEQ lz4s_snappy_done + JMP lz4s_snappy_corrupt + +lz4s_snappy_match: + CMPQ R9, $0x03 + JEQ lz4s_snappy_loop + LEAQ 2(DX), DI + CMPQ DI, BX + JAE lz4s_snappy_corrupt + MOVWQZX (DX), R8 + MOVQ DI, DX + TESTQ R8, R8 + JZ lz4s_snappy_corrupt + CMPQ R8, SI + JA lz4s_snappy_corrupt + CMPQ R9, $0x12 + JNE lz4s_snappy_ml_done + +lz4s_snappy_ml_loop: + MOVBQZX (DX), DI + INCQ DX + ADDQ DI, R9 + CMPQ DX, BX + JAE lz4s_snappy_corrupt + CMPQ DI, $0xff + JEQ lz4s_snappy_ml_loop + +lz4s_snappy_ml_done: + ADDQ R9, SI + + // emitCopy +two_byte_offset_lz4_s2: + CMPL R9, $0x40 + JBE two_byte_offset_short_lz4_s2 + MOVB $0xee, (AX) + MOVW R8, 1(AX) + LEAL -60(R9), R9 + ADDQ $0x03, AX + CMPQ AX, CX + JAE lz4s_snappy_loop + JMP two_byte_offset_lz4_s2 + +two_byte_offset_short_lz4_s2: + MOVL R9, DI + SHLL $0x02, DI + CMPL R9, $0x0c + JAE emit_copy_three_lz4_s2 + CMPL R8, $0x00000800 + JAE emit_copy_three_lz4_s2 + LEAL -15(DI), DI + MOVB R8, 1(AX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, DI + MOVB DI, (AX) + ADDQ $0x02, AX + JMP lz4s_snappy_loop + +emit_copy_three_lz4_s2: + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW R8, 1(AX) + ADDQ $0x03, AX + JMP lz4s_snappy_loop + +lz4s_snappy_done: + MOVQ dst_base+0(FP), CX + SUBQ CX, AX + MOVQ SI, uncompressed+48(FP) + MOVQ AX, dstUsed+56(FP) + RET + +lz4s_snappy_corrupt: + XORQ AX, AX + LEAQ -1(AX), SI + MOVQ SI, uncompressed+48(FP) + RET + +lz4s_snappy_dstfull: + XORQ AX, AX + LEAQ -2(AX), SI + MOVQ SI, uncompressed+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/s2/lz4convert.go b/vendor/github.com/klauspost/compress/s2/lz4convert.go new file mode 100644 index 00000000..46ed908e --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/lz4convert.go @@ -0,0 +1,585 @@ +// Copyright (c) 2022 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s2 + +import ( + "encoding/binary" + "errors" + "fmt" +) + +// LZ4Converter provides conversion from LZ4 blocks as defined here: +// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md +type LZ4Converter struct { +} + +// ErrDstTooSmall is returned when provided destination is too small. +var ErrDstTooSmall = errors.New("s2: destination too small") + +// ConvertBlock will convert an LZ4 block and append it as an S2 +// block without block length to dst. +// The uncompressed size is returned as well. +// dst must have capacity to contain the entire compressed block. +func (l *LZ4Converter) ConvertBlock(dst, src []byte) ([]byte, int, error) { + if len(src) == 0 { + return dst, 0, nil + } + const debug = false + const inline = true + const lz4MinMatch = 4 + + s, d := 0, len(dst) + dst = dst[:cap(dst)] + if !debug && hasAmd64Asm { + res, sz := cvtLZ4BlockAsm(dst[d:], src) + if res < 0 { + const ( + errCorrupt = -1 + errDstTooSmall = -2 + ) + switch res { + case errCorrupt: + return nil, 0, ErrCorrupt + case errDstTooSmall: + return nil, 0, ErrDstTooSmall + default: + return nil, 0, fmt.Errorf("unexpected result: %d", res) + } + } + if d+sz > len(dst) { + return nil, 0, ErrDstTooSmall + } + return dst[:d+sz], res, nil + } + + dLimit := len(dst) - 10 + var lastOffset uint16 + var uncompressed int + if debug { + fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst)) + } + + for { + if s >= len(src) { + return dst[:d], 0, ErrCorrupt + } + // Read literal info + token := src[s] + ll := int(token >> 4) + ml := int(lz4MinMatch + (token & 0xf)) + + // If upper nibble is 15, literal length is extended + if token >= 0xf0 { + for { + s++ + if s >= len(src) { + if debug { + fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return dst[:d], 0, ErrCorrupt + } + val := src[s] + ll += int(val) + if val != 255 { + break + } + } + } + // Skip past token + if s+ll >= len(src) { + if debug { + fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src)) + } + return nil, 0, ErrCorrupt + } + s++ + if ll > 0 { + if d+ll > dLimit { + return nil, 0, ErrDstTooSmall + } + if debug { + fmt.Printf("emit %d literals\n", ll) + } + d += emitLiteralGo(dst[d:], src[s:s+ll]) + s += ll + uncompressed += ll + } + + // Check if we are done... + if s == len(src) && ml == lz4MinMatch { + break + } + // 2 byte offset + if s >= len(src)-2 { + if debug { + fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2) + } + return nil, 0, ErrCorrupt + } + offset := binary.LittleEndian.Uint16(src[s:]) + s += 2 + if offset == 0 { + if debug { + fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s) + } + return nil, 0, ErrCorrupt + } + if int(offset) > uncompressed { + if debug { + fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed) + } + return nil, 0, ErrCorrupt + } + + if ml == lz4MinMatch+15 { + for { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + val := src[s] + s++ + ml += int(val) + if val != 255 { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + break + } + } + } + if offset == lastOffset { + if debug { + fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset) + } + if !inline { + d += emitRepeat16(dst[d:], offset, ml) + } else { + length := ml + dst := dst[d:] + for len(dst) > 5 { + // Repeat offset, make length cheaper + length -= 4 + if length <= 4 { + dst[0] = uint8(length)<<2 | tagCopy1 + dst[1] = 0 + d += 2 + break + } + if length < 8 && offset < 2048 { + // Encode WITH offset + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1 + d += 2 + break + } + if length < (1<<8)+4 { + length -= 4 + dst[2] = uint8(length) + dst[1] = 0 + dst[0] = 5<<2 | tagCopy1 + d += 3 + break + } + if length < (1<<16)+(1<<8) { + length -= 1 << 8 + dst[3] = uint8(length >> 8) + dst[2] = uint8(length >> 0) + dst[1] = 0 + dst[0] = 6<<2 | tagCopy1 + d += 4 + break + } + const maxRepeat = (1 << 24) - 1 + length -= 1 << 16 + left := 0 + if length > maxRepeat { + left = length - maxRepeat + 4 + length = maxRepeat - 4 + } + dst[4] = uint8(length >> 16) + dst[3] = uint8(length >> 8) + dst[2] = uint8(length >> 0) + dst[1] = 0 + dst[0] = 7<<2 | tagCopy1 + if left > 0 { + d += 5 + emitRepeat16(dst[5:], offset, left) + break + } + d += 5 + break + } + } + } else { + if debug { + fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset) + } + if !inline { + d += emitCopy16(dst[d:], offset, ml) + } else { + length := ml + dst := dst[d:] + for len(dst) > 5 { + // Offset no more than 2 bytes. + if length > 64 { + off := 3 + if offset < 2048 { + // emit 8 bytes as tagCopy1, rest as repeats. + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1 + length -= 8 + off = 2 + } else { + // Emit a length 60 copy, encoded as 3 bytes. + // Emit remaining as repeat value (minimum 4 bytes). + dst[2] = uint8(offset >> 8) + dst[1] = uint8(offset) + dst[0] = 59<<2 | tagCopy2 + length -= 60 + } + // Emit remaining as repeats, at least 4 bytes remain. + d += off + emitRepeat16(dst[off:], offset, length) + break + } + if length >= 12 || offset >= 2048 { + // Emit the remaining copy, encoded as 3 bytes. + dst[2] = uint8(offset >> 8) + dst[1] = uint8(offset) + dst[0] = uint8(length-1)<<2 | tagCopy2 + d += 3 + break + } + // Emit the remaining copy, encoded as 2 bytes. + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + d += 2 + break + } + } + lastOffset = offset + } + uncompressed += ml + if d > dLimit { + return nil, 0, ErrDstTooSmall + } + } + + return dst[:d], uncompressed, nil +} + +// ConvertBlockSnappy will convert an LZ4 block and append it +// as a Snappy block without block length to dst. +// The uncompressed size is returned as well. +// dst must have capacity to contain the entire compressed block. +func (l *LZ4Converter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) { + if len(src) == 0 { + return dst, 0, nil + } + const debug = false + const lz4MinMatch = 4 + + s, d := 0, len(dst) + dst = dst[:cap(dst)] + // Use assembly when possible + if !debug && hasAmd64Asm { + res, sz := cvtLZ4BlockSnappyAsm(dst[d:], src) + if res < 0 { + const ( + errCorrupt = -1 + errDstTooSmall = -2 + ) + switch res { + case errCorrupt: + return nil, 0, ErrCorrupt + case errDstTooSmall: + return nil, 0, ErrDstTooSmall + default: + return nil, 0, fmt.Errorf("unexpected result: %d", res) + } + } + if d+sz > len(dst) { + return nil, 0, ErrDstTooSmall + } + return dst[:d+sz], res, nil + } + + dLimit := len(dst) - 10 + var uncompressed int + if debug { + fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst)) + } + + for { + if s >= len(src) { + return nil, 0, ErrCorrupt + } + // Read literal info + token := src[s] + ll := int(token >> 4) + ml := int(lz4MinMatch + (token & 0xf)) + + // If upper nibble is 15, literal length is extended + if token >= 0xf0 { + for { + s++ + if s >= len(src) { + if debug { + fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + val := src[s] + ll += int(val) + if val != 255 { + break + } + } + } + // Skip past token + if s+ll >= len(src) { + if debug { + fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src)) + } + return nil, 0, ErrCorrupt + } + s++ + if ll > 0 { + if d+ll > dLimit { + return nil, 0, ErrDstTooSmall + } + if debug { + fmt.Printf("emit %d literals\n", ll) + } + d += emitLiteralGo(dst[d:], src[s:s+ll]) + s += ll + uncompressed += ll + } + + // Check if we are done... + if s == len(src) && ml == lz4MinMatch { + break + } + // 2 byte offset + if s >= len(src)-2 { + if debug { + fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2) + } + return nil, 0, ErrCorrupt + } + offset := binary.LittleEndian.Uint16(src[s:]) + s += 2 + if offset == 0 { + if debug { + fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s) + } + return nil, 0, ErrCorrupt + } + if int(offset) > uncompressed { + if debug { + fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed) + } + return nil, 0, ErrCorrupt + } + + if ml == lz4MinMatch+15 { + for { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + val := src[s] + s++ + ml += int(val) + if val != 255 { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + break + } + } + } + if debug { + fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset) + } + length := ml + // d += emitCopyNoRepeat(dst[d:], int(offset), ml) + for length > 0 { + if d >= dLimit { + return nil, 0, ErrDstTooSmall + } + + // Offset no more than 2 bytes. + if length > 64 { + // Emit a length 64 copy, encoded as 3 bytes. + dst[d+2] = uint8(offset >> 8) + dst[d+1] = uint8(offset) + dst[d+0] = 63<<2 | tagCopy2 + length -= 64 + d += 3 + continue + } + if length >= 12 || offset >= 2048 || length < 4 { + // Emit the remaining copy, encoded as 3 bytes. + dst[d+2] = uint8(offset >> 8) + dst[d+1] = uint8(offset) + dst[d+0] = uint8(length-1)<<2 | tagCopy2 + d += 3 + break + } + // Emit the remaining copy, encoded as 2 bytes. + dst[d+1] = uint8(offset) + dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + d += 2 + break + } + uncompressed += ml + if d > dLimit { + return nil, 0, ErrDstTooSmall + } + } + + return dst[:d], uncompressed, nil +} + +// emitRepeat writes a repeat chunk and returns the number of bytes written. +// Length must be at least 4 and < 1<<24 +func emitRepeat16(dst []byte, offset uint16, length int) int { + // Repeat offset, make length cheaper + length -= 4 + if length <= 4 { + dst[0] = uint8(length)<<2 | tagCopy1 + dst[1] = 0 + return 2 + } + if length < 8 && offset < 2048 { + // Encode WITH offset + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1 + return 2 + } + if length < (1<<8)+4 { + length -= 4 + dst[2] = uint8(length) + dst[1] = 0 + dst[0] = 5<<2 | tagCopy1 + return 3 + } + if length < (1<<16)+(1<<8) { + length -= 1 << 8 + dst[3] = uint8(length >> 8) + dst[2] = uint8(length >> 0) + dst[1] = 0 + dst[0] = 6<<2 | tagCopy1 + return 4 + } + const maxRepeat = (1 << 24) - 1 + length -= 1 << 16 + left := 0 + if length > maxRepeat { + left = length - maxRepeat + 4 + length = maxRepeat - 4 + } + dst[4] = uint8(length >> 16) + dst[3] = uint8(length >> 8) + dst[2] = uint8(length >> 0) + dst[1] = 0 + dst[0] = 7<<2 | tagCopy1 + if left > 0 { + return 5 + emitRepeat16(dst[5:], offset, left) + } + return 5 +} + +// emitCopy writes a copy chunk and returns the number of bytes written. +// +// It assumes that: +// +// dst is long enough to hold the encoded bytes +// 1 <= offset && offset <= math.MaxUint16 +// 4 <= length && length <= math.MaxUint32 +func emitCopy16(dst []byte, offset uint16, length int) int { + // Offset no more than 2 bytes. + if length > 64 { + off := 3 + if offset < 2048 { + // emit 8 bytes as tagCopy1, rest as repeats. + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1 + length -= 8 + off = 2 + } else { + // Emit a length 60 copy, encoded as 3 bytes. + // Emit remaining as repeat value (minimum 4 bytes). + dst[2] = uint8(offset >> 8) + dst[1] = uint8(offset) + dst[0] = 59<<2 | tagCopy2 + length -= 60 + } + // Emit remaining as repeats, at least 4 bytes remain. + return off + emitRepeat16(dst[off:], offset, length) + } + if length >= 12 || offset >= 2048 { + // Emit the remaining copy, encoded as 3 bytes. + dst[2] = uint8(offset >> 8) + dst[1] = uint8(offset) + dst[0] = uint8(length-1)<<2 | tagCopy2 + return 3 + } + // Emit the remaining copy, encoded as 2 bytes. + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + return 2 +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +// +// It assumes that: +// +// dst is long enough to hold the encoded bytes +// 0 <= len(lit) && len(lit) <= math.MaxUint32 +func emitLiteralGo(dst, lit []byte) int { + if len(lit) == 0 { + return 0 + } + i, n := 0, uint(len(lit)-1) + switch { + case n < 60: + dst[0] = uint8(n)<<2 | tagLiteral + i = 1 + case n < 1<<8: + dst[1] = uint8(n) + dst[0] = 60<<2 | tagLiteral + i = 2 + case n < 1<<16: + dst[2] = uint8(n >> 8) + dst[1] = uint8(n) + dst[0] = 61<<2 | tagLiteral + i = 3 + case n < 1<<24: + dst[3] = uint8(n >> 16) + dst[2] = uint8(n >> 8) + dst[1] = uint8(n) + dst[0] = 62<<2 | tagLiteral + i = 4 + default: + dst[4] = uint8(n >> 24) + dst[3] = uint8(n >> 16) + dst[2] = uint8(n >> 8) + dst[1] = uint8(n) + dst[0] = 63<<2 | tagLiteral + i = 5 + } + return i + copy(dst[i:], lit) +} diff --git a/vendor/github.com/klauspost/compress/s2/lz4sconvert.go b/vendor/github.com/klauspost/compress/s2/lz4sconvert.go new file mode 100644 index 00000000..000f3971 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/lz4sconvert.go @@ -0,0 +1,467 @@ +// Copyright (c) 2022 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s2 + +import ( + "encoding/binary" + "fmt" +) + +// LZ4sConverter provides conversion from LZ4s. +// (Intel modified LZ4 Blocks) +// https://cdrdv2-public.intel.com/743912/743912-qat-programmers-guide-v2.0.pdf +// LZ4s is a variant of LZ4 block format. LZ4s should be considered as an intermediate compressed block format. +// The LZ4s format is selected when the application sets the compType to CPA_DC_LZ4S in CpaDcSessionSetupData. +// The LZ4s block returned by the Intel® QAT hardware can be used by an external +// software post-processing to generate other compressed data formats. +// The following table lists the differences between LZ4 and LZ4s block format. LZ4s block format uses +// the same high-level formatting as LZ4 block format with the following encoding changes: +// For Min Match of 4 bytes, Copy length value 1-15 means length 4-18 with 18 bytes adding an extra byte. +// ONLY "Min match of 4 bytes" is supported. +type LZ4sConverter struct { +} + +// ConvertBlock will convert an LZ4s block and append it as an S2 +// block without block length to dst. +// The uncompressed size is returned as well. +// dst must have capacity to contain the entire compressed block. +func (l *LZ4sConverter) ConvertBlock(dst, src []byte) ([]byte, int, error) { + if len(src) == 0 { + return dst, 0, nil + } + const debug = false + const inline = true + const lz4MinMatch = 3 + + s, d := 0, len(dst) + dst = dst[:cap(dst)] + if !debug && hasAmd64Asm { + res, sz := cvtLZ4sBlockAsm(dst[d:], src) + if res < 0 { + const ( + errCorrupt = -1 + errDstTooSmall = -2 + ) + switch res { + case errCorrupt: + return nil, 0, ErrCorrupt + case errDstTooSmall: + return nil, 0, ErrDstTooSmall + default: + return nil, 0, fmt.Errorf("unexpected result: %d", res) + } + } + if d+sz > len(dst) { + return nil, 0, ErrDstTooSmall + } + return dst[:d+sz], res, nil + } + + dLimit := len(dst) - 10 + var lastOffset uint16 + var uncompressed int + if debug { + fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst)) + } + + for { + if s >= len(src) { + return dst[:d], 0, ErrCorrupt + } + // Read literal info + token := src[s] + ll := int(token >> 4) + ml := int(lz4MinMatch + (token & 0xf)) + + // If upper nibble is 15, literal length is extended + if token >= 0xf0 { + for { + s++ + if s >= len(src) { + if debug { + fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return dst[:d], 0, ErrCorrupt + } + val := src[s] + ll += int(val) + if val != 255 { + break + } + } + } + // Skip past token + if s+ll >= len(src) { + if debug { + fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src)) + } + return nil, 0, ErrCorrupt + } + s++ + if ll > 0 { + if d+ll > dLimit { + return nil, 0, ErrDstTooSmall + } + if debug { + fmt.Printf("emit %d literals\n", ll) + } + d += emitLiteralGo(dst[d:], src[s:s+ll]) + s += ll + uncompressed += ll + } + + // Check if we are done... + if ml == lz4MinMatch { + if s == len(src) { + break + } + // 0 bytes. + continue + } + // 2 byte offset + if s >= len(src)-2 { + if debug { + fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2) + } + return nil, 0, ErrCorrupt + } + offset := binary.LittleEndian.Uint16(src[s:]) + s += 2 + if offset == 0 { + if debug { + fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s) + } + return nil, 0, ErrCorrupt + } + if int(offset) > uncompressed { + if debug { + fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed) + } + return nil, 0, ErrCorrupt + } + + if ml == lz4MinMatch+15 { + for { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + val := src[s] + s++ + ml += int(val) + if val != 255 { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + break + } + } + } + if offset == lastOffset { + if debug { + fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset) + } + if !inline { + d += emitRepeat16(dst[d:], offset, ml) + } else { + length := ml + dst := dst[d:] + for len(dst) > 5 { + // Repeat offset, make length cheaper + length -= 4 + if length <= 4 { + dst[0] = uint8(length)<<2 | tagCopy1 + dst[1] = 0 + d += 2 + break + } + if length < 8 && offset < 2048 { + // Encode WITH offset + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1 + d += 2 + break + } + if length < (1<<8)+4 { + length -= 4 + dst[2] = uint8(length) + dst[1] = 0 + dst[0] = 5<<2 | tagCopy1 + d += 3 + break + } + if length < (1<<16)+(1<<8) { + length -= 1 << 8 + dst[3] = uint8(length >> 8) + dst[2] = uint8(length >> 0) + dst[1] = 0 + dst[0] = 6<<2 | tagCopy1 + d += 4 + break + } + const maxRepeat = (1 << 24) - 1 + length -= 1 << 16 + left := 0 + if length > maxRepeat { + left = length - maxRepeat + 4 + length = maxRepeat - 4 + } + dst[4] = uint8(length >> 16) + dst[3] = uint8(length >> 8) + dst[2] = uint8(length >> 0) + dst[1] = 0 + dst[0] = 7<<2 | tagCopy1 + if left > 0 { + d += 5 + emitRepeat16(dst[5:], offset, left) + break + } + d += 5 + break + } + } + } else { + if debug { + fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset) + } + if !inline { + d += emitCopy16(dst[d:], offset, ml) + } else { + length := ml + dst := dst[d:] + for len(dst) > 5 { + // Offset no more than 2 bytes. + if length > 64 { + off := 3 + if offset < 2048 { + // emit 8 bytes as tagCopy1, rest as repeats. + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1 + length -= 8 + off = 2 + } else { + // Emit a length 60 copy, encoded as 3 bytes. + // Emit remaining as repeat value (minimum 4 bytes). + dst[2] = uint8(offset >> 8) + dst[1] = uint8(offset) + dst[0] = 59<<2 | tagCopy2 + length -= 60 + } + // Emit remaining as repeats, at least 4 bytes remain. + d += off + emitRepeat16(dst[off:], offset, length) + break + } + if length >= 12 || offset >= 2048 { + // Emit the remaining copy, encoded as 3 bytes. + dst[2] = uint8(offset >> 8) + dst[1] = uint8(offset) + dst[0] = uint8(length-1)<<2 | tagCopy2 + d += 3 + break + } + // Emit the remaining copy, encoded as 2 bytes. + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + d += 2 + break + } + } + lastOffset = offset + } + uncompressed += ml + if d > dLimit { + return nil, 0, ErrDstTooSmall + } + } + + return dst[:d], uncompressed, nil +} + +// ConvertBlockSnappy will convert an LZ4s block and append it +// as a Snappy block without block length to dst. +// The uncompressed size is returned as well. +// dst must have capacity to contain the entire compressed block. +func (l *LZ4sConverter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) { + if len(src) == 0 { + return dst, 0, nil + } + const debug = false + const lz4MinMatch = 3 + + s, d := 0, len(dst) + dst = dst[:cap(dst)] + // Use assembly when possible + if !debug && hasAmd64Asm { + res, sz := cvtLZ4sBlockSnappyAsm(dst[d:], src) + if res < 0 { + const ( + errCorrupt = -1 + errDstTooSmall = -2 + ) + switch res { + case errCorrupt: + return nil, 0, ErrCorrupt + case errDstTooSmall: + return nil, 0, ErrDstTooSmall + default: + return nil, 0, fmt.Errorf("unexpected result: %d", res) + } + } + if d+sz > len(dst) { + return nil, 0, ErrDstTooSmall + } + return dst[:d+sz], res, nil + } + + dLimit := len(dst) - 10 + var uncompressed int + if debug { + fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst)) + } + + for { + if s >= len(src) { + return nil, 0, ErrCorrupt + } + // Read literal info + token := src[s] + ll := int(token >> 4) + ml := int(lz4MinMatch + (token & 0xf)) + + // If upper nibble is 15, literal length is extended + if token >= 0xf0 { + for { + s++ + if s >= len(src) { + if debug { + fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + val := src[s] + ll += int(val) + if val != 255 { + break + } + } + } + // Skip past token + if s+ll >= len(src) { + if debug { + fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src)) + } + return nil, 0, ErrCorrupt + } + s++ + if ll > 0 { + if d+ll > dLimit { + return nil, 0, ErrDstTooSmall + } + if debug { + fmt.Printf("emit %d literals\n", ll) + } + d += emitLiteralGo(dst[d:], src[s:s+ll]) + s += ll + uncompressed += ll + } + + // Check if we are done... + if ml == lz4MinMatch { + if s == len(src) { + break + } + // 0 bytes. + continue + } + // 2 byte offset + if s >= len(src)-2 { + if debug { + fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2) + } + return nil, 0, ErrCorrupt + } + offset := binary.LittleEndian.Uint16(src[s:]) + s += 2 + if offset == 0 { + if debug { + fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s) + } + return nil, 0, ErrCorrupt + } + if int(offset) > uncompressed { + if debug { + fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed) + } + return nil, 0, ErrCorrupt + } + + if ml == lz4MinMatch+15 { + for { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + val := src[s] + s++ + ml += int(val) + if val != 255 { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + break + } + } + } + if debug { + fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset) + } + length := ml + // d += emitCopyNoRepeat(dst[d:], int(offset), ml) + for length > 0 { + if d >= dLimit { + return nil, 0, ErrDstTooSmall + } + + // Offset no more than 2 bytes. + if length > 64 { + // Emit a length 64 copy, encoded as 3 bytes. + dst[d+2] = uint8(offset >> 8) + dst[d+1] = uint8(offset) + dst[d+0] = 63<<2 | tagCopy2 + length -= 64 + d += 3 + continue + } + if length >= 12 || offset >= 2048 || length < 4 { + // Emit the remaining copy, encoded as 3 bytes. + dst[d+2] = uint8(offset >> 8) + dst[d+1] = uint8(offset) + dst[d+0] = uint8(length-1)<<2 | tagCopy2 + d += 3 + break + } + // Emit the remaining copy, encoded as 2 bytes. + dst[d+1] = uint8(offset) + dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + d += 2 + break + } + uncompressed += ml + if d > dLimit { + return nil, 0, ErrDstTooSmall + } + } + + return dst[:d], uncompressed, nil +} diff --git a/vendor/github.com/klauspost/compress/s2/reader.go b/vendor/github.com/klauspost/compress/s2/reader.go new file mode 100644 index 00000000..8b84baa6 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/reader.go @@ -0,0 +1,1055 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Copyright (c) 2019+ Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s2 + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "math" + "runtime" + "sync" +) + +// ErrCantSeek is returned if the stream cannot be seeked. +type ErrCantSeek struct { + Reason string +} + +// Error returns the error as string. +func (e ErrCantSeek) Error() string { + return fmt.Sprintf("s2: Can't seek because %s", e.Reason) +} + +// NewReader returns a new Reader that decompresses from r, using the framing +// format described at +// https://github.com/google/snappy/blob/master/framing_format.txt with S2 changes. +func NewReader(r io.Reader, opts ...ReaderOption) *Reader { + nr := Reader{ + r: r, + maxBlock: maxBlockSize, + } + for _, opt := range opts { + if err := opt(&nr); err != nil { + nr.err = err + return &nr + } + } + nr.maxBufSize = MaxEncodedLen(nr.maxBlock) + checksumSize + if nr.lazyBuf > 0 { + nr.buf = make([]byte, MaxEncodedLen(nr.lazyBuf)+checksumSize) + } else { + nr.buf = make([]byte, MaxEncodedLen(defaultBlockSize)+checksumSize) + } + nr.readHeader = nr.ignoreStreamID + nr.paramsOK = true + return &nr +} + +// ReaderOption is an option for creating a decoder. +type ReaderOption func(*Reader) error + +// ReaderMaxBlockSize allows to control allocations if the stream +// has been compressed with a smaller WriterBlockSize, or with the default 1MB. +// Blocks must be this size or smaller to decompress, +// otherwise the decoder will return ErrUnsupported. +// +// For streams compressed with Snappy this can safely be set to 64KB (64 << 10). +// +// Default is the maximum limit of 4MB. +func ReaderMaxBlockSize(blockSize int) ReaderOption { + return func(r *Reader) error { + if blockSize > maxBlockSize || blockSize <= 0 { + return errors.New("s2: block size too large. Must be <= 4MB and > 0") + } + if r.lazyBuf == 0 && blockSize < defaultBlockSize { + r.lazyBuf = blockSize + } + r.maxBlock = blockSize + return nil + } +} + +// ReaderAllocBlock allows to control upfront stream allocations +// and not allocate for frames bigger than this initially. +// If frames bigger than this is seen a bigger buffer will be allocated. +// +// Default is 1MB, which is default output size. +func ReaderAllocBlock(blockSize int) ReaderOption { + return func(r *Reader) error { + if blockSize > maxBlockSize || blockSize < 1024 { + return errors.New("s2: invalid ReaderAllocBlock. Must be <= 4MB and >= 1024") + } + r.lazyBuf = blockSize + return nil + } +} + +// ReaderIgnoreStreamIdentifier will make the reader skip the expected +// stream identifier at the beginning of the stream. +// This can be used when serving a stream that has been forwarded to a specific point. +func ReaderIgnoreStreamIdentifier() ReaderOption { + return func(r *Reader) error { + r.ignoreStreamID = true + return nil + } +} + +// ReaderSkippableCB will register a callback for chuncks with the specified ID. +// ID must be a Reserved skippable chunks ID, 0x80-0xfd (inclusive). +// For each chunk with the ID, the callback is called with the content. +// Any returned non-nil error will abort decompression. +// Only one callback per ID is supported, latest sent will be used. +func ReaderSkippableCB(id uint8, fn func(r io.Reader) error) ReaderOption { + return func(r *Reader) error { + if id < 0x80 || id > 0xfd { + return fmt.Errorf("ReaderSkippableCB: Invalid id provided, must be 0x80-0xfd (inclusive)") + } + r.skippableCB[id] = fn + return nil + } +} + +// ReaderIgnoreCRC will make the reader skip CRC calculation and checks. +func ReaderIgnoreCRC() ReaderOption { + return func(r *Reader) error { + r.ignoreCRC = true + return nil + } +} + +// Reader is an io.Reader that can read Snappy-compressed bytes. +type Reader struct { + r io.Reader + err error + decoded []byte + buf []byte + skippableCB [0x80]func(r io.Reader) error + blockStart int64 // Uncompressed offset at start of current. + index *Index + + // decoded[i:j] contains decoded bytes that have not yet been passed on. + i, j int + // maximum block size allowed. + maxBlock int + // maximum expected buffer size. + maxBufSize int + // alloc a buffer this size if > 0. + lazyBuf int + readHeader bool + paramsOK bool + snappyFrame bool + ignoreStreamID bool + ignoreCRC bool +} + +// ensureBufferSize will ensure that the buffer can take at least n bytes. +// If false is returned the buffer exceeds maximum allowed size. +func (r *Reader) ensureBufferSize(n int) bool { + if n > r.maxBufSize { + r.err = ErrCorrupt + return false + } + if cap(r.buf) >= n { + return true + } + // Realloc buffer. + r.buf = make([]byte, n) + return true +} + +// Reset discards any buffered data, resets all state, and switches the Snappy +// reader to read from r. This permits reusing a Reader rather than allocating +// a new one. +func (r *Reader) Reset(reader io.Reader) { + if !r.paramsOK { + return + } + r.index = nil + r.r = reader + r.err = nil + r.i = 0 + r.j = 0 + r.blockStart = 0 + r.readHeader = r.ignoreStreamID +} + +func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { + if _, r.err = io.ReadFull(r.r, p); r.err != nil { + if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrCorrupt + } + return false + } + return true +} + +// skippable will skip n bytes. +// If the supplied reader supports seeking that is used. +// tmp is used as a temporary buffer for reading. +// The supplied slice does not need to be the size of the read. +func (r *Reader) skippable(tmp []byte, n int, allowEOF bool, id uint8) (ok bool) { + if id < 0x80 { + r.err = fmt.Errorf("interbal error: skippable id < 0x80") + return false + } + if fn := r.skippableCB[id-0x80]; fn != nil { + rd := io.LimitReader(r.r, int64(n)) + r.err = fn(rd) + if r.err != nil { + return false + } + _, r.err = io.CopyBuffer(ioutil.Discard, rd, tmp) + return r.err == nil + } + if rs, ok := r.r.(io.ReadSeeker); ok { + _, err := rs.Seek(int64(n), io.SeekCurrent) + if err == nil { + return true + } + if err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrCorrupt + return false + } + } + for n > 0 { + if n < len(tmp) { + tmp = tmp[:n] + } + if _, r.err = io.ReadFull(r.r, tmp); r.err != nil { + if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrCorrupt + } + return false + } + n -= len(tmp) + } + return true +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + for { + if r.i < r.j { + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil + } + if !r.readFull(r.buf[:4], true) { + return 0, r.err + } + chunkType := r.buf[0] + if !r.readHeader { + if chunkType != chunkTypeStreamIdentifier { + r.err = ErrCorrupt + return 0, r.err + } + r.readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + r.blockStart += int64(r.j) + // Section 4.2. Compressed data (chunk type 0x00). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + if !r.ensureBufferSize(chunkLen) { + if r.err == nil { + r.err = ErrUnsupported + } + return 0, r.err + } + buf := r.buf[:chunkLen] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[checksumSize:] + + n, err := DecodedLen(buf) + if err != nil { + r.err = err + return 0, r.err + } + if r.snappyFrame && n > maxSnappyBlockSize { + r.err = ErrCorrupt + return 0, r.err + } + + if n > len(r.decoded) { + if n > r.maxBlock { + r.err = ErrCorrupt + return 0, r.err + } + r.decoded = make([]byte, n) + } + if _, err := Decode(r.decoded, buf); err != nil { + r.err = err + return 0, r.err + } + if !r.ignoreCRC && crc(r.decoded[:n]) != checksum { + r.err = ErrCRC + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeUncompressedData: + r.blockStart += int64(r.j) + // Section 4.3. Uncompressed data (chunk type 0x01). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + if !r.ensureBufferSize(chunkLen) { + if r.err == nil { + r.err = ErrUnsupported + } + return 0, r.err + } + buf := r.buf[:checksumSize] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read directly into r.decoded instead of via r.buf. + n := chunkLen - checksumSize + if r.snappyFrame && n > maxSnappyBlockSize { + r.err = ErrCorrupt + return 0, r.err + } + if n > len(r.decoded) { + if n > r.maxBlock { + r.err = ErrCorrupt + return 0, r.err + } + r.decoded = make([]byte, n) + } + if !r.readFull(r.decoded[:n], false) { + return 0, r.err + } + if !r.ignoreCRC && crc(r.decoded[:n]) != checksum { + r.err = ErrCRC + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeStreamIdentifier: + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(magicBody) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.buf[:len(magicBody)], false) { + return 0, r.err + } + if string(r.buf[:len(magicBody)]) != magicBody { + if string(r.buf[:len(magicBody)]) != magicBodySnappy { + r.err = ErrCorrupt + return 0, r.err + } else { + r.snappyFrame = true + } + } else { + r.snappyFrame = false + } + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + // fmt.Printf("ERR chunktype: 0x%x\n", chunkType) + r.err = ErrUnsupported + return 0, r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if chunkLen > maxChunkSize { + // fmt.Printf("ERR chunkLen: 0x%x\n", chunkLen) + r.err = ErrUnsupported + return 0, r.err + } + + // fmt.Printf("skippable: ID: 0x%x, len: 0x%x\n", chunkType, chunkLen) + if !r.skippable(r.buf, chunkLen, false, chunkType) { + return 0, r.err + } + } +} + +// DecodeConcurrent will decode the full stream to w. +// This function should not be combined with reading, seeking or other operations. +// Up to 'concurrent' goroutines will be used. +// If <= 0, runtime.NumCPU will be used. +// On success the number of bytes decompressed nil and is returned. +// This is mainly intended for bigger streams. +func (r *Reader) DecodeConcurrent(w io.Writer, concurrent int) (written int64, err error) { + if r.i > 0 || r.j > 0 || r.blockStart > 0 { + return 0, errors.New("DecodeConcurrent called after ") + } + if concurrent <= 0 { + concurrent = runtime.NumCPU() + } + + // Write to output + var errMu sync.Mutex + var aErr error + setErr := func(e error) (ok bool) { + errMu.Lock() + defer errMu.Unlock() + if e == nil { + return aErr == nil + } + if aErr == nil { + aErr = e + } + return false + } + hasErr := func() (ok bool) { + errMu.Lock() + v := aErr != nil + errMu.Unlock() + return v + } + + var aWritten int64 + toRead := make(chan []byte, concurrent) + writtenBlocks := make(chan []byte, concurrent) + queue := make(chan chan []byte, concurrent) + reUse := make(chan chan []byte, concurrent) + for i := 0; i < concurrent; i++ { + toRead <- make([]byte, 0, r.maxBufSize) + writtenBlocks <- make([]byte, 0, r.maxBufSize) + reUse <- make(chan []byte, 1) + } + // Writer + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + for toWrite := range queue { + entry := <-toWrite + reUse <- toWrite + if hasErr() { + writtenBlocks <- entry + continue + } + n, err := w.Write(entry) + want := len(entry) + writtenBlocks <- entry + if err != nil { + setErr(err) + continue + } + if n != want { + setErr(io.ErrShortWrite) + continue + } + aWritten += int64(n) + } + }() + + // Reader + defer func() { + close(queue) + if r.err != nil { + err = r.err + setErr(r.err) + } + wg.Wait() + if err == nil { + err = aErr + } + written = aWritten + }() + + for !hasErr() { + if !r.readFull(r.buf[:4], true) { + if r.err == io.EOF { + r.err = nil + } + return 0, r.err + } + chunkType := r.buf[0] + if !r.readHeader { + if chunkType != chunkTypeStreamIdentifier { + r.err = ErrCorrupt + return 0, r.err + } + r.readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + r.blockStart += int64(r.j) + // Section 4.2. Compressed data (chunk type 0x00). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + if chunkLen > r.maxBufSize { + r.err = ErrCorrupt + return 0, r.err + } + orgBuf := <-toRead + buf := orgBuf[:chunkLen] + + if !r.readFull(buf, false) { + return 0, r.err + } + + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[checksumSize:] + + n, err := DecodedLen(buf) + if err != nil { + r.err = err + return 0, r.err + } + if r.snappyFrame && n > maxSnappyBlockSize { + r.err = ErrCorrupt + return 0, r.err + } + + if n > r.maxBlock { + r.err = ErrCorrupt + return 0, r.err + } + wg.Add(1) + + decoded := <-writtenBlocks + entry := <-reUse + queue <- entry + go func() { + defer wg.Done() + decoded = decoded[:n] + _, err := Decode(decoded, buf) + toRead <- orgBuf + if err != nil { + writtenBlocks <- decoded + setErr(err) + return + } + if !r.ignoreCRC && crc(decoded) != checksum { + writtenBlocks <- decoded + setErr(ErrCRC) + return + } + entry <- decoded + }() + continue + + case chunkTypeUncompressedData: + + // Section 4.3. Uncompressed data (chunk type 0x01). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + if chunkLen > r.maxBufSize { + r.err = ErrCorrupt + return 0, r.err + } + // Grab write buffer + orgBuf := <-writtenBlocks + buf := orgBuf[:checksumSize] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read content. + n := chunkLen - checksumSize + + if r.snappyFrame && n > maxSnappyBlockSize { + r.err = ErrCorrupt + return 0, r.err + } + if n > r.maxBlock { + r.err = ErrCorrupt + return 0, r.err + } + // Read uncompressed + buf = orgBuf[:n] + if !r.readFull(buf, false) { + return 0, r.err + } + + if !r.ignoreCRC && crc(buf) != checksum { + r.err = ErrCRC + return 0, r.err + } + entry := <-reUse + queue <- entry + entry <- buf + continue + + case chunkTypeStreamIdentifier: + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(magicBody) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.buf[:len(magicBody)], false) { + return 0, r.err + } + if string(r.buf[:len(magicBody)]) != magicBody { + if string(r.buf[:len(magicBody)]) != magicBodySnappy { + r.err = ErrCorrupt + return 0, r.err + } else { + r.snappyFrame = true + } + } else { + r.snappyFrame = false + } + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + // fmt.Printf("ERR chunktype: 0x%x\n", chunkType) + r.err = ErrUnsupported + return 0, r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if chunkLen > maxChunkSize { + // fmt.Printf("ERR chunkLen: 0x%x\n", chunkLen) + r.err = ErrUnsupported + return 0, r.err + } + + // fmt.Printf("skippable: ID: 0x%x, len: 0x%x\n", chunkType, chunkLen) + if !r.skippable(r.buf, chunkLen, false, chunkType) { + return 0, r.err + } + } + return 0, r.err +} + +// Skip will skip n bytes forward in the decompressed output. +// For larger skips this consumes less CPU and is faster than reading output and discarding it. +// CRC is not checked on skipped blocks. +// io.ErrUnexpectedEOF is returned if the stream ends before all bytes have been skipped. +// If a decoding error is encountered subsequent calls to Read will also fail. +func (r *Reader) Skip(n int64) error { + if n < 0 { + return errors.New("attempted negative skip") + } + if r.err != nil { + return r.err + } + + for n > 0 { + if r.i < r.j { + // Skip in buffer. + // decoded[i:j] contains decoded bytes that have not yet been passed on. + left := int64(r.j - r.i) + if left >= n { + tmp := int64(r.i) + n + if tmp > math.MaxInt32 { + return errors.New("s2: internal overflow in skip") + } + r.i = int(tmp) + return nil + } + n -= int64(r.j - r.i) + r.i = r.j + } + + // Buffer empty; read blocks until we have content. + if !r.readFull(r.buf[:4], true) { + if r.err == io.EOF { + r.err = io.ErrUnexpectedEOF + } + return r.err + } + chunkType := r.buf[0] + if !r.readHeader { + if chunkType != chunkTypeStreamIdentifier { + r.err = ErrCorrupt + return r.err + } + r.readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + r.blockStart += int64(r.j) + // Section 4.2. Compressed data (chunk type 0x00). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return r.err + } + if !r.ensureBufferSize(chunkLen) { + if r.err == nil { + r.err = ErrUnsupported + } + return r.err + } + buf := r.buf[:chunkLen] + if !r.readFull(buf, false) { + return r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[checksumSize:] + + dLen, err := DecodedLen(buf) + if err != nil { + r.err = err + return r.err + } + if dLen > r.maxBlock { + r.err = ErrCorrupt + return r.err + } + // Check if destination is within this block + if int64(dLen) > n { + if len(r.decoded) < dLen { + r.decoded = make([]byte, dLen) + } + if _, err := Decode(r.decoded, buf); err != nil { + r.err = err + return r.err + } + if crc(r.decoded[:dLen]) != checksum { + r.err = ErrCorrupt + return r.err + } + } else { + // Skip block completely + n -= int64(dLen) + r.blockStart += int64(dLen) + dLen = 0 + } + r.i, r.j = 0, dLen + continue + case chunkTypeUncompressedData: + r.blockStart += int64(r.j) + // Section 4.3. Uncompressed data (chunk type 0x01). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return r.err + } + if !r.ensureBufferSize(chunkLen) { + if r.err != nil { + r.err = ErrUnsupported + } + return r.err + } + buf := r.buf[:checksumSize] + if !r.readFull(buf, false) { + return r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read directly into r.decoded instead of via r.buf. + n2 := chunkLen - checksumSize + if n2 > len(r.decoded) { + if n2 > r.maxBlock { + r.err = ErrCorrupt + return r.err + } + r.decoded = make([]byte, n2) + } + if !r.readFull(r.decoded[:n2], false) { + return r.err + } + if int64(n2) < n { + if crc(r.decoded[:n2]) != checksum { + r.err = ErrCorrupt + return r.err + } + } + r.i, r.j = 0, n2 + continue + case chunkTypeStreamIdentifier: + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(magicBody) { + r.err = ErrCorrupt + return r.err + } + if !r.readFull(r.buf[:len(magicBody)], false) { + return r.err + } + if string(r.buf[:len(magicBody)]) != magicBody { + if string(r.buf[:len(magicBody)]) != magicBodySnappy { + r.err = ErrCorrupt + return r.err + } + } + + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + r.err = ErrUnsupported + return r.err + } + if chunkLen > maxChunkSize { + r.err = ErrUnsupported + return r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if !r.skippable(r.buf, chunkLen, false, chunkType) { + return r.err + } + } + return nil +} + +// ReadSeeker provides random or forward seeking in compressed content. +// See Reader.ReadSeeker +type ReadSeeker struct { + *Reader + readAtMu sync.Mutex +} + +// ReadSeeker will return an io.ReadSeeker and io.ReaderAt +// compatible version of the reader. +// If 'random' is specified the returned io.Seeker can be used for +// random seeking, otherwise only forward seeking is supported. +// Enabling random seeking requires the original input to support +// the io.Seeker interface. +// A custom index can be specified which will be used if supplied. +// When using a custom index, it will not be read from the input stream. +// The ReadAt position will affect regular reads and the current position of Seek. +// So using Read after ReadAt will continue from where the ReadAt stopped. +// No functions should be used concurrently. +// The returned ReadSeeker contains a shallow reference to the existing Reader, +// meaning changes performed to one is reflected in the other. +func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) { + // Read index if provided. + if len(index) != 0 { + if r.index == nil { + r.index = &Index{} + } + if _, err := r.index.Load(index); err != nil { + return nil, ErrCantSeek{Reason: "loading index returned: " + err.Error()} + } + } + + // Check if input is seekable + rs, ok := r.r.(io.ReadSeeker) + if !ok { + if !random { + return &ReadSeeker{Reader: r}, nil + } + return nil, ErrCantSeek{Reason: "input stream isn't seekable"} + } + + if r.index != nil { + // Seekable and index, ok... + return &ReadSeeker{Reader: r}, nil + } + + // Load from stream. + r.index = &Index{} + + // Read current position. + pos, err := rs.Seek(0, io.SeekCurrent) + if err != nil { + return nil, ErrCantSeek{Reason: "seeking input returned: " + err.Error()} + } + err = r.index.LoadStream(rs) + if err != nil { + if err == ErrUnsupported { + // If we don't require random seeking, reset input and return. + if !random { + _, err = rs.Seek(pos, io.SeekStart) + if err != nil { + return nil, ErrCantSeek{Reason: "resetting stream returned: " + err.Error()} + } + r.index = nil + return &ReadSeeker{Reader: r}, nil + } + return nil, ErrCantSeek{Reason: "input stream does not contain an index"} + } + return nil, ErrCantSeek{Reason: "reading index returned: " + err.Error()} + } + + // reset position. + _, err = rs.Seek(pos, io.SeekStart) + if err != nil { + return nil, ErrCantSeek{Reason: "seeking input returned: " + err.Error()} + } + return &ReadSeeker{Reader: r}, nil +} + +// Seek allows seeking in compressed data. +func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) { + if r.err != nil { + if !errors.Is(r.err, io.EOF) { + return 0, r.err + } + // Reset on EOF + r.err = nil + } + + // Calculate absolute offset. + absOffset := offset + + switch whence { + case io.SeekStart: + case io.SeekCurrent: + absOffset = r.blockStart + int64(r.i) + offset + case io.SeekEnd: + if r.index == nil { + return 0, ErrUnsupported + } + absOffset = r.index.TotalUncompressed + offset + default: + r.err = ErrUnsupported + return 0, r.err + } + + if absOffset < 0 { + return 0, errors.New("seek before start of file") + } + + if !r.readHeader { + // Make sure we read the header. + _, r.err = r.Read([]byte{}) + if r.err != nil { + return 0, r.err + } + } + + // If we are inside current block no need to seek. + // This includes no offset changes. + if absOffset >= r.blockStart && absOffset < r.blockStart+int64(r.j) { + r.i = int(absOffset - r.blockStart) + return r.blockStart + int64(r.i), nil + } + + rs, ok := r.r.(io.ReadSeeker) + if r.index == nil || !ok { + currOffset := r.blockStart + int64(r.i) + if absOffset >= currOffset { + err := r.Skip(absOffset - currOffset) + return r.blockStart + int64(r.i), err + } + return 0, ErrUnsupported + } + + // We can seek and we have an index. + c, u, err := r.index.Find(absOffset) + if err != nil { + return r.blockStart + int64(r.i), err + } + + // Seek to next block + _, err = rs.Seek(c, io.SeekStart) + if err != nil { + return 0, err + } + + r.i = r.j // Remove rest of current block. + r.blockStart = u - int64(r.j) // Adjust current block start for accounting. + if u < absOffset { + // Forward inside block + return absOffset, r.Skip(absOffset - u) + } + if u > absOffset { + return 0, fmt.Errorf("s2 seek: (internal error) u (%d) > absOffset (%d)", u, absOffset) + } + return absOffset, nil +} + +// ReadAt reads len(p) bytes into p starting at offset off in the +// underlying input source. It returns the number of bytes +// read (0 <= n <= len(p)) and any error encountered. +// +// When ReadAt returns n < len(p), it returns a non-nil error +// explaining why more bytes were not returned. In this respect, +// ReadAt is stricter than Read. +// +// Even if ReadAt returns n < len(p), it may use all of p as scratch +// space during the call. If some data is available but not len(p) bytes, +// ReadAt blocks until either all the data is available or an error occurs. +// In this respect ReadAt is different from Read. +// +// If the n = len(p) bytes returned by ReadAt are at the end of the +// input source, ReadAt may return either err == EOF or err == nil. +// +// If ReadAt is reading from an input source with a seek offset, +// ReadAt should not affect nor be affected by the underlying +// seek offset. +// +// Clients of ReadAt can execute parallel ReadAt calls on the +// same input source. This is however not recommended. +func (r *ReadSeeker) ReadAt(p []byte, offset int64) (int, error) { + r.readAtMu.Lock() + defer r.readAtMu.Unlock() + _, err := r.Seek(offset, io.SeekStart) + if err != nil { + return 0, err + } + n := 0 + for n < len(p) { + n2, err := r.Read(p[n:]) + if err != nil { + // This will include io.EOF + return n + n2, err + } + n += n2 + } + return n, nil +} + +// ReadByte satisfies the io.ByteReader interface. +func (r *Reader) ReadByte() (byte, error) { + if r.err != nil { + return 0, r.err + } + if r.i < r.j { + c := r.decoded[r.i] + r.i++ + return c, nil + } + var tmp [1]byte + for i := 0; i < 10; i++ { + n, err := r.Read(tmp[:]) + if err != nil { + return 0, err + } + if n == 1 { + return tmp[0], nil + } + } + return 0, io.ErrNoProgress +} + +// SkippableCB will register a callback for chunks with the specified ID. +// ID must be a Reserved skippable chunks ID, 0x80-0xfe (inclusive). +// For each chunk with the ID, the callback is called with the content. +// Any returned non-nil error will abort decompression. +// Only one callback per ID is supported, latest sent will be used. +// Sending a nil function will disable previous callbacks. +func (r *Reader) SkippableCB(id uint8, fn func(r io.Reader) error) error { + if id < 0x80 || id > chunkTypePadding { + return fmt.Errorf("ReaderSkippableCB: Invalid id provided, must be 0x80-0xfe (inclusive)") + } + r.skippableCB[id] = fn + return nil +} diff --git a/vendor/github.com/klauspost/compress/s2/writer.go b/vendor/github.com/klauspost/compress/s2/writer.go new file mode 100644 index 00000000..5a944068 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/writer.go @@ -0,0 +1,1020 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Copyright (c) 2019+ Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s2 + +import ( + "crypto/rand" + "encoding/binary" + "errors" + "fmt" + "io" + "runtime" + "sync" +) + +const ( + levelUncompressed = iota + 1 + levelFast + levelBetter + levelBest +) + +// NewWriter returns a new Writer that compresses to w, using the +// framing format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +// +// Users must call Close to guarantee all data has been forwarded to +// the underlying io.Writer and that resources are released. +// They may also call Flush zero or more times before calling Close. +func NewWriter(w io.Writer, opts ...WriterOption) *Writer { + w2 := Writer{ + blockSize: defaultBlockSize, + concurrency: runtime.GOMAXPROCS(0), + randSrc: rand.Reader, + level: levelFast, + } + for _, opt := range opts { + if err := opt(&w2); err != nil { + w2.errState = err + return &w2 + } + } + w2.obufLen = obufHeaderLen + MaxEncodedLen(w2.blockSize) + w2.paramsOK = true + w2.ibuf = make([]byte, 0, w2.blockSize) + w2.buffers.New = func() interface{} { + return make([]byte, w2.obufLen) + } + w2.Reset(w) + return &w2 +} + +// Writer is an io.Writer that can write Snappy-compressed bytes. +type Writer struct { + errMu sync.Mutex + errState error + + // ibuf is a buffer for the incoming (uncompressed) bytes. + ibuf []byte + + blockSize int + obufLen int + concurrency int + written int64 + uncompWritten int64 // Bytes sent to compression + output chan chan result + buffers sync.Pool + pad int + + writer io.Writer + randSrc io.Reader + writerWg sync.WaitGroup + index Index + customEnc func(dst, src []byte) int + + // wroteStreamHeader is whether we have written the stream header. + wroteStreamHeader bool + paramsOK bool + snappy bool + flushOnWrite bool + appendIndex bool + level uint8 +} + +type result struct { + b []byte + // Uncompressed start offset + startOffset int64 +} + +// err returns the previously set error. +// If no error has been set it is set to err if not nil. +func (w *Writer) err(err error) error { + w.errMu.Lock() + errSet := w.errState + if errSet == nil && err != nil { + w.errState = err + errSet = err + } + w.errMu.Unlock() + return errSet +} + +// Reset discards the writer's state and switches the Snappy writer to write to w. +// This permits reusing a Writer rather than allocating a new one. +func (w *Writer) Reset(writer io.Writer) { + if !w.paramsOK { + return + } + // Close previous writer, if any. + if w.output != nil { + close(w.output) + w.writerWg.Wait() + w.output = nil + } + w.errState = nil + w.ibuf = w.ibuf[:0] + w.wroteStreamHeader = false + w.written = 0 + w.writer = writer + w.uncompWritten = 0 + w.index.reset(w.blockSize) + + // If we didn't get a writer, stop here. + if writer == nil { + return + } + // If no concurrency requested, don't spin up writer goroutine. + if w.concurrency == 1 { + return + } + + toWrite := make(chan chan result, w.concurrency) + w.output = toWrite + w.writerWg.Add(1) + + // Start a writer goroutine that will write all output in order. + go func() { + defer w.writerWg.Done() + + // Get a queued write. + for write := range toWrite { + // Wait for the data to be available. + input := <-write + in := input.b + if len(in) > 0 { + if w.err(nil) == nil { + // Don't expose data from previous buffers. + toWrite := in[:len(in):len(in)] + // Write to output. + n, err := writer.Write(toWrite) + if err == nil && n != len(toWrite) { + err = io.ErrShortBuffer + } + _ = w.err(err) + w.err(w.index.add(w.written, input.startOffset)) + w.written += int64(n) + } + } + if cap(in) >= w.obufLen { + w.buffers.Put(in) + } + // close the incoming write request. + // This can be used for synchronizing flushes. + close(write) + } + }() +} + +// Write satisfies the io.Writer interface. +func (w *Writer) Write(p []byte) (nRet int, errRet error) { + if err := w.err(nil); err != nil { + return 0, err + } + if w.flushOnWrite { + return w.write(p) + } + // If we exceed the input buffer size, start writing + for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err(nil) == nil { + var n int + if len(w.ibuf) == 0 { + // Large write, empty buffer. + // Write directly from p to avoid copy. + n, _ = w.write(p) + } else { + n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + w.write(w.ibuf) + w.ibuf = w.ibuf[:0] + } + nRet += n + p = p[n:] + } + if err := w.err(nil); err != nil { + return nRet, err + } + // p should always be able to fit into w.ibuf now. + n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + nRet += n + return nRet, nil +} + +// ReadFrom implements the io.ReaderFrom interface. +// Using this is typically more efficient since it avoids a memory copy. +// ReadFrom reads data from r until EOF or error. +// The return value n is the number of bytes read. +// Any error except io.EOF encountered during the read is also returned. +func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) { + if err := w.err(nil); err != nil { + return 0, err + } + if len(w.ibuf) > 0 { + err := w.Flush() + if err != nil { + return 0, err + } + } + if br, ok := r.(byter); ok { + buf := br.Bytes() + if err := w.EncodeBuffer(buf); err != nil { + return 0, err + } + return int64(len(buf)), w.Flush() + } + for { + inbuf := w.buffers.Get().([]byte)[:w.blockSize+obufHeaderLen] + n2, err := io.ReadFull(r, inbuf[obufHeaderLen:]) + if err != nil { + if err == io.ErrUnexpectedEOF { + err = io.EOF + } + if err != io.EOF { + return n, w.err(err) + } + } + if n2 == 0 { + break + } + n += int64(n2) + err2 := w.writeFull(inbuf[:n2+obufHeaderLen]) + if w.err(err2) != nil { + break + } + + if err != nil { + // We got EOF and wrote everything + break + } + } + + return n, w.err(nil) +} + +// AddSkippableBlock will add a skippable block to the stream. +// The ID must be 0x80-0xfe (inclusive). +// Length of the skippable block must be <= 16777215 bytes. +func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) { + if err := w.err(nil); err != nil { + return err + } + if len(data) == 0 { + return nil + } + if id < 0x80 || id > chunkTypePadding { + return fmt.Errorf("invalid skippable block id %x", id) + } + if len(data) > maxChunkSize { + return fmt.Errorf("skippable block excessed maximum size") + } + var header [4]byte + chunkLen := 4 + len(data) + header[0] = id + header[1] = uint8(chunkLen >> 0) + header[2] = uint8(chunkLen >> 8) + header[3] = uint8(chunkLen >> 16) + if w.concurrency == 1 { + write := func(b []byte) error { + n, err := w.writer.Write(b) + if err = w.err(err); err != nil { + return err + } + if n != len(data) { + return w.err(io.ErrShortWrite) + } + w.written += int64(n) + return w.err(nil) + } + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + if w.snappy { + if err := write([]byte(magicChunkSnappy)); err != nil { + return err + } + } else { + if err := write([]byte(magicChunk)); err != nil { + return err + } + } + } + if err := write(header[:]); err != nil { + return err + } + if err := write(data); err != nil { + return err + } + } + + // Create output... + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + hWriter := make(chan result) + w.output <- hWriter + if w.snappy { + hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + } else { + hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + } + } + + // Copy input. + inbuf := w.buffers.Get().([]byte)[:4] + copy(inbuf, header[:]) + inbuf = append(inbuf, data...) + + output := make(chan result, 1) + // Queue output. + w.output <- output + output <- result{startOffset: w.uncompWritten, b: inbuf} + + return nil +} + +// EncodeBuffer will add a buffer to the stream. +// This is the fastest way to encode a stream, +// but the input buffer cannot be written to by the caller +// until Flush or Close has been called when concurrency != 1. +// +// If you cannot control that, use the regular Write function. +// +// Note that input is not buffered. +// This means that each write will result in discrete blocks being created. +// For buffered writes, use the regular Write function. +func (w *Writer) EncodeBuffer(buf []byte) (err error) { + if err := w.err(nil); err != nil { + return err + } + + if w.flushOnWrite { + _, err := w.write(buf) + return err + } + // Flush queued data first. + if len(w.ibuf) > 0 { + err := w.Flush() + if err != nil { + return err + } + } + if w.concurrency == 1 { + _, err := w.writeSync(buf) + return err + } + + // Spawn goroutine and write block to output channel. + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + hWriter := make(chan result) + w.output <- hWriter + if w.snappy { + hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + } else { + hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + } + } + + for len(buf) > 0 { + // Cut input. + uncompressed := buf + if len(uncompressed) > w.blockSize { + uncompressed = uncompressed[:w.blockSize] + } + buf = buf[len(uncompressed):] + // Get an output buffer. + obuf := w.buffers.Get().([]byte)[:len(uncompressed)+obufHeaderLen] + output := make(chan result) + // Queue output now, so we keep order. + w.output <- output + res := result{ + startOffset: w.uncompWritten, + } + w.uncompWritten += int64(len(uncompressed)) + go func() { + checksum := crc(uncompressed) + + // Set to uncompressed. + chunkType := uint8(chunkTypeUncompressedData) + chunkLen := 4 + len(uncompressed) + + // Attempt compressing. + n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed))) + n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed) + + // Check if we should use this, or store as uncompressed instead. + if n2 > 0 { + chunkType = uint8(chunkTypeCompressedData) + chunkLen = 4 + n + n2 + obuf = obuf[:obufHeaderLen+n+n2] + } else { + // copy uncompressed + copy(obuf[obufHeaderLen:], uncompressed) + } + + // Fill in the per-chunk header that comes before the body. + obuf[0] = chunkType + obuf[1] = uint8(chunkLen >> 0) + obuf[2] = uint8(chunkLen >> 8) + obuf[3] = uint8(chunkLen >> 16) + obuf[4] = uint8(checksum >> 0) + obuf[5] = uint8(checksum >> 8) + obuf[6] = uint8(checksum >> 16) + obuf[7] = uint8(checksum >> 24) + + // Queue final output. + res.b = obuf + output <- res + }() + } + return nil +} + +func (w *Writer) encodeBlock(obuf, uncompressed []byte) int { + if w.customEnc != nil { + if ret := w.customEnc(obuf, uncompressed); ret >= 0 { + return ret + } + } + if w.snappy { + switch w.level { + case levelFast: + return encodeBlockSnappy(obuf, uncompressed) + case levelBetter: + return encodeBlockBetterSnappy(obuf, uncompressed) + case levelBest: + return encodeBlockBestSnappy(obuf, uncompressed) + } + return 0 + } + switch w.level { + case levelFast: + return encodeBlock(obuf, uncompressed) + case levelBetter: + return encodeBlockBetter(obuf, uncompressed) + case levelBest: + return encodeBlockBest(obuf, uncompressed, nil) + } + return 0 +} + +func (w *Writer) write(p []byte) (nRet int, errRet error) { + if err := w.err(nil); err != nil { + return 0, err + } + if w.concurrency == 1 { + return w.writeSync(p) + } + + // Spawn goroutine and write block to output channel. + for len(p) > 0 { + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + hWriter := make(chan result) + w.output <- hWriter + if w.snappy { + hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + } else { + hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + } + } + + var uncompressed []byte + if len(p) > w.blockSize { + uncompressed, p = p[:w.blockSize], p[w.blockSize:] + } else { + uncompressed, p = p, nil + } + + // Copy input. + // If the block is incompressible, this is used for the result. + inbuf := w.buffers.Get().([]byte)[:len(uncompressed)+obufHeaderLen] + obuf := w.buffers.Get().([]byte)[:w.obufLen] + copy(inbuf[obufHeaderLen:], uncompressed) + uncompressed = inbuf[obufHeaderLen:] + + output := make(chan result) + // Queue output now, so we keep order. + w.output <- output + res := result{ + startOffset: w.uncompWritten, + } + w.uncompWritten += int64(len(uncompressed)) + + go func() { + checksum := crc(uncompressed) + + // Set to uncompressed. + chunkType := uint8(chunkTypeUncompressedData) + chunkLen := 4 + len(uncompressed) + + // Attempt compressing. + n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed))) + n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed) + + // Check if we should use this, or store as uncompressed instead. + if n2 > 0 { + chunkType = uint8(chunkTypeCompressedData) + chunkLen = 4 + n + n2 + obuf = obuf[:obufHeaderLen+n+n2] + } else { + // Use input as output. + obuf, inbuf = inbuf, obuf + } + + // Fill in the per-chunk header that comes before the body. + obuf[0] = chunkType + obuf[1] = uint8(chunkLen >> 0) + obuf[2] = uint8(chunkLen >> 8) + obuf[3] = uint8(chunkLen >> 16) + obuf[4] = uint8(checksum >> 0) + obuf[5] = uint8(checksum >> 8) + obuf[6] = uint8(checksum >> 16) + obuf[7] = uint8(checksum >> 24) + + // Queue final output. + res.b = obuf + output <- res + + // Put unused buffer back in pool. + w.buffers.Put(inbuf) + }() + nRet += len(uncompressed) + } + return nRet, nil +} + +// writeFull is a special version of write that will always write the full buffer. +// Data to be compressed should start at offset obufHeaderLen and fill the remainder of the buffer. +// The data will be written as a single block. +// The caller is not allowed to use inbuf after this function has been called. +func (w *Writer) writeFull(inbuf []byte) (errRet error) { + if err := w.err(nil); err != nil { + return err + } + + if w.concurrency == 1 { + _, err := w.writeSync(inbuf[obufHeaderLen:]) + return err + } + + // Spawn goroutine and write block to output channel. + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + hWriter := make(chan result) + w.output <- hWriter + if w.snappy { + hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + } else { + hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + } + } + + // Get an output buffer. + obuf := w.buffers.Get().([]byte)[:w.obufLen] + uncompressed := inbuf[obufHeaderLen:] + + output := make(chan result) + // Queue output now, so we keep order. + w.output <- output + res := result{ + startOffset: w.uncompWritten, + } + w.uncompWritten += int64(len(uncompressed)) + + go func() { + checksum := crc(uncompressed) + + // Set to uncompressed. + chunkType := uint8(chunkTypeUncompressedData) + chunkLen := 4 + len(uncompressed) + + // Attempt compressing. + n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed))) + n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed) + + // Check if we should use this, or store as uncompressed instead. + if n2 > 0 { + chunkType = uint8(chunkTypeCompressedData) + chunkLen = 4 + n + n2 + obuf = obuf[:obufHeaderLen+n+n2] + } else { + // Use input as output. + obuf, inbuf = inbuf, obuf + } + + // Fill in the per-chunk header that comes before the body. + obuf[0] = chunkType + obuf[1] = uint8(chunkLen >> 0) + obuf[2] = uint8(chunkLen >> 8) + obuf[3] = uint8(chunkLen >> 16) + obuf[4] = uint8(checksum >> 0) + obuf[5] = uint8(checksum >> 8) + obuf[6] = uint8(checksum >> 16) + obuf[7] = uint8(checksum >> 24) + + // Queue final output. + res.b = obuf + output <- res + + // Put unused buffer back in pool. + w.buffers.Put(inbuf) + }() + return nil +} + +func (w *Writer) writeSync(p []byte) (nRet int, errRet error) { + if err := w.err(nil); err != nil { + return 0, err + } + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + var n int + var err error + if w.snappy { + n, err = w.writer.Write([]byte(magicChunkSnappy)) + } else { + n, err = w.writer.Write([]byte(magicChunk)) + } + if err != nil { + return 0, w.err(err) + } + if n != len(magicChunk) { + return 0, w.err(io.ErrShortWrite) + } + w.written += int64(n) + } + + for len(p) > 0 { + var uncompressed []byte + if len(p) > w.blockSize { + uncompressed, p = p[:w.blockSize], p[w.blockSize:] + } else { + uncompressed, p = p, nil + } + + obuf := w.buffers.Get().([]byte)[:w.obufLen] + checksum := crc(uncompressed) + + // Set to uncompressed. + chunkType := uint8(chunkTypeUncompressedData) + chunkLen := 4 + len(uncompressed) + + // Attempt compressing. + n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed))) + n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed) + + if n2 > 0 { + chunkType = uint8(chunkTypeCompressedData) + chunkLen = 4 + n + n2 + obuf = obuf[:obufHeaderLen+n+n2] + } else { + obuf = obuf[:8] + } + + // Fill in the per-chunk header that comes before the body. + obuf[0] = chunkType + obuf[1] = uint8(chunkLen >> 0) + obuf[2] = uint8(chunkLen >> 8) + obuf[3] = uint8(chunkLen >> 16) + obuf[4] = uint8(checksum >> 0) + obuf[5] = uint8(checksum >> 8) + obuf[6] = uint8(checksum >> 16) + obuf[7] = uint8(checksum >> 24) + + n, err := w.writer.Write(obuf) + if err != nil { + return 0, w.err(err) + } + if n != len(obuf) { + return 0, w.err(io.ErrShortWrite) + } + w.err(w.index.add(w.written, w.uncompWritten)) + w.written += int64(n) + w.uncompWritten += int64(len(uncompressed)) + + if chunkType == chunkTypeUncompressedData { + // Write uncompressed data. + n, err := w.writer.Write(uncompressed) + if err != nil { + return 0, w.err(err) + } + if n != len(uncompressed) { + return 0, w.err(io.ErrShortWrite) + } + w.written += int64(n) + } + w.buffers.Put(obuf) + // Queue final output. + nRet += len(uncompressed) + } + return nRet, nil +} + +// Flush flushes the Writer to its underlying io.Writer. +// This does not apply padding. +func (w *Writer) Flush() error { + if err := w.err(nil); err != nil { + return err + } + + // Queue any data still in input buffer. + if len(w.ibuf) != 0 { + if !w.wroteStreamHeader { + _, err := w.writeSync(w.ibuf) + w.ibuf = w.ibuf[:0] + return w.err(err) + } else { + _, err := w.write(w.ibuf) + w.ibuf = w.ibuf[:0] + err = w.err(err) + if err != nil { + return err + } + } + } + if w.output == nil { + return w.err(nil) + } + + // Send empty buffer + res := make(chan result) + w.output <- res + // Block until this has been picked up. + res <- result{b: nil, startOffset: w.uncompWritten} + // When it is closed, we have flushed. + <-res + return w.err(nil) +} + +// Close calls Flush and then closes the Writer. +// Calling Close multiple times is ok, +// but calling CloseIndex after this will make it not return the index. +func (w *Writer) Close() error { + _, err := w.closeIndex(w.appendIndex) + return err +} + +// CloseIndex calls Close and returns an index on first call. +// This is not required if you are only adding index to a stream. +func (w *Writer) CloseIndex() ([]byte, error) { + return w.closeIndex(true) +} + +func (w *Writer) closeIndex(idx bool) ([]byte, error) { + err := w.Flush() + if w.output != nil { + close(w.output) + w.writerWg.Wait() + w.output = nil + } + + var index []byte + if w.err(nil) == nil && w.writer != nil { + // Create index. + if idx { + compSize := int64(-1) + if w.pad <= 1 { + compSize = w.written + } + index = w.index.appendTo(w.ibuf[:0], w.uncompWritten, compSize) + // Count as written for padding. + if w.appendIndex { + w.written += int64(len(index)) + } + } + + if w.pad > 1 { + tmp := w.ibuf[:0] + if len(index) > 0 { + // Allocate another buffer. + tmp = w.buffers.Get().([]byte)[:0] + defer w.buffers.Put(tmp) + } + add := calcSkippableFrame(w.written, int64(w.pad)) + frame, err := skippableFrame(tmp, add, w.randSrc) + if err = w.err(err); err != nil { + return nil, err + } + n, err2 := w.writer.Write(frame) + if err2 == nil && n != len(frame) { + err2 = io.ErrShortWrite + } + _ = w.err(err2) + } + if len(index) > 0 && w.appendIndex { + n, err2 := w.writer.Write(index) + if err2 == nil && n != len(index) { + err2 = io.ErrShortWrite + } + _ = w.err(err2) + } + } + err = w.err(errClosed) + if err == errClosed { + return index, nil + } + return nil, err +} + +// calcSkippableFrame will return a total size to be added for written +// to be divisible by multiple. +// The value will always be > skippableFrameHeader. +// The function will panic if written < 0 or wantMultiple <= 0. +func calcSkippableFrame(written, wantMultiple int64) int { + if wantMultiple <= 0 { + panic("wantMultiple <= 0") + } + if written < 0 { + panic("written < 0") + } + leftOver := written % wantMultiple + if leftOver == 0 { + return 0 + } + toAdd := wantMultiple - leftOver + for toAdd < skippableFrameHeader { + toAdd += wantMultiple + } + return int(toAdd) +} + +// skippableFrame will add a skippable frame with a total size of bytes. +// total should be >= skippableFrameHeader and < maxBlockSize + skippableFrameHeader +func skippableFrame(dst []byte, total int, r io.Reader) ([]byte, error) { + if total == 0 { + return dst, nil + } + if total < skippableFrameHeader { + return dst, fmt.Errorf("s2: requested skippable frame (%d) < 4", total) + } + if int64(total) >= maxBlockSize+skippableFrameHeader { + return dst, fmt.Errorf("s2: requested skippable frame (%d) >= max 1<<24", total) + } + // Chunk type 0xfe "Section 4.4 Padding (chunk type 0xfe)" + dst = append(dst, chunkTypePadding) + f := uint32(total - skippableFrameHeader) + // Add chunk length. + dst = append(dst, uint8(f), uint8(f>>8), uint8(f>>16)) + // Add data + start := len(dst) + dst = append(dst, make([]byte, f)...) + _, err := io.ReadFull(r, dst[start:]) + return dst, err +} + +var errClosed = errors.New("s2: Writer is closed") + +// WriterOption is an option for creating a encoder. +type WriterOption func(*Writer) error + +// WriterConcurrency will set the concurrency, +// meaning the maximum number of decoders to run concurrently. +// The value supplied must be at least 1. +// By default this will be set to GOMAXPROCS. +func WriterConcurrency(n int) WriterOption { + return func(w *Writer) error { + if n <= 0 { + return errors.New("concurrency must be at least 1") + } + w.concurrency = n + return nil + } +} + +// WriterAddIndex will append an index to the end of a stream +// when it is closed. +func WriterAddIndex() WriterOption { + return func(w *Writer) error { + w.appendIndex = true + return nil + } +} + +// WriterBetterCompression will enable better compression. +// EncodeBetter compresses better than Encode but typically with a +// 10-40% speed decrease on both compression and decompression. +func WriterBetterCompression() WriterOption { + return func(w *Writer) error { + w.level = levelBetter + return nil + } +} + +// WriterBestCompression will enable better compression. +// EncodeBetter compresses better than Encode but typically with a +// big speed decrease on compression. +func WriterBestCompression() WriterOption { + return func(w *Writer) error { + w.level = levelBest + return nil + } +} + +// WriterUncompressed will bypass compression. +// The stream will be written as uncompressed blocks only. +// If concurrency is > 1 CRC and output will still be done async. +func WriterUncompressed() WriterOption { + return func(w *Writer) error { + w.level = levelUncompressed + return nil + } +} + +// WriterBlockSize allows to override the default block size. +// Blocks will be this size or smaller. +// Minimum size is 4KB and and maximum size is 4MB. +// +// Bigger blocks may give bigger throughput on systems with many cores, +// and will increase compression slightly, but it will limit the possible +// concurrency for smaller payloads for both encoding and decoding. +// Default block size is 1MB. +// +// When writing Snappy compatible output using WriterSnappyCompat, +// the maximum block size is 64KB. +func WriterBlockSize(n int) WriterOption { + return func(w *Writer) error { + if w.snappy && n > maxSnappyBlockSize || n < minBlockSize { + return errors.New("s2: block size too large. Must be <= 64K and >=4KB on for snappy compatible output") + } + if n > maxBlockSize || n < minBlockSize { + return errors.New("s2: block size too large. Must be <= 4MB and >=4KB") + } + w.blockSize = n + return nil + } +} + +// WriterPadding will add padding to all output so the size will be a multiple of n. +// This can be used to obfuscate the exact output size or make blocks of a certain size. +// The contents will be a skippable frame, so it will be invisible by the decoder. +// n must be > 0 and <= 4MB. +// The padded area will be filled with data from crypto/rand.Reader. +// The padding will be applied whenever Close is called on the writer. +func WriterPadding(n int) WriterOption { + return func(w *Writer) error { + if n <= 0 { + return fmt.Errorf("s2: padding must be at least 1") + } + // No need to waste our time. + if n == 1 { + w.pad = 0 + } + if n > maxBlockSize { + return fmt.Errorf("s2: padding must less than 4MB") + } + w.pad = n + return nil + } +} + +// WriterPaddingSrc will get random data for padding from the supplied source. +// By default crypto/rand is used. +func WriterPaddingSrc(reader io.Reader) WriterOption { + return func(w *Writer) error { + w.randSrc = reader + return nil + } +} + +// WriterSnappyCompat will write snappy compatible output. +// The output can be decompressed using either snappy or s2. +// If block size is more than 64KB it is set to that. +func WriterSnappyCompat() WriterOption { + return func(w *Writer) error { + w.snappy = true + if w.blockSize > 64<<10 { + // We choose 8 bytes less than 64K, since that will make literal emits slightly more effective. + // And allows us to skip some size checks. + w.blockSize = (64 << 10) - 8 + } + return nil + } +} + +// WriterFlushOnWrite will compress blocks on each call to the Write function. +// +// This is quite inefficient as blocks size will depend on the write size. +// +// Use WriterConcurrency(1) to also make sure that output is flushed. +// When Write calls return, otherwise they will be written when compression is done. +func WriterFlushOnWrite() WriterOption { + return func(w *Writer) error { + w.flushOnWrite = true + return nil + } +} + +// WriterCustomEncoder allows to override the encoder for blocks on the stream. +// The function must compress 'src' into 'dst' and return the bytes used in dst as an integer. +// Block size (initial varint) should not be added by the encoder. +// Returning value 0 indicates the block could not be compressed. +// Returning a negative value indicates that compression should be attempted. +// The function should expect to be called concurrently. +func WriterCustomEncoder(fn func(dst, src []byte) int) WriterOption { + return func(w *Writer) error { + w.customEnc = fn + return nil + } +} diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md index 857a93e5..37b5167d 100644 --- a/vendor/github.com/klauspost/cpuid/v2/README.md +++ b/vendor/github.com/klauspost/cpuid/v2/README.md @@ -19,6 +19,12 @@ Package home: https://github.com/klauspost/cpuid `go get -u github.com/klauspost/cpuid/v2` using modules. Drop `v2` for others. +Installing binary: + +`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest` + +Or download binaries from release page: https://github.com/klauspost/cpuid/releases + ### Homebrew For macOS/Linux users, you can install via [brew](https://brew.sh/) @@ -302,6 +308,7 @@ Exit Code 1 | AVXSLOW | Indicates the CPU performs 2 128 bit operations instead of one | | AVXVNNI | AVX (VEX encoded) VNNI neural network instructions | | AVXVNNIINT8 | AVX-VNNI-INT8 instructions | +| BHI_CTRL | Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 | | BMI1 | Bit Manipulation Instruction Set 1 | | BMI2 | Bit Manipulation Instruction Set 2 | | CETIBT | Intel CET Indirect Branch Tracking | @@ -355,6 +362,7 @@ Exit Code 1 | IBS_OPFUSE | AMD: Indicates support for IbsOpFuse | | IBS_PREVENTHOST | Disallowing IBS use by the host supported | | IBS_ZEN4 | Fetch and Op IBS support IBS extensions added with Zen4 | +| IDPRED_CTRL | IPRED_DIS | | INT_WBINVD | WBINVD/WBNOINVD are interruptible. | | INVLPGB | NVLPGB and TLBSYNC instruction supported | | LAHF | LAHF/SAHF in long mode | @@ -372,8 +380,9 @@ Exit Code 1 | MOVDIRI | Move Doubleword as Direct Store | | MOVSB_ZL | Fast Zero-Length MOVSB | | MPX | Intel MPX (Memory Protection Extensions) | -| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD | +| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD | | MSRIRC | Instruction Retired Counter MSR available | +| MSRLIST | Read/Write List of Model Specific Registers | | MSR_PAGEFLUSH | Page Flush MSR available | | NRIPS | Indicates support for NRIP save on VMEXIT | | NX | NX (No-Execute) bit | @@ -381,12 +390,13 @@ Exit Code 1 | PCONFIG | PCONFIG for Intel Multi-Key Total Memory Encryption | | POPCNT | POPCNT instruction | | PPIN | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled | -| PREFETCHI | PREFETCHIT0/1 instructions | -| PSFD | AMD: Predictive Store Forward Disable | +| PREFETCHI | PREFETCHIT0/1 instructions | +| PSFD | Predictive Store Forward Disable | | RDPRU | RDPRU instruction supported | | RDRAND | RDRAND instruction is available | | RDSEED | RDSEED instruction is available | | RDTSCP | RDTSCP Instruction | +| RRSBA_CTRL | Restricted RSB Alternate | | RTM | Restricted Transactional Memory | | RTM_ALWAYS_ABORT | Indicates that the loaded microcode is forcing RTM abort. | | SERIALIZE | Serialize Instruction Execution | @@ -439,6 +449,7 @@ Exit Code 1 | VTE | AMD Virtual Transparent Encryption supported | | WAITPKG | TPAUSE, UMONITOR, UMWAIT | | WBNOINVD | Write Back and Do Not Invalidate Cache | +| WRMSRNS | Non-Serializing Write to Model Specific Register | | X87 | FPU | | XGETBV1 | Supports XGETBV with ECX = 1 | | XOP | Bulldozer XOP functions | diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go index cf2ae9c5..89a861d4 100644 --- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go @@ -99,6 +99,7 @@ const ( AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one AVXVNNI // AVX (VEX encoded) VNNI neural network instructions AVXVNNIINT8 // AVX-VNNI-INT8 instructions + BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 BMI1 // Bit Manipulation Instruction Set 1 BMI2 // Bit Manipulation Instruction Set 2 CETIBT // Intel CET Indirect Branch Tracking @@ -152,6 +153,7 @@ const ( IBS_OPFUSE // AMD: Indicates support for IbsOpFuse IBS_PREVENTHOST // Disallowing IBS use by the host supported IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 + IDPRED_CTRL // IPRED_DIS INT_WBINVD // WBINVD/WBNOINVD are interruptible. INVLPGB // NVLPGB and TLBSYNC instruction supported LAHF // LAHF/SAHF in long mode @@ -171,6 +173,7 @@ const ( MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD MPX // Intel MPX (Memory Protection Extensions) MSRIRC // Instruction Retired Counter MSR available + MSRLIST // Read/Write List of Model Specific Registers MSR_PAGEFLUSH // Page Flush MSR available NRIPS // Indicates support for NRIP save on VMEXIT NX // NX (No-Execute) bit @@ -179,11 +182,12 @@ const ( POPCNT // POPCNT instruction PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled PREFETCHI // PREFETCHIT0/1 instructions - PSFD // AMD: Predictive Store Forward Disable + PSFD // Predictive Store Forward Disable RDPRU // RDPRU instruction supported RDRAND // RDRAND instruction is available RDSEED // RDSEED instruction is available RDTSCP // RDTSCP Instruction + RRSBA_CTRL // Restricted RSB Alternate RTM // Restricted Transactional Memory RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. SERIALIZE // Serialize Instruction Execution @@ -236,6 +240,7 @@ const ( VTE // AMD Virtual Transparent Encryption supported WAITPKG // TPAUSE, UMONITOR, UMWAIT WBNOINVD // Write Back and Do Not Invalidate Cache + WRMSRNS // Non-Serializing Write to Model Specific Register X87 // FPU XGETBV1 // Supports XGETBV with ECX = 1 XOP // Bulldozer XOP functions @@ -1232,13 +1237,20 @@ func support() flagSet { fs.setIf(edx&(1<<25) != 0, AMXINT8) // eax1 = CPUID.(EAX=7, ECX=1).EAX fs.setIf(eax1&(1<<5) != 0, AVX512BF16) + fs.setIf(eax1&(1<<19) != 0, WRMSRNS) fs.setIf(eax1&(1<<21) != 0, AMXFP16) + fs.setIf(eax1&(1<<27) != 0, MSRLIST) } } // CPUID.(EAX=7, ECX=2) _, _, _, edx = cpuidex(7, 2) + fs.setIf(edx&(1<<0) != 0, PSFD) + fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL) + fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL) + fs.setIf(edx&(1<<4) != 0, BHI_CTRL) fs.setIf(edx&(1<<5) != 0, MCDT_NO) + } // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go index 8b6cd2b7..2a27f44d 100644 --- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go +++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go @@ -39,181 +39,186 @@ func _() { _ = x[AVXSLOW-29] _ = x[AVXVNNI-30] _ = x[AVXVNNIINT8-31] - _ = x[BMI1-32] - _ = x[BMI2-33] - _ = x[CETIBT-34] - _ = x[CETSS-35] - _ = x[CLDEMOTE-36] - _ = x[CLMUL-37] - _ = x[CLZERO-38] - _ = x[CMOV-39] - _ = x[CMPCCXADD-40] - _ = x[CMPSB_SCADBS_SHORT-41] - _ = x[CMPXCHG8-42] - _ = x[CPBOOST-43] - _ = x[CPPC-44] - _ = x[CX16-45] - _ = x[EFER_LMSLE_UNS-46] - _ = x[ENQCMD-47] - _ = x[ERMS-48] - _ = x[F16C-49] - _ = x[FLUSH_L1D-50] - _ = x[FMA3-51] - _ = x[FMA4-52] - _ = x[FP128-53] - _ = x[FP256-54] - _ = x[FSRM-55] - _ = x[FXSR-56] - _ = x[FXSROPT-57] - _ = x[GFNI-58] - _ = x[HLE-59] - _ = x[HRESET-60] - _ = x[HTT-61] - _ = x[HWA-62] - _ = x[HYBRID_CPU-63] - _ = x[HYPERVISOR-64] - _ = x[IA32_ARCH_CAP-65] - _ = x[IA32_CORE_CAP-66] - _ = x[IBPB-67] - _ = x[IBRS-68] - _ = x[IBRS_PREFERRED-69] - _ = x[IBRS_PROVIDES_SMP-70] - _ = x[IBS-71] - _ = x[IBSBRNTRGT-72] - _ = x[IBSFETCHSAM-73] - _ = x[IBSFFV-74] - _ = x[IBSOPCNT-75] - _ = x[IBSOPCNTEXT-76] - _ = x[IBSOPSAM-77] - _ = x[IBSRDWROPCNT-78] - _ = x[IBSRIPINVALIDCHK-79] - _ = x[IBS_FETCH_CTLX-80] - _ = x[IBS_OPDATA4-81] - _ = x[IBS_OPFUSE-82] - _ = x[IBS_PREVENTHOST-83] - _ = x[IBS_ZEN4-84] - _ = x[INT_WBINVD-85] - _ = x[INVLPGB-86] - _ = x[LAHF-87] - _ = x[LAM-88] - _ = x[LBRVIRT-89] - _ = x[LZCNT-90] - _ = x[MCAOVERFLOW-91] - _ = x[MCDT_NO-92] - _ = x[MCOMMIT-93] - _ = x[MD_CLEAR-94] - _ = x[MMX-95] - _ = x[MMXEXT-96] - _ = x[MOVBE-97] - _ = x[MOVDIR64B-98] - _ = x[MOVDIRI-99] - _ = x[MOVSB_ZL-100] - _ = x[MOVU-101] - _ = x[MPX-102] - _ = x[MSRIRC-103] - _ = x[MSR_PAGEFLUSH-104] - _ = x[NRIPS-105] - _ = x[NX-106] - _ = x[OSXSAVE-107] - _ = x[PCONFIG-108] - _ = x[POPCNT-109] - _ = x[PPIN-110] - _ = x[PREFETCHI-111] - _ = x[PSFD-112] - _ = x[RDPRU-113] - _ = x[RDRAND-114] - _ = x[RDSEED-115] - _ = x[RDTSCP-116] - _ = x[RTM-117] - _ = x[RTM_ALWAYS_ABORT-118] - _ = x[SERIALIZE-119] - _ = x[SEV-120] - _ = x[SEV_64BIT-121] - _ = x[SEV_ALTERNATIVE-122] - _ = x[SEV_DEBUGSWAP-123] - _ = x[SEV_ES-124] - _ = x[SEV_RESTRICTED-125] - _ = x[SEV_SNP-126] - _ = x[SGX-127] - _ = x[SGXLC-128] - _ = x[SHA-129] - _ = x[SME-130] - _ = x[SME_COHERENT-131] - _ = x[SPEC_CTRL_SSBD-132] - _ = x[SRBDS_CTRL-133] - _ = x[SSE-134] - _ = x[SSE2-135] - _ = x[SSE3-136] - _ = x[SSE4-137] - _ = x[SSE42-138] - _ = x[SSE4A-139] - _ = x[SSSE3-140] - _ = x[STIBP-141] - _ = x[STIBP_ALWAYSON-142] - _ = x[STOSB_SHORT-143] - _ = x[SUCCOR-144] - _ = x[SVM-145] - _ = x[SVMDA-146] - _ = x[SVMFBASID-147] - _ = x[SVML-148] - _ = x[SVMNP-149] - _ = x[SVMPF-150] - _ = x[SVMPFT-151] - _ = x[SYSCALL-152] - _ = x[SYSEE-153] - _ = x[TBM-154] - _ = x[TLB_FLUSH_NESTED-155] - _ = x[TME-156] - _ = x[TOPEXT-157] - _ = x[TSCRATEMSR-158] - _ = x[TSXLDTRK-159] - _ = x[VAES-160] - _ = x[VMCBCLEAN-161] - _ = x[VMPL-162] - _ = x[VMSA_REGPROT-163] - _ = x[VMX-164] - _ = x[VPCLMULQDQ-165] - _ = x[VTE-166] - _ = x[WAITPKG-167] - _ = x[WBNOINVD-168] - _ = x[X87-169] - _ = x[XGETBV1-170] - _ = x[XOP-171] - _ = x[XSAVE-172] - _ = x[XSAVEC-173] - _ = x[XSAVEOPT-174] - _ = x[XSAVES-175] - _ = x[AESARM-176] - _ = x[ARMCPUID-177] - _ = x[ASIMD-178] - _ = x[ASIMDDP-179] - _ = x[ASIMDHP-180] - _ = x[ASIMDRDM-181] - _ = x[ATOMICS-182] - _ = x[CRC32-183] - _ = x[DCPOP-184] - _ = x[EVTSTRM-185] - _ = x[FCMA-186] - _ = x[FP-187] - _ = x[FPHP-188] - _ = x[GPA-189] - _ = x[JSCVT-190] - _ = x[LRCPC-191] - _ = x[PMULL-192] - _ = x[SHA1-193] - _ = x[SHA2-194] - _ = x[SHA3-195] - _ = x[SHA512-196] - _ = x[SM3-197] - _ = x[SM4-198] - _ = x[SVE-199] - _ = x[lastID-200] + _ = x[BHI_CTRL-32] + _ = x[BMI1-33] + _ = x[BMI2-34] + _ = x[CETIBT-35] + _ = x[CETSS-36] + _ = x[CLDEMOTE-37] + _ = x[CLMUL-38] + _ = x[CLZERO-39] + _ = x[CMOV-40] + _ = x[CMPCCXADD-41] + _ = x[CMPSB_SCADBS_SHORT-42] + _ = x[CMPXCHG8-43] + _ = x[CPBOOST-44] + _ = x[CPPC-45] + _ = x[CX16-46] + _ = x[EFER_LMSLE_UNS-47] + _ = x[ENQCMD-48] + _ = x[ERMS-49] + _ = x[F16C-50] + _ = x[FLUSH_L1D-51] + _ = x[FMA3-52] + _ = x[FMA4-53] + _ = x[FP128-54] + _ = x[FP256-55] + _ = x[FSRM-56] + _ = x[FXSR-57] + _ = x[FXSROPT-58] + _ = x[GFNI-59] + _ = x[HLE-60] + _ = x[HRESET-61] + _ = x[HTT-62] + _ = x[HWA-63] + _ = x[HYBRID_CPU-64] + _ = x[HYPERVISOR-65] + _ = x[IA32_ARCH_CAP-66] + _ = x[IA32_CORE_CAP-67] + _ = x[IBPB-68] + _ = x[IBRS-69] + _ = x[IBRS_PREFERRED-70] + _ = x[IBRS_PROVIDES_SMP-71] + _ = x[IBS-72] + _ = x[IBSBRNTRGT-73] + _ = x[IBSFETCHSAM-74] + _ = x[IBSFFV-75] + _ = x[IBSOPCNT-76] + _ = x[IBSOPCNTEXT-77] + _ = x[IBSOPSAM-78] + _ = x[IBSRDWROPCNT-79] + _ = x[IBSRIPINVALIDCHK-80] + _ = x[IBS_FETCH_CTLX-81] + _ = x[IBS_OPDATA4-82] + _ = x[IBS_OPFUSE-83] + _ = x[IBS_PREVENTHOST-84] + _ = x[IBS_ZEN4-85] + _ = x[IDPRED_CTRL-86] + _ = x[INT_WBINVD-87] + _ = x[INVLPGB-88] + _ = x[LAHF-89] + _ = x[LAM-90] + _ = x[LBRVIRT-91] + _ = x[LZCNT-92] + _ = x[MCAOVERFLOW-93] + _ = x[MCDT_NO-94] + _ = x[MCOMMIT-95] + _ = x[MD_CLEAR-96] + _ = x[MMX-97] + _ = x[MMXEXT-98] + _ = x[MOVBE-99] + _ = x[MOVDIR64B-100] + _ = x[MOVDIRI-101] + _ = x[MOVSB_ZL-102] + _ = x[MOVU-103] + _ = x[MPX-104] + _ = x[MSRIRC-105] + _ = x[MSRLIST-106] + _ = x[MSR_PAGEFLUSH-107] + _ = x[NRIPS-108] + _ = x[NX-109] + _ = x[OSXSAVE-110] + _ = x[PCONFIG-111] + _ = x[POPCNT-112] + _ = x[PPIN-113] + _ = x[PREFETCHI-114] + _ = x[PSFD-115] + _ = x[RDPRU-116] + _ = x[RDRAND-117] + _ = x[RDSEED-118] + _ = x[RDTSCP-119] + _ = x[RRSBA_CTRL-120] + _ = x[RTM-121] + _ = x[RTM_ALWAYS_ABORT-122] + _ = x[SERIALIZE-123] + _ = x[SEV-124] + _ = x[SEV_64BIT-125] + _ = x[SEV_ALTERNATIVE-126] + _ = x[SEV_DEBUGSWAP-127] + _ = x[SEV_ES-128] + _ = x[SEV_RESTRICTED-129] + _ = x[SEV_SNP-130] + _ = x[SGX-131] + _ = x[SGXLC-132] + _ = x[SHA-133] + _ = x[SME-134] + _ = x[SME_COHERENT-135] + _ = x[SPEC_CTRL_SSBD-136] + _ = x[SRBDS_CTRL-137] + _ = x[SSE-138] + _ = x[SSE2-139] + _ = x[SSE3-140] + _ = x[SSE4-141] + _ = x[SSE42-142] + _ = x[SSE4A-143] + _ = x[SSSE3-144] + _ = x[STIBP-145] + _ = x[STIBP_ALWAYSON-146] + _ = x[STOSB_SHORT-147] + _ = x[SUCCOR-148] + _ = x[SVM-149] + _ = x[SVMDA-150] + _ = x[SVMFBASID-151] + _ = x[SVML-152] + _ = x[SVMNP-153] + _ = x[SVMPF-154] + _ = x[SVMPFT-155] + _ = x[SYSCALL-156] + _ = x[SYSEE-157] + _ = x[TBM-158] + _ = x[TLB_FLUSH_NESTED-159] + _ = x[TME-160] + _ = x[TOPEXT-161] + _ = x[TSCRATEMSR-162] + _ = x[TSXLDTRK-163] + _ = x[VAES-164] + _ = x[VMCBCLEAN-165] + _ = x[VMPL-166] + _ = x[VMSA_REGPROT-167] + _ = x[VMX-168] + _ = x[VPCLMULQDQ-169] + _ = x[VTE-170] + _ = x[WAITPKG-171] + _ = x[WBNOINVD-172] + _ = x[WRMSRNS-173] + _ = x[X87-174] + _ = x[XGETBV1-175] + _ = x[XOP-176] + _ = x[XSAVE-177] + _ = x[XSAVEC-178] + _ = x[XSAVEOPT-179] + _ = x[XSAVES-180] + _ = x[AESARM-181] + _ = x[ARMCPUID-182] + _ = x[ASIMD-183] + _ = x[ASIMDDP-184] + _ = x[ASIMDHP-185] + _ = x[ASIMDRDM-186] + _ = x[ATOMICS-187] + _ = x[CRC32-188] + _ = x[DCPOP-189] + _ = x[EVTSTRM-190] + _ = x[FCMA-191] + _ = x[FP-192] + _ = x[FPHP-193] + _ = x[GPA-194] + _ = x[JSCVT-195] + _ = x[LRCPC-196] + _ = x[PMULL-197] + _ = x[SHA1-198] + _ = x[SHA2-199] + _ = x[SHA3-200] + _ = x[SHA512-201] + _ = x[SM3-202] + _ = x[SM4-203] + _ = x[SVE-204] + _ = x[lastID-205] _ = x[firstID-0] } -const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4INT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" +const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" -var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 278, 282, 288, 293, 301, 306, 312, 316, 325, 343, 351, 358, 362, 366, 380, 386, 390, 394, 403, 407, 411, 416, 421, 425, 429, 436, 440, 443, 449, 452, 455, 465, 475, 488, 501, 505, 509, 523, 540, 543, 553, 564, 570, 578, 589, 597, 609, 625, 639, 650, 660, 675, 683, 693, 700, 704, 707, 714, 719, 730, 737, 744, 752, 755, 761, 766, 775, 782, 790, 794, 797, 803, 816, 821, 823, 830, 837, 843, 847, 856, 860, 865, 871, 877, 883, 886, 902, 911, 914, 923, 938, 951, 957, 971, 978, 981, 986, 989, 992, 1004, 1018, 1028, 1031, 1035, 1039, 1043, 1048, 1053, 1058, 1063, 1077, 1088, 1094, 1097, 1102, 1111, 1115, 1120, 1125, 1131, 1138, 1143, 1146, 1162, 1165, 1171, 1181, 1189, 1193, 1202, 1206, 1218, 1221, 1231, 1234, 1241, 1249, 1252, 1259, 1262, 1267, 1273, 1281, 1287, 1293, 1301, 1306, 1313, 1320, 1328, 1335, 1340, 1345, 1352, 1356, 1358, 1362, 1365, 1370, 1375, 1380, 1384, 1388, 1392, 1398, 1401, 1404, 1407, 1413} +var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 282, 286, 290, 296, 301, 309, 314, 320, 324, 333, 351, 359, 366, 370, 374, 388, 394, 398, 402, 411, 415, 419, 424, 429, 433, 437, 444, 448, 451, 457, 460, 463, 473, 483, 496, 509, 513, 517, 531, 548, 551, 561, 572, 578, 586, 597, 605, 617, 633, 647, 658, 668, 683, 691, 702, 712, 719, 723, 726, 733, 738, 749, 756, 763, 771, 774, 780, 785, 794, 801, 809, 813, 816, 822, 829, 842, 847, 849, 856, 863, 869, 873, 882, 886, 891, 897, 903, 909, 919, 922, 938, 947, 950, 959, 974, 987, 993, 1007, 1014, 1017, 1022, 1025, 1028, 1040, 1054, 1064, 1067, 1071, 1075, 1079, 1084, 1089, 1094, 1099, 1113, 1124, 1130, 1133, 1138, 1147, 1151, 1156, 1161, 1167, 1174, 1179, 1182, 1198, 1201, 1207, 1217, 1225, 1229, 1238, 1242, 1254, 1257, 1267, 1270, 1277, 1285, 1292, 1295, 1302, 1305, 1310, 1316, 1324, 1330, 1336, 1344, 1349, 1356, 1363, 1371, 1378, 1383, 1388, 1395, 1399, 1401, 1405, 1408, 1413, 1418, 1423, 1427, 1431, 1435, 1441, 1444, 1447, 1450, 1456} func (i FeatureID) String() string { if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) { diff --git a/vendor/github.com/minio/minio-go/v7/.gitignore b/vendor/github.com/minio/minio-go/v7/.gitignore index 12ecd6ae..8ae0384e 100644 --- a/vendor/github.com/minio/minio-go/v7/.gitignore +++ b/vendor/github.com/minio/minio-go/v7/.gitignore @@ -2,4 +2,5 @@ *.test validator golangci-lint -functional_tests \ No newline at end of file +functional_tests +.idea \ No newline at end of file diff --git a/vendor/github.com/minio/minio-go/v7/api-datatypes.go b/vendor/github.com/minio/minio-go/v7/api-datatypes.go index 1f479387..cb811b4e 100644 --- a/vendor/github.com/minio/minio-go/v7/api-datatypes.go +++ b/vendor/github.com/minio/minio-go/v7/api-datatypes.go @@ -43,7 +43,7 @@ type StringMap map[string]string // if m is nil it can be initialized, which is often the case if m is // nested in another xml structural. This is also why the first thing done // on the first line is initialize it. -func (m *StringMap) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { +func (m *StringMap) UnmarshalXML(d *xml.Decoder, _ xml.StartElement) error { *m = StringMap{} type Item struct { Key string diff --git a/vendor/github.com/minio/minio-go/v7/api-get-object.go b/vendor/github.com/minio/minio-go/v7/api-get-object.go index b17f3146..e31e4cf9 100644 --- a/vendor/github.com/minio/minio-go/v7/api-get-object.go +++ b/vendor/github.com/minio/minio-go/v7/api-get-object.go @@ -23,8 +23,6 @@ import ( "fmt" "io" "net/http" - "net/url" - "strconv" "sync" "github.com/minio/minio-go/v7/pkg/s3utils" @@ -654,19 +652,11 @@ func (c *Client) getObject(ctx context.Context, bucketName, objectName string, o return nil, ObjectInfo{}, nil, err } - urlValues := make(url.Values) - if opts.VersionID != "" { - urlValues.Set("versionId", opts.VersionID) - } - if opts.PartNumber > 0 { - urlValues.Set("partNumber", strconv.Itoa(opts.PartNumber)) - } - // Execute GET on objectName. resp, err := c.executeMethod(ctx, http.MethodGet, requestMetadata{ bucketName: bucketName, objectName: objectName, - queryValues: urlValues, + queryValues: opts.toQueryValues(), customHeader: opts.Header(), contentSHA256Hex: emptySHA256Hex, }) diff --git a/vendor/github.com/minio/minio-go/v7/api-get-options.go b/vendor/github.com/minio/minio-go/v7/api-get-options.go index 0082c1fa..bb86a599 100644 --- a/vendor/github.com/minio/minio-go/v7/api-get-options.go +++ b/vendor/github.com/minio/minio-go/v7/api-get-options.go @@ -20,6 +20,8 @@ package minio import ( "fmt" "net/http" + "net/url" + "strconv" "time" "github.com/minio/minio-go/v7/pkg/encrypt" @@ -36,6 +38,7 @@ type AdvancedGetOptions struct { // during GET requests. type GetObjectOptions struct { headers map[string]string + reqParams url.Values ServerSideEncryption encrypt.ServerSide VersionID string PartNumber int @@ -83,6 +86,34 @@ func (o *GetObjectOptions) Set(key, value string) { o.headers[http.CanonicalHeaderKey(key)] = value } +// SetReqParam - set request query string parameter +// supported key: see supportedQueryValues. +// If an unsupported key is passed in, it will be ignored and nothing will be done. +func (o *GetObjectOptions) SetReqParam(key, value string) { + if !isStandardQueryValue(key) { + // do nothing + return + } + if o.reqParams == nil { + o.reqParams = make(url.Values) + } + o.reqParams.Set(key, value) +} + +// AddReqParam - add request query string parameter +// supported key: see supportedQueryValues. +// If an unsupported key is passed in, it will be ignored and nothing will be done. +func (o *GetObjectOptions) AddReqParam(key, value string) { + if !isStandardQueryValue(key) { + // do nothing + return + } + if o.reqParams == nil { + o.reqParams = make(url.Values) + } + o.reqParams.Add(key, value) +} + // SetMatchETag - set match etag. func (o *GetObjectOptions) SetMatchETag(etag string) error { if etag == "" { @@ -149,3 +180,24 @@ func (o *GetObjectOptions) SetRange(start, end int64) error { } return nil } + +// toQueryValues - Convert the versionId, partNumber, and reqParams in Options to query string parameters. +func (o *GetObjectOptions) toQueryValues() url.Values { + urlValues := make(url.Values) + if o.VersionID != "" { + urlValues.Set("versionId", o.VersionID) + } + if o.PartNumber > 0 { + urlValues.Set("partNumber", strconv.Itoa(o.PartNumber)) + } + + if o.reqParams != nil { + for key, values := range o.reqParams { + for _, value := range values { + urlValues.Add(key, value) + } + } + } + + return urlValues +} diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go b/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go index 18bdeb24..85d6c70a 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go @@ -387,6 +387,12 @@ func (c *Client) completeMultipartUpload(ctx context.Context, bucketName, object return UploadInfo{}, err } + headers := opts.Header() + if s3utils.IsAmazonEndpoint(*c.endpointURL) { + headers.Del(encrypt.SseKmsKeyID) // Remove X-Amz-Server-Side-Encryption-Aws-Kms-Key-Id not supported in CompleteMultipartUpload + headers.Del(encrypt.SseGenericHeader) // Remove X-Amz-Server-Side-Encryption not supported in CompleteMultipartUpload + } + // Instantiate all the complete multipart buffer. completeMultipartUploadBuffer := bytes.NewReader(completeMultipartUploadBytes) reqMetadata := requestMetadata{ @@ -396,7 +402,7 @@ func (c *Client) completeMultipartUpload(ctx context.Context, bucketName, object contentBody: completeMultipartUploadBuffer, contentLength: int64(len(completeMultipartUploadBytes)), contentSHA256Hex: sum256Hex(completeMultipartUploadBytes), - customHeader: opts.Header(), + customHeader: headers, } // Execute POST to complete multipart upload for an objectName. diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go b/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go index cdc7d3d3..55b3f38e 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go @@ -500,8 +500,6 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam // CRC32C is ~50% faster on AMD64 @ 30GB/s var crcBytes []byte crc := crc32.New(crc32.MakeTable(crc32.Castagnoli)) - md5Hash := c.md5Hasher() - defer md5Hash.Close() // Total data read and written to server. should be equal to 'size' at the end of the call. var totalUploadedSize int64 @@ -569,9 +567,10 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam var md5Base64 string if opts.SendContentMd5 { - md5Hash.Reset() + md5Hash := c.md5Hasher() md5Hash.Write(buf[:length]) md5Base64 = base64.StdEncoding.EncodeToString(md5Hash.Sum(nil)) + md5Hash.Close() } defer wg.Done() @@ -590,6 +589,7 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam objPart, uerr := c.uploadPart(ctx, p) if uerr != nil { errCh <- uerr + return } // Save successfully uploaded part metadata. diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object.go b/vendor/github.com/minio/minio-go/v7/api-put-object.go index 2376ee87..b29df17d 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object.go @@ -93,6 +93,28 @@ type PutObjectOptions struct { // This can be used for faster uploads on non-seekable or slow-to-seek input. ConcurrentStreamParts bool Internal AdvancedPutOptions + + customHeaders http.Header +} + +// SetMatchETag if etag matches while PUT MinIO returns an error +// this is a MinIO specific extension to support optimistic locking +// semantics. +func (opts *PutObjectOptions) SetMatchETag(etag string) { + if opts.customHeaders == nil { + opts.customHeaders = http.Header{} + } + opts.customHeaders.Set("If-Match", "\""+etag+"\"") +} + +// SetMatchETagExcept if etag does not match while PUT MinIO returns an +// error this is a MinIO specific extension to support optimistic locking +// semantics. +func (opts *PutObjectOptions) SetMatchETagExcept(etag string) { + if opts.customHeaders == nil { + opts.customHeaders = http.Header{} + } + opts.customHeaders.Set("If-None-Match", "\""+etag+"\"") } // getNumThreads - gets the number of threads to be used in the multipart @@ -187,6 +209,12 @@ func (opts PutObjectOptions) Header() (header http.Header) { header.Set("x-amz-meta-"+k, v) } } + + // set any other additional custom headers. + for k, v := range opts.customHeaders { + header[k] = v + } + return } diff --git a/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go b/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go index 899bc6b7..849471e3 100644 --- a/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go +++ b/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go @@ -59,6 +59,7 @@ type SnowballObject struct { Size int64 // Modtime to apply to the object. + // If Modtime is the zero value current time will be used. ModTime time.Time // Content of the object. @@ -172,6 +173,10 @@ objectLoop: ModTime: obj.ModTime, Format: tar.FormatPAX, } + if header.ModTime.IsZero() { + header.ModTime = time.Now().UTC() + } + if err := t.WriteHeader(&header); err != nil { closeObj() return err diff --git a/vendor/github.com/minio/minio-go/v7/api-remove.go b/vendor/github.com/minio/minio-go/v7/api-remove.go index c0ff31a5..ca6b80a7 100644 --- a/vendor/github.com/minio/minio-go/v7/api-remove.go +++ b/vendor/github.com/minio/minio-go/v7/api-remove.go @@ -235,7 +235,7 @@ func generateRemoveMultiObjectsRequest(objects []ObjectInfo) []byte { // processRemoveMultiObjectsResponse - parse the remove multi objects web service // and return the success/failure result status for each object -func processRemoveMultiObjectsResponse(body io.Reader, objects []ObjectInfo, resultCh chan<- RemoveObjectResult) { +func processRemoveMultiObjectsResponse(body io.Reader, resultCh chan<- RemoveObjectResult) { // Parse multi delete XML response rmResult := &deleteMultiObjectsResult{} err := xmlDecoder(body, rmResult) @@ -459,7 +459,7 @@ func (c *Client) removeObjects(ctx context.Context, bucketName string, objectsCh } // Process multiobjects remove xml response - processRemoveMultiObjectsResponse(resp.Body, batch, resultCh) + processRemoveMultiObjectsResponse(resp.Body, resultCh) closeResponse(resp) } diff --git a/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go b/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go index 827395ee..30cbe7f1 100644 --- a/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go +++ b/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go @@ -110,7 +110,7 @@ type ListVersionsResult struct { // UnmarshalXML is a custom unmarshal code for the response of ListObjectVersions, the custom // code will unmarshal and tags and save them in Versions field to // preserve the lexical order of the listing. -func (l *ListVersionsResult) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) { +func (l *ListVersionsResult) UnmarshalXML(d *xml.Decoder, _ xml.StartElement) (err error) { for { // Read tokens from the XML document in a stream. t, err := d.Token() diff --git a/vendor/github.com/minio/minio-go/v7/api.go b/vendor/github.com/minio/minio-go/v7/api.go index 99a03df9..3b28519e 100644 --- a/vendor/github.com/minio/minio-go/v7/api.go +++ b/vendor/github.com/minio/minio-go/v7/api.go @@ -106,6 +106,12 @@ type Options struct { Region string BucketLookup BucketLookupType + // Allows setting a custom region lookup based on URL pattern + // not all URL patterns are covered by this library so if you + // have a custom endpoints with many regions you can use this + // function to perform region lookups appropriately. + CustomRegionViaURL func(u url.URL) string + // TrailingHeaders indicates server support of trailing headers. // Only supported for v4 signatures. TrailingHeaders bool @@ -118,7 +124,7 @@ type Options struct { // Global constants. const ( libraryName = "minio-go" - libraryVersion = "v7.0.47" + libraryVersion = "v7.0.50" ) // User Agent should always following the below style. @@ -234,7 +240,11 @@ func privateNew(endpoint string, opts *Options) (*Client, error) { // Sets custom region, if region is empty bucket location cache is used automatically. if opts.Region == "" { - opts.Region = s3utils.GetRegionFromURL(*clnt.endpointURL) + if opts.CustomRegionViaURL != nil { + opts.Region = opts.CustomRegionViaURL(*clnt.endpointURL) + } else { + opts.Region = s3utils.GetRegionFromURL(*clnt.endpointURL) + } } clnt.region = opts.Region diff --git a/vendor/github.com/minio/minio-go/v7/bucket-cache.go b/vendor/github.com/minio/minio-go/v7/bucket-cache.go index cafb4568..9df0a310 100644 --- a/vendor/github.com/minio/minio-go/v7/bucket-cache.go +++ b/vendor/github.com/minio/minio-go/v7/bucket-cache.go @@ -190,12 +190,11 @@ func (c *Client) getBucketLocationRequest(ctx context.Context, bucketName string } } - isVirtualHost := s3utils.IsVirtualHostSupported(targetURL, bucketName) + isVirtualStyle := c.isVirtualHostStyleRequest(targetURL, bucketName) var urlStr string - // only support Aliyun OSS for virtual hosted path, compatible Amazon & Google Endpoint - if isVirtualHost && s3utils.IsAliyunOSSEndpoint(targetURL) { + if isVirtualStyle { urlStr = c.endpointURL.Scheme + "://" + bucketName + "." + targetURL.Host + "/?location" } else { targetURL.Path = path.Join(bucketName, "") + "/" diff --git a/vendor/github.com/minio/minio-go/v7/core.go b/vendor/github.com/minio/minio-go/v7/core.go index 207a3870..e186b973 100644 --- a/vendor/github.com/minio/minio-go/v7/core.go +++ b/vendor/github.com/minio/minio-go/v7/core.go @@ -86,19 +86,30 @@ func (c Core) ListMultipartUploads(ctx context.Context, bucket, prefix, keyMarke return c.listMultipartUploadsQuery(ctx, bucket, keyMarker, uploadIDMarker, prefix, delimiter, maxUploads) } +// PutObjectPartOptions contains options for PutObjectPart API +type PutObjectPartOptions struct { + Md5Base64, Sha256Hex string + SSE encrypt.ServerSide + CustomHeader, Trailer http.Header +} + // PutObjectPart - Upload an object part. -func (c Core) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data io.Reader, size int64, md5Base64, sha256Hex string, sse encrypt.ServerSide) (ObjectPart, error) { +func (c Core) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, + data io.Reader, size int64, opts PutObjectPartOptions, +) (ObjectPart, error) { p := uploadPartParams{ bucketName: bucket, objectName: object, uploadID: uploadID, reader: data, partNumber: partID, - md5Base64: md5Base64, - sha256Hex: sha256Hex, + md5Base64: opts.Md5Base64, + sha256Hex: opts.Sha256Hex, size: size, - sse: sse, + sse: opts.SSE, streamSha256: true, + customHeader: opts.CustomHeader, + trailer: opts.Trailer, } return c.uploadPart(ctx, p) } @@ -109,11 +120,11 @@ func (c Core) ListObjectParts(ctx context.Context, bucket, object, uploadID stri } // CompleteMultipartUpload - Concatenate uploaded parts and commit to an object. -func (c Core) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, parts []CompletePart, opts PutObjectOptions) (string, error) { +func (c Core) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, parts []CompletePart, opts PutObjectOptions) (UploadInfo, error) { res, err := c.completeMultipartUpload(ctx, bucket, object, uploadID, completeMultipartUpload{ Parts: parts, }, opts) - return res.ETag, err + return res, err } // AbortMultipartUpload - Abort an incomplete upload. diff --git a/vendor/github.com/minio/minio-go/v7/functional_tests.go b/vendor/github.com/minio/minio-go/v7/functional_tests.go index 23dd7e9b..33285239 100644 --- a/vendor/github.com/minio/minio-go/v7/functional_tests.go +++ b/vendor/github.com/minio/minio-go/v7/functional_tests.go @@ -2053,7 +2053,7 @@ func testPutObjectWithChecksums() { } // Enable tracing, write to stderr. - //c.TraceOn(os.Stderr) + // c.TraceOn(os.Stderr) // Set user agent. c.SetAppInfo("MinIO-go-FunctionalTest", "0.1.0") @@ -8414,14 +8414,20 @@ func testSSECMultipartEncryptedToSSECCopyObjectPart() { var completeParts []minio.CompletePart - part, err := c.PutObjectPart(context.Background(), bucketName, objectName, uploadID, 1, bytes.NewReader(buf[:5*1024*1024]), 5*1024*1024, "", "", srcencryption) + part, err := c.PutObjectPart(context.Background(), bucketName, objectName, uploadID, 1, + bytes.NewReader(buf[:5*1024*1024]), 5*1024*1024, + minio.PutObjectPartOptions{SSE: srcencryption}, + ) if err != nil { logError(testName, function, args, startTime, "", "PutObjectPart call failed", err) return } completeParts = append(completeParts, minio.CompletePart{PartNumber: part.PartNumber, ETag: part.ETag}) - part, err = c.PutObjectPart(context.Background(), bucketName, objectName, uploadID, 2, bytes.NewReader(buf[5*1024*1024:]), 1024*1024, "", "", srcencryption) + part, err = c.PutObjectPart(context.Background(), bucketName, objectName, uploadID, 2, + bytes.NewReader(buf[5*1024*1024:]), 1024*1024, + minio.PutObjectPartOptions{SSE: srcencryption}, + ) if err != nil { logError(testName, function, args, startTime, "", "PutObjectPart call failed", err) return diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_tls_identity.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_tls_identity.go index a0ad0904..dee0a8cb 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_tls_identity.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_tls_identity.go @@ -140,6 +140,9 @@ func (i *STSCertificateIdentity) Retrieve() (Value, error) { if err != nil { return Value{}, err } + if req.Form == nil { + req.Form = url.Values{} + } req.Form.Add("DurationSeconds", strconv.FormatUint(uint64(livetime.Seconds()), 10)) resp, err := i.Client.Do(req) diff --git a/vendor/github.com/minio/minio-go/v7/pkg/encrypt/server-side.go b/vendor/github.com/minio/minio-go/v7/pkg/encrypt/server-side.go index 163fa62b..a7081c59 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/encrypt/server-side.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/encrypt/server-side.go @@ -28,27 +28,27 @@ import ( ) const ( - // sseGenericHeader is the AWS SSE header used for SSE-S3 and SSE-KMS. - sseGenericHeader = "X-Amz-Server-Side-Encryption" + // SseGenericHeader is the AWS SSE header used for SSE-S3 and SSE-KMS. + SseGenericHeader = "X-Amz-Server-Side-Encryption" - // sseKmsKeyID is the AWS SSE-KMS key id. - sseKmsKeyID = sseGenericHeader + "-Aws-Kms-Key-Id" - // sseEncryptionContext is the AWS SSE-KMS Encryption Context data. - sseEncryptionContext = sseGenericHeader + "-Context" + // SseKmsKeyID is the AWS SSE-KMS key id. + SseKmsKeyID = SseGenericHeader + "-Aws-Kms-Key-Id" + // SseEncryptionContext is the AWS SSE-KMS Encryption Context data. + SseEncryptionContext = SseGenericHeader + "-Context" - // sseCustomerAlgorithm is the AWS SSE-C algorithm HTTP header key. - sseCustomerAlgorithm = sseGenericHeader + "-Customer-Algorithm" - // sseCustomerKey is the AWS SSE-C encryption key HTTP header key. - sseCustomerKey = sseGenericHeader + "-Customer-Key" - // sseCustomerKeyMD5 is the AWS SSE-C encryption key MD5 HTTP header key. - sseCustomerKeyMD5 = sseGenericHeader + "-Customer-Key-MD5" + // SseCustomerAlgorithm is the AWS SSE-C algorithm HTTP header key. + SseCustomerAlgorithm = SseGenericHeader + "-Customer-Algorithm" + // SseCustomerKey is the AWS SSE-C encryption key HTTP header key. + SseCustomerKey = SseGenericHeader + "-Customer-Key" + // SseCustomerKeyMD5 is the AWS SSE-C encryption key MD5 HTTP header key. + SseCustomerKeyMD5 = SseGenericHeader + "-Customer-Key-MD5" - // sseCopyCustomerAlgorithm is the AWS SSE-C algorithm HTTP header key for CopyObject API. - sseCopyCustomerAlgorithm = "X-Amz-Copy-Source-Server-Side-Encryption-Customer-Algorithm" - // sseCopyCustomerKey is the AWS SSE-C encryption key HTTP header key for CopyObject API. - sseCopyCustomerKey = "X-Amz-Copy-Source-Server-Side-Encryption-Customer-Key" - // sseCopyCustomerKeyMD5 is the AWS SSE-C encryption key MD5 HTTP header key for CopyObject API. - sseCopyCustomerKeyMD5 = "X-Amz-Copy-Source-Server-Side-Encryption-Customer-Key-MD5" + // SseCopyCustomerAlgorithm is the AWS SSE-C algorithm HTTP header key for CopyObject API. + SseCopyCustomerAlgorithm = "X-Amz-Copy-Source-Server-Side-Encryption-Customer-Algorithm" + // SseCopyCustomerKey is the AWS SSE-C encryption key HTTP header key for CopyObject API. + SseCopyCustomerKey = "X-Amz-Copy-Source-Server-Side-Encryption-Customer-Key" + // SseCopyCustomerKeyMD5 is the AWS SSE-C encryption key MD5 HTTP header key for CopyObject API. + SseCopyCustomerKeyMD5 = "X-Amz-Copy-Source-Server-Side-Encryption-Customer-Key-MD5" ) // PBKDF creates a SSE-C key from the provided password and salt. @@ -157,9 +157,9 @@ func (s ssec) Type() Type { return SSEC } func (s ssec) Marshal(h http.Header) { keyMD5 := md5.Sum(s[:]) - h.Set(sseCustomerAlgorithm, "AES256") - h.Set(sseCustomerKey, base64.StdEncoding.EncodeToString(s[:])) - h.Set(sseCustomerKeyMD5, base64.StdEncoding.EncodeToString(keyMD5[:])) + h.Set(SseCustomerAlgorithm, "AES256") + h.Set(SseCustomerKey, base64.StdEncoding.EncodeToString(s[:])) + h.Set(SseCustomerKeyMD5, base64.StdEncoding.EncodeToString(keyMD5[:])) } type ssecCopy [32]byte @@ -168,16 +168,16 @@ func (s ssecCopy) Type() Type { return SSEC } func (s ssecCopy) Marshal(h http.Header) { keyMD5 := md5.Sum(s[:]) - h.Set(sseCopyCustomerAlgorithm, "AES256") - h.Set(sseCopyCustomerKey, base64.StdEncoding.EncodeToString(s[:])) - h.Set(sseCopyCustomerKeyMD5, base64.StdEncoding.EncodeToString(keyMD5[:])) + h.Set(SseCopyCustomerAlgorithm, "AES256") + h.Set(SseCopyCustomerKey, base64.StdEncoding.EncodeToString(s[:])) + h.Set(SseCopyCustomerKeyMD5, base64.StdEncoding.EncodeToString(keyMD5[:])) } type s3 struct{} func (s s3) Type() Type { return S3 } -func (s s3) Marshal(h http.Header) { h.Set(sseGenericHeader, "AES256") } +func (s s3) Marshal(h http.Header) { h.Set(SseGenericHeader, "AES256") } type kms struct { key string @@ -188,11 +188,11 @@ type kms struct { func (s kms) Type() Type { return KMS } func (s kms) Marshal(h http.Header) { - h.Set(sseGenericHeader, "aws:kms") + h.Set(SseGenericHeader, "aws:kms") if s.key != "" { - h.Set(sseKmsKeyID, s.key) + h.Set(SseKmsKeyID, s.key) } if s.hasContext { - h.Set(sseEncryptionContext, base64.StdEncoding.EncodeToString(s.context)) + h.Set(SseEncryptionContext, base64.StdEncoding.EncodeToString(s.context)) } } diff --git a/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go b/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go index 79c12946..51a04b06 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go @@ -339,12 +339,12 @@ func EncodePath(pathName string) string { encodedPathname.WriteRune(s) continue default: - len := utf8.RuneLen(s) - if len < 0 { + l := utf8.RuneLen(s) + if l < 0 { // if utf8 cannot convert return the same string as is return pathName } - u := make([]byte, len) + u := make([]byte, l) utf8.EncodeRune(u, s) for _, r := range u { hex := hex.EncodeToString([]byte{r}) diff --git a/vendor/github.com/minio/minio-go/v7/s3-endpoints.go b/vendor/github.com/minio/minio-go/v7/s3-endpoints.go index 589c0e54..0a26edd5 100644 --- a/vendor/github.com/minio/minio-go/v7/s3-endpoints.go +++ b/vendor/github.com/minio/minio-go/v7/s3-endpoints.go @@ -34,6 +34,7 @@ var awsS3EndpointMap = map[string]string{ "eu-south-2": "s3.dualstack.eu-south-2.amazonaws.com", "ap-east-1": "s3.dualstack.ap-east-1.amazonaws.com", "ap-south-1": "s3.dualstack.ap-south-1.amazonaws.com", + "ap-south-2": "s3.dualstack.ap-south-2.amazonaws.com", "ap-southeast-1": "s3.dualstack.ap-southeast-1.amazonaws.com", "ap-southeast-2": "s3.dualstack.ap-southeast-2.amazonaws.com", "ap-northeast-1": "s3.dualstack.ap-northeast-1.amazonaws.com", @@ -48,6 +49,7 @@ var awsS3EndpointMap = map[string]string{ "cn-north-1": "s3.dualstack.cn-north-1.amazonaws.com.cn", "cn-northwest-1": "s3.dualstack.cn-northwest-1.amazonaws.com.cn", "ap-southeast-3": "s3.dualstack.ap-southeast-3.amazonaws.com", + "ap-southeast-4": "s3.dualstack.ap-southeast-4.amazonaws.com", } // getS3Endpoint get Amazon S3 endpoint based on the bucket location. diff --git a/vendor/github.com/minio/minio-go/v7/utils.go b/vendor/github.com/minio/minio-go/v7/utils.go index 7ce8ec5c..9389a7fa 100644 --- a/vendor/github.com/minio/minio-go/v7/utils.go +++ b/vendor/github.com/minio/minio-go/v7/utils.go @@ -511,6 +511,23 @@ func isAmzHeader(headerKey string) bool { return strings.HasPrefix(key, "x-amz-meta-") || strings.HasPrefix(key, "x-amz-grant-") || key == "x-amz-acl" || isSSEHeader(headerKey) || strings.HasPrefix(key, "x-amz-checksum-") } +// supportedQueryValues is a list of query strings that can be passed in when using GetObject. +var supportedQueryValues = map[string]bool{ + "partNumber": true, + "versionId": true, + "response-cache-control": true, + "response-content-disposition": true, + "response-content-encoding": true, + "response-content-language": true, + "response-content-type": true, + "response-expires": true, +} + +// isStandardQueryValue will return true when the passed in query string parameter is supported rather than customized. +func isStandardQueryValue(qsKey string) bool { + return supportedQueryValues[qsKey] +} + var ( md5Pool = sync.Pool{New: func() interface{} { return md5.New() }} sha256Pool = sync.Pool{New: func() interface{} { return sha256.New() }} diff --git a/vendor/golang.org/x/crypto/acme/rfc8555.go b/vendor/golang.org/x/crypto/acme/rfc8555.go index ee24dfde..3152e531 100644 --- a/vendor/golang.org/x/crypto/acme/rfc8555.go +++ b/vendor/golang.org/x/crypto/acme/rfc8555.go @@ -117,7 +117,7 @@ func (c *Client) updateRegRFC(ctx context.Context, a *Account) (*Account, error) return responseAccount(res) } -// getGegRFC is equivalent to c.GetReg but for CAs implementing RFC 8555. +// getRegRFC is equivalent to c.GetReg but for CAs implementing RFC 8555. // It expects c.Discover to have already been called. func (c *Client) getRegRFC(ctx context.Context) (*Account, error) { req := json.RawMessage(`{"onlyReturnExisting": true}`) diff --git a/vendor/golang.org/x/sys/cpu/hwcap_linux.go b/vendor/golang.org/x/sys/cpu/hwcap_linux.go index f3baa379..1d9d91f3 100644 --- a/vendor/golang.org/x/sys/cpu/hwcap_linux.go +++ b/vendor/golang.org/x/sys/cpu/hwcap_linux.go @@ -24,6 +24,21 @@ var hwCap uint var hwCap2 uint func readHWCAP() error { + // For Go 1.21+, get auxv from the Go runtime. + if a := getAuxv(); len(a) > 0 { + for len(a) >= 2 { + tag, val := a[0], uint(a[1]) + a = a[2:] + switch tag { + case _AT_HWCAP: + hwCap = val + case _AT_HWCAP2: + hwCap2 = val + } + } + return nil + } + buf, err := ioutil.ReadFile(procAuxv) if err != nil { // e.g. on android /proc/self/auxv is not accessible, so silently diff --git a/vendor/golang.org/x/sys/cpu/runtime_auxv.go b/vendor/golang.org/x/sys/cpu/runtime_auxv.go new file mode 100644 index 00000000..5f92ac9a --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/runtime_auxv.go @@ -0,0 +1,16 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +// getAuxvFn is non-nil on Go 1.21+ (via runtime_auxv_go121.go init) +// on platforms that use auxv. +var getAuxvFn func() []uintptr + +func getAuxv() []uintptr { + if getAuxvFn == nil { + return nil + } + return getAuxvFn() +} diff --git a/vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go b/vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go new file mode 100644 index 00000000..b975ea2a --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go @@ -0,0 +1,19 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 +// +build go1.21 + +package cpu + +import ( + _ "unsafe" // for linkname +) + +//go:linkname runtime_getAuxv runtime.getAuxv +func runtime_getAuxv() []uintptr + +func init() { + getAuxvFn = runtime_getAuxv +} diff --git a/vendor/golang.org/x/sys/execabs/execabs.go b/vendor/golang.org/x/sys/execabs/execabs.go index b981cfbb..3bf40fdf 100644 --- a/vendor/golang.org/x/sys/execabs/execabs.go +++ b/vendor/golang.org/x/sys/execabs/execabs.go @@ -63,7 +63,7 @@ func LookPath(file string) (string, error) { } func fixCmd(name string, cmd *exec.Cmd) { - if filepath.Base(name) == name && !filepath.IsAbs(cmd.Path) { + if filepath.Base(name) == name && !filepath.IsAbs(cmd.Path) && !isGo119ErrFieldSet(cmd) { // exec.Command was called with a bare binary name and // exec.LookPath returned a path which is not absolute. // Set cmd.lookPathErr and clear cmd.Path so that it diff --git a/vendor/golang.org/x/sys/execabs/execabs_go118.go b/vendor/golang.org/x/sys/execabs/execabs_go118.go index 6ab5f508..2000064a 100644 --- a/vendor/golang.org/x/sys/execabs/execabs_go118.go +++ b/vendor/golang.org/x/sys/execabs/execabs_go118.go @@ -7,6 +7,12 @@ package execabs +import "os/exec" + func isGo119ErrDot(err error) bool { return false } + +func isGo119ErrFieldSet(cmd *exec.Cmd) bool { + return false +} diff --git a/vendor/golang.org/x/sys/execabs/execabs_go119.go b/vendor/golang.org/x/sys/execabs/execabs_go119.go index 46c5b525..f364b341 100644 --- a/vendor/golang.org/x/sys/execabs/execabs_go119.go +++ b/vendor/golang.org/x/sys/execabs/execabs_go119.go @@ -15,3 +15,7 @@ import ( func isGo119ErrDot(err error) bool { return errors.Is(err, exec.ErrDot) } + +func isGo119ErrFieldSet(cmd *exec.Cmd) bool { + return cmd.Err != nil +} diff --git a/vendor/golang.org/x/sys/unix/ioctl_signed.go b/vendor/golang.org/x/sys/unix/ioctl_signed.go new file mode 100644 index 00000000..7def9580 --- /dev/null +++ b/vendor/golang.org/x/sys/unix/ioctl_signed.go @@ -0,0 +1,70 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build aix || solaris +// +build aix solaris + +package unix + +import ( + "unsafe" +) + +// ioctl itself should not be exposed directly, but additional get/set +// functions for specific types are permissible. + +// IoctlSetInt performs an ioctl operation which sets an integer value +// on fd, using the specified request number. +func IoctlSetInt(fd int, req int, value int) error { + return ioctl(fd, req, uintptr(value)) +} + +// IoctlSetPointerInt performs an ioctl operation which sets an +// integer value on fd, using the specified request number. The ioctl +// argument is called with a pointer to the integer value, rather than +// passing the integer value directly. +func IoctlSetPointerInt(fd int, req int, value int) error { + v := int32(value) + return ioctlPtr(fd, req, unsafe.Pointer(&v)) +} + +// IoctlSetWinsize performs an ioctl on fd with a *Winsize argument. +// +// To change fd's window size, the req argument should be TIOCSWINSZ. +func IoctlSetWinsize(fd int, req int, value *Winsize) error { + // TODO: if we get the chance, remove the req parameter and + // hardcode TIOCSWINSZ. + return ioctlPtr(fd, req, unsafe.Pointer(value)) +} + +// IoctlSetTermios performs an ioctl on fd with a *Termios. +// +// The req value will usually be TCSETA or TIOCSETA. +func IoctlSetTermios(fd int, req int, value *Termios) error { + // TODO: if we get the chance, remove the req parameter. + return ioctlPtr(fd, req, unsafe.Pointer(value)) +} + +// IoctlGetInt performs an ioctl operation which gets an integer value +// from fd, using the specified request number. +// +// A few ioctl requests use the return value as an output parameter; +// for those, IoctlRetInt should be used instead of this function. +func IoctlGetInt(fd int, req int) (int, error) { + var value int + err := ioctlPtr(fd, req, unsafe.Pointer(&value)) + return value, err +} + +func IoctlGetWinsize(fd int, req int) (*Winsize, error) { + var value Winsize + err := ioctlPtr(fd, req, unsafe.Pointer(&value)) + return &value, err +} + +func IoctlGetTermios(fd int, req int) (*Termios, error) { + var value Termios + err := ioctlPtr(fd, req, unsafe.Pointer(&value)) + return &value, err +} diff --git a/vendor/golang.org/x/sys/unix/ioctl.go b/vendor/golang.org/x/sys/unix/ioctl_unsigned.go similarity index 76% rename from vendor/golang.org/x/sys/unix/ioctl.go rename to vendor/golang.org/x/sys/unix/ioctl_unsigned.go index 1c51b0ec..649913d1 100644 --- a/vendor/golang.org/x/sys/unix/ioctl.go +++ b/vendor/golang.org/x/sys/unix/ioctl_unsigned.go @@ -2,13 +2,12 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build aix || darwin || dragonfly || freebsd || hurd || linux || netbsd || openbsd || solaris -// +build aix darwin dragonfly freebsd hurd linux netbsd openbsd solaris +//go:build darwin || dragonfly || freebsd || hurd || linux || netbsd || openbsd +// +build darwin dragonfly freebsd hurd linux netbsd openbsd package unix import ( - "runtime" "unsafe" ) @@ -27,7 +26,7 @@ func IoctlSetInt(fd int, req uint, value int) error { // passing the integer value directly. func IoctlSetPointerInt(fd int, req uint, value int) error { v := int32(value) - return ioctl(fd, req, uintptr(unsafe.Pointer(&v))) + return ioctlPtr(fd, req, unsafe.Pointer(&v)) } // IoctlSetWinsize performs an ioctl on fd with a *Winsize argument. @@ -36,9 +35,7 @@ func IoctlSetPointerInt(fd int, req uint, value int) error { func IoctlSetWinsize(fd int, req uint, value *Winsize) error { // TODO: if we get the chance, remove the req parameter and // hardcode TIOCSWINSZ. - err := ioctl(fd, req, uintptr(unsafe.Pointer(value))) - runtime.KeepAlive(value) - return err + return ioctlPtr(fd, req, unsafe.Pointer(value)) } // IoctlSetTermios performs an ioctl on fd with a *Termios. @@ -46,9 +43,7 @@ func IoctlSetWinsize(fd int, req uint, value *Winsize) error { // The req value will usually be TCSETA or TIOCSETA. func IoctlSetTermios(fd int, req uint, value *Termios) error { // TODO: if we get the chance, remove the req parameter. - err := ioctl(fd, req, uintptr(unsafe.Pointer(value))) - runtime.KeepAlive(value) - return err + return ioctlPtr(fd, req, unsafe.Pointer(value)) } // IoctlGetInt performs an ioctl operation which gets an integer value @@ -58,18 +53,18 @@ func IoctlSetTermios(fd int, req uint, value *Termios) error { // for those, IoctlRetInt should be used instead of this function. func IoctlGetInt(fd int, req uint) (int, error) { var value int - err := ioctl(fd, req, uintptr(unsafe.Pointer(&value))) + err := ioctlPtr(fd, req, unsafe.Pointer(&value)) return value, err } func IoctlGetWinsize(fd int, req uint) (*Winsize, error) { var value Winsize - err := ioctl(fd, req, uintptr(unsafe.Pointer(&value))) + err := ioctlPtr(fd, req, unsafe.Pointer(&value)) return &value, err } func IoctlGetTermios(fd int, req uint) (*Termios, error) { var value Termios - err := ioctl(fd, req, uintptr(unsafe.Pointer(&value))) + err := ioctlPtr(fd, req, unsafe.Pointer(&value)) return &value, err } diff --git a/vendor/golang.org/x/sys/unix/ioctl_zos.go b/vendor/golang.org/x/sys/unix/ioctl_zos.go index 5384e7d9..cdc21bf7 100644 --- a/vendor/golang.org/x/sys/unix/ioctl_zos.go +++ b/vendor/golang.org/x/sys/unix/ioctl_zos.go @@ -17,25 +17,23 @@ import ( // IoctlSetInt performs an ioctl operation which sets an integer value // on fd, using the specified request number. -func IoctlSetInt(fd int, req uint, value int) error { +func IoctlSetInt(fd int, req int, value int) error { return ioctl(fd, req, uintptr(value)) } // IoctlSetWinsize performs an ioctl on fd with a *Winsize argument. // // To change fd's window size, the req argument should be TIOCSWINSZ. -func IoctlSetWinsize(fd int, req uint, value *Winsize) error { +func IoctlSetWinsize(fd int, req int, value *Winsize) error { // TODO: if we get the chance, remove the req parameter and // hardcode TIOCSWINSZ. - err := ioctl(fd, req, uintptr(unsafe.Pointer(value))) - runtime.KeepAlive(value) - return err + return ioctlPtr(fd, req, unsafe.Pointer(value)) } // IoctlSetTermios performs an ioctl on fd with a *Termios. // // The req value is expected to be TCSETS, TCSETSW, or TCSETSF -func IoctlSetTermios(fd int, req uint, value *Termios) error { +func IoctlSetTermios(fd int, req int, value *Termios) error { if (req != TCSETS) && (req != TCSETSW) && (req != TCSETSF) { return ENOSYS } @@ -49,22 +47,22 @@ func IoctlSetTermios(fd int, req uint, value *Termios) error { // // A few ioctl requests use the return value as an output parameter; // for those, IoctlRetInt should be used instead of this function. -func IoctlGetInt(fd int, req uint) (int, error) { +func IoctlGetInt(fd int, req int) (int, error) { var value int - err := ioctl(fd, req, uintptr(unsafe.Pointer(&value))) + err := ioctlPtr(fd, req, unsafe.Pointer(&value)) return value, err } -func IoctlGetWinsize(fd int, req uint) (*Winsize, error) { +func IoctlGetWinsize(fd int, req int) (*Winsize, error) { var value Winsize - err := ioctl(fd, req, uintptr(unsafe.Pointer(&value))) + err := ioctlPtr(fd, req, unsafe.Pointer(&value)) return &value, err } // IoctlGetTermios performs an ioctl on fd with a *Termios. // // The req value is expected to be TCGETS -func IoctlGetTermios(fd int, req uint) (*Termios, error) { +func IoctlGetTermios(fd int, req int) (*Termios, error) { var value Termios if req != TCGETS { return &value, ENOSYS diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh index 7456d9dd..2045d3da 100644 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ b/vendor/golang.org/x/sys/unix/mkerrors.sh @@ -66,6 +66,7 @@ includes_Darwin=' #include #include #include +#include #include #include #include @@ -521,6 +522,7 @@ ccflags="$@" $2 ~ /^NFC_(GENL|PROTO|COMM|RF|SE|DIRECTION|LLCP|SOCKPROTO)_/ || $2 ~ /^NFC_.*_(MAX)?SIZE$/ || $2 ~ /^RAW_PAYLOAD_/ || + $2 ~ /^[US]F_/ || $2 ~ /^TP_STATUS_/ || $2 ~ /^FALLOC_/ || $2 ~ /^ICMPV?6?_(FILTER|SEC)/ || diff --git a/vendor/golang.org/x/sys/unix/ptrace_darwin.go b/vendor/golang.org/x/sys/unix/ptrace_darwin.go index 463c3eff..39dba6ca 100644 --- a/vendor/golang.org/x/sys/unix/ptrace_darwin.go +++ b/vendor/golang.org/x/sys/unix/ptrace_darwin.go @@ -7,6 +7,12 @@ package unix +import "unsafe" + func ptrace(request int, pid int, addr uintptr, data uintptr) error { return ptrace1(request, pid, addr, data) } + +func ptracePtr(request int, pid int, addr uintptr, data unsafe.Pointer) error { + return ptrace1Ptr(request, pid, addr, data) +} diff --git a/vendor/golang.org/x/sys/unix/ptrace_ios.go b/vendor/golang.org/x/sys/unix/ptrace_ios.go index ed0509a0..9ea66330 100644 --- a/vendor/golang.org/x/sys/unix/ptrace_ios.go +++ b/vendor/golang.org/x/sys/unix/ptrace_ios.go @@ -7,6 +7,12 @@ package unix +import "unsafe" + func ptrace(request int, pid int, addr uintptr, data uintptr) (err error) { return ENOTSUP } + +func ptracePtr(request int, pid int, addr uintptr, data unsafe.Pointer) (err error) { + return ENOTSUP +} diff --git a/vendor/golang.org/x/sys/unix/syscall_aix.go b/vendor/golang.org/x/sys/unix/syscall_aix.go index 2db1b51e..c406ae00 100644 --- a/vendor/golang.org/x/sys/unix/syscall_aix.go +++ b/vendor/golang.org/x/sys/unix/syscall_aix.go @@ -292,9 +292,7 @@ func anyToSockaddr(fd int, rsa *RawSockaddrAny) (Sockaddr, error) { break } } - - bytes := (*[len(pp.Path)]byte)(unsafe.Pointer(&pp.Path[0]))[0:n] - sa.Name = string(bytes) + sa.Name = string(unsafe.Slice((*byte)(unsafe.Pointer(&pp.Path[0])), n)) return sa, nil case AF_INET: @@ -410,7 +408,8 @@ func (w WaitStatus) CoreDump() bool { return w&0x80 == 0x80 } func (w WaitStatus) TrapCause() int { return -1 } -//sys ioctl(fd int, req uint, arg uintptr) (err error) +//sys ioctl(fd int, req int, arg uintptr) (err error) +//sys ioctlPtr(fd int, req int, arg unsafe.Pointer) (err error) = ioctl // fcntl must never be called with cmd=F_DUP2FD because it doesn't work on AIX // There is no way to create a custom fcntl and to keep //sys fcntl easily, diff --git a/vendor/golang.org/x/sys/unix/syscall_aix_ppc.go b/vendor/golang.org/x/sys/unix/syscall_aix_ppc.go index e92a0be1..f2871fa9 100644 --- a/vendor/golang.org/x/sys/unix/syscall_aix_ppc.go +++ b/vendor/golang.org/x/sys/unix/syscall_aix_ppc.go @@ -8,7 +8,6 @@ package unix //sysnb Getrlimit(resource int, rlim *Rlimit) (err error) = getrlimit64 -//sysnb Setrlimit(resource int, rlim *Rlimit) (err error) = setrlimit64 //sys Seek(fd int, offset int64, whence int) (off int64, err error) = lseek64 //sys mmap(addr uintptr, length uintptr, prot int, flags int, fd int, offset int64) (xaddr uintptr, err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_aix_ppc64.go b/vendor/golang.org/x/sys/unix/syscall_aix_ppc64.go index 16eed170..75718ec0 100644 --- a/vendor/golang.org/x/sys/unix/syscall_aix_ppc64.go +++ b/vendor/golang.org/x/sys/unix/syscall_aix_ppc64.go @@ -8,7 +8,6 @@ package unix //sysnb Getrlimit(resource int, rlim *Rlimit) (err error) -//sysnb Setrlimit(resource int, rlim *Rlimit) (err error) //sys Seek(fd int, offset int64, whence int) (off int64, err error) = lseek //sys mmap(addr uintptr, length uintptr, prot int, flags int, fd int, offset int64) (xaddr uintptr, err error) = mmap64 diff --git a/vendor/golang.org/x/sys/unix/syscall_bsd.go b/vendor/golang.org/x/sys/unix/syscall_bsd.go index eda42671..7705c327 100644 --- a/vendor/golang.org/x/sys/unix/syscall_bsd.go +++ b/vendor/golang.org/x/sys/unix/syscall_bsd.go @@ -245,8 +245,7 @@ func anyToSockaddr(fd int, rsa *RawSockaddrAny) (Sockaddr, error) { break } } - bytes := (*[len(pp.Path)]byte)(unsafe.Pointer(&pp.Path[0]))[0:n] - sa.Name = string(bytes) + sa.Name = string(unsafe.Slice((*byte)(unsafe.Pointer(&pp.Path[0])), n)) return sa, nil case AF_INET: diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin.go b/vendor/golang.org/x/sys/unix/syscall_darwin.go index 192b071b..20692150 100644 --- a/vendor/golang.org/x/sys/unix/syscall_darwin.go +++ b/vendor/golang.org/x/sys/unix/syscall_darwin.go @@ -14,7 +14,6 @@ package unix import ( "fmt" - "runtime" "syscall" "unsafe" ) @@ -376,11 +375,10 @@ func Flistxattr(fd int, dest []byte) (sz int, err error) { func Kill(pid int, signum syscall.Signal) (err error) { return kill(pid, int(signum), 1) } //sys ioctl(fd int, req uint, arg uintptr) (err error) +//sys ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) = SYS_IOCTL func IoctlCtlInfo(fd int, ctlInfo *CtlInfo) error { - err := ioctl(fd, CTLIOCGINFO, uintptr(unsafe.Pointer(ctlInfo))) - runtime.KeepAlive(ctlInfo) - return err + return ioctlPtr(fd, CTLIOCGINFO, unsafe.Pointer(ctlInfo)) } // IfreqMTU is struct ifreq used to get or set a network device's MTU. @@ -394,16 +392,14 @@ type IfreqMTU struct { func IoctlGetIfreqMTU(fd int, ifname string) (*IfreqMTU, error) { var ifreq IfreqMTU copy(ifreq.Name[:], ifname) - err := ioctl(fd, SIOCGIFMTU, uintptr(unsafe.Pointer(&ifreq))) + err := ioctlPtr(fd, SIOCGIFMTU, unsafe.Pointer(&ifreq)) return &ifreq, err } // IoctlSetIfreqMTU performs the SIOCSIFMTU ioctl operation on fd to set the MTU // of the network device specified by ifreq.Name. func IoctlSetIfreqMTU(fd int, ifreq *IfreqMTU) error { - err := ioctl(fd, SIOCSIFMTU, uintptr(unsafe.Pointer(ifreq))) - runtime.KeepAlive(ifreq) - return err + return ioctlPtr(fd, SIOCSIFMTU, unsafe.Pointer(ifreq)) } //sys sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) = SYS_SYSCTL @@ -617,6 +613,7 @@ func SysctlKinfoProcSlice(name string, args ...int) ([]KinfoProc, error) { //sys Rmdir(path string) (err error) //sys Seek(fd int, offset int64, whence int) (newoffset int64, err error) = SYS_LSEEK //sys Select(nfd int, r *FdSet, w *FdSet, e *FdSet, timeout *Timeval) (n int, err error) +//sys Setattrlist(path string, attrlist *Attrlist, attrBuf []byte, options int) (err error) //sys Setegid(egid int) (err error) //sysnb Seteuid(euid int) (err error) //sysnb Setgid(gid int) (err error) @@ -626,7 +623,6 @@ func SysctlKinfoProcSlice(name string, args ...int) ([]KinfoProc, error) { //sys Setprivexec(flag int) (err error) //sysnb Setregid(rgid int, egid int) (err error) //sysnb Setreuid(ruid int, euid int) (err error) -//sysnb Setrlimit(which int, lim *Rlimit) (err error) //sysnb Setsid() (pid int, err error) //sysnb Settimeofday(tp *Timeval) (err error) //sysnb Setuid(uid int) (err error) @@ -680,7 +676,6 @@ func SysctlKinfoProcSlice(name string, args ...int) ([]KinfoProc, error) { // Kqueue_from_portset_np // Kqueue_portset // Getattrlist -// Setattrlist // Getdirentriesattr // Searchfs // Delete diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin_amd64.go b/vendor/golang.org/x/sys/unix/syscall_darwin_amd64.go index b37310ce..9fa87980 100644 --- a/vendor/golang.org/x/sys/unix/syscall_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/syscall_darwin_amd64.go @@ -47,5 +47,6 @@ func Syscall9(num, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, //sys getfsstat(buf unsafe.Pointer, size uintptr, flags int) (n int, err error) = SYS_GETFSSTAT64 //sys Lstat(path string, stat *Stat_t) (err error) = SYS_LSTAT64 //sys ptrace1(request int, pid int, addr uintptr, data uintptr) (err error) = SYS_ptrace +//sys ptrace1Ptr(request int, pid int, addr unsafe.Pointer, data uintptr) (err error) = SYS_ptrace //sys Stat(path string, stat *Stat_t) (err error) = SYS_STAT64 //sys Statfs(path string, stat *Statfs_t) (err error) = SYS_STATFS64 diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/syscall_darwin_arm64.go index d51ec996..f17b8c52 100644 --- a/vendor/golang.org/x/sys/unix/syscall_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/syscall_darwin_arm64.go @@ -47,5 +47,6 @@ func Syscall9(num, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, //sys getfsstat(buf unsafe.Pointer, size uintptr, flags int) (n int, err error) = SYS_GETFSSTAT //sys Lstat(path string, stat *Stat_t) (err error) //sys ptrace1(request int, pid int, addr uintptr, data uintptr) (err error) = SYS_ptrace +//sys ptrace1Ptr(request int, pid int, addr unsafe.Pointer, data uintptr) (err error) = SYS_ptrace //sys Stat(path string, stat *Stat_t) (err error) //sys Statfs(path string, stat *Statfs_t) (err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_dragonfly.go b/vendor/golang.org/x/sys/unix/syscall_dragonfly.go index a41111a7..d4ce988e 100644 --- a/vendor/golang.org/x/sys/unix/syscall_dragonfly.go +++ b/vendor/golang.org/x/sys/unix/syscall_dragonfly.go @@ -172,6 +172,7 @@ func Getfsstat(buf []Statfs_t, flags int) (n int, err error) { } //sys ioctl(fd int, req uint, arg uintptr) (err error) +//sys ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) = SYS_IOCTL //sys sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) = SYS___SYSCTL @@ -325,7 +326,6 @@ func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err e //sysnb Setreuid(ruid int, euid int) (err error) //sysnb Setresgid(rgid int, egid int, sgid int) (err error) //sysnb Setresuid(ruid int, euid int, suid int) (err error) -//sysnb Setrlimit(which int, lim *Rlimit) (err error) //sysnb Setsid() (pid int, err error) //sysnb Settimeofday(tp *Timeval) (err error) //sysnb Setuid(uid int) (err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_freebsd.go b/vendor/golang.org/x/sys/unix/syscall_freebsd.go index d50b9dc2..afb10106 100644 --- a/vendor/golang.org/x/sys/unix/syscall_freebsd.go +++ b/vendor/golang.org/x/sys/unix/syscall_freebsd.go @@ -161,7 +161,8 @@ func Getfsstat(buf []Statfs_t, flags int) (n int, err error) { return } -//sys ioctl(fd int, req uint, arg uintptr) (err error) +//sys ioctl(fd int, req uint, arg uintptr) (err error) = SYS_IOCTL +//sys ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) = SYS_IOCTL //sys sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) = SYS___SYSCTL @@ -253,6 +254,7 @@ func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err e } //sys ptrace(request int, pid int, addr uintptr, data int) (err error) +//sys ptracePtr(request int, pid int, addr unsafe.Pointer, data int) (err error) = SYS_PTRACE func PtraceAttach(pid int) (err error) { return ptrace(PT_ATTACH, pid, 0, 0) @@ -267,19 +269,36 @@ func PtraceDetach(pid int) (err error) { } func PtraceGetFpRegs(pid int, fpregsout *FpReg) (err error) { - return ptrace(PT_GETFPREGS, pid, uintptr(unsafe.Pointer(fpregsout)), 0) + return ptracePtr(PT_GETFPREGS, pid, unsafe.Pointer(fpregsout), 0) } func PtraceGetRegs(pid int, regsout *Reg) (err error) { - return ptrace(PT_GETREGS, pid, uintptr(unsafe.Pointer(regsout)), 0) + return ptracePtr(PT_GETREGS, pid, unsafe.Pointer(regsout), 0) +} + +func PtraceIO(req int, pid int, offs uintptr, out []byte, countin int) (count int, err error) { + ioDesc := PtraceIoDesc{ + Op: int32(req), + Offs: offs, + } + if countin > 0 { + _ = out[:countin] // check bounds + ioDesc.Addr = &out[0] + } else if out != nil { + ioDesc.Addr = (*byte)(unsafe.Pointer(&_zero)) + } + ioDesc.SetLen(countin) + + err = ptracePtr(PT_IO, pid, unsafe.Pointer(&ioDesc), 0) + return int(ioDesc.Len), err } func PtraceLwpEvents(pid int, enable int) (err error) { return ptrace(PT_LWP_EVENTS, pid, 0, enable) } -func PtraceLwpInfo(pid int, info uintptr) (err error) { - return ptrace(PT_LWPINFO, pid, info, int(unsafe.Sizeof(PtraceLwpInfoStruct{}))) +func PtraceLwpInfo(pid int, info *PtraceLwpInfoStruct) (err error) { + return ptracePtr(PT_LWPINFO, pid, unsafe.Pointer(info), int(unsafe.Sizeof(*info))) } func PtracePeekData(pid int, addr uintptr, out []byte) (count int, err error) { @@ -299,13 +318,25 @@ func PtracePokeText(pid int, addr uintptr, data []byte) (count int, err error) { } func PtraceSetRegs(pid int, regs *Reg) (err error) { - return ptrace(PT_SETREGS, pid, uintptr(unsafe.Pointer(regs)), 0) + return ptracePtr(PT_SETREGS, pid, unsafe.Pointer(regs), 0) } func PtraceSingleStep(pid int) (err error) { return ptrace(PT_STEP, pid, 1, 0) } +func Dup3(oldfd, newfd, flags int) error { + if oldfd == newfd || flags&^O_CLOEXEC != 0 { + return EINVAL + } + how := F_DUP2FD + if flags&O_CLOEXEC != 0 { + how = F_DUP2FD_CLOEXEC + } + _, err := fcntl(oldfd, how, newfd) + return err +} + /* * Exposed directly */ @@ -402,7 +433,6 @@ func PtraceSingleStep(pid int) (err error) { //sysnb Setreuid(ruid int, euid int) (err error) //sysnb Setresgid(rgid int, egid int, sgid int) (err error) //sysnb Setresuid(ruid int, euid int, suid int) (err error) -//sysnb Setrlimit(which int, lim *Rlimit) (err error) //sysnb Setsid() (pid int, err error) //sysnb Settimeofday(tp *Timeval) (err error) //sysnb Setuid(uid int) (err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_freebsd_386.go b/vendor/golang.org/x/sys/unix/syscall_freebsd_386.go index 6a91d471..b8da5100 100644 --- a/vendor/golang.org/x/sys/unix/syscall_freebsd_386.go +++ b/vendor/golang.org/x/sys/unix/syscall_freebsd_386.go @@ -42,6 +42,10 @@ func (cmsg *Cmsghdr) SetLen(length int) { cmsg.Len = uint32(length) } +func (d *PtraceIoDesc) SetLen(length int) { + d.Len = uint32(length) +} + func sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) { var writtenOut uint64 = 0 _, _, e1 := Syscall9(SYS_SENDFILE, uintptr(infd), uintptr(outfd), uintptr(*offset), uintptr((*offset)>>32), uintptr(count), 0, uintptr(unsafe.Pointer(&writtenOut)), 0, 0) @@ -57,16 +61,5 @@ func sendfile(outfd int, infd int, offset *int64, count int) (written int, err e func Syscall9(num, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, err syscall.Errno) func PtraceGetFsBase(pid int, fsbase *int64) (err error) { - return ptrace(PT_GETFSBASE, pid, uintptr(unsafe.Pointer(fsbase)), 0) -} - -func PtraceIO(req int, pid int, offs uintptr, out []byte, countin int) (count int, err error) { - ioDesc := PtraceIoDesc{ - Op: int32(req), - Offs: offs, - Addr: uintptr(unsafe.Pointer(&out[0])), // TODO(#58351): this is not safe. - Len: uint32(countin), - } - err = ptrace(PT_IO, pid, uintptr(unsafe.Pointer(&ioDesc)), 0) - return int(ioDesc.Len), err + return ptracePtr(PT_GETFSBASE, pid, unsafe.Pointer(fsbase), 0) } diff --git a/vendor/golang.org/x/sys/unix/syscall_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/syscall_freebsd_amd64.go index 48110a0a..47155c48 100644 --- a/vendor/golang.org/x/sys/unix/syscall_freebsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/syscall_freebsd_amd64.go @@ -42,6 +42,10 @@ func (cmsg *Cmsghdr) SetLen(length int) { cmsg.Len = uint32(length) } +func (d *PtraceIoDesc) SetLen(length int) { + d.Len = uint64(length) +} + func sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) { var writtenOut uint64 = 0 _, _, e1 := Syscall9(SYS_SENDFILE, uintptr(infd), uintptr(outfd), uintptr(*offset), uintptr(count), 0, uintptr(unsafe.Pointer(&writtenOut)), 0, 0, 0) @@ -57,16 +61,5 @@ func sendfile(outfd int, infd int, offset *int64, count int) (written int, err e func Syscall9(num, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, err syscall.Errno) func PtraceGetFsBase(pid int, fsbase *int64) (err error) { - return ptrace(PT_GETFSBASE, pid, uintptr(unsafe.Pointer(fsbase)), 0) -} - -func PtraceIO(req int, pid int, offs uintptr, out []byte, countin int) (count int, err error) { - ioDesc := PtraceIoDesc{ - Op: int32(req), - Offs: offs, - Addr: uintptr(unsafe.Pointer(&out[0])), // TODO(#58351): this is not safe. - Len: uint64(countin), - } - err = ptrace(PT_IO, pid, uintptr(unsafe.Pointer(&ioDesc)), 0) - return int(ioDesc.Len), err + return ptracePtr(PT_GETFSBASE, pid, unsafe.Pointer(fsbase), 0) } diff --git a/vendor/golang.org/x/sys/unix/syscall_freebsd_arm.go b/vendor/golang.org/x/sys/unix/syscall_freebsd_arm.go index 52f1d4b7..08932093 100644 --- a/vendor/golang.org/x/sys/unix/syscall_freebsd_arm.go +++ b/vendor/golang.org/x/sys/unix/syscall_freebsd_arm.go @@ -42,6 +42,10 @@ func (cmsg *Cmsghdr) SetLen(length int) { cmsg.Len = uint32(length) } +func (d *PtraceIoDesc) SetLen(length int) { + d.Len = uint32(length) +} + func sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) { var writtenOut uint64 = 0 _, _, e1 := Syscall9(SYS_SENDFILE, uintptr(infd), uintptr(outfd), uintptr(*offset), uintptr((*offset)>>32), uintptr(count), 0, uintptr(unsafe.Pointer(&writtenOut)), 0, 0) @@ -55,14 +59,3 @@ func sendfile(outfd int, infd int, offset *int64, count int) (written int, err e } func Syscall9(num, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, err syscall.Errno) - -func PtraceIO(req int, pid int, offs uintptr, out []byte, countin int) (count int, err error) { - ioDesc := PtraceIoDesc{ - Op: int32(req), - Offs: offs, - Addr: uintptr(unsafe.Pointer(&out[0])), // TODO(#58351): this is not safe. - Len: uint32(countin), - } - err = ptrace(PT_IO, pid, uintptr(unsafe.Pointer(&ioDesc)), 0) - return int(ioDesc.Len), err -} diff --git a/vendor/golang.org/x/sys/unix/syscall_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/syscall_freebsd_arm64.go index 5537ee4f..d151a0d0 100644 --- a/vendor/golang.org/x/sys/unix/syscall_freebsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/syscall_freebsd_arm64.go @@ -42,6 +42,10 @@ func (cmsg *Cmsghdr) SetLen(length int) { cmsg.Len = uint32(length) } +func (d *PtraceIoDesc) SetLen(length int) { + d.Len = uint64(length) +} + func sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) { var writtenOut uint64 = 0 _, _, e1 := Syscall9(SYS_SENDFILE, uintptr(infd), uintptr(outfd), uintptr(*offset), uintptr(count), 0, uintptr(unsafe.Pointer(&writtenOut)), 0, 0, 0) @@ -55,14 +59,3 @@ func sendfile(outfd int, infd int, offset *int64, count int) (written int, err e } func Syscall9(num, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, err syscall.Errno) - -func PtraceIO(req int, pid int, offs uintptr, out []byte, countin int) (count int, err error) { - ioDesc := PtraceIoDesc{ - Op: int32(req), - Offs: offs, - Addr: uintptr(unsafe.Pointer(&out[0])), // TODO(#58351): this is not safe. - Len: uint64(countin), - } - err = ptrace(PT_IO, pid, uintptr(unsafe.Pointer(&ioDesc)), 0) - return int(ioDesc.Len), err -} diff --git a/vendor/golang.org/x/sys/unix/syscall_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/syscall_freebsd_riscv64.go index 164abd5d..d5cd64b3 100644 --- a/vendor/golang.org/x/sys/unix/syscall_freebsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/syscall_freebsd_riscv64.go @@ -42,6 +42,10 @@ func (cmsg *Cmsghdr) SetLen(length int) { cmsg.Len = uint32(length) } +func (d *PtraceIoDesc) SetLen(length int) { + d.Len = uint64(length) +} + func sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) { var writtenOut uint64 = 0 _, _, e1 := Syscall9(SYS_SENDFILE, uintptr(infd), uintptr(outfd), uintptr(*offset), uintptr(count), 0, uintptr(unsafe.Pointer(&writtenOut)), 0, 0, 0) @@ -55,14 +59,3 @@ func sendfile(outfd int, infd int, offset *int64, count int) (written int, err e } func Syscall9(num, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, err syscall.Errno) - -func PtraceIO(req int, pid int, offs uintptr, out []byte, countin int) (count int, err error) { - ioDesc := PtraceIoDesc{ - Op: int32(req), - Offs: offs, - Addr: uintptr(unsafe.Pointer(&out[0])), // TODO(#58351): this is not safe. - Len: uint64(countin), - } - err = ptrace(PT_IO, pid, uintptr(unsafe.Pointer(&ioDesc)), 0) - return int(ioDesc.Len), err -} diff --git a/vendor/golang.org/x/sys/unix/syscall_hurd.go b/vendor/golang.org/x/sys/unix/syscall_hurd.go index 4ffb6480..381fd467 100644 --- a/vendor/golang.org/x/sys/unix/syscall_hurd.go +++ b/vendor/golang.org/x/sys/unix/syscall_hurd.go @@ -20,3 +20,11 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { } return } + +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + r0, er := C.ioctl(C.int(fd), C.ulong(req), C.uintptr_t(uintptr(arg))) + if r0 == -1 && er != nil { + err = er + } + return +} diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go index 5443dddd..fbaeb5ff 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux.go @@ -1015,8 +1015,7 @@ func anyToSockaddr(fd int, rsa *RawSockaddrAny) (Sockaddr, error) { for n < len(pp.Path) && pp.Path[n] != 0 { n++ } - bytes := (*[len(pp.Path)]byte)(unsafe.Pointer(&pp.Path[0]))[0:n] - sa.Name = string(bytes) + sa.Name = string(unsafe.Slice((*byte)(unsafe.Pointer(&pp.Path[0])), n)) return sa, nil case AF_INET: @@ -1365,6 +1364,10 @@ func SetsockoptTCPRepairOpt(fd, level, opt int, o []TCPRepairOpt) (err error) { return setsockopt(fd, level, opt, unsafe.Pointer(&o[0]), uintptr(SizeofTCPRepairOpt*len(o))) } +func SetsockoptTCPMD5Sig(fd, level, opt int, s *TCPMD5Sig) error { + return setsockopt(fd, level, opt, unsafe.Pointer(s), unsafe.Sizeof(*s)) +} + // Keyctl Commands (http://man7.org/linux/man-pages/man2/keyctl.2.html) // KeyctlInt calls keyctl commands in which each argument is an int. @@ -1579,6 +1582,7 @@ func BindToDevice(fd int, device string) (err error) { } //sys ptrace(request int, pid int, addr uintptr, data uintptr) (err error) +//sys ptracePtr(request int, pid int, addr uintptr, data unsafe.Pointer) (err error) = SYS_PTRACE func ptracePeek(req int, pid int, addr uintptr, out []byte) (count int, err error) { // The peek requests are machine-size oriented, so we wrap it @@ -1596,7 +1600,7 @@ func ptracePeek(req int, pid int, addr uintptr, out []byte) (count int, err erro // boundary. n := 0 if addr%SizeofPtr != 0 { - err = ptrace(req, pid, addr-addr%SizeofPtr, uintptr(unsafe.Pointer(&buf[0]))) + err = ptracePtr(req, pid, addr-addr%SizeofPtr, unsafe.Pointer(&buf[0])) if err != nil { return 0, err } @@ -1608,7 +1612,7 @@ func ptracePeek(req int, pid int, addr uintptr, out []byte) (count int, err erro for len(out) > 0 { // We use an internal buffer to guarantee alignment. // It's not documented if this is necessary, but we're paranoid. - err = ptrace(req, pid, addr+uintptr(n), uintptr(unsafe.Pointer(&buf[0]))) + err = ptracePtr(req, pid, addr+uintptr(n), unsafe.Pointer(&buf[0])) if err != nil { return n, err } @@ -1640,7 +1644,7 @@ func ptracePoke(pokeReq int, peekReq int, pid int, addr uintptr, data []byte) (c n := 0 if addr%SizeofPtr != 0 { var buf [SizeofPtr]byte - err = ptrace(peekReq, pid, addr-addr%SizeofPtr, uintptr(unsafe.Pointer(&buf[0]))) + err = ptracePtr(peekReq, pid, addr-addr%SizeofPtr, unsafe.Pointer(&buf[0])) if err != nil { return 0, err } @@ -1667,7 +1671,7 @@ func ptracePoke(pokeReq int, peekReq int, pid int, addr uintptr, data []byte) (c // Trailing edge. if len(data) > 0 { var buf [SizeofPtr]byte - err = ptrace(peekReq, pid, addr+uintptr(n), uintptr(unsafe.Pointer(&buf[0]))) + err = ptracePtr(peekReq, pid, addr+uintptr(n), unsafe.Pointer(&buf[0])) if err != nil { return n, err } @@ -1696,11 +1700,11 @@ func PtracePokeUser(pid int, addr uintptr, data []byte) (count int, err error) { } func PtraceGetRegs(pid int, regsout *PtraceRegs) (err error) { - return ptrace(PTRACE_GETREGS, pid, 0, uintptr(unsafe.Pointer(regsout))) + return ptracePtr(PTRACE_GETREGS, pid, 0, unsafe.Pointer(regsout)) } func PtraceSetRegs(pid int, regs *PtraceRegs) (err error) { - return ptrace(PTRACE_SETREGS, pid, 0, uintptr(unsafe.Pointer(regs))) + return ptracePtr(PTRACE_SETREGS, pid, 0, unsafe.Pointer(regs)) } func PtraceSetOptions(pid int, options int) (err error) { @@ -1709,7 +1713,7 @@ func PtraceSetOptions(pid int, options int) (err error) { func PtraceGetEventMsg(pid int) (msg uint, err error) { var data _C_long - err = ptrace(PTRACE_GETEVENTMSG, pid, 0, uintptr(unsafe.Pointer(&data))) + err = ptracePtr(PTRACE_GETEVENTMSG, pid, 0, unsafe.Pointer(&data)) msg = uint(data) return } @@ -1869,7 +1873,6 @@ func Getpgrp() (pid int) { //sys OpenTree(dfd int, fileName string, flags uint) (r int, err error) //sys PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) //sys PivotRoot(newroot string, putold string) (err error) = SYS_PIVOT_ROOT -//sysnb Prlimit(pid int, resource int, newlimit *Rlimit, old *Rlimit) (err error) = SYS_PRLIMIT64 //sys Prctl(option int, arg2 uintptr, arg3 uintptr, arg4 uintptr, arg5 uintptr) (err error) //sys Pselect(nfd int, r *FdSet, w *FdSet, e *FdSet, timeout *Timespec, sigmask *Sigset_t) (n int, err error) = SYS_PSELECT6 //sys read(fd int, p []byte) (n int, err error) @@ -1883,6 +1886,15 @@ func Getpgrp() (pid int) { //sysnb Settimeofday(tv *Timeval) (err error) //sys Setns(fd int, nstype int) (err error) +//go:linkname syscall_prlimit syscall.prlimit +func syscall_prlimit(pid, resource int, newlimit, old *syscall.Rlimit) error + +func Prlimit(pid, resource int, newlimit, old *Rlimit) error { + // Just call the syscall version, because as of Go 1.21 + // it will affect starting a new process. + return syscall_prlimit(pid, resource, (*syscall.Rlimit)(newlimit), (*syscall.Rlimit)(old)) +} + // PrctlRetInt performs a prctl operation specified by option and further // optional arguments arg2 through arg5 depending on option. It returns a // non-negative integer that is returned by the prctl syscall. @@ -2154,6 +2166,14 @@ func isGroupMember(gid int) bool { return false } +func isCapDacOverrideSet() bool { + hdr := CapUserHeader{Version: LINUX_CAPABILITY_VERSION_3} + data := [2]CapUserData{} + err := Capget(&hdr, &data[0]) + + return err == nil && data[0].Effective&(1< 0 { + _p1 = unsafe.Pointer(&attrBuf[0]) + } else { + _p1 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall6(libc_setattrlist_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(attrlist)), uintptr(_p1), uintptr(len(attrBuf)), uintptr(options), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_setattrlist_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_setattrlist setattrlist "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Setegid(egid int) (err error) { _, _, e1 := syscall_syscall(libc_setegid_trampoline_addr, uintptr(egid), 0, 0) if e1 != 0 { @@ -2115,20 +2148,6 @@ var libc_setreuid_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := syscall_rawSyscall(libc_setrlimit_trampoline_addr, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -var libc_setrlimit_trampoline_addr uintptr - -//go:cgo_import_dynamic libc_setrlimit setrlimit "/usr/lib/libSystem.B.dylib" - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := syscall_rawSyscall(libc_setsid_trampoline_addr, 0, 0, 0) pid = int(r0) @@ -2502,6 +2521,14 @@ func ptrace1(request int, pid int, addr uintptr, data uintptr) (err error) { return } +func ptrace1Ptr(request int, pid int, addr uintptr, data unsafe.Pointer) (err error) { + _, _, e1 := syscall_syscall6(libc_ptrace_trampoline_addr, uintptr(request), uintptr(pid), addr, uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + var libc_ptrace_trampoline_addr uintptr //go:cgo_import_dynamic libc_ptrace ptrace "/usr/lib/libSystem.B.dylib" diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s index 95fe4c0e..4baaed0b 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s @@ -705,6 +705,11 @@ TEXT libc_select_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_select_trampoline_addr(SB), RODATA, $8 DATA ·libc_select_trampoline_addr(SB)/8, $libc_select_trampoline<>(SB) +TEXT libc_setattrlist_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_setattrlist(SB) +GLOBL ·libc_setattrlist_trampoline_addr(SB), RODATA, $8 +DATA ·libc_setattrlist_trampoline_addr(SB)/8, $libc_setattrlist_trampoline<>(SB) + TEXT libc_setegid_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_setegid(SB) @@ -759,12 +764,6 @@ TEXT libc_setreuid_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_setreuid_trampoline_addr(SB), RODATA, $8 DATA ·libc_setreuid_trampoline_addr(SB)/8, $libc_setreuid_trampoline<>(SB) -TEXT libc_setrlimit_trampoline<>(SB),NOSPLIT,$0-0 - JMP libc_setrlimit(SB) - -GLOBL ·libc_setrlimit_trampoline_addr(SB), RODATA, $8 -DATA ·libc_setrlimit_trampoline_addr(SB)/8, $libc_setrlimit_trampoline<>(SB) - TEXT libc_setsid_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_setsid(SB) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go index 26a0fdc5..51d6f3fb 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go @@ -725,6 +725,14 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { return } +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := syscall_syscall(libc_ioctl_trampoline_addr, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + var libc_ioctl_trampoline_addr uintptr //go:cgo_import_dynamic libc_ioctl ioctl "/usr/lib/libSystem.B.dylib" @@ -1984,6 +1992,31 @@ var libc_select_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Setattrlist(path string, attrlist *Attrlist, attrBuf []byte, options int) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(path) + if err != nil { + return + } + var _p1 unsafe.Pointer + if len(attrBuf) > 0 { + _p1 = unsafe.Pointer(&attrBuf[0]) + } else { + _p1 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall6(libc_setattrlist_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(attrlist)), uintptr(_p1), uintptr(len(attrBuf)), uintptr(options), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_setattrlist_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_setattrlist setattrlist "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Setegid(egid int) (err error) { _, _, e1 := syscall_syscall(libc_setegid_trampoline_addr, uintptr(egid), 0, 0) if e1 != 0 { @@ -2115,20 +2148,6 @@ var libc_setreuid_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := syscall_rawSyscall(libc_setrlimit_trampoline_addr, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -var libc_setrlimit_trampoline_addr uintptr - -//go:cgo_import_dynamic libc_setrlimit setrlimit "/usr/lib/libSystem.B.dylib" - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := syscall_rawSyscall(libc_setsid_trampoline_addr, 0, 0, 0) pid = int(r0) @@ -2502,6 +2521,14 @@ func ptrace1(request int, pid int, addr uintptr, data uintptr) (err error) { return } +func ptrace1Ptr(request int, pid int, addr uintptr, data unsafe.Pointer) (err error) { + _, _, e1 := syscall_syscall6(libc_ptrace_trampoline_addr, uintptr(request), uintptr(pid), addr, uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + var libc_ptrace_trampoline_addr uintptr //go:cgo_import_dynamic libc_ptrace ptrace "/usr/lib/libSystem.B.dylib" diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s index efa5b4c9..c3b82c03 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s @@ -705,6 +705,11 @@ TEXT libc_select_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_select_trampoline_addr(SB), RODATA, $8 DATA ·libc_select_trampoline_addr(SB)/8, $libc_select_trampoline<>(SB) +TEXT libc_setattrlist_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_setattrlist(SB) +GLOBL ·libc_setattrlist_trampoline_addr(SB), RODATA, $8 +DATA ·libc_setattrlist_trampoline_addr(SB)/8, $libc_setattrlist_trampoline<>(SB) + TEXT libc_setegid_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_setegid(SB) @@ -759,12 +764,6 @@ TEXT libc_setreuid_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_setreuid_trampoline_addr(SB), RODATA, $8 DATA ·libc_setreuid_trampoline_addr(SB)/8, $libc_setreuid_trampoline<>(SB) -TEXT libc_setrlimit_trampoline<>(SB),NOSPLIT,$0-0 - JMP libc_setrlimit(SB) - -GLOBL ·libc_setrlimit_trampoline_addr(SB), RODATA, $8 -DATA ·libc_setrlimit_trampoline_addr(SB)/8, $libc_setrlimit_trampoline<>(SB) - TEXT libc_setsid_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_setsid(SB) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_dragonfly_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_dragonfly_amd64.go index 54749f9c..0eabac7a 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_dragonfly_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_dragonfly_amd64.go @@ -436,6 +436,16 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -1400,16 +1410,6 @@ func Setresuid(ruid int, euid int, suid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := RawSyscall(SYS_SETSID, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_freebsd_386.go b/vendor/golang.org/x/sys/unix/zsyscall_freebsd_386.go index 77479d45..ee313eb0 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_freebsd_386.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_freebsd_386.go @@ -388,6 +388,16 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -414,6 +424,16 @@ func ptrace(request int, pid int, addr uintptr, data int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ptracePtr(request int, pid int, addr unsafe.Pointer, data int) (err error) { + _, _, e1 := Syscall6(SYS_PTRACE, uintptr(request), uintptr(pid), uintptr(addr), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Access(path string, mode uint32) (err error) { var _p0 *byte _p0, err = BytePtrFromString(path) @@ -1625,16 +1645,6 @@ func Setresuid(ruid int, euid int, suid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := RawSyscall(SYS_SETSID, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_freebsd_amd64.go index 2e966d4d..4c986e44 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_freebsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_freebsd_amd64.go @@ -388,6 +388,16 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -414,6 +424,16 @@ func ptrace(request int, pid int, addr uintptr, data int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ptracePtr(request int, pid int, addr unsafe.Pointer, data int) (err error) { + _, _, e1 := Syscall6(SYS_PTRACE, uintptr(request), uintptr(pid), uintptr(addr), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Access(path string, mode uint32) (err error) { var _p0 *byte _p0, err = BytePtrFromString(path) @@ -1625,16 +1645,6 @@ func Setresuid(ruid int, euid int, suid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := RawSyscall(SYS_SETSID, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_freebsd_arm.go b/vendor/golang.org/x/sys/unix/zsyscall_freebsd_arm.go index d65a7c0f..55521694 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_freebsd_arm.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_freebsd_arm.go @@ -388,6 +388,16 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -414,6 +424,16 @@ func ptrace(request int, pid int, addr uintptr, data int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ptracePtr(request int, pid int, addr unsafe.Pointer, data int) (err error) { + _, _, e1 := Syscall6(SYS_PTRACE, uintptr(request), uintptr(pid), uintptr(addr), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Access(path string, mode uint32) (err error) { var _p0 *byte _p0, err = BytePtrFromString(path) @@ -1625,16 +1645,6 @@ func Setresuid(ruid int, euid int, suid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := RawSyscall(SYS_SETSID, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_freebsd_arm64.go index 6f0b97c6..67a226fb 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_freebsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_freebsd_arm64.go @@ -388,6 +388,16 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -414,6 +424,16 @@ func ptrace(request int, pid int, addr uintptr, data int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ptracePtr(request int, pid int, addr unsafe.Pointer, data int) (err error) { + _, _, e1 := Syscall6(SYS_PTRACE, uintptr(request), uintptr(pid), uintptr(addr), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Access(path string, mode uint32) (err error) { var _p0 *byte _p0, err = BytePtrFromString(path) @@ -1625,16 +1645,6 @@ func Setresuid(ruid int, euid int, suid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := RawSyscall(SYS_SETSID, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/zsyscall_freebsd_riscv64.go index e1c23b52..f0b9ddaa 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_freebsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_freebsd_riscv64.go @@ -388,6 +388,16 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -414,6 +424,16 @@ func ptrace(request int, pid int, addr uintptr, data int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ptracePtr(request int, pid int, addr unsafe.Pointer, data int) (err error) { + _, _, e1 := Syscall6(SYS_PTRACE, uintptr(request), uintptr(pid), uintptr(addr), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Access(path string, mode uint32) (err error) { var _p0 *byte _p0, err = BytePtrFromString(path) @@ -1625,16 +1645,6 @@ func Setresuid(ruid int, euid int, suid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := RawSyscall(SYS_SETSID, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux.go b/vendor/golang.org/x/sys/unix/zsyscall_linux.go index 36ea3a55..da63d9d7 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux.go @@ -379,6 +379,16 @@ func ptrace(request int, pid int, addr uintptr, data uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ptracePtr(request int, pid int, addr uintptr, data unsafe.Pointer) (err error) { + _, _, e1 := Syscall6(SYS_PTRACE, uintptr(request), uintptr(pid), uintptr(addr), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func reboot(magic1 uint, magic2 uint, cmd int, arg string) (err error) { var _p0 *byte _p0, err = BytePtrFromString(arg) @@ -1336,16 +1346,6 @@ func PivotRoot(newroot string, putold string) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Prlimit(pid int, resource int, newlimit *Rlimit, old *Rlimit) (err error) { - _, _, e1 := RawSyscall6(SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(newlimit)), uintptr(unsafe.Pointer(old)), 0, 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Prctl(option int, arg2 uintptr, arg3 uintptr, arg4 uintptr, arg5 uintptr) (err error) { _, _, e1 := Syscall6(SYS_PRCTL, uintptr(option), uintptr(arg2), uintptr(arg3), uintptr(arg4), uintptr(arg5), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_386.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_386.go index c81b0ad4..07b549cc 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_386.go @@ -411,16 +411,6 @@ func getrlimit(resource int, rlim *rlimit32) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func setrlimit(resource int, rlim *rlimit32) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func futimesat(dirfd int, path string, times *[2]Timeval) (err error) { var _p0 *byte _p0, err = BytePtrFromString(path) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_amd64.go index 2206bce7..5f481bf8 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_amd64.go @@ -334,16 +334,6 @@ func setfsuid(uid int) (prev int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(resource int, rlim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Shutdown(fd int, how int) (err error) { _, _, e1 := Syscall(SYS_SHUTDOWN, uintptr(fd), uintptr(how), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_arm.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_arm.go index edf6b39f..824cd52c 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_arm.go @@ -578,16 +578,6 @@ func getrlimit(resource int, rlim *rlimit32) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func setrlimit(resource int, rlim *rlimit32) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func armSyncFileRange(fd int, flags int, off int64, n int64) (err error) { _, _, e1 := Syscall6(SYS_ARM_SYNC_FILE_RANGE, uintptr(fd), uintptr(flags), uintptr(off), uintptr(off>>32), uintptr(n), uintptr(n>>32)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_arm64.go index 190609f2..e77aecfe 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_arm64.go @@ -289,16 +289,6 @@ func setfsuid(uid int) (prev int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func setrlimit(resource int, rlim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Shutdown(fd int, how int) (err error) { _, _, e1 := Syscall(SYS_SHUTDOWN, uintptr(fd), uintptr(how), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_mips.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_mips.go index 5f984cbb..961a3afb 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_mips.go @@ -644,16 +644,6 @@ func getrlimit(resource int, rlim *rlimit32) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func setrlimit(resource int, rlim *rlimit32) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Alarm(seconds uint) (remaining uint, err error) { r0, _, e1 := Syscall(SYS_ALARM, uintptr(seconds), 0, 0) remaining = uint(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_mips64.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_mips64.go index 46fc380a..ed05005e 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_mips64.go @@ -278,16 +278,6 @@ func setfsuid(uid int) (prev int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(resource int, rlim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Shutdown(fd int, how int) (err error) { _, _, e1 := Syscall(SYS_SHUTDOWN, uintptr(fd), uintptr(how), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_mips64le.go index cbd0d4da..d365b718 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_mips64le.go @@ -278,16 +278,6 @@ func setfsuid(uid int) (prev int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(resource int, rlim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Shutdown(fd int, how int) (err error) { _, _, e1 := Syscall(SYS_SHUTDOWN, uintptr(fd), uintptr(how), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_mipsle.go index 0c13d15f..c3f1b8bb 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_mipsle.go @@ -644,16 +644,6 @@ func getrlimit(resource int, rlim *rlimit32) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func setrlimit(resource int, rlim *rlimit32) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Alarm(seconds uint) (remaining uint, err error) { r0, _, e1 := Syscall(SYS_ALARM, uintptr(seconds), 0, 0) remaining = uint(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc.go index e01432ae..a6574cf9 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc.go @@ -624,16 +624,6 @@ func getrlimit(resource int, rlim *rlimit32) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func setrlimit(resource int, rlim *rlimit32) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func syncFileRange2(fd int, flags int, off int64, n int64) (err error) { _, _, e1 := Syscall6(SYS_SYNC_FILE_RANGE2, uintptr(fd), uintptr(flags), uintptr(off>>32), uintptr(off), uintptr(n>>32), uintptr(n)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc64.go index 13c7ee7b..f4099026 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc64.go @@ -349,16 +349,6 @@ func setfsuid(uid int) (prev int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(resource int, rlim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Shutdown(fd int, how int) (err error) { _, _, e1 := Syscall(SYS_SHUTDOWN, uintptr(fd), uintptr(how), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc64le.go index 02d0c0fd..9dfcc299 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_ppc64le.go @@ -349,16 +349,6 @@ func setfsuid(uid int) (prev int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(resource int, rlim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Shutdown(fd int, how int) (err error) { _, _, e1 := Syscall(SYS_SHUTDOWN, uintptr(fd), uintptr(how), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_riscv64.go index 9fee3b1d..0b292395 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_riscv64.go @@ -269,16 +269,6 @@ func setfsuid(uid int) (prev int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(resource int, rlim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Shutdown(fd int, how int) (err error) { _, _, e1 := Syscall(SYS_SHUTDOWN, uintptr(fd), uintptr(how), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_s390x.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_s390x.go index 647bbfec..6cde3223 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_s390x.go @@ -319,16 +319,6 @@ func setfsuid(uid int) (prev int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(resource int, rlim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Splice(rfd int, roff *int64, wfd int, woff *int64, len int, flags int) (n int64, err error) { r0, _, e1 := Syscall6(SYS_SPLICE, uintptr(rfd), uintptr(unsafe.Pointer(roff)), uintptr(wfd), uintptr(unsafe.Pointer(woff)), uintptr(len), uintptr(flags)) n = int64(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zsyscall_linux_sparc64.go index ada057f8..5253d65b 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux_sparc64.go @@ -329,16 +329,6 @@ func setfsuid(uid int) (prev int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(resource int, rlim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(resource), uintptr(unsafe.Pointer(rlim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Shutdown(fd int, how int) (err error) { _, _, e1 := Syscall(SYS_SHUTDOWN, uintptr(fd), uintptr(how), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_netbsd_386.go b/vendor/golang.org/x/sys/unix/zsyscall_netbsd_386.go index 79f73899..cdb2af5a 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_netbsd_386.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_netbsd_386.go @@ -405,6 +405,16 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -1597,16 +1607,6 @@ func Setreuid(ruid int, euid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := RawSyscall(SYS_SETSID, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_netbsd_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_netbsd_amd64.go index fb161f3a..9d25f76b 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_netbsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_netbsd_amd64.go @@ -405,6 +405,16 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -1597,16 +1607,6 @@ func Setreuid(ruid int, euid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := RawSyscall(SYS_SETSID, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_netbsd_arm.go b/vendor/golang.org/x/sys/unix/zsyscall_netbsd_arm.go index 4c8ac993..d3f80351 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_netbsd_arm.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_netbsd_arm.go @@ -405,6 +405,16 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -1597,16 +1607,6 @@ func Setreuid(ruid int, euid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := RawSyscall(SYS_SETSID, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_netbsd_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_netbsd_arm64.go index 76dd8ec4..887188a5 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_netbsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_netbsd_arm64.go @@ -405,6 +405,16 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -1597,16 +1607,6 @@ func Setreuid(ruid int, euid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := RawSyscall(SYS_SETRLIMIT, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := RawSyscall(SYS_SETSID, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go index caeb807b..6699a783 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go @@ -527,6 +527,14 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { return } +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := syscall_syscall(libc_ioctl_trampoline_addr, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + var libc_ioctl_trampoline_addr uintptr //go:cgo_import_dynamic libc_ioctl ioctl "libc.so" @@ -1886,20 +1894,6 @@ var libc_setresuid_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := syscall_rawSyscall(libc_setrlimit_trampoline_addr, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -var libc_setrlimit_trampoline_addr uintptr - -//go:cgo_import_dynamic libc_setrlimit setrlimit "libc.so" - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setrtable(rtable int) (err error) { _, _, e1 := syscall_rawSyscall(libc_setrtable_trampoline_addr, uintptr(rtable), 0, 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s index 08744425..04f0de34 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s @@ -573,11 +573,6 @@ TEXT libc_setresuid_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_setresuid_trampoline_addr(SB), RODATA, $4 DATA ·libc_setresuid_trampoline_addr(SB)/4, $libc_setresuid_trampoline<>(SB) -TEXT libc_setrlimit_trampoline<>(SB),NOSPLIT,$0-0 - JMP libc_setrlimit(SB) -GLOBL ·libc_setrlimit_trampoline_addr(SB), RODATA, $4 -DATA ·libc_setrlimit_trampoline_addr(SB)/4, $libc_setrlimit_trampoline<>(SB) - TEXT libc_setrtable_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_setrtable(SB) GLOBL ·libc_setrtable_trampoline_addr(SB), RODATA, $4 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go index a05e5f4f..1e775fe0 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go @@ -527,6 +527,14 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { return } +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := syscall_syscall(libc_ioctl_trampoline_addr, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + var libc_ioctl_trampoline_addr uintptr //go:cgo_import_dynamic libc_ioctl ioctl "libc.so" @@ -1886,20 +1894,6 @@ var libc_setresuid_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := syscall_rawSyscall(libc_setrlimit_trampoline_addr, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -var libc_setrlimit_trampoline_addr uintptr - -//go:cgo_import_dynamic libc_setrlimit setrlimit "libc.so" - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setrtable(rtable int) (err error) { _, _, e1 := syscall_rawSyscall(libc_setrtable_trampoline_addr, uintptr(rtable), 0, 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s index 5782cd10..27b6f4df 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s @@ -573,11 +573,6 @@ TEXT libc_setresuid_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_setresuid_trampoline_addr(SB), RODATA, $8 DATA ·libc_setresuid_trampoline_addr(SB)/8, $libc_setresuid_trampoline<>(SB) -TEXT libc_setrlimit_trampoline<>(SB),NOSPLIT,$0-0 - JMP libc_setrlimit(SB) -GLOBL ·libc_setrlimit_trampoline_addr(SB), RODATA, $8 -DATA ·libc_setrlimit_trampoline_addr(SB)/8, $libc_setrlimit_trampoline<>(SB) - TEXT libc_setrtable_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_setrtable(SB) GLOBL ·libc_setrtable_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go index b2da8e50..7f642789 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go @@ -527,6 +527,14 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { return } +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := syscall_syscall(libc_ioctl_trampoline_addr, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + var libc_ioctl_trampoline_addr uintptr //go:cgo_import_dynamic libc_ioctl ioctl "libc.so" @@ -1886,20 +1894,6 @@ var libc_setresuid_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := syscall_rawSyscall(libc_setrlimit_trampoline_addr, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -var libc_setrlimit_trampoline_addr uintptr - -//go:cgo_import_dynamic libc_setrlimit setrlimit "libc.so" - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setrtable(rtable int) (err error) { _, _, e1 := syscall_rawSyscall(libc_setrtable_trampoline_addr, uintptr(rtable), 0, 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s index cf310420..b797045f 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s @@ -573,11 +573,6 @@ TEXT libc_setresuid_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_setresuid_trampoline_addr(SB), RODATA, $4 DATA ·libc_setresuid_trampoline_addr(SB)/4, $libc_setresuid_trampoline<>(SB) -TEXT libc_setrlimit_trampoline<>(SB),NOSPLIT,$0-0 - JMP libc_setrlimit(SB) -GLOBL ·libc_setrlimit_trampoline_addr(SB), RODATA, $4 -DATA ·libc_setrlimit_trampoline_addr(SB)/4, $libc_setrlimit_trampoline<>(SB) - TEXT libc_setrtable_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_setrtable(SB) GLOBL ·libc_setrtable_trampoline_addr(SB), RODATA, $4 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go index 048b2655..756ef7b1 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go @@ -527,6 +527,14 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { return } +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := syscall_syscall(libc_ioctl_trampoline_addr, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + var libc_ioctl_trampoline_addr uintptr //go:cgo_import_dynamic libc_ioctl ioctl "libc.so" @@ -1886,20 +1894,6 @@ var libc_setresuid_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := syscall_rawSyscall(libc_setrlimit_trampoline_addr, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -var libc_setrlimit_trampoline_addr uintptr - -//go:cgo_import_dynamic libc_setrlimit setrlimit "libc.so" - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setrtable(rtable int) (err error) { _, _, e1 := syscall_rawSyscall(libc_setrtable_trampoline_addr, uintptr(rtable), 0, 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s index 484bb42e..a8712662 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s @@ -573,11 +573,6 @@ TEXT libc_setresuid_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_setresuid_trampoline_addr(SB), RODATA, $8 DATA ·libc_setresuid_trampoline_addr(SB)/8, $libc_setresuid_trampoline<>(SB) -TEXT libc_setrlimit_trampoline<>(SB),NOSPLIT,$0-0 - JMP libc_setrlimit(SB) -GLOBL ·libc_setrlimit_trampoline_addr(SB), RODATA, $8 -DATA ·libc_setrlimit_trampoline_addr(SB)/8, $libc_setrlimit_trampoline<>(SB) - TEXT libc_setrtable_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_setrtable(SB) GLOBL ·libc_setrtable_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go index 6f33e37e..7bc2e24e 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go @@ -527,6 +527,14 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { return } +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := syscall_syscall(libc_ioctl_trampoline_addr, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + var libc_ioctl_trampoline_addr uintptr //go:cgo_import_dynamic libc_ioctl ioctl "libc.so" @@ -1886,20 +1894,6 @@ var libc_setresuid_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := syscall_rawSyscall(libc_setrlimit_trampoline_addr, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -var libc_setrlimit_trampoline_addr uintptr - -//go:cgo_import_dynamic libc_setrlimit setrlimit "libc.so" - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setrtable(rtable int) (err error) { _, _, e1 := syscall_rawSyscall(libc_setrtable_trampoline_addr, uintptr(rtable), 0, 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s index 55af2726..05d4bffd 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s @@ -573,11 +573,6 @@ TEXT libc_setresuid_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_setresuid_trampoline_addr(SB), RODATA, $8 DATA ·libc_setresuid_trampoline_addr(SB)/8, $libc_setresuid_trampoline<>(SB) -TEXT libc_setrlimit_trampoline<>(SB),NOSPLIT,$0-0 - JMP libc_setrlimit(SB) -GLOBL ·libc_setrlimit_trampoline_addr(SB), RODATA, $8 -DATA ·libc_setrlimit_trampoline_addr(SB)/8, $libc_setrlimit_trampoline<>(SB) - TEXT libc_setrtable_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_setrtable(SB) GLOBL ·libc_setrtable_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go index 330cf7f7..739be621 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go @@ -527,6 +527,14 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { return } +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := syscall_syscall(libc_ioctl_trampoline_addr, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + var libc_ioctl_trampoline_addr uintptr //go:cgo_import_dynamic libc_ioctl ioctl "libc.so" @@ -1886,20 +1894,6 @@ var libc_setresuid_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := syscall_rawSyscall(libc_setrlimit_trampoline_addr, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -var libc_setrlimit_trampoline_addr uintptr - -//go:cgo_import_dynamic libc_setrlimit setrlimit "libc.so" - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setrtable(rtable int) (err error) { _, _, e1 := syscall_rawSyscall(libc_setrtable_trampoline_addr, uintptr(rtable), 0, 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s index 4028255b..74a25f8d 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s @@ -687,12 +687,6 @@ TEXT libc_setresuid_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_setresuid_trampoline_addr(SB), RODATA, $8 DATA ·libc_setresuid_trampoline_addr(SB)/8, $libc_setresuid_trampoline<>(SB) -TEXT libc_setrlimit_trampoline<>(SB),NOSPLIT,$0-0 - CALL libc_setrlimit(SB) - RET -GLOBL ·libc_setrlimit_trampoline_addr(SB), RODATA, $8 -DATA ·libc_setrlimit_trampoline_addr(SB)/8, $libc_setrlimit_trampoline<>(SB) - TEXT libc_setrtable_trampoline<>(SB),NOSPLIT,$0-0 CALL libc_setrtable(SB) RET diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go index 5f24de0d..7d95a197 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go @@ -527,6 +527,14 @@ func ioctl(fd int, req uint, arg uintptr) (err error) { return } +func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { + _, _, e1 := syscall_syscall(libc_ioctl_trampoline_addr, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + var libc_ioctl_trampoline_addr uintptr //go:cgo_import_dynamic libc_ioctl ioctl "libc.so" @@ -1886,20 +1894,6 @@ var libc_setresuid_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := syscall_rawSyscall(libc_setrlimit_trampoline_addr, uintptr(which), uintptr(unsafe.Pointer(lim)), 0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -var libc_setrlimit_trampoline_addr uintptr - -//go:cgo_import_dynamic libc_setrlimit setrlimit "libc.so" - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setrtable(rtable int) (err error) { _, _, e1 := syscall_rawSyscall(libc_setrtable_trampoline_addr, uintptr(rtable), 0, 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s index e1fbd4df..990be245 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s @@ -573,11 +573,6 @@ TEXT libc_setresuid_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_setresuid_trampoline_addr(SB), RODATA, $8 DATA ·libc_setresuid_trampoline_addr(SB)/8, $libc_setresuid_trampoline<>(SB) -TEXT libc_setrlimit_trampoline<>(SB),NOSPLIT,$0-0 - JMP libc_setrlimit(SB) -GLOBL ·libc_setrlimit_trampoline_addr(SB), RODATA, $8 -DATA ·libc_setrlimit_trampoline_addr(SB)/8, $libc_setrlimit_trampoline<>(SB) - TEXT libc_setrtable_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_setrtable(SB) GLOBL ·libc_setrtable_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_solaris_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_solaris_amd64.go index 78d4a424..609d1c59 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_solaris_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_solaris_amd64.go @@ -110,7 +110,6 @@ import ( //go:cgo_import_dynamic libc_setpriority setpriority "libc.so" //go:cgo_import_dynamic libc_setregid setregid "libc.so" //go:cgo_import_dynamic libc_setreuid setreuid "libc.so" -//go:cgo_import_dynamic libc_setrlimit setrlimit "libc.so" //go:cgo_import_dynamic libc_setsid setsid "libc.so" //go:cgo_import_dynamic libc_setuid setuid "libc.so" //go:cgo_import_dynamic libc_shutdown shutdown "libsocket.so" @@ -250,7 +249,6 @@ import ( //go:linkname procSetpriority libc_setpriority //go:linkname procSetregid libc_setregid //go:linkname procSetreuid libc_setreuid -//go:linkname procSetrlimit libc_setrlimit //go:linkname procSetsid libc_setsid //go:linkname procSetuid libc_setuid //go:linkname procshutdown libc_shutdown @@ -391,7 +389,6 @@ var ( procSetpriority, procSetregid, procSetreuid, - procSetrlimit, procSetsid, procSetuid, procshutdown, @@ -646,7 +643,18 @@ func __minor(version int, dev uint64) (val uint) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func ioctlRet(fd int, req uint, arg uintptr) (ret int, err error) { +func ioctlRet(fd int, req int, arg uintptr) (ret int, err error) { + r0, _, e1 := sysvicall6(uintptr(unsafe.Pointer(&procioctl)), 3, uintptr(fd), uintptr(req), uintptr(arg), 0, 0, 0) + ret = int(r0) + if e1 != 0 { + err = e1 + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func ioctlPtrRet(fd int, req int, arg unsafe.Pointer) (ret int, err error) { r0, _, e1 := sysvicall6(uintptr(unsafe.Pointer(&procioctl)), 3, uintptr(fd), uintptr(req), uintptr(arg), 0, 0, 0) ret = int(r0) if e1 != 0 { @@ -1639,16 +1647,6 @@ func Setreuid(ruid int, euid int) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Setrlimit(which int, lim *Rlimit) (err error) { - _, _, e1 := rawSysvicall6(uintptr(unsafe.Pointer(&procSetrlimit)), 2, uintptr(which), uintptr(unsafe.Pointer(lim)), 0, 0, 0, 0) - if e1 != 0 { - err = e1 - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Setsid() (pid int, err error) { r0, _, e1 := rawSysvicall6(uintptr(unsafe.Pointer(&procSetsid)), 0, 0, 0, 0, 0, 0, 0) pid = int(r0) diff --git a/vendor/golang.org/x/sys/unix/zsyscall_zos_s390x.go b/vendor/golang.org/x/sys/unix/zsyscall_zos_s390x.go index f2079457..c3168174 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_zos_s390x.go @@ -257,7 +257,17 @@ func munmap(addr uintptr, length uintptr) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func ioctl(fd int, req uint, arg uintptr) (err error) { +func ioctl(fd int, req int, arg uintptr) (err error) { + _, _, e1 := syscall_syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func ioctlPtr(fd int, req int, arg unsafe.Pointer) (err error) { _, _, e1 := syscall_syscall(SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) if e1 != 0 { err = errnoErr(e1) diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go index e2a64f09..690cefc3 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go @@ -151,6 +151,16 @@ type Dirent struct { _ [3]byte } +type Attrlist struct { + Bitmapcount uint16 + Reserved uint16 + Commonattr uint32 + Volattr uint32 + Dirattr uint32 + Fileattr uint32 + Forkattr uint32 +} + const ( PathMax = 0x400 ) @@ -610,6 +620,7 @@ const ( AT_REMOVEDIR = 0x80 AT_SYMLINK_FOLLOW = 0x40 AT_SYMLINK_NOFOLLOW = 0x20 + AT_EACCESS = 0x10 ) type PollFd struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go index 34aa7752..5bffc10e 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go @@ -151,6 +151,16 @@ type Dirent struct { _ [3]byte } +type Attrlist struct { + Bitmapcount uint16 + Reserved uint16 + Commonattr uint32 + Volattr uint32 + Dirattr uint32 + Fileattr uint32 + Forkattr uint32 +} + const ( PathMax = 0x400 ) @@ -610,6 +620,7 @@ const ( AT_REMOVEDIR = 0x80 AT_SYMLINK_FOLLOW = 0x40 AT_SYMLINK_NOFOLLOW = 0x20 + AT_EACCESS = 0x10 ) type PollFd struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go index d9c78cdc..29dc4833 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go @@ -362,7 +362,7 @@ type FpExtendedPrecision struct{} type PtraceIoDesc struct { Op int32 Offs uintptr - Addr uintptr + Addr *byte Len uint32 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go index 26991b16..0a89b289 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go @@ -367,7 +367,7 @@ type FpExtendedPrecision struct{} type PtraceIoDesc struct { Op int32 Offs uintptr - Addr uintptr + Addr *byte Len uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go index f8324e7e..c8666bb1 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go @@ -350,7 +350,7 @@ type FpExtendedPrecision struct { type PtraceIoDesc struct { Op int32 Offs uintptr - Addr uintptr + Addr *byte Len uint32 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go index 4220411f..88fb48a8 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go @@ -347,7 +347,7 @@ type FpExtendedPrecision struct{} type PtraceIoDesc struct { Op int32 Offs uintptr - Addr uintptr + Addr *byte Len uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go index 0660fd45..698dc975 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go @@ -348,7 +348,7 @@ type FpExtendedPrecision struct{} type PtraceIoDesc struct { Op int32 Offs uintptr - Addr uintptr + Addr *byte Len uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index 7d9fc8f1..ca84727c 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -456,36 +456,60 @@ type Ucred struct { } type TCPInfo struct { - State uint8 - Ca_state uint8 - Retransmits uint8 - Probes uint8 - Backoff uint8 - Options uint8 - Rto uint32 - Ato uint32 - Snd_mss uint32 - Rcv_mss uint32 - Unacked uint32 - Sacked uint32 - Lost uint32 - Retrans uint32 - Fackets uint32 - Last_data_sent uint32 - Last_ack_sent uint32 - Last_data_recv uint32 - Last_ack_recv uint32 - Pmtu uint32 - Rcv_ssthresh uint32 - Rtt uint32 - Rttvar uint32 - Snd_ssthresh uint32 - Snd_cwnd uint32 - Advmss uint32 - Reordering uint32 - Rcv_rtt uint32 - Rcv_space uint32 - Total_retrans uint32 + State uint8 + Ca_state uint8 + Retransmits uint8 + Probes uint8 + Backoff uint8 + Options uint8 + Rto uint32 + Ato uint32 + Snd_mss uint32 + Rcv_mss uint32 + Unacked uint32 + Sacked uint32 + Lost uint32 + Retrans uint32 + Fackets uint32 + Last_data_sent uint32 + Last_ack_sent uint32 + Last_data_recv uint32 + Last_ack_recv uint32 + Pmtu uint32 + Rcv_ssthresh uint32 + Rtt uint32 + Rttvar uint32 + Snd_ssthresh uint32 + Snd_cwnd uint32 + Advmss uint32 + Reordering uint32 + Rcv_rtt uint32 + Rcv_space uint32 + Total_retrans uint32 + Pacing_rate uint64 + Max_pacing_rate uint64 + Bytes_acked uint64 + Bytes_received uint64 + Segs_out uint32 + Segs_in uint32 + Notsent_bytes uint32 + Min_rtt uint32 + Data_segs_in uint32 + Data_segs_out uint32 + Delivery_rate uint64 + Busy_time uint64 + Rwnd_limited uint64 + Sndbuf_limited uint64 + Delivered uint32 + Delivered_ce uint32 + Bytes_sent uint64 + Bytes_retrans uint64 + Dsack_dups uint32 + Reord_seen uint32 + Rcv_ooopack uint32 + Snd_wnd uint32 + Rcv_wnd uint32 + Rehash uint32 } type CanFilter struct { @@ -528,7 +552,7 @@ const ( SizeofIPv6MTUInfo = 0x20 SizeofICMPv6Filter = 0x20 SizeofUcred = 0xc - SizeofTCPInfo = 0x68 + SizeofTCPInfo = 0xf0 SizeofCanFilter = 0x8 SizeofTCPRepairOpt = 0x8 ) @@ -1043,6 +1067,7 @@ const ( PerfBitCommExec = CBitFieldMaskBit24 PerfBitUseClockID = CBitFieldMaskBit25 PerfBitContextSwitch = CBitFieldMaskBit26 + PerfBitWriteBackward = CBitFieldMaskBit27 ) const ( @@ -1239,7 +1264,7 @@ type TCPMD5Sig struct { Flags uint8 Prefixlen uint8 Keylen uint16 - _ uint32 + Ifindex int32 Key [80]uint8 } @@ -1939,7 +1964,11 @@ const ( NFT_MSG_GETOBJ = 0x13 NFT_MSG_DELOBJ = 0x14 NFT_MSG_GETOBJ_RESET = 0x15 - NFT_MSG_MAX = 0x19 + NFT_MSG_NEWFLOWTABLE = 0x16 + NFT_MSG_GETFLOWTABLE = 0x17 + NFT_MSG_DELFLOWTABLE = 0x18 + NFT_MSG_GETRULE_RESET = 0x19 + NFT_MSG_MAX = 0x1a NFTA_LIST_UNSPEC = 0x0 NFTA_LIST_ELEM = 0x1 NFTA_HOOK_UNSPEC = 0x0 @@ -2443,9 +2472,11 @@ const ( SOF_TIMESTAMPING_OPT_STATS = 0x1000 SOF_TIMESTAMPING_OPT_PKTINFO = 0x2000 SOF_TIMESTAMPING_OPT_TX_SWHW = 0x4000 + SOF_TIMESTAMPING_BIND_PHC = 0x8000 + SOF_TIMESTAMPING_OPT_ID_TCP = 0x10000 - SOF_TIMESTAMPING_LAST = 0x8000 - SOF_TIMESTAMPING_MASK = 0xffff + SOF_TIMESTAMPING_LAST = 0x10000 + SOF_TIMESTAMPING_MASK = 0x1ffff SCM_TSTAMP_SND = 0x0 SCM_TSTAMP_SCHED = 0x1 @@ -3265,7 +3296,7 @@ const ( DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES = 0xae DEVLINK_ATTR_NESTED_DEVLINK = 0xaf DEVLINK_ATTR_SELFTESTS = 0xb0 - DEVLINK_ATTR_MAX = 0xb0 + DEVLINK_ATTR_MAX = 0xb3 DEVLINK_DPIPE_FIELD_MAPPING_TYPE_NONE = 0x0 DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX = 0x1 DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT = 0x0 @@ -3281,7 +3312,8 @@ const ( DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR = 0x1 DEVLINK_PORT_FN_ATTR_STATE = 0x2 DEVLINK_PORT_FN_ATTR_OPSTATE = 0x3 - DEVLINK_PORT_FUNCTION_ATTR_MAX = 0x3 + DEVLINK_PORT_FN_ATTR_CAPS = 0x4 + DEVLINK_PORT_FUNCTION_ATTR_MAX = 0x4 ) type FsverityDigest struct { @@ -3572,7 +3604,8 @@ const ( ETHTOOL_MSG_MODULE_SET = 0x23 ETHTOOL_MSG_PSE_GET = 0x24 ETHTOOL_MSG_PSE_SET = 0x25 - ETHTOOL_MSG_USER_MAX = 0x25 + ETHTOOL_MSG_RSS_GET = 0x26 + ETHTOOL_MSG_USER_MAX = 0x26 ETHTOOL_MSG_KERNEL_NONE = 0x0 ETHTOOL_MSG_STRSET_GET_REPLY = 0x1 ETHTOOL_MSG_LINKINFO_GET_REPLY = 0x2 @@ -3611,7 +3644,8 @@ const ( ETHTOOL_MSG_MODULE_GET_REPLY = 0x23 ETHTOOL_MSG_MODULE_NTF = 0x24 ETHTOOL_MSG_PSE_GET_REPLY = 0x25 - ETHTOOL_MSG_KERNEL_MAX = 0x25 + ETHTOOL_MSG_RSS_GET_REPLY = 0x26 + ETHTOOL_MSG_KERNEL_MAX = 0x26 ETHTOOL_A_HEADER_UNSPEC = 0x0 ETHTOOL_A_HEADER_DEV_INDEX = 0x1 ETHTOOL_A_HEADER_DEV_NAME = 0x2 @@ -3679,7 +3713,8 @@ const ( ETHTOOL_A_LINKSTATE_SQI_MAX = 0x4 ETHTOOL_A_LINKSTATE_EXT_STATE = 0x5 ETHTOOL_A_LINKSTATE_EXT_SUBSTATE = 0x6 - ETHTOOL_A_LINKSTATE_MAX = 0x6 + ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT = 0x7 + ETHTOOL_A_LINKSTATE_MAX = 0x7 ETHTOOL_A_DEBUG_UNSPEC = 0x0 ETHTOOL_A_DEBUG_HEADER = 0x1 ETHTOOL_A_DEBUG_MSGMASK = 0x2 @@ -4409,7 +4444,7 @@ const ( NL80211_ATTR_MAC_HINT = 0xc8 NL80211_ATTR_MAC_MASK = 0xd7 NL80211_ATTR_MAX_AP_ASSOC_STA = 0xca - NL80211_ATTR_MAX = 0x140 + NL80211_ATTR_MAX = 0x141 NL80211_ATTR_MAX_CRIT_PROT_DURATION = 0xb4 NL80211_ATTR_MAX_CSA_COUNTERS = 0xce NL80211_ATTR_MAX_MATCH_SETS = 0x85 @@ -4552,6 +4587,7 @@ const ( NL80211_ATTR_SUPPORT_MESH_AUTH = 0x73 NL80211_ATTR_SURVEY_INFO = 0x54 NL80211_ATTR_SURVEY_RADIO_STATS = 0xda + NL80211_ATTR_TD_BITMAP = 0x141 NL80211_ATTR_TDLS_ACTION = 0x88 NL80211_ATTR_TDLS_DIALOG_TOKEN = 0x89 NL80211_ATTR_TDLS_EXTERNAL_SETUP = 0x8c @@ -5752,3 +5788,25 @@ const ( AUDIT_NLGRP_NONE = 0x0 AUDIT_NLGRP_READLOG = 0x1 ) + +const ( + TUN_F_CSUM = 0x1 + TUN_F_TSO4 = 0x2 + TUN_F_TSO6 = 0x4 + TUN_F_TSO_ECN = 0x8 + TUN_F_UFO = 0x10 +) + +const ( + VIRTIO_NET_HDR_F_NEEDS_CSUM = 0x1 + VIRTIO_NET_HDR_F_DATA_VALID = 0x2 + VIRTIO_NET_HDR_F_RSC_INFO = 0x4 +) + +const ( + VIRTIO_NET_HDR_GSO_NONE = 0x0 + VIRTIO_NET_HDR_GSO_TCPV4 = 0x1 + VIRTIO_NET_HDR_GSO_UDP = 0x3 + VIRTIO_NET_HDR_GSO_TCPV6 = 0x4 + VIRTIO_NET_HDR_GSO_ECN = 0x80 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_386.go b/vendor/golang.org/x/sys/unix/ztypes_linux_386.go index 89c516a2..4ecc1495 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_386.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_386.go @@ -414,7 +414,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [122]int8 + Data [122]byte _ uint32 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_amd64.go index 62b4fb26..34fddff9 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_amd64.go @@ -427,7 +427,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [118]int8 + Data [118]byte _ uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_arm.go b/vendor/golang.org/x/sys/unix/ztypes_linux_arm.go index e86b3589..3b14a603 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_arm.go @@ -405,7 +405,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [122]uint8 + Data [122]byte _ uint32 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_arm64.go index 6c6be4c9..0517651a 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_arm64.go @@ -406,7 +406,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [118]int8 + Data [118]byte _ uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_loong64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_loong64.go index 4982ea35..3b0c5181 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_loong64.go @@ -407,7 +407,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [118]int8 + Data [118]byte _ uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_mips.go b/vendor/golang.org/x/sys/unix/ztypes_linux_mips.go index 173141a6..fccdf4dd 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_mips.go @@ -410,7 +410,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [122]int8 + Data [122]byte _ uint32 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_mips64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_mips64.go index 93ae4c51..500de8fc 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_mips64.go @@ -409,7 +409,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [118]int8 + Data [118]byte _ uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_mips64le.go b/vendor/golang.org/x/sys/unix/ztypes_linux_mips64le.go index 4e4e510c..d0434cd2 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_mips64le.go @@ -409,7 +409,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [118]int8 + Data [118]byte _ uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_mipsle.go b/vendor/golang.org/x/sys/unix/ztypes_linux_mipsle.go index 3f5ba013..84206ba5 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_mipsle.go @@ -410,7 +410,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [122]int8 + Data [122]byte _ uint32 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc.go b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc.go index 71dfe7cd..ab078cf1 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc.go @@ -417,7 +417,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [122]uint8 + Data [122]byte _ uint32 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64.go index 3a2b7f0a..42eb2c4c 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64.go @@ -416,7 +416,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [118]uint8 + Data [118]byte _ uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64le.go index a52d6275..31304a4e 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64le.go @@ -416,7 +416,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [118]uint8 + Data [118]byte _ uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go index dfc007d8..c311f961 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go @@ -434,7 +434,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [118]uint8 + Data [118]byte _ uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_s390x.go b/vendor/golang.org/x/sys/unix/ztypes_linux_s390x.go index b53cb910..bba3cefa 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_s390x.go @@ -429,7 +429,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [118]int8 + Data [118]byte _ uint64 } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_sparc64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_sparc64.go index fe0aa354..ad8a0138 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_sparc64.go @@ -411,7 +411,7 @@ const ( type SockaddrStorage struct { Family uint16 - _ [118]int8 + Data [118]byte _ uint64 } diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go index 41cb3c01..3723b2c2 100644 --- a/vendor/golang.org/x/sys/windows/syscall_windows.go +++ b/vendor/golang.org/x/sys/windows/syscall_windows.go @@ -824,6 +824,9 @@ const socket_error = uintptr(^uint32(0)) //sys WSAStartup(verreq uint32, data *WSAData) (sockerr error) = ws2_32.WSAStartup //sys WSACleanup() (err error) [failretval==socket_error] = ws2_32.WSACleanup //sys WSAIoctl(s Handle, iocc uint32, inbuf *byte, cbif uint32, outbuf *byte, cbob uint32, cbbr *uint32, overlapped *Overlapped, completionRoutine uintptr) (err error) [failretval==socket_error] = ws2_32.WSAIoctl +//sys WSALookupServiceBegin(querySet *WSAQUERYSET, flags uint32, handle *Handle) (err error) [failretval==socket_error] = ws2_32.WSALookupServiceBeginW +//sys WSALookupServiceNext(handle Handle, flags uint32, size *int32, querySet *WSAQUERYSET) (err error) [failretval==socket_error] = ws2_32.WSALookupServiceNextW +//sys WSALookupServiceEnd(handle Handle) (err error) [failretval==socket_error] = ws2_32.WSALookupServiceEnd //sys socket(af int32, typ int32, protocol int32) (handle Handle, err error) [failretval==InvalidHandle] = ws2_32.socket //sys sendto(s Handle, buf []byte, flags int32, to unsafe.Pointer, tolen int32) (err error) [failretval==socket_error] = ws2_32.sendto //sys recvfrom(s Handle, buf []byte, flags int32, from *RawSockaddrAny, fromlen *int32) (n int32, err error) [failretval==-1] = ws2_32.recvfrom @@ -1019,8 +1022,7 @@ func (rsa *RawSockaddrAny) Sockaddr() (Sockaddr, error) { for n < len(pp.Path) && pp.Path[n] != 0 { n++ } - bytes := (*[len(pp.Path)]byte)(unsafe.Pointer(&pp.Path[0]))[0:n] - sa.Name = string(bytes) + sa.Name = string(unsafe.Slice((*byte)(unsafe.Pointer(&pp.Path[0])), n)) return sa, nil case AF_INET: diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go index 0c4add97..0dbb2084 100644 --- a/vendor/golang.org/x/sys/windows/types_windows.go +++ b/vendor/golang.org/x/sys/windows/types_windows.go @@ -1243,6 +1243,51 @@ const ( DnsSectionAdditional = 0x0003 ) +const ( + // flags of WSALookupService + LUP_DEEP = 0x0001 + LUP_CONTAINERS = 0x0002 + LUP_NOCONTAINERS = 0x0004 + LUP_NEAREST = 0x0008 + LUP_RETURN_NAME = 0x0010 + LUP_RETURN_TYPE = 0x0020 + LUP_RETURN_VERSION = 0x0040 + LUP_RETURN_COMMENT = 0x0080 + LUP_RETURN_ADDR = 0x0100 + LUP_RETURN_BLOB = 0x0200 + LUP_RETURN_ALIASES = 0x0400 + LUP_RETURN_QUERY_STRING = 0x0800 + LUP_RETURN_ALL = 0x0FF0 + LUP_RES_SERVICE = 0x8000 + + LUP_FLUSHCACHE = 0x1000 + LUP_FLUSHPREVIOUS = 0x2000 + + LUP_NON_AUTHORITATIVE = 0x4000 + LUP_SECURE = 0x8000 + LUP_RETURN_PREFERRED_NAMES = 0x10000 + LUP_DNS_ONLY = 0x20000 + + LUP_ADDRCONFIG = 0x100000 + LUP_DUAL_ADDR = 0x200000 + LUP_FILESERVER = 0x400000 + LUP_DISABLE_IDN_ENCODING = 0x00800000 + LUP_API_ANSI = 0x01000000 + + LUP_RESOLUTION_HANDLE = 0x80000000 +) + +const ( + // values of WSAQUERYSET's namespace + NS_ALL = 0 + NS_DNS = 12 + NS_NLA = 15 + NS_BTH = 16 + NS_EMAIL = 37 + NS_PNRPNAME = 38 + NS_PNRPCLOUD = 39 +) + type DNSSRVData struct { Target *uint16 Priority uint16 @@ -2184,10 +2229,10 @@ const ( JobObjectExtendedLimitInformation = 9 JobObjectGroupInformation = 11 JobObjectGroupInformationEx = 14 - JobObjectLimitViolationInformation2 = 35 + JobObjectLimitViolationInformation2 = 34 JobObjectNetRateControlInformation = 32 JobObjectNotificationLimitInformation = 12 - JobObjectNotificationLimitInformation2 = 34 + JobObjectNotificationLimitInformation2 = 33 JobObjectSecurityLimitInformation = 5 ) @@ -3258,3 +3303,43 @@ const ( DWMWA_TEXT_COLOR = 36 DWMWA_VISIBLE_FRAME_BORDER_THICKNESS = 37 ) + +type WSAQUERYSET struct { + Size uint32 + ServiceInstanceName *uint16 + ServiceClassId *GUID + Version *WSAVersion + Comment *uint16 + NameSpace uint32 + NSProviderId *GUID + Context *uint16 + NumberOfProtocols uint32 + AfpProtocols *AFProtocols + QueryString *uint16 + NumberOfCsAddrs uint32 + SaBuffer *CSAddrInfo + OutputFlags uint32 + Blob *BLOB +} + +type WSAVersion struct { + Version uint32 + EnumerationOfComparison int32 +} + +type AFProtocols struct { + AddressFamily int32 + Protocol int32 +} + +type CSAddrInfo struct { + LocalAddr SocketAddress + RemoteAddr SocketAddress + SocketType int32 + Protocol int32 +} + +type BLOB struct { + Size uint32 + BlobData *byte +} diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go index ac60052e..6d2a2685 100644 --- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go +++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go @@ -474,6 +474,9 @@ var ( procWSAEnumProtocolsW = modws2_32.NewProc("WSAEnumProtocolsW") procWSAGetOverlappedResult = modws2_32.NewProc("WSAGetOverlappedResult") procWSAIoctl = modws2_32.NewProc("WSAIoctl") + procWSALookupServiceBeginW = modws2_32.NewProc("WSALookupServiceBeginW") + procWSALookupServiceEnd = modws2_32.NewProc("WSALookupServiceEnd") + procWSALookupServiceNextW = modws2_32.NewProc("WSALookupServiceNextW") procWSARecv = modws2_32.NewProc("WSARecv") procWSARecvFrom = modws2_32.NewProc("WSARecvFrom") procWSASend = modws2_32.NewProc("WSASend") @@ -4067,6 +4070,30 @@ func WSAIoctl(s Handle, iocc uint32, inbuf *byte, cbif uint32, outbuf *byte, cbo return } +func WSALookupServiceBegin(querySet *WSAQUERYSET, flags uint32, handle *Handle) (err error) { + r1, _, e1 := syscall.Syscall(procWSALookupServiceBeginW.Addr(), 3, uintptr(unsafe.Pointer(querySet)), uintptr(flags), uintptr(unsafe.Pointer(handle))) + if r1 == socket_error { + err = errnoErr(e1) + } + return +} + +func WSALookupServiceEnd(handle Handle) (err error) { + r1, _, e1 := syscall.Syscall(procWSALookupServiceEnd.Addr(), 1, uintptr(handle), 0, 0) + if r1 == socket_error { + err = errnoErr(e1) + } + return +} + +func WSALookupServiceNext(handle Handle, flags uint32, size *int32, querySet *WSAQUERYSET) (err error) { + r1, _, e1 := syscall.Syscall6(procWSALookupServiceNextW.Addr(), 4, uintptr(handle), uintptr(flags), uintptr(unsafe.Pointer(size)), uintptr(unsafe.Pointer(querySet)), 0, 0) + if r1 == socket_error { + err = errnoErr(e1) + } + return +} + func WSARecv(s Handle, bufs *WSABuf, bufcnt uint32, recvd *uint32, flags *uint32, overlapped *Overlapped, croutine *byte) (err error) { r1, _, e1 := syscall.Syscall9(procWSARecv.Addr(), 7, uintptr(s), uintptr(unsafe.Pointer(bufs)), uintptr(bufcnt), uintptr(unsafe.Pointer(recvd)), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(croutine)), 0, 0) if r1 == socket_error { diff --git a/vendor/golang.org/x/text/unicode/norm/forminfo.go b/vendor/golang.org/x/text/unicode/norm/forminfo.go index d69ccb4f..487335d1 100644 --- a/vendor/golang.org/x/text/unicode/norm/forminfo.go +++ b/vendor/golang.org/x/text/unicode/norm/forminfo.go @@ -13,7 +13,7 @@ import "encoding/binary" // a rune to a uint16. The values take two forms. For v >= 0x8000: // bits // 15: 1 (inverse of NFD_QC bit of qcInfo) -// 13..7: qcInfo (see below). isYesD is always true (no decompostion). +// 13..7: qcInfo (see below). isYesD is always true (no decomposition). // 6..0: ccc (compressed CCC value). // For v < 0x8000, the respective rune has a decomposition and v is an index // into a byte array of UTF-8 decomposition sequences and additional info and diff --git a/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go b/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go index 62044620..165ede0f 100644 --- a/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go +++ b/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go @@ -27,7 +27,6 @@ import ( "go/token" "go/types" "io" - "io/ioutil" "os/exec" "golang.org/x/tools/internal/gcimporter" @@ -85,6 +84,19 @@ func NewReader(r io.Reader) (io.Reader, error) { } } +// readAll works the same way as io.ReadAll, but avoids allocations and copies +// by preallocating a byte slice of the necessary size if the size is known up +// front. This is always possible when the input is an archive. In that case, +// NewReader will return the known size using an io.LimitedReader. +func readAll(r io.Reader) ([]byte, error) { + if lr, ok := r.(*io.LimitedReader); ok { + data := make([]byte, lr.N) + _, err := io.ReadFull(lr, data) + return data, err + } + return io.ReadAll(r) +} + // Read reads export data from in, decodes it, and returns type // information for the package. // @@ -102,7 +114,7 @@ func NewReader(r io.Reader) (io.Reader, error) { // // On return, the state of the reader is undefined. func Read(in io.Reader, fset *token.FileSet, imports map[string]*types.Package, path string) (*types.Package, error) { - data, err := ioutil.ReadAll(in) + data, err := readAll(in) if err != nil { return nil, fmt.Errorf("reading export data for %q: %v", path, err) } @@ -111,12 +123,6 @@ func Read(in io.Reader, fset *token.FileSet, imports map[string]*types.Package, return nil, fmt.Errorf("can't read export data for %q directly from an archive file (call gcexportdata.NewReader first to extract export data)", path) } - // The App Engine Go runtime v1.6 uses the old export data format. - // TODO(adonovan): delete once v1.7 has been around for a while. - if bytes.HasPrefix(data, []byte("package ")) { - return gcimporter.ImportData(imports, path, path, bytes.NewReader(data)) - } - // The indexed export format starts with an 'i'; the older // binary export format starts with a 'c', 'd', or 'v' // (from "version"). Select appropriate importer. @@ -165,7 +171,7 @@ func Write(out io.Writer, fset *token.FileSet, pkg *types.Package) error { // // Experimental: This API is experimental and may change in the future. func ReadBundle(in io.Reader, fset *token.FileSet, imports map[string]*types.Package) ([]*types.Package, error) { - data, err := ioutil.ReadAll(in) + data, err := readAll(in) if err != nil { return nil, fmt.Errorf("reading export bundle: %v", err) } diff --git a/vendor/golang.org/x/tools/go/packages/packages.go b/vendor/golang.org/x/tools/go/packages/packages.go index 9df20919..0f1505b8 100644 --- a/vendor/golang.org/x/tools/go/packages/packages.go +++ b/vendor/golang.org/x/tools/go/packages/packages.go @@ -15,6 +15,7 @@ import ( "go/scanner" "go/token" "go/types" + "io" "io/ioutil" "log" "os" @@ -877,12 +878,19 @@ func (ld *loader) loadPackage(lpkg *loaderPackage) { // never has to create a types.Package for an indirect dependency, // which would then require that such created packages be explicitly // inserted back into the Import graph as a final step after export data loading. + // (Hence this return is after the Types assignment.) // The Diamond test exercises this case. if !lpkg.needtypes && !lpkg.needsrc { return } if !lpkg.needsrc { - ld.loadFromExportData(lpkg) + if err := ld.loadFromExportData(lpkg); err != nil { + lpkg.Errors = append(lpkg.Errors, Error{ + Pos: "-", + Msg: err.Error(), + Kind: UnknownError, // e.g. can't find/open/parse export data + }) + } return // not a source package, don't get syntax trees } @@ -950,6 +958,8 @@ func (ld *loader) loadPackage(lpkg *loaderPackage) { // - golang.org/issue/52078 (flag to set release tags) // - golang.org/issue/50825 (gopls legacy version support) // - golang.org/issue/55883 (go/packages confusing error) + // + // Should we assert a hard minimum of (currently) go1.16 here? var runtimeVersion int if _, err := fmt.Sscanf(runtime.Version(), "go1.%d", &runtimeVersion); err == nil && runtimeVersion < lpkg.goVersion { defer func() { @@ -967,7 +977,8 @@ func (ld *loader) loadPackage(lpkg *loaderPackage) { // The config requested loading sources and types, but sources are missing. // Add an error to the package and fall back to loading from export data. appendError(Error{"-", fmt.Sprintf("sources missing for package %s", lpkg.ID), ParseError}) - ld.loadFromExportData(lpkg) + _ = ld.loadFromExportData(lpkg) // ignore any secondary errors + return // can't get syntax trees for this package } @@ -1191,9 +1202,10 @@ func sameFile(x, y string) bool { return false } -// loadFromExportData returns type information for the specified +// loadFromExportData ensures that type information is present for the specified // package, loading it from an export data file on the first request. -func (ld *loader) loadFromExportData(lpkg *loaderPackage) (*types.Package, error) { +// On success it sets lpkg.Types to a new Package. +func (ld *loader) loadFromExportData(lpkg *loaderPackage) error { if lpkg.PkgPath == "" { log.Fatalf("internal error: Package %s has no PkgPath", lpkg) } @@ -1204,8 +1216,8 @@ func (ld *loader) loadFromExportData(lpkg *loaderPackage) (*types.Package, error // must be sequential. (Finer-grained locking would require // changes to the gcexportdata API.) // - // The exportMu lock guards the Package.Pkg field and the - // types.Package it points to, for each Package in the graph. + // The exportMu lock guards the lpkg.Types field and the + // types.Package it points to, for each loaderPackage in the graph. // // Not all accesses to Package.Pkg need to be protected by exportMu: // graph ordering ensures that direct dependencies of source @@ -1214,18 +1226,18 @@ func (ld *loader) loadFromExportData(lpkg *loaderPackage) (*types.Package, error defer ld.exportMu.Unlock() if tpkg := lpkg.Types; tpkg != nil && tpkg.Complete() { - return tpkg, nil // cache hit + return nil // cache hit } lpkg.IllTyped = true // fail safe if lpkg.ExportFile == "" { // Errors while building export data will have been printed to stderr. - return nil, fmt.Errorf("no export data file") + return fmt.Errorf("no export data file") } f, err := os.Open(lpkg.ExportFile) if err != nil { - return nil, err + return err } defer f.Close() @@ -1237,7 +1249,7 @@ func (ld *loader) loadFromExportData(lpkg *loaderPackage) (*types.Package, error // queries.) r, err := gcexportdata.NewReader(f) if err != nil { - return nil, fmt.Errorf("reading %s: %v", lpkg.ExportFile, err) + return fmt.Errorf("reading %s: %v", lpkg.ExportFile, err) } // Build the view. @@ -1281,7 +1293,7 @@ func (ld *loader) loadFromExportData(lpkg *loaderPackage) (*types.Package, error // (May modify incomplete packages in view but not create new ones.) tpkg, err := gcexportdata.Read(r, ld.Fset, view, lpkg.PkgPath) if err != nil { - return nil, fmt.Errorf("reading %s: %v", lpkg.ExportFile, err) + return fmt.Errorf("reading %s: %v", lpkg.ExportFile, err) } if _, ok := view["go.shape"]; ok { // Account for the pseudopackage "go.shape" that gets @@ -1294,8 +1306,7 @@ func (ld *loader) loadFromExportData(lpkg *loaderPackage) (*types.Package, error lpkg.Types = tpkg lpkg.IllTyped = false - - return tpkg, nil + return nil } // impliedLoadMode returns loadMode with its dependencies. @@ -1311,3 +1322,5 @@ func impliedLoadMode(loadMode LoadMode) LoadMode { func usesExportData(cfg *Config) bool { return cfg.Mode&NeedExportFile != 0 || cfg.Mode&NeedTypes != 0 && cfg.Mode&NeedDeps == 0 } + +var _ interface{} = io.Discard // assert build toolchain is go1.16 or later diff --git a/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go b/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go index 1faaa365..0372fb3a 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go @@ -2,9 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// This file is a modified copy of $GOROOT/src/go/internal/gcimporter/gcimporter.go, -// but it also contains the original source-based importer code for Go1.6. -// Once we stop supporting 1.6, we can remove that code. +// This file is a reduced copy of $GOROOT/src/go/internal/gcimporter/gcimporter.go. // Package gcimporter provides various functions for reading // gc-generated object files that can be used to implement the @@ -14,10 +12,8 @@ package gcimporter // import "golang.org/x/tools/internal/gcimporter" import ( "bufio" "bytes" - "errors" "fmt" "go/build" - "go/constant" "go/token" "go/types" "io" @@ -25,11 +21,8 @@ import ( "os" "os/exec" "path/filepath" - "sort" - "strconv" "strings" "sync" - "text/scanner" ) const ( @@ -154,37 +147,6 @@ func FindPkg(path, srcDir string) (filename, id string) { return } -// ImportData imports a package by reading the gc-generated export data, -// adds the corresponding package object to the packages map indexed by id, -// and returns the object. -// -// The packages map must contains all packages already imported. The data -// reader position must be the beginning of the export data section. The -// filename is only used in error messages. -// -// If packages[id] contains the completely imported package, that package -// can be used directly, and there is no need to call this function (but -// there is also no harm but for extra time used). -func ImportData(packages map[string]*types.Package, filename, id string, data io.Reader) (pkg *types.Package, err error) { - // support for parser error handling - defer func() { - switch r := recover().(type) { - case nil: - // nothing to do - case importError: - err = r - default: - panic(r) // internal error - } - }() - - var p parser - p.init(filename, id, data, packages) - pkg = p.parseExport() - - return -} - // Import imports a gc-generated package given its import path and srcDir, adds // the corresponding package object to the packages map, and returns the object. // The packages map must contain all packages already imported. @@ -245,15 +207,6 @@ func Import(packages map[string]*types.Package, path, srcDir string, lookup func } switch hdr { - case "$$\n": - // Work-around if we don't have a filename; happens only if lookup != nil. - // Either way, the filename is only needed for importer error messages, so - // this is fine. - if filename == "" { - filename = path - } - return ImportData(packages, filename, id, buf) - case "$$B\n": var data []byte data, err = ioutil.ReadAll(buf) @@ -298,319 +251,6 @@ func Import(packages map[string]*types.Package, path, srcDir string, lookup func return } -// ---------------------------------------------------------------------------- -// Parser - -// TODO(gri) Imported objects don't have position information. -// Ideally use the debug table line info; alternatively -// create some fake position (or the position of the -// import). That way error messages referring to imported -// objects can print meaningful information. - -// parser parses the exports inside a gc compiler-produced -// object/archive file and populates its scope with the results. -type parser struct { - scanner scanner.Scanner - tok rune // current token - lit string // literal string; only valid for Ident, Int, String tokens - id string // package id of imported package - sharedPkgs map[string]*types.Package // package id -> package object (across importer) - localPkgs map[string]*types.Package // package id -> package object (just this package) -} - -func (p *parser) init(filename, id string, src io.Reader, packages map[string]*types.Package) { - p.scanner.Init(src) - p.scanner.Error = func(_ *scanner.Scanner, msg string) { p.error(msg) } - p.scanner.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanChars | scanner.ScanStrings | scanner.ScanComments | scanner.SkipComments - p.scanner.Whitespace = 1<<'\t' | 1<<' ' - p.scanner.Filename = filename // for good error messages - p.next() - p.id = id - p.sharedPkgs = packages - if debug { - // check consistency of packages map - for _, pkg := range packages { - if pkg.Name() == "" { - fmt.Printf("no package name for %s\n", pkg.Path()) - } - } - } -} - -func (p *parser) next() { - p.tok = p.scanner.Scan() - switch p.tok { - case scanner.Ident, scanner.Int, scanner.Char, scanner.String, '·': - p.lit = p.scanner.TokenText() - default: - p.lit = "" - } - if debug { - fmt.Printf("%s: %q -> %q\n", scanner.TokenString(p.tok), p.scanner.TokenText(), p.lit) - } -} - -func declTypeName(pkg *types.Package, name string) *types.TypeName { - scope := pkg.Scope() - if obj := scope.Lookup(name); obj != nil { - return obj.(*types.TypeName) - } - obj := types.NewTypeName(token.NoPos, pkg, name, nil) - // a named type may be referred to before the underlying type - // is known - set it up - types.NewNamed(obj, nil, nil) - scope.Insert(obj) - return obj -} - -// ---------------------------------------------------------------------------- -// Error handling - -// Internal errors are boxed as importErrors. -type importError struct { - pos scanner.Position - err error -} - -func (e importError) Error() string { - return fmt.Sprintf("import error %s (byte offset = %d): %s", e.pos, e.pos.Offset, e.err) -} - -func (p *parser) error(err interface{}) { - if s, ok := err.(string); ok { - err = errors.New(s) - } - // panic with a runtime.Error if err is not an error - panic(importError{p.scanner.Pos(), err.(error)}) -} - -func (p *parser) errorf(format string, args ...interface{}) { - p.error(fmt.Sprintf(format, args...)) -} - -func (p *parser) expect(tok rune) string { - lit := p.lit - if p.tok != tok { - p.errorf("expected %s, got %s (%s)", scanner.TokenString(tok), scanner.TokenString(p.tok), lit) - } - p.next() - return lit -} - -func (p *parser) expectSpecial(tok string) { - sep := 'x' // not white space - i := 0 - for i < len(tok) && p.tok == rune(tok[i]) && sep > ' ' { - sep = p.scanner.Peek() // if sep <= ' ', there is white space before the next token - p.next() - i++ - } - if i < len(tok) { - p.errorf("expected %q, got %q", tok, tok[0:i]) - } -} - -func (p *parser) expectKeyword(keyword string) { - lit := p.expect(scanner.Ident) - if lit != keyword { - p.errorf("expected keyword %s, got %q", keyword, lit) - } -} - -// ---------------------------------------------------------------------------- -// Qualified and unqualified names - -// parsePackageID parses a PackageId: -// -// PackageId = string_lit . -func (p *parser) parsePackageID() string { - id, err := strconv.Unquote(p.expect(scanner.String)) - if err != nil { - p.error(err) - } - // id == "" stands for the imported package id - // (only known at time of package installation) - if id == "" { - id = p.id - } - return id -} - -// parsePackageName parse a PackageName: -// -// PackageName = ident . -func (p *parser) parsePackageName() string { - return p.expect(scanner.Ident) -} - -// parseDotIdent parses a dotIdentifier: -// -// dotIdentifier = ( ident | '·' ) { ident | int | '·' } . -func (p *parser) parseDotIdent() string { - ident := "" - if p.tok != scanner.Int { - sep := 'x' // not white space - for (p.tok == scanner.Ident || p.tok == scanner.Int || p.tok == '·') && sep > ' ' { - ident += p.lit - sep = p.scanner.Peek() // if sep <= ' ', there is white space before the next token - p.next() - } - } - if ident == "" { - p.expect(scanner.Ident) // use expect() for error handling - } - return ident -} - -// parseQualifiedName parses a QualifiedName: -// -// QualifiedName = "@" PackageId "." ( "?" | dotIdentifier ) . -func (p *parser) parseQualifiedName() (id, name string) { - p.expect('@') - id = p.parsePackageID() - p.expect('.') - // Per rev f280b8a485fd (10/2/2013), qualified names may be used for anonymous fields. - if p.tok == '?' { - p.next() - } else { - name = p.parseDotIdent() - } - return -} - -// getPkg returns the package for a given id. If the package is -// not found, create the package and add it to the p.localPkgs -// and p.sharedPkgs maps. name is the (expected) name of the -// package. If name == "", the package name is expected to be -// set later via an import clause in the export data. -// -// id identifies a package, usually by a canonical package path like -// "encoding/json" but possibly by a non-canonical import path like -// "./json". -func (p *parser) getPkg(id, name string) *types.Package { - // package unsafe is not in the packages maps - handle explicitly - if id == "unsafe" { - return types.Unsafe - } - - pkg := p.localPkgs[id] - if pkg == nil { - // first import of id from this package - pkg = p.sharedPkgs[id] - if pkg == nil { - // first import of id by this importer; - // add (possibly unnamed) pkg to shared packages - pkg = types.NewPackage(id, name) - p.sharedPkgs[id] = pkg - } - // add (possibly unnamed) pkg to local packages - if p.localPkgs == nil { - p.localPkgs = make(map[string]*types.Package) - } - p.localPkgs[id] = pkg - } else if name != "" { - // package exists already and we have an expected package name; - // make sure names match or set package name if necessary - if pname := pkg.Name(); pname == "" { - pkg.SetName(name) - } else if pname != name { - p.errorf("%s package name mismatch: %s (given) vs %s (expected)", id, pname, name) - } - } - return pkg -} - -// parseExportedName is like parseQualifiedName, but -// the package id is resolved to an imported *types.Package. -func (p *parser) parseExportedName() (pkg *types.Package, name string) { - id, name := p.parseQualifiedName() - pkg = p.getPkg(id, "") - return -} - -// ---------------------------------------------------------------------------- -// Types - -// parseBasicType parses a BasicType: -// -// BasicType = identifier . -func (p *parser) parseBasicType() types.Type { - id := p.expect(scanner.Ident) - obj := types.Universe.Lookup(id) - if obj, ok := obj.(*types.TypeName); ok { - return obj.Type() - } - p.errorf("not a basic type: %s", id) - return nil -} - -// parseArrayType parses an ArrayType: -// -// ArrayType = "[" int_lit "]" Type . -func (p *parser) parseArrayType(parent *types.Package) types.Type { - // "[" already consumed and lookahead known not to be "]" - lit := p.expect(scanner.Int) - p.expect(']') - elem := p.parseType(parent) - n, err := strconv.ParseInt(lit, 10, 64) - if err != nil { - p.error(err) - } - return types.NewArray(elem, n) -} - -// parseMapType parses a MapType: -// -// MapType = "map" "[" Type "]" Type . -func (p *parser) parseMapType(parent *types.Package) types.Type { - p.expectKeyword("map") - p.expect('[') - key := p.parseType(parent) - p.expect(']') - elem := p.parseType(parent) - return types.NewMap(key, elem) -} - -// parseName parses a Name: -// -// Name = identifier | "?" | QualifiedName . -// -// For unqualified and anonymous names, the returned package is the parent -// package unless parent == nil, in which case the returned package is the -// package being imported. (The parent package is not nil if the name -// is an unqualified struct field or interface method name belonging to a -// type declared in another package.) -// -// For qualified names, the returned package is nil (and not created if -// it doesn't exist yet) unless materializePkg is set (which creates an -// unnamed package with valid package path). In the latter case, a -// subsequent import clause is expected to provide a name for the package. -func (p *parser) parseName(parent *types.Package, materializePkg bool) (pkg *types.Package, name string) { - pkg = parent - if pkg == nil { - pkg = p.sharedPkgs[p.id] - } - switch p.tok { - case scanner.Ident: - name = p.lit - p.next() - case '?': - // anonymous - p.next() - case '@': - // exported name prefixed with package path - pkg = nil - var id string - id, name = p.parseQualifiedName() - if materializePkg { - pkg = p.getPkg(id, "") - } - default: - p.error("name expected") - } - return -} - func deref(typ types.Type) types.Type { if p, _ := typ.(*types.Pointer); p != nil { return p.Elem() @@ -618,563 +258,6 @@ func deref(typ types.Type) types.Type { return typ } -// parseField parses a Field: -// -// Field = Name Type [ string_lit ] . -func (p *parser) parseField(parent *types.Package) (*types.Var, string) { - pkg, name := p.parseName(parent, true) - - if name == "_" { - // Blank fields should be package-qualified because they - // are unexported identifiers, but gc does not qualify them. - // Assuming that the ident belongs to the current package - // causes types to change during re-exporting, leading - // to spurious "can't assign A to B" errors from go/types. - // As a workaround, pretend all blank fields belong - // to the same unique dummy package. - const blankpkg = "<_>" - pkg = p.getPkg(blankpkg, blankpkg) - } - - typ := p.parseType(parent) - anonymous := false - if name == "" { - // anonymous field - typ must be T or *T and T must be a type name - switch typ := deref(typ).(type) { - case *types.Basic: // basic types are named types - pkg = nil // objects defined in Universe scope have no package - name = typ.Name() - case *types.Named: - name = typ.Obj().Name() - default: - p.errorf("anonymous field expected") - } - anonymous = true - } - tag := "" - if p.tok == scanner.String { - s := p.expect(scanner.String) - var err error - tag, err = strconv.Unquote(s) - if err != nil { - p.errorf("invalid struct tag %s: %s", s, err) - } - } - return types.NewField(token.NoPos, pkg, name, typ, anonymous), tag -} - -// parseStructType parses a StructType: -// -// StructType = "struct" "{" [ FieldList ] "}" . -// FieldList = Field { ";" Field } . -func (p *parser) parseStructType(parent *types.Package) types.Type { - var fields []*types.Var - var tags []string - - p.expectKeyword("struct") - p.expect('{') - for i := 0; p.tok != '}' && p.tok != scanner.EOF; i++ { - if i > 0 { - p.expect(';') - } - fld, tag := p.parseField(parent) - if tag != "" && tags == nil { - tags = make([]string, i) - } - if tags != nil { - tags = append(tags, tag) - } - fields = append(fields, fld) - } - p.expect('}') - - return types.NewStruct(fields, tags) -} - -// parseParameter parses a Parameter: -// -// Parameter = ( identifier | "?" ) [ "..." ] Type [ string_lit ] . -func (p *parser) parseParameter() (par *types.Var, isVariadic bool) { - _, name := p.parseName(nil, false) - // remove gc-specific parameter numbering - if i := strings.Index(name, "·"); i >= 0 { - name = name[:i] - } - if p.tok == '.' { - p.expectSpecial("...") - isVariadic = true - } - typ := p.parseType(nil) - if isVariadic { - typ = types.NewSlice(typ) - } - // ignore argument tag (e.g. "noescape") - if p.tok == scanner.String { - p.next() - } - // TODO(gri) should we provide a package? - par = types.NewVar(token.NoPos, nil, name, typ) - return -} - -// parseParameters parses a Parameters: -// -// Parameters = "(" [ ParameterList ] ")" . -// ParameterList = { Parameter "," } Parameter . -func (p *parser) parseParameters() (list []*types.Var, isVariadic bool) { - p.expect('(') - for p.tok != ')' && p.tok != scanner.EOF { - if len(list) > 0 { - p.expect(',') - } - par, variadic := p.parseParameter() - list = append(list, par) - if variadic { - if isVariadic { - p.error("... not on final argument") - } - isVariadic = true - } - } - p.expect(')') - - return -} - -// parseSignature parses a Signature: -// -// Signature = Parameters [ Result ] . -// Result = Type | Parameters . -func (p *parser) parseSignature(recv *types.Var) *types.Signature { - params, isVariadic := p.parseParameters() - - // optional result type - var results []*types.Var - if p.tok == '(' { - var variadic bool - results, variadic = p.parseParameters() - if variadic { - p.error("... not permitted on result type") - } - } - - return types.NewSignature(recv, types.NewTuple(params...), types.NewTuple(results...), isVariadic) -} - -// parseInterfaceType parses an InterfaceType: -// -// InterfaceType = "interface" "{" [ MethodList ] "}" . -// MethodList = Method { ";" Method } . -// Method = Name Signature . -// -// The methods of embedded interfaces are always "inlined" -// by the compiler and thus embedded interfaces are never -// visible in the export data. -func (p *parser) parseInterfaceType(parent *types.Package) types.Type { - var methods []*types.Func - - p.expectKeyword("interface") - p.expect('{') - for i := 0; p.tok != '}' && p.tok != scanner.EOF; i++ { - if i > 0 { - p.expect(';') - } - pkg, name := p.parseName(parent, true) - sig := p.parseSignature(nil) - methods = append(methods, types.NewFunc(token.NoPos, pkg, name, sig)) - } - p.expect('}') - - // Complete requires the type's embedded interfaces to be fully defined, - // but we do not define any - return newInterface(methods, nil).Complete() -} - -// parseChanType parses a ChanType: -// -// ChanType = ( "chan" [ "<-" ] | "<-" "chan" ) Type . -func (p *parser) parseChanType(parent *types.Package) types.Type { - dir := types.SendRecv - if p.tok == scanner.Ident { - p.expectKeyword("chan") - if p.tok == '<' { - p.expectSpecial("<-") - dir = types.SendOnly - } - } else { - p.expectSpecial("<-") - p.expectKeyword("chan") - dir = types.RecvOnly - } - elem := p.parseType(parent) - return types.NewChan(dir, elem) -} - -// parseType parses a Type: -// -// Type = -// BasicType | TypeName | ArrayType | SliceType | StructType | -// PointerType | FuncType | InterfaceType | MapType | ChanType | -// "(" Type ")" . -// -// BasicType = ident . -// TypeName = ExportedName . -// SliceType = "[" "]" Type . -// PointerType = "*" Type . -// FuncType = "func" Signature . -func (p *parser) parseType(parent *types.Package) types.Type { - switch p.tok { - case scanner.Ident: - switch p.lit { - default: - return p.parseBasicType() - case "struct": - return p.parseStructType(parent) - case "func": - // FuncType - p.next() - return p.parseSignature(nil) - case "interface": - return p.parseInterfaceType(parent) - case "map": - return p.parseMapType(parent) - case "chan": - return p.parseChanType(parent) - } - case '@': - // TypeName - pkg, name := p.parseExportedName() - return declTypeName(pkg, name).Type() - case '[': - p.next() // look ahead - if p.tok == ']' { - // SliceType - p.next() - return types.NewSlice(p.parseType(parent)) - } - return p.parseArrayType(parent) - case '*': - // PointerType - p.next() - return types.NewPointer(p.parseType(parent)) - case '<': - return p.parseChanType(parent) - case '(': - // "(" Type ")" - p.next() - typ := p.parseType(parent) - p.expect(')') - return typ - } - p.errorf("expected type, got %s (%q)", scanner.TokenString(p.tok), p.lit) - return nil -} - -// ---------------------------------------------------------------------------- -// Declarations - -// parseImportDecl parses an ImportDecl: -// -// ImportDecl = "import" PackageName PackageId . -func (p *parser) parseImportDecl() { - p.expectKeyword("import") - name := p.parsePackageName() - p.getPkg(p.parsePackageID(), name) -} - -// parseInt parses an int_lit: -// -// int_lit = [ "+" | "-" ] { "0" ... "9" } . -func (p *parser) parseInt() string { - s := "" - switch p.tok { - case '-': - s = "-" - p.next() - case '+': - p.next() - } - return s + p.expect(scanner.Int) -} - -// parseNumber parses a number: -// -// number = int_lit [ "p" int_lit ] . -func (p *parser) parseNumber() (typ *types.Basic, val constant.Value) { - // mantissa - mant := constant.MakeFromLiteral(p.parseInt(), token.INT, 0) - if mant == nil { - panic("invalid mantissa") - } - - if p.lit == "p" { - // exponent (base 2) - p.next() - exp, err := strconv.ParseInt(p.parseInt(), 10, 0) - if err != nil { - p.error(err) - } - if exp < 0 { - denom := constant.MakeInt64(1) - denom = constant.Shift(denom, token.SHL, uint(-exp)) - typ = types.Typ[types.UntypedFloat] - val = constant.BinaryOp(mant, token.QUO, denom) - return - } - if exp > 0 { - mant = constant.Shift(mant, token.SHL, uint(exp)) - } - typ = types.Typ[types.UntypedFloat] - val = mant - return - } - - typ = types.Typ[types.UntypedInt] - val = mant - return -} - -// parseConstDecl parses a ConstDecl: -// -// ConstDecl = "const" ExportedName [ Type ] "=" Literal . -// Literal = bool_lit | int_lit | float_lit | complex_lit | rune_lit | string_lit . -// bool_lit = "true" | "false" . -// complex_lit = "(" float_lit "+" float_lit "i" ")" . -// rune_lit = "(" int_lit "+" int_lit ")" . -// string_lit = `"` { unicode_char } `"` . -func (p *parser) parseConstDecl() { - p.expectKeyword("const") - pkg, name := p.parseExportedName() - - var typ0 types.Type - if p.tok != '=' { - // constant types are never structured - no need for parent type - typ0 = p.parseType(nil) - } - - p.expect('=') - var typ types.Type - var val constant.Value - switch p.tok { - case scanner.Ident: - // bool_lit - if p.lit != "true" && p.lit != "false" { - p.error("expected true or false") - } - typ = types.Typ[types.UntypedBool] - val = constant.MakeBool(p.lit == "true") - p.next() - - case '-', scanner.Int: - // int_lit - typ, val = p.parseNumber() - - case '(': - // complex_lit or rune_lit - p.next() - if p.tok == scanner.Char { - p.next() - p.expect('+') - typ = types.Typ[types.UntypedRune] - _, val = p.parseNumber() - p.expect(')') - break - } - _, re := p.parseNumber() - p.expect('+') - _, im := p.parseNumber() - p.expectKeyword("i") - p.expect(')') - typ = types.Typ[types.UntypedComplex] - val = constant.BinaryOp(re, token.ADD, constant.MakeImag(im)) - - case scanner.Char: - // rune_lit - typ = types.Typ[types.UntypedRune] - val = constant.MakeFromLiteral(p.lit, token.CHAR, 0) - p.next() - - case scanner.String: - // string_lit - typ = types.Typ[types.UntypedString] - val = constant.MakeFromLiteral(p.lit, token.STRING, 0) - p.next() - - default: - p.errorf("expected literal got %s", scanner.TokenString(p.tok)) - } - - if typ0 == nil { - typ0 = typ - } - - pkg.Scope().Insert(types.NewConst(token.NoPos, pkg, name, typ0, val)) -} - -// parseTypeDecl parses a TypeDecl: -// -// TypeDecl = "type" ExportedName Type . -func (p *parser) parseTypeDecl() { - p.expectKeyword("type") - pkg, name := p.parseExportedName() - obj := declTypeName(pkg, name) - - // The type object may have been imported before and thus already - // have a type associated with it. We still need to parse the type - // structure, but throw it away if the object already has a type. - // This ensures that all imports refer to the same type object for - // a given type declaration. - typ := p.parseType(pkg) - - if name := obj.Type().(*types.Named); name.Underlying() == nil { - name.SetUnderlying(typ) - } -} - -// parseVarDecl parses a VarDecl: -// -// VarDecl = "var" ExportedName Type . -func (p *parser) parseVarDecl() { - p.expectKeyword("var") - pkg, name := p.parseExportedName() - typ := p.parseType(pkg) - pkg.Scope().Insert(types.NewVar(token.NoPos, pkg, name, typ)) -} - -// parseFunc parses a Func: -// -// Func = Signature [ Body ] . -// Body = "{" ... "}" . -func (p *parser) parseFunc(recv *types.Var) *types.Signature { - sig := p.parseSignature(recv) - if p.tok == '{' { - p.next() - for i := 1; i > 0; p.next() { - switch p.tok { - case '{': - i++ - case '}': - i-- - } - } - } - return sig -} - -// parseMethodDecl parses a MethodDecl: -// -// MethodDecl = "func" Receiver Name Func . -// Receiver = "(" ( identifier | "?" ) [ "*" ] ExportedName ")" . -func (p *parser) parseMethodDecl() { - // "func" already consumed - p.expect('(') - recv, _ := p.parseParameter() // receiver - p.expect(')') - - // determine receiver base type object - base := deref(recv.Type()).(*types.Named) - - // parse method name, signature, and possibly inlined body - _, name := p.parseName(nil, false) - sig := p.parseFunc(recv) - - // methods always belong to the same package as the base type object - pkg := base.Obj().Pkg() - - // add method to type unless type was imported before - // and method exists already - // TODO(gri) This leads to a quadratic algorithm - ok for now because method counts are small. - base.AddMethod(types.NewFunc(token.NoPos, pkg, name, sig)) -} - -// parseFuncDecl parses a FuncDecl: -// -// FuncDecl = "func" ExportedName Func . -func (p *parser) parseFuncDecl() { - // "func" already consumed - pkg, name := p.parseExportedName() - typ := p.parseFunc(nil) - pkg.Scope().Insert(types.NewFunc(token.NoPos, pkg, name, typ)) -} - -// parseDecl parses a Decl: -// -// Decl = [ ImportDecl | ConstDecl | TypeDecl | VarDecl | FuncDecl | MethodDecl ] "\n" . -func (p *parser) parseDecl() { - if p.tok == scanner.Ident { - switch p.lit { - case "import": - p.parseImportDecl() - case "const": - p.parseConstDecl() - case "type": - p.parseTypeDecl() - case "var": - p.parseVarDecl() - case "func": - p.next() // look ahead - if p.tok == '(' { - p.parseMethodDecl() - } else { - p.parseFuncDecl() - } - } - } - p.expect('\n') -} - -// ---------------------------------------------------------------------------- -// Export - -// parseExport parses an Export: -// -// Export = "PackageClause { Decl } "$$" . -// PackageClause = "package" PackageName [ "safe" ] "\n" . -func (p *parser) parseExport() *types.Package { - p.expectKeyword("package") - name := p.parsePackageName() - if p.tok == scanner.Ident && p.lit == "safe" { - // package was compiled with -u option - ignore - p.next() - } - p.expect('\n') - - pkg := p.getPkg(p.id, name) - - for p.tok != '$' && p.tok != scanner.EOF { - p.parseDecl() - } - - if ch := p.scanner.Peek(); p.tok != '$' || ch != '$' { - // don't call next()/expect() since reading past the - // export data may cause scanner errors (e.g. NUL chars) - p.errorf("expected '$$', got %s %c", scanner.TokenString(p.tok), ch) - } - - if n := p.scanner.ErrorCount; n != 0 { - p.errorf("expected no scanner errors, got %d", n) - } - - // Record all locally referenced packages as imports. - var imports []*types.Package - for id, pkg2 := range p.localPkgs { - if pkg2.Name() == "" { - p.errorf("%s package has no name", id) - } - if id == p.id { - continue // avoid self-edge - } - imports = append(imports, pkg2) - } - sort.Sort(byPath(imports)) - pkg.SetImports(imports) - - // package was imported completely and without errors - pkg.MarkComplete() - - return pkg -} - type byPath []*types.Package func (a byPath) Len() int { return len(a) } diff --git a/vendor/golang.org/x/tools/internal/gcimporter/iexport.go b/vendor/golang.org/x/tools/internal/gcimporter/iexport.go index 7d90f00f..ba53cdcd 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/iexport.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/iexport.go @@ -22,6 +22,7 @@ import ( "strconv" "strings" + "golang.org/x/tools/internal/tokeninternal" "golang.org/x/tools/internal/typeparams" ) @@ -138,6 +139,17 @@ func iexportCommon(out io.Writer, fset *token.FileSet, bundle, shallow bool, ver p.doDecl(p.declTodo.popHead()) } + // Produce index of offset of each file record in files. + var files intWriter + var fileOffset []uint64 // fileOffset[i] is offset in files of file encoded as i + if p.shallow { + fileOffset = make([]uint64, len(p.fileInfos)) + for i, info := range p.fileInfos { + fileOffset[i] = uint64(files.Len()) + p.encodeFile(&files, info.file, info.needed) + } + } + // Append indices to data0 section. dataLen := uint64(p.data0.Len()) w := p.newWriter() @@ -163,16 +175,75 @@ func iexportCommon(out io.Writer, fset *token.FileSet, bundle, shallow bool, ver } hdr.uint64(uint64(p.version)) hdr.uint64(uint64(p.strings.Len())) + if p.shallow { + hdr.uint64(uint64(files.Len())) + hdr.uint64(uint64(len(fileOffset))) + for _, offset := range fileOffset { + hdr.uint64(offset) + } + } hdr.uint64(dataLen) // Flush output. io.Copy(out, &hdr) io.Copy(out, &p.strings) + if p.shallow { + io.Copy(out, &files) + } io.Copy(out, &p.data0) return nil } +// encodeFile writes to w a representation of the file sufficient to +// faithfully restore position information about all needed offsets. +// Mutates the needed array. +func (p *iexporter) encodeFile(w *intWriter, file *token.File, needed []uint64) { + _ = needed[0] // precondition: needed is non-empty + + w.uint64(p.stringOff(file.Name())) + + size := uint64(file.Size()) + w.uint64(size) + + // Sort the set of needed offsets. Duplicates are harmless. + sort.Slice(needed, func(i, j int) bool { return needed[i] < needed[j] }) + + lines := tokeninternal.GetLines(file) // byte offset of each line start + w.uint64(uint64(len(lines))) + + // Rather than record the entire array of line start offsets, + // we save only a sparse list of (index, offset) pairs for + // the start of each line that contains a needed position. + var sparse [][2]int // (index, offset) pairs +outer: + for i, lineStart := range lines { + lineEnd := size + if i < len(lines)-1 { + lineEnd = uint64(lines[i+1]) + } + // Does this line contains a needed offset? + if needed[0] < lineEnd { + sparse = append(sparse, [2]int{i, lineStart}) + for needed[0] < lineEnd { + needed = needed[1:] + if len(needed) == 0 { + break outer + } + } + } + } + + // Delta-encode the columns. + w.uint64(uint64(len(sparse))) + var prev [2]int + for _, pair := range sparse { + w.uint64(uint64(pair[0] - prev[0])) + w.uint64(uint64(pair[1] - prev[1])) + prev = pair + } +} + // writeIndex writes out an object index. mainIndex indicates whether // we're writing out the main index, which is also read by // non-compiler tools and includes a complete package description @@ -255,6 +326,12 @@ type iexporter struct { strings intWriter stringIndex map[string]uint64 + // In shallow mode, object positions are encoded as (file, offset). + // Each file is recorded as a line-number table. + // Only the lines of needed positions are saved faithfully. + fileInfo map[*token.File]uint64 // value is index in fileInfos + fileInfos []*filePositions + data0 intWriter declIndex map[types.Object]uint64 tparamNames map[types.Object]string // typeparam->exported name @@ -263,6 +340,11 @@ type iexporter struct { indent int // for tracing support } +type filePositions struct { + file *token.File + needed []uint64 // unordered list of needed file offsets +} + func (p *iexporter) trace(format string, args ...interface{}) { if !trace { // Call sites should also be guarded, but having this check here allows @@ -286,6 +368,25 @@ func (p *iexporter) stringOff(s string) uint64 { return off } +// fileIndexAndOffset returns the index of the token.File and the byte offset of pos within it. +func (p *iexporter) fileIndexAndOffset(file *token.File, pos token.Pos) (uint64, uint64) { + index, ok := p.fileInfo[file] + if !ok { + index = uint64(len(p.fileInfo)) + p.fileInfos = append(p.fileInfos, &filePositions{file: file}) + if p.fileInfo == nil { + p.fileInfo = make(map[*token.File]uint64) + } + p.fileInfo[file] = index + } + // Record each needed offset. + info := p.fileInfos[index] + offset := uint64(file.Offset(pos)) + info.needed = append(info.needed, offset) + + return index, offset +} + // pushDecl adds n to the declaration work queue, if not already present. func (p *iexporter) pushDecl(obj types.Object) { // Package unsafe is known to the compiler and predeclared. @@ -346,7 +447,13 @@ func (p *iexporter) doDecl(obj types.Object) { case *types.Func: sig, _ := obj.Type().(*types.Signature) if sig.Recv() != nil { - panic(internalErrorf("unexpected method: %v", sig)) + // We shouldn't see methods in the package scope, + // but the type checker may repair "func () F() {}" + // to "func (Invalid) F()" and then treat it like "func F()", + // so allow that. See golang/go#57729. + if sig.Recv().Type() != types.Typ[types.Invalid] { + panic(internalErrorf("unexpected method: %v", sig)) + } } // Function. @@ -458,13 +565,30 @@ func (w *exportWriter) tag(tag byte) { } func (w *exportWriter) pos(pos token.Pos) { - if w.p.version >= iexportVersionPosCol { + if w.p.shallow { + w.posV2(pos) + } else if w.p.version >= iexportVersionPosCol { w.posV1(pos) } else { w.posV0(pos) } } +// posV2 encoding (used only in shallow mode) records positions as +// (file, offset), where file is the index in the token.File table +// (which records the file name and newline offsets) and offset is a +// byte offset. It effectively ignores //line directives. +func (w *exportWriter) posV2(pos token.Pos) { + if pos == token.NoPos { + w.uint64(0) + return + } + file := w.p.fset.File(pos) // fset must be non-nil + index, offset := w.p.fileIndexAndOffset(file, pos) + w.uint64(1 + index) + w.uint64(offset) +} + func (w *exportWriter) posV1(pos token.Pos) { if w.p.fset == nil { w.int64(0) diff --git a/vendor/golang.org/x/tools/internal/gcimporter/iimport.go b/vendor/golang.org/x/tools/internal/gcimporter/iimport.go index a1c46965..448f903e 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/iimport.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/iimport.go @@ -137,12 +137,23 @@ func iimportCommon(fset *token.FileSet, imports map[string]*types.Package, data } sLen := int64(r.uint64()) + var fLen int64 + var fileOffset []uint64 + if insert != nil { + // Shallow mode uses a different position encoding. + fLen = int64(r.uint64()) + fileOffset = make([]uint64, r.uint64()) + for i := range fileOffset { + fileOffset[i] = r.uint64() + } + } dLen := int64(r.uint64()) whence, _ := r.Seek(0, io.SeekCurrent) stringData := data[whence : whence+sLen] - declData := data[whence+sLen : whence+sLen+dLen] - r.Seek(sLen+dLen, io.SeekCurrent) + fileData := data[whence+sLen : whence+sLen+fLen] + declData := data[whence+sLen+fLen : whence+sLen+fLen+dLen] + r.Seek(sLen+fLen+dLen, io.SeekCurrent) p := iimporter{ version: int(version), @@ -151,6 +162,9 @@ func iimportCommon(fset *token.FileSet, imports map[string]*types.Package, data stringData: stringData, stringCache: make(map[uint64]string), + fileOffset: fileOffset, + fileData: fileData, + fileCache: make([]*token.File, len(fileOffset)), pkgCache: make(map[uint64]*types.Package), declData: declData, @@ -280,6 +294,9 @@ type iimporter struct { stringData []byte stringCache map[uint64]string + fileOffset []uint64 // fileOffset[i] is offset in fileData for info about file encoded as i + fileData []byte + fileCache []*token.File // memoized decoding of file encoded as i pkgCache map[uint64]*types.Package declData []byte @@ -352,6 +369,55 @@ func (p *iimporter) stringAt(off uint64) string { return s } +func (p *iimporter) fileAt(index uint64) *token.File { + file := p.fileCache[index] + if file == nil { + off := p.fileOffset[index] + file = p.decodeFile(intReader{bytes.NewReader(p.fileData[off:]), p.ipath}) + p.fileCache[index] = file + } + return file +} + +func (p *iimporter) decodeFile(rd intReader) *token.File { + filename := p.stringAt(rd.uint64()) + size := int(rd.uint64()) + file := p.fake.fset.AddFile(filename, -1, size) + + // SetLines requires a nondecreasing sequence. + // Because it is common for clients to derive the interval + // [start, start+len(name)] from a start position, and we + // want to ensure that the end offset is on the same line, + // we fill in the gaps of the sparse encoding with values + // that strictly increase by the largest possible amount. + // This allows us to avoid having to record the actual end + // offset of each needed line. + + lines := make([]int, int(rd.uint64())) + var index, offset int + for i, n := 0, int(rd.uint64()); i < n; i++ { + index += int(rd.uint64()) + offset += int(rd.uint64()) + lines[index] = offset + + // Ensure monotonicity between points. + for j := index - 1; j > 0 && lines[j] == 0; j-- { + lines[j] = lines[j+1] - 1 + } + } + + // Ensure monotonicity after last point. + for j := len(lines) - 1; j > 0 && lines[j] == 0; j-- { + size-- + lines[j] = size + } + + if !file.SetLines(lines) { + errorf("SetLines failed: %d", lines) // can't happen + } + return file +} + func (p *iimporter) pkgAt(off uint64) *types.Package { if pkg, ok := p.pkgCache[off]; ok { return pkg @@ -645,6 +711,9 @@ func (r *importReader) qualifiedIdent() (*types.Package, string) { } func (r *importReader) pos() token.Pos { + if r.p.insert != nil { // shallow mode + return r.posv2() + } if r.p.version >= iexportVersionPosCol { r.posv1() } else { @@ -681,6 +750,15 @@ func (r *importReader) posv1() { } } +func (r *importReader) posv2() token.Pos { + file := r.uint64() + if file == 0 { + return token.NoPos + } + tf := r.p.fileAt(file - 1) + return tf.Pos(int(r.uint64())) +} + func (r *importReader) typ() types.Type { return r.p.typAt(r.uint64(), nil) } diff --git a/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go b/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go index 20b99903..b285a11c 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go @@ -559,18 +559,7 @@ func (pr *pkgReader) objIdx(idx pkgbits.Index) (*types.Package, string) { named.SetTypeParams(r.typeParamNames()) - rhs := r.typ() - pk := r.p - pk.laterFor(named, func() { - // First be sure that the rhs is initialized, if it needs to be initialized. - delete(pk.laterFors, named) // prevent cycles - if i, ok := pk.laterFors[rhs]; ok { - f := pk.laterFns[i] - pk.laterFns[i] = func() {} // function is running now, so replace it with a no-op - f() // initialize RHS - } - underlying := rhs.Underlying() - + setUnderlying := func(underlying types.Type) { // If the underlying type is an interface, we need to // duplicate its methods so we can replace the receiver // parameter's type (#49906). @@ -595,7 +584,31 @@ func (pr *pkgReader) objIdx(idx pkgbits.Index) (*types.Package, string) { } named.SetUnderlying(underlying) - }) + } + + // Since go.dev/cl/455279, we can assume rhs.Underlying() will + // always be non-nil. However, to temporarily support users of + // older snapshot releases, we continue to fallback to the old + // behavior for now. + // + // TODO(mdempsky): Remove fallback code and simplify after + // allowing time for snapshot users to upgrade. + rhs := r.typ() + if underlying := rhs.Underlying(); underlying != nil { + setUnderlying(underlying) + } else { + pk := r.p + pk.laterFor(named, func() { + // First be sure that the rhs is initialized, if it needs to be initialized. + delete(pk.laterFors, named) // prevent cycles + if i, ok := pk.laterFors[rhs]; ok { + f := pk.laterFns[i] + pk.laterFns[i] = func() {} // function is running now, so replace it with a no-op + f() // initialize RHS + } + setUnderlying(rhs.Underlying()) + }) + } for i, n := 0, r.Len(); i < n; i++ { named.AddMethod(r.method()) diff --git a/vendor/golang.org/x/tools/internal/gocommand/version.go b/vendor/golang.org/x/tools/internal/gocommand/version.go index 98e834f7..307a76d4 100644 --- a/vendor/golang.org/x/tools/internal/gocommand/version.go +++ b/vendor/golang.org/x/tools/internal/gocommand/version.go @@ -58,22 +58,24 @@ func GoVersion(ctx context.Context, inv Invocation, r *Runner) (int, error) { return 0, fmt.Errorf("no parseable ReleaseTags in %v", tags) } -// GoVersionString reports the go version string as shown in `go version` command output. -// When `go version` outputs in non-standard form, this returns an empty string. -func GoVersionString(ctx context.Context, inv Invocation, r *Runner) (string, error) { +// GoVersionOutput returns the complete output of the go version command. +func GoVersionOutput(ctx context.Context, inv Invocation, r *Runner) (string, error) { inv.Verb = "version" goVersion, err := r.Run(ctx, inv) if err != nil { return "", err } - return parseGoVersionOutput(goVersion.Bytes()), nil + return goVersion.String(), nil } -func parseGoVersionOutput(data []byte) string { +// ParseGoVersionOutput extracts the Go version string +// from the output of the "go version" command. +// Given an unrecognized form, it returns an empty string. +func ParseGoVersionOutput(data string) string { re := regexp.MustCompile(`^go version (go\S+|devel \S+)`) - m := re.FindSubmatch(data) + m := re.FindStringSubmatch(data) if len(m) != 2 { return "" // unrecognized version } - return string(m[1]) + return m[1] } diff --git a/vendor/golang.org/x/tools/internal/imports/fix.go b/vendor/golang.org/x/tools/internal/imports/fix.go index 64645580..642a5ac2 100644 --- a/vendor/golang.org/x/tools/internal/imports/fix.go +++ b/vendor/golang.org/x/tools/internal/imports/fix.go @@ -799,7 +799,7 @@ func GetPackageExports(ctx context.Context, wrapped func(PackageExport), searchP return getCandidatePkgs(ctx, callback, filename, filePkg, env) } -var RequiredGoEnvVars = []string{"GO111MODULE", "GOFLAGS", "GOINSECURE", "GOMOD", "GOMODCACHE", "GONOPROXY", "GONOSUMDB", "GOPATH", "GOPROXY", "GOROOT", "GOSUMDB", "GOWORK"} +var requiredGoEnvVars = []string{"GO111MODULE", "GOFLAGS", "GOINSECURE", "GOMOD", "GOMODCACHE", "GONOPROXY", "GONOSUMDB", "GOPATH", "GOPROXY", "GOROOT", "GOSUMDB", "GOWORK"} // ProcessEnv contains environment variables and settings that affect the use of // the go command, the go/build package, etc. @@ -869,7 +869,7 @@ func (e *ProcessEnv) init() error { } foundAllRequired := true - for _, k := range RequiredGoEnvVars { + for _, k := range requiredGoEnvVars { if _, ok := e.Env[k]; !ok { foundAllRequired = false break @@ -885,7 +885,7 @@ func (e *ProcessEnv) init() error { } goEnv := map[string]string{} - stdout, err := e.invokeGo(context.TODO(), "env", append([]string{"-json"}, RequiredGoEnvVars...)...) + stdout, err := e.invokeGo(context.TODO(), "env", append([]string{"-json"}, requiredGoEnvVars...)...) if err != nil { return err } diff --git a/vendor/golang.org/x/tools/internal/imports/sortimports.go b/vendor/golang.org/x/tools/internal/imports/sortimports.go index 85144db1..1a0a7ebd 100644 --- a/vendor/golang.org/x/tools/internal/imports/sortimports.go +++ b/vendor/golang.org/x/tools/internal/imports/sortimports.go @@ -52,6 +52,7 @@ func sortImports(localPrefix string, tokFile *token.File, f *ast.File) { d.Specs = specs // Deduping can leave a blank line before the rparen; clean that up. + // Ignore line directives. if len(d.Specs) > 0 { lastSpec := d.Specs[len(d.Specs)-1] lastLine := tokFile.PositionFor(lastSpec.Pos(), false).Line diff --git a/vendor/golang.org/x/tools/internal/imports/zstdlib.go b/vendor/golang.org/x/tools/internal/imports/zstdlib.go index 5db9b2d4..31a75949 100644 --- a/vendor/golang.org/x/tools/internal/imports/zstdlib.go +++ b/vendor/golang.org/x/tools/internal/imports/zstdlib.go @@ -10,6 +10,7 @@ var stdlib = map[string][]string{ "archive/tar": { "ErrFieldTooLong", "ErrHeader", + "ErrInsecurePath", "ErrWriteAfterClose", "ErrWriteTooLong", "FileInfoHeader", @@ -45,6 +46,7 @@ var stdlib = map[string][]string{ "ErrAlgorithm", "ErrChecksum", "ErrFormat", + "ErrInsecurePath", "File", "FileHeader", "FileInfoHeader", @@ -87,12 +89,15 @@ var stdlib = map[string][]string{ }, "bytes": { "Buffer", + "Clone", "Compare", "Contains", "ContainsAny", "ContainsRune", "Count", "Cut", + "CutPrefix", + "CutSuffix", "Equal", "EqualFold", "ErrTooLarge", @@ -224,12 +229,15 @@ var stdlib = map[string][]string{ }, "context": { "Background", + "CancelCauseFunc", "CancelFunc", "Canceled", + "Cause", "Context", "DeadlineExceeded", "TODO", "WithCancel", + "WithCancelCause", "WithDeadline", "WithTimeout", "WithValue", @@ -306,6 +314,15 @@ var stdlib = map[string][]string{ "Sign", "Verify", }, + "crypto/ecdh": { + "Curve", + "P256", + "P384", + "P521", + "PrivateKey", + "PublicKey", + "X25519", + }, "crypto/ecdsa": { "GenerateKey", "PrivateKey", @@ -318,6 +335,7 @@ var stdlib = map[string][]string{ "crypto/ed25519": { "GenerateKey", "NewKeyFromSeed", + "Options", "PrivateKey", "PrivateKeySize", "PublicKey", @@ -326,6 +344,7 @@ var stdlib = map[string][]string{ "Sign", "SignatureSize", "Verify", + "VerifyWithOptions", }, "crypto/elliptic": { "Curve", @@ -423,10 +442,12 @@ var stdlib = map[string][]string{ "ConstantTimeEq", "ConstantTimeLessOrEq", "ConstantTimeSelect", + "XORBytes", }, "crypto/tls": { "Certificate", "CertificateRequestInfo", + "CertificateVerificationError", "CipherSuite", "CipherSuiteName", "CipherSuites", @@ -604,6 +625,7 @@ var stdlib = map[string][]string{ "SHA384WithRSAPSS", "SHA512WithRSA", "SHA512WithRSAPSS", + "SetFallbackRoots", "SignatureAlgorithm", "SystemCertPool", "SystemRootsError", @@ -1828,19 +1850,42 @@ var stdlib = map[string][]string{ "R_INFO32", "R_LARCH", "R_LARCH_32", + "R_LARCH_32_PCREL", "R_LARCH_64", + "R_LARCH_ABS64_HI12", + "R_LARCH_ABS64_LO20", + "R_LARCH_ABS_HI20", + "R_LARCH_ABS_LO12", "R_LARCH_ADD16", "R_LARCH_ADD24", "R_LARCH_ADD32", "R_LARCH_ADD64", "R_LARCH_ADD8", + "R_LARCH_B16", + "R_LARCH_B21", + "R_LARCH_B26", "R_LARCH_COPY", + "R_LARCH_GNU_VTENTRY", + "R_LARCH_GNU_VTINHERIT", + "R_LARCH_GOT64_HI12", + "R_LARCH_GOT64_LO20", + "R_LARCH_GOT64_PC_HI12", + "R_LARCH_GOT64_PC_LO20", + "R_LARCH_GOT_HI20", + "R_LARCH_GOT_LO12", + "R_LARCH_GOT_PC_HI20", + "R_LARCH_GOT_PC_LO12", "R_LARCH_IRELATIVE", "R_LARCH_JUMP_SLOT", "R_LARCH_MARK_LA", "R_LARCH_MARK_PCREL", "R_LARCH_NONE", + "R_LARCH_PCALA64_HI12", + "R_LARCH_PCALA64_LO20", + "R_LARCH_PCALA_HI20", + "R_LARCH_PCALA_LO12", "R_LARCH_RELATIVE", + "R_LARCH_RELAX", "R_LARCH_SOP_ADD", "R_LARCH_SOP_AND", "R_LARCH_SOP_ASSERT", @@ -1875,6 +1920,22 @@ var stdlib = map[string][]string{ "R_LARCH_TLS_DTPMOD64", "R_LARCH_TLS_DTPREL32", "R_LARCH_TLS_DTPREL64", + "R_LARCH_TLS_GD_HI20", + "R_LARCH_TLS_GD_PC_HI20", + "R_LARCH_TLS_IE64_HI12", + "R_LARCH_TLS_IE64_LO20", + "R_LARCH_TLS_IE64_PC_HI12", + "R_LARCH_TLS_IE64_PC_LO20", + "R_LARCH_TLS_IE_HI20", + "R_LARCH_TLS_IE_LO12", + "R_LARCH_TLS_IE_PC_HI20", + "R_LARCH_TLS_IE_PC_LO12", + "R_LARCH_TLS_LD_HI20", + "R_LARCH_TLS_LD_PC_HI20", + "R_LARCH_TLS_LE64_HI12", + "R_LARCH_TLS_LE64_LO20", + "R_LARCH_TLS_LE_HI20", + "R_LARCH_TLS_LE_LO12", "R_LARCH_TLS_TPREL32", "R_LARCH_TLS_TPREL64", "R_MIPS", @@ -1938,15 +1999,25 @@ var stdlib = map[string][]string{ "R_PPC64_ADDR16_HIGH", "R_PPC64_ADDR16_HIGHA", "R_PPC64_ADDR16_HIGHER", + "R_PPC64_ADDR16_HIGHER34", "R_PPC64_ADDR16_HIGHERA", + "R_PPC64_ADDR16_HIGHERA34", "R_PPC64_ADDR16_HIGHEST", + "R_PPC64_ADDR16_HIGHEST34", "R_PPC64_ADDR16_HIGHESTA", + "R_PPC64_ADDR16_HIGHESTA34", "R_PPC64_ADDR16_LO", "R_PPC64_ADDR16_LO_DS", "R_PPC64_ADDR24", "R_PPC64_ADDR32", "R_PPC64_ADDR64", "R_PPC64_ADDR64_LOCAL", + "R_PPC64_COPY", + "R_PPC64_D28", + "R_PPC64_D34", + "R_PPC64_D34_HA30", + "R_PPC64_D34_HI30", + "R_PPC64_D34_LO", "R_PPC64_DTPMOD64", "R_PPC64_DTPREL16", "R_PPC64_DTPREL16_DS", @@ -1960,8 +2031,12 @@ var stdlib = map[string][]string{ "R_PPC64_DTPREL16_HIGHESTA", "R_PPC64_DTPREL16_LO", "R_PPC64_DTPREL16_LO_DS", + "R_PPC64_DTPREL34", "R_PPC64_DTPREL64", "R_PPC64_ENTRY", + "R_PPC64_GLOB_DAT", + "R_PPC64_GNU_VTENTRY", + "R_PPC64_GNU_VTINHERIT", "R_PPC64_GOT16", "R_PPC64_GOT16_DS", "R_PPC64_GOT16_HA", @@ -1972,29 +2047,50 @@ var stdlib = map[string][]string{ "R_PPC64_GOT_DTPREL16_HA", "R_PPC64_GOT_DTPREL16_HI", "R_PPC64_GOT_DTPREL16_LO_DS", + "R_PPC64_GOT_DTPREL_PCREL34", + "R_PPC64_GOT_PCREL34", "R_PPC64_GOT_TLSGD16", "R_PPC64_GOT_TLSGD16_HA", "R_PPC64_GOT_TLSGD16_HI", "R_PPC64_GOT_TLSGD16_LO", + "R_PPC64_GOT_TLSGD_PCREL34", "R_PPC64_GOT_TLSLD16", "R_PPC64_GOT_TLSLD16_HA", "R_PPC64_GOT_TLSLD16_HI", "R_PPC64_GOT_TLSLD16_LO", + "R_PPC64_GOT_TLSLD_PCREL34", "R_PPC64_GOT_TPREL16_DS", "R_PPC64_GOT_TPREL16_HA", "R_PPC64_GOT_TPREL16_HI", "R_PPC64_GOT_TPREL16_LO_DS", + "R_PPC64_GOT_TPREL_PCREL34", "R_PPC64_IRELATIVE", "R_PPC64_JMP_IREL", "R_PPC64_JMP_SLOT", "R_PPC64_NONE", + "R_PPC64_PCREL28", + "R_PPC64_PCREL34", + "R_PPC64_PCREL_OPT", + "R_PPC64_PLT16_HA", + "R_PPC64_PLT16_HI", + "R_PPC64_PLT16_LO", "R_PPC64_PLT16_LO_DS", + "R_PPC64_PLT32", + "R_PPC64_PLT64", + "R_PPC64_PLTCALL", + "R_PPC64_PLTCALL_NOTOC", "R_PPC64_PLTGOT16", "R_PPC64_PLTGOT16_DS", "R_PPC64_PLTGOT16_HA", "R_PPC64_PLTGOT16_HI", "R_PPC64_PLTGOT16_LO", "R_PPC64_PLTGOT_LO_DS", + "R_PPC64_PLTREL32", + "R_PPC64_PLTREL64", + "R_PPC64_PLTSEQ", + "R_PPC64_PLTSEQ_NOTOC", + "R_PPC64_PLT_PCREL34", + "R_PPC64_PLT_PCREL34_NOTOC", "R_PPC64_REL14", "R_PPC64_REL14_BRNTAKEN", "R_PPC64_REL14_BRTAKEN", @@ -2002,13 +2098,28 @@ var stdlib = map[string][]string{ "R_PPC64_REL16DX_HA", "R_PPC64_REL16_HA", "R_PPC64_REL16_HI", + "R_PPC64_REL16_HIGH", + "R_PPC64_REL16_HIGHA", + "R_PPC64_REL16_HIGHER", + "R_PPC64_REL16_HIGHER34", + "R_PPC64_REL16_HIGHERA", + "R_PPC64_REL16_HIGHERA34", + "R_PPC64_REL16_HIGHEST", + "R_PPC64_REL16_HIGHEST34", + "R_PPC64_REL16_HIGHESTA", + "R_PPC64_REL16_HIGHESTA34", "R_PPC64_REL16_LO", "R_PPC64_REL24", "R_PPC64_REL24_NOTOC", + "R_PPC64_REL30", "R_PPC64_REL32", "R_PPC64_REL64", "R_PPC64_RELATIVE", + "R_PPC64_SECTOFF", "R_PPC64_SECTOFF_DS", + "R_PPC64_SECTOFF_HA", + "R_PPC64_SECTOFF_HI", + "R_PPC64_SECTOFF_LO", "R_PPC64_SECTOFF_LO_DS", "R_PPC64_TLS", "R_PPC64_TLSGD", @@ -2033,7 +2144,11 @@ var stdlib = map[string][]string{ "R_PPC64_TPREL16_HIGHESTA", "R_PPC64_TPREL16_LO", "R_PPC64_TPREL16_LO_DS", + "R_PPC64_TPREL34", "R_PPC64_TPREL64", + "R_PPC64_UADDR16", + "R_PPC64_UADDR32", + "R_PPC64_UADDR64", "R_PPC_ADDR14", "R_PPC_ADDR14_BRNTAKEN", "R_PPC_ADDR14_BRTAKEN", @@ -2581,6 +2696,9 @@ var stdlib = map[string][]string{ "IMAGE_FILE_MACHINE_POWERPC", "IMAGE_FILE_MACHINE_POWERPCFP", "IMAGE_FILE_MACHINE_R4000", + "IMAGE_FILE_MACHINE_RISCV128", + "IMAGE_FILE_MACHINE_RISCV32", + "IMAGE_FILE_MACHINE_RISCV64", "IMAGE_FILE_MACHINE_SH3", "IMAGE_FILE_MACHINE_SH3DSP", "IMAGE_FILE_MACHINE_SH4", @@ -2846,6 +2964,7 @@ var stdlib = map[string][]string{ "errors": { "As", "Is", + "Join", "New", "Unwrap", }, @@ -2916,6 +3035,7 @@ var stdlib = map[string][]string{ "Appendf", "Appendln", "Errorf", + "FormatString", "Formatter", "Fprint", "Fprintf", @@ -3401,6 +3521,7 @@ var stdlib = map[string][]string{ "RecvOnly", "RelativeTo", "Rune", + "Satisfies", "Scope", "Selection", "SelectionKind", @@ -3702,8 +3823,10 @@ var stdlib = map[string][]string{ "LimitedReader", "MultiReader", "MultiWriter", + "NewOffsetWriter", "NewSectionReader", "NopCloser", + "OffsetWriter", "Pipe", "PipeReader", "PipeWriter", @@ -3770,6 +3893,7 @@ var stdlib = map[string][]string{ "ReadDirFile", "ReadFile", "ReadFileFS", + "SkipAll", "SkipDir", "Stat", "StatFS", @@ -4140,6 +4264,7 @@ var stdlib = map[string][]string{ "FlagLoopback", "FlagMulticast", "FlagPointToPoint", + "FlagRunning", "FlagUp", "Flags", "HardwareAddr", @@ -4284,6 +4409,7 @@ var stdlib = map[string][]string{ "NewFileTransport", "NewRequest", "NewRequestWithContext", + "NewResponseController", "NewServeMux", "NoBody", "NotFound", @@ -4303,6 +4429,7 @@ var stdlib = map[string][]string{ "RedirectHandler", "Request", "Response", + "ResponseController", "ResponseWriter", "RoundTripper", "SameSite", @@ -4445,6 +4572,7 @@ var stdlib = map[string][]string{ "NewProxyClientConn", "NewServerConn", "NewSingleHostReverseProxy", + "ProxyRequest", "ReverseProxy", "ServerConn", }, @@ -4476,6 +4604,8 @@ var stdlib = map[string][]string{ "AddrPortFrom", "IPv4Unspecified", "IPv6LinkLocalAllNodes", + "IPv6LinkLocalAllRouters", + "IPv6Loopback", "IPv6Unspecified", "MustParseAddr", "MustParseAddrPort", @@ -4686,6 +4816,7 @@ var stdlib = map[string][]string{ "CommandContext", "ErrDot", "ErrNotFound", + "ErrWaitDelay", "Error", "ExitError", "LookPath", @@ -4734,11 +4865,13 @@ var stdlib = map[string][]string{ "Glob", "HasPrefix", "IsAbs", + "IsLocal", "Join", "ListSeparator", "Match", "Rel", "Separator", + "SkipAll", "SkipDir", "Split", "SplitList", @@ -4860,6 +4993,7 @@ var stdlib = map[string][]string{ "ErrInvalidRepeatOp", "ErrInvalidRepeatSize", "ErrInvalidUTF8", + "ErrLarge", "ErrMissingBracket", "ErrMissingParen", "ErrMissingRepeatArgument", @@ -4968,8 +5102,16 @@ var stdlib = map[string][]string{ }, "runtime/cgo": { "Handle", + "Incomplete", "NewHandle", }, + "runtime/coverage": { + "ClearCounters", + "WriteCounters", + "WriteCountersDir", + "WriteMeta", + "WriteMetaDir", + }, "runtime/debug": { "BuildInfo", "BuildSetting", @@ -5103,6 +5245,8 @@ var stdlib = map[string][]string{ "ContainsRune", "Count", "Cut", + "CutPrefix", + "CutSuffix", "EqualFold", "Fields", "FieldsFunc", @@ -5272,6 +5416,7 @@ var stdlib = map[string][]string{ "AF_TIPC", "AF_UNIX", "AF_UNSPEC", + "AF_UTUN", "AF_VENDOR00", "AF_VENDOR01", "AF_VENDOR02", @@ -5610,20 +5755,25 @@ var stdlib = map[string][]string{ "CLOCAL", "CLONE_CHILD_CLEARTID", "CLONE_CHILD_SETTID", + "CLONE_CLEAR_SIGHAND", "CLONE_CSIGNAL", "CLONE_DETACHED", "CLONE_FILES", "CLONE_FS", + "CLONE_INTO_CGROUP", "CLONE_IO", + "CLONE_NEWCGROUP", "CLONE_NEWIPC", "CLONE_NEWNET", "CLONE_NEWNS", "CLONE_NEWPID", + "CLONE_NEWTIME", "CLONE_NEWUSER", "CLONE_NEWUTS", "CLONE_PARENT", "CLONE_PARENT_SETTID", "CLONE_PID", + "CLONE_PIDFD", "CLONE_PTRACE", "CLONE_SETTLS", "CLONE_SIGHAND", @@ -6166,6 +6316,7 @@ var stdlib = map[string][]string{ "EPROTONOSUPPORT", "EPROTOTYPE", "EPWROFF", + "EQFULL", "ERANGE", "EREMCHG", "EREMOTE", @@ -6592,6 +6743,7 @@ var stdlib = map[string][]string{ "F_DUPFD", "F_DUPFD_CLOEXEC", "F_EXLCK", + "F_FINDSIGS", "F_FLUSH_DATA", "F_FREEZE_FS", "F_FSCTL", @@ -6602,6 +6754,7 @@ var stdlib = map[string][]string{ "F_FSPRIV", "F_FSVOID", "F_FULLFSYNC", + "F_GETCODEDIR", "F_GETFD", "F_GETFL", "F_GETLEASE", @@ -6615,6 +6768,7 @@ var stdlib = map[string][]string{ "F_GETPATH_MTMINFO", "F_GETPIPE_SZ", "F_GETPROTECTIONCLASS", + "F_GETPROTECTIONLEVEL", "F_GETSIG", "F_GLOBAL_NOCACHE", "F_LOCK", @@ -6647,6 +6801,7 @@ var stdlib = map[string][]string{ "F_SETLK64", "F_SETLKW", "F_SETLKW64", + "F_SETLKWTIMEOUT", "F_SETLK_REMOTE", "F_SETNOSIGPIPE", "F_SETOWN", @@ -6656,9 +6811,11 @@ var stdlib = map[string][]string{ "F_SETSIG", "F_SETSIZE", "F_SHLCK", + "F_SINGLE_WRITER", "F_TEST", "F_THAW_FS", "F_TLOCK", + "F_TRANSCODEKEY", "F_ULOCK", "F_UNLCK", "F_UNLCKSYS", @@ -7854,12 +8011,20 @@ var stdlib = map[string][]string{ "NOFLSH", "NOTE_ABSOLUTE", "NOTE_ATTRIB", + "NOTE_BACKGROUND", "NOTE_CHILD", + "NOTE_CRITICAL", "NOTE_DELETE", "NOTE_EOF", "NOTE_EXEC", "NOTE_EXIT", "NOTE_EXITSTATUS", + "NOTE_EXIT_CSERROR", + "NOTE_EXIT_DECRYPTFAIL", + "NOTE_EXIT_DETAIL", + "NOTE_EXIT_DETAIL_MASK", + "NOTE_EXIT_MEMORY", + "NOTE_EXIT_REPARENTED", "NOTE_EXTEND", "NOTE_FFAND", "NOTE_FFCOPY", @@ -7868,6 +8033,7 @@ var stdlib = map[string][]string{ "NOTE_FFNOP", "NOTE_FFOR", "NOTE_FORK", + "NOTE_LEEWAY", "NOTE_LINK", "NOTE_LOWAT", "NOTE_NONE", @@ -7946,6 +8112,7 @@ var stdlib = map[string][]string{ "O_CREAT", "O_DIRECT", "O_DIRECTORY", + "O_DP_GETRAWENCRYPTED", "O_DSYNC", "O_EVTONLY", "O_EXCL", @@ -8235,6 +8402,7 @@ var stdlib = map[string][]string{ "RLIMIT_AS", "RLIMIT_CORE", "RLIMIT_CPU", + "RLIMIT_CPU_USAGE_MONITOR", "RLIMIT_DATA", "RLIMIT_FSIZE", "RLIMIT_NOFILE", @@ -8347,9 +8515,11 @@ var stdlib = map[string][]string{ "RTF_PROTO1", "RTF_PROTO2", "RTF_PROTO3", + "RTF_PROXY", "RTF_REINSTATE", "RTF_REJECT", "RTF_RNH_LOCKED", + "RTF_ROUTER", "RTF_SOURCE", "RTF_SRC", "RTF_STATIC", @@ -8868,6 +9038,7 @@ var stdlib = map[string][]string{ "SO_NO_OFFLOAD", "SO_NP_EXTENSIONS", "SO_NREAD", + "SO_NUMRCVPKT", "SO_NWRITE", "SO_OOBINLINE", "SO_OVERFLOWED", @@ -9037,6 +9208,7 @@ var stdlib = map[string][]string{ "SYS_CREAT", "SYS_CREATE_MODULE", "SYS_CSOPS", + "SYS_CSOPS_AUDITTOKEN", "SYS_DELETE", "SYS_DELETE_MODULE", "SYS_DUP", @@ -9223,6 +9395,7 @@ var stdlib = map[string][]string{ "SYS_JAIL_GET", "SYS_JAIL_REMOVE", "SYS_JAIL_SET", + "SYS_KAS_INFO", "SYS_KDEBUG_TRACE", "SYS_KENV", "SYS_KEVENT", @@ -9250,6 +9423,7 @@ var stdlib = map[string][]string{ "SYS_LCHMOD", "SYS_LCHOWN", "SYS_LCHOWN32", + "SYS_LEDGER", "SYS_LGETFH", "SYS_LGETXATTR", "SYS_LINK", @@ -9346,6 +9520,7 @@ var stdlib = map[string][]string{ "SYS_OPENAT", "SYS_OPENBSD_POLL", "SYS_OPEN_BY_HANDLE_AT", + "SYS_OPEN_DPROTECTED_NP", "SYS_OPEN_EXTENDED", "SYS_OPEN_NOCANCEL", "SYS_OVADVISE", @@ -9978,6 +10153,7 @@ var stdlib = map[string][]string{ "TCP_CONNECTIONTIMEOUT", "TCP_CORK", "TCP_DEFER_ACCEPT", + "TCP_ENABLE_ECN", "TCP_INFO", "TCP_KEEPALIVE", "TCP_KEEPCNT", @@ -10000,11 +10176,13 @@ var stdlib = map[string][]string{ "TCP_NODELAY", "TCP_NOOPT", "TCP_NOPUSH", + "TCP_NOTSENT_LOWAT", "TCP_NSTATES", "TCP_QUICKACK", "TCP_RXT_CONNDROPTIME", "TCP_RXT_FINDROP", "TCP_SACK_ENABLE", + "TCP_SENDMOREACKS", "TCP_SYNCNT", "TCP_VENDOR", "TCP_WINDOW_CLAMP", @@ -10540,6 +10718,8 @@ var stdlib = map[string][]string{ "April", "August", "Date", + "DateOnly", + "DateTime", "December", "Duration", "February", @@ -10594,6 +10774,7 @@ var stdlib = map[string][]string{ "Tick", "Ticker", "Time", + "TimeOnly", "Timer", "Tuesday", "UTC", @@ -10892,6 +11073,7 @@ var stdlib = map[string][]string{ "Zs", }, "unicode/utf16": { + "AppendRune", "Decode", "DecodeRune", "Encode", @@ -10920,10 +11102,14 @@ var stdlib = map[string][]string{ "ValidString", }, "unsafe": { + "Add", "Alignof", - "ArbitraryType", "Offsetof", "Pointer", "Sizeof", + "Slice", + "SliceData", + "String", + "StringData", }, } diff --git a/vendor/golang.org/x/tools/internal/pkgbits/decoder.go b/vendor/golang.org/x/tools/internal/pkgbits/decoder.go index 3205c9a1..b92e8e6e 100644 --- a/vendor/golang.org/x/tools/internal/pkgbits/decoder.go +++ b/vendor/golang.org/x/tools/internal/pkgbits/decoder.go @@ -373,7 +373,7 @@ func (r *Decoder) Int64() int64 { return r.rawVarint() } -// Int64 decodes and returns a uint64 value from the element bitstream. +// Uint64 decodes and returns a uint64 value from the element bitstream. func (r *Decoder) Uint64() uint64 { r.Sync(SyncUint64) return r.rawUvarint() diff --git a/vendor/golang.org/x/tools/internal/pkgbits/encoder.go b/vendor/golang.org/x/tools/internal/pkgbits/encoder.go index e98e4117..6482617a 100644 --- a/vendor/golang.org/x/tools/internal/pkgbits/encoder.go +++ b/vendor/golang.org/x/tools/internal/pkgbits/encoder.go @@ -293,7 +293,7 @@ func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) } // Int encodes and writes an int value into the element bitstream. func (w *Encoder) Int(x int) { w.Int64(int64(x)) } -// Len encodes and writes a uint value into the element bitstream. +// Uint encodes and writes a uint value into the element bitstream. func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) } // Reloc encodes and writes a relocation for the given (section, diff --git a/vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go b/vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go new file mode 100644 index 00000000..a3fb2d4f --- /dev/null +++ b/vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go @@ -0,0 +1,59 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// package tokeninternal provides access to some internal features of the token +// package. +package tokeninternal + +import ( + "go/token" + "sync" + "unsafe" +) + +// GetLines returns the table of line-start offsets from a token.File. +func GetLines(file *token.File) []int { + // token.File has a Lines method on Go 1.21 and later. + if file, ok := (interface{})(file).(interface{ Lines() []int }); ok { + return file.Lines() + } + + // This declaration must match that of token.File. + // This creates a risk of dependency skew. + // For now we check that the size of the two + // declarations is the same, on the (fragile) assumption + // that future changes would add fields. + type tokenFile119 struct { + _ string + _ int + _ int + mu sync.Mutex // we're not complete monsters + lines []int + _ []struct{} + } + type tokenFile118 struct { + _ *token.FileSet // deleted in go1.19 + tokenFile119 + } + + type uP = unsafe.Pointer + switch unsafe.Sizeof(*file) { + case unsafe.Sizeof(tokenFile118{}): + var ptr *tokenFile118 + *(*uP)(uP(&ptr)) = uP(file) + ptr.mu.Lock() + defer ptr.mu.Unlock() + return ptr.lines + + case unsafe.Sizeof(tokenFile119{}): + var ptr *tokenFile119 + *(*uP)(uP(&ptr)) = uP(file) + ptr.mu.Lock() + defer ptr.mu.Unlock() + return ptr.lines + + default: + panic("unexpected token.File size") + } +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 15834131..f84cd711 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -167,10 +167,10 @@ github.com/josharian/intern # github.com/json-iterator/go v1.1.12 ## explicit; go 1.12 github.com/json-iterator/go -# github.com/klauspost/compress v1.15.15 -## explicit; go 1.17 +# github.com/klauspost/compress v1.16.4 +## explicit; go 1.18 github.com/klauspost/compress/s2 -# github.com/klauspost/cpuid/v2 v2.2.3 +# github.com/klauspost/cpuid/v2 v2.2.4 ## explicit; go 1.15 github.com/klauspost/cpuid/v2 # github.com/labstack/echo/v4 v4.9.1 @@ -223,7 +223,7 @@ github.com/miekg/dns # github.com/minio/md5-simd v1.1.2 ## explicit; go 1.14 github.com/minio/md5-simd -# github.com/minio/minio-go/v7 v7.0.47 +# github.com/minio/minio-go/v7 v7.0.50 ## explicit; go 1.17 github.com/minio/minio-go/v7 github.com/minio/minio-go/v7/pkg/credentials @@ -366,7 +366,7 @@ go.uber.org/zap/internal/bufferpool go.uber.org/zap/internal/color go.uber.org/zap/internal/exit go.uber.org/zap/zapcore -# golang.org/x/crypto v0.5.0 +# golang.org/x/crypto v0.7.0 ## explicit; go 1.17 golang.org/x/crypto/acme golang.org/x/crypto/acme/autocert @@ -375,12 +375,12 @@ golang.org/x/crypto/blake2b golang.org/x/crypto/ocsp golang.org/x/crypto/pbkdf2 golang.org/x/crypto/sha3 -# golang.org/x/mod v0.7.0 +# golang.org/x/mod v0.8.0 ## explicit; go 1.17 golang.org/x/mod/internal/lazyregexp golang.org/x/mod/module golang.org/x/mod/semver -# golang.org/x/net v0.7.0 +# golang.org/x/net v0.8.0 ## explicit; go 1.17 golang.org/x/net/bpf golang.org/x/net/http/httpguts @@ -395,7 +395,7 @@ golang.org/x/net/ipv6 golang.org/x/net/publicsuffix golang.org/x/net/webdav golang.org/x/net/webdav/internal/xml -# golang.org/x/sys v0.5.0 +# golang.org/x/sys v0.7.0 ## explicit; go 1.17 golang.org/x/sys/cpu golang.org/x/sys/execabs @@ -403,7 +403,7 @@ golang.org/x/sys/internal/unsafeheader golang.org/x/sys/unix golang.org/x/sys/windows golang.org/x/sys/windows/registry -# golang.org/x/text v0.7.0 +# golang.org/x/text v0.8.0 ## explicit; go 1.17 golang.org/x/text/cases golang.org/x/text/internal @@ -418,7 +418,7 @@ golang.org/x/text/unicode/norm # golang.org/x/time v0.3.0 ## explicit golang.org/x/time/rate -# golang.org/x/tools v0.4.0 +# golang.org/x/tools v0.6.0 ## explicit; go 1.18 golang.org/x/tools/go/ast/astutil golang.org/x/tools/go/buildutil @@ -439,6 +439,7 @@ golang.org/x/tools/internal/gopathwalk golang.org/x/tools/internal/imports golang.org/x/tools/internal/packagesinternal golang.org/x/tools/internal/pkgbits +golang.org/x/tools/internal/tokeninternal golang.org/x/tools/internal/typeparams golang.org/x/tools/internal/typesinternal # google.golang.org/protobuf v1.28.1