Update all dependencies
This commit is contained in:
parent
88b832172e
commit
d344396706
51 changed files with 8645 additions and 60261 deletions
2
vendor/github.com/clipperhouse/stringish/.gitignore
generated
vendored
2
vendor/github.com/clipperhouse/stringish/.gitignore
generated
vendored
|
|
@ -1,2 +0,0 @@
|
|||
.DS_Store
|
||||
*.test
|
||||
21
vendor/github.com/clipperhouse/stringish/LICENSE
generated
vendored
21
vendor/github.com/clipperhouse/stringish/LICENSE
generated
vendored
|
|
@ -1,21 +0,0 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2025 Matt Sherman
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
64
vendor/github.com/clipperhouse/stringish/README.md
generated
vendored
64
vendor/github.com/clipperhouse/stringish/README.md
generated
vendored
|
|
@ -1,64 +0,0 @@
|
|||
# stringish
|
||||
|
||||
A small Go module that provides a generic type constraint for “string-like”
|
||||
data, and a utf8 package that works with both strings and byte slices
|
||||
without conversions.
|
||||
|
||||
```go
|
||||
type Interface interface {
|
||||
~[]byte | ~string
|
||||
}
|
||||
```
|
||||
|
||||
[Documentation](https://pkg.go.dev/github.com/clipperhouse/stringish/utf8)
|
||||
[Test status](https://github.com/clipperhouse/stringish/actions/workflows/gotest.yml)
|
||||
|
||||
## Install
|
||||
|
||||
```
|
||||
go get github.com/clipperhouse/stringish
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
```go
|
||||
import (
|
||||
"github.com/clipperhouse/stringish"
|
||||
"github.com/clipperhouse/stringish/utf8"
|
||||
)
|
||||
|
||||
s := "Hello, 世界"
|
||||
r, size := utf8.DecodeRune(s) // not DecodeRuneInString 🎉
|
||||
|
||||
b := []byte("Hello, 世界")
|
||||
r, size = utf8.DecodeRune(b) // same API!
|
||||
|
||||
func MyFoo[T stringish.Interface](s T) T {
|
||||
// pass a string or a []byte
|
||||
// iterate, slice, transform, whatever
|
||||
}
|
||||
```
|
||||
|
||||
## Motivation
|
||||
|
||||
Sometimes we want APIs to accept `string` or `[]byte` without having to convert
|
||||
between those types. That conversion usually allocates!
|
||||
|
||||
By implementing with `stringish.Interface`, we can have a single API, and
|
||||
single implementation for both types: one `Foo` instead of `Foo` and
|
||||
`FooString`.
|
||||
|
||||
We have converted the
|
||||
[`unicode/utf8` package](https://github.com/clipperhouse/stringish/blob/main/utf8/utf8.go)
|
||||
as an example -- note the absence of `*InString` funcs. We might look at `x/text`
|
||||
next.
|
||||
|
||||
## Used by
|
||||
|
||||
- clipperhouse/uax29: [stringish trie](https://github.com/clipperhouse/uax29/blob/master/graphemes/trie.go#L27), [stringish iterator](https://github.com/clipperhouse/uax29/blob/master/internal/iterators/iterator.go#L9), [stringish SplitFunc](https://github.com/clipperhouse/uax29/blob/master/graphemes/splitfunc.go#L21)
|
||||
|
||||
- [clipperhouse/displaywidth](https://github.com/clipperhouse/displaywidth)
|
||||
|
||||
## Prior discussion
|
||||
|
||||
- [Consideration of similar by the Go team](https://github.com/golang/go/issues/48643)
|
||||
5
vendor/github.com/clipperhouse/stringish/interface.go
generated
vendored
5
vendor/github.com/clipperhouse/stringish/interface.go
generated
vendored
|
|
@ -1,5 +0,0 @@
|
|||
package stringish
|
||||
|
||||
// Interface is a type constraint satisfied by any type whose underlying type
// is []byte or string.
type Interface interface {
|
||||
~[]byte | ~string
|
||||
}
|
||||
10
vendor/github.com/clipperhouse/uax29/v2/graphemes/README.md
generated
vendored
10
vendor/github.com/clipperhouse/uax29/v2/graphemes/README.md
generated
vendored
|
|
@ -76,15 +76,17 @@ for tokens.Next() { // Next() returns true until end of data
|
|||
|
||||
### Benchmarks
|
||||
|
||||
On a Mac M2 laptop, we see around 200MB/s, or around 100 million graphemes per second, and no allocations.
|
||||
|
||||
```
|
||||
goos: darwin
|
||||
goarch: arm64
|
||||
pkg: github.com/clipperhouse/uax29/graphemes/comparative
|
||||
cpu: Apple M2
|
||||
BenchmarkGraphemes/clipperhouse/uax29-8 171895 ns/op 203.39 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkGraphemes/rivo/uniseg-8 1980475 ns/op 17.65 MB/s 0 B/op 0 allocs/op
|
||||
|
||||
BenchmarkGraphemesMixed/clipperhouse/uax29-8 142635 ns/op 245.12 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkGraphemesMixed/rivo/uniseg-8 2018284 ns/op 17.32 MB/s 0 B/op 0 allocs/op
|
||||
|
||||
BenchmarkGraphemesASCII/clipperhouse/uax29-8 8846 ns/op 508.73 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkGraphemesASCII/rivo/uniseg-8 366760 ns/op 12.27 MB/s 0 B/op 0 allocs/op
|
||||
```
|
||||
|
||||
### Invalid inputs
|
||||
|
|
|
|||
119
vendor/github.com/clipperhouse/uax29/v2/graphemes/ansi.go
generated
vendored
Normal file
119
vendor/github.com/clipperhouse/uax29/v2/graphemes/ansi.go
generated
vendored
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
package graphemes
|
||||
|
||||
// ansiEscapeLength returns the byte length of a valid ANSI escape sequence at the
|
||||
// start of data, or 0 if none. Input is UTF-8; only 7-bit ESC sequences are
|
||||
// recognized (C1 0x80–0x9F can be UTF-8 continuation bytes).
|
||||
//
|
||||
// Recognized forms (ECMA-48 / ISO 6429):
|
||||
// - CSI: ESC [ then parameter bytes (0x30–0x3F), intermediate (0x20–0x2F), final (0x40–0x7E)
|
||||
// - OSC: ESC ] then payload until ST (ESC \) or BEL (0x07)
|
||||
// - DCS, SOS, PM, APC: ESC P / X / ^ / _ then payload until ST (ESC \)
|
||||
// - Two-byte: ESC + Fe (0x40–0x5F excluding above), or Fp (0x30–0x3F), or nF (0x20–0x2F then final)
|
||||
func ansiEscapeLength[T ~string | ~[]byte](data T) int {
|
||||
n := len(data)
|
||||
if n < 2 {
|
||||
return 0
|
||||
}
|
||||
if data[0] != esc {
|
||||
return 0
|
||||
}
|
||||
|
||||
b1 := data[1]
|
||||
switch b1 {
|
||||
case '[': // CSI
|
||||
body := csiLength(data[2:])
|
||||
if body == 0 {
|
||||
return 0
|
||||
}
|
||||
return 2 + body
|
||||
case ']': // OSC – allows BEL or ST as terminator
|
||||
body := oscLength(data[2:])
|
||||
if body == 0 {
|
||||
return 0
|
||||
}
|
||||
return 2 + body
|
||||
case 'P', 'X', '^', '_': // DCS, SOS, PM, APC – require ST (ESC \) only
|
||||
body := stSequenceLength(data[2:])
|
||||
if body == 0 {
|
||||
return 0
|
||||
}
|
||||
return 2 + body
|
||||
}
|
||||
if b1 >= 0x40 && b1 <= 0x5F {
|
||||
// Fe (C1) two-byte; [ ] P X ^ _ handled above
|
||||
return 2
|
||||
}
|
||||
if b1 >= 0x30 && b1 <= 0x3F {
|
||||
// Fp (private) two-byte
|
||||
return 2
|
||||
}
|
||||
if b1 >= 0x20 && b1 <= 0x2F {
|
||||
// nF: intermediates then one final (0x30–0x7E)
|
||||
i := 2
|
||||
for i < n && data[i] >= 0x20 && data[i] <= 0x2F {
|
||||
i++
|
||||
}
|
||||
if i < n && data[i] >= 0x30 && data[i] <= 0x7E {
|
||||
return i + 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// csiLength measures the body of a CSI sequence, i.e. everything that follows
// "ESC [" up to and including the final byte. It returns 0 when data does not
// begin with a complete, well-formed body.
//
// ECMA-48 layout of the body:
//
//	parameters (0x30–0x3F)*, intermediates (0x20–0x2F)*, final (0x40–0x7E)
//
// A parameter byte that appears after an intermediate byte makes the
// sequence invalid.
func csiLength[T ~string | ~[]byte](data T) int {
	pastParams := false
	for i := 0; i < len(data); i++ {
		c := data[i]
		switch {
		case c >= 0x40 && c <= 0x7E:
			// Final byte: the body ends here.
			return i + 1
		case c >= 0x30 && c <= 0x3F:
			// Parameter bytes are only legal before the first intermediate.
			if pastParams {
				return 0
			}
		case c >= 0x20 && c <= 0x2F:
			pastParams = true
		default:
			// Anything else cannot occur inside a CSI body.
			return 0
		}
	}
	// Ran out of data before a final byte was found.
	return 0
}
|
||||
|
||||
// oscLength returns the length of the OSC body up to and including
|
||||
// the terminator. OSC accepts either BEL (0x07) or ST (ESC \) per
|
||||
// widespread terminal convention. data is the slice after "ESC ]".
|
||||
func oscLength[T ~string | ~[]byte](data T) int {
|
||||
for i := 0; i < len(data); i++ {
|
||||
b := data[i]
|
||||
if b == bel {
|
||||
return i + 1
|
||||
}
|
||||
if b == esc && i+1 < len(data) && data[i+1] == '\\' {
|
||||
return i + 2
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// stSequenceLength returns the length of a control-string body up to and
|
||||
// including the ST (ESC \) terminator. Used for DCS, SOS, PM, and APC, which
|
||||
// per ECMA-48 require ST and do not accept BEL. data is the slice after "ESC x".
|
||||
func stSequenceLength[T ~string | ~[]byte](data T) int {
|
||||
for i := 0; i < len(data); i++ {
|
||||
if data[i] == esc && i+1 < len(data) && data[i+1] == '\\' {
|
||||
return i + 2
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
27
vendor/github.com/clipperhouse/uax29/v2/graphemes/iterator.go
generated
vendored
27
vendor/github.com/clipperhouse/uax29/v2/graphemes/iterator.go
generated
vendored
|
|
@ -1,5 +1,7 @@
|
|||
package graphemes
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// FromString returns an iterator for the grapheme clusters in the input string.
|
||||
// Iterate while Next() is true, and access the grapheme via Value().
|
||||
func FromString(s string) *Iterator[string] {
|
||||
|
|
@ -25,6 +27,9 @@ type Iterator[T ~string | ~[]byte] struct {
|
|||
data T
|
||||
pos int
|
||||
start int
|
||||
// AnsiEscapeSequences treats ANSI escape sequences (ECMA-48) as single grapheme
|
||||
// clusters when true. Default is false.
|
||||
AnsiEscapeSequences bool
|
||||
}
|
||||
|
||||
var (
|
||||
|
|
@ -32,6 +37,12 @@ var (
|
|||
splitFuncBytes = splitFunc[[]byte]
|
||||
)
|
||||
|
||||
// Control bytes referenced by the ANSI escape handling and the ASCII hot path.
const (
|
||||
// esc is the ESC byte that must start every escape sequence recognized here.
esc = 0x1B
|
||||
// cr is carriage return; Next keeps it out of the ASCII hot path.
cr = 0x0D
|
||||
// bel is the BEL byte, accepted as an OSC terminator.
bel = 0x07
|
||||
)
|
||||
|
||||
// Next advances the iterator to the next grapheme cluster.
|
||||
// Returns false when there are no more grapheme clusters.
|
||||
func (iter *Iterator[T]) Next() bool {
|
||||
|
|
@ -40,12 +51,18 @@ func (iter *Iterator[T]) Next() bool {
|
|||
}
|
||||
iter.start = iter.pos
|
||||
|
||||
// ASCII hot path: if current byte is printable ASCII and
|
||||
// next byte is also ASCII (or end of data), return single byte
|
||||
if iter.AnsiEscapeSequences && iter.data[iter.pos] == esc {
|
||||
if a := ansiEscapeLength(iter.data[iter.pos:]); a > 0 {
|
||||
iter.pos += a
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// ASCII hot path: any ASCII is one grapheme when next byte is ASCII or end.
|
||||
// Fall through on CR so splitfunc can handle CR+LF as a single cluster.
|
||||
b := iter.data[iter.pos]
|
||||
if b >= 0x20 && b < 0x7F {
|
||||
// If next byte is non-ASCII, it could be a combining mark
|
||||
if iter.pos+1 >= len(iter.data) || iter.data[iter.pos+1] < 0x80 {
|
||||
if b < utf8.RuneSelf && b != cr {
|
||||
if iter.pos+1 >= len(iter.data) || iter.data[iter.pos+1] < utf8.RuneSelf {
|
||||
iter.pos++
|
||||
return true
|
||||
}
|
||||
|
|
|
|||
4
vendor/github.com/clipperhouse/uax29/v2/graphemes/splitfunc.go
generated
vendored
4
vendor/github.com/clipperhouse/uax29/v2/graphemes/splitfunc.go
generated
vendored
|
|
@ -2,8 +2,6 @@ package graphemes
|
|||
|
||||
import (
|
||||
"bufio"
|
||||
|
||||
"github.com/clipperhouse/stringish"
|
||||
)
|
||||
|
||||
// is reports whether lookup intersects one or more of the given properties
|
||||
|
|
@ -28,7 +26,7 @@ const (
|
|||
// See https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
|
||||
var SplitFunc bufio.SplitFunc = splitFunc[[]byte]
|
||||
|
||||
func splitFunc[T stringish.Interface](data T, atEOF bool) (advance int, token T, err error) {
|
||||
func splitFunc[T ~string | ~[]byte](data T, atEOF bool) (advance int, token T, err error) {
|
||||
var empty T
|
||||
if len(data) == 0 {
|
||||
return 0, empty, nil
|
||||
|
|
|
|||
4
vendor/github.com/clipperhouse/uax29/v2/graphemes/trie.go
generated
vendored
4
vendor/github.com/clipperhouse/uax29/v2/graphemes/trie.go
generated
vendored
|
|
@ -3,8 +3,6 @@ package graphemes
|
|||
// generated by github.com/clipperhouse/uax29/v2
|
||||
// from https://www.unicode.org/Public/17.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
|
||||
|
||||
import "github.com/clipperhouse/stringish"
|
||||
|
||||
type property uint32
|
||||
|
||||
const (
|
||||
|
|
@ -30,7 +28,7 @@ const (
|
|||
// lookup returns the trie value for the first UTF-8 encoding in s and
|
||||
// the width in bytes of this encoding. The size will be 0 if s does not
|
||||
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
|
||||
func lookup[T stringish.Interface](s T) (v property, sz int) {
|
||||
func lookup[T ~string | ~[]byte](s T) (v property, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue