garm/vendor/github.com/go-openapi/swag/mangling/split.go
dependabot[bot] 2a72bc8689
Bump github.com/go-openapi/swag from 0.23.1 to 0.24.1
Bumps [github.com/go-openapi/swag](https://github.com/go-openapi/swag) from 0.23.1 to 0.24.1.
- [Commits](https://github.com/go-openapi/swag/compare/v0.23.1...v0.24.1)

---
updated-dependencies:
- dependency-name: github.com/go-openapi/swag
  dependency-version: 0.24.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-01 11:24:03 +00:00

312 lines
8.5 KiB
Go

// Copyright 2015 go-swagger maintainers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mangling
import (
"unicode"
)
// splitterOption customizes a splitter. Options are applied in the order
// they are passed to newSplitter.
type splitterOption func(*splitter)
// withPostSplitInitialismCheck is an option that switches on the extra
// initialism lookup performed on segments once the main split is done.
func withPostSplitInitialismCheck(sp *splitter) {
	sp.postSplitInitialismCheck = true
}
// withReplaceFunc builds an option that installs fn as the rune replacement
// function of the splitter it is applied to.
//
// The returned function value can be passed wherever a splitterOption is
// expected.
func withReplaceFunc(fn ReplaceFunc) func(*splitter) {
	option := func(target *splitter) {
		target.replaceFunc = fn
	}

	return option
}
// withInitialismsCache builds an option that makes the splitter use c as its
// initialisms cache. The pointer is stored as-is: the cache is not copied.
func withInitialismsCache(c *initialismsCache) splitterOption {
	var option splitterOption = func(target *splitter) {
		target.initialismsCache = c
	}

	return option
}
type (
	// initialismMatch tracks one attempt at matching a known initialism
	// against the runes of a name.
	initialismMatch struct {
		// body holds the runes of the initialism being matched. When a simple
		// plural is detected, a trailing 's' is appended to it.
		body []rune
		// start and end are rune positions in the name: start is where the
		// attempt began, end is the last matched rune (set on completion).
		start, end int
		// complete is set once the whole body has been matched.
		complete bool
		// hasPlural is the plural form of the initialism, copied from the
		// initialisms cache when the attempt is created.
		hasPlural pluralForm
	}
	// initialismMatches is a list of matching attempts.
	initialismMatches []initialismMatch
)
// isZero reports whether both the start and the end position of the match
// are 0, which is the case for a zero-valued initialismMatch.
//
// Only positions are inspected: body, complete and hasPlural are ignored.
func (m initialismMatch) isZero() bool {
	if m.start != 0 {
		return false
	}

	return m.end == 0
}
// splitter breaks a name down into lexems, recognizing the initialisms
// provided by its embedded cache.
type splitter struct {
	// the embedded cache supplies the initialisms to look for
	*initialismsCache
	// postSplitInitialismCheck enables an extra initialism lookup on the
	// segments produced by the split
	postSplitInitialismCheck bool
	// replaceFunc maps a rune to a replacement string; newSplitter falls back
	// to defaultReplaceTable when none is configured
	replaceFunc ReplaceFunc
}
// newSplitter builds a splitter from the provided options, applied in order.
//
// When no option has configured a replacement function, defaultReplaceTable
// is used.
func newSplitter(options ...splitterOption) splitter {
	sp := splitter{}

	for i := range options {
		options[i](&sp)
	}

	if sp.replaceFunc != nil {
		return sp
	}

	sp.replaceFunc = defaultReplaceTable

	return sp
}
// split breaks name down into lexems.
//
// The returned slice is borrowed from poolOfLexems. An empty name yields no
// initialism matches at all, in which case a freshly borrowed slice is
// returned as-is.
func (s splitter) split(name string) *[]nameLexem {
	runes := []rune(name)

	if found := s.gatherInitialismMatches(runes); found != nil {
		return s.mapMatchesToNameLexems(runes, found)
	}

	return poolOfLexems.BorrowLexems()
}
// gatherInitialismMatches scans nameRunes once and collects every attempt at
// matching an initialism from the cache, whether complete or still ongoing
// when the last rune is reached.
//
// It returns nil when nameRunes is empty. Otherwise the returned slice is
// borrowed from poolOfMatches and it is up to the caller to redeem it.
//
// NOTE(review): ongoing matches are indexed past their first rune on the next
// iteration, so this assumes every cached initialism holds at least 2 runes.
func (s splitter) gatherInitialismMatches(nameRunes []rune) *initialismMatches {
	var matches *initialismMatches
	lastPosition := len(nameRunes) - 1

	for currentRunePosition, currentRune := range nameRunes {
		// recycle these allocations as we loop over runes:
		// with such recycling, only 2 slices should be allocated per call
		// instead of o(n).
		newMatches := poolOfMatches.BorrowMatches()

		// check current initialism matches
		if matches != nil { // skip first iteration
			for _, match := range *matches {
				if match.complete {
					// the match is complete: keep it then move on to the next match
					*newMatches = append(*newMatches, match)

					continue
				}

				if match.body[currentRunePosition-match.start] != currentRune {
					// failed match: drop it
					continue
				}

				// try to complete ongoing match
				if currentRunePosition-match.start == len(match.body)-1 {
					// we are close; the next step is to check the symbol ahead:
					// if it is a lowercase letter, then it is not the end of the match
					// but the beginning of the next word.
					//
					// NOTE(fredbi): this heuristic sometimes leads to counterintuitive splits and
					// perhaps (not sure yet) we should check against case _alternance_.
					//
					// Example:
					//
					// In the current version, in the sentence "IDS initialism", "ID" is recognized
					// as an initialism, leading to a split like "id_s_initialism" (or IDSInitialism),
					// whereas in the sentence "IDx initialism", it is not and produces something like
					// "i_d_x_initialism" (or IDxInitialism). The generated file name is not great.
					//
					// Both go identifiers are tolerated by linters.
					//
					// Notice that the slightly different input "IDs initialism" is correctly detected
					// as a pluralized initialism and produces something like "ids_initialism"
					// (or IDsInitialism).
					if currentRunePosition < lastPosition {
						nextRune := nameRunes[currentRunePosition+1]

						// recognize a plural form for this initialism (only simple pluralization is supported)
						if nextRune == 's' && match.hasPlural == simplePlural {
							// The plural ends one rune ahead. This is tracked in a local variable:
							// the range variable must keep pointing at the current rune, because
							// the remaining ongoing matches and the new matches registered below
							// are all positioned relative to it.
							pluralEnd := currentRunePosition + 1

							if pluralEnd < lastPosition && unicode.IsLower(nameRunes[pluralEnd+1]) {
								// it is the start of a new word.
								// Match is only partial and the initialism is not recognized: move on
								continue
							}

							// this is a pluralized match: keep it.
							//
							// match.body aliases the runes held by the initialisms cache: capping
							// the capacity forces append to copy rather than write into the
							// spare capacity of that shared slice.
							body := match.body
							match.body = append(body[:len(body):len(body)], nextRune)
							match.complete = true
							match.end = pluralEnd
							*newMatches = append(*newMatches, match)

							continue
						}

						if unicode.IsLower(nextRune) {
							// it is the start of a new word.
							// Match is only partial and the initialism is not recognized: move on
							continue
						}
					}

					match.complete = true
					match.end = currentRunePosition
				}

				// append the ongoing matching attempt (not necessarily complete)
				*newMatches = append(*newMatches, match)
			}
		}

		// check for new initialism matches
		for i, r := range s.initialismsRunes {
			if r[0] == currentRune {
				*newMatches = append(*newMatches, initialismMatch{
					start:     currentRunePosition,
					body:      r,
					complete:  false,
					hasPlural: s.initialismsPluralForm[i],
				})
			}
		}

		if matches != nil {
			poolOfMatches.RedeemMatches(matches)
		}
		matches = newMatches
	}

	// up to the caller to redeem this last slice
	return matches
}
// mapMatchesToNameLexems turns the complete, non-overlapping initialism
// matches into initialism lexems, and breaks down the runes found before,
// between and after them into casual lexems.
//
// Incomplete matches are ignored, as well as matches starting at or before
// the end of the last accepted one. When no match is accepted, the whole name
// is broken down as a casual string.
//
// The returned slice is borrowed from poolOfLexems; matches is redeemed to
// poolOfMatches before returning.
func (s splitter) mapMatchesToNameLexems(nameRunes []rune, matches *initialismMatches) *[]nameLexem {
	nameLexems := poolOfLexems.BorrowLexems()

	var lastAcceptedMatch initialismMatch

	for _, candidate := range *matches {
		if !candidate.complete {
			continue
		}

		// start of the runes standing between the previous accepted match
		// (if any) and this one
		gapStart := 0
		if !lastAcceptedMatch.isZero() {
			if candidate.start <= lastAcceptedMatch.end {
				// overlaps the last accepted match: skip it
				continue
			}

			gapStart = lastAcceptedMatch.end + 1
		}

		s.appendBrokenDownCasualString(nameLexems, nameRunes[gapStart:candidate.start])
		*nameLexems = append(*nameLexems, s.breakInitialism(string(candidate.body)))
		lastAcceptedMatch = candidate
	}

	switch {
	case lastAcceptedMatch.isZero():
		// no match has been accepted: the whole name is a casual string
		*nameLexems = (*nameLexems)[:0]
		s.appendBrokenDownCasualString(nameLexems, nameRunes)
	case lastAcceptedMatch.end+1 != len(nameRunes):
		// runes remaining after the last accepted match
		s.appendBrokenDownCasualString(nameLexems, nameRunes[lastAcceptedMatch.end+1:])
	}

	poolOfMatches.RedeemMatches(matches)

	return nameLexems
}
// breakInitialism wraps a recognized initialism into an initialism lexem,
// using the same string as the original text and as the matched initialism.
func (s splitter) breakInitialism(original string) nameLexem {
	lexem := newInitialismNameLexem(original, original)

	return lexem
}
// appendBrokenDownCasualString splits str (a run of runes holding no
// recognized initialism) into words and appends them to segments.
//
// A new word starts:
//   - at every rune for which replaceFunc reports a replacement: the
//     replacement, when not empty, is emitted as a word of its own
//   - after every rune that is not a letter, mark, number or connector punctuation:
//     such runes are dropped
//   - at every upper case rune
//
// When postSplitInitialismCheck is enabled, every word is compared with the
// known initialisms (using isEqualFoldIgnoreSpace) and is emitted as an
// initialism lexem on the first hit; otherwise words are casual lexems.
func (s splitter) appendBrokenDownCasualString(segments *[]nameLexem, str []rune) {
	// a pooled buffer accumulates the runes of the word being built
	currentSegment := poolOfBuffers.BorrowBuffer(len(str))
	defer poolOfBuffers.RedeemBuffer(currentSegment)

	emit := func(original string) {
		*segments = append(*segments, newCasualNameLexem(original))
	}

	if s.postSplitInitialismCheck {
		emit = func(original string) {
			for i, initialism := range s.initialisms {
				if isEqualFoldIgnoreSpace(s.initialismsUpperCased[i], original) {
					*segments = append(*segments, newInitialismNameLexem(original, initialism))

					return
				}
			}

			*segments = append(*segments, newCasualNameLexem(original))
		}
	}

	// flush emits the word accumulated so far, if any, then empties the buffer.
	//
	// NOTE: (performance). The few remaining non-amortized allocations
	// lay here: a string is built out of the buffer for every word.
	flush := func() {
		if currentSegment.Len() == 0 {
			return
		}

		emit(currentSegment.String())
		currentSegment.Reset()
	}

	for _, rn := range str {
		replacement, replaced := s.replaceFunc(rn)

		switch {
		case replaced:
			flush()

			if replacement != "" {
				emit(replacement)
			}

		case !unicode.In(rn, unicode.L, unicode.M, unicode.N, unicode.Pc):
			// separator: ends the current word and is dropped
			flush()

		default:
			if unicode.IsUpper(rn) {
				// an upper case rune starts a new word
				if currentSegment.Len() > 0 {
					emit(currentSegment.String())
				}

				currentSegment.Reset()
			}

			currentSegment.WriteRune(rn)
		}
	}

	// emit the trailing word
	if currentSegment.Len() > 0 {
		emit(currentSegment.String())
	}
}