garm/vendor/github.com/go-openapi/swag/mangling/split.go
Gabriel Adrian Samfira 47537fb8b6 Update all dependencies
Update all deps.

Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
2026-01-06 01:13:34 +02:00

341 lines
9.7 KiB
Go

// SPDX-FileCopyrightText: Copyright 2015-2025 go-swagger maintainers
// SPDX-License-Identifier: Apache-2.0
package mangling
import (
"fmt"
"unicode"
)
// splitterOption configures a splitter. Options are applied in order by newSplitter.
type splitterOption func(*splitter)
// withPostSplitInitialismCheck is a splitter option that enables the
// post-split initialism check: every segment produced when breaking down a
// casual string is then compared against the known initialisms before it is
// emitted.
func withPostSplitInitialismCheck(sp *splitter) {
	sp.postSplitInitialismCheck = true
}
// withReplaceFunc returns a splitter option that installs fn as the rune
// replacement function consulted when breaking down casual strings.
//
// When fn is nil, newSplitter falls back to its default replacement table.
//
// The result is declared as a splitterOption, like the other option
// constructors of this file.
func withReplaceFunc(fn ReplaceFunc) splitterOption {
	return func(s *splitter) {
		s.replaceFunc = fn
	}
}
// withInitialismsCache returns a splitter option that makes the splitter
// use c as its source of known initialisms.
func withInitialismsCache(c *initialismsCache) splitterOption {
	install := func(target *splitter) {
		target.initialismsCache = c
	}

	return install
}
type (
// initialismMatch tracks one attempt at matching a known initialism
// against the runes of a name.
initialismMatch struct {
// body holds the runes of the candidate initialism. A trailing 's' is
// appended when a simple plural form is recognized.
body []rune
// start is the rune position at which the attempt begins; end is the
// position of the last matched rune (inclusive), set on completion.
start, end int
// complete is true once the whole body has been matched.
complete bool
// hasPlural is the plural form registered for this initialism.
hasPlural pluralForm
}
// initialismMatches is a collection of matching attempts, kept in the
// order in which they were registered.
initialismMatches []initialismMatch
)
// String renders a match in a human-readable form, e.g. for debugging.
//
// The output lists the body (with its length in runes), the start and end
// positions, the completion flag and the plural form.
func (m initialismMatch) String() string {
	// every field uses the same "name: value" layout
	return fmt.Sprintf("{body: %s (%d), start: %d, end: %d, complete: %t, hasPlural: %v}",
		string(m.body), len(m.body), m.start, m.end, m.complete, m.hasPlural,
	)
}
// isZero reports whether both boundaries of the match are still at their
// zero value, which is how an unset match is recognized.
//
// Only start and end are inspected: the other fields are ignored.
func (m initialismMatch) isZero() bool {
	if m.start != 0 {
		return false
	}

	return m.end == 0
}
// splitter breaks a name down into lexems, recognizing known initialisms
// along the way.
type splitter struct {
// the embedded cache supplies the known initialisms and their plural forms
*initialismsCache
// when true, segments produced by the casual split are checked again
// against the known initialisms
postSplitInitialismCheck bool
// per-rune replacement function; newSplitter sets a default when left nil
replaceFunc ReplaceFunc
}
// newSplitter builds a splitter, applying the provided options in order.
//
// When no option has set a replacement function, defaultReplaceTable is used.
func newSplitter(options ...splitterOption) splitter {
	s := splitter{}

	for i := range options {
		options[i](&s)
	}

	if s.replaceFunc != nil {
		return s
	}

	s.replaceFunc = defaultReplaceTable

	return s
}
// split breaks name down into lexems.
//
// The name is converted to runes, candidate initialism matches are gathered,
// then mapped to lexems. Should no match collection be returned at all, an
// empty slice borrowed from the pool of lexems is returned instead.
func (s splitter) split(name string) *[]nameLexem {
	runes := []rune(name)

	found := s.gatherInitialismMatches(runes)
	if found != nil {
		return s.mapMatchesToNameLexems(runes, found)
	}

	return poolOfLexems.BorrowLexems()
}
// gatherInitialismMatches scans nameRunes from left to right and collects the
// candidate initialism matches found along the way.
//
// At every rune position, the attempts started at previous positions are
// either extended, completed or discarded, then the new attempts starting at
// the current position are registered by findMatches.
//
// The returned slice is borrowed from poolOfMatches and may still hold
// incomplete attempts: the caller should only consider matches flagged as
// complete, and is responsible for redeeming the slice.
func (s splitter) gatherInitialismMatches(nameRunes []rune) *initialismMatches {
	matches := poolOfMatches.BorrowMatches()
	const minLenInitialism = 1
	if len(nameRunes) < minLenInitialism+1 {
		// can't match initialism with 0 or 1 rune
		return matches
	}

	// first iteration
	s.findMatches(matches, nameRunes, nameRunes[0], 0)

	for i, currentRune := range nameRunes[1:] {
		// NOTE: this position is shared by all attempts checked in this iteration and by
		// the call to findMatches below: it must not be altered while iterating.
		currentRunePosition := i + 1
		// recycle allocations as we loop over runes
		// with such recycling, only 2 slices should be allocated per call
		// instead of o(n).
		//
		// BorrowMatches always yields slices with zero length (with some capacity)
		newMatches := poolOfMatches.BorrowMatches()

		// check current initialism matches
		for _, match := range *matches {
			if match.complete {
				// the match is already complete: keep it then move on to the next match
				*newMatches = append(*newMatches, match)
				continue
			}

			if currentRunePosition-match.start == len(match.body) {
				// unmatched: skip
				continue
			}

			// 1. by construction of the matches, we can't have currentRunePosition - match.start < 0
			// because matches have been computed with their start <= currentRunePosition in the previous
			// iterations.
			// 2. by construction of the matches, we can't have currentRunePosition - match.start >= len(match.body)
			currentMatchRune := match.body[currentRunePosition-match.start]
			if currentMatchRune != currentRune {
				// failed match, discard it then move on to the next match
				continue
			}

			// try to complete the current match
			if currentRunePosition-match.start == len(match.body)-1 {
				// we are close: the next step is to check the symbol ahead
				// if it is a lowercase letter, then it is not the end of match
				// but the beginning of the next word.
				//
				// NOTE(fredbi): this heuristic sometimes leads to counterintuitive splits and
				// perhaps (not sure yet) we should check against case _alternance_.
				//
				// Example:
				//
				// In the current version, in the sentence "IDS initialism", "ID" is recognized as an initialism,
				// leading to a split like "id_s_initialism" (or IDSInitialism),
				// whereas in the sentence "IDx initialism", it is not and produces something like
				// "i_d_x_initialism" (or IDxInitialism). The generated file name is not great.
				//
				// Both go identifiers are tolerated by linters.
				//
				// Notice that the slightly different input "IDs initialism" is correctly detected
				// as a pluralized initialism and produces something like "ids_initialism" (or IDsInitialism).
				if currentRunePosition < len(nameRunes)-1 { // when before the last rune
					nextRune := nameRunes[currentRunePosition+1]

					// recognize a plural form for this initialism (only simple english pluralization is supported).
					if nextRune == 's' && match.hasPlural == simplePlural {
						// detected a pluralized initialism: it ends on the trailing 's'
						pluralEnd := currentRunePosition + 1
						if pluralEnd < len(nameRunes)-1 {
							if newWord := unicode.IsLower(nameRunes[pluralEnd+1]); newWord {
								// it is the start of a new word.
								// Match is only partial and the initialism is not recognized:
								// move on to the next match.
								continue
							}
						}

						// this is a pluralized match: keep it.
						//
						// The body aliases the rune slice of the registered initialism: the full slice
						// expression caps its capacity so that append copies the runes instead of
						// writing the 's' into the shared backing array.
						match.body = append(match.body[:len(match.body):len(match.body)], nextRune)
						match.complete = true
						match.hasPlural = simplePlural
						match.end = pluralEnd
						*newMatches = append(*newMatches, match)

						// match is complete: keep it then move on to the next match
						continue
					}

					// other cases
					// example: invariant plural such as "TLS"
					if newWord := unicode.IsLower(nextRune); newWord {
						// it is the start of a new word
						// Match is only partial and the initialism is not recognized : move on
						continue
					}
				}

				match.complete = true
				match.end = currentRunePosition
			}

			// append the ongoing matching attempt: it is not necessarily complete, but was successful so far.
			// Let's see if it still matches on the next rune.
			*newMatches = append(*newMatches, match)
		}

		s.findMatches(newMatches, nameRunes, currentRune, currentRunePosition)

		poolOfMatches.RedeemMatches(matches)
		matches = newMatches
	}

	// it is up to the caller to redeem this last slice
	return matches
}
// findMatches registers in newMatches a new matching attempt for every known
// initialism that starts with currentRune and is short enough to fit in
// nameRunes when starting at currentRunePosition.
//
// Registered attempts are not complete: they carry the runes of the
// initialism as their body, together with its plural form.
func (s splitter) findMatches(newMatches *initialismMatches, nameRunes []rune, currentRune rune, currentRunePosition int) {
	// check for new initialism matches, based on the first character
	for i, r := range s.initialismsRunes {
		if len(r) == 0 {
			// an empty initialism can't match anything (and has no first rune to look at)
			continue
		}

		if r[0] != currentRune {
			continue
		}

		if currentRunePosition+len(r) > len(nameRunes) {
			continue // not eligible: would spill over the initial string
		}

		// possible matches: all initialisms starting with the current rune and that can fit the given string (nameRunes)
		*newMatches = append(*newMatches, initialismMatch{
			start:     currentRunePosition,
			body:      r,
			complete:  false,
			hasPlural: s.initialismsPluralForm[i],
		})
	}
}
// mapMatchesToNameLexems turns the gathered matches into a sequence of lexems
// covering nameRunes.
//
// Complete matches are accepted in order, unless they start before the end of
// the previously accepted one. The runes found before, between and after the
// accepted matches are broken down as casual strings.
//
// The collection of matches is redeemed to its pool before returning. The
// resulting lexems are borrowed from the pool of lexems.
func (s splitter) mapMatchesToNameLexems(nameRunes []rune, matches *initialismMatches) *[]nameLexem {
	lexems := poolOfLexems.BorrowLexems()

	var accepted initialismMatch

	for _, candidate := range *matches {
		if !candidate.complete {
			continue
		}

		var gap []rune

		switch {
		case accepted.isZero():
			// first accepted match: everything before it is casual
			gap = nameRunes[:candidate.start]
		case candidate.start <= accepted.end:
			// overlaps with the previously accepted match
			continue
		default:
			gap = nameRunes[accepted.end+1 : candidate.start]
		}

		s.appendBrokenDownCasualString(lexems, gap)
		*lexems = append(*lexems, s.breakInitialism(string(candidate.body)))
		accepted = candidate
	}

	switch {
	case accepted.isZero():
		// no match has been accepted: the whole name is casual
		*lexems = (*lexems)[:0]
		s.appendBrokenDownCasualString(lexems, nameRunes)
	case accepted.end+1 != len(nameRunes):
		// some runes are left after the last accepted match
		s.appendBrokenDownCasualString(lexems, nameRunes[accepted.end+1:])
	}

	poolOfMatches.RedeemMatches(matches)

	return lexems
}
// breakInitialism wraps a recognized initialism into a lexem: the original
// text is used both as the original and as the matched initialism.
func (s splitter) breakInitialism(original string) nameLexem {
	lexem := newInitialismNameLexem(original, original)

	return lexem
}
// appendBrokenDownCasualString breaks str down into words and appends the
// resulting lexems to segments.
//
// A new word starts:
//   - after a rune handled by the replacement function (a non-empty
//     replacement is emitted as a word of its own),
//   - after a rune which is neither a letter, a mark, a number nor a
//     connector punctuation (such runes are dropped),
//   - on every upper case rune.
//
// When the post-split initialism check is enabled, every word is compared
// against the known initialisms and emitted as an initialism on a match.
func (s splitter) appendBrokenDownCasualString(segments *[]nameLexem, str []rune) {
	// the buffer accumulates the runes of the word being built
	buf := poolOfBuffers.BorrowBuffer(len(str))
	defer func() {
		poolOfBuffers.RedeemBuffer(buf)
	}()

	emitCasual := func(text string) {
		*segments = append(*segments, newCasualNameLexem(text))
	}

	emit := emitCasual
	if s.postSplitInitialismCheck {
		emit = func(text string) {
			for idx := range s.initialisms {
				if isEqualFoldIgnoreSpace(s.initialismsUpperCased[idx], text) {
					*segments = append(*segments, newInitialismNameLexem(text, s.initialisms[idx]))

					return
				}
			}

			emitCasual(text)
		}
	}

	// flush emits the pending word, if any, and starts a new one
	flush := func() {
		if buf.Len() > 0 {
			emit(buf.String())
		}

		buf.Reset()
	}

	// NOTE (performance): each emitted word is extracted from the buffer with String().
	for _, r := range str {
		replacement, replaced := s.replaceFunc(r)

		switch {
		case replaced:
			flush()

			if replacement != "" {
				emit(replacement)
			}
		case !unicode.In(r, unicode.L, unicode.M, unicode.N, unicode.Pc):
			// separator: ends the current word and is dropped
			flush()
		default:
			if unicode.IsUpper(r) {
				flush()
			}

			buf.WriteRune(r)
		}
	}

	if buf.Len() > 0 {
		emit(buf.String())
	}
}