gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

strings.go (5362B)


      1 // Copyright 2019 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package strs provides string manipulation functionality specific to protobuf.
      6 package strs
      7 
      8 import (
      9 	"go/token"
     10 	"strings"
     11 	"unicode"
     12 	"unicode/utf8"
     13 
     14 	"google.golang.org/protobuf/internal/flags"
     15 	"google.golang.org/protobuf/reflect/protoreflect"
     16 )
     17 
     18 // EnforceUTF8 reports whether to enforce strict UTF-8 validation.
     19 func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
     20 	if flags.ProtoLegacy {
     21 		if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
     22 			return fd.EnforceUTF8()
     23 		}
     24 	}
     25 	return fd.Syntax() == protoreflect.Proto3
     26 }
     27 
     28 // GoCamelCase camel-cases a protobuf name for use as a Go identifier.
     29 //
     30 // If there is an interior underscore followed by a lower case letter,
     31 // drop the underscore and convert the letter to upper case.
     32 func GoCamelCase(s string) string {
     33 	// Invariant: if the next letter is lower case, it must be converted
     34 	// to upper case.
     35 	// That is, we process a word at a time, where words are marked by _ or
     36 	// upper case letter. Digits are treated as words.
     37 	var b []byte
     38 	for i := 0; i < len(s); i++ {
     39 		c := s[i]
     40 		switch {
     41 		case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
     42 			// Skip over '.' in ".{{lowercase}}".
     43 		case c == '.':
     44 			b = append(b, '_') // convert '.' to '_'
     45 		case c == '_' && (i == 0 || s[i-1] == '.'):
     46 			// Convert initial '_' to ensure we start with a capital letter.
     47 			// Do the same for '_' after '.' to match historic behavior.
     48 			b = append(b, 'X') // convert '_' to 'X'
     49 		case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
     50 			// Skip over '_' in "_{{lowercase}}".
     51 		case isASCIIDigit(c):
     52 			b = append(b, c)
     53 		default:
     54 			// Assume we have a letter now - if not, it's a bogus identifier.
     55 			// The next word is a sequence of characters that must start upper case.
     56 			if isASCIILower(c) {
     57 				c -= 'a' - 'A' // convert lowercase to uppercase
     58 			}
     59 			b = append(b, c)
     60 
     61 			// Accept lower case sequence that follows.
     62 			for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
     63 				b = append(b, s[i+1])
     64 			}
     65 		}
     66 	}
     67 	return string(b)
     68 }
     69 
     70 // GoSanitized converts a string to a valid Go identifier.
     71 func GoSanitized(s string) string {
     72 	// Sanitize the input to the set of valid characters,
     73 	// which must be '_' or be in the Unicode L or N categories.
     74 	s = strings.Map(func(r rune) rune {
     75 		if unicode.IsLetter(r) || unicode.IsDigit(r) {
     76 			return r
     77 		}
     78 		return '_'
     79 	}, s)
     80 
     81 	// Prepend '_' in the event of a Go keyword conflict or if
     82 	// the identifier is invalid (does not start in the Unicode L category).
     83 	r, _ := utf8.DecodeRuneInString(s)
     84 	if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
     85 		return "_" + s
     86 	}
     87 	return s
     88 }
     89 
     90 // JSONCamelCase converts a snake_case identifier to a camelCase identifier,
     91 // according to the protobuf JSON specification.
     92 func JSONCamelCase(s string) string {
     93 	var b []byte
     94 	var wasUnderscore bool
     95 	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
     96 		c := s[i]
     97 		if c != '_' {
     98 			if wasUnderscore && isASCIILower(c) {
     99 				c -= 'a' - 'A' // convert to uppercase
    100 			}
    101 			b = append(b, c)
    102 		}
    103 		wasUnderscore = c == '_'
    104 	}
    105 	return string(b)
    106 }
    107 
    108 // JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
    109 // according to the protobuf JSON specification.
    110 func JSONSnakeCase(s string) string {
    111 	var b []byte
    112 	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
    113 		c := s[i]
    114 		if isASCIIUpper(c) {
    115 			b = append(b, '_')
    116 			c += 'a' - 'A' // convert to lowercase
    117 		}
    118 		b = append(b, c)
    119 	}
    120 	return string(b)
    121 }
    122 
    123 // MapEntryName derives the name of the map entry message given the field name.
    124 // See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
    125 func MapEntryName(s string) string {
    126 	var b []byte
    127 	upperNext := true
    128 	for _, c := range s {
    129 		switch {
    130 		case c == '_':
    131 			upperNext = true
    132 		case upperNext:
    133 			b = append(b, byte(unicode.ToUpper(c)))
    134 			upperNext = false
    135 		default:
    136 			b = append(b, byte(c))
    137 		}
    138 	}
    139 	b = append(b, "Entry"...)
    140 	return string(b)
    141 }
    142 
    143 // EnumValueName derives the camel-cased enum value name.
    144 // See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
    145 func EnumValueName(s string) string {
    146 	var b []byte
    147 	upperNext := true
    148 	for _, c := range s {
    149 		switch {
    150 		case c == '_':
    151 			upperNext = true
    152 		case upperNext:
    153 			b = append(b, byte(unicode.ToUpper(c)))
    154 			upperNext = false
    155 		default:
    156 			b = append(b, byte(unicode.ToLower(c)))
    157 			upperNext = false
    158 		}
    159 	}
    160 	return string(b)
    161 }
    162 
    163 // TrimEnumPrefix trims the enum name prefix from an enum value name,
    164 // where the prefix is all lowercase without underscores.
    165 // See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
    166 func TrimEnumPrefix(s, prefix string) string {
    167 	s0 := s // original input
    168 	for len(s) > 0 && len(prefix) > 0 {
    169 		if s[0] == '_' {
    170 			s = s[1:]
    171 			continue
    172 		}
    173 		if unicode.ToLower(rune(s[0])) != rune(prefix[0]) {
    174 			return s0 // no prefix match
    175 		}
    176 		s, prefix = s[1:], prefix[1:]
    177 	}
    178 	if len(prefix) > 0 {
    179 		return s0 // no prefix match
    180 	}
    181 	s = strings.TrimLeft(s, "_")
    182 	if len(s) == 0 {
    183 		return s0 // avoid returning empty string
    184 	}
    185 	return s
    186 }
    187 
    188 func isASCIILower(c byte) bool {
    189 	return 'a' <= c && c <= 'z'
    190 }
    191 func isASCIIUpper(c byte) bool {
    192 	return 'A' <= c && c <= 'Z'
    193 }
    194 func isASCIIDigit(c byte) bool {
    195 	return '0' <= c && c <= '9'
    196 }