strings.go (5362B)
1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package strs provides string manipulation functionality specific to protobuf. 6 package strs 7 8 import ( 9 "go/token" 10 "strings" 11 "unicode" 12 "unicode/utf8" 13 14 "google.golang.org/protobuf/internal/flags" 15 "google.golang.org/protobuf/reflect/protoreflect" 16 ) 17 18 // EnforceUTF8 reports whether to enforce strict UTF-8 validation. 19 func EnforceUTF8(fd protoreflect.FieldDescriptor) bool { 20 if flags.ProtoLegacy { 21 if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok { 22 return fd.EnforceUTF8() 23 } 24 } 25 return fd.Syntax() == protoreflect.Proto3 26 } 27 28 // GoCamelCase camel-cases a protobuf name for use as a Go identifier. 29 // 30 // If there is an interior underscore followed by a lower case letter, 31 // drop the underscore and convert the letter to upper case. 32 func GoCamelCase(s string) string { 33 // Invariant: if the next letter is lower case, it must be converted 34 // to upper case. 35 // That is, we process a word at a time, where words are marked by _ or 36 // upper case letter. Digits are treated as words. 37 var b []byte 38 for i := 0; i < len(s); i++ { 39 c := s[i] 40 switch { 41 case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]): 42 // Skip over '.' in ".{{lowercase}}". 43 case c == '.': 44 b = append(b, '_') // convert '.' to '_' 45 case c == '_' && (i == 0 || s[i-1] == '.'): 46 // Convert initial '_' to ensure we start with a capital letter. 47 // Do the same for '_' after '.' to match historic behavior. 48 b = append(b, 'X') // convert '_' to 'X' 49 case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]): 50 // Skip over '_' in "_{{lowercase}}". 51 case isASCIIDigit(c): 52 b = append(b, c) 53 default: 54 // Assume we have a letter now - if not, it's a bogus identifier. 55 // The next word is a sequence of characters that must start upper case. 
56 if isASCIILower(c) { 57 c -= 'a' - 'A' // convert lowercase to uppercase 58 } 59 b = append(b, c) 60 61 // Accept lower case sequence that follows. 62 for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ { 63 b = append(b, s[i+1]) 64 } 65 } 66 } 67 return string(b) 68 } 69 70 // GoSanitized converts a string to a valid Go identifier. 71 func GoSanitized(s string) string { 72 // Sanitize the input to the set of valid characters, 73 // which must be '_' or be in the Unicode L or N categories. 74 s = strings.Map(func(r rune) rune { 75 if unicode.IsLetter(r) || unicode.IsDigit(r) { 76 return r 77 } 78 return '_' 79 }, s) 80 81 // Prepend '_' in the event of a Go keyword conflict or if 82 // the identifier is invalid (does not start in the Unicode L category). 83 r, _ := utf8.DecodeRuneInString(s) 84 if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) { 85 return "_" + s 86 } 87 return s 88 } 89 90 // JSONCamelCase converts a snake_case identifier to a camelCase identifier, 91 // according to the protobuf JSON specification. 92 func JSONCamelCase(s string) string { 93 var b []byte 94 var wasUnderscore bool 95 for i := 0; i < len(s); i++ { // proto identifiers are always ASCII 96 c := s[i] 97 if c != '_' { 98 if wasUnderscore && isASCIILower(c) { 99 c -= 'a' - 'A' // convert to uppercase 100 } 101 b = append(b, c) 102 } 103 wasUnderscore = c == '_' 104 } 105 return string(b) 106 } 107 108 // JSONSnakeCase converts a camelCase identifier to a snake_case identifier, 109 // according to the protobuf JSON specification. 110 func JSONSnakeCase(s string) string { 111 var b []byte 112 for i := 0; i < len(s); i++ { // proto identifiers are always ASCII 113 c := s[i] 114 if isASCIIUpper(c) { 115 b = append(b, '_') 116 c += 'a' - 'A' // convert to lowercase 117 } 118 b = append(b, c) 119 } 120 return string(b) 121 } 122 123 // MapEntryName derives the name of the map entry message given the field name. 
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
//
// Underscores are dropped. The first byte, and the byte following each
// underscore, is converted to upper case if it is an ASCII lower case
// letter. All other bytes are copied unchanged and "Entry" is appended.
//
// Case conversion is ASCII-only and byte-wise, so any non-ASCII bytes in s
// pass through intact instead of being truncated.
func MapEntryName(s string) string {
	b := make([]byte, 0, len(s)+len("Entry"))
	upperNext := true
	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			b = append(b, toASCIIUpper(c))
			upperNext = false
		default:
			b = append(b, c)
		}
	}
	b = append(b, "Entry"...)
	return string(b)
}

// EnumValueName derives the camel-cased enum value name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
//
// Underscores are dropped. The first byte, and the byte following each
// underscore, is converted to ASCII upper case; every other byte is
// converted to ASCII lower case. Non-ASCII bytes are copied unchanged.
func EnumValueName(s string) string {
	b := make([]byte, 0, len(s))
	upperNext := true
	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			b = append(b, toASCIIUpper(c))
			upperNext = false
		default:
			b = append(b, toASCIILower(c))
		}
	}
	return string(b)
}

// TrimEnumPrefix trims the enum name prefix from an enum value name,
// where the prefix is all lowercase without underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Underscores in s are ignored while matching, and the comparison is
// case-insensitive for ASCII letters only. The original s is returned
// unchanged when the prefix does not match, or when stripping it (together
// with any underscores that follow) would leave nothing.
func TrimEnumPrefix(s, prefix string) string {
	s0 := s // original input
	for len(s) > 0 && len(prefix) > 0 {
		if s[0] == '_' {
			s = s[1:]
			continue
		}
		// Compare bytes directly; lowering only ASCII letters keeps
		// multi-byte UTF-8 sequences comparable byte for byte.
		if toASCIILower(s[0]) != prefix[0] {
			return s0 // no prefix match
		}
		s, prefix = s[1:], prefix[1:]
	}
	if len(prefix) > 0 {
		return s0 // no prefix match
	}
	s = strings.TrimLeft(s, "_")
	if len(s) == 0 {
		return s0 // avoid returning empty string
	}
	return s
}

// isASCIILower reports whether c is an ASCII lower case letter.
func isASCIILower(c byte) bool {
	return 'a' <= c && c <= 'z'
}

// isASCIIUpper reports whether c is an ASCII upper case letter.
func isASCIIUpper(c byte) bool {
	return 'A' <= c && c <= 'Z'
}

// isASCIIDigit reports whether c is an ASCII decimal digit.
func isASCIIDigit(c byte) bool {
	return '0' <= c && c <= '9'
}

// toASCIIUpper returns c upper-cased if it is an ASCII lower case letter,
// and c unchanged otherwise.
func toASCIIUpper(c byte) byte {
	if isASCIILower(c) {
		c -= 'a' - 'A'
	}
	return c
}

// toASCIILower returns c lower-cased if it is an ASCII upper case letter,
// and c unchanged otherwise.
func toASCIILower(c byte) byte {
	if isASCIIUpper(c) {
		c += 'a' - 'A'
	}
	return c
}