gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

utf8.go (2120B)


      1 /*
      2  * Copyright 2022 ByteDance Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package utf8
     18 
     19 import (
     20     `github.com/bytedance/sonic/internal/rt`
     21     `github.com/bytedance/sonic/internal/native/types`
     22     `github.com/bytedance/sonic/internal/native`
     23 )
     24 
     25 // CorrectWith corrects the invalid utf8 byte with repl string.
     26 func CorrectWith(dst []byte, src []byte, repl string) []byte {
     27     sstr := rt.Mem2Str(src)
     28     sidx := 0
     29 
     30     /* state machine records the invalid postions */
     31     m := types.NewStateMachine()
     32     m.Sp = 0 // invalid utf8 numbers
     33 
     34     for sidx < len(sstr) {
     35         scur  := sidx
     36         ecode := native.ValidateUTF8(&sstr, &sidx, m)
     37 
     38         if m.Sp != 0 {
     39             if m.Sp > len(sstr) {
     40                 panic("numbers of invalid utf8 exceed the string len!")
     41             }
     42         }
     43         
     44         for i := 0; i < m.Sp; i++ {
     45             ipos := m.Vt[i] // invalid utf8 position
     46             dst  = append(dst, sstr[scur:ipos]...)
     47             dst  = append(dst, repl...)
     48             scur = m.Vt[i] + 1
     49         }
     50         /* append the remained valid utf8 bytes */
     51         dst = append(dst, sstr[scur:sidx]...)
     52 
     53         /* not enough space, reset and continue */
     54         if ecode != 0 {
     55             m.Sp = 0
     56         }
     57     }
     58 
     59     types.FreeStateMachine(m)
     60     return dst
     61 }
     62 
     63 // Validate is a simd-accelereated drop-in replacement for the standard library's utf8.Valid.
     64 func Validate(src []byte) bool {
     65     return ValidateString(rt.Mem2Str(src))
     66 }
     67 
     68 // ValidateString as Validate, but for string.
     69 func ValidateString(src string) bool {
     70     return native.ValidateUTF8Fast(&src) == 0
     71 }