scanf.go (11649B)
1 // Copyright 2020 The Libc Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package libc // import "modernc.org/libc" 6 7 import ( 8 "strings" 9 "unsafe" 10 ) 11 12 // The format string consists of a sequence of directives which describe how to 13 // process the sequence of input characters. If processing of a directive 14 // fails, no further input is read, and scanf() returns. A "failure" can 15 // be either of the following: input failure, meaning that input characters 16 // were unavailable, or matching failure, meaning that the input was 17 // inappropriate. 18 func scanf(r *strings.Reader, format, args uintptr) (nvalues int32) { 19 // var src []byte //TODO- 20 var ok bool 21 out: 22 for { 23 c := *(*byte)(unsafe.Pointer(format)) 24 // src = append(src, c) //TODO- 25 switch c { 26 case '%': 27 var n int 28 var match bool 29 format, n, match = scanfConversion(r, format, &args) 30 if !match { 31 break out 32 } 33 34 nvalues += int32(n) 35 ok = true 36 case 0: 37 break out 38 case ' ', '\t', '\n', '\r', '\v', '\f': 39 format = skipWhiteSpace(format) 40 ok = true 41 next: 42 for { 43 c, err := r.ReadByte() 44 if err != nil { 45 break out 46 } 47 48 switch c { 49 case ' ', '\t', '\n', '\r', '\v', '\f': 50 // nop 51 default: 52 r.UnreadByte() 53 break next 54 } 55 } 56 default: 57 c2, err := r.ReadByte() 58 if err != nil { 59 break out 60 } 61 62 if c2 != c { 63 r.UnreadByte() 64 break out 65 } 66 67 format++ 68 ok = true 69 } 70 } 71 if ok { 72 return nvalues 73 } 74 75 return -1 // stdio.EOF but not defined for windows 76 } 77 78 func scanfConversion(r *strings.Reader, format uintptr, args *uintptr) (_ uintptr, nvalues int, match bool) { 79 format++ // '%' 80 81 // Each conversion specification in format begins with either the character '%' 82 // or the character sequence "%n$" (see below for the distinction) followed by: 83 84 mod := 0 85 width := -1 86 flags: 87 for { 88 switch c := *(*byte)(unsafe.Pointer(format)); c { 89 case '*': 90 // An optional '*' assignment-suppression character: scanf() reads input as 91 // directed by the conversion specification, but discards the input. No 92 // corresponding pointer argument is re‐ quired, and this specification is not 93 // included in the count of successful assignments returned by scanf(). 94 format++ 95 panic(todo("")) 96 case '\'': 97 // For decimal conversions, an optional quote character ('). This specifies 98 // that the input number may include thousands' separators as defined by the 99 // LC_NUMERIC category of the current locale. (See setlocale(3).) The quote 100 // character may precede or follow the '*' assignment-suppression character. 101 format++ 102 panic(todo("")) 103 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 104 // An optional decimal integer which specifies the maximum field width. 105 // Reading of characters stops either when this maximum is reached or when a 106 // nonmatching character is found, whichever happens first. Most conversions 107 // discard initial white space characters (the exceptions are noted below), and 108 // these discarded characters don't count toward the maximum field width. 109 // String input conversions store a terminating null byte ('\0') to mark the 110 // end of the input; the maximum field width does not include this terminator. 111 width = 0 112 num: 113 for { 114 var digit int 115 switch c := *(*byte)(unsafe.Pointer(format)); { 116 default: 117 break num 118 case c >= '0' && c <= '9': 119 format++ 120 digit = int(c) - '0' 121 } 122 width0 := width 123 width = 10*width + digit 124 if width < width0 { 125 panic(todo("")) 126 } 127 } 128 case 'h', 'j', 'l', 'L', 'q', 't', 'z': 129 format, mod = parseLengthModifier(format) 130 default: 131 break flags 132 } 133 } 134 135 // A conversion specifier that specifies the type of input conversion to be 136 // performed. 137 switch c := *(*byte)(unsafe.Pointer(format)); c { 138 case '%': 139 // Matches a literal '%'. That is, %% in the format string matches a single 140 // input '%' character. No conversion is done (but initial white space 141 // characters are discarded), and assign‐ ment does not occur. 142 format++ 143 panic(todo("")) 144 case 'd': 145 // Matches an optionally signed decimal integer; the next pointer must be a 146 // pointer to int. 147 format++ 148 skipReaderWhiteSpace(r) 149 var digit, n uint64 150 allowSign := true 151 neg := false 152 dec: 153 for ; width != 0; width-- { 154 c, err := r.ReadByte() 155 if err != nil { 156 if match { 157 break dec 158 } 159 160 panic(todo("", err)) 161 } 162 163 if allowSign { 164 switch c { 165 case '-': 166 allowSign = false 167 neg = true 168 continue 169 case '+': 170 allowSign = false 171 continue 172 } 173 } 174 175 switch { 176 case c >= '0' && c <= '9': 177 digit = uint64(c) - '0' 178 default: 179 r.UnreadByte() 180 break dec 181 } 182 match = true 183 n0 := n 184 n = n*10 + digit 185 if n < n0 { 186 panic(todo("")) 187 } 188 } 189 if !match { 190 break 191 } 192 193 arg := VaUintptr(args) 194 v := int64(n) 195 if neg { 196 v = -v 197 } 198 switch mod { 199 case modNone: 200 *(*int32)(unsafe.Pointer(arg)) = int32(v) 201 case modH: 202 *(*int16)(unsafe.Pointer(arg)) = int16(v) 203 case modHH: 204 *(*int8)(unsafe.Pointer(arg)) = int8(v) 205 case modL: 206 *(*long)(unsafe.Pointer(arg)) = long(n) 207 default: 208 panic(todo("")) 209 } 210 nvalues = 1 211 case 'D': 212 // Equivalent to ld; this exists only for backward compatibility. (Note: 213 // thus only in libc4. In libc5 and glibc the %D is silently ignored, causing 214 // old programs to fail mysteriously.) 215 format++ 216 panic(todo("")) 217 case 'i': 218 // Matches an optionally signed integer; the next pointer must be a pointer to 219 // int. The integer is read in base 16 if it begins with 0x or 0X, in base 8 220 // if it begins with 0, and in base 10 otherwise. Only characters that 221 // correspond to the base are used. 222 format++ 223 panic(todo("")) 224 case 'o': 225 // Matches an unsigned octal integer; the next pointer must be a pointer to 226 // unsigned int. 227 format++ 228 panic(todo("")) 229 case 'u': 230 // Matches an unsigned decimal integer; the next pointer must be a pointer to 231 // unsigned int. 232 format++ 233 panic(todo("")) 234 case 'x', 'X': 235 // Matches an unsigned hexadecimal integer; the next pointer must be a pointer 236 // to unsigned int. 237 format++ 238 skipReaderWhiteSpace(r) 239 var digit, n uint64 240 allowPrefix := true 241 var b []byte 242 hex: 243 for ; width != 0; width-- { 244 c, err := r.ReadByte() 245 if err != nil { 246 if match { 247 break hex 248 } 249 250 panic(todo("", err)) 251 } 252 253 if allowPrefix { 254 if len(b) == 1 && b[0] == '0' && (c == 'x' || c == 'X') { 255 allowPrefix = false 256 match = false 257 b = nil 258 continue 259 } 260 261 b = append(b, c) 262 } 263 264 switch { 265 case c >= '0' && c <= '9': 266 digit = uint64(c) - '0' 267 case c >= 'a' && c <= 'f': 268 digit = uint64(c) - 'a' + 10 269 case c >= 'A' && c <= 'F': 270 digit = uint64(c) - 'A' + 10 271 default: 272 r.UnreadByte() 273 break hex 274 } 275 match = true 276 n0 := n 277 n = n<<4 + digit 278 if n < n0 { 279 panic(todo("")) 280 } 281 } 282 if !match { 283 break 284 } 285 286 arg := VaUintptr(args) 287 switch mod { 288 case modNone: 289 *(*uint32)(unsafe.Pointer(arg)) = uint32(n) 290 case modH: 291 *(*uint16)(unsafe.Pointer(arg)) = uint16(n) 292 case modHH: 293 *(*byte)(unsafe.Pointer(arg)) = byte(n) 294 case modL: 295 *(*ulong)(unsafe.Pointer(arg)) = ulong(n) 296 default: 297 panic(todo("")) 298 } 299 nvalues = 1 300 case 'f', 'e', 'g', 'E', 'a': 301 // Matches an optionally signed floating-point number; the next pointer must be 302 // a pointer to float. 303 format++ 304 panic(todo("")) 305 case 's': 306 // Matches a sequence of non-white-space characters; the next pointer must be 307 // a pointer to the initial element of a character array that is long enough to 308 // hold the input sequence and the terminating null byte ('\0'), which is added 309 // automatically. The input string stops at white space or at the maximum 310 // field width, whichever occurs first. 311 format++ 312 panic(todo("")) 313 case 'c': 314 // Matches a sequence of characters whose length is specified by the maximum 315 // field width (default 1); the next pointer must be a pointer to char, and 316 // there must be enough room for all the characters (no terminating null byte 317 // is added). The usual skip of leading white space is suppressed. To skip 318 // white space first, use an explicit space in the format. 319 format++ 320 panic(todo("")) 321 case '[': 322 // Matches a nonempty sequence of characters from the specified set of 323 // accepted characters; the next pointer must be a pointer to char, and there 324 // must be enough room for all the char‐ acters in the string, plus a 325 // terminating null byte. The usual skip of leading white space is suppressed. 326 // The string is to be made up of characters in (or not in) a particular set; 327 // the set is defined by the characters between the open bracket [ character 328 // and a close bracket ] character. The set excludes those characters if the 329 // first character after the open bracket is a circumflex (^). To include a 330 // close bracket in the set, make it the first character after the open bracket 331 // or the circumflex; any other position will end the set. The hyphen 332 // character - is also special; when placed between two other characters, it 333 // adds all intervening characters to the set. To include a hyphen, make it 334 // the last character before the final close bracket. For instance, [^]0-9-] 335 // means the set "everything except close bracket, zero through nine, and 336 // hyphen". The string ends with the appearance of a character not in the 337 // (or, with a circumflex, in) set or when the field width runs out. 338 format++ 339 panic(todo("")) 340 case 'p': 341 // Matches a pointer value (as printed by %p in printf(3); the next pointer 342 // must be a pointer to a pointer to void. 343 format++ 344 skipReaderWhiteSpace(r) 345 c, err := r.ReadByte() 346 if err != nil { 347 panic(todo("")) 348 } 349 350 if c != '0' { 351 r.UnreadByte() 352 panic(todo("")) 353 } 354 355 if c, err = r.ReadByte(); err != nil { 356 panic(todo("")) 357 } 358 359 if c != 'x' && c != 'X' { 360 r.UnreadByte() 361 panic(todo("")) 362 } 363 364 var digit, n uint64 365 ptr: 366 for ; width != 0; width-- { 367 c, err := r.ReadByte() 368 if err != nil { 369 if match { 370 break ptr 371 } 372 373 panic(todo("")) 374 } 375 376 switch { 377 case c >= '0' && c <= '9': 378 digit = uint64(c) - '0' 379 case c >= 'a' && c <= 'f': 380 digit = uint64(c) - 'a' + 10 381 case c >= 'A' && c <= 'F': 382 digit = uint64(c) - 'A' + 10 383 default: 384 r.UnreadByte() 385 break ptr 386 } 387 match = true 388 n0 := n 389 n = n<<4 + digit 390 if n < n0 { 391 panic(todo("")) 392 } 393 } 394 if !match { 395 break 396 } 397 398 arg := VaUintptr(args) 399 *(*uintptr)(unsafe.Pointer(arg)) = uintptr(n) 400 nvalues = 1 401 case 'n': 402 // Nothing is expected; instead, the number of characters consumed thus far 403 // from the input is stored through the next pointer, which must be a pointer 404 // to int. This is not a conversion and does not increase the count returned 405 // by the function. The assignment can be suppressed with the * 406 // assignment-suppression character, but the effect on the return value is 407 // undefined. Therefore %*n conversions should not be used. 408 format++ 409 panic(todo("")) 410 default: 411 panic(todo("%#U", c)) 412 } 413 414 return format, nvalues, match 415 } 416 417 func skipReaderWhiteSpace(r *strings.Reader) error { 418 for { 419 c, err := r.ReadByte() 420 if err != nil { 421 return err 422 } 423 424 switch c { 425 case ' ', '\t', '\n', '\r', '\v', '\f': 426 // ok 427 default: 428 r.UnreadByte() 429 return nil 430 } 431 } 432 } 433 434 func skipWhiteSpace(s uintptr) uintptr { 435 for { 436 switch c := *(*byte)(unsafe.Pointer(s)); c { 437 case ' ', '\t', '\n', '\r', '\v', '\f': 438 s++ 439 default: 440 return s 441 } 442 } 443 }