printf.go (19580B)
1 // Copyright 2020 The Libc Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package libc // import "modernc.org/libc" 6 7 import ( 8 "bytes" 9 "fmt" 10 "runtime" 11 "strconv" 12 "strings" 13 "unsafe" 14 ) 15 16 const ( 17 modNone = iota 18 modHH 19 modH 20 modL 21 modLL 22 modLD 23 modQ 24 modCapitalL 25 modJ 26 modZ 27 modCapitalZ 28 modT 29 mod32 30 mod64 31 ) 32 33 // Format of the format string 34 // 35 // The format string is a character string, beginning and ending in its initial 36 // shift state, if any. The format string is composed of zero or more 37 // directives: ordinary characters (not %), which are copied unchanged to 38 // the output stream; and conversion specifications, each of which results in 39 // fetching zero or more subsequent arguments. 40 func printf(format, args uintptr) []byte { 41 format0 := format 42 args0 := args 43 buf := bytes.NewBuffer(nil) 44 for { 45 switch c := *(*byte)(unsafe.Pointer(format)); c { 46 case '%': 47 format = printfConversion(buf, format, &args) 48 case 0: 49 if dmesgs { 50 dmesg("%v: %q, %#x -> %q", origin(1), GoString(format0), args0, buf.Bytes()) 51 } 52 return buf.Bytes() 53 default: 54 format++ 55 buf.WriteByte(c) 56 } 57 } 58 } 59 60 // Each conversion specification is introduced by the character %, and ends 61 // with a conversion specifier. In between there may be (in this order) zero 62 // or more flags, an optional minimum field width, an optional precision and 63 // an optional length modifier. 64 func printfConversion(buf *bytes.Buffer, format uintptr, args *uintptr) uintptr { 65 format++ // '%' 66 spec := "%" 67 68 // Flags characters 69 // 70 // The character % is followed by zero or more of the following flags: 71 flags: 72 for { 73 switch c := *(*byte)(unsafe.Pointer(format)); c { 74 case '#': 75 // The value should be converted to an "alternate form". For o conversions, 76 // the first character of the output string is made zero (by prefixing a 0 if 77 // it was not zero already). For x and X conversions, a nonzero result has 78 // the string "0x" (or "0X" for X conversions) prepended to it. For a, A, e, 79 // E, f, F, g, and G conversions, the result will always contain a decimal 80 // point, even if no digits follow it (normally, a decimal point appears in the 81 // results of those conversions only if a digit follows). For g and G 82 // conversions, trailing zeros are not removed from the result as they would 83 // otherwise be. For other conversions, the result is undefined. 84 format++ 85 spec += "#" 86 case '0': 87 // The value should be zero padded. For d, i, o, u, x, X, a, A, e, E, f, F, 88 // g, and G conversions, the converted value is padded on the left with zeros 89 // rather than blanks. If the 0 and - flags both appear, the 0 flag is 90 // ignored. If a precision is given with a numeric conversion (d, i, o, u, x, 91 // and X), the 0 flag is ignored. For other conversions, the behav‐ ior is 92 // undefined. 93 format++ 94 spec += "0" 95 case '-': 96 // The converted value is to be left adjusted on the field boundary. (The 97 // default is right justification.) The converted value is padded on the right 98 // with blanks, rather than on the left with blanks or zeros. A - overrides a 99 // 0 if both are given. 100 format++ 101 spec += "-" 102 case ' ': 103 // A blank should be left before a positive number (or empty string) produced 104 // by a signed conversion. 105 format++ 106 spec += " " 107 case '+': 108 // A sign (+ or -) should always be placed before a number produced by a signed 109 // conversion. By default, a sign is used only for negative numbers. A + 110 // overrides a space if both are used. 111 format++ 112 spec += "+" 113 default: 114 break flags 115 } 116 } 117 format, width, hasWidth := parseFieldWidth(format) 118 if hasWidth { 119 spec += strconv.Itoa(width) 120 } 121 format, prec, hasPrecision := parsePrecision(format, args) 122 format, mod := parseLengthModifier(format) 123 124 var str string 125 126 more: 127 // Conversion specifiers 128 // 129 // A character that specifies the type of conversion to be applied. The 130 // conversion specifiers and their meanings are: 131 switch c := *(*byte)(unsafe.Pointer(format)); c { 132 case 'd', 'i': 133 // The int argument is converted to signed decimal notation. The precision, 134 // if any, gives the minimum number of digits that must appear; if the 135 // converted value requires fewer digits, it is padded on the left with zeros. 136 // The default precision is 1. When 0 is printed with an explicit precision 0, 137 // the output is empty. 138 format++ 139 var arg int64 140 if isWindows && mod == modL { 141 mod = modNone 142 } 143 switch mod { 144 case modL, modLL, mod64: 145 arg = VaInt64(args) 146 case modH: 147 arg = int64(int16(VaInt32(args))) 148 case modHH: 149 arg = int64(int8(VaInt32(args))) 150 case mod32, modNone: 151 arg = int64(VaInt32(args)) 152 default: 153 panic(todo("", mod)) 154 } 155 156 if arg == 0 && hasPrecision && prec == 0 { 157 break 158 } 159 160 if hasPrecision { 161 panic(todo("", prec)) 162 } 163 164 f := spec + "d" 165 str = fmt.Sprintf(f, arg) 166 case 'u': 167 // The unsigned int argument is converted to unsigned decimal notation. The 168 // precision, if any, gives the minimum number of digits that must appear; if 169 // the converted value requires fewer digits, it is padded on the left with 170 // zeros. The default precision is 1. When 0 is printed with an explicit 171 // precision 0, the output is empty. 172 format++ 173 var arg uint64 174 if isWindows && mod == modL { 175 mod = modNone 176 } 177 switch mod { 178 case modNone: 179 arg = uint64(VaUint32(args)) 180 case modL, modLL, mod64: 181 arg = VaUint64(args) 182 case modH: 183 arg = uint64(uint16(VaInt32(args))) 184 case modHH: 185 arg = uint64(uint8(VaInt32(args))) 186 case mod32: 187 arg = uint64(VaInt32(args)) 188 default: 189 panic(todo("", mod)) 190 } 191 192 if arg == 0 && hasPrecision && prec == 0 { 193 break 194 } 195 196 if hasPrecision { 197 panic(todo("", prec)) 198 } 199 200 f := spec + "d" 201 str = fmt.Sprintf(f, arg) 202 case 'o': 203 // The unsigned int argument is converted to unsigned octal notation. The 204 // precision, if any, gives the minimum number of digits that must appear; if 205 // the converted value requires fewer digits, it is padded on the left with 206 // zeros. The default precision is 1. When 0 is printed with an explicit 207 // precision 0, the output is empty. 208 format++ 209 var arg uint64 210 if isWindows && mod == modL { 211 mod = modNone 212 } 213 switch mod { 214 case modNone: 215 arg = uint64(VaUint32(args)) 216 case modL, modLL, mod64: 217 arg = VaUint64(args) 218 case modH: 219 arg = uint64(uint16(VaInt32(args))) 220 case modHH: 221 arg = uint64(uint8(VaInt32(args))) 222 case mod32: 223 arg = uint64(VaInt32(args)) 224 default: 225 panic(todo("", mod)) 226 } 227 228 if arg == 0 && hasPrecision && prec == 0 { 229 break 230 } 231 232 if hasPrecision { 233 panic(todo("", prec)) 234 } 235 236 f := spec + "o" 237 str = fmt.Sprintf(f, arg) 238 case 'I': 239 if !isWindows { 240 panic(todo("%#U", c)) 241 } 242 243 format++ 244 switch c = *(*byte)(unsafe.Pointer(format)); c { 245 case 'x', 'X': 246 // https://docs.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-wsprintfa 247 // 248 // Ix, IX 249 // 250 // 64-bit unsigned hexadecimal integer in lowercase or uppercase on 64-bit 251 // platforms, 32-bit unsigned hexadecimal integer in lowercase or uppercase on 252 // 32-bit platforms. 253 if unsafe.Sizeof(int(0)) == 4 { 254 mod = mod32 255 } 256 case '3': 257 // https://en.wikipedia.org/wiki/Printf_format_string#Length_field 258 // 259 // I32 For integer types, causes printf to expect a 32-bit (double word) integer argument. 260 format++ 261 switch c = *(*byte)(unsafe.Pointer(format)); c { 262 case '2': 263 format++ 264 mod = mod32 265 goto more 266 default: 267 panic(todo("%#U", c)) 268 } 269 case '6': 270 // https://en.wikipedia.org/wiki/Printf_format_string#Length_field 271 // 272 // I64 For integer types, causes printf to expect a 64-bit (quad word) integer argument. 273 format++ 274 switch c = *(*byte)(unsafe.Pointer(format)); c { 275 case '4': 276 format++ 277 mod = mod64 278 goto more 279 default: 280 panic(todo("%#U", c)) 281 } 282 default: 283 panic(todo("%#U", c)) 284 } 285 fallthrough 286 case 'X': 287 fallthrough 288 case 'x': 289 // The unsigned int argument is converted to unsigned hexadecimal notation. 290 // The letters abcdef are used for x conversions; the letters ABCDEF are used 291 // for X conversions. The precision, if any, gives the minimum number of 292 // digits that must appear; if the converted value requires fewer digits, it is 293 // padded on the left with zeros. The default precision is 1. When 0 is 294 // printed with an explicit precision 0, the output is empty. 295 format++ 296 var arg uint64 297 if isWindows && mod == modL { 298 mod = modNone 299 } 300 switch mod { 301 case modNone: 302 arg = uint64(VaUint32(args)) 303 case modL, modLL, mod64: 304 arg = VaUint64(args) 305 case modH: 306 arg = uint64(uint16(VaInt32(args))) 307 case modHH: 308 arg = uint64(uint8(VaInt32(args))) 309 case mod32: 310 arg = uint64(VaInt32(args)) 311 default: 312 panic(todo("", mod)) 313 } 314 315 if arg == 0 && hasPrecision && prec == 0 { 316 break 317 } 318 319 if strings.Contains(spec, "#") && arg == 0 { 320 spec = strings.ReplaceAll(spec, "#", "") 321 } 322 var f string 323 switch { 324 case hasPrecision: 325 f = fmt.Sprintf("%s.%d%c", spec, prec, c) 326 default: 327 f = spec + string(c) 328 } 329 str = fmt.Sprintf(f, arg) 330 case 'e', 'E': 331 // The double argument is rounded and converted in the style [-]d.ddde±dd where 332 // there is one digit before the decimal-point character and the number of 333 // digits after it is equal to the precision; if the precision is missing, it 334 // is taken as 6; if the precision is zero, no decimal-point character appears. 335 // An E conversion uses the letter E (rather than e) to intro‐ duce the 336 // exponent. The exponent always contains at least two digits; if the value is 337 // zero, the exponent is 00. 338 format++ 339 arg := VaFloat64(args) 340 if !hasPrecision { 341 prec = 6 342 } 343 f := fmt.Sprintf("%s.%d%c", spec, prec, c) 344 str = fmt.Sprintf(f, arg) 345 case 'f', 'F': 346 // The double argument is rounded and converted to decimal notation in the 347 // style [-]ddd.ddd, where the number of digits after the decimal-point 348 // character is equal to the precision specification. If the precision 349 // is missing, it is taken as 6; if the precision is explicitly zero, no 350 // decimal-point character appears. If a decimal point appears, at least one 351 // digit appears before it. 352 format++ 353 arg := VaFloat64(args) 354 if !hasPrecision { 355 prec = 6 356 } 357 f := fmt.Sprintf("%s.%d%c", spec, prec, c) 358 str = fixNanInf(fmt.Sprintf(f, arg)) 359 case 'G': 360 fallthrough 361 case 'g': 362 // The double argument is converted in style f or e (or F or E for G 363 // conversions). The precision specifies the number of significant digits. If 364 // the precision is missing, 6 digits are given; if the precision is zero, it 365 // is treated as 1. Style e is used if the exponent from its conversion is 366 // less than -4 or greater than or equal to the precision. Trailing zeros are 367 // removed from the fractional part of the result; a decimal point appears only 368 // if it is followed by at least one digit. 369 format++ 370 arg := VaFloat64(args) 371 if !hasPrecision { 372 prec = 6 373 } 374 if prec == 0 { 375 prec = 1 376 } 377 378 f := fmt.Sprintf("%s.%d%c", spec, prec, c) 379 str = fixNanInf(fmt.Sprintf(f, arg)) 380 case 's': 381 // If no l modifier is present: the const char * argument is expected to be a 382 // pointer to an array of character type (pointer to a string). Characters 383 // from the array are written up to (but not including) a terminating null byte 384 // ('\0'); if a precision is specified, no more than the number specified are 385 // written. If a precision is given, no null byte need be present; if 386 // the precision is not specified, or is greater than the size of the array, 387 // the array must contain a terminating null byte. 388 // 389 // If an l modifier is present: the const wchar_t * argument is expected 390 // to be a pointer to an array of wide characters. Wide characters from the 391 // array are converted to multibyte characters (each by a call to the 392 // wcrtomb(3) function, with a conversion state starting in the initial state 393 // before the first wide character), up to and including a terminating null 394 // wide character. The resulting multibyte characters are written up to 395 // (but not including) the terminating null byte. If a precision is specified, 396 // no more bytes than the number specified are written, but no partial 397 // multibyte characters are written. Note that the precision determines the 398 // number of bytes written, not the number of wide characters or screen 399 // positions. The array must contain a terminating null wide character, 400 // unless a precision is given and it is so small that the number of bytes 401 // written exceeds it before the end of the array is reached. 402 format++ 403 arg := VaUintptr(args) 404 switch mod { 405 case modNone: 406 var f string 407 switch { 408 case hasPrecision: 409 f = fmt.Sprintf("%s.%ds", spec, prec) 410 str = fmt.Sprintf(f, GoString(arg)) 411 default: 412 f = spec + "s" 413 str = fmt.Sprintf(f, GoString(arg)) 414 } 415 default: 416 panic(todo("")) 417 } 418 case 'p': 419 // The void * pointer argument is printed in hexadecimal (as if by %#x or 420 // %#lx). 421 format++ 422 switch runtime.GOOS { 423 case "windows": 424 switch runtime.GOARCH { 425 case "386", "arm": 426 fmt.Fprintf(buf, "%08X", VaUintptr(args)) 427 default: 428 fmt.Fprintf(buf, "%016X", VaUintptr(args)) 429 } 430 default: 431 fmt.Fprintf(buf, "%#0x", VaUintptr(args)) 432 } 433 case 'c': 434 // If no l modifier is present, the int argument is converted to an unsigned 435 // char, and the resulting character is written. If an l modifier is present, 436 // the wint_t (wide character) ar‐ gument is converted to a multibyte sequence 437 // by a call to the wcrtomb(3) function, with a conversion state starting in 438 // the initial state, and the resulting multibyte string is writ‐ ten. 439 format++ 440 switch mod { 441 case modNone: 442 arg := VaInt32(args) 443 buf.WriteByte(byte(arg)) 444 default: 445 panic(todo("")) 446 } 447 case '%': 448 // A '%' is written. No argument is converted. The complete conversion 449 // specification is '%%'. 450 format++ 451 buf.WriteByte('%') 452 default: 453 panic(todo("%#U", c)) 454 } 455 456 buf.WriteString(str) 457 return format 458 } 459 460 // Field width 461 // 462 // An optional decimal digit string (with nonzero first digit) specifying a 463 // minimum field width. If the converted value has fewer characters than the 464 // field width, it will be padded with spa‐ ces on the left (or right, if the 465 // left-adjustment flag has been given). Instead of a decimal digit string one 466 // may write "*" or "*m$" (for some decimal integer m) to specify that the 467 // field width is given in the next argument, or in the m-th argument, 468 // respectively, which must be of type int. A negative field width is taken as 469 // a '-' flag followed by a positive field width. In no case does a 470 // nonexistent or small field width cause truncation of a field; if the result 471 // of a conversion is wider than the field width, the field is expanded to 472 // contain the conversion result. 473 func parseFieldWidth(format uintptr) (_ uintptr, n int, ok bool) { 474 first := true 475 for { 476 var digit int 477 switch c := *(*byte)(unsafe.Pointer(format)); { 478 case first && c == '0': 479 return format, n, ok 480 case first && c == '*': 481 panic(todo("")) 482 case c >= '0' && c <= '9': 483 format++ 484 ok = true 485 first = false 486 digit = int(c) - '0' 487 default: 488 return format, n, ok 489 } 490 491 n0 := n 492 n = 10*n + digit 493 if n < n0 { 494 panic(todo("")) 495 } 496 } 497 } 498 499 // Precision 500 // 501 // An optional precision, in the form of a period ('.') followed by an 502 // optional decimal digit string. Instead of a decimal digit string one may 503 // write "*" or "*m$" (for some decimal integer m) to specify that the 504 // precision is given in the next argument, or in the m-th argument, 505 // respectively, which must be of type int. If the precision is given as just 506 // '.', the precision is taken to be zero. A negative precision is taken 507 // as if the precision were omitted. This gives the minimum number of digits 508 // to appear for d, i, o, u, x, and X conversions, the number of digits to 509 // appear after the radix character for a, A, e, E, f, and F conversions, the 510 // maximum number of significant digits for g and G conversions, or the maximum 511 // number of characters to be printed from a string for s and S conversions. 512 func parsePrecision(format uintptr, args *uintptr) (_ uintptr, n int, ok bool) { 513 for { 514 switch c := *(*byte)(unsafe.Pointer(format)); c { 515 case '.': 516 format++ 517 first := true 518 for { 519 switch c := *(*byte)(unsafe.Pointer(format)); { 520 case first && c == '*': 521 format++ 522 n = int(VaInt32(args)) 523 return format, n, true 524 case c >= '0' && c <= '9': 525 format++ 526 first = false 527 n0 := n 528 n = 10*n + (int(c) - '0') 529 if n < n0 { 530 panic(todo("")) 531 } 532 default: 533 return format, n, true 534 } 535 } 536 default: 537 return format, 0, false 538 } 539 } 540 } 541 542 // Length modifier 543 // 544 // Here, "integer conversion" stands for d, i, o, u, x, or X conversion. 545 // 546 // hh A following integer conversion corresponds to a signed char or 547 // unsigned char argument, or a following n conversion corresponds to a pointer 548 // to a signed char argument. 549 // 550 // h A following integer conversion corresponds to a short int or unsigned 551 // short int argument, or a following n conversion corresponds to a pointer to 552 // a short int argument. 553 // 554 // l (ell) A following integer conversion corresponds to a long int or 555 // unsigned long int argument, or a following n conversion corresponds to a 556 // pointer to a long int argument, or a fol‐ lowing c conversion corresponds to 557 // a wint_t argument, or a following s conversion corresponds to a pointer to 558 // wchar_t argument. 559 // 560 // ll (ell-ell). A following integer conversion corresponds to a long long 561 // int or unsigned long long int argument, or a following n conversion 562 // corresponds to a pointer to a long long int argument. 563 // 564 // q A synonym for ll. This is a nonstandard extension, derived from BSD; 565 // avoid its use in new code. 566 // 567 // L A following a, A, e, E, f, F, g, or G conversion corresponds to a 568 // long double argument. (C99 allows %LF, but SUSv2 does not.) 569 // 570 // j A following integer conversion corresponds to an intmax_t or 571 // uintmax_t argument, or a following n conversion corresponds to a pointer to 572 // an intmax_t argument. 573 // 574 // z A following integer conversion corresponds to a size_t or ssize_t 575 // argument, or a following n conversion corresponds to a pointer to a size_t 576 // argument. 577 // 578 // Z A nonstandard synonym for z that predates the appearance of z. Do 579 // not use in new code. 580 // 581 // t A following integer conversion corresponds to a ptrdiff_t argument, 582 // or a following n conversion corresponds to a pointer to a ptrdiff_t 583 // argument. 584 585 func parseLengthModifier(format uintptr) (_ uintptr, n int) { 586 switch c := *(*byte)(unsafe.Pointer(format)); c { 587 case 'h': 588 format++ 589 n = modH 590 switch c := *(*byte)(unsafe.Pointer(format)); c { 591 case 'h': 592 format++ 593 n = modHH 594 } 595 return format, n 596 case 'l': 597 format++ 598 n = modL 599 switch c := *(*byte)(unsafe.Pointer(format)); c { 600 case 'l': 601 format++ 602 n = modLL 603 } 604 return format, n 605 case 'q': 606 panic(todo("")) 607 case 'L': 608 format++ 609 n = modLD 610 return format, n 611 case 'j': 612 panic(todo("")) 613 case 'z': 614 panic(todo("")) 615 case 'Z': 616 panic(todo("")) 617 case 't': 618 panic(todo("")) 619 default: 620 return format, 0 621 } 622 } 623 624 func fixNanInf(s string) string { 625 switch s { 626 case "NaN": 627 return "nan" 628 case "+Inf", "-Inf": 629 return "inf" 630 default: 631 return s 632 } 633 }