Source file src/internal/strconv/atoi.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  // lower(c) is a lower-case letter if and only if
     8  // c is either that lower-case letter or the equivalent upper-case letter.
     9  // Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'.
    10  // Note that lower of non-letters can produce other non-letters.
    11  func lower(c byte) byte {
    12  	return c | ('x' - 'X')
    13  }
    14  
    15  type Error int
    16  
    17  const (
    18  	_ Error = iota
    19  	ErrRange
    20  	ErrSyntax
    21  	ErrBase
    22  	ErrBitSize
    23  )
    24  
    25  func (e Error) Error() string {
    26  	switch e {
    27  	case ErrRange:
    28  		return "value out of range"
    29  	case ErrSyntax:
    30  		return "invalid syntax"
    31  	case ErrBase:
    32  		return "invalid base"
    33  	case ErrBitSize:
    34  		return "invalid bit size"
    35  	}
    36  	return "unknown error"
    37  }
    38  
    39  const intSize = 32 << (^uint(0) >> 63)
    40  
    41  // IntSize is the size in bits of an int or uint value.
    42  const IntSize = intSize
    43  
    44  // ParseUint is like [ParseInt] but for unsigned numbers.
    45  //
    46  // A sign prefix is not permitted.
    47  func ParseUint(s string, base int, bitSize int) (uint64, error) {
    48  	const fnParseUint = "ParseUint"
    49  
    50  	if s == "" {
    51  		return 0, ErrSyntax
    52  	}
    53  
    54  	base0 := base == 0
    55  
    56  	s0 := s
    57  	switch {
    58  	case 2 <= base && base <= 36:
    59  		// valid base; nothing to do
    60  
    61  	case base == 0:
    62  		// Look for octal, hex prefix.
    63  		base = 10
    64  		if s[0] == '0' {
    65  			switch {
    66  			case len(s) >= 3 && lower(s[1]) == 'b':
    67  				base = 2
    68  				s = s[2:]
    69  			case len(s) >= 3 && lower(s[1]) == 'o':
    70  				base = 8
    71  				s = s[2:]
    72  			case len(s) >= 3 && lower(s[1]) == 'x':
    73  				base = 16
    74  				s = s[2:]
    75  			default:
    76  				base = 8
    77  				s = s[1:]
    78  			}
    79  		}
    80  
    81  	default:
    82  		return 0, ErrBase
    83  	}
    84  
    85  	if bitSize == 0 {
    86  		bitSize = IntSize
    87  	} else if bitSize < 0 || bitSize > 64 {
    88  		return 0, ErrBitSize
    89  	}
    90  
    91  	// Cutoff is the smallest number such that cutoff*base > maxUint64.
    92  	// Use compile-time constants for common cases.
    93  	var cutoff uint64
    94  	switch base {
    95  	case 10:
    96  		cutoff = maxUint64/10 + 1
    97  	case 16:
    98  		cutoff = maxUint64/16 + 1
    99  	default:
   100  		cutoff = maxUint64/uint64(base) + 1
   101  	}
   102  
   103  	maxVal := uint64(1)<<uint(bitSize) - 1
   104  
   105  	underscores := false
   106  	var n uint64
   107  	for _, c := range []byte(s) {
   108  		var d byte
   109  		switch {
   110  		case c == '_' && base0:
   111  			underscores = true
   112  			continue
   113  		case '0' <= c && c <= '9':
   114  			d = c - '0'
   115  		case 'a' <= lower(c) && lower(c) <= 'z':
   116  			d = lower(c) - 'a' + 10
   117  		default:
   118  			return 0, ErrSyntax
   119  		}
   120  
   121  		if d >= byte(base) {
   122  			return 0, ErrSyntax
   123  		}
   124  
   125  		if n >= cutoff {
   126  			// n*base overflows
   127  			return maxVal, ErrRange
   128  		}
   129  		n *= uint64(base)
   130  
   131  		n1 := n + uint64(d)
   132  		if n1 < n || n1 > maxVal {
   133  			// n+d overflows
   134  			return maxVal, ErrRange
   135  		}
   136  		n = n1
   137  	}
   138  
   139  	if underscores && !underscoreOK(s0) {
   140  		return 0, ErrSyntax
   141  	}
   142  
   143  	return n, nil
   144  }
   145  
   146  // ParseInt interprets a string s in the given base (0, 2 to 36) and
   147  // bit size (0 to 64) and returns the corresponding value i.
   148  //
   149  // The string may begin with a leading sign: "+" or "-".
   150  //
   151  // If the base argument is 0, the true base is implied by the string's
   152  // prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o",
   153  // 16 for "0x", and 10 otherwise. Also, for argument base 0 only,
   154  // underscore characters are permitted as defined by the Go syntax for
   155  // [integer literals].
   156  //
   157  // The bitSize argument specifies the integer type
   158  // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
   159  // correspond to int, int8, int16, int32, and int64.
   160  // If bitSize is below 0 or above 64, an error is returned.
   161  //
   162  // The errors that ParseInt returns have concrete type [*NumError]
   163  // and include err.Num = s. If s is empty or contains invalid
   164  // digits, err.Err = [ErrSyntax] and the returned value is 0;
   165  // if the value corresponding to s cannot be represented by a
   166  // signed integer of the given size, err.Err = [ErrRange] and the
   167  // returned value is the maximum magnitude integer of the
   168  // appropriate bitSize and sign.
   169  //
   170  // [integer literals]: https://go.dev/ref/spec#Integer_literals
   171  func ParseInt(s string, base int, bitSize int) (i int64, err error) {
   172  	const fnParseInt = "ParseInt"
   173  
   174  	if s == "" {
   175  		return 0, ErrSyntax
   176  	}
   177  
   178  	// Pick off leading sign.
   179  	neg := false
   180  	switch s[0] {
   181  	case '+':
   182  		s = s[1:]
   183  	case '-':
   184  		s = s[1:]
   185  		neg = true
   186  	}
   187  
   188  	// Convert unsigned and check range.
   189  	var un uint64
   190  	un, err = ParseUint(s, base, bitSize)
   191  	if err != nil && err != ErrRange {
   192  		return 0, err
   193  	}
   194  
   195  	if bitSize == 0 {
   196  		bitSize = IntSize
   197  	}
   198  
   199  	cutoff := uint64(1 << uint(bitSize-1))
   200  	if !neg && un >= cutoff {
   201  		return int64(cutoff - 1), ErrRange
   202  	}
   203  	if neg && un > cutoff {
   204  		return -int64(cutoff), ErrRange
   205  	}
   206  	n := int64(un)
   207  	if neg {
   208  		n = -n
   209  	}
   210  	return n, nil
   211  }
   212  
   213  // Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
   214  func Atoi(s string) (int, error) {
   215  	const fnAtoi = "Atoi"
   216  
   217  	sLen := len(s)
   218  	if intSize == 32 && (0 < sLen && sLen < 10) ||
   219  		intSize == 64 && (0 < sLen && sLen < 19) {
   220  		// Fast path for small integers that fit int type.
   221  		s0 := s
   222  		if s[0] == '-' || s[0] == '+' {
   223  			s = s[1:]
   224  			if len(s) < 1 {
   225  				return 0, ErrSyntax
   226  			}
   227  		}
   228  
   229  		n := 0
   230  		for _, ch := range []byte(s) {
   231  			ch -= '0'
   232  			if ch > 9 {
   233  				return 0, ErrSyntax
   234  			}
   235  			n = n*10 + int(ch)
   236  		}
   237  		if s0[0] == '-' {
   238  			n = -n
   239  		}
   240  		return n, nil
   241  	}
   242  
   243  	// Slow path for invalid, big, or underscored integers.
   244  	i64, err := ParseInt(s, 10, 0)
   245  	return int(i64), err
   246  }
   247  
   248  // underscoreOK reports whether the underscores in s are allowed.
   249  // Checking them in this one function lets all the parsers skip over them simply.
   250  // Underscore must appear only between digits or between a base prefix and a digit.
   251  func underscoreOK(s string) bool {
   252  	// saw tracks the last character (class) we saw:
   253  	// ^ for beginning of number,
   254  	// 0 for a digit or base prefix,
   255  	// _ for an underscore,
   256  	// ! for none of the above.
   257  	saw := '^'
   258  	i := 0
   259  
   260  	// Optional sign.
   261  	if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
   262  		s = s[1:]
   263  	}
   264  
   265  	// Optional base prefix.
   266  	hex := false
   267  	if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
   268  		i = 2
   269  		saw = '0' // base prefix counts as a digit for "underscore as digit separator"
   270  		hex = lower(s[1]) == 'x'
   271  	}
   272  
   273  	// Number proper.
   274  	for ; i < len(s); i++ {
   275  		// Digits are always okay.
   276  		if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
   277  			saw = '0'
   278  			continue
   279  		}
   280  		// Underscore must follow digit.
   281  		if s[i] == '_' {
   282  			if saw != '0' {
   283  				return false
   284  			}
   285  			saw = '_'
   286  			continue
   287  		}
   288  		// Underscore must also be followed by digit.
   289  		if saw == '_' {
   290  			return false
   291  		}
   292  		// Saw non-digit, non-underscore.
   293  		saw = '!'
   294  	}
   295  	return saw != '_'
   296  }
   297  

View as plain text