A Replacement For atoi() Using A State Machine
/* file getint.c
*
* 15-Jul-2002
* (C) 2002 by Jack Klein
* All rights reserved
*
* License granted for free non-commercial use
*
* Description:
*
* this program demonstrates the design and use of a fully
* safe function which operates in a manner similar to the
* standard library function atoi() except that it does not
* invoke undefined behavior if the text string to be
* converted is not within the range of values representable
* in a signed int
*
* Purpose:
*
* the primary purpose for this program is to display the
* use of a simple state machine implemented in C
*
* the state machine processes the characters in a string
* sequentially and the action performed on each iteration
* is determined by both the current state of the state
* machine and the character being processed
*
* usage:
*
* int getint(const char *s);
*
* input:
*
* s is a pointer to a C string (an array of chars
* terminated by '\0'), or NULL
*
* output:
*
* returns 0 if s is NULL or if the string cannot be
* converted to a numeric value at all
*
* return INT_MAX if the string represents a value larger
* than INT_MAX
*
* returns -INT_MAX if the string represents a value smaller
* than -INT_MAX
*
* notes:
*
* a string which can be successfully converted can
* contain zero or more leading white space characters
* followed by zero or one sign characters ('+' or '-')
* followed by zero or more decimal digit characters
* '0' through '9'
*
* once the optional sign character or the first digit
* character is found, any non digit character ends
* the conversion
*
* this code takes advantage of the fact that both C
* and C++ guarantee that the numerical values for the
* characters '0' through '9' are consecutive and in
* ascending order, guaranteeing that if ch is a char
* for which isdigit() returns true, ch - '0' calculates
* the integer value which the digit character represents
*/
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <ctype.h>
/* the getint_type enum defines the states */
/* for the state machine */
typedef enum
{
SKIP_WHITE,
GET_DIGITS,
FINISHED
} getint_type;
/* the sign_type enum will be used to keep */
/* track of the sign of the number being */
/* generated */
typedef enum
{
POSITIVE,
NEGATIVE
} sign_type;
int getint(const char *s)
{
int limit = INT_MAX / 10;
getint_type state = SKIP_WHITE;
sign_type sign = POSITIVE;
int value = 0;
int ch;
if (s != NULL)
{
while (state != FINISHED)
{
ch = (unsigned char)*s++;
switch (state)
{
/* getint() starts out in the SKIP_WHITE */
/* state and remains in that state until */
/* a character is returned for which the */
/* the standard library function isspace()*/
/* returns false */
case SKIP_WHITE:
/* ignore leading white space */
if (isspace(ch))
{
; /* do nothing at all! */
}
/* haven't seen a sign or digit */
/* character yet so can accept */
/* either */
else if (ch == '-')
{
state = GET_DIGITS;
sign = NEGATIVE;
}
else if (ch == '+')
{
state = GET_DIGITS;
}
else if (isdigit(ch))
{
state = GET_DIGITS;
value = ch - '0';
}
/* the first non-space character */
/* is not a sign or a digit so */
/* the string cannot be converted*/
/* to an integer value */
else
{
state = FINISHED;
}
break;
/* once a sign or a digit character */
/* has been found, nothing but digit*/
/* characters are acceptable */
case GET_DIGITS:
if (isdigit(ch))
{
ch -= '0';
/* must make sure that another digit can be added to the */
/* total without overflow... */
/* if the current value is is less than INT_MAX / 10, */
/* then it can be multiplied by 10 to make room for the */
/* new digit without overflow and the product will be at */
/* least 10 less than INT_MAX so the new digit can be */
/* added without overflow */
/* if value is already at INT_MAX / 10, it can still be */
/* multiplied by 10 to make room for the new digit, but */
/* the new digit might not fit without overflow, as in */
/* the case of a 16 bit int implementation where INT_MAX */
/* is 32767 and the current value is equal to */
/* INT_MAX / 10, which is 3276 */
/* if the new digit is 7 or less, it can be added to the */
/* value but if it is 8 or 9 it would cause overflow */
if ((value < limit) ||
((INT_MAX - value * 10) >= ch))
{
value *= 10;
value += ch;
}
else
{
value = INT_MAX;
state = FINISHED;
}
}
/* now that a sign character or the */
/* first digit has been found, any */
/* character which is not a digit */
/* ends the conversion */
else
{
state = FINISHED;
}
break;
case FINISHED:
default:
break;
} /* end of switch (state) */
} /* end of while (state != FINISHED) */
} /* end of if (s != NULL) */
/* now see if there was a negative sign */
if (sign == NEGATIVE)
{
value *= -1;
}
return value;
}
int main(void)
{
char in [25];
for ( ; ; )
{
printf("\nEnter An Integer: ");
fflush(stdout);
if ((fgets(in, sizeof in, stdin)) == NULL)
{
break;
}
if (*in == '\n')
{
break;
}
printf("You entered %d\n", getint(in));
}
return 0;
}
©2002 By Jack Klein. All Rights Reserved.
All trademarks are acknowledged to belong to their respective owners.