A Replacement For atoi() Using A State Machine

/* file getint.c
*
*  15-Jul-2002
*  (C) 2002 by Jack Klein
*  All rights reserved
*
*  License granted for free non-commercial use
*
*  Description:
*
*  this program demonstrates the design and use of a fully
*  safe function which operates in a manner similar to the
*  standard library function atoi() except that it does not
*  invoke undefined behavior if the text string to be
*  converted is not within the range of values representable
*  in a signed int
*
*  Purpose:
*
*  the primary purpose for this program is to display the
*  use of a simple state machine implemented in C
*
*  the state machine processes the characters in a string
*  sequentially and the action performed on each iteration
*  is determined by both the current state of the state
*  machine and the character being processed
*
*  usage:
*
*  int getint(const char *s);
*
*  input:
*
*     s is a pointer to a C string (an array of chars
*     terminated by '\0'), or NULL
*
*  output:
*
*     returns 0 if s is NULL or if the string cannot be
*     converted to a numeric value at all
*
*     return INT_MAX if the string represents a value larger
*     than INT_MAX
*
*     returns -INT_MAX if the string represents a value smaller
*     than -INT_MAX
*
*  notes:
*
*     a string which can be successfully converted can
*     contain zero or more leading white space characters
*     followed by zero or one sign characters ('+' or '-')
*     followed by zero or more decimal digit characters
*     '0' through '9'
*
*     once the optional sign character or the first digit
*     character is found, any non digit character ends
*     the conversion
*
*     this code takes advantage of the fact that both C
*     and C++ guarantee that the numerical values for the
*     characters '0' through '9' are consecutive and in
*     ascending order, guaranteeing that if ch is a char
*     for which isdigit() returns true, ch - '0' calculates
*     the integer value which the digit character represents
*/

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <ctype.h>

/* the getint_type enum defines the states   */
/* for the state machine                     */

typedef enum
{
   SKIP_WHITE,
   GET_DIGITS,
   FINISHED
} getint_type;

/* the sign_type enum will be used to keep   */
/* track of the sign of the number being     */
/* generated                                 */

typedef enum
{
   POSITIVE,
   NEGATIVE
} sign_type;

int getint(const char *s)
{
   int limit = INT_MAX / 10;
   getint_type state = SKIP_WHITE;
   sign_type sign = POSITIVE;
   int value = 0;
   int ch;

   if (s != NULL)
   {
      while (state != FINISHED)
      {
         ch = (unsigned char)*s++;

         switch (state)
         {
            /* getint() starts out in the SKIP_WHITE  */
            /* state and remains in that state until  */
            /* a character is returned for which the  */
            /* the standard library function isspace()*/
            /* returns false                          */
            case SKIP_WHITE:

               /* ignore leading white space */
               if (isspace(ch))
               {
                  ;  /* do nothing at all!   */
               }

               /* haven't seen a sign or digit  */
               /* character yet so can accept   */
               /* either                        */
               else if (ch == '-')
               {
                  state = GET_DIGITS;
                  sign = NEGATIVE;
               }
               else if (ch == '+')
               {
                  state = GET_DIGITS;
               }
               else if (isdigit(ch))
               {
                  state = GET_DIGITS;
                  value = ch - '0';
               }

               /* the first non-space character */
               /* is not a sign or a digit so   */
               /* the string cannot be converted*/
               /* to an integer value           */
               else
               {
                  state = FINISHED;
               }
               break;

            /* once a sign or a digit character */
            /* has been found, nothing but digit*/
            /* characters are acceptable        */
            case GET_DIGITS:
               if (isdigit(ch))
               {
                  ch -= '0';

/* must make sure that another digit can be added to the */
/* total without overflow...                             */

/* if the current value is is less than INT_MAX / 10,    */
/* then it can be multiplied by 10 to make room for the  */
/* new digit without overflow and the product will be at */
/* least 10 less than INT_MAX so the new digit can be    */
/* added without overflow                                */

/* if value is already at INT_MAX / 10, it can still be  */
/* multiplied by 10 to make room for the new digit, but  */
/* the new digit might not fit without overflow, as in   */
/* the case of a 16 bit int implementation where INT_MAX */
/* is 32767 and the current value is equal to            */
/* INT_MAX / 10, which is 3276                           */

/* if the new digit is 7 or less, it can be added to the */
/* value but if it is 8 or 9 it would cause overflow     */
                  if ((value < limit) ||
                     ((INT_MAX - value * 10) >= ch))
                  {
                     value *= 10;
                     value += ch;
                  }
                  else
                  {
                     value = INT_MAX;
                     state = FINISHED;
                  }
               }

               /* now that a sign character or the */
               /* first digit has been found, any  */
               /* character which is not a digit   */
               /* ends the conversion              */
               else
               {
                  state = FINISHED;
               }
               break;

            case FINISHED:
            default:
               break;

         }     /* end of switch (state)   */
      }     /* end of while (state != FINISHED) */
   }     /* end of if (s != NULL)   */

   /* now see if there was a negative sign   */
   if (sign == NEGATIVE)
   {
      value *= -1;
   }

   return value;
}

int main(void)
{
   char in [25];

   for ( ; ; )
   {
      printf("\nEnter An Integer: ");
      fflush(stdout);

      if ((fgets(in, sizeof in, stdin)) == NULL)
      {
         break;
      }
      
      if (*in == '\n')
      {
         break;
      }

      printf("You entered %d\n", getint(in));
   }

   return 0;
}

[ Top ]
[ C And C++ Main Page ]
[ Home ]

©2002 By Jack Klein. All Rights Reserved.
All trademarks are acknowledged to belong to their respective owners.