#1
  1. No Profile Picture
    Registered User
    Devshed Newbie (0 - 499 posts)

    Join Date
    Sep 2013
    Posts
    5
    Rep Power
    0

    Lexical Analysis


    This is my C code:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdarg.h>

    #define TRUE 1
    #define FALSE 0

    #define DEBUG 1 // 1 => Turn ON debugging, 0 => Turn OFF debugging

    // Debugging printf: when DEBUG is non-zero, forwards the printf-style
    // format string and its variadic arguments to stdout via vfprintf();
    // when DEBUG is zero it prints nothing.
    void print_debug(const char * format, ...)
    {
    va_list args;
    if (DEBUG)
    {
    va_start (args, format);
    vfprintf (stdout, format, args);
    va_end (args);
    }
    }

    //----------------------------- token types ------------------------------
    #define KEYWORDS 12
    #define RESERVED 38
    #define VAR 1
    #define BEGIN 2
    #define END 3
    #define ASSIGN 4
    #define IF 5
    #define WHILE 6
    #define DO 7
    #define THEN 8
    #define PRINT 9
    #define INT 10
    #define REAL 11
    #define STRING 12
    #define PLUS 13
    #define MINUS 14
    #define UNDERSCORE 15
    #define DIV 16
    #define MULT 17
    #define EQUAL 18
    #define COLON 19
    #define COMMA 20
    #define SEMICOLON 21
    #define LBRAC 22
    #define RBRAC 23
    #define LPAREN 24
    #define RPAREN 25
    #define NOTEQUAL 26
    #define GREATER 27
    #define LESS 28
    #define LTEQ 29
    #define GTEQ 30
    #define LSHIFT 31
    #define RSHIFT 32
    #define DOT 33
    #define NUM 34
    #define INTID 35
    #define REALID 36
    #define ID 37
    #define ERROR 38

    //------------------- reserved words and token strings -----------------------
    char rel[]={'R','E','A','L'};
    char iden[]={'I','D'};
    char *reserved[] =
    { "",
    "VAR",
    "BEGIN",
    "END",
    "ASSIGN",
    "IF",
    "WHILE",
    "DO",
    "THEN",
    "PRINT",
    "INT",
    "REAL",
    "STRING",
    "+",
    "-",
    "_",
    "/",
    "*",
    "=",
    ":",
    ",",
    ";",
    "[",
    "]",
    "(",
    ")",
    "<>",
    ">",
    "<",
    "<=",
    ">=",
    "<<",
    ">>",
    ".",
    "NUM",
    "INTID",
    "REALID",
    "ID",
    "ERROR"
    };

    // Prints reserved[ttype] and a newline when ttype <= RESERVED and
    // returns 1; otherwise prints nothing and returns 0.
    // NOTE(review): there is no lower bound, so a negative ttype (e.g. EOF)
    // would index before the start of reserved[].
    int printReserved(int ttype)
    {
    if (ttype <= RESERVED)
    {
    printf("%s\n",reserved[ttype]);
    return 1;
    }
    else
    return 0;
    }
    //---------------------------------------------------------

    //---------------------------------------------------------
    // Global Variables associated with the next input token
    // This implementation does not allow tokens that are more
    // than 100 characters long
    #define MAX_TOKEN_LENGTH 100



    char token[MAX_TOKEN_LENGTH]; // token string
    int tokenLength;
    int line_no = 0;

    //---------------------------------------------------------
    // Global Variables associated with the input buffer
    // This implementation does not allow lines that are more
    // than 500 characters long
    #define BUFFER_SIZE 500

    int buffer[BUFFER_SIZE];
    int start_pos = 0; // Signifies the start position in the buffer (#)
    int next_pos = 0; // Signifies the next character to be read (^)
    int maximal_pos = 0; // Signifies the last character of the maximal
    // token found (*)

    //---------------------------------------------------------
    // Functions associated with the input buffer

    // Reads one line from stdin into buffer[], stores EOF after the last
    // character as an end-of-line sentinel, resets the three scan positions
    // and increments line_no.  Exits with status 1 when the line reaches
    // BUFFER_SIZE characters.
    void readNextLine()
    {
    int i;
    // NOTE(review): getchar() returns int; the result is narrowed to char here
    char c;
    start_pos = -1; // start position is the position just
    // before the beginning of the token string
    maximal_pos = -1; // maximal_position points to the end of the
    // maximal_matched token string
    next_pos = 0; // next_pos points to the next input character

    i = 0;
    c = getchar();
    // stop at the newline (which is not stored), at end of input, or when full
    while (c != '\n' && !feof(stdin) && (i < BUFFER_SIZE))
    {
    buffer[i] = c;
    c = getchar();
    i++;
    }
    if (i >= BUFFER_SIZE)
    {
    printf("Maximum allowed line length exceeded\n");
    exit(1);
    }
    else
    buffer[i] = EOF;
    line_no++;
    }

    /* you can use the following function for Debugging purposes:
     * it prints the buffer contents up to the EOF sentinel, followed by
     * the three scan positions, all through print_debug() */
    void printBuffer()
    {
    print_debug("BUFFER: ");
    int i = 0;
    while (buffer[i] != EOF)
    {
    print_debug("%c", (char) buffer[i]);
    i++;
    }
    print_debug("\n");
    print_debug("START_POS = %d\n", start_pos);
    print_debug("NEXT_POS = %d\n", next_pos);
    print_debug("MAXIMAL_POS = %d\n", maximal_pos);
    }

    // Advances next_pos past whitespace, then sets maximal_pos and start_pos
    // to the position just before the next non-space character.
    // NOTE(review): declared to return int, but no value is returned.
    int skip_space_and_set_positions()
    {
    while (isspace((char) buffer[next_pos]))
    {
    next_pos++;
    }
    maximal_pos = next_pos - 1;
    start_pos = next_pos - 1;
    }

    // this function copies the maximal token string, i.e. the characters
    // buffer[start_pos+1 .. maximal_pos], to the token array, terminates it
    // with '\0' and records its length in tokenLength
    // it assumes the token string is not empty
    // NOTE(review): the copy is not bounded by MAX_TOKEN_LENGTH
    void copy_token_string_from_buffer()
    {
    int i;
    tokenLength = 0;
    for (i = start_pos + 1; i <= maximal_pos; i++)
    {
    token[tokenLength] = (char) buffer[i];
    tokenLength++;
    }
    token[tokenLength] = '\0';
    }

    //----------------------------------------------------------

    // This function checks if a string is a keyword: it compares s with
    // reserved[1] .. reserved[KEYWORDS] and returns the index of the first
    // match, or FALSE (0) if there is none.
    // The implementation is not the most efficient, but probably
    // the simplest!
    int isKeyword(char *s)
    {
    int i;

    for (i = 1; i <= KEYWORDS; i++)
    if (strcmp(reserved[i],s) == 0)
    return i;
    return FALSE;
    }

    // Scans the run of letters/digits at next_pos, copies it to token and
    // returns its keyword type if isKeyword() recognises it.  Otherwise the
    // result starts as ID, and the code then looks for '.', digits and a run
    // of letters, comparing those letters with rel[] and iden[] to decide
    // between REALID and INTID.
    int scan_id_keyword()
    {
    int ttype;

    while (isalnum(buffer[next_pos]))
    {
    next_pos++;
    }
    maximal_pos = next_pos - 1;

    copy_token_string_from_buffer();

    ttype = isKeyword(token);

    if (ttype == 0)
    {
    ttype = ID;
    // NOTE(review): '=' assigns '.' to buffer[next_pos] (so the test is always
    // true) instead of comparing with '=='; this can overwrite the EOF sentinel
    if(buffer[next_pos]='.')
    {
    int k = next_pos+1;

    if(isdigit(buffer[k]))
    {
    while(isdigit(buffer[k]))
    k++;
    if(isalpha(buffer[k]))
    {
    char temp[BUFFER_SIZE];
    // NOTE(review): i is used as an index below without being initialized
    int i,a=0;
    while(isalpha(buffer[k]))
    {
    temp[i]=buffer[k];
    i++;
    k++;
    }
    // NOTE(review): r is incremented below without being initialized
    int r,m=0;
    // NOTE(review): sizeof(temp) is BUFFER_SIZE, so this loop and the one
    // further down index rel[] and iden[] past their ends
    for(m=0; m<sizeof(temp);m++ )
    {
    if(temp[m]==rel[m])
    r++;
    }

    if(r == sizeof(temp))
    {
    next_pos=k-1;
    maximal_pos=next_pos-1;
    ttype= REALID;
    copy_token_string_from_buffer();

    }

    for(m=0; m<sizeof(temp);m++ )
    {
    if(temp[m]==iden[m])
    a++;
    }
    if(sizeof(temp)==a)
    {
    next_pos=k-1;
    maximal_pos=next_pos-1;
    ttype= INTID;
    copy_token_string_from_buffer();

    }
    }
    }
    }

    return ttype;
    }

    // reached when the token is a keyword
    return ttype;


    }

    // this function is called only if the next input character is a digit
    // it consumes either a single '0' or a whole run of digits, copies the
    // digits to token and returns NUM
    int scan_number()
    {
    int ttype;

    ttype = NUM;
    if (buffer[next_pos] != '0')
    while (isdigit(buffer[next_pos]))
    next_pos++;
    else // NUM is 0
    next_pos++;

    maximal_pos = next_pos - 1;

    copy_token_string_from_buffer();
    return ttype;
    }

    // Skips whitespace, reads the character at next_pos and returns the type
    // of the token that starts there.  Numbers and identifiers/keywords are
    // delegated to scan_number() and scan_id_keyword(); the EOF sentinel
    // yields EOF and any other character yields ERROR.
    int getToken()
    {
    // NOTE(review): the int buffer value (possibly the EOF sentinel) is
    // narrowed to char here and later compared with EOF
    char c;
    int ttype;

    skip_space_and_set_positions();

    c = buffer[next_pos];
    // next_pos now points at the character after c
    next_pos++;

    switch (c)
    {
    case '.': ttype = DOT; break;
    case '+': ttype = PLUS; break;
    case '-': ttype = MINUS; break;
    case '_': ttype = UNDERSCORE; break;
    case '/': ttype = DIV; break;
    case '*': ttype = MULT; break;
    case '=': ttype = EQUAL; break;
    case ':': ttype = COLON; break;
    case ',': ttype = COMMA; break;
    case ';': ttype = SEMICOLON; break;
    case '[': ttype = LBRAC; break;
    case ']': ttype = RBRAC; break;
    case '(': ttype = LPAREN; break;
    case ')': ttype = RPAREN; break;
    case '<':
    // NOTE(review): '=' assigns '<' to buffer[next_pos] (always true) instead
    // of comparing with '==', so the branches below are never reached
    if(buffer[next_pos]='<')
    {ttype = LSHIFT;
    next_pos++;
    break;}
    else if(buffer[next_pos]=='=')
    {ttype = LTEQ;
    next_pos++;
    break;}
    else if(buffer[next_pos]=='>')
    {ttype = NOTEQUAL;
    next_pos++;
    break;}
    else
    ttype = LESS;
    break;
    case '>':
    if(buffer[next_pos]=='=')
    {ttype = GTEQ;
    next_pos++;
    break;}
    else if(buffer[next_pos]=='>')
    {ttype = RSHIFT;
    next_pos++;
    break;}
    else
    ttype = GREATER ;
    break;
    default :
    if (isdigit(c))
    {
    // step back so the scanner sees the first digit again
    next_pos--;
    ttype = scan_number();
    }
    else if (isalpha(c))
    {
    // token is either a keyword or ID or INTID or REALID
    next_pos--;
    ttype = scan_id_keyword();
    /*
    * TODO: Write code to handle INTID and REALID
    * It might be better to replace scan_id_keyword()
    * with a function that handles INTID and REALID
    * in addition to ID and keywords
    */
    }
    else if (c == EOF)
    ttype = EOF;
    else
    ttype = ERROR;
    break;
    } // End Switch
    return ttype;
    }

    // Driver: reads stdin one line at a time and prints, for every token on
    // the line, the line number, the token type and either the token text
    // (NUM, ID, INTID, REALID) or the reserved[] string for that type.
    int main()
    {
    int ttype;

    while (!feof(stdin))
    {
    readNextLine();
    // printBuffer();
    while ((ttype = getToken()) != EOF)
    {
    /* EOF indicates that we have reached the end of buffer */
    // printBuffer();
    printf("%d %d ", line_no, ttype);
    if ((ttype == NUM) || (ttype == ID) ||
    (ttype == INTID) || (ttype == REALID))
    {
    printf("%s \n", token);
    }
    else
    {
    printReserved(ttype);
    }
    }
    }
    return 0;
    }

    This code is working for the inputs 123 or 123.45.
    But it is not working for letter or a word. For Eg:
    If the input is "atreya" it is showing error as Segmentation Fault.

    Please look into this code and tell what is wrong with this.
    Any help would be appreciated.
    Thanks in advance
  2. #2
  3. Lord of the Dance
    Devshed Expert (3500 - 3999 posts)

    Join Date
    Oct 2003
    Posts
    3,534
    Rep Power
    1906
    Please use [code] tag as this will make it easier to read.

    Where in the code does the segmentation fault occur?
  4. #3
  5. No Profile Picture
    Registered User
    Devshed Newbie (0 - 499 posts)

    Join Date
    Sep 2013
    Posts
    5
    Rep Power
    0
    Hey MrFujin,

    Thanks for the tip. Will keep that in mind.
    I'm running the code in CentOS using the GCC compiler.
    The code is compiling properly.
    But only when the input consisting of letters is given, it shows errors.
    So I can't exactly point out where the code shows error.

    Thanks for your help.
  6. #4
  7. Contributing User
    Devshed Supreme Being (6500+ posts)

    Join Date
    Jan 2003
    Location
    USA
    Posts
    7,089
    Rep Power
    2222
    Originally Posted by fdrgiit
    The code is compiling properly.
    Does it? Really? Are you saying that it also does not throw any warnings? Because if you are getting warnings, then, no, the code is not compiling properly. In fact, the causes of many segfaults are reported by warnings, but the programmer claiming that his code "is compiling properly" is a fool who ignores warnings.

    When you invoke gcc, are you using the -Wall option? If not, then recompile with that option and correct all the warnings you get. I'm finishing my breakfast now and have to rush to work, so I won't be able to compile your code with -Wall until later. Save yourself some time and don't wait for me to perform the due diligence that you should be.

    PS
    If you're using C99 features, then I would not be able to check for warnings for you anyway.
  8. #5
  9. Contributing User
    Devshed Supreme Being (6500+ posts)

    Join Date
    Jan 2003
    Location
    USA
    Posts
    7,089
    Rep Power
    2222
    You know, it is truly amazing how utterly predictable you people are. Over and over again, we have students proclaiming that their code "is compiling properly" when in fact it is not! For that matter, I cannot think of a single instance when the code they posted did not throw severe warnings.

    After correcting the location of some variable declarations (which were thankfully the only C99 features you used), here is what my MinGW gcc compiler using -Wall reported:
    C:\TEST>gcc -Wall lex01.c
    lex01.c: In function `skip_space_and_set_positions':
    lex01.c:193: warning: implicit declaration of function `isspace'
    lex01.c:199: warning: control reaches end of non-void function
    lex01.c: In function `scan_id_keyword':
    lex01.c:235: warning: implicit declaration of function `isalnum'
    lex01.c:248: warning: suggest parentheses around assignment used as truth value
    lex01.c:252: warning: implicit declaration of function `isdigit'
    lex01.c:256: warning: implicit declaration of function `isalpha'
    lex01.c: In function `getToken':
    lex01.c:351: warning: suggest parentheses around assignment used as truth value

    C:\TEST>
    Why didn't you include the header file, ctype.h, for those functions isspace, isalnum, isdigit, and isalpha? Never rely on implicit declarations!

    This warning,
    lex01.c:199: warning: control reaches end of non-void function
    , is because you declared that function to return an int, but then you did not return an int. You should never lie to the compiler:
    Originally Posted by Henry Spencer
    If you lie to the compiler, it will get its revenge.
    These two warnings are about the same mistake, which you made twice:
    lex01.c:248: warning: suggest parentheses around assignment used as truth value
    lex01.c:351: warning: suggest parentheses around assignment used as truth value

    Those lines are:
    Line 248: if(buffer[next_pos]='.')
    Line 351: if(buffer[next_pos]='<')

    I think you should be able to see the problem there.

    Even though the warnings don't point directly to the cause of the SEGFAULT, they did uncover problems that would have driven you crazy trying to find them.

    Warnings are more important than error messages are. Always turn warnings on and up! Never ignore warnings!

    PS

    To find the SEGFAULT, you should run your program in the debugger.
    Last edited by dwise1_aol; September 3rd, 2013 at 02:31 PM. Reason: corrected error in header file name
  10. #6
  11. Contributing User
    Devshed Supreme Being (6500+ posts)

    Join Date
    Jan 2003
    Location
    USA
    Posts
    7,089
    Rep Power
    2222
    You neglected to tell us what happened when you would enter atreya. We need fairly specific information to be able to help you, so by withholding that information you are only hurting yourself.

    What we get is something like this:
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    4 38 ERROR
    and then it crashes. I'm quite certain that there was a lot more of the same output that preceded that, but my shell only buffers a finite number of lines.

    That is the output from main():
    Code:
            while ((ttype = getToken()) != EOF)
            {
                /*  EOF indicates that we have reached the end of buffer */
                // printBuffer();
                printf("%d %d ", line_no, ttype);
                if ((ttype == NUM) || (ttype == ID) || 
                    (ttype == INTID) || (ttype == REALID))
                {
                    printf("%s \n", token);
                }
                else
                {
                    printReserved(ttype);
                }
            }
    The first number is the input line number, the second is the token type, and the third is the reserved name of the token.

    That output is telling us two things:
    1. You're hitting an error, and
    2. You're stuck in an infinite loop. Apparently, that causes something to overflow within the console/terminal/shell; odd that it would happen both to Win7's cmd.exe and to your shell on CentOS.

    So where does ttype get set to ERROR? It's a local variable in main() that is set to the return value of getToken(), which coincidentally returns the value of its own local variable named ttype -- nothing inherently wrong with reusing local variable names like that, except that it can cause confusion since you need to trace back and verify whether it is local or global; that is made worse by your use of global variables.

    Here is the code in getToken() where ttype is set to ERROR:
    Code:
            default :
                if (isdigit(c))
                {
                    next_pos--;
                    ttype = scan_number();
                }
                else if (isalpha(c)) 
                {
                    // token is either a keyword or ID or INTID or REALID
                    next_pos--;
                    ttype = scan_id_keyword();
                    /* 
                     * TODO: Write code to handle INTID and REALID
                     * It might be better to replace scan_id_keyword() 
                     * with a function that handles INTID and REALID 
                     * in addition to ID and keywords
                     */
                }
                else if (c == EOF)
                  ttype = EOF;
                else
                  ttype = ERROR;
                break;
        }   // End Switch
        return ttype;
    }
    I find it curious that isalpha() did not return true. Do you think that might have anything to do with your not having #include'd its header file? BTW, I did not correct your code for this test. But what I do see is that each case changes the value of next_pos in order to advance to the next character in the input buffer. All except for EOF and ERROR.

    That means that if you hit an ERROR, then you get stuck in an infinite loop repeatedly testing the same character over and over again ad infinitum. Or until the shell blows up from under you.

    You need to come up with some method of error recovery, even if it's nothing more than to give up and not attempt to recover.

    BTW, when I do make the corrections indicated by the warnings, your program accepts "atreya" just fine.
    C:\TEST>a
    324
    1 34 324
    atreya
    2 37 atreya
    ^Z

    C:\TEST>
    But you do still need to look into finding a better way to handle an error.

    Remember:
    Always turn warnings on and up!
    Never ignore a warning.
    Warnings are much more important than error messages.

    Comments on this post

    • requinix agrees : I enjoyed reading this thread
    Last edited by dwise1_aol; September 3rd, 2013 at 02:37 PM.
  12. #7
  13. No Profile Picture
    Registered User
    Devshed Newbie (0 - 499 posts)

    Join Date
    Sep 2013
    Posts
    5
    Rep Power
    0
    Hey dwise1_aol,

    Thanks for your kind words.
    Actually, I'm new to the C language, and new to running this kind of code on a Linux system as well. I know I need to learn a lot. I'm trying my level best.
    So will surely keep these points mentioned by you in mind.
    Also I went through your replies and have one last error while debugging the code.
    This is my revised code, as directed by you:
    Code:
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdarg.h>
    #include <ctype.h>
    #include <string.h>
    
    #define TRUE  1
    #define FALSE 0
    
    #define DEBUG 1     // 1 => Turn ON debugging, 0 => Turn OFF debugging
    
    // Debug-only printf: when DEBUG is non-zero the formatted message is
    // written to stdout; when DEBUG is zero the call does nothing.
    //
    // format: printf-style format string, followed by its arguments.
    void print_debug(const char * format, ...)
    {
        va_list ap;

        if (!DEBUG)
            return;

        va_start (ap, format);
        vfprintf (stdout, format, ap);
        va_end (ap);
    }
    
    //----------------------------- token types ------------------------------
    #define KEYWORDS 	12
    #define RESERVED 	38
    #define VAR 		1
    #define BEGIN 		2
    #define END 		3
    #define ASSIGN 		4
    #define IF 			5
    #define WHILE 		6
    #define DO 			7
    #define THEN 		8
    #define PRINT 		9
    #define INT 		10
    #define REAL 		11
    #define STRING 		12
    #define PLUS 		13
    #define MINUS 		14
    #define UNDERSCORE 	15
    #define DIV 		16
    #define MULT 		17
    #define EQUAL 		18
    #define COLON 		19
    #define COMMA 		20
    #define SEMICOLON 	21
    #define LBRAC 		22
    #define RBRAC 		23
    #define LPAREN 		24
    #define RPAREN 		25
    #define NOTEQUAL 	26
    #define GREATER 	27
    #define LESS 		28
    #define LTEQ 		29
    #define GTEQ 		30
    #define LSHIFT 		31
    #define RSHIFT 		32
    #define DOT 		33
    #define NUM 		34
    #define INTID 		35rr
    #define REALID 		36
    #define ID 			37
    #define ERROR 		38
    
    //------------------- reserved words and token strings -----------------------
    char rel[4]={'R','E','A','L'};
    char iden[2]={'I','D'};
    char *reserved[] =
    	{	"",
    		"VAR",
    		"BEGIN",
    		"END",
    		"ASSIGN",
    		"IF",
    		"WHILE",
    		"DO",
    		"THEN",
    		"PRINT",
    		"INT",
    		"REAL",
    		"STRING",
    		"+",
    		"-",
    		"_",
    		"/",
    		"*",
    		"=",
    		":",
    		",",
    		";",
    		"[",
    		"]",
    		"(",
    		")",
    		"<>",
    		">",
    		"<",
    		"<=",
    		">=",
    		"<<",
    		">>",
    		".",
    		"NUM",
    		"INTID",
    		"REALID",
    		"ID",
    		"ERROR"
    		};
    
    // Prints the display string of a token type, followed by a newline.
    //
    // ttype: token type code; the valid indexes into reserved[] are
    //        0 .. RESERVED.
    // Returns 1 if a string was printed.  Returns 0, printing nothing, when
    // ttype lies outside the table -- this includes negative codes such as
    // EOF, which would otherwise index before the start of reserved[].
    int printReserved(int ttype)
    {
    	if (ttype < 0 || ttype > RESERVED)
    		return 0;

    	printf("%s\n", reserved[ttype]);
    	return 1;
    }
    //---------------------------------------------------------
    
    //---------------------------------------------------------
    // Global Variables associated with the next input token
    // This implementation does not allow tokens that are more
    // than 100 characters long
    #define MAX_TOKEN_LENGTH 100
    
    char token[MAX_TOKEN_LENGTH];     // token string
    int  tokenLength;
    int  line_no = 0;
    
    //---------------------------------------------------------
    // Global Variables associated with the input buffer
    // This implementation does not allow lines that are more
    // than 500 characters long
    #define BUFFER_SIZE     500
    
    int buffer[BUFFER_SIZE];
    int start_pos   = 0;	// Signifies the start position in the buffer (#)
    int next_pos    = 0;	// Signifies the next character to be read (^)
    int maximal_pos = 0;	// Signifies the last character of the maximal 
                            // token found (*)
    
    //---------------------------------------------------------
    // Functions associated with the input buffer
    
    // Reads one line from stdin into buffer[] and resets the scan positions.
    //
    // The characters of the line (without the trailing '\n') are stored in
    // buffer[0 .. i-1] and buffer[i] is set to EOF as the end-of-line
    // sentinel.  line_no is incremented on every call.  The program exits
    // with status 1 if the line does not fit in the buffer or if reading
    // from stdin fails.
    void readNextLine()
    {
    	int i = 0;
    	int c;	// int, not char: getchar() reports EOF out of band, and an
    		// input byte of 0xFF must not be mistaken for the sentinel

    	start_pos   = -1; // start position is the position just
                          // before the beginning of the token string
    	maximal_pos = -1; // maximal_position points to the end of the
                          // maximal_matched token string
    	next_pos    = 0;  // next_pos points to the next input character

    	c = getchar();
    	while (c != '\n' && c != EOF && i < BUFFER_SIZE)
    	{
    		buffer[i] = c;
    		i++;
    		c = getchar();
    	}
    	if (c == EOF && ferror(stdin))
    	{
    		// a read error is not end of input: report it instead of
    		// treating the partial data as a complete line
    		printf("Error reading input\n");
    		exit(1);
    	}
    	if (i >= BUFFER_SIZE)
    	{
    		printf("Maximum allowed line length exceeded\n");
    		exit(1);
    	}
    	buffer[i] = EOF;
    	line_no++;
    }
    
    /* Debugging aid: dumps the current line buffer (up to the EOF sentinel)
     * and the three scan positions through print_debug(). */
    void printBuffer()
    {
    	int idx;

    	print_debug("BUFFER: ");
    	for (idx = 0; buffer[idx] != EOF; idx++)
    		print_debug("%c", (char) buffer[idx]);
    	print_debug("\n");

    	print_debug("START_POS   = %d\n", start_pos);
    	print_debug("NEXT_POS    = %d\n", next_pos);
    	print_debug("MAXIMAL_POS = %d\n", maximal_pos);
    }
    
    // Advances next_pos past any whitespace and marks the start of the next
    // token: start_pos and maximal_pos are both set to the position just
    // before the first non-space character (i.e. an empty token so far).
    void skip_space_and_set_positions()
    {
    	// The EOF sentinel is tested explicitly and the value is converted
    	// to unsigned char because isspace() is only defined for EOF and
    	// for values representable as unsigned char, while buffer[] may
    	// hold negative char values.
    	while (buffer[next_pos] != EOF &&
    	       isspace((unsigned char) buffer[next_pos]))
    	{
    		next_pos++;
    	}
    	maximal_pos = next_pos - 1;
    	start_pos   = next_pos - 1;
    }
    
    // this function copies the maximal token string to the token array
    // it assumes the token string is not empty
    void copy_token_string_from_buffer()
    {
    	int i;
    	tokenLength = 0;
    	for (i = start_pos + 1; i <= maximal_pos; i++)
    	{
    		token[tokenLength] = (char) buffer[i];
    		tokenLength++;
    	}
    	token[tokenLength] = '\0';
    }
    
    //----------------------------------------------------------
    
    // Looks s up among the keyword entries reserved[1 .. KEYWORDS] with a
    // simple linear search.
    //
    // Returns the index of the matching entry (which is also the keyword's
    // token type), or FALSE (0) when s is not a keyword.
    int isKeyword(char *s)
    {
    	int kw = 1;

    	while (kw <= KEYWORDS)
    	{
    		if (!strcmp(reserved[kw], s))
    			return kw;
    		kw++;
    	}
    	return FALSE;
    }
    
    // Returns non-zero when the len characters starting at buffer[from] are
    // exactly the word_len characters of word (word need not be
    // NUL-terminated).
    static int suffix_equals(int from, int len, const char *word, int word_len)
    {
    	int j;

    	if (len != word_len)
    		return 0;
    	for (j = 0; j < len; j++)
    	{
    		if (buffer[from + j] != word[j])
    			return 0;
    	}
    	return 1;
    }

    // Scans a keyword, ID, REALID or INTID starting at next_pos.
    //
    // The leading run of letters/digits is copied to token[] and looked up
    // with isKeyword(); for a keyword its own type is returned.  Otherwise
    // the token is an ID, unless it is immediately followed by '.', one or
    // more digits and a run of letters that spells rel[] ("REAL") or iden[]
    // ("ID"); in that case the whole sequence becomes one REALID or INTID
    // token respectively.
    // NOTE(review): the INTID suffix is taken from iden[] exactly as this
    // file defines it -- confirm against the language specification.
    //
    // On return next_pos is the first character after the token and token[]
    // holds the token text.
    int scan_id_keyword()
    {
    	int ttype;
    	int k;
    	int suffix_start;
    	int suffix_len;

    	while (isalnum(buffer[next_pos]))
    	{
    		next_pos++;
    	}
    	maximal_pos = next_pos - 1;
    	copy_token_string_from_buffer();

    	ttype = isKeyword(token);
    	if (ttype != 0)
    		return ttype;

    	ttype = ID;

    	// buffer[next_pos + 1] is only read when buffer[next_pos] is '.',
    	// i.e. not the sentinel, so the index stays inside the line
    	if (buffer[next_pos] != '.' || !isdigit(buffer[next_pos + 1]))
    		return ttype;

    	k = next_pos + 1;
    	while (isdigit(buffer[k]))
    		k++;

    	suffix_start = k;
    	while (isalpha(buffer[k]))
    		k++;
    	suffix_len = k - suffix_start;

    	// compare only the letters actually present, and only against the
    	// real lengths of rel[] and iden[]
    	if (suffix_equals(suffix_start, suffix_len, rel, (int) sizeof rel))
    		ttype = REALID;
    	else if (suffix_equals(suffix_start, suffix_len, iden, (int) sizeof iden))
    		ttype = INTID;

    	if (ttype != ID)
    	{
    		// k is the first character after the suffix, so the token
    		// ends at k - 1 and scanning resumes at k
    		next_pos = k;
    		maximal_pos = next_pos - 1;
    		copy_token_string_from_buffer();
    	}
    	return ttype;
    }
    
    // Scans a NUM token; the caller guarantees that buffer[next_pos] is a
    // digit.  A leading '0' is a complete number by itself; otherwise the
    // whole run of digits is consumed.  The digits are copied to token[] and
    // NUM is returned.
    int scan_number()
    {
    	if (buffer[next_pos] == '0')
    	{
    		next_pos++;	// NUM is 0
    	}
    	else
    	{
    		while (isdigit(buffer[next_pos]))
    			next_pos++;
    	}

    	maximal_pos = next_pos - 1;
    	copy_token_string_from_buffer();

    	return NUM;
    }
    
    // Scans the next token on the current line and returns its type.
    //
    // Leading whitespace is skipped first.  For NUM, ID, INTID, REALID and
    // keywords the token text is left in token[]; operator and punctuation
    // tokens are identified by their type only.  Returns EOF when the
    // end-of-line sentinel is reached and ERROR for a character that cannot
    // start any token.
    int getToken()
    {
    	int c;	// int, not char: the EOF sentinel must compare equal to
    		// EOF even on platforms where plain char is unsigned
    	int ttype;

    	skip_space_and_set_positions();

    	c = buffer[next_pos];
    	next_pos++;

    	switch (c)
    	{
    		case '.': ttype = DOT; break;
    		case '+': ttype = PLUS; break;
    		case '-': ttype = MINUS; break;
    		case '_': ttype = UNDERSCORE; break;
    		case '/': ttype = DIV; break;
    		case '*': ttype = MULT; break;
    		case '=': ttype = EQUAL; break;
    		case ':': ttype = COLON; break;
    		case ',': ttype = COMMA; break;
    		case ';': ttype = SEMICOLON; break;
    		case '[': ttype = LBRAC; break;
    		case ']': ttype = RBRAC; break;
    		case '(': ttype = LPAREN; break;
    		case ')': ttype = RPAREN; break;
    		case '<':
    			// two-character operators win over the single '<'
    			if (buffer[next_pos] == '<')
    			{
    				ttype = LSHIFT;
    				next_pos++;
    			}
    			else if (buffer[next_pos] == '=')
    			{
    				ttype = LTEQ;
    				next_pos++;
    			}
    			else if (buffer[next_pos] == '>')
    			{
    				ttype = NOTEQUAL;
    				next_pos++;
    			}
    			else
    			{
    				ttype = LESS;
    			}
    			break;
    		case '>':
    			if (buffer[next_pos] == '=')
    			{
    				ttype = GTEQ;
    				next_pos++;
    			}
    			else if (buffer[next_pos] == '>')
    			{
    				ttype = RSHIFT;
    				next_pos++;
    			}
    			else
    			{
    				ttype = GREATER;
    			}
    			break;
    		case EOF:
    			// stay on the sentinel so that repeated calls keep
    			// returning EOF instead of reading past it
    			next_pos--;
    			ttype = EOF;
    			break;
    		default :
    			// c is not EOF here, so converting to unsigned char
    			// gives the ctype functions a valid argument even if
    			// buffer[] holds a negative char value
    			if (isdigit((unsigned char) c))
    			{
    				next_pos--;
    				ttype = scan_number();
    			}
    			else if (isalpha((unsigned char) c))
    			{
    				// token is either a keyword or ID or INTID or REALID
    				next_pos--;
    				ttype = scan_id_keyword();
    				/*
    				 * TODO: Write code to handle INTID and REALID
    				 * It might be better to replace scan_id_keyword()
    				 * with a function that handles INTID and REALID
    				 * in addition to ID and keywords
    				 */
    			}
    			else
    			{
    				ttype = ERROR;
    			}
    			break;
    	}	// End Switch
    	return ttype;
    }
    
    /*
     * Driver: until feof(stdin) reports end of input, load a line with
     * readNextLine() and print every token getToken() yields from it.
     * Each token is printed as "<line_no> <ttype> " followed by the token
     * text for NUM/ID/INTID/REALID, or by printReserved(ttype) otherwise.
     */
    int main()
    {
    	int ttype;
    	int has_text;
    
    	for (;;)
    	{
    		if (feof(stdin))
    			break;
    
    		readNextLine();
    
    		/* getToken() returning EOF marks the end of the current buffer. */
    		for (ttype = getToken(); ttype != EOF; ttype = getToken())
    		{
    			printf("%d %d ", line_no, ttype);
    
    			/* These token kinds carry their text in the global token. */
    			has_text = (ttype == NUM) || (ttype == ID) ||
    				(ttype == INTID) || (ttype == REALID);
    
    			if (!has_text)
    			{
    				printReserved(ttype);
    				continue;
    			}
    			printf("%s \n", token);
    		}
    	}
    	return 0;
    }
    I'm compiling this code using the command: gcc -Wall try340.c
    The error I'm getting is:
    Code:
    try340.c:293:13: error: invalid suffix "rr" on integer constant 
    try340.c:421:15: error: invalid suffix "rr" on integer constant
    Please look into this and direct me as to how to avoid it.
    Thanks for helping me till now. Please do the needful.

    Thanks in advance.

    Regards,
    Abhinav Singh
  14. #8
  15. Contributing User
    Devshed Supreme Being (6500+ posts)

    Join Date
    Jan 2003
    Location
    USA
    Posts
    7,089
    Rep Power
    2222
    The cause of the errors is INTID. Up in line 60, you defined it thus:
    Code:
    #define INTID 		35rr
    And there you see the "rr" mentioned in the error messages. As it turns out, there are some suffixes that are allowed on literals, such as f to make the literal a float, or l or L to make it long or u or U to make it unsigned, also ul/UL for unsigned long. The compiler thought that might be what you were trying to do, but "rr" is not a valid suffix. Remember that in order to understand many error messages and warnings, you need to understand what the compiler thought you were trying to tell it.

    Do you understand the pre-processor yet? Before the compiler starts parsing the source file, the pre-processor prepares it. One of the things that the pre-processor does is to insert all the files indicated by the #include statements; those header files all become part of the source file when it is parsed and compiled.

    These errors stem from another thing that the pre-processor does. When you use #define, you are creating what's called a macro. The pre-processor uses those definitions to perform macro expansion. That means that wherever the macro name is found in the source code (eg, INTID), the pre-processor replaces that name quite literally with the macro definition (eg, 35rr). Then when the compiler finds an error or issues a warning, it doesn't know anything about the #define statement, but rather all it knows about is the code in which the macro had been expanded.

    For example, line 293,
    Code:
                  ttype= INTID;
    , becomes
    Code:
                  ttype= 35rr;
    and lines 420 and 421,
    Code:
                if ((ttype == NUM) || (ttype == ID) || 
                    (ttype == INTID) || (ttype == REALID))
    become
    Code:
                if ((ttype == 34) || (ttype == 37) || 
                    (ttype == 35rr) || (ttype == 36))
    . Can you see why these lines would raise errors?:
    Code:
                  ttype= 35rr;
    
     . . . 
    
                if ((ttype == 34) || (ttype == 37) || 
                    (ttype == 35rr) || (ttype == 36))
    When I correct the INTID macro definition, your code compiles cleanly, meaning that I get no errors and no warnings.

    Lesson that you learned here: when you get an error or warning in a line that contains a macro, expand that macro manually in order to see what the compiler saw.

    Here's an example that demonstrates another problem you could have with macros:
    Code:
    #define MAX_STR_LEN     80
    #define STR_SIZE     MAX_STR_LEN+1  // for declaring a string
    
        // calculate maximum number of characters allocated
        stringCount = 10;
        n = STR_SIZE * stringCount;
    The code looks right and it will compile cleanly, but it will give you the wrong answer. When you expand it you get this:
    Code:
        n = 80+1* stringCount;
    You expect to get 81 * stringCount, which is 810. Instead, you get 80+stringCount which is 90. The way to correct that is to use parentheses:
    Code:
    #define STR_SIZE     (MAX_STR_LEN+1)
    in which case that line of code would expand to this:
    Code:
        n = (80+1)* stringCount;
    which would give you the correct result of 810.

    It is common practice to base macros on other macros. For example:
    Code:
    #define ONE_SECOND     1           // one second
    #define ONE_MINUTE      (ONE_SECOND * 60)   // one minute
    #define ONE_HOUR     (ulong)(ONE_MINUTE * 60)   // one hour
    #define ONE_DAY         (ONE_HOUR * 24)  // one day
    When you use macros, you need to be aware of what they will expand to in each line of code that you use them in. That's part of the job.

    Comments on this post

    • b49P23TIvg agrees : Wow!
    Last edited by dwise1_aol; September 5th, 2013 at 11:24 AM.

IMN logo majestic logo threadwatch logo seochat tools logo