patterncMinor
Implementation of Symbol Table in C
Viewed 0 times
symbolimplementationtable
Problem
Following is my code for implementing basic symbol table in C. Please review my code and tell where I can improve my program.
```
#include
#include
#include
#include
#include
#define BUFFER_SIZE 1024
#define TOKEN_SIZE 32
typedef struct symbol
{
char* identifierName;
char* datatype;
int offset;
struct symbol* next;
}symbol;
void insert(symbol** headRef,char identifier,char type)
{
symbol newnode = (symbol) malloc(sizeof(symbol));
static symbol* prevnode;
newnode->identifierName = (char) malloc((strlen(identifier)+1)sizeof(char));
newnode->datatype = (char) malloc((strlen(type)+1)sizeof(char));
if(*headRef==NULL)
{
newnode->offset=0;
prevnode = newnode;
}
strcpy(newnode->identifierName,identifier);
strcpy(newnode->datatype,type);
if(strcmp(newnode->datatype,"int") == 0)
(newnode->offset) = (prevnode->offset) + sizeof(int);
else if(strcmp("float",newnode->datatype) == 0)
(newnode->offset) = (prevnode->offset) + sizeof(float);
else if(strcmp("char",newnode->datatype) == 0)
(newnode->offset) = (prevnode->offset) + sizeof(char);
else if(strcmp("double",newnode->datatype) == 0)
(newnode->offset) = (prevnode->offset) + sizeof(double);
prevnode = newnode;
newnode->next = (*headRef);
(*headRef) = newnode;
}
void display(symbol* node)
{
while(node!=NULL)
{
printf("%s %s %d \n",node->identifierName,node->datatype,node->offset);
node = node->next;
}
}
bool isIdentifier(const char* token)
{
if(isalpha(token[0]) || token[0]=='_')
{
for(int i=1;token[i]!=(char)NULL;i++)
{
if(!isalnum(token[i]))
return false;
}
return true;
}
return false;
}
bool isDatatype(const char* token)
{
const char *datatype[]={"char","double","float","int"};
for(size_t i=0;i2)
{
fprintf(stderr,"Too many arguments for read to perform.
```
#include
#include
#include
#include
#include
#define BUFFER_SIZE 1024
#define TOKEN_SIZE 32
typedef struct symbol
{
char* identifierName;
char* datatype;
int offset;
struct symbol* next;
}symbol;
void insert(symbol** headRef,char identifier,char type)
{
symbol newnode = (symbol) malloc(sizeof(symbol));
static symbol* prevnode;
newnode->identifierName = (char) malloc((strlen(identifier)+1)sizeof(char));
newnode->datatype = (char) malloc((strlen(type)+1)sizeof(char));
if(*headRef==NULL)
{
newnode->offset=0;
prevnode = newnode;
}
strcpy(newnode->identifierName,identifier);
strcpy(newnode->datatype,type);
if(strcmp(newnode->datatype,"int") == 0)
(newnode->offset) = (prevnode->offset) + sizeof(int);
else if(strcmp("float",newnode->datatype) == 0)
(newnode->offset) = (prevnode->offset) + sizeof(float);
else if(strcmp("char",newnode->datatype) == 0)
(newnode->offset) = (prevnode->offset) + sizeof(char);
else if(strcmp("double",newnode->datatype) == 0)
(newnode->offset) = (prevnode->offset) + sizeof(double);
prevnode = newnode;
newnode->next = (*headRef);
(*headRef) = newnode;
}
void display(symbol* node)
{
while(node!=NULL)
{
printf("%s %s %d \n",node->identifierName,node->datatype,node->offset);
node = node->next;
}
}
bool isIdentifier(const char* token)
{
if(isalpha(token[0]) || token[0]=='_')
{
for(int i=1;token[i]!=(char)NULL;i++)
{
if(!isalnum(token[i]))
return false;
}
return true;
}
return false;
}
bool isDatatype(const char* token)
{
const char *datatype[]={"char","double","float","int"};
for(size_t i=0;i2)
{
fprintf(stderr,"Too many arguments for read to perform.
Solution
I transcribed the comments from @user3629249 into this community wiki answer:
-
Regarding this line:
-
Regarding this line:
To avoid having the execution re-create this every time the function is called, add a
-
Regarding:
On every call to
-
In general, when checking the parameters (as in the
-
When a system function returns an error indication, such as the returned value from
-
When
-
Due to each new token being inserted at the beginning of the linked list, when the list is displayed, the tokens will be displayed in the reverse order. Is this what you want?
-
A symbol table needs to contain info on any modifiers being applied to a symbol. including when a modifier is a function name, a returned value, a returned type, etc etc linked values, such as struct's enum's, union's, etc need links between the type, the name, and the associated fields with their types, names, and order. The current code handles none of that.
-
The
-
The max token length is set to 31 characters+string delimiter. what is this code was reading a C source file? while only the first 32 characters may be significant, an identifier can be much longer. the code will overrun the token buffer in such an instance, resulting in undefined behaviour, leading to a seg fault event.
-
Regarding this line:
symbol* newnode = (symbol*) malloc(sizeof(symbol));- In C, do not cast the returned value from malloc and family of functions. This is because the returned value is a
void*, so can be assigned to any other pointer and maintenance becomes much easier as there is no need to fix the casting.
- Always check (!=NULL) the returned value to assure the operation was successful.
- The expression
sizeof(char)is always 1, so adds nothing to the value passed tomalloc()
-
Regarding this line:
const char *datatype[]={"char","double","float","int"};To avoid having the execution re-create this every time the function is called, add a
static modifier. I.E. static const char *datatype[]={"char","double","float","int"};-
Regarding:
char *token = strtok(statement,delimiters)On every call to
strtok(), always check (!=NULL) the returned value to assure the operation was successful. A failure of the call to strtok() will result in isIdentifier() trying to read from address 0. This is undefined behaviour and can lead to a seg fault event.-
In general, when checking the parameters (as in the
openfile() function) When the code finds a problem with the command line parameters, it should display a usage statement, so the user is informed of exactly what they should have done.-
When a system function returns an error indication, such as the returned value from
fopen(), the next line of code should be perror(); so the related system error message gets output to stderr.-
When
#defineing numeric values, always wrap the numeric value with parens so there are no 'text replacement' errors.-
Due to each new token being inserted at the beginning of the linked list, when the list is displayed, the tokens will be displayed in the reverse order. Is this what you want?
-
A symbol table needs to contain info on any modifiers being applied to a symbol. including when a modifier is a function name, a returned value, a returned type, etc etc linked values, such as struct's enum's, union's, etc need links between the type, the name, and the associated fields with their types, names, and order. The current code handles none of that.
-
The
isDeclaration() function will treat parens, braces, and other similar items incorrectly. The parsing needs to keep a 'state' history, so as to know how to treat the next token. (all part of lexical and syntax analysis) The posted code does not properly handle strings in double quotes and single quotes. The posted code does not handle a '\' at the end of an input line.-
The max token length is set to 31 characters+string delimiter. what is this code was reading a C source file? while only the first 32 characters may be significant, an identifier can be much longer. the code will overrun the token buffer in such an instance, resulting in undefined behaviour, leading to a seg fault event.
Code Snippets
symbol* newnode = (symbol*) malloc(sizeof(symbol));const char *datatype[]={"char","double","float","int"};char *token = strtok(statement,delimiters)Context
StackExchange Code Review Q#101694, answer score: 2
Revisions (0)
No revisions yet.