patterncsharpMinor
LL(1) tokenizer for LISP
Viewed 0 times
lispfortokenizer
Problem
I am trying to write a LISP interpreter in C#, so I started with a tokenizer. I haven't finished it yet (I still have to handle floating-point numbers and symbols), but I have already rewritten it twice because I wasn't satisfied with the design.
```
/// <summary>
/// Exception type intended for tokenizer failures (per its name).
/// Follows the standard exception constructor pattern: every constructor
/// forwards its arguments to the base class so that <c>Message</c>,
/// <c>InnerException</c> and serialized state are preserved.
/// </summary>
[System.Serializable]
public class TokenizerException : System.ApplicationException
{
    /// <summary>Creates the exception with the framework's default message.</summary>
    public TokenizerException() {}

    /// <summary>Creates the exception with a caller-supplied message.</summary>
    /// <param name="message">Text exposed through <c>Message</c>.</param>
    public TokenizerException(string message) : base(message) {}

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    /// <param name="message">Text exposed through <c>Message</c>.</param>
    /// <param name="inner">Exception exposed through <c>InnerException</c>.</param>
    public TokenizerException(string message, System.Exception inner) : base(message, inner) {}

    // Constructor needed for serialization
    // when exception propagates from a remoting server to the client.
    // It must chain to the base constructor; otherwise the deserialized
    // instance loses its message, stack trace and inner exception.
    protected TokenizerException(System.Runtime.Serialization.SerializationInfo info,
        System.Runtime.Serialization.StreamingContext context) : base(info, context) {}
}
/// <summary>
/// Abstract base for token types; holds the token's text in <see cref="val"/>.
/// </summary>
public abstract class Token
{
    /// <summary>Token text; the constructor always sets it to a non-null string.</summary>
    public string val;

    /// <summary>
    /// Stores <paramref name="val"/>, substituting an empty string when it is null.
    /// </summary>
    /// <param name="val">Token text, possibly null.</param>
    public Token(string val)
    {
        this.val = val ?? "";
    }
}
/// <summary>Token subclass named for an opening parenthesis.</summary>
class OpenParenToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public OpenParenToken(string value)
        : base(value)
    {
    }
}
/// <summary>Token subclass named for a closing parenthesis.</summary>
class CloseParenToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public CloseParenToken(string value)
        : base(value)
    {
    }
}
/// <summary>Token subclass named for numeric literals.</summary>
class NumberToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public NumberToken(string value)
        : base(value)
    {
    }
}
/// <summary>Token subclass named for string literals.</summary>
class StringToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public StringToken(string value)
        : base(value)
    {
    }
}
/// <summary>Token subclass named for identifiers.</summary>
class IdToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public IdToken(string value)
        : base(value)
    {
    }
}
/// <summary>Token subclass named for symbols.</summary>
class SymbolToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public SymbolToken(string value)
        : base(value)
    {
    }
}
public class Tokenizer
{
private const string parens = "([])";
private string code;
private char ch;
private object token;
private List tokens;
private int p = 0;
public Tokenizer(string code)
{
this.code = code;
tokens = new List();
}
private char getCh()
{
ch = code[p];
return ch;
}
public void DumpTokens()
{
foreach(object t in tokens)
{
Console.Write(" ");
}
Console.WriteLine();
}
private char NextCh()
{
if(p >= code.
```
/// <summary>
/// Exception type intended for tokenizer failures (per its name).
/// Follows the standard exception constructor pattern: every constructor
/// forwards its arguments to the base class so that <c>Message</c>,
/// <c>InnerException</c> and serialized state are preserved.
/// </summary>
[System.Serializable]
public class TokenizerException : System.ApplicationException
{
    /// <summary>Creates the exception with the framework's default message.</summary>
    public TokenizerException() {}

    /// <summary>Creates the exception with a caller-supplied message.</summary>
    /// <param name="message">Text exposed through <c>Message</c>.</param>
    public TokenizerException(string message) : base(message) {}

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    /// <param name="message">Text exposed through <c>Message</c>.</param>
    /// <param name="inner">Exception exposed through <c>InnerException</c>.</param>
    public TokenizerException(string message, System.Exception inner) : base(message, inner) {}

    // Constructor needed for serialization
    // when exception propagates from a remoting server to the client.
    // It must chain to the base constructor; otherwise the deserialized
    // instance loses its message, stack trace and inner exception.
    protected TokenizerException(System.Runtime.Serialization.SerializationInfo info,
        System.Runtime.Serialization.StreamingContext context) : base(info, context) {}
}
/// <summary>
/// Abstract base for token types; holds the token's text in <see cref="val"/>.
/// </summary>
public abstract class Token
{
    /// <summary>Token text; the constructor always sets it to a non-null string.</summary>
    public string val;

    /// <summary>
    /// Stores <paramref name="val"/>, substituting an empty string when it is null.
    /// </summary>
    /// <param name="val">Token text, possibly null.</param>
    public Token(string val)
    {
        this.val = val ?? "";
    }
}
/// <summary>Token subclass named for an opening parenthesis.</summary>
class OpenParenToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public OpenParenToken(string value)
        : base(value)
    {
    }
}
/// <summary>Token subclass named for a closing parenthesis.</summary>
class CloseParenToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public CloseParenToken(string value)
        : base(value)
    {
    }
}
/// <summary>Token subclass named for numeric literals.</summary>
class NumberToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public NumberToken(string value)
        : base(value)
    {
    }
}
/// <summary>Token subclass named for string literals.</summary>
class StringToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public StringToken(string value)
        : base(value)
    {
    }
}
/// <summary>Token subclass named for identifiers.</summary>
class IdToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public IdToken(string value)
        : base(value)
    {
    }
}
/// <summary>Token subclass named for symbols.</summary>
class SymbolToken : Token
{
    /// <summary>Passes <paramref name="value"/> straight to the <see cref="Token"/> constructor.</summary>
    public SymbolToken(string value)
        : base(value)
    {
    }
}
public class Tokenizer
{
private const string parens = "([])";
private string code;
private char ch;
private object token;
private List tokens;
private int p = 0;
public Tokenizer(string code)
{
this.code = code;
tokens = new List();
}
private char getCh()
{
ch = code[p];
return ch;
}
public void DumpTokens()
{
foreach(object t in tokens)
{
Console.Write(" ");
}
Console.WriteLine();
}
private char NextCh()
{
if(p >= code.
Solution
In addition to Snowbear's points:
1) I don't use an inheritance hierarchy for the token, instead I find it more convenient to use an enum property on the token to identify the type.
2) You might want to think about "reading" the whitespace as a token that simply isn't returned (this is an extension of Snowbear's point about not using recursion to read the next char).
3) LL(1) is a term that refers to parsers, not scanners (tokenisers).
4) I also like to implement my scanners as an IEnumerator that takes the string to be scanned in the constructor ... but that's just a matter of personal taste. :)
1) I don't use an inheritance hierarchy for the token, instead I find it more convenient to use an enum property on the token to identify the type.
2) You might want to think about "reading" the whitespace as a token that simply isn't returned (this is an extension of Snowbear's point about not using recursion to read the next char).
3) LL(1) is a term that refers to parsers, not scanners (tokenisers).
4) I also like to implement my scanners as an IEnumerator that takes the string to be scanned in the constructor ... but that's just a matter of personal taste. :)
Context
StackExchange Code Review Q#680, answer score: 8
Revisions (0)
No revisions yet.