patterncsharpMajor
A completely overkill BrainFuck lexer/parser
Viewed 0 times
Tags: completely, parser, overkill, lexer, brainfuck
Problem
I'll need to build a syntax tree (AST) for Rubberduck, but since VBA has dozens of tokens and complex rules, I needed a simpler language to play with first, so I thought BrainFuck would be a perfect candidate.
The result is completely overkill for BF, but the exercise was very educational.
Lexer
The lexer reads the code as a string or stream input, and yields tokens - a trivia token can span multiple characters, instruction tokens are all single-character:
```
using BrainFuck.Tokens;
namespace BrainFuck
{
///
/// An object responsible for tokenizing an input stream.
///
public sealed class Lexer
{
///
/// Yields tokens from the input stream.
///
/// Any stream of BrainFuck source code.
public IEnumerable Tokenize(System.IO.Stream input)
{
var reader = new System.IO.StreamReader(input);
var currentTokenPosition = Span.Empty;
var currentTriviaSpan = Span.Empty;
var builder = new StringBuilder();
var tokenCount = 0;
while (reader.Peek() > 0)
{
var current = (char) reader.Read();
var next = (char) reader.Peek();
if (IsNewLine(current, next))
{
builder.Append(current);
currentTriviaSpan = currentTriviaSpan.NextLine;
currentTokenPosition = currentTokenPosition.NewLine;
if (Environment.NewLine.Length == 2)
{
current = (char) reader.Read();
builder.Append(current);
}
continue;
}
Token token;
if (IsToken(currentTokenPosition, tokenCount, current, out token))
{
// if we were building a trivia token, we need to yield it first:
if (builder.Length != 0)
The result is completely overkill for BF, but the exercise was very educational.
Lexer
The lexer reads the code as a string or stream input, and yields tokens - a trivia token can span multiple characters, instruction tokens are all single-character:
```
using BrainFuck.Tokens;
namespace BrainFuck
{
///
/// An object responsible for tokenizing an input stream.
///
public sealed class Lexer
{
///
/// Yields tokens from the input stream.
///
/// Any stream of BrainFuck source code.
public IEnumerable Tokenize(System.IO.Stream input)
{
var reader = new System.IO.StreamReader(input);
var currentTokenPosition = Span.Empty;
var currentTriviaSpan = Span.Empty;
var builder = new StringBuilder();
var tokenCount = 0;
while (reader.Peek() > 0)
{
var current = (char) reader.Read();
var next = (char) reader.Peek();
if (IsNewLine(current, next))
{
builder.Append(current);
currentTriviaSpan = currentTriviaSpan.NextLine;
currentTokenPosition = currentTokenPosition.NewLine;
if (Environment.NewLine.Length == 2)
{
current = (char) reader.Read();
builder.Append(current);
}
continue;
}
Token token;
if (IsToken(currentTokenPosition, tokenCount, current, out token))
{
// if we were building a trivia token, we need to yield it first:
if (builder.Length != 0)
Solution
Additional C#6.0 Modifications
How C#6.0 would you like it to be? You still have a few places you can use more C#6.0:
I'm going to work bottom-to-top in your post:
Usually, I don't support using expression-bodied members on `void` methods, but this class would benefit from a few of them:
We'll start with:
As:
Sidebar: I don't see a need for a `private` field here:
Why not just use a property with a `private` setter?
We can do two things with this one:
First, replace the `return` with:
Then, expression-body member that thing:
But, the next question is, why are you using `-1` as a special case? Make it nullable and we can do one more thing with it:
Nice and short.
Next, we'll look at:
You can make `ToString()` use expression-bodied members:
There's one potential problem with this, and we can make it more C#6.0 at the same time: what happens if `context` is null?
Well, we can fix that with `?.` (as I assume you know):
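The last thing I want to look at is the `Dictionary`; you can now use the new C#6.0 dictionary initializer syntax instead of the following: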
Instead of that ugliness, we have a new slightly less ugliness:
```
private static readonly Dictionary> SyntaxTrees =
new Dictionary>
{
[TokenType.Trivia] = () => new TriviaSyntax(),
[TokenType.Increment] = () => new IncrementInstructionSyntax(),
[TokenType.Decrement] = () => new DecrementInstructionSyntax(),
[TokenType.MoveLeft] = () => new MoveLeftInstruct
How C#6.0 would you like it to be? You still have a few places you can use more C#6.0:
I'm going to work bottom-to-top in your post:
Usually, I don't support using expression-bodied members on
void methods, but this class would benefit from a few of them:public class ExecutionContext
{
/// <summary>
/// Creates an execution context with a memory tape of <paramref name="memorySize"/> cells
/// and an empty output buffer.
/// </summary>
/// <param name="memorySize">Number of <c>int</c> cells to allocate for the memory tape.</param>
/// <param name="onInput">
/// Optional callback that supplies the next input value; when <c>null</c>,
/// <see cref="Input"/> falls back to <c>Console.Read()</c>.
/// </param>
public ExecutionContext(int memorySize = short.MaxValue, Func<int> onInput = null)
{
    _onInput = onInput;
    _memory = new int[memorySize];
    _stdOutput = new StringBuilder();
}
// The memory tape; one int per cell.
private readonly int[] _memory;
// Input callback; Func<int> because its result is stored directly into an int cell.
private readonly Func<int> _onInput;
// Accumulates every character written by Output().
private readonly StringBuilder _stdOutput;
private int _pointer;
public int Pointer => _pointer;
public int Value => _memory[_pointer];
public string StdOut => _stdOutput.ToString();
public bool IsTrue(int position = -1)
{
return (position == -1 ? _memory[_pointer] : _memory[position]) != 0;
}
/// <summary>
/// Moves the memory pointer one cell to the left, wrapping around to the last
/// cell when the pointer is already on the first cell.
/// </summary>
public void MoveLeft()
{
    if (_pointer == 0)
    {
        // Wrap to the last valid index. _memory.Length is one past the end of the
        // array, so using it here would make the next cell access throw
        // IndexOutOfRangeException.
        _pointer = _memory.Length - 1;
    }
    else
    {
        _pointer--;
    }
}
/// <summary>
/// Moves the memory pointer one cell to the right, wrapping around to the first
/// cell when the pointer is already on (or past) the last cell.
/// </summary>
public void MoveRight()
{
    // The last valid index is Length - 1. Comparing against Length would let the
    // pointer step one past the end of the array before wrapping. ">=" also
    // recovers a pointer that is already sitting past the end.
    if (_pointer >= _memory.Length - 1)
    {
        _pointer = 0;
    }
    else
    {
        _pointer++;
    }
}
public void Increment()
{
_memory[_pointer] += 1;
}
public void Decrement()
{
_memory[_pointer] -= 1;
}
public void Output()
{
_stdOutput.Append((char)_memory[_pointer]);
}
public void Input()
{
_memory[_pointer] = _onInput?.Invoke() ?? Console.Read();
}
}We'll start with:
public void Increment()
{
_memory[_pointer] += 1;
}
public void Decrement()
{
_memory[_pointer] -= 1;
}As:
public void Increment() => _memory[_pointer]++;
public void Decrement() => _memory[_pointer]--;Sidebar: I don't see a need for a
private field here:private int _pointer;
public int Pointer => _pointer;Why not just use a property with a
private setter?We can do two things with this one:
public bool IsTrue(int position = -1)
{
return (position == -1 ? _memory[_pointer] : _memory[position]) != 0;
}First, replace the
return with:return _memory[position == -1 ? _pointer : position] != 0;Then, expression-body member that thing:
public bool IsTrue(int position = -1) => _memory[position == -1 ? _pointer : position] != 0;But, the next question is, why are you using
-1 as a special case? Make it nullable and we can do one more thing with it:public bool IsTrue(int? position = null) => _memory[position ?? _pointer] != 0;Nice and short.
Next, we'll look at:
public abstract class InstructionSyntaxTree : SyntaxTree, IInstruction
{
protected abstract void ExecuteOnce(ExecutionContext context);
public virtual void Execute(ExecutionContext context)
{
// ReSharper disable once UnusedVariable; instruction is the same for every token unless method is overridden.
foreach (var instruction in Tokens)
{
ExecuteOnce(context);
}
}
public override string ToString()
{
return $"{GetType().Name} x{Tokens.Count()}";
}
}You can make
ToString() use expression-bodied members:public override string ToString() => $"{GetType().Name} x{Tokens.Count()}";There's one potential problem with this, and we can make it more C#6.0 at the same time, what happens if
context is null?public sealed class IncrementInstructionSyntax : InstructionSyntaxTree
{
protected override void ExecuteOnce(ExecutionContext context)
{
context.Increment();
}
}Well, we can fix that with
?. (as I assume you know):protected override void ExecuteOnce(ExecutionContext context) => context?.Increment();The last thing I want to look at is the
Dictionary, you can now use the new C#6.0 dictionary initializer syntax instead of the following:private static readonly Dictionary> SyntaxTrees =
new Dictionary>
{
{TokenType.Trivia, () => new TriviaSyntax()},
{TokenType.Increment, () => new IncrementInstructionSyntax()},
{TokenType.Decrement, () => new DecrementInstructionSyntax()},
{TokenType.MoveLeft, () => new MoveLeftInstructionSyntax()},
{TokenType.MoveRight, () => new MoveRightInstructionSyntax()},
{TokenType.Input, () => new InputInstructionSyntax()},
{TokenType.Output, () => new OutputInstructionSyntax()},
};Instead of that ugliness, we have a new slightly less ugliness:
```
private static readonly Dictionary> SyntaxTrees =
new Dictionary>
{
[TokenType.Trivia] = () => new TriviaSyntax(),
[TokenType.Increment] = () => new IncrementInstructionSyntax(),
[TokenType.Decrement] = () => new DecrementInstructionSyntax(),
[TokenType.MoveLeft] = () => new MoveLeftInstruct
Code Snippets
public class ExecutionContext
{
/// <summary>
/// Builds a context holding a memory tape of <paramref name="memorySize"/> cells,
/// an empty output buffer, and the optional input callback.
/// </summary>
/// <param name="memorySize">How many <c>int</c> cells the memory tape has.</param>
/// <param name="onInput">Callback used to obtain input values; may be <c>null</c>.</param>
public ExecutionContext(int memorySize = short.MaxValue, Func<int> onInput = null)
{
    this._stdOutput = new StringBuilder();
    this._memory = new int[memorySize];
    this._onInput = onInput;
}
private readonly int[] _memory;
private readonly Func<int> _onInput;
private readonly StringBuilder _stdOutput;
private int _pointer;
public int Pointer => _pointer;
public int Value => _memory[_pointer];
public string StdOut => _stdOutput.ToString();
public bool IsTrue(int position = -1)
{
return (position == -1 ? _memory[_pointer] : _memory[position]) != 0;
}
/// <summary>
/// Moves the memory pointer one cell to the left, wrapping around to the last
/// cell when the pointer is already on the first cell.
/// </summary>
public void MoveLeft()
{
    if (_pointer == 0)
    {
        // Wrap to the last valid index. _memory.Length is one past the end of the
        // array, so using it here would make the next cell access throw
        // IndexOutOfRangeException.
        _pointer = _memory.Length - 1;
    }
    else
    {
        _pointer--;
    }
}
/// <summary>
/// Moves the memory pointer one cell to the right, wrapping around to the first
/// cell when the pointer is already on (or past) the last cell.
/// </summary>
public void MoveRight()
{
    // The last valid index is Length - 1. Comparing against Length would let the
    // pointer step one past the end of the array before wrapping. ">=" also
    // recovers a pointer that is already sitting past the end.
    if (_pointer >= _memory.Length - 1)
    {
        _pointer = 0;
    }
    else
    {
        _pointer++;
    }
}
public void Increment()
{
_memory[_pointer] += 1;
}
public void Decrement()
{
_memory[_pointer] -= 1;
}
/// <summary>
/// Appends the current cell's value, converted to a <c>char</c>, to the output buffer.
/// </summary>
public void Output()
{
    var cellValue = _memory[_pointer];
    _stdOutput.Append((char)cellValue);
}
/// <summary>
/// Stores the next input value in the current cell. The value comes from the
/// input callback when one was supplied, otherwise from <c>Console.Read()</c>.
/// </summary>
public void Input()
{
    _memory[_pointer] = _onInput != null
        ? _onInput()
        : Console.Read();
}
}public void Increment()
{
_memory[_pointer] += 1;
}
public void Decrement()
{
_memory[_pointer] -= 1;
}public void Increment() => _memory[_pointer]++;
public void Decrement() => _memory[_pointer]--;private int _pointer;
public int Pointer => _pointer;public bool IsTrue(int position = -1)
{
return (position == -1 ? _memory[_pointer] : _memory[position]) != 0;
}Context
StackExchange Code Review Q#145060, answer score: 23
Revisions (0)
No revisions yet.