patterncsharpMinor
My recursive parser is wet behind the ears (well, it's not DRY at least)
Viewed 0 times
theparserwetearsrecursivewelldrybehindleastnot
Problem
I have an issue opened on GitHub since mid-November, to refactor & DRY up the Rubberduck
Yes, I've written this ........crap. I'm guilty of Ctrl+C/Ctrl+V coding here, of which I'm very unproud, but I'm not sure if it's because I messed up or because it's just the way it has to be (please, no!).
A bit of VBA theory here. In VBA, a code line can be "continued" using a "line continuation character", like this:
The code module will give me 3 lines for the above, but it's really 1 "logical" code line, with one instruction that declares 3 variables. The
Parser module (the idea is to model the code in a VBA project or code module, so it can be browsed and inspected programmatically).Yes, I've written this ........crap. I'm guilty of Ctrl+C/Ctrl+V coding here, of which I'm very unproud, but I'm not sure if it's because I messed up or because it's just the way it has to be (please, no!).
[ComVisible(false)]
public class Parser
{
private readonly IEnumerable _grammar;
public Parser(IEnumerable grammar)
{
_grammar = grammar;
}
public SyntaxTreeNode Parse(VBProject project)
{
var nodes = new List();
try
{
var components = project.VBComponents.Cast().ToList();
foreach (var component in components)
{
var lineCount = component.CodeModule.CountOfLines;
if (lineCount
/// Converts VBA code into a .
///
/// The name of the VBA Project, used for scoping public nodes.
/// The name of the module, used for scoping private nodes.
/// The code to parse.
///
public SyntaxTreeNode Parse(string projectName, string componentName, string code, bool isClassModule)
{
var content = SplitLogicalCodeLines(projectName, componentName, code);
var memberNodes = ParseModuleMembers(projectName, componentName, content).ToList();
var result = new ModuleNode(projectName, componentName, memberNodes, isClassModule);
return result;
}A bit of VBA theory here. In VBA, a code line can be "continued" using a "line continuation character", like this:
Dim foo As Integer, _
bar As String, _
baz As SomethingElse
The code module will give me 3 lines for the above, but it's really 1 "logical" code line, with one instruction that declares 3 variables. The
SplitLogicalCodeLines method scans the VBA source code and returns a LogicalCodeLine for each, wellSolution
Extract methods, extract methods everywhere, PLEASE!
It's embarassing, after extracting the body of this code:
which appears in the exactly same form in
After this you should also extract the correspondent bit of code in
I would also swap your while in the ParseCodeBlock by a for and get rid of of the increments that are made in the existing while body, and give more meaning to what we are doing: processing all lines.
Refactored code on pastebin.
It's embarassing, after extracting the body of this code:
foreach (var instruction in instructions)
{
var parsed = false;
foreach (var syntax in grammar)
{
SyntaxTreeNode node;
if (!syntax.IsMatch(publicScope, localScope, instruction, out node))
{
continue;
}
if (node.HasChildNodes)
{
var childNode = node as CodeBlockNode;
if (childNode != null)
{
node = ParseCodeBlock(publicScope, localScope, childNode, logicalCodeLines, ref currentIndex);
}
}
result.AddNode(node);
parsed = true;
break;
}
if (!parsed)
{
result.AddNode(new ExpressionNode(instruction, localScope));
}
}which appears in the exactly same form in
ParseProcedure and in ParseCodeBlock, I could, of course re-use it and make the code more readable and mantainable:private bool ParseCodeBlockInstruction(string publicScope, string localScope, IEnumerable logicalLines,
Instruction instruction, CodeBlockNode codeBlockNode, IEnumerable grammar, ref int index)
{
var result = codeBlockNode;
foreach (var syntax in grammar)
{
SyntaxTreeNode node;
if (!syntax.IsMatch(publicScope, localScope, instruction, out node))
{
continue;
}
var childNode = node as CodeBlockNode;
if (childNode != null)
{
node = ParseCodeBlock(publicScope, localScope, childNode, logicalLines, ref index);
}
result.AddNode(node);
return true;
}
return false;
}After this you should also extract the correspondent bit of code in
ParseModuleMembers. This time the reason is because the logic is a bit hard to follow up, and that parsed flag that appeared everywhere in those three methods doesn't really help:private class ParsedSyntaxTreeNode
{
public SyntaxTreeNode Node { get; set; }
public bool Parsed { get; set; }
public int Index { get; set; }
}
private IEnumerable ParseModuleInstruction(string publicScope, string localScope, IEnumerable lines,
Instruction instruction, int index)
{
foreach (var syntax in _grammar.Where(s => !s.IsChildNodeSyntax))
{
string currentLocalScope = localScope;
SyntaxTreeNode node;
if (!syntax.IsMatch(publicScope, currentLocalScope, instruction, out node))
{
continue;
}
if (syntax.Type.HasFlag(SyntaxType.HasChildNodes))
{
var codeBlockNode = node as CodeBlockNode;
if (codeBlockNode != null)
{
if (node is ProcedureNode)
{
currentLocalScope = localScope + "." + (node as ProcedureNode).Identifier.Name;
var parsedProcNode = ParseProcedure(publicScope, currentLocalScope, node as ProcedureNode, lines, ref index);
yield return new ParsedSyntaxTreeNode()
{
Node = parsedProcNode,
Parsed = true,
Index = index
};
yield break;
}
var parsedCodeNode = ParseCodeBlock(publicScope, currentLocalScope, codeBlockNode, lines, ref index);
yield return new ParsedSyntaxTreeNode()
{
Node = parsedCodeNode,
Parsed = true,
Index = index
};
yield break;
}
}
yield return new ParsedSyntaxTreeNode()
{
Node = node,
Parsed = true,
Index = index
};
}
yield return new ParsedSyntaxTreeNode(){Index = index, Parsed = false};
}I would also swap your while in the ParseCodeBlock by a for and get rid of of the increments that are made in the existing while body, and give more meaning to what we are doing: processing all lines.
for (++index; index lines[indexCaptured].Content.Trim().StartsWith(marker)))
{
break;
}
var line = lines[index];
if (string.IsNullOrWhiteSpace(line.Content))
{
continue;
}
var instructions = line.SplitInstructions();
foreach (var instruction in instructions)
{
bool hadMatch = ParseCodeBlockInstruction(publicScope, localScope, logicalCodeLines, instruction, codeBlockNode, grammar, ref index);
if (!hadMatch)
{
result.AddNode(new ExpressionNode(instruction, localScope));
}
}
if (lines[index + 1].Content.Trim().StartsWith(ReservedKeywords.Else))
{
break;
}
}
index = Math.Max(index, lines.Length);Refactored code on pastebin.
Code Snippets
foreach (var instruction in instructions)
{
var parsed = false;
foreach (var syntax in grammar)
{
SyntaxTreeNode node;
if (!syntax.IsMatch(publicScope, localScope, instruction, out node))
{
continue;
}
if (node.HasChildNodes)
{
var childNode = node as CodeBlockNode;
if (childNode != null)
{
node = ParseCodeBlock(publicScope, localScope, childNode, logicalCodeLines, ref currentIndex);
}
}
result.AddNode(node);
parsed = true;
break;
}
if (!parsed)
{
result.AddNode(new ExpressionNode(instruction, localScope));
}
}private bool ParseCodeBlockInstruction(string publicScope, string localScope, IEnumerable<LogicalCodeLine> logicalLines,
Instruction instruction, CodeBlockNode codeBlockNode, IEnumerable<ISyntax> grammar, ref int index)
{
var result = codeBlockNode;
foreach (var syntax in grammar)
{
SyntaxTreeNode node;
if (!syntax.IsMatch(publicScope, localScope, instruction, out node))
{
continue;
}
var childNode = node as CodeBlockNode;
if (childNode != null)
{
node = ParseCodeBlock(publicScope, localScope, childNode, logicalLines, ref index);
}
result.AddNode(node);
return true;
}
return false;
}private class ParsedSyntaxTreeNode
{
public SyntaxTreeNode Node { get; set; }
public bool Parsed { get; set; }
public int Index { get; set; }
}
private IEnumerable<ParsedSyntaxTreeNode> ParseModuleInstruction(string publicScope, string localScope, IEnumerable<LogicalCodeLine> lines,
Instruction instruction, int index)
{
foreach (var syntax in _grammar.Where(s => !s.IsChildNodeSyntax))
{
string currentLocalScope = localScope;
SyntaxTreeNode node;
if (!syntax.IsMatch(publicScope, currentLocalScope, instruction, out node))
{
continue;
}
if (syntax.Type.HasFlag(SyntaxType.HasChildNodes))
{
var codeBlockNode = node as CodeBlockNode;
if (codeBlockNode != null)
{
if (node is ProcedureNode)
{
currentLocalScope = localScope + "." + (node as ProcedureNode).Identifier.Name;
var parsedProcNode = ParseProcedure(publicScope, currentLocalScope, node as ProcedureNode, lines, ref index);
yield return new ParsedSyntaxTreeNode()
{
Node = parsedProcNode,
Parsed = true,
Index = index
};
yield break;
}
var parsedCodeNode = ParseCodeBlock(publicScope, currentLocalScope, codeBlockNode, lines, ref index);
yield return new ParsedSyntaxTreeNode()
{
Node = parsedCodeNode,
Parsed = true,
Index = index
};
yield break;
}
}
yield return new ParsedSyntaxTreeNode()
{
Node = node,
Parsed = true,
Index = index
};
}
yield return new ParsedSyntaxTreeNode(){Index = index, Parsed = false};
}for (++index; index < lines.Length; ++index)
{
int indexCaptured = index;
if (result.EndOfBlockMarkers.Any(marker => lines[indexCaptured].Content.Trim().StartsWith(marker)))
{
break;
}
var line = lines[index];
if (string.IsNullOrWhiteSpace(line.Content))
{
continue;
}
var instructions = line.SplitInstructions();
foreach (var instruction in instructions)
{
bool hadMatch = ParseCodeBlockInstruction(publicScope, localScope, logicalCodeLines, instruction, codeBlockNode, grammar, ref index);
if (!hadMatch)
{
result.AddNode(new ExpressionNode(instruction, localScope));
}
}
if (lines[index + 1].Content.Trim().StartsWith(ReservedKeywords.Else))
{
break;
}
}
index = Math.Max(index, lines.Length);Context
StackExchange Code Review Q#74553, answer score: 6
Revisions (0)
No revisions yet.