patternjavascriptMinor
Parsing function is 50 lines long
Viewed 0 times
longfunctionparsinglines
Problem
This is a parsing function that will add tildes (~) to the end of search terms in certain circumstances.
Example inputs and outputs:
Input: Output:
name:(john doe) name:(john~ doe~)
name:[andy TO charlie] name:[andy TO charlie]
john doe john~ doe~
james NOT jane james~ NOT jane
james NOT (james smith) james~ NOT (james smith)
james NOT jane smith james~ NOT jane smith~
name:"john doe" australia name:"john doe" australia~
```
/**
 * Appends a fuzzy-search tilde (~) to a plain search term.
 *
 * A token is returned unchanged when it is empty, when it contains any of the
 * special characters [ ] ~ " - ! ( ) & | : a digit or a space, or when it
 * contains one of the boolean operators NOT, OR, AND as a whole word.
 *
 * @param {string} string - A single token of the query.
 * @returns {string} The token, with "~" appended when it is a plain term.
 */
function addTilde(string) {
    // Grouping has no meaning inside a character class, so the operators are
    // matched as whole words in a separate alternative. Writing "(NOT)" inside
    // the class would exclude every term containing an uppercase N, O or T.
    var notFuzzable = /[\[\]~"\-!\d()&|: ]|\b(?:NOT|OR|AND)\b/;
    if (string === "" || notFuzzable.test(string)) {
        return string;
    }
    return string + "~";
}
function fuzzQuery(rawQuery) {
/split the string into spaces, brackets, double quotes and words/
re = /(?=[()\[\] "])|(?=[^\W])\b/;
strSplit = rawQuery.split(re);
newQuery = "";
for (var i = 0; i < strSplit.length; i++) {
var s = strSplit[i];
var newElement = "";
/if it contains a [ or "/
if (s.indexOf("\x22") != -1 || s.indexOf("[") != -1) {
/determine closing symbol/
var closingSymbol;
if (s == "\x22") {
closingSymbol = "\x22";
newElement = newElement.concat(strSplit[i++]); /need to skip opening one for double quotes/
} else closingSymbol = "]";
/concat elements together until closing element found)/
do {
newElement = newElement.concat(strSplit[i]);
}
while (strSplit[i++] != closingSymbol)
}
/if it contains a NOT/
else if (s.indexOf("NOT") != -1) {
newElement = strSplit[i++]; /concat the NOT/
/concat any spaces/
while (strSplit[i] == " ") {
newElement = newElement.concat(strSplit[i++]);
}
if (strSplit[i] == "(") {
do {
newElement = ne
Example inputs and outputs:
Input: Output:
name:(john doe) name:(john~ doe~)
name:[andy TO charlie] name:[andy TO charlie]
john doe john~ doe~
james NOT jane james~ NOT jane
james NOT (james smith) james~ NOT (james smith)
james NOT jane smith james~ NOT jane smith~
name:"john doe" australia name:"john doe" australia~
```
/**
 * Appends a fuzzy-search tilde (~) to a plain search term.
 *
 * A token is returned unchanged when it is empty, when it contains any of the
 * special characters [ ] ~ " - ! ( ) & | : a digit or a space, or when it
 * contains one of the boolean operators NOT, OR, AND as a whole word.
 *
 * @param {string} string - A single token of the query.
 * @returns {string} The token, with "~" appended when it is a plain term.
 */
function addTilde(string) {
    // Grouping has no meaning inside a character class, so the operators are
    // matched as whole words in a separate alternative. Writing "(NOT)" inside
    // the class would exclude every term containing an uppercase N, O or T.
    var notFuzzable = /[\[\]~"\-!\d()&|: ]|\b(?:NOT|OR|AND)\b/;
    if (string === "" || notFuzzable.test(string)) {
        return string;
    }
    return string + "~";
}
function fuzzQuery(rawQuery) {
/split the string into spaces, brackets, double quotes and words/
re = /(?=[()\[\] "])|(?=[^\W])\b/;
strSplit = rawQuery.split(re);
newQuery = "";
for (var i = 0; i < strSplit.length; i++) {
var s = strSplit[i];
var newElement = "";
/if it contains a [ or "/
if (s.indexOf("\x22") != -1 || s.indexOf("[") != -1) {
/determine closing symbol/
var closingSymbol;
if (s == "\x22") {
closingSymbol = "\x22";
newElement = newElement.concat(strSplit[i++]); /need to skip opening one for double quotes/
} else closingSymbol = "]";
/concat elements together until closing element found)/
do {
newElement = newElement.concat(strSplit[i]);
}
while (strSplit[i++] != closingSymbol)
}
/if it contains a NOT/
else if (s.indexOf("NOT") != -1) {
newElement = strSplit[i++]; /concat the NOT/
/concat any spaces/
while (strSplit[i] == " ") {
newElement = newElement.concat(strSplit[i++]);
}
if (strSplit[i] == "(") {
do {
newElement = ne
Solution
You aren't using regular expressions to your advantage. Capture, don't split. Capturing helps you analyze the tokens you are interested in. Splitting just gets you the location of the delimiters.
Examples:
-
-
-
-
-
-
-
/**
 * Tokenizes a search query with a capturing regular expression and logs the
 * components of every token, plus any text the expression could not parse.
 *
 * Capture groups of the token pattern:
 *   1 - relational operator ("NOT") preceding the term
 *   2 - field qualifier, e.g. "name:"
 *   3 - double-quoted phrase
 *   4 - parenthesized group
 *   5 - bracketed range
 *   6 - bare lowercase word
 *
 * Unparsed text is logged as "Junk=..." wherever it occurs: between two
 * tokens as well as after the last one.
 *
 * @param {string} rawQuery - The query text to analyze.
 * @returns {undefined} Nothing; results are written with console.log.
 */
function fuzzQuery(rawQuery) {
    "use strict";
    var re = /\s*(?:(NOT)\s+)?([a-z]+:)?(?:("[^"]*")|(\([^)]*\))|(\[[^\]]*\])|([a-z]+))\s*/g;
    var matches;
    // Offset just past the last successfully parsed token. Starting at 0 keeps
    // an empty query from being reported as junk.
    var consumed = 0;
    while ((matches = re.exec(rawQuery)) !== null) {
        // A global regex skips ahead to the next match, so a gap between the
        // previous token and this one is text that was never parsed.
        if (matches.index > consumed) {
            console.log("Junk=" + rawQuery.substring(consumed, matches.index));
        }
        var relOp = matches[1],
            qualifier = matches[2],
            quotedStr = matches[3],
            parensStr = matches[4],
            bracketStr = matches[5],
            bareWord = matches[6];
        // re.lastIndex resets to 0 when exec finally fails, so remember it now.
        consumed = re.lastIndex;
        console.log("relOp=" + relOp +
            ", qualifier=" + qualifier +
            ", quotedStr=" + quotedStr +
            ", parensStr=" + parensStr +
            ", bracketStr=" + bracketStr +
            ", bareWord=" + bareWord);
    }
    if (consumed !== rawQuery.length) {
        console.log("Junk=" + rawQuery.substring(consumed));
    }
}
// Examples:
-
name:(john doe)relOp=undefined, qualifier=name:, quotedStr=undefined, parensStr=(john doe), bracketStr=undefined, bareWord=undefined-
name:[andy TO charlie]relOp=undefined, qualifier=name:, quotedStr=undefined, parensStr=undefined, bracketStr=[andy TO charlie], bareWord=undefined-
john doerelOp=undefined, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=john
relOp=undefined, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=doe-
james NOT janerelOp=undefined, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=james
relOp=NOT, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=jane-
james NOT (james smith)relOp=undefined, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=james
relOp=NOT, qualifier=undefined, quotedStr=undefined, parensStr=(james smith), bracketStr=undefined, bareWord=undefined-
james NOT jane smithrelOp=undefined, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=james
relOp=NOT, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=jane
relOp=undefined, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=smith-
name:"john doe" australiarelOp=undefined, qualifier=name:, quotedStr="john doe", parensStr=undefined, bracketStr=undefined, bareWord=undefined
relOp=undefined, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=australiaCode Snippets
/**
 * Tokenizes a search query with a capturing regular expression and logs the
 * components of every token, plus any text the expression could not parse.
 *
 * Capture groups of the token pattern:
 *   1 - relational operator ("NOT") preceding the term
 *   2 - field qualifier, e.g. "name:"
 *   3 - double-quoted phrase
 *   4 - parenthesized group
 *   5 - bracketed range
 *   6 - bare lowercase word
 *
 * Unparsed text is logged as "Junk=..." wherever it occurs: between two
 * tokens as well as after the last one.
 *
 * @param {string} rawQuery - The query text to analyze.
 * @returns {undefined} Nothing; results are written with console.log.
 */
function fuzzQuery(rawQuery) {
    "use strict";
    var re = /\s*(?:(NOT)\s+)?([a-z]+:)?(?:("[^"]*")|(\([^)]*\))|(\[[^\]]*\])|([a-z]+))\s*/g;
    var matches;
    // Offset just past the last successfully parsed token. Starting at 0 keeps
    // an empty query from being reported as junk.
    var consumed = 0;
    while ((matches = re.exec(rawQuery)) !== null) {
        // A global regex skips ahead to the next match, so a gap between the
        // previous token and this one is text that was never parsed.
        if (matches.index > consumed) {
            console.log("Junk=" + rawQuery.substring(consumed, matches.index));
        }
        var relOp = matches[1],
            qualifier = matches[2],
            quotedStr = matches[3],
            parensStr = matches[4],
            bracketStr = matches[5],
            bareWord = matches[6];
        // re.lastIndex resets to 0 when exec finally fails, so remember it now.
        consumed = re.lastIndex;
        console.log("relOp=" + relOp +
            ", qualifier=" + qualifier +
            ", quotedStr=" + quotedStr +
            ", parensStr=" + parensStr +
            ", bracketStr=" + bracketStr +
            ", bareWord=" + bareWord);
    }
    if (consumed !== rawQuery.length) {
        console.log("Junk=" + rawQuery.substring(consumed));
    }
}
// relOp=undefined, qualifier=name:, quotedStr=undefined, parensStr=(john doe), bracketStr=undefined, bareWord=undefined
// relOp=undefined, qualifier=name:, quotedStr=undefined, parensStr=undefined, bracketStr=[andy TO charlie], bareWord=undefined
// relOp=undefined, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=john
relOp=undefined, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=doerelOp=undefined, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=james
relOp=NOT, qualifier=undefined, quotedStr=undefined, parensStr=undefined, bracketStr=undefined, bareWord=janeContext
StackExchange Code Review Q#39715, answer score: 3
Revisions (0)
No revisions yet.