patternjavascriptMinor
Formatting XML strings in JavaScript for readability
Viewed 0 times
formattingjavascriptreadabilityxmlforstrings
Problem
I've created a vanilla JS function to indent XML strings so they can be more easily read. It uses some pretty nasty regex...yes, I know it's a cardinal sin for XML/HTML, but it works. For instance, this string...
...would look like this after being passed through the function:
Here's the function itself. What can I do to simplify it?
...would look like this after being passed through the function:
Here's the function itself. What can I do to simplify it?
/**
 * formatXML(input) - the version posted in the question: meant to insert line
 * breaks into an XML string and prefix each line with tab indentation, then
 * return the lines joined with '\n'.
 *
 * NOTE(review): this copy looks damaged (angle-bracket content appears to have
 * been stripped during extraction) - confirm against the original post:
 *   - the regex literal passed to .replace() has unbalanced parentheses;
 *   - `line` and `i` are used below, but no loop or declaration for them is
 *     visible here.
 * Treat it as illustrative only; it is not runnable as shown.
 *
 * Also visible: `xmlString` and `xmlArr` are assigned without var/let/const,
 * so they would be implicit globals (and a ReferenceError in strict mode).
 */
function formatXML(input) {
// PART 1: Add \n where necessary
// A) add \n between sets of angled brackets without content between them
// B) remove \n between opening and closing tags of the same node if no content is between them
// C) add \n between a self-closing set of angled brackets and the next set
// D) split it into an array
xmlString = input.trim()
.replace(/>\s*\n].*>)\n(]+>|]+\/>)(]+>)/g,'$1\n$2');
xmlArr = xmlString.split('\n');
// PART 2: indent each line appropriately
var tabs = ''; //store the current indentation
var start = 0; //starting line
if (/^.*|]\/>/.test(line)) { // if the line contains an entire node
// leave the store as is
// add the tabs at the beginning of the line
xmlArr[i] = tabs + line;
} else { // if the line starts with an opening tag and does not contain an entire node
// add the tabs at the beginning of the line
// and add one tab to the store
xmlArr[i] = tabs + line;
tabs += '\t';
}
}
//rejoin the array to a string and return it
return xmlArr.join('\n');
}Solution
I've been looking for something like this; nice idea. But your function can't handle text nodes. Try inserting some text, for example:
My proposal for this problem is:
1) change the splitting code in this way:
2) slightly modify the third branch
So finally, our function will be:
textNodeinnerTextMy proposition for this problem is:
1) change the splitting code in this way:
.replace( /(]*>)(?!|[\w\s])/g, "$1\n" ) //add \n after tag if not followed by the closing tag of pair or text node
.replace( /(]*>)/g, "$1\n") //add \n after closing tag
.replace( />\s+(.+?)\s+\n$1\n(.+?)\n$1\n\n<") //detect a header of XML2) little modify third statement
else if and add one more:else if (//.test(line)) //if the line starts with an opening tag and does not contain an entire node
{
xmlArr[i] = tabs + line; //add the tabs at the beginning of the line
tabs += indent; //and add one indent to the store
}
else //if the line contain a text node
{
xmlArr[i] = tabs + line; // add the tabs at the beginning of the line
}So finaly, our function will be:
/**
 * formatXML(input, indent) - the answer's revised formatter: inserts line
 * breaks into an XML string, then prefixes each line with the current
 * indentation (`indent`, defaulting to a tab) and returns the lines joined
 * with '\n'.
 *
 * NOTE(review): this copy looks damaged (angle-bracket content appears to have
 * been stripped during extraction) - the regex literals in the .replace()
 * chain are incomplete, the loop header that defines `line` and `i` is not
 * visible, and one `else if` condition is reduced to `//`, which comments out
 * the rest of that line. A complete copy of the same function appears later
 * in this file; prefer that one.
 *
 * Also visible: the trimmed value assigned to `xmlString` is immediately
 * overwritten by `xmlString = input`, and `xmlString` / `xmlArr` are assigned
 * without a declaration.
 */
function formatXML(input,indent)
{
indent = indent || '\t'; //you can set/define other ident than tabs
//PART 1: Add \n where necessary
xmlString = input.replace(/^\s+|\s+$/g, ''); //trim it (just in case) {method trim() not working in IE8}
xmlString = input
.replace( /(]*>)(?!|[\w\s])/g, "$1\n" ) //add \n after tag if not followed by the closing tag of pair or text node
.replace( /(]*>)/g, "$1\n") //add \n after closing tag
.replace( />\s+(.+?)\s+\n$1\n(.+?)\n$1\n\n.*|]\/>/.test(line)) //if the line contains an entire node
{
//leave the store as is
xmlArr[i] = tabs + line; //add the tabs at the beginning of the line
}
else if (//.test(line)) //if the line starts with an opening tag and does not contain an entire node
{
xmlArr[i] = tabs + line; //add the tabs at the beginning of the line
tabs += indent; //and add one indent to the store
}
else //if the line contain a text node
{
xmlArr[i] = tabs + line; // add the tabs at the beginning of the line
}
}
//PART 3: return formatted string (source)
return xmlArr.join('\n'); //rejoin the array to a string and return it
}Code Snippets
<?xml version="1.0" encoding="UTF-8"?><soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body>textNode<soapenv:temp>innerText</soapenv:temp><ns:temp><ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" /></ns:temp></soapenv:Body></soapenv:Envelope>.replace( /(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, "$1\n" ) //add \n after tag if not followed by the closing tag of pair or text node
.replace( /(<\/[a-zA-Z]+[^>]*>)/g, "$1\n") //add \n after closing tag
.replace( />\s+(.+?)\s+<(?!\/)/g, ">\n$1\n<") //add \n between sets of angled brackets and text node between them
.replace( />(.+?)<([a-zA-Z])/g, ">\n$1\n<$2") //add \n between angled brackets and text node between them
.replace(/\?></, "?>\n<") //detect a header of XMLelse if (/<.*>/.test(line)) //if the line starts with an opening tag and does not contain an entire node
{
xmlArr[i] = tabs + line; //add the tabs at the beginning of the line
tabs += indent; //and add one indent to the store
}
else //if the line contain a text node
{
xmlArr[i] = tabs + line; // add the tabs at the beginning of the line
/**
 * Re-indents an XML string so nested nodes are easier to read.
 *
 * Works purely with regular expressions (no XML parser): line breaks are first
 * inserted around tags and text nodes, then every line is trimmed and prefixed
 * with the indentation for its nesting depth. If the first line is an
 * `<?xml ...?>` header it is left as is and does not affect the depth.
 *
 * @param {string} input - XML source text; leading/trailing whitespace is ignored.
 * @param {string} [indent] - String used for one indentation level. Any falsy
 *     value (including '') falls back to a single tab.
 * @returns {string} The formatted lines joined with '\n'.
 */
function formatXML(input, indent)
{
    var unit = indent || '\t'; //one level of indentation (parameter itself is not reassigned)

    //PART 1: Add \n where necessary
    //Everything is declared with var: assigning undeclared names would leak globals
    //(and throws in strict mode / ES modules).
    var xmlString = input
        .replace(/^\s+|\s+$/g, '') //trim first, so the header check below sees "<?xml" at position 0 (regex because IE8 lacks trim())
        .replace(/(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, '$1\n') //add \n after tag if not followed by the closing tag of pair or text node
        .replace(/(<\/[a-zA-Z]+[^>]*>)/g, '$1\n') //add \n after closing tag
        .replace(/>\s+(.+?)\s+<(?!\/)/g, '>\n$1\n<') //add \n between sets of angled brackets and text node between them
        .replace(/>(.+?)<([a-zA-Z])/g, '>\n$1\n<$2') //add \n between angled brackets and text node between them
        .replace(/\?></, '?>\n<'); //put the XML header on its own line
    var xmlArr = xmlString.split('\n'); //one entry per output line

    //PART 2: indent each line appropriately
    var tabs = ''; //current indentation prefix
    var start = /^<[?]xml/.test(xmlArr[0]) ? 1 : 0; //skip the header line, if any

    for (var i = start; i < xmlArr.length; i++)
    {
        var line = xmlArr[i].replace(/^\s+|\s+$/g, ''); //trim it (just in case)
        var isClosing = /^<\//.test(line);

        if (isClosing)
        {
            tabs = tabs.slice(unit.length); //closing tag: outdent before writing the line
        }

        xmlArr[i] = tabs + line;

        //Only a bare opening tag increases the depth; a line holding an entire
        //node (<a>..</a> or <a/>) or a plain text node leaves it unchanged.
        if (!isClosing
            && !/<.*>.*<\/.*>|<.*[^>]\/>/.test(line)
            && /<.*>/.test(line))
        {
            tabs += unit;
        }
    }

    //PART 3: return formatted string
    return xmlArr.join('\n');
}
StackExchange Code Review Q#142712, answer score: 3
Revisions (0)
No revisions yet.