HiveBrain v1.2.0
Get Started
← Back to all entries
patternjavascriptMinor

Formatting XML strings in JavaScript for readability

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
formattingjavascriptreadabilityxmlforstrings

Problem

I've created a vanilla JS function to indent XML strings so they can be more easily read. It uses some pretty nasty regex...yes, I know it's a cardinal sin for XML/HTML, but it works. For instance, this string...



...would look like this after being passed through the function:


    
        
    


Here's the function itself. What can I do to simplify it?

function formatXML(input) {

    // PART 1: Add \n where necessary
    // A) add \n between sets of angled brackets without content between them
    // B) remove \n between opening and closing tags of the same node if no content is between them
    // C) add \n between a self-closing set of angled brackets and the next set
    // D) split it into an array

    xmlString = input.trim()
        .replace(/>\s*\n].*>)\n(]+>|]+\/>)(]+>)/g,'$1\n$2');            
    xmlArr = xmlString.split('\n');

    // PART 2: indent each line appropriately

    var tabs = '';          //store the current indentation
    var start = 0;          //starting line
    if (/^.*|]\/>/.test(line)) { // if the line contains an entire node                
            // leave the store as is
            // add the tabs at the beginning of the line
            xmlArr[i] = tabs + line;
        } else { // if the line starts with an opening tag and does not contain an entire node                
            // add the tabs at the beginning of the line
            // and add one tab to the store
            xmlArr[i] = tabs + line;            
            tabs += '\t';
        }                    
    }

    //rejoin the array to a string and return it
    return xmlArr.join('\n');
}

Solution

I've been looking for something like this — nice idea. But your function can't handle text nodes. Try inserting some text, for example:

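<?xml version="1.0" encoding="UTF-8"?><soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body>textNode<soapenv:temp>innerText</soapenv:temp><ns:temp><ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" /></ns:temp></soapenv:Body></soapenv:Envelope>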


My proposed fix for this problem is:

1) Change the splitting code as follows:

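.replace( /(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, "$1\n" ) //add \n after tag if not followed by the closing tag of pair or text node
.replace( /(<\/[a-zA-Z]+[^>]*>)/g, "$1\n") //add \n after closing tag
.replace( />\s+(.+?)\s+<(?!\/)/g, ">\n$1\n<") //add \n between sets of angled brackets and text node between them
.replace( />(.+?)<([a-zA-Z])/g, ">\n$1\n<$2") //add \n between angled brackets and text node between them
.replace(/\?></, "?>\n<") //detect a header of XML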


2) Slightly modify the third branch (the else if) and add one more branch for text nodes:

else if (/<.*>/.test(line)) //if the line starts with an opening tag and does not contain an entire node
 {
  xmlArr[i] = tabs + line;  //add the tabs at the beginning of the line
  tabs += indent;  //and add one indent to the store
 }
 else  //if the line contain a text node
 {
  xmlArr[i] = tabs + line;  // add the tabs at the beginning of the line
 }


So, finally, our function becomes:

/**
 * Re-indent an XML string so that every node sits on its own line and nested
 * nodes are indented one level deeper than their parent.
 *
 * The approach is regex-based (no XML parser is involved): line breaks are
 * first inserted around tags, then each resulting line is classified as a
 * closing tag, a complete node, an opening tag or a text node.
 *
 * @param {string} input - XML markup to format.
 * @param {string} [indent] - String used for one indentation level; any falsy
 *   value falls back to a tab character.
 * @returns {string} The formatted markup, lines joined with '\n', without
 *   blank lines and without a trailing line break.
 */
function formatXML(input, indent)
{
  indent = indent || '\t'; //you can set/define another indent than tabs

  //PART 1: Add \n where necessary
  //The trimmed string must be the one that is processed further: leading
  //whitespace would otherwise hide the <?xml header from the check below.
  //(A regex is used because trim() is not available in IE8.)
  var xmlString = input.replace(/^\s+|\s+$/g, '')
                   .replace( /(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, "$1\n" ) //add \n after tag if not followed by the closing tag of pair or text node
                   .replace( /(<\/[a-zA-Z]+[^>]*>)/g, "$1\n") //add \n after closing tag
                   .replace( />\s+(.+?)\s+<(?!\/)/g, ">\n$1\n<") //add \n between sets of angled brackets and text node between them
                   .replace( />(.+?)<([a-zA-Z])/g, ">\n$1\n<$2") //add \n between angled brackets and text node between them
                   .replace(/\?></, "?>\n<"); //put the XML header on its own line

  var xmlArr = xmlString.split('\n');  //split it into an array (to analyse each line separately)

  //PART 2: indent each line appropriately
  var tabs = '';       //store the current indentation
  var formatted = [];  //output lines, in order

  for (var i = 0; i < xmlArr.length; i++) //for each line
  {
    var line = xmlArr[i].replace(/^\s+|\s+$/g, '');  //trim it (just in case)

    //PART 1 appends \n after every closing tag, which leaves empty lines
    //(at least one at the very end); they carry no content, so drop them
    if (line === '') continue;

    if (i === 0 && /^<[?]xml/.test(line))  //if the first line is a header, do not indent from it
    {
      formatted.push(line);
      continue;
    }

    if (/^<[/]/.test(line))  //if the line is a closing tag
    {
      tabs = tabs.replace(indent, '');  //remove one indent from the store
      formatted.push(tabs + line);
    }
    else if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(line))  //if the line contains an entire node
    {
      formatted.push(tabs + line);  //leave the store as is
    }
    else if (/<.*>/.test(line))  //if the line is an opening tag without its closing tag
    {
      formatted.push(tabs + line);
      tabs += indent;  //children go one level deeper
    }
    else  //if the line contains a text node
    {
      formatted.push(tabs + line);
    }
  }

  //PART 3: return formatted string
  return formatted.join('\n');
}

Code Snippets

<?xml version="1.0" encoding="UTF-8"?><soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body>textNode<soapenv:temp>innerText</soapenv:temp><ns:temp><ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" /></ns:temp></soapenv:Body></soapenv:Envelope>
.replace( /(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, "$1\n" ) //add \n after tag if not followed by the closing tag of pair or text node
.replace( /(<\/[a-zA-Z]+[^>]*>)/g, "$1\n") //add \n after closing tag
.replace( />\s+(.+?)\s+<(?!\/)/g, ">\n$1\n<") //add \n between sets of angled brackets and text node between them
.replace( />(.+?)<([a-zA-Z])/g, ">\n$1\n<$2") //add \n between angled brackets and text node between them
.replace(/\?></, "?>\n<") //detect a header of XML
else if (/<.*>/.test(line)) //if the line starts with an opening tag and does not contain an entire node
 {
  xmlArr[i] = tabs + line;  //add the tabs at the beginning of the line
  tabs += indent;  //and add one indent to the store
 }
 else  //if the line contain a text node
 {
  xmlArr[i] = tabs + line;  // add the tabs at the beginning of the line
 }
/**
 * Re-indent an XML string so that every node sits on its own line and nested
 * nodes are indented one level deeper than their parent.
 *
 * The approach is regex-based (no XML parser is involved): line breaks are
 * first inserted around tags, then each resulting line is classified as a
 * closing tag, a complete node, an opening tag or a text node.
 *
 * @param {string} input - XML markup to format.
 * @param {string} [indent] - String used for one indentation level; any falsy
 *   value falls back to a tab character.
 * @returns {string} The formatted markup, lines joined with '\n', without
 *   blank lines and without a trailing line break.
 */
function formatXML(input, indent)
{
  indent = indent || '\t'; //you can set/define another indent than tabs

  //PART 1: Add \n where necessary
  //The trimmed string must be the one that is processed further: leading
  //whitespace would otherwise hide the <?xml header from the check below.
  //(A regex is used because trim() is not available in IE8.)
  var xmlString = input.replace(/^\s+|\s+$/g, '')
                   .replace( /(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, "$1\n" ) //add \n after tag if not followed by the closing tag of pair or text node
                   .replace( /(<\/[a-zA-Z]+[^>]*>)/g, "$1\n") //add \n after closing tag
                   .replace( />\s+(.+?)\s+<(?!\/)/g, ">\n$1\n<") //add \n between sets of angled brackets and text node between them
                   .replace( />(.+?)<([a-zA-Z])/g, ">\n$1\n<$2") //add \n between angled brackets and text node between them
                   .replace(/\?></, "?>\n<"); //put the XML header on its own line

  var xmlArr = xmlString.split('\n');  //split it into an array (to analyse each line separately)

  //PART 2: indent each line appropriately
  var tabs = '';       //store the current indentation
  var formatted = [];  //output lines, in order

  for (var i = 0; i < xmlArr.length; i++) //for each line
  {
    var line = xmlArr[i].replace(/^\s+|\s+$/g, '');  //trim it (just in case)

    //PART 1 appends \n after every closing tag, which leaves empty lines
    //(at least one at the very end); they carry no content, so drop them
    if (line === '') continue;

    if (i === 0 && /^<[?]xml/.test(line))  //if the first line is a header, do not indent from it
    {
      formatted.push(line);
      continue;
    }

    if (/^<[/]/.test(line))  //if the line is a closing tag
    {
      tabs = tabs.replace(indent, '');  //remove one indent from the store
      formatted.push(tabs + line);
    }
    else if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(line))  //if the line contains an entire node
    {
      formatted.push(tabs + line);  //leave the store as is
    }
    else if (/<.*>/.test(line))  //if the line is an opening tag without its closing tag
    {
      formatted.push(tabs + line);
      tabs += indent;  //children go one level deeper
    }
    else  //if the line contains a text node
    {
      formatted.push(tabs + line);
    }
  }

  //PART 3: return formatted string
  return formatted.join('\n');
}

Context

StackExchange Code Review Q#142712, answer score: 3

Revisions (0)

No revisions yet.