HiveBrain v1.2.0
Get Started
← Back to all entries
patternjavaMinor

HTTP Authorization header parser

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
parserauthorizationhttpheader

Problem

I'm writing a parser for the HTTP Authorization header (see RFC2616#14.8 and RFC2617#1.2). Note that I explicitly don't care about the base64-encoded syntax used by HTTP Basic authentication. I'm only interested in the auth-param syntax used by Digest authentication (to be more specific, I'm implementing a custom Authorization header similar to this question on SO). Basically, it's just a list of key=value pairs separated by commas, where each value may be quoted or unquoted.

Here's my code, which seems to parse the examples from the RFC just fine:

```
package com.example.sample;

import java.util.HashMap;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AuthorizationHeaderParser {
/**
 * Characters that may not appear in a token, already escaped so the string
 * can be spliced into a regex character class.
 */
private static final String SEPARATORS = "()<>@,;:\\\\\"/\\[\\]?={} \t";
/** One or more ASCII characters that are neither separators nor control characters. */
private static final Pattern TOKEN_PATTERN = Pattern
        .compile("[[\\p{ASCII}]&&[^" + SEPARATORS + "]&&[^\\p{Cntrl}]]+");
/** A literal equals sign. */
private static final Pattern EQ_PATTERN = Pattern.compile("=");
/**
 * A double-quoted string, including both quotes. Inside the quotes a
 * backslash always starts a two-character escape (backslash plus any ASCII
 * character). The plain-character alternative excludes the backslash on
 * purpose: if it could also consume a backslash, an escaped quote would be
 * read as the closing quote and the string would end too early.
 */
private static final Pattern TOKEN_QUOTED_PATTERN = Pattern
        .compile("\"([^\"\\\\]|\\\\\\p{ASCII})*\"");
/** A literal comma. */
private static final Pattern COMMA_PATTERN = Pattern.compile(",");
/** An optional line break (CRLF or LF) followed by one or more spaces or tabs. */
private static final Pattern LWS_PATTERN = Pattern
        .compile("(\r?\n)?[ \t]+");

private static class Tokenizer {
private String remaining;

/**
 * Creates a tokenizer whose unread text is the whole of {@code input}.
 *
 * @param input the text to tokenize
 */
public Tokenizer(String input) {
    this.remaining = input;
}

/**
 * Drops all leading linear whitespace from the unread text.
 *
 * One match of {@code LWS_PATTERN} covers only a single optional line break
 * plus the blanks after it. Input such as blanks followed by a folded line
 * needs several matches, so keep stripping until nothing matches.
 */
private void skipSpaces() {
    Matcher m = LWS_PATTERN.matcher(remaining);
    // Each match consumes at least one character, so the loop terminates.
    while (m.lookingAt()) {
        remaining = remaining.substring(m.end());
        m = LWS_PATTERN.matcher(remaining);
    }
}

public String match(Pattern p) {
skipSpaces();
Matcher m = p.matcher(remaining);
if (!m.lookingAt()) {
return null;
}
String match = m.group(

Solution

Going through your specific questions, I have the following suggestions:

  • Should you write a 'real' parser? - Depends. Parsers can be complicated, and they make assumptions. Regardless, you have already written your own parser, and it is 'real'.



  • This is the BIG question... is it right? - With regexes it is often hard to tell, and it requires careful analysis of the regex and the data to find out. I have looked at your code, and inspected the regex, and, frankly, it was more complicated than I could easily understand in one sitting.... (and without 'playing' with the code). So, is it right? I don't know.



  • Are the regexes too complicated (can they be simplified)? - yes, I would say yes to being too complicated, and unsure about whether they can be simplified.



  • Other suggestions? - yes, a few.... which leads on to:



OK, so what are the other suggestions.....

  • Since you have a class called Tokenizer, it is apparent you are breaking the input into tokens. Why not use the tools Java already provides (for example, java.util.Scanner) to do that work for you?



  • This problem is also commonly solved with a state machine, which is sometimes much faster, and quite interesting to write.



So, as an exercise, I took your code and implemented both a state-machine version and a Scanner version. I have used state machines in the past to parse comma-separated value files, and the process was very fast, so I figured it made sense here too. The Scanner version is more complicated than I would have hoped, but you may find the implementation to be educational (I did).

As for a review of your code.... I found it 'easier' to write it again myself, than to try to understand yours. In a sense, that says a lot.

```
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AuthorizationHeaderParser {

/* ****************************************
 * OP Mechanism
 * **************************************** */

/**
 * Characters that may not appear in a token, already escaped so the string
 * can be spliced into a regex character class.
 */
private static final String SEPARATORS = "()<>@,;:\\\\\"/\\[\\]?={} \t";

/** One or more ASCII characters that are neither separators nor control characters. */
private static final Pattern TOKEN_PATTERN = Pattern
        .compile("[[\\p{ASCII}]&&[^" + SEPARATORS + "]&&[^\\p{Cntrl}]]+");
/** A literal equals sign. */
private static final Pattern EQ_PATTERN = Pattern.compile("=");
/**
 * A double-quoted string, including both quotes. Inside the quotes a
 * backslash always starts a two-character escape (backslash plus any ASCII
 * character). The plain-character alternative excludes the backslash on
 * purpose: if it could also consume a backslash, an escaped quote would be
 * read as the closing quote and the string would end too early.
 */
private static final Pattern TOKEN_QUOTED_PATTERN = Pattern
        .compile("\"([^\"\\\\]|\\\\\\p{ASCII})*\"");
/** A literal comma. */
private static final Pattern COMMA_PATTERN = Pattern.compile(",");
/** An optional line break (CRLF or LF) followed by one or more spaces or tabs. */
private static final Pattern LWS_PATTERN = Pattern
        .compile("(\r?\n)?[ \t]+");

/**
 * Consumes a string from the front, one pattern match at a time.
 * Leading linear whitespace is discarded before every match attempt.
 */
private static class Tokenizer {
    /** The part of the input that has not been consumed yet. */
    private String remaining;

    /**
     * @param input the text to tokenize
     */
    public Tokenizer(String input) {
        remaining = input;
    }

    /**
     * Drops all leading linear whitespace from the unread text.
     *
     * One match of LWS_PATTERN covers only a single optional line break
     * plus the blanks after it. Input such as blanks followed by a folded
     * line needs several matches, so keep stripping until nothing matches.
     */
    private void skipSpaces() {
        Matcher m = LWS_PATTERN.matcher(remaining);
        // Each match consumes at least one character, so the loop terminates.
        while (m.lookingAt()) {
            remaining = remaining.substring(m.end());
            m = LWS_PATTERN.matcher(remaining);
        }
    }

    /**
     * Tries to match {@code p} at the start of the unread text, after
     * skipping whitespace.
     *
     * @param p the pattern to try
     * @return the matched text (now consumed), or {@code null} if the
     *         pattern does not match at the current position
     */
    public String match(Pattern p) {
        skipSpaces();
        Matcher m = p.matcher(remaining);
        if (!m.lookingAt()) {
            return null;
        }
        String match = m.group();
        // lookingAt() anchors at index 0, so end() is the matched length.
        remaining = remaining.substring(m.end());
        return match;
    }

    /**
     * Like {@link #match(Pattern)}, but a failed match is an error.
     *
     * @param p the pattern that has to match
     * @return the matched text (now consumed)
     * @throws NoSuchElementException if the pattern does not match
     */
    public String mustMatch(Pattern p) {
        String match = match(p);
        if (match == null) {
            throw new NoSuchElementException();
        }
        return match;
    }

    /**
     * Skips whitespace and reports whether any unread text is left.
     *
     * @return {@code true} if at least one character remains
     */
    public boolean hasMore() {
        skipSpaces();
        return remaining.length() > 0;
    }

}

/**
 * Parses an Authorization-style header value of the form
 * {@code scheme key=value, key="value", ...} into a map.
 *
 * The leading token is stored under the key {@code ":auth-scheme"}; if the
 * input does not start with a token, that entry's value is {@code null}.
 * Each value is either a bare token or a quoted string. Surrounding quotes
 * are removed, but backslash escapes inside the quotes are kept as-is.
 * When a key occurs more than once, the last value wins.
 *
 * @param input the header value to parse
 * @return a mutable map of the scheme and all parameters
 * @throws NoSuchElementException if the parameter list is malformed
 */
public static Map<String, String> parse(String input) {
    Tokenizer t = new Tokenizer(input);
    // Explicit type arguments instead of raw types, so callers get
    // String values without unchecked casts.
    Map<String, String> map = new HashMap<String, String>();

    String authScheme = t.match(TOKEN_PATTERN);
    map.put(":auth-scheme", authScheme);

    while (true) {
        while (t.match(COMMA_PATTERN) != null) {
            // Skip null list elements
        }

        if (!t.hasMore()) {
            break;
        }

        String key = t.mustMatch(TOKEN_PATTERN);
        t.mustMatch(EQ_PATTERN);
        String value = t.match(TOKEN_PATTERN);
        if (value == null) {
            value = t.mustMatch(TOKEN_QUOTED_PATTERN);
            // trim quotes
            value = value.substring(1, value.length() - 1);
        }

        map.put(key, value);

        // Anything after a parameter has to start with a comma.
        if (t.hasMore()) {
            t.mustMatch(COMMA_PATTERN);
        }

    }
    return map;
}

/* ****************************************
 * State Machine Mechanism
 * **************************************** */

private static enum ParseState{
PROLOGSPACE,
PROLOGWORD,
KEY,
KEYVALGAP,
VALUE,
QUOTEDVALUE,
SEPARATOR,
COMPLETE;
}

private static final String WHITESPAC

Code Snippets

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AuthorizationHeaderParser {


    /* ****************************************
     * OP Mechanism
     * **************************************** */

    /**
     * Characters that may not appear in a token, already escaped so the
     * string can be spliced into a regex character class.
     */
    private static final String SEPARATORS = "()<>@,;:\\\\\"/\\[\\]?={} \t";

    /** One or more ASCII characters that are neither separators nor control characters. */
    private static final Pattern TOKEN_PATTERN = Pattern
            .compile("[[\\p{ASCII}]&&[^" + SEPARATORS + "]&&[^\\p{Cntrl}]]+");
    /** A literal equals sign. */
    private static final Pattern EQ_PATTERN = Pattern.compile("=");
    /**
     * A double-quoted string, including both quotes. Inside the quotes a
     * backslash always starts a two-character escape (backslash plus any
     * ASCII character). The plain-character alternative excludes the
     * backslash on purpose: if it could also consume a backslash, an
     * escaped quote would be read as the closing quote and the string
     * would end too early.
     */
    private static final Pattern TOKEN_QUOTED_PATTERN = Pattern
            .compile("\"([^\"\\\\]|\\\\\\p{ASCII})*\"");
    /** A literal comma. */
    private static final Pattern COMMA_PATTERN = Pattern.compile(",");
    /** An optional line break (CRLF or LF) followed by one or more spaces or tabs. */
    private static final Pattern LWS_PATTERN = Pattern
            .compile("(\r?\n)?[ \t]+");

    /**
     * Consumes a string from the front, one pattern match at a time.
     * Leading linear whitespace is discarded before every match attempt.
     */
    private static class Tokenizer {
        /** The part of the input that has not been consumed yet. */
        private String remaining;

        /**
         * @param input the text to tokenize
         */
        public Tokenizer(String input) {
            remaining = input;
        }

        /**
         * Drops all leading linear whitespace from the unread text.
         *
         * One match of LWS_PATTERN covers only a single optional line break
         * plus the blanks after it. Input such as blanks followed by a
         * folded line needs several matches, so keep stripping until
         * nothing matches.
         */
        private void skipSpaces() {
            Matcher m = LWS_PATTERN.matcher(remaining);
            // Each match consumes at least one character, so the loop terminates.
            while (m.lookingAt()) {
                remaining = remaining.substring(m.end());
                m = LWS_PATTERN.matcher(remaining);
            }
        }

        /**
         * Tries to match {@code p} at the start of the unread text, after
         * skipping whitespace.
         *
         * @param p the pattern to try
         * @return the matched text (now consumed), or {@code null} if the
         *         pattern does not match at the current position
         */
        public String match(Pattern p) {
            skipSpaces();
            Matcher m = p.matcher(remaining);
            if (!m.lookingAt()) {
                return null;
            }
            String match = m.group();
            // lookingAt() anchors at index 0, so end() is the matched length.
            remaining = remaining.substring(m.end());
            return match;
        }

        /**
         * Like {@link #match(Pattern)}, but a failed match is an error.
         *
         * @param p the pattern that has to match
         * @return the matched text (now consumed)
         * @throws NoSuchElementException if the pattern does not match
         */
        public String mustMatch(Pattern p) {
            String match = match(p);
            if (match == null) {
                throw new NoSuchElementException();
            }
            return match;
        }

        /**
         * Skips whitespace and reports whether any unread text is left.
         *
         * @return {@code true} if at least one character remains
         */
        public boolean hasMore() {
            skipSpaces();
            return remaining.length() > 0;
        }

    }

    public static Map<String, String> parse(String input) {
        Tokenizer t = new Tokenizer(input);
        Map<String, String> map = new HashMap<String, String>();

        String authScheme = t.match(TOKEN_PATTERN);
        map.put(":auth-scheme", authScheme);

        while (true) {
            while (t.match(COMMA_PATTERN) != null) {
                // Skip null list elements
            }

            if (!t.hasMore()) {
                break;
            }

            String key = t.mustMatch(TOKEN_PATTERN);
            t.mustMatch(EQ_PATTERN);
            String value = t.match(TOKEN_PATTERN);
            if (value == null) {
                value = t.mustMatch(TOKEN_QUOTED_PATTERN);
                // trim quotes
                value = value.substring(1, value.length() - 1);
            }

            map.put(key, value);

            if (t.hasMore()) {
                t.mustMatch(COMMA_PATTERN);
            }

        }
        return map;
  

Context

StackExchange Code Review Q#41270, answer score: 2

Revisions (0)

No revisions yet.