patterncppMinor
Serialization: Escape Input String
Viewed 0 times
inputserializationstringescape
Problem
In JSON strings, characters can be escaped with a backslash (`\`).
Here is an iterator that can read such strings and convert the escaped characters to UTF-8
```
#ifndef THORSANVIL_SERIALIZATION_UNICODE_ITERATOR_H
#define THORSANVIL_SERIALIZATION_UNICODE_ITERATOR_H
#include
namespace ThorsAnvil
{
namespace Serialization
{
namespace
{
long convertHexToDec(char x)
{
if (x >= '0' && x = 'A' && x = 'a' && x
struct UnicodePushBackIterator: std::iterator
{
C& cont;
bool lastWasSlash;
int unicodeCount;
uint32_t unicodeValue;
UnicodePushBackIterator(C& c)
: cont(c)
, lastWasSlash(false)
, unicodeCount(0)
{}
UnicodePushBackIterator& operator++() {return *this;}
UnicodePushBackIterator& operator() {return this;}
void operator=(char x)
{
if (unicodeCount)
{
if (unicodeCount == 6)
{
if (x != '\\')
{
throw std::runtime_error("ThorsAnvil::Serialization::UnicodeIterator: Push->Surrogate pair(No Slash): \\uD8xx Must be followed by \\uDCxx");
}
--unicodeCount;
}
else if (unicodeCount == 5)
{
if (x != 'u')
{
throw std::runtime_error("ThorsAnvil::Serialization::UnicodeIterator: Push->Surrogate pair(No u): \\uD8xx Must be followed by \\uDCxx");
}
--unicodeCount;
}
else
{
unicodeValue > 6)));
cont.push_back(0x80 |((unicodeValue >> 0) & 0x3F));
}
else if (unicodeValue > 12)));
cont.push_back(0x80 |((unicodeValue >> 6) & 0x3F));
cont.push_back(0x80 |((unicodeValue >> 0) & 0x3F));
}
else
{
\\.Here is an iterator that can read such strings and convert the escaped characters to UTF-8
```
#ifndef THORSANVIL_SERIALIZATION_UNICODE_ITERATOR_H
#define THORSANVIL_SERIALIZATION_UNICODE_ITERATOR_H
#include
namespace ThorsAnvil
{
namespace Serialization
{
namespace
{
long convertHexToDec(char x)
{
if (x >= '0' && x = 'A' && x = 'a' && x
struct UnicodePushBackIterator: std::iterator
{
C& cont;
bool lastWasSlash;
int unicodeCount;
uint32_t unicodeValue;
UnicodePushBackIterator(C& c)
: cont(c)
, lastWasSlash(false)
, unicodeCount(0)
{}
UnicodePushBackIterator& operator++() {return *this;}
UnicodePushBackIterator& operator() {return this;}
void operator=(char x)
{
if (unicodeCount)
{
if (unicodeCount == 6)
{
if (x != '\\')
{
throw std::runtime_error("ThorsAnvil::Serialization::UnicodeIterator: Push->Surrogate pair(No Slash): \\uD8xx Must be followed by \\uDCxx");
}
--unicodeCount;
}
else if (unicodeCount == 5)
{
if (x != 'u')
{
throw std::runtime_error("ThorsAnvil::Serialization::UnicodeIterator: Push->Surrogate pair(No u): \\uD8xx Must be followed by \\uDCxx");
}
--unicodeCount;
}
else
{
unicodeValue > 6)));
cont.push_back(0x80 |((unicodeValue >> 0) & 0x3F));
}
else if (unicodeValue > 12)));
cont.push_back(0x80 |((unicodeValue >> 6) & 0x3F));
cont.push_back(0x80 |((unicodeValue >> 0) & 0x3F));
}
else
{
Solution
The newsletter suggested I try to answer this, so here is my attempt:
This looks like a lot of repeated code. I don't think it can be shortened much, but I would create a function to do the conversion:
More like this:
Then, you can change the switch statement like this:
Further down, you could reuse that again:
Otherwise, it looks good to me.
This looks like a lot of repeated code. I don't think it can be shortened much, but I would create a function to do the conversion:
// Escape handling as quoted from the question: each recognised escape letter
// appends its decoded character to cont, while 'u' resets the unicode state.
// There is no default case, so any other value of x appends nothing.
switch(x)
{
case '"': cont.push_back('"'); break;
case '\\': cont.push_back('\\'); break;
case '/': cont.push_back('/'); break;
case 'b': cont.push_back('\b'); break;
case 'f': cont.push_back('\f'); break;
case 'n': cont.push_back('\n'); break;
case 'r': cont.push_back('\r'); break;
case 't': cont.push_back('\t'); break;
// NOTE(review): presumably 4 is the number of hex digits still expected — confirm against the full iterator.
case 'u': unicodeCount = 4; unicodeValue = 0; break;
}
More like this:
/// Translates the character that follows a backslash in a JSON string into
/// the character the escape sequence stands for.
///
/// @param c  The character immediately after the backslash.
/// @return   `c` itself for `"`, `\` and `/`; the matching control character
///           for `b`, `f`, `n`, `r` and `t`; and `c` unchanged for anything
///           else.
///
/// The `u` escape is not handled here: it needs four further hex digits, so
/// callers must test for it before calling this function.
char convertChar(char c)
{
    switch (c)
    {
        // These three escapes decode to themselves.
        case '"':
        case '\\':
        case '/':
            return c;
        case 'b': return '\b';
        case 'f': return '\f';
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        // Pass unknown characters through so every path returns a value;
        // without this, control would fall off the end of a non-void function.
        default:  return c;
    }
}
Then, you can change the switch statement like this:
// 'u' is handled separately: it resets the counter and the accumulated value
// instead of appending a character.
if (x == 'u')
{
unicodeCount = 4;
unicodeValue = 0;
}
else
{
// Every other character is translated by convertChar and appended to cont.
cont.push_back(convertChar(x));
}
Further down, you could reuse that again:
// Dispatch on the character held in next.
switch(next)
{
// The simple escapes share one branch: convertChar supplies the decoded
// character, which is stored in cont[0].
case '"':
case '\\':
case '/':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
cont[0] = convertChar(next); break;
// Unicode escapes are delegated to decodeUnicode (not shown here).
case 'u': decodeUnicode(); break;
// Any other character is stored unchanged.
default: cont[0] = next; break;
}
Otherwise, it looks good to me.
Context
StackExchange Code Review Q#79281, answer score: 3
Revisions (0)
No revisions yet.