Skip to content

Commit

Permalink
Rename percent encoding functions to snake_case
Browse files Browse the repository at this point in the history
* AppendEscapedChar -> append_percent_encoded_byte

* AppendUTF8EscapedChar -> append_utf8_percent_encoded_char

* Remove AppendUTF8EscapedValue function and move its content to
  append_utf8_percent_encoded_char function

* AppendStringOfType -> append_utf8_percent_encoded
  • Loading branch information
rmisev committed Oct 24, 2023
1 parent 429cbed commit 28bc95e
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 45 deletions.
28 changes: 14 additions & 14 deletions include/upa/url.h
Original file line number Diff line number Diff line change
Expand Up @@ -1412,7 +1412,7 @@ inline bool url::username(StrT&& str) {

std::string& str_username = urls.start_part(url::USERNAME);
// UTF-8 percent encode it using the userinfo encode set
detail::AppendStringOfType(inp.begin(), inp.end(), userinfo_no_encode_set, str_username);
detail::append_utf8_percent_encoded(inp.begin(), inp.end(), userinfo_no_encode_set, str_username);
urls.save_part();
return true;
}
Expand All @@ -1428,7 +1428,7 @@ inline bool url::password(StrT&& str) {

std::string& str_password = urls.start_part(url::PASSWORD);
// UTF-8 percent encode it using the userinfo encode set
detail::AppendStringOfType(inp.begin(), inp.end(), userinfo_no_encode_set, str_password);
detail::append_utf8_percent_encoded(inp.begin(), inp.end(), userinfo_no_encode_set, str_password);
urls.save_part();
return true;
}
Expand Down Expand Up @@ -1812,12 +1812,12 @@ inline validation_errc url_parser::url_parse(url_serializer& urls, const CharT*
if (not_empty_password || std::distance(pointer, it_colon) > 0 /*not empty username*/) {
// username
std::string& str_username = urls.start_part(url::USERNAME);
detail::AppendStringOfType(pointer, it_colon, userinfo_no_encode_set, str_username); // UTF-8 percent encode, @ -> %40
detail::append_utf8_percent_encoded(pointer, it_colon, userinfo_no_encode_set, str_username); // UTF-8 percent encode, @ -> %40
urls.save_part();
// password
if (not_empty_password) {
std::string& str_password = urls.start_part(url::PASSWORD);
detail::AppendStringOfType(it_colon + 1, it_eta, userinfo_no_encode_set, str_password); // UTF-8 percent encode, @ -> %40
detail::append_utf8_percent_encoded(it_colon + 1, it_eta, userinfo_no_encode_set, str_password); // UTF-8 percent encode, @ -> %40
urls.save_part();
}
}
Expand Down Expand Up @@ -2173,19 +2173,19 @@ inline validation_errc url_parser::url_parse(url_serializer& urls, const CharT*
// the result to url’s query.
// TODO: now supports UTF-8 encoding only, maybe later add other encodings
std::string& str_query = urls.start_part(url::QUERY);
// detail::AppendStringOfType(pointer, end_of_query, query_cpset, str_query);
// detail::append_utf8_percent_encoded(pointer, end_of_query, query_cpset, str_query);
while (pointer != end_of_query) {
// UTF-8 percent encode c using the query percent-encode set
// and ignore '\0'
const auto uch = static_cast<UCharT>(*pointer);
if (uch >= 0x80) {
// invalid utf-8/16/32 sequences will be replaced with kUnicodeReplacementCharacter
detail::AppendUTF8EscapedChar(pointer, end_of_query, str_query);
detail::append_utf8_percent_encoded_char(pointer, end_of_query, str_query);
} else {
// Just append the 7-bit character, possibly escaping it.
const auto uc = static_cast<unsigned char>(uch);
if (!detail::is_char_in_set(uc, query_cpset))
detail::AppendEscapedChar(uch, str_query);
detail::append_percent_encoded_byte(uch, str_query);
else
str_query.push_back(uc);
++pointer;
Expand Down Expand Up @@ -2215,15 +2215,15 @@ inline validation_errc url_parser::url_parse(url_serializer& urls, const CharT*
const auto uch = static_cast<UCharT>(*pointer);
if (uch >= 0x80) {
// invalid utf-8/16/32 sequences will be replaced with kUnicodeReplacementCharacter
detail::AppendUTF8EscapedChar(pointer, last, str_frag);
detail::append_utf8_percent_encoded_char(pointer, last, str_frag);
} else {
// Just append the 7-bit character, possibly escaping it.
const auto uc = static_cast<unsigned char>(uch);
if (detail::is_char_in_set(uc, fragment_no_encode_set)) {
str_frag.push_back(uc);
} else {
// other characters are escaped
detail::AppendEscapedChar(uch, str_frag);
detail::append_percent_encoded_byte(uch, str_frag);
}
++pointer;
}
Expand Down Expand Up @@ -2329,12 +2329,12 @@ inline bool url_parser::do_path_segment(const CharT* pointer, const CharT* last,
const auto uch = static_cast<UCharT>(*pointer);
if (uch >= 0x80) {
// invalid utf-8/16/32 sequences will be replaced with 0xfffd
success &= detail::AppendUTF8EscapedChar(pointer, last, output);
success &= detail::append_utf8_percent_encoded_char(pointer, last, output);
} else {
// Just append the 7-bit character, possibly escaping it.
const auto uc = static_cast<unsigned char>(uch);
if (!detail::is_char_in_set(uc, path_no_encode_set))
detail::AppendEscapedChar(uc, output);
detail::append_percent_encoded_byte(uc, output);
else
output.push_back(uc);
++pointer;
Expand All @@ -2358,11 +2358,11 @@ inline bool url_parser::do_simple_path(const CharT* pointer, const CharT* last,
const auto uch = static_cast<UCharT>(*pointer);
if (uch >= 0x7f) {
// invalid utf-8/16/32 sequences will be replaced with 0xfffd
success &= detail::AppendUTF8EscapedChar(pointer, last, output);
success &= detail::append_utf8_percent_encoded_char(pointer, last, output);
} else {
// Just append the 7-bit character, escaping C0 control chars:
if (uch <= 0x1f)
detail::AppendEscapedChar(uch, output);
detail::append_percent_encoded_byte(uch, output);
else
output.push_back(static_cast<unsigned char>(uch));
++pointer;
Expand Down Expand Up @@ -3045,7 +3045,7 @@ inline url url_from_file_path(StrT&& str) {
}

// make URL
detail::AppendStringOfType(pointer, last, *no_encode_set, str_url);
detail::append_utf8_percent_encoded(pointer, last, *no_encode_set, str_url);
return url(str_url);
}

Expand Down
6 changes: 3 additions & 3 deletions include/upa/url_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ inline validation_errc host_parser::parse_opaque_host(const CharT* first, const
std::string& str_host = dest.hostStart();

//TODO: UTF-8 percent encode it using the C0 control percent-encode set
//detail::AppendStringOfType(first, last, detail::CHAR_C0_CTRL, str_host);
//detail::append_utf8_percent_encoded(first, last, detail::CHAR_C0_CTRL, str_host);
using UCharT = typename std::make_unsigned<CharT>::type;

const CharT* pointer = first;
Expand All @@ -295,11 +295,11 @@ inline validation_errc host_parser::parse_opaque_host(const CharT* first, const
const auto uch = static_cast<UCharT>(*pointer);
if (uch >= 0x7f) {
// invalid utf-8/16/32 sequences will be replaced with 0xfffd
detail::AppendUTF8EscapedChar(pointer, last, str_host);
detail::append_utf8_percent_encoded_char(pointer, last, str_host);
} else {
// Just append the 7-bit character, escaping C0 control chars:
if (uch <= 0x1f)
detail::AppendEscapedChar(uch, str_host);
detail::append_percent_encoded_byte(uch, str_host);
else
str_host.push_back(static_cast<unsigned char>(uch));
++pointer;
Expand Down
47 changes: 19 additions & 28 deletions include/upa/url_percent_encode.h
Original file line number Diff line number Diff line change
Expand Up @@ -445,61 +445,52 @@ inline bool decode_hex_to_byte(const CharT*& first, const CharT* last, unsigned
// ----------------------------------------------------------------------------
// Percent encode

// Write a single character, escaped, to the output. This always escapes: it
// does no checking that the character requires escaping.
// Escaping makes sense only for 8-bit chars, so code works in all cases of
// input parameters (8/16bit).
// Percent-encodes byte and appends to string
// See: https://url.spec.whatwg.org/#percent-encode

template<typename UINCHAR, typename OUTCHAR>
inline void AppendEscapedChar(UINCHAR ch, std::basic_string<OUTCHAR>& output) {
inline void append_percent_encoded_byte(UINCHAR ch, std::basic_string<OUTCHAR>& output) {
output.push_back('%');
output.push_back(kHexCharLookup[(ch >> 4) & 0xf]);
output.push_back(kHexCharLookup[ch & 0xf]);
}

// Writes the given character to the output as UTF-8, escaping ALL
// characters (even when they are ASCII). This does NO checking of the
// validity of the Unicode characters; the caller should ensure that the value
// it is appending is valid to append.
inline void AppendUTF8EscapedValue(unsigned char_value, std::string& output) {
url_utf::append_utf8<std::string, AppendEscapedChar>(char_value, output);
}

// Writes the given character to the output as UTF-8, escaped. Call this
// function only when the input is wide. Returns true on success. Failure
// means there was some problem with the encoding, we'll still try to
// update the |*begin| pointer and add a placeholder character to the
// output so processing can continue.
// Reads one character from the string [first, last), converts it to UTF-8, then
// percent-encodes, and appends to `output`. Replaces invalid UTF-8, UTF-16 or UTF-32
// sequences in input with Unicode replacement characters (U+FFFD) if present.

template <typename CharT>
inline bool AppendUTF8EscapedChar(const CharT*& first, const CharT* last, std::string& output) {
inline bool append_utf8_percent_encoded_char(const CharT*& first, const CharT* last, std::string& output) {
// url_utf::read_utf_char(..) will handle invalid characters for us and give
// us the kUnicodeReplacementCharacter, so we don't have to do special
// checking after failure, just pass through the failure to the caller.
const auto cp_res = url_utf::read_utf_char(first, last);
AppendUTF8EscapedValue(cp_res.value, output);
// convert cp_res.value code point to UTF-8, then percent encode and append to `output`
url_utf::append_utf8<std::string, append_percent_encoded_byte>(cp_res.value, output);
return cp_res.result;
}

// Appends the given string to the output, escaping characters that do not
// match the given |charsType| in CharsType.
// Converts the input string [first, last) to UTF-8, then percent-encodes bytes not
// in `cpset`, and appends to `output`. Replaces invalid UTF-8, UTF-16 or UTF-32
// sequences in input with Unicode replacement characters (U+FFFD) if present.

template<typename CharT>
void AppendStringOfType(const CharT* first, const CharT* last, const code_point_set& cpset, std::string& output) {
void append_utf8_percent_encoded(const CharT* first, const CharT* last, const code_point_set& cpset, std::string& output) {
using UCharT = typename std::make_unsigned<CharT>::type;

for (auto it = first; it < last; ) {
const auto ch = static_cast<UCharT>(*it);
if (ch >= 0x80) {
// invalid utf-8/16/32 sequences will be replaced with kUnicodeReplacementCharacter
AppendUTF8EscapedChar(it, last, output);
append_utf8_percent_encoded_char(it, last, output);
} else {
// Just append the 7-bit character, possibly escaping it.
// Just append the 7-bit character, possibly percent encoding it.
const auto uch = static_cast<unsigned char>(ch);
if (is_char_in_set(uch, cpset)) {
output.push_back(uch);
} else {
// other characters are escaped
AppendEscapedChar(uch, output);
// other characters are percent encoded
append_percent_encoded_byte(uch, output);
}
++it;
}
Expand Down Expand Up @@ -579,7 +570,7 @@ inline std::string percent_encode(StrT&& str, const code_point_set& no_encode_se
const auto inp = make_str_arg(std::forward<StrT>(str));

std::string out;
detail::AppendStringOfType(inp.begin(), inp.end(), no_encode_set, out);
detail::append_utf8_percent_encoded(inp.begin(), inp.end(), no_encode_set, out);
return out;
}

Expand Down

0 comments on commit 28bc95e

Please sign in to comment.