Skip to content

Commit

Permalink
Fix: | is not allowed in drive letter of Windows OS path
Browse files Browse the repository at this point in the history
Historically, the `|` (vertical line) character could be used in place of `:`
in the Windows drive letter of a file URL path. It is never used in the drive
letter of a Windows OS path.

See: https://datatracker.ietf.org/doc/html/rfc8089#appendix-E.2.2
  • Loading branch information
rmisev committed Aug 23, 2024
1 parent 05a58fd commit 02e830b
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 9 deletions.
20 changes: 12 additions & 8 deletions include/upa/url.h
Original file line number Diff line number Diff line change
Expand Up @@ -1008,12 +1008,12 @@ constexpr bool is_special_authority_end_char(CharT c) noexcept {
// Windows drive letter

// https://url.spec.whatwg.org/#windows-drive-letter

template <typename CharT>
constexpr bool is_windows_drive(CharT c1, CharT c2) noexcept {
    // A Windows drive letter is an ASCII letter followed by ':' or by the
    // historical '|' separator.
    return is_ascii_alpha(c1) && !(c2 != ':' && c2 != '|');
}

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
template <typename CharT>
constexpr bool is_normalized_windows_drive(CharT c1, CharT c2) noexcept {
return is_ascii_alpha(c1) && c2 == ':';
Expand All @@ -1035,23 +1035,27 @@ inline bool starts_with_windows_drive(const CharT* pointer, const CharT* last) n
#endif
}

// Windows drive letter in OS path
//
// NOTE: Windows OS supports only normalized Windows drive letters.

// Check whether the URL's pathname has a Windows drive, i.e. starts with "/C:/" or is "/C:"
// See also: detail::starts_with_windows_drive
inline bool pathname_has_windows_drive(string_view pathname) noexcept {
inline bool pathname_has_windows_os_drive(string_view pathname) noexcept {
return
(pathname.length() == 3 || (pathname.length() > 3 && is_windows_slash(pathname[3]))) &&
is_windows_slash(pathname[0]) &&
is_windows_drive(pathname[1], pathname[2]);
is_normalized_windows_drive(pathname[1], pathname[2]);
}

/// Check whether the string is an absolute Windows drive path (for example: "C:\\path" or "C:/path")
/// @return pointer to the path after the first (back)slash, or `nullptr` if the path is not
/// an absolute Windows drive path
template <typename CharT>
constexpr const CharT* is_windows_drive_absolute_path(const CharT* pointer, const CharT* last) noexcept {
constexpr const CharT* is_windows_os_drive_absolute_path(const CharT* pointer, const CharT* last) noexcept {
return (last - pointer > 2 &&
detail::is_windows_drive(pointer[0], pointer[1]) &&
detail::is_windows_slash(pointer[2]))
is_normalized_windows_drive(pointer[0], pointer[1]) &&
is_windows_slash(pointer[2]))
? pointer + 3 : nullptr;
}

Expand Down Expand Up @@ -3215,7 +3219,7 @@ inline url url_from_file_path(StrT&& str, file_path_format format = file_path_fo
}
start_of_check = is_unc
? detail::is_unc_path(pointer, last)
: detail::is_windows_drive_absolute_path(pointer, last);
: detail::is_windows_os_drive_absolute_path(pointer, last);
if (start_of_check == nullptr ||
detail::has_dot_dot_segment(start_of_check, last, detail::is_windows_slash<CharT>))
throw url_error(validation_errc::file_unsupported_path, "Unsupported file path");
Expand Down Expand Up @@ -3284,7 +3288,7 @@ inline std::string path_from_file_url(const url& file_url, file_path_format form
if (!detail::is_unc_path(path.data() + 2, path.data() + path.length()))
throw url_error(validation_errc::file_url_invalid_unc, "Invalid UNC path");
} else {
if (detail::pathname_has_windows_drive(path)) {
if (detail::pathname_has_windows_os_drive(path)) {
path.erase(0, 1); // remove leading '\\'
if (path.length() == 2)
path.push_back('\\'); // "C:" -> "C:\"
Expand Down
4 changes: 3 additions & 1 deletion test/test-url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,6 @@ TEST_CASE("url_from_file_path") {
// https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
CHECK(upa::url_from_file_path("C:\\", upa::file_path_format::windows).href() == "file:///C:/");
CHECK(upa::url_from_file_path("C:\\path", upa::file_path_format::windows).href() == "file:///C:/path");
CHECK(upa::url_from_file_path("C|\\path", upa::file_path_format::windows).href() == "file:///C:/path");
CHECK(upa::url_from_file_path("C:/path", upa::file_path_format::windows).href() == "file:///C:/path");
CHECK(upa::url_from_file_path("C:\\path %#", upa::file_path_format::windows).href() == "file:///C:/path%20%25%23");
// UNC: one-character hostname
Expand Down Expand Up @@ -683,6 +682,8 @@ TEST_CASE("url_from_file_path") {
CHECK_THROWS_AS(upa::url_from_file_path("C:path", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("path", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("/", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C|\\path", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C|/path", upa::file_path_format::windows), upa::url_error);
// invalid UNC
CHECK_THROWS_AS(upa::url_from_file_path("\\\\", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h", upa::file_path_format::windows), upa::url_error);
Expand Down Expand Up @@ -754,6 +755,7 @@ TEST_CASE("path_from_file_url") {
CHECK(path_from_file_url("file:///C%3A%5Cpath", upa::file_path_format::windows) == "C:\\path");
// Not a Windows path
CHECK_THROWS_AS(path_from_file_url("file:///", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(path_from_file_url("file:///C%7C", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(path_from_file_url("file:///p", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(path_from_file_url("file:///h/p", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(path_from_file_url("file://////h/p", upa::file_path_format::windows), upa::url_error);
Expand Down

0 comments on commit 02e830b

Please sign in to comment.