Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,14 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS})
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_punycode.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_idna.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake")
Comment thread
jviotti marked this conversation as resolved.
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_dns.cmake")
elseif(component STREQUAL "email")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_punycode.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_idna.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_dns.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_email.cmake")
elseif(component STREQUAL "uri")
Expand Down Expand Up @@ -184,6 +186,7 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS})
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jose.cmake")
elseif(component STREQUAL "semver")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_semver.cmake")
elseif(component STREQUAL "gzip")
find_dependency(LibDeflate CONFIG)
Expand Down
3 changes: 2 additions & 1 deletion src/core/dns/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ endif()
target_link_libraries(sourcemeta_core_dns
PRIVATE sourcemeta::core::unicode
PRIVATE sourcemeta::core::punycode
PRIVATE sourcemeta::core::idna)
PRIVATE sourcemeta::core::idna
PRIVATE sourcemeta::core::text)
12 changes: 3 additions & 9 deletions src/core/dns/hostname.cc
Original file line number Diff line number Diff line change
@@ -1,19 +1,12 @@
#include <sourcemeta/core/dns.h>
#include <sourcemeta/core/idna.h>
#include <sourcemeta/core/text.h>

#include <string> // std::string
#include <string_view> // std::string_view

namespace sourcemeta::core {

// Implements the let-dig production of RFC 952 §B (ALPHA / DIGIT).
// RFC 1123 §2.1: the first character of a label is a letter or a digit
static constexpr auto is_let_dig(const char character) -> bool {
  const bool digit{character >= '0' && character <= '9'};
  const bool upper{character >= 'A' && character <= 'Z'};
  const bool lower{character >= 'a' && character <= 'z'};
  return upper || lower || digit;
}

auto is_hostname(const std::string_view value) -> bool {
// RFC 952 §B: <hname> requires at least one <name>
if (value.empty()) {
Expand Down Expand Up @@ -46,7 +39,8 @@ auto is_hostname(const std::string_view value) -> bool {
continue;
}

if (is_let_dig(character)) {
// RFC 952 §B: let-dig = ALPHA / DIGIT
if (is_alphanum(character)) {
last_was_hyphen = false;
position += 1;
label_has_content = true;
Expand Down
2 changes: 2 additions & 0 deletions src/core/email/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ target_link_libraries(sourcemeta_core_email
PRIVATE sourcemeta::core::ip)
target_link_libraries(sourcemeta_core_email
PRIVATE sourcemeta::core::unicode)
# The email helpers header includes <sourcemeta/core/text.h> and calls
# sourcemeta::core::is_alphanum, so the text library is linked privately
target_link_libraries(sourcemeta_core_email
PRIVATE sourcemeta::core::text)
16 changes: 4 additions & 12 deletions src/core/email/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define SOURCEMETA_CORE_EMAIL_HELPERS_H_

#include <sourcemeta/core/ip.h>
#include <sourcemeta/core/text.h>

#include <cstdint> // std::uint8_t, std::uint16_t
#include <string_view> // std::string_view
Expand Down Expand Up @@ -34,9 +35,7 @@ inline constexpr auto is_atext(const char character) -> bool {
case '~':
return true;
default:
return (character >= 'A' && character <= 'Z') ||
(character >= 'a' && character <= 'z') ||
(character >= '0' && character <= '9');
return sourcemeta::core::is_alphanum(character);
}
}

Expand All @@ -47,13 +46,6 @@ inline constexpr auto is_qtext_smtp(const unsigned char character) -> bool {
(character >= 93 && character <= 126);
}

// Character test for the Let-dig production of RFC 5321 §4.1.2,
// which is defined as ALPHA / DIGIT
inline constexpr auto is_let_dig(const char character) -> bool {
  if (character >= '0' && character <= '9') {
    return true;
  }

  if (character >= 'a' && character <= 'z') {
    return true;
  }

  return character >= 'A' && character <= 'Z';
}

// RFC 5321 §4.1.3: dcontent = %d33-90 / %d94-126
inline constexpr auto is_dcontent(const unsigned char character) -> bool {
return (character >= 33 && character <= 90) ||
Expand All @@ -63,13 +55,13 @@ inline constexpr auto is_dcontent(const unsigned char character) -> bool {
// RFC 5321 §4.1.2: Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
// RFC 5321 §4.1.3: Standardized-tag = Ldh-str
inline constexpr auto is_ldh_str(const std::string_view value) -> bool {
if (value.empty() || !is_let_dig(value.back())) {
if (value.empty() || !sourcemeta::core::is_alphanum(value.back())) {
return false;
}
for (std::string_view::size_type position{0}; position + 1 < value.size();
position += 1) {
const auto character{value[position]};
if (!is_let_dig(character) && character != '-') {
if (!sourcemeta::core::is_alphanum(character) && character != '-') {
return false;
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/core/semver/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME semver
SOURCES semver.cc)
target_link_libraries(sourcemeta_core_semver
PUBLIC sourcemeta::core::preprocessor)
# semver.cc includes <sourcemeta/core/text.h> for is_digit and is_alphanum.
# The dependency is only used by the implementation, hence PRIVATE
target_link_libraries(sourcemeta_core_semver
PRIVATE sourcemeta::core::text)

if(SOURCEMETA_CORE_INSTALL)
sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME semver)
Expand Down
24 changes: 9 additions & 15 deletions src/core/semver/semver.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <sourcemeta/core/semver.h>
#include <sourcemeta/core/text.h>

#include <array> // std::array
#include <charconv> // std::to_chars
Expand All @@ -8,17 +9,8 @@

namespace {

// Whether the character is one of the ASCII decimal digits '0' through '9'
auto is_digit(const char character) -> bool {
  return !(character < '0' || character > '9');
}

// Whether the character is an ASCII letter, in either case
auto is_letter(const char character) -> bool {
  const bool upper{character >= 'A' && character <= 'Z'};
  const bool lower{character >= 'a' && character <= 'z'};
  return upper || lower;
}

auto is_identifier_character(const char character) -> bool {
return is_digit(character) || is_letter(character) || character == '-';
return sourcemeta::core::is_alphanum(character) || character == '-';
}

constexpr auto UINT64_MAX_VALUE = std::numeric_limits<std::uint64_t>::max();
Expand All @@ -30,17 +22,19 @@ enum class NumericParseResult : std::uint8_t { success, invalid, overflow };
auto parse_numeric_identifier(const std::string_view input,
std::size_t &position, std::uint64_t &result)
-> NumericParseResult {
if (position >= input.size() || !is_digit(input[position])) {
if (position >= input.size() ||
!sourcemeta::core::is_digit(input[position])) {
return NumericParseResult::invalid;
}

if (input[position] == '0' && position + 1 < input.size() &&
is_digit(input[position + 1])) {
sourcemeta::core::is_digit(input[position + 1])) {
return NumericParseResult::invalid;
}

std::uint64_t value = 0;
while (position < input.size() && is_digit(input[position])) {
while (position < input.size() &&
sourcemeta::core::is_digit(input[position])) {
const auto digit = static_cast<std::uint64_t>(input[position] - '0');
if (value > UINT64_MAX_DIV_10 ||
(value == UINT64_MAX_DIV_10 && digit > UINT64_MAX_MOD_10)) {
Expand All @@ -67,7 +61,7 @@ auto validate_pre_release_identifier(const std::string_view identifier)
return false;
}

if (!is_digit(character)) {
if (!sourcemeta::core::is_digit(character)) {
has_non_digit = true;
}
}
Expand Down Expand Up @@ -130,7 +124,7 @@ auto classify_identifier(const std::string_view identifier) noexcept
-> IdentifierInfo {
std::uint64_t value = 0;
for (const auto character : identifier) {
if (!is_digit(character)) {
if (!sourcemeta::core::is_digit(character)) {
return {.is_numeric = false, .overflowed = false, .numeric_value = 0};
}

Expand Down
6 changes: 3 additions & 3 deletions src/core/uri/grammar.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef SOURCEMETA_CORE_URI_GRAMMAR_H_
#define SOURCEMETA_CORE_URI_GRAMMAR_H_

#include <cctype> // std::isalnum, std::isalpha, std::isdigit
#include <sourcemeta/core/text.h>

namespace sourcemeta::core {

Expand Down Expand Up @@ -50,7 +50,7 @@ constexpr char URI_PERCENT = '%';
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
// See https://www.rfc-editor.org/rfc/rfc3986#section-2.3
inline auto uri_is_unreserved(const char character) -> bool {
if (std::isalnum(static_cast<unsigned char>(character))) {
if (is_alphanum(character)) {
return true;
}

Expand Down Expand Up @@ -89,7 +89,7 @@ inline auto uri_is_sub_delim(const char character) -> bool {
// Scheme characters: ALPHA / DIGIT / "+" / "-" / "."
// See https://www.rfc-editor.org/rfc/rfc3986#section-3.1
inline auto uri_is_scheme_char(const char character) -> bool {
if (std::isalnum(static_cast<unsigned char>(character))) {
if (is_alphanum(character)) {
return true;
}

Expand Down
63 changes: 63 additions & 0 deletions src/lang/text/include/sourcemeta/core/text.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,69 @@ inline auto is_lowercase(const String &value) noexcept -> bool {
SOURCEMETA_CORE_TEXT_EXPORT
auto is_lowercase(const std::filesystem::path &value) noexcept -> bool;

/// @ingroup text
///
/// Check whether a single character falls in one of the two ASCII letter
/// ranges, `A` to `Z` or `a` to `z`. Only `char`, `signed char`,
/// `unsigned char`, and `wchar_t` arguments are accepted. For example:
///
/// ```cpp
/// #include <sourcemeta/core/text.h>
/// #include <cassert>
///
/// assert(sourcemeta::core::is_alpha('q'));
/// assert(sourcemeta::core::is_alpha('Q'));
/// assert(!sourcemeta::core::is_alpha('_'));
/// ```
template <typename Character>
  requires std::same_as<Character, char> ||
           std::same_as<Character, signed char> ||
           std::same_as<Character, unsigned char> ||
           std::same_as<Character, wchar_t>
inline constexpr auto is_alpha(const Character character) noexcept -> bool {
  const bool lower{character >= 'a' && character <= 'z'};
  const bool upper{character >= 'A' && character <= 'Z'};
  return lower || upper;
}

/// @ingroup text
///
/// Check whether a single character is one of the ASCII decimal digits,
/// `0` to `9`. Only `char`, `signed char`, `unsigned char`, and `wchar_t`
/// arguments are accepted. For example:
///
/// ```cpp
/// #include <sourcemeta/core/text.h>
/// #include <cassert>
///
/// assert(sourcemeta::core::is_digit('0'));
/// assert(sourcemeta::core::is_digit('9'));
/// assert(!sourcemeta::core::is_digit('x'));
/// ```
template <typename Character>
  requires std::same_as<Character, char> ||
           std::same_as<Character, signed char> ||
           std::same_as<Character, unsigned char> ||
           std::same_as<Character, wchar_t>
inline constexpr auto is_digit(const Character character) noexcept -> bool {
  return !(character < '0' || character > '9');
}

/// @ingroup text
///
/// Check whether a single character is either an ASCII letter or an ASCII
/// decimal digit. Only `char`, `signed char`, `unsigned char`, and `wchar_t`
/// arguments are accepted. For example:
///
/// ```cpp
/// #include <sourcemeta/core/text.h>
/// #include <cassert>
///
/// assert(sourcemeta::core::is_alphanum('Z'));
/// assert(sourcemeta::core::is_alphanum('0'));
/// assert(!sourcemeta::core::is_alphanum('.'));
/// ```
template <typename Character>
  requires std::same_as<Character, char> ||
           std::same_as<Character, signed char> ||
           std::same_as<Character, unsigned char> ||
           std::same_as<Character, wchar_t>
inline constexpr auto is_alphanum(const Character character) noexcept -> bool {
  if (is_digit(character)) {
    return true;
  }

  return is_alpha(character);
}

/// @ingroup text
///
/// Truncate a string in place to at most `maximum_length` bytes, appending
Expand Down
1 change: 1 addition & 0 deletions test/text/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME text
SOURCES text_to_title_case_test.cc text_to_lowercase_test.cc
text_is_lowercase_test.cc
text_is_alpha_test.cc text_is_digit_test.cc text_is_alphanum_test.cc
text_truncate_test.cc text_remove_suffix_ignore_case_test.cc
text_equals_ignore_case_test.cc
text_trim_test.cc text_take_until_test.cc text_split_once_test.cc
Expand Down
34 changes: 34 additions & 0 deletions test/text/text_is_alpha_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include <gtest/gtest.h>

#include <sourcemeta/core/text.h>

// The first, an interior, and the last lowercase ASCII letter are accepted
TEST(Text_is_alpha, lowercase_letters) {
EXPECT_TRUE(sourcemeta::core::is_alpha('a'));
EXPECT_TRUE(sourcemeta::core::is_alpha('m'));
EXPECT_TRUE(sourcemeta::core::is_alpha('z'));
}

// The first, an interior, and the last uppercase ASCII letter are accepted
TEST(Text_is_alpha, uppercase_letters) {
EXPECT_TRUE(sourcemeta::core::is_alpha('A'));
EXPECT_TRUE(sourcemeta::core::is_alpha('M'));
EXPECT_TRUE(sourcemeta::core::is_alpha('Z'));
}

// Digits are not letters: both ends of the 0-9 range are rejected
TEST(Text_is_alpha, digits) {
EXPECT_FALSE(sourcemeta::core::is_alpha('0'));
EXPECT_FALSE(sourcemeta::core::is_alpha('9'));
}

// Hyphen, underscore, and space are rejected
TEST(Text_is_alpha, punctuation) {
EXPECT_FALSE(sourcemeta::core::is_alpha('-'));
EXPECT_FALSE(sourcemeta::core::is_alpha('_'));
EXPECT_FALSE(sourcemeta::core::is_alpha(' '));
}

// Guards against off-by-one errors in the range comparisons
TEST(Text_is_alpha, ascii_boundaries) {
// The ASCII characters immediately outside the two letter ranges:
// '@' precedes 'A', '[' follows 'Z', '`' precedes 'a', and '{' follows 'z'
EXPECT_FALSE(sourcemeta::core::is_alpha('@'));
EXPECT_FALSE(sourcemeta::core::is_alpha('['));
EXPECT_FALSE(sourcemeta::core::is_alpha('`'));
EXPECT_FALSE(sourcemeta::core::is_alpha('{'));
}
20 changes: 20 additions & 0 deletions test/text/text_is_alphanum_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include <gtest/gtest.h>

#include <sourcemeta/core/text.h>

// One lowercase and one uppercase ASCII letter are accepted
TEST(Text_is_alphanum, letters) {
EXPECT_TRUE(sourcemeta::core::is_alphanum('a'));
EXPECT_TRUE(sourcemeta::core::is_alphanum('Z'));
}

// Both ends of the 0-9 range are accepted
TEST(Text_is_alphanum, digits) {
EXPECT_TRUE(sourcemeta::core::is_alphanum('0'));
EXPECT_TRUE(sourcemeta::core::is_alphanum('9'));
}

// Hyphen, underscore, dot, and space are neither letters nor digits
TEST(Text_is_alphanum, punctuation) {
EXPECT_FALSE(sourcemeta::core::is_alphanum('-'));
EXPECT_FALSE(sourcemeta::core::is_alphanum('_'));
EXPECT_FALSE(sourcemeta::core::is_alphanum('.'));
EXPECT_FALSE(sourcemeta::core::is_alphanum(' '));
}
25 changes: 25 additions & 0 deletions test/text/text_is_digit_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include <gtest/gtest.h>

#include <sourcemeta/core/text.h>

// The first, an interior, and the last ASCII decimal digit are accepted
TEST(Text_is_digit, digits) {
EXPECT_TRUE(sourcemeta::core::is_digit('0'));
EXPECT_TRUE(sourcemeta::core::is_digit('5'));
EXPECT_TRUE(sourcemeta::core::is_digit('9'));
}

// Letters of either case are rejected
TEST(Text_is_digit, letters) {
EXPECT_FALSE(sourcemeta::core::is_digit('a'));
EXPECT_FALSE(sourcemeta::core::is_digit('Z'));
}

// Hyphen and space are rejected
TEST(Text_is_digit, punctuation) {
EXPECT_FALSE(sourcemeta::core::is_digit('-'));
EXPECT_FALSE(sourcemeta::core::is_digit(' '));
}

// Guards against off-by-one errors in the range comparison
TEST(Text_is_digit, ascii_boundaries) {
// The ASCII characters immediately outside the digit range:
// '/' precedes '0' and ':' follows '9'
EXPECT_FALSE(sourcemeta::core::is_digit('/'));
EXPECT_FALSE(sourcemeta::core::is_digit(':'));
}
Loading