Merge pull request #65 from lichray/scan

Simplify parsing numeric arguments with .scan
2025-08-23 22:04:40 +00:00 · 2019-11-26 08:12:24 -06:00 · 2019-11-26 08:12:24 -06:00 · 4277e68e57
commit 4277e68e57
parent c851668339 e8a44d289d
5 changed files with 572 additions and 4 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -4,6 +4,13 @@ matrix:
      dist: bionic
      language: cpp
      compiler: gcc
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+          packages:
+            - g++-8
+      env: CXX=g++-8 CC=gcc-8
    - os: osx
      osx_image: xcode10.2
      language: cpp
--- a/README.md
+++ b/README.md
@ -587,10 +587,13 @@ $ ./main fex
 baz
 ```

-## Supported Compilers
-* GCC >= 7.0.0
-* Clang >= 4.0
-* MSVC >= 2017
+## Supported Toolchains
+
+| Compiler             | Standard Library | Test Environment   |
+| :------------------- | :--------------- | :----------------- |
+| GCC >= 8.3.0         | libstdc++        | Ubuntu 18.04       |
+| Clang >= 7.0.0       | libc++           | Xcode 10.2         |
+| MSVC >= 14.16        | Microsoft STL    | Visual Studio 2017 |

 ## Contributing
 Contributions are welcome, have a look at the [CONTRIBUTING.md](CONTRIBUTING.md) document for more information.
--- a/include/argparse.hpp
+++ b/include/argparse.hpp
@ -30,6 +30,8 @@ SOFTWARE.
 #pragma once
 #include <algorithm>
 #include <any>
+#include <cerrno>
+#include <charconv>
 #include <cstdlib>
 #include <functional>
 #include <iostream>
@ -76,6 +78,25 @@ struct is_string_like
    : std::conjunction<std::is_constructible<std::string, T>,
                       std::is_convertible<T, std::string_view>> {};

+// True only when T is exactly one of the five signed integer types listed
+// here; any other type (including cv-qualified ones) yields false.
+template <typename T>
+constexpr bool standard_signed_integer =
+    std::is_same_v<T, signed char> || std::is_same_v<T, short int> ||
+    std::is_same_v<T, int> || std::is_same_v<T, long int> ||
+    std::is_same_v<T, long long int>;
+
+// True only when T is exactly one of the five unsigned integer types listed
+// here.
+template <typename T>
+constexpr bool standard_unsigned_integer =
+    std::is_same_v<T, unsigned char> ||
+    std::is_same_v<T, unsigned short int> ||
+    std::is_same_v<T, unsigned int> ||
+    std::is_same_v<T, unsigned long int> ||
+    std::is_same_v<T, unsigned long long int>;
+
+// True when T satisfies either of the two traits above.
+template <typename T>
+constexpr bool standard_integer =
+    standard_signed_integer<T> || standard_unsigned_integer<T>;
+
 template <class F, class Tuple, class Extra, size_t... I>
 constexpr decltype(auto) apply_plus_one_impl(F &&f, Tuple &&t, Extra &&x,
                                             std::index_sequence<I...>) {
@ -91,6 +112,155 @@ constexpr decltype(auto) apply_plus_one(F &&f, Tuple &&t, Extra &&x) {
          std::tuple_size_v<std::remove_reference_t<Tuple>>>{});
 }

+// Returns the pair of character pointers [begin, end) that delimit `s`.
+constexpr auto pointer_range(std::string_view s) noexcept {
+  const char *begin = s.data();
+  const char *end = begin + s.size();
+  return std::tuple(begin, end);
+}
+
+// Reports whether `s` begins with `prefix`. Note the argument order: the
+// prefix comes first, the text being inspected second.
+template <class CharT, class Traits>
+constexpr bool starts_with(std::basic_string_view<CharT, Traits> prefix,
+                           std::basic_string_view<CharT, Traits> s) noexcept {
+  // A text shorter than the prefix can never match.
+  if (s.size() < prefix.size())
+    return false;
+  return s.compare(0, prefix.size(), prefix) == 0;
+}
+
+// Floating-point input formats used to select a parse_number specialization.
+// Each of the first three enumerators occupies its own bit; `general` is the
+// union of `scientific` and `fixed`.
+enum class chars_format {
+  scientific = 1 << 0,
+  fixed = 1 << 1,
+  hex = 1 << 2,
+  general = scientific | fixed
+};
+
+// Outcome of consume_hex_prefix: whether a "0x"/"0X" prefix was found, and
+// the text left over once such a prefix has been dropped.
+struct consume_hex_prefix_result {
+  bool is_hexadecimal;
+  std::string_view rest;
+};
+
+using namespace std::literals;
+
+// Strips one leading "0x" or "0X" from `s` when present; otherwise returns
+// `s` untouched with is_hexadecimal set to false.
+constexpr auto consume_hex_prefix(std::string_view s)
+    -> consume_hex_prefix_result {
+  const bool has_prefix = starts_with("0x"sv, s) || starts_with("0X"sv, s);
+  if (!has_prefix)
+    return {false, s};
+
+  // has_prefix guarantees at least two characters, so substr cannot throw.
+  return {true, s.substr(2)};
+}
+
+// Converts the whole of `s` to a T with std::from_chars, passing `Param` as
+// its last argument.
+//
+// Throws std::invalid_argument when no number is found or when characters
+// remain after the number, and std::range_error when the value does not fit
+// in T.
+template <class T, auto Param>
+inline auto do_from_chars(std::string_view s) -> T {
+  T value;
+  auto [begin, end] = pointer_range(s);
+  auto const outcome = std::from_chars(begin, end, value, Param);
+
+  if (outcome.ec == std::errc::invalid_argument)
+    throw std::invalid_argument{"pattern not found"};
+  if (outcome.ec == std::errc::result_out_of_range)
+    throw std::range_error{"not representable"};
+  // Success is only accepted when the entire input was consumed.
+  if (outcome.ec == std::errc() && outcome.ptr != end)
+    throw std::invalid_argument{"pattern does not match to the end"};
+
+  return value;
+}
+
+// Function object that parses text into a T. In this primary template `Param`
+// is handed straight to do_from_chars.
+template <class T, auto Param = 0> struct parse_number {
+  auto operator()(std::string_view text) -> T {
+    return do_from_chars<T, Param>(text);
+  }
+};
+
+// Base 16: a "0x"/"0X" prefix is mandatory and is removed before the digits
+// are converted.
+template <class T> struct parse_number<T, 16> {
+  auto operator()(std::string_view text) -> T {
+    auto const prefix = consume_hex_prefix(text);
+    if (!prefix.is_hexadecimal)
+      throw std::invalid_argument{"pattern not found"};
+
+    return do_from_chars<T, 16>(prefix.rest);
+  }
+};
+
+// Default Param (0): the base is chosen from the text itself. A "0x"/"0X"
+// prefix selects base 16, a leading "0" selects base 8, anything else base 10.
+template <class T> struct parse_number<T> {
+  auto operator()(std::string_view text) -> T {
+    auto const prefix = consume_hex_prefix(text);
+    if (prefix.is_hexadecimal)
+      return do_from_chars<T, 16>(prefix.rest);
+
+    // Without a hex prefix, prefix.rest is the unmodified input.
+    if (starts_with("0"sv, text))
+      return do_from_chars<T, 8>(prefix.rest);
+
+    return do_from_chars<T, 10>(prefix.rest);
+  }
+};
+
+// Maps a floating-point type to its C conversion routine: strtof for float,
+// strtod for double, strtold for long double. Any other T yields nullptr.
+template <class T> constexpr auto generic_strtod = nullptr;
+template <> constexpr auto generic_strtod<float> = &strtof;
+template <> constexpr auto generic_strtod<double> = &strtod;
+template <> constexpr auto generic_strtod<long double> = &strtold;
+
+// Converts the whole of `s` to the floating-point type T with the matching
+// strtof/strtod/strtold routine.
+//
+// Leading whitespace and a leading '+' are rejected up front, although the C
+// routines themselves would accept them.
+//
+// Throws std::invalid_argument when no number can be read at all (this
+// includes the empty string) or when characters remain after the number, and
+// std::range_error when the routine reports ERANGE.
+template <class T> inline auto do_strtod(std::string const &s) -> T {
+  if (isspace(static_cast<unsigned char>(s[0])) || s[0] == '+')
+    throw std::invalid_argument{"pattern not found"};
+
+  auto [first, last] = pointer_range(s);
+  char *ptr;
+
+  errno = 0;
+  auto x = generic_strtod<T>(first, &ptr);
+
+  // No conversion was performed. This must be tested before errno and before
+  // the end-of-input check: for an empty string ptr == first == last, which
+  // would otherwise be accepted as 0, and an implementation may also set errno
+  // to a value other than ERANGE in this case.
+  if (ptr == first)
+    throw std::invalid_argument{"pattern not found"};
+  if (errno == ERANGE)
+    throw std::range_error{"not representable"};
+  if (ptr != last)
+    throw std::invalid_argument{"pattern does not match to the end"};
+
+  return x;
+}
+
+// General format: input carrying a "0x"/"0X" prefix is rejected, everything
+// else is delegated to do_strtod.
+template <class T> struct parse_number<T, chars_format::general> {
+  auto operator()(std::string const &s) -> T {
+    bool const looks_hex = consume_hex_prefix(s).is_hexadecimal;
+    if (looks_hex)
+      throw std::invalid_argument{
+          "chars_format::general does not parse hexfloat"};
+
+    return do_strtod<T>(s);
+  }
+};
+
+// Hex format: the input must start with "0x"/"0X". The full text, prefix
+// included, is then passed to do_strtod.
+template <class T> struct parse_number<T, chars_format::hex> {
+  auto operator()(std::string const &s) -> T {
+    bool const looks_hex = consume_hex_prefix(s).is_hexadecimal;
+    if (!looks_hex)
+      throw std::invalid_argument{"chars_format::hex parses hexfloat"};
+
+    return do_strtod<T>(s);
+  }
+};
+
+// Scientific format: input with a "0x"/"0X" prefix is rejected, and an 'e' or
+// 'E' must occur somewhere in the text before it is passed to do_strtod.
+template <class T> struct parse_number<T, chars_format::scientific> {
+  auto operator()(std::string const &s) -> T {
+    bool const looks_hex = consume_hex_prefix(s).is_hexadecimal;
+    if (looks_hex)
+      throw std::invalid_argument{
+          "chars_format::scientific does not parse hexfloat"};
+
+    bool const has_exponent = s.find_first_of("eE") != std::string::npos;
+    if (!has_exponent)
+      throw std::invalid_argument{
+          "chars_format::scientific requires exponent part"};
+
+    return do_strtod<T>(s);
+  }
+};
+
+// Fixed format: input with a "0x"/"0X" prefix is rejected, and so is any text
+// containing an 'e' or 'E'; the remainder is passed to do_strtod.
+template <class T> struct parse_number<T, chars_format::fixed> {
+  auto operator()(std::string const &s) -> T {
+    bool const looks_hex = consume_hex_prefix(s).is_hexadecimal;
+    if (looks_hex)
+      throw std::invalid_argument{
+          "chars_format::fixed does not parse hexfloat"};
+
+    bool const has_exponent = s.find_first_of("eE") != std::string::npos;
+    if (has_exponent)
+      throw std::invalid_argument{
+          "chars_format::fixed does not parse exponent part"};
+
+    return do_strtod<T>(s);
+  }
+};
+
 } // namespace details

 class ArgumentParser;
@ -161,6 +331,45 @@ public:
    return *this;
  }

+  // Registers an action that parses the argument text into a T, using the
+  // parse_number specialization selected by the conversion character `Shape`:
+  //   'd'            any standard integer, base 10
+  //   'i'            any standard integer, base chosen from the text
+  //   'u'            unsigned integer, base 10
+  //   'o'            unsigned integer, base 8
+  //   'x' 'X'        unsigned integer, base 16
+  //   'a' 'A'        floating point, hex format
+  //   'e' 'E'        floating point, scientific format
+  //   'f' 'F'        floating point, fixed format
+  //   'g' 'G'        floating point, general format
+  // Any other Shape/T combination is a compile-time error. Returns *this.
+  template <char Shape, typename T>
+  auto scan() -> std::enable_if_t<std::is_arithmetic_v<T>, Argument &> {
+    static_assert(!(std::is_const_v<T> || std::is_volatile_v<T>),
+                  "T should not be cv-qualified");
+    constexpr bool any_integer = details::standard_integer<T>;
+    constexpr bool unsigned_integer = details::standard_unsigned_integer<T>;
+    constexpr bool floating = std::is_floating_point_v<T>;
+
+    if constexpr (Shape == 'd' && any_integer) {
+      action(details::parse_number<T, 10>());
+    } else if constexpr (Shape == 'i' && any_integer) {
+      action(details::parse_number<T>());
+    } else if constexpr (Shape == 'u' && unsigned_integer) {
+      action(details::parse_number<T, 10>());
+    } else if constexpr (Shape == 'o' && unsigned_integer) {
+      action(details::parse_number<T, 8>());
+    } else if constexpr ((Shape == 'x' || Shape == 'X') && unsigned_integer) {
+      action(details::parse_number<T, 16>());
+    } else if constexpr ((Shape == 'a' || Shape == 'A') && floating) {
+      action(details::parse_number<T, details::chars_format::hex>());
+    } else if constexpr ((Shape == 'e' || Shape == 'E') && floating) {
+      action(details::parse_number<T, details::chars_format::scientific>());
+    } else if constexpr ((Shape == 'f' || Shape == 'F') && floating) {
+      action(details::parse_number<T, details::chars_format::fixed>());
+    } else if constexpr ((Shape == 'g' || Shape == 'G') && floating) {
+      action(details::parse_number<T, details::chars_format::general>());
+    } else {
+      // The condition depends on T so that it only fires when this branch is
+      // actually instantiated.
+      static_assert(alignof(T) == 0, "No scan specification for T");
+    }
+
+    return *this;
+  }
+
  Argument &nargs(int aNumArgs) {
    if (aNumArgs < 0)
      throw std::logic_error("Number of arguments must be non-negative");
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -37,6 +37,7 @@ file(GLOB ARGPARSE_TEST_SOURCES
    test_parse_args.cpp
    test_positional_arguments.cpp
    test_required_arguments.cpp
+    test_scan.cpp
    test_value_semantics.cpp
 )
 set_source_files_properties(main.cpp
--- a/test/test_scan.cpp
+++ b/test/test_scan.cpp
@ -0,0 +1,348 @@
+#include <argparse.hpp>
+#include <doctest.hpp>
+#include <stdint.h>
+
+using doctest::test_suite;
+
+// scan<'d', T> over every fixed-width signed and unsigned integer type.
+TEST_CASE_TEMPLATE("Parse a decimal integer argument" * test_suite("scan"), T,
+                   int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
+                   uint32_t, uint64_t) {
+  argparse::ArgumentParser parser("test");
+  parser.add_argument("-n").scan<'d', T>();
+
+  SUBCASE("zero") {
+    parser.parse_args({"test", "-n", "0"});
+    REQUIRE(parser.get<T>("-n") == 0);
+  }
+
+  SUBCASE("non-negative") {
+    parser.parse_args({"test", "-n", "5"});
+    REQUIRE(parser.get<T>("-n") == 5);
+  }
+
+  SUBCASE("negative") {
+    // Unsigned targets must reject a minus sign; signed targets accept it.
+    if constexpr (std::is_unsigned_v<T>) {
+      REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "-135"}),
+                        std::invalid_argument);
+    } else {
+      parser.parse_args({"test", "-n", "-128"});
+      REQUIRE(parser.get<T>("-n") == -128);
+    }
+  }
+
+  SUBCASE("left-padding is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", " 32"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("right-padding is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "12 "}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("plus sign is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "+12"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("does not fit") {
+    REQUIRE_THROWS_AS(
+        parser.parse_args({"test", "-n", "987654321987654321987654321"}),
+        std::range_error);
+  }
+}
+
+// scan<'o', T> over the fixed-width unsigned integer types.
+TEST_CASE_TEMPLATE("Parse an octal integer argument" * test_suite("scan"), T,
+                   uint8_t, uint16_t, uint32_t, uint64_t) {
+  argparse::ArgumentParser parser("test");
+  parser.add_argument("-n").scan<'o', T>();
+
+  SUBCASE("zero") {
+    parser.parse_args({"test", "-n", "0"});
+    REQUIRE(parser.get<T>("-n") == 0);
+  }
+
+  SUBCASE("with octal base") {
+    parser.parse_args({"test", "-n", "066"});
+    REQUIRE(parser.get<T>("-n") == 066);
+  }
+
+  SUBCASE("minus sign produces an optional argument") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "-003"}),
+                      std::runtime_error);
+  }
+
+  SUBCASE("plus sign is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "+012"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("does not fit") {
+    REQUIRE_THROWS_AS(
+        parser.parse_args({"test", "-n", "02000000000000000000001"}),
+        std::range_error);
+  }
+}
+
+// scan<'X', T> over the fixed-width unsigned integer types.
+TEST_CASE_TEMPLATE("Parse a hexadecimal integer argument" * test_suite("scan"),
+                   T, uint8_t, uint16_t, uint32_t, uint64_t) {
+  argparse::ArgumentParser parser("test");
+  parser.add_argument("-n").scan<'X', T>();
+
+  SUBCASE("with hex digit") {
+    parser.parse_args({"test", "-n", "0x1a"});
+    REQUIRE(parser.get<T>("-n") == 0x1a);
+  }
+
+  SUBCASE("minus sign produces an optional argument") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "-0x1"}),
+                      std::runtime_error);
+  }
+
+  SUBCASE("plus sign is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "+0x1a"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("does not fit") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "0XFFFFFFFFFFFFFFFF1"}),
+                      std::range_error);
+  }
+}
+
+// scan<'i', T>: the base is taken from the text, for every fixed-width signed
+// and unsigned integer type.
+TEST_CASE_TEMPLATE("Parse integer argument of any format" * test_suite("scan"),
+                   T, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
+                   uint32_t, uint64_t) {
+  argparse::ArgumentParser parser("test");
+  parser.add_argument("-n").scan<'i', T>();
+
+  SUBCASE("zero") {
+    parser.parse_args({"test", "-n", "0"});
+    REQUIRE(parser.get<T>("-n") == 0);
+  }
+
+  SUBCASE("octal") {
+    parser.parse_args({"test", "-n", "077"});
+    REQUIRE(parser.get<T>("-n") == 077);
+  }
+
+  SUBCASE("no negative octal") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "-0777"}),
+                      std::runtime_error);
+  }
+
+  SUBCASE("hex") {
+    parser.parse_args({"test", "-n", "0X2c"});
+    REQUIRE(parser.get<T>("-n") == 0X2c);
+  }
+
+  SUBCASE("no negative hex") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "-0X2A"}),
+                      std::runtime_error);
+  }
+
+  SUBCASE("decimal") {
+    parser.parse_args({"test", "-n", "98"});
+    REQUIRE(parser.get<T>("-n") == 98);
+  }
+
+  SUBCASE("negative decimal") {
+    // Unsigned targets must reject a minus sign; signed targets accept it.
+    if constexpr (std::is_unsigned_v<T>) {
+      REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "-39"}),
+                        std::invalid_argument);
+    } else {
+      parser.parse_args({"test", "-n", "-39"});
+      REQUIRE(parser.get<T>("-n") == -39);
+    }
+  }
+
+  SUBCASE("left-padding is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "\t32"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("right-padding is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "32\n"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("plus sign is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "+670"}),
+                      std::invalid_argument);
+  }
+}
+
+// Produces `literal` as a value of the floating-point type `t`: the token gets
+// an `f` suffix for float, is used as written for double, and gets an `l`
+// suffix for long double. The choice is made with `if constexpr` inside an
+// immediately invoked lambda.
+#define FLOAT_G(t, literal)                                                    \
+  ([] {                                                                        \
+    if constexpr (std::is_same_v<t, float>)                                    \
+      return literal##f;                                                       \
+    else if constexpr (std::is_same_v<t, double>)                              \
+      return literal;                                                          \
+    else if constexpr (std::is_same_v<t, long double>)                         \
+      return literal##l;                                                       \
+  }())
+
+// scan<'g', T> over float, double and long double.
+TEST_CASE_TEMPLATE("Parse floating-point argument of general format" *
+                       test_suite("scan"),
+                   T, float, double, long double) {
+  argparse::ArgumentParser parser("test");
+  parser.add_argument("-n").scan<'g', T>();
+
+  SUBCASE("zero") {
+    parser.parse_args({"test", "-n", "0"});
+    REQUIRE(parser.get<T>("-n") == 0.);
+  }
+
+  SUBCASE("non-negative") {
+    parser.parse_args({"test", "-n", "3.14"});
+    REQUIRE(parser.get<T>("-n") == FLOAT_G(T, 3.14));
+  }
+
+  SUBCASE("negative") {
+    parser.parse_args({"test", "-n", "-0.12"});
+    REQUIRE(parser.get<T>("-n") == FLOAT_G(T, -0.12));
+  }
+
+  SUBCASE("left-padding is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "\t.32"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("right-padding is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", ".32\n"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("plus sign is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "+.12"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("plus sign after padding is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "   +.12"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("hexfloat is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "0x1a.3p+1"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("does not fit") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "1.3e+5000"}),
+                      std::range_error);
+  }
+}
+
+// scan<'a', T> over float, double and long double.
+TEST_CASE_TEMPLATE("Parse hexadecimal floating-point argument" *
+                       test_suite("scan"),
+                   T, float, double, long double) {
+  argparse::ArgumentParser parser("test");
+  parser.add_argument("-n").scan<'a', T>();
+
+  SUBCASE("zero") {
+    // The expected value is spelled 0x0.p0 because the binary exponent part
+    // is not optional in the C++ literal grammar.
+    parser.parse_args({"test", "-n", "0x0"});
+    REQUIRE(parser.get<T>("-n") == 0x0.p0);
+  }
+
+  SUBCASE("non-negative") {
+    parser.parse_args({"test", "-n", "0x1a.3p+1"});
+    REQUIRE(parser.get<T>("-n") == 0x1a.3p+1);
+  }
+
+  SUBCASE("minus sign produces an optional argument") {
+    // XXX: may be worth a fix
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "-0x0.12p1"}),
+                      std::runtime_error);
+  }
+
+  SUBCASE("plus sign is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "+0x1p0"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("general format is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "3.14"}),
+                      std::invalid_argument);
+  }
+}
+
+// scan<'e', T> over float, double and long double.
+TEST_CASE_TEMPLATE("Parse floating-point argument of scientific format" *
+                       test_suite("scan"),
+                   T, float, double, long double) {
+  argparse::ArgumentParser parser("test");
+  parser.add_argument("-n").scan<'e', T>();
+
+  SUBCASE("zero") {
+    parser.parse_args({"test", "-n", "0e0"});
+    REQUIRE(parser.get<T>("-n") == 0e0);
+  }
+
+  SUBCASE("non-negative") {
+    parser.parse_args({"test", "-n", "3.14e-1"});
+    REQUIRE(parser.get<T>("-n") == FLOAT_G(T, 3.14e-1));
+  }
+
+  SUBCASE("negative") {
+    parser.parse_args({"test", "-n", "-0.12e+1"});
+    REQUIRE(parser.get<T>("-n") == FLOAT_G(T, -0.12e+1));
+  }
+
+  SUBCASE("plus sign is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "+.12e+1"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("fixed format is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "3.14"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("hexfloat is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "0x1.33p+0"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("does not fit") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "1.3e+5000"}),
+                      std::range_error);
+  }
+}
+
+// scan<'f', T> over float, double and long double.
+TEST_CASE_TEMPLATE("Parse floating-point argument of fixed format" *
+                       test_suite("scan"),
+                   T, float, double, long double) {
+  argparse::ArgumentParser parser("test");
+  parser.add_argument("-n").scan<'f', T>();
+
+  SUBCASE("zero") {
+    parser.parse_args({"test", "-n", ".0"});
+    REQUIRE(parser.get<T>("-n") == .0);
+  }
+
+  SUBCASE("non-negative") {
+    parser.parse_args({"test", "-n", "3.14"});
+    REQUIRE(parser.get<T>("-n") == FLOAT_G(T, 3.14));
+  }
+
+  SUBCASE("negative") {
+    parser.parse_args({"test", "-n", "-0.12"});
+    REQUIRE(parser.get<T>("-n") == FLOAT_G(T, -0.12));
+  }
+
+  SUBCASE("plus sign is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "+.12"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("scientific format is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "3.14e+0"}),
+                      std::invalid_argument);
+  }
+
+  SUBCASE("hexfloat is not allowed") {
+    REQUIRE_THROWS_AS(parser.parse_args({"test", "-n", "0x1.33p+0"}),
+                      std::invalid_argument);
+  }
+}