kaitai-io
diff --git a/kaitai/kaitaistream.cpp b/kaitai/kaitaistream.cpp
Lines changed: 87 additions & 11 deletions
diff --git a/kaitai/kaitaistream.h b/kaitai/kaitaistream.h
Lines changed: 41 additions & 65 deletions
diff --git a/tests/unittest.cpp b/tests/unittest.cpp
Lines changed: 71 additions & 24 deletions
@@ -791,20 +791,96 @@ int kaitai::kstream::mod(int a, int b) {
  return r;
 }
 
-void kaitai::kstream::unsigned_to_decimal(uint64_t number, char *buffer) {
- // Implementation from https://ideone.com/nrQfA8 by Alf P. Steinbach
+void kaitai::kstream::unsigned_to_decimal(uint64_t number, char *buf, std::size_t &buf_contents_start) {
+ // Writes the decimal digits of `number` into `buf` so that they end just before the index passed in `buf_contents_start`, and updates `buf_contents_start` to the index of the first digit written. Implementation inspired by https://ideone.com/nrQfA8 by Alf P. Steinbach
  // (see https://vitaut.net/posts/2013/integer-to-string-conversion-in-cplusplus/)
- if (number == 0) {
- *buffer++ = '0';
+ do { // do-while: the body runs at least once, so `number == 0` still produces the single digit '0'
+ buf[--buf_contents_start] = static_cast<char>('0' + (number % 10)); // least significant digit first, filling `buf` backwards; no bounds check here - the caller must leave enough room
+ number /= 10;
+ } while (number != 0);
+}
+
+std::string kaitai::kstream::to_string_signed(int64_t val) {
+ // Returns the decimal representation of `val`, prefixed with '-' when `val` is negative. `digits10 + 2` because of minus sign + leading digit (NB: null terminator is not used)
+ char buf[std::numeric_limits<int64_t>::digits10 + 2];
+ std::size_t buf_contents_start = sizeof(buf); // starts one past the end: `buf` is filled backwards from its last element
+ if (val < 0) {
+ // NB: `val` is negative and we need to get its absolute value (i.e. minus `val`). However, since
+ // `int64_t` uses two's complement representation, its range is `[-2**63, 2**63 - 1] =
+ // [-0x8000_0000_0000_0000, 0x7fff_ffff_ffff_ffff]` (both ends inclusive) and thus the naive
+ // `-val` operation will overflow for `val = std::numeric_limits<int64_t>::min() =
+ // -0x8000_0000_0000_0000` (because the result of `-val` is mathematically
+ // `-(-0x8000_0000_0000_0000) = 0x8000_0000_0000_0000`, but the `int64_t` type can represent at
+ // most `0x7fff_ffff_ffff_ffff`). And signed integer overflow is undefined behavior in C++.
+ //
+ // To avoid undefined behavior for `val = -0x8000_0000_0000_0000 = -2**63`, we do the following
+ // steps for all negative `val`s:
+ //
+ // 1. Convert the signed (and negative) `val` to an unsigned `uint64_t` type. This is a
+ // well-defined operation in C++: the resulting `uint64_t` value will be `val mod 2**64` (`mod`
+ // is modulo). The maximum `val` we can have here is `-1` (because `val < 0`), a theoretical
+ // minimum we are able to support would be `-2**64 + 1 = -0xffff_ffff_ffff_ffff` (even though
+ // in practice the widest standard type is `int64_t` with the minimum of `-2**63`):
+ //
+ // * `static_cast<uint64_t>(-1) = -1 mod 2**64 = 2**64 + (-1) = 0xffff_ffff_ffff_ffff = 2**64 - 1`
+ // * `static_cast<uint64_t>(-2**64 + 1) = (-2**64 + 1) mod 2**64 = 2**64 + (-2**64 + 1) = 1`
+ //
+ // 2. Subtract `static_cast<uint64_t>(val)` from `2**64 - 1 = 0xffff_ffff_ffff_ffff`. Since
+ // `static_cast<uint64_t>(val)` is in range `[1, 2**64 - 1]` (see step 1), the result of this
+ // subtraction will be mathematically in range `[0, (2**64 - 1) - 1] = [0, 2**64 - 2]`. So the
+ // mathematical result cannot be negative, hence this unsigned integer subtraction can never
+ // wrap around (which wouldn't be a good thing to rely upon because it confuses programmers and
+ // code analysis tools).
+ //
+ // 3. Since we did mathematically `(2**64 - 1) - (2**64 + val) = -val - 1` so far (and we wanted
+ // to do `-val`), we add `1` to correct that. From step 2 we know that the result of `-val - 1`
+ // is in range `[0, 2**64 - 2]`, so adding `1` will not wrap (at most we could get `2**64 - 1 =
+ // 0xffff_ffff_ffff_ffff`, which is still in the valid range of `uint64_t`).
+ unsigned_to_decimal((std::numeric_limits<uint64_t>::max() - static_cast<uint64_t>(val)) + 1, buf, buf_contents_start);
+
+ buf[--buf_contents_start] = '-'; // the digits are already in place, so the sign goes directly in front of them
  } else {
- char *p_first = buffer;
- while (number != 0) {
- *buffer++ = static_cast<char>('0' + number % 10);
- number /= 10;
- }
- std::reverse(p_first, buffer);
+ unsigned_to_decimal(static_cast<uint64_t>(val), buf, buf_contents_start); // `val >= 0` on this branch, so the cast preserves the value
  }
- *buffer = '\0';
+ return std::string(&buf[buf_contents_start], sizeof(buf) - buf_contents_start); // copies only the used tail of `buf`; the length is explicit because there is no null terminator
+}
+
+std::string kaitai::kstream::to_string_unsigned(uint64_t val) {
+ // Returns the decimal representation of `val`. `digits10 + 1` because of leading digit (NB: null terminator is not used)
+ char buf[std::numeric_limits<uint64_t>::digits10 + 1];
+ std::size_t buf_contents_start = sizeof(buf); // starts one past the end: `buf` is filled backwards from its last element
+ unsigned_to_decimal(val, buf, buf_contents_start); // moves `buf_contents_start` down to the first digit
+ return std::string(&buf[buf_contents_start], sizeof(buf) - buf_contents_start); // copies only the used tail of `buf`; the length is explicit because there is no null terminator
+}
+
+// NB: the following 6 overloads are exactly the ones that
+// [`std::to_string`](https://en.cppreference.com/w/cpp/string/basic_string/to_string) has.
+// Testing has shown that they are all necessary: if you remove any of them, you will get
+// something like `error: call to 'to_string' is ambiguous` when trying to call `to_string`
+// with the integer type for which you removed the overload.
+
+std::string kaitai::kstream::to_string(int val) {
+ return to_string_signed(val); // `int` converts implicitly to the `int64_t` parameter
+}
+
+std::string kaitai::kstream::to_string(long val) {
+ return to_string_signed(val); // `long` converts implicitly to the `int64_t` parameter
+}
+
+std::string kaitai::kstream::to_string(long long val) {
+ return to_string_signed(val); // `long long` converts implicitly to the `int64_t` parameter
+}
+
+std::string kaitai::kstream::to_string(unsigned val) {
+ return to_string_unsigned(val); // `unsigned` converts implicitly to the `uint64_t` parameter
+}
+
+std::string kaitai::kstream::to_string(unsigned long val) {
+ return to_string_unsigned(val); // `unsigned long` converts implicitly to the `uint64_t` parameter
+}
+
+std::string kaitai::kstream::to_string(unsigned long long val) {
+ return to_string_unsigned(val); // `unsigned long long` converts implicitly to the `uint64_t` parameter
 }
 
 int64_t kaitai::kstream::string_to_int(const std::string& str, int base) {
 
@@ -12,14 +12,10 @@
 #include <stdint.h> // int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t
 
 #include <ios> // std::streamsize, forward declaration of std::istream // IWYU pragma: keep
-#include <limits> // std::numeric_limits
+#include <cstddef> // std::size_t
 #include <sstream> // std::istringstream // IWYU pragma: keep
 #include <string> // std::string
 
-#ifdef KAITAI_STREAM_H_CPP11_SUPPORT
-#include <type_traits> // std::enable_if, std::is_integral
-#endif
-
 namespace kaitai {
 
 /**
@@ -234,67 +230,45 @@ class kstream {
 
  /**
  * Converts given integer `val` to a decimal string representation.
- * Should be used in place of std::to_string() (which is available only
+ * Should be used in place of `std::to_string(int)` (which is available only
  * since C++11) in older C++ implementations.
  */
- template<typename I>
-#ifdef KAITAI_STREAM_H_CPP11_SUPPORT
- // https://stackoverflow.com/a/27913885
- typename std::enable_if<
- std::is_integral<I>::value &&
- // check if we don't have something too large like GCC's `__int128_t`
- std::numeric_limits<I>::max() >= 0 &&
- std::numeric_limits<I>::max() <= std::numeric_limits<uint64_t>::max(),
- std::string
- >::type
-#else
- std::string
-#endif
- static to_string(I val) {
- // in theory, `digits10 + 3` would be enough (minus sign + leading digit
- // + null terminator), but let's add a little more to be safe
- char buf[std::numeric_limits<I>::digits10 + 5];
- if (val < 0) {
- buf[0] = '-';
-
- // NB: `val` is negative and we need to get its absolute value (i.e. minus `val`). However, since
- // `int64_t` uses two's complement representation, its range is `[-2**63, 2**63 - 1] =
- // [-0x8000_0000_0000_0000, 0x7fff_ffff_ffff_ffff]` (both ends inclusive) and thus the naive
- // `-val` operation will overflow for `val = std::numeric_limits<int64_t>::min() =
- // -0x8000_0000_0000_0000` (because the result of `-val` is mathematically
- // `-(-0x8000_0000_0000_0000) = 0x8000_0000_0000_0000`, but the `int64_t` type can represent at
- // most `0x7fff_ffff_ffff_ffff`). And signed integer overflow is undefined behavior in C++.
- //
- // To avoid undefined behavior for `val = -0x8000_0000_0000_0000 = -2**63`, we do the following
- // steps for all negative `val`s:
- //
- // 1. Convert the signed (and negative) `val` to an unsigned `uint64_t` type. This is a
- // well-defined operation in C++: the resulting `uint64_t` value will be `val mod 2**64` (`mod`
- // is modulo). The maximum `val` we can have here is `-1` (because `val < 0`), a theoretical
- // minimum we are able to support would be `-2**64 + 1 = -0xffff_ffff_ffff_ffff` (even though
- // in practice the widest standard type is `int64_t` with the minimum of `-2**63`):
- //
- // * `static_cast<uint64_t>(-1) = -1 mod 2**64 = 2**64 + (-1) = 0xffff_ffff_ffff_ffff = 2**64 - 1`
- // * `static_cast<uint64_t>(-2**64 + 1) = (-2**64 + 1) mod 2**64 = 2**64 + (-2**64 + 1) = 1`
- //
- // 2. Subtract `static_cast<uint64_t>(val)` from `2**64 - 1 = 0xffff_ffff_ffff_ffff`. Since
- // `static_cast<uint64_t>(val)` is in range `[1, 2**64 - 1]` (see step 1), the result of this
- // subtraction will be mathematically in range `[0, (2**64 - 1) - 1] = [0, 2**64 - 2]`. So the
- // mathematical result cannot be negative, hence this unsigned integer subtraction can never
- // wrap around (which wouldn't be a good thing to rely upon because it confuses programmers and
- // code analysis tools).
- //
- // 3. Since we did mathematically `(2**64 - 1) - (2**64 + val) = -val - 1` so far (and we wanted
- // to do `-val`), we add `1` to correct that. From step 2 we know that the result of `-val - 1`
- // is in range `[0, 2**64 - 2]`, so adding `1` will not wrap (at most we could get `2**64 - 1 =
- // 0xffff_ffff_ffff_ffff`, which is still in the valid range of `uint64_t`).
-
- unsigned_to_decimal((std::numeric_limits<uint64_t>::max() - static_cast<uint64_t>(val)) + 1, &buf[1]);
- } else {
- unsigned_to_decimal(val, buf);
- }
- return std::string(buf);
- }
+ static std::string to_string(int val);
+
+ /**
+ * Converts given integer `val` to a decimal string representation.
+ * Should be used in place of `std::to_string(long)` (which is available only
+ * since C++11) in older C++ implementations.
+ */
+ static std::string to_string(long val);
+
+ /**
+ * Converts given integer `val` to a decimal string representation.
+ * Should be used in place of `std::to_string(long long)` (which is available only
+ * since C++11) in older C++ implementations.
+ */
+ static std::string to_string(long long val);
+
+ /**
+ * Converts given integer `val` to a decimal string representation.
+ * Should be used in place of `std::to_string(unsigned)` (which is available only
+ * since C++11) in older C++ implementations.
+ */
+ static std::string to_string(unsigned val);
+
+ /**
+ * Converts given integer `val` to a decimal string representation.
+ * Should be used in place of `std::to_string(unsigned long)` (which is available only
+ * since C++11) in older C++ implementations.
+ */
+ static std::string to_string(unsigned long val);
+
+ /**
+ * Converts given integer `val` to a decimal string representation.
+ * Should be used in place of `std::to_string(unsigned long long)` (which is available only
+ * since C++11) in older C++ implementations.
+ */
+ static std::string to_string(unsigned long long val);
 
  /**
  * Converts string `str` to an integer value. Throws an exception if the
@@ -347,7 +321,9 @@ class kstream {
  void init();
  void exceptions_enable() const;
 
- static void unsigned_to_decimal(uint64_t number, char *buffer);
+ static void unsigned_to_decimal(uint64_t number, char *buf, std::size_t &buf_contents_start); // writes the decimal digits of `number` backwards into `buf`, decrementing `buf_contents_start` down to the index of the first digit
+ static std::string to_string_signed(int64_t val); // common implementation behind the `int`, `long` and `long long` overloads of `to_string`
+ static std::string to_string_unsigned(uint64_t val); // common implementation behind the `unsigned`, `unsigned long` and `unsigned long long` overloads of `to_string`
 
 #ifdef KS_STR_ENCODING_WIN32API
  enum {
 
@@ -66,52 +66,99 @@ TEST(KaitaiStreamTest, to_string)
  EXPECT_EQ(kaitai::kstream::to_string(-123), "-123");
 }
 
-TEST(KaitaiStreamTest, to_string_uint8)
+// Since `kstream::to_string` must have several overloads (just like
+// [`std::to_string`](https://en.cppreference.com/w/cpp/string/basic_string/to_string)) to
+// cover all [standard integer
+// types](https://en.cppreference.com/w/cpp/language/types#Properties) while avoiding
+// templates, it's a good idea to test whether it actually works with each standard
+// integer type. If even just one of the 6 required overloads is missing or not working,
+// these tests should be able to detect it.
+//
+// We test the standard integer types (keywords), not [fixed width integer
+// types](https://en.cppreference.com/w/cpp/header/cstdint) (like `int32_t`), because then
+// we could potentially have a blind spot: `int32_t` tends to be almost universally
+// equivalent to `int`, but `int64_t` is either `long` (typically on 64-bit Linux) or
+// `long long` (typically on 64-bit Windows) but not both. So I believe that using
+// standard integer types gives us better coverage.
+
+TEST(KaitaiStreamTest, to_string_unsigned_char) // checks both ends of the `unsigned char` range; `kstream::to_string` has no `unsigned char` overload, so the call presumably resolves to `to_string(int)` via integral promotion
 {
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<uint8_t>::min()), "0");
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<uint8_t>::max()), "255");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned char>::min()), "0");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned char>::max()), "255"); // 2**8 - 1: the literal hard-codes an 8-bit `unsigned char`
 }
 
-TEST(KaitaiStreamTest, to_string_int8)
+TEST(KaitaiStreamTest, to_string_signed_char) // checks both ends of the `signed char` range; `kstream::to_string` has no `signed char` overload, so the call presumably resolves to `to_string(int)` via integral promotion
 {
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<int8_t>::min()), "-128");
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<int8_t>::max()), "127");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<signed char>::min()), "-128"); // -2**7: the literal hard-codes an 8-bit `signed char`
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<signed char>::max()), "127"); // 2**7 - 1
 }
 
-TEST(KaitaiStreamTest, to_string_uint16)
+TEST(KaitaiStreamTest, to_string_unsigned_short) // checks both ends of the `unsigned short` range; `kstream::to_string` has no `unsigned short` overload, so the call presumably resolves to `to_string(int)` via integral promotion
 {
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<uint16_t>::min()), "0");
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<uint16_t>::max()), "65535");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned short>::min()), "0");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned short>::max()), "65535"); // 2**16 - 1: the literal hard-codes a 16-bit `unsigned short`
 }
 
-TEST(KaitaiStreamTest, to_string_int16)
+TEST(KaitaiStreamTest, to_string_short) // checks both ends of the `short` range; `kstream::to_string` has no `short` overload, so the call presumably resolves to `to_string(int)` via integral promotion
 {
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<int16_t>::min()), "-32768");
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<int16_t>::max()), "32767");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<short>::min()), "-32768"); // -2**15: the literal hard-codes a 16-bit `short`
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<short>::max()), "32767"); // 2**15 - 1
 }
 
-TEST(KaitaiStreamTest, to_string_uint32)
+TEST(KaitaiStreamTest, to_string_unsigned) // checks both ends of the `unsigned` range through the `to_string(unsigned)` overload
 {
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<uint32_t>::min()), "0");
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<uint32_t>::max()), "4294967295");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned>::min()), "0");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned>::max()), "4294967295"); // 2**32 - 1: the literal hard-codes a 32-bit `unsigned`
 }
 
-TEST(KaitaiStreamTest, to_string_int32)
+TEST(KaitaiStreamTest, to_string_int) // checks both ends of the `int` range through the `to_string(int)` overload
 {
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<int32_t>::min()), "-2147483648");
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<int32_t>::max()), "2147483647");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<int>::min()), "-2147483648"); // -2**31: the literal hard-codes a 32-bit `int`
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<int>::max()), "2147483647"); // 2**31 - 1
+}
+
+#ifdef _MSC_VER
+#pragma warning(push)
+// Disable `warning C4127: conditional expression is constant`
+// (see https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-4-c4127?view=msvc-170)
+#pragma warning(disable: 4127)
+#endif
+
+TEST(KaitaiStreamTest, to_string_unsigned_long) // checks both ends of the `unsigned long` range through the `to_string(unsigned long)` overload
+{
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned long>::min()), "0");
+ if (sizeof(unsigned long) == 4) { // the width of `unsigned long` differs between targets, so the expected maximum is selected by size (this constant condition is what triggers MSVC warning C4127)
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned long>::max()), "4294967295"); // 2**32 - 1
+ } else { // any size other than 4 bytes is treated as 64-bit
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned long>::max()), "18446744073709551615"); // 2**64 - 1
+ }
 }
 
-TEST(KaitaiStreamTest, to_string_uint64)
+TEST(KaitaiStreamTest, to_string_long) // checks both ends of the `long` range through the `to_string(long)` overload
+{
+ if (sizeof(long) == 4) { // the width of `long` differs between targets, so the expected values are selected by size (this constant condition is what triggers MSVC warning C4127)
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<long>::min()), "-2147483648"); // -2**31
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<long>::max()), "2147483647"); // 2**31 - 1
+ } else { // any size other than 4 bytes is treated as 64-bit
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<long>::min()), "-9223372036854775808"); // -2**63
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<long>::max()), "9223372036854775807"); // 2**63 - 1
+ }
+}
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+TEST(KaitaiStreamTest, to_string_unsigned_long_long) // checks both ends of the `unsigned long long` range through the `to_string(unsigned long long)` overload
 {
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<uint64_t>::min()), "0");
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<uint64_t>::max()), "18446744073709551615");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned long long>::min()), "0");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<unsigned long long>::max()), "18446744073709551615"); // 2**64 - 1: the literal hard-codes a 64-bit `unsigned long long`
 }
 
-TEST(KaitaiStreamTest, to_string_int64)
+TEST(KaitaiStreamTest, to_string_long_long) // checks both ends of the `long long` range through the `to_string(long long)` overload
 {
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<int64_t>::min()), "-9223372036854775808");
- EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<int64_t>::max()), "9223372036854775807");
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<long long>::min()), "-9223372036854775808"); // -2**63: the literal hard-codes a 64-bit `long long`; the minimum also exercises the negation path that must not overflow
+ EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits<long long>::max()), "9223372036854775807"); // 2**63 - 1
 }
 
 TEST(KaitaiStreamTest, string_to_int)