From e84280e39361c4c699a63fcff104acd7227f3d7d Mon Sep 17 00:00:00 2001 From: keqingmoe Date: Thu, 13 Mar 2025 18:04:49 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=20module/string.cppm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- module/string.cppm | 704 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 704 insertions(+) create mode 100644 module/string.cppm diff --git a/module/string.cppm b/module/string.cppm new file mode 100644 index 0000000..4922075 --- /dev/null +++ b/module/string.cppm @@ -0,0 +1,704 @@ +export module kqm.str.string; + +import std; +import kqm.str.utf; +import kqm.str.view; + +export struct const_propagator +{ + template + constexpr auto operator()(this Self&, T* ptr) noexcept + { + if constexpr (std::is_const_v) { + return static_cast(ptr); + } else return ptr; + } +}; + +struct dyn_t +{ + std::size_t siz; + std::size_t cap; +}; + +struct sso_t +{ + std::uint8_t siz; + std::byte dat[sizeof(dyn_t) - 1uz]; +}; + +constexpr auto sso_size = sizeof(sso_t::dat); + +using byte_span = std::span; +using byte_view = std::span; + +template +concept Utf32Range = + std::ranges::input_range && std::same_as>, char32_t>; + +template +concept Utf8ByteRange = + std::ranges::input_range && kqm::utf::Utf8CharOrByte>>; + +export namespace kqm +{ + +template > +struct basic_string +{ +public: + using value_type = char32_t; + using allocator_type = Allocator; + using size_type = std::allocator_traits::size_type; + using difference_type = std::allocator_traits::difference_type; + using const_iterator = utf::utf8_to_utf32_iterator; + using iterator = const_iterator; + using const_reverse_iterator = std::reverse_iterator; + using reverse_iterator = const_reverse_iterator; + +private: + std::byte* mem_; + union + { + dyn_t dyn_; + sso_t sso_; + }; + [[no_unique_address]] std::allocator alloc_; + [[no_unique_address]] const_propagator cp_; + + + constexpr auto replace_impl(byte_span dest, byte_view data) + { + auto [new_siz, allocs] = [&] { + auto old_siz = size_bytes(); + auto rem_siz = dest.size(); + auto rep_siz = data.size(); + auto tmp_siz = old_siz - rem_siz; + if (max_size_bytes() - tmp_siz < rep_siz) { + throw std::length_error{"size() + delta-size > max_size()"}; + } + auto new_siz = tmp_siz + rep_siz; + auto new_cap = std::max(old_siz * 2, new_siz); + return std::tuple{new_siz, new_cap}; + }(); + auto dst = dest.data(); + + auto first = this->data(); + auto mid = dest.data(); + auto last = dest.end().base(); + auto tail = first + size_bytes(); + + auto fsize = mid - first; + auto msize = dest.size(); + auto lsize = tail - last; + + if (auto cap = capacity(); new_siz > cap) { + auto [new_mem, new_cap] = alloc_.allocate_at_least(allocs); + std::memcpy(new_mem, first, fsize); + std::memcpy(new_mem + fsize, data.data(), data.size()); + std::memcpy(new_mem + fsize + data.size(), last, lsize); + if (mem_) { + alloc_.deallocate(mem_, dyn_.cap); + } + mem_ = new_mem; + dyn_ = dyn_t{new_siz, new_cap}; + } else { + std::memmove(mid + data.size(), last, lsize); + if (!data.empty()) { + std::memcpy(dst, data.data(), data.size()); + } + if (mem_) { + dyn_.siz = new_siz; + } else { + sso_.siz = new_siz; + } + } + } + + constexpr auto replace_impl(byte_view dest, byte_view data) + { + replace_impl(byte_span{const_cast(dest.data()), dest.size()}, data); + } + + constexpr auto move_impl(basic_string& other) noexcept + { + if (other.mem_) { + mem_ = other.mem_; + dyn_ = other.dyn_; + + other.mem_ = nullptr; + other.sso_ = {}; + } else { + sso_.siz = other.sso_.siz; + memcpy(sso_.dat, other.sso_.dat, sso_.siz); + } + } + +public: + constexpr basic_string() noexcept(std::is_nothrow_default_constructible_v) : mem_{}, sso_{}, alloc_{} {} + + constexpr basic_string(allocator_type alloc) noexcept : mem_{}, sso_{}, alloc_{std::move(alloc)} {} + + template S> + requires std::convertible_to, char32_t> + constexpr basic_string(Iter first, S last, allocator_type alloc = allocator_type{}) : basic_string{std::move(alloc)} + { + append(std::ranges::subrange{first, last}); + } + + constexpr basic_string(string_view sv, allocator_type alloc = allocator_type{}) : basic_string{std::move(alloc)} + { + append(sv); + } + + template + constexpr basic_string(const char (&str)[N], allocator_type alloc = allocator_type{}) + : basic_string{std::move(alloc)} + { + append(string_view{ + std::string_view{str, N} + }); + } + + constexpr basic_string(std::nullptr_t) = delete; + + constexpr basic_string(const basic_string& other) : basic_string{other.alloc_} + { + assign(other.all()); + } + + constexpr basic_string(basic_string&& other) noexcept : alloc_{std::move(other.alloc_)} + { + move_impl(other); + } + + constexpr ~basic_string() + { + if (mem_) { + alloc_.deallocate(mem_, dyn_.cap); + } + } + + constexpr auto operator=(const basic_string& other) & -> basic_string& + { + if (this != &other) { + assign(other); + } + return *this; + } + + constexpr auto operator=(basic_string&& other) & noexcept( + std::allocator_traits::propagate_on_container_move_assignment::value + || std::allocator_traits::is_always_equal::value) -> basic_string& + { + std::swap(alloc_, other.alloc_); + if (mem_) { + other.alloc_.deallocate(mem_, dyn_.siz); + } + move_impl(other); + return *this; + } + + constexpr auto operator=(string_view other) & -> basic_string& + { + if (this != &other) { + assign(other); + } + return *this; + } + + auto get_allocator() const -> allocator_type + { + return alloc_; + } + + constexpr auto front() const noexcept -> value_type + { + return *begin(); + } + + constexpr auto back() const noexcept -> value_type + { + return *(std::prev(end())); + } + + + constexpr auto begin(this auto& self) noexcept + { + return utf::utf8_to_utf32_iterator{self.cp_(self.data())}; + } + + constexpr auto begin() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto end(this auto& self) noexcept + { + return utf::utf8_to_utf32_iterator{self.cp_(self.data() + self.size_bytes())}; + } + + constexpr auto end() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto cbegin() const& noexcept -> iterator + { + return begin(); + } + + constexpr auto cbegin() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto cend() const& noexcept -> iterator + { + return end(); + } + + constexpr auto cend() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto rbegin(this auto& self) noexcept + { + return std::reverse_iterator{self.begin()}; + } + + constexpr auto rbegin() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto rend(this auto& self) noexcept + { + return std::reverse_iterator{self.end()}; + } + + constexpr auto rend() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto crbegin() const& noexcept -> const_reverse_iterator + { + return rbegin(); + } + + constexpr auto crbegin() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto crend() const& noexcept -> const_reverse_iterator + { + return rend(); + } + + constexpr auto crend() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto clear() & noexcept -> void + { + replace_impl(as_const_bytes(), {}); + } + + constexpr auto insert(iterator pos, value_type ch) & -> basic_string& + { + auto buf = std::array{}; + auto last = utf::utf32_to_utf8(ch, buf.data()); + replace_impl({pos.base(), 0uz}, {buf.data(), last}); + } + + constexpr auto erase(iterator first, iterator last) & noexcept -> iterator + { + replace_impl({first.base(), last.base()}, {}); + return iterator{const_cast(first.base())}; + } + + constexpr auto erase(iterator pos) & noexcept -> iterator + { + return erase(pos, end()); + } + + constexpr auto push_back(value_type ch) & -> void + { + insert(end(), ch); + } + + constexpr auto pop_back() & noexcept -> void + { + auto last = end(); + auto first = std::prev(last); + replace_impl({first.base(), last.base()}, {}); + } + + constexpr auto append(Utf32Range auto&& rg) & -> auto&& + { + for (auto ch : rg) { + push_back(ch); + } + return *this; + } + + constexpr auto append(value_type ch) & -> auto&& + { + append(std::array{ch}); + return *this; + } + + constexpr auto append(Utf8ByteRange auto&& rg) & -> auto&& + { + if (!std::ranges::empty(rg)) { + auto pos = byte_view{end().base(), 0uz}; + if constexpr (std::ranges::contiguous_range) { + replace_impl(pos, std::as_bytes(std::span{std::ranges::data(rg), std::ranges::size(rg)})); + } else { + for (auto byte : rg) { + replace_impl(pos, {&byte, 1uz}); + } + } + } + return *this; + } + + constexpr auto append(string_view str) & -> auto&& + { + append(str.as_bytes()); + return *this; + } + + constexpr auto operator+=(auto&& rhs) & -> auto&& + { + append(std::forward(rhs)); + return *this; + } + + constexpr auto replace(iterator first, iterator last, string_view str) & -> auto&& + { + replace_impl({first.base(), last.base()}, str.as_bytes()); + return *this; + } + + constexpr auto replace(iterator first, iterator last, Utf32Range auto&& rg) & -> auto&& + { + auto buf = basic_string{alloc_}; + buf.append(rg); + replace(first, last, buf); + return *this; + } + + constexpr auto replace(iterator first, iterator last, Utf8ByteRange auto&& rg) & -> auto&& + { + if (!std::ranges::empty(rg)) { + if constexpr (std::ranges::contiguous_range) { + replace_impl({first.base(), last.base()}, + std::as_bytes(std::span{std::ranges::data(rg), std::ranges::size(rg)})); + } else { + auto buf = basic_string{alloc_}; + buf.append(rg); + replace(first, last, buf); + } + } + return *this; + } + + constexpr auto replace(iterator first, iterator last, value_type ch) & -> auto&& + { + auto buf = std::array{}; + auto buf_last = utf::utf32_to_utf8(ch, buf.data()); + replace(first, last, byte_view{buf.data(), buf_last}); + return *this; + } + + constexpr auto assign(string_view other) & -> auto&& + { + clear(); + append(other.as_bytes()); + return *this; + } + + constexpr auto assign(Utf32Range auto&& rg) & -> auto&& + { + clear(); + append(std::forward(rg)); + return *this; + } + + constexpr auto assign(Utf8ByteRange auto&& rg) & -> auto&& + { + clear(); + append(std::forward(rg)); + return *this; + } + + constexpr auto assign(value_type ch) & -> auto&& + { + clear(); + append(ch); + return *this; + } + + + constexpr auto empty() const noexcept -> bool + { + return !mem_ && !sso_.siz; + } + + constexpr auto size_bytes() const noexcept -> std::size_t + { + if (mem_) { + return dyn_.siz; + } else { + return sso_.siz; + } + } + + static constexpr auto max_size_bytes() noexcept -> std::size_t + { + return std::numeric_limits::max(); + } + + constexpr auto capacity() const noexcept -> std::size_t + { + if (mem_) { + return dyn_.cap; + } else { + return sso_size; + } + } + + constexpr auto reserve(std::size_t new_cap) & -> void + { + if (new_cap > max_size_bytes()) { + throw std::length_error{"new_cap > max_size_bytes()"}; + } + auto [cap, mem, siz] = [this] { + if (mem_) { + return std::tuple{dyn_.cap, mem_, dyn_.siz}; + } else { + return std::tuple{sso_size, sso_.dat, sso_.siz}; + } + }(); + if (new_cap < cap) return; + auto [new_mem, alloc_siz] = alloc_.allocate_at_least(new_cap); + std::memcpy(new_mem, mem, siz); + if (mem_) { + alloc_.deallocate(mem_, dyn_.cap); + dyn_.cap = alloc_siz; + } else { + sso_.siz = alloc_siz; + } + mem_ = new_mem; + } + + constexpr auto shrink_to_fit() & -> void {} + + constexpr auto + swap(basic_string& other) & noexcept(std::allocator_traits::propagate_on_container_swap::value + || std::allocator_traits::is_always_equal::value) -> void + { + if (this == &other) { + auto tmp = std::move(other); + other = std::move(*this); + *this = std::move(tmp); + } + } + + constexpr auto data(this auto& self) noexcept + { + if (self.mem_) { + return self.cp_(self.mem_); + } else { + return self.sso_.dat; + } + } + + constexpr auto data() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto as_bytes(this auto& self) noexcept + { + return std::span{self.data(), self.bytes()}; + } + + constexpr auto as_bytes() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto as_const_bytes() const& noexcept -> std::span + { + return std::span{data(), size_bytes()}; + } + + constexpr auto as_const_bytes() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto all() const& noexcept -> string_view + { + return {begin(), end()}; + } + + constexpr auto all() const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr operator string_view() const& noexcept + { + return all(); + } + + constexpr operator string_view() const&& noexcept = delete; + + constexpr auto start_with(value_type ch) const noexcept -> bool + { + return all().starts_with(ch); + } + + constexpr auto start_with(string_view sv) const noexcept -> bool + { + return all().starts_with(sv); + } + + constexpr auto ends_with(value_type ch) const noexcept -> bool + { + return all().ends_with(ch); + } + + constexpr auto ends_with(string_view sv) const noexcept -> bool + { + return all().ends_with(sv); + } + + constexpr auto contains(value_type ch) const noexcept -> bool + { + return all().contains(ch); + } + + constexpr auto contains(string_view sv) const noexcept -> bool + { + return all().contains(sv); + } + + constexpr auto find(value_type ch) const& noexcept -> iterator + { + return all().find(ch); + } + + constexpr auto find(value_type ch) const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto find(string_view sv) const& noexcept -> string_view + { + return all().find(sv); + } + + constexpr auto find(string_view sv) const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto rfind(value_type ch) const& noexcept -> iterator + { + return all().rfind(ch); + } + + constexpr auto rfind(value_type ch) const&& noexcept -> std::ranges::dangling + { + return {}; + } + + constexpr auto rfind(string_view sv) const& noexcept -> string_view + { + return all().rfind(sv); + } + + constexpr auto rfind(string_view sv) const&& noexcept -> std::ranges::dangling + { + return {}; + } + + + constexpr auto to_std_string() const + { + using ret_t = std::basic_string, allocator_type>; + return ret_t{all(), alloc_}; + } + + friend constexpr auto operator==(const basic_string& lhs, const basic_string& rhs) noexcept -> bool + { + return lhs.all() == rhs.all(); + } + + friend constexpr auto operator<=>(const basic_string& lhs, const basic_string& rhs) noexcept -> std::strong_ordering + { + return lhs.all() <=> rhs.all(); + } + + friend constexpr auto operator+(const basic_string& lhs, const basic_string& rhs) noexcept -> basic_string + { + auto ret = lhs; + ret.append(rhs); + return ret; + } + + friend constexpr auto operator+(const basic_string& lhs, string_view rhs) noexcept -> basic_string + { + auto ret = lhs; + ret.append(rhs); + return ret; + } + + friend constexpr auto operator+(const basic_string& lhs, value_type rhs) noexcept -> basic_string + { + auto ret = lhs; + ret.append(rhs); + return ret; + } + + friend constexpr auto operator+(string_view lhs, const basic_string& rhs) noexcept -> basic_string + { + auto ret = basic_string{lhs}; + ret.append(rhs); + return ret; + } + + friend constexpr auto operator+(value_type lhs, const basic_string& rhs) noexcept -> basic_string + { + auto ret = basic_string{lhs}; + ret.append(rhs); + return ret; + } +}; + +template +constexpr auto swap(basic_string& lhs, basic_string& rhs) noexcept(noexcept(lhs.swap(rhs))) +{ + lhs.swap(rhs); +} + +using string = basic_string<>; + +namespace pmr +{ + +using basic_string = basic_string>; + +using string = basic_string; + +} + +}