pluto-src 0.1.1+0.10.4

Sources of Pluto (Lua 5.4 dialect) and logic to build it.
Documentation
#pragma once

#include <cstdint>
#include <string>

#include "base.hpp"

// Top two bits of a UTF-8 continuation byte (pattern 10xxxxxx), with the payload bits cleared.
#define UTF8_CONTINUATION_FLAG 0x80

// Non-zero (0x80) when the top bit of `ch` is set, i.e. `ch` is not a plain 7-bit byte.
#define UTF8_HAS_CONTINUATION(ch) ((ch) & 0x80)

// True when the top two bits of `ch` are exactly 10, i.e. `ch` is a continuation byte.
#define UTF8_IS_CONTINUATION(ch) (((ch) & 0xC0) == UTF8_CONTINUATION_FLAG)


// True for code units in 0xD800..0xDBFF: everything above the low 10 bits equals that of 0xD800.
#define UTF16_IS_HIGH_SURROGATE(ch) (((ch) >> 10) == (0xD800 >> 10))

// True for code units in 0xDC00..0xDFFF: everything above the low 10 bits equals that of 0xDC00.
#define UTF16_IS_LOW_SURROGATE(ch) (((ch) >> 10) == (0xDC00 >> 10))


// Pick the 16-bit code unit type and the matching string type for this platform.
#if !SOUP_WINDOWS
// Everywhere except Windows: the standard char16_t family.
#define UTF16_CHAR_TYPE char16_t

#define UTF16_STRING_TYPE std::u16string

#else
// Windows: use the wide-character types; <windows.h> provides the Win32 API used by the Windows-only code in this header.
#include <windows.h>

#define UTF16_CHAR_TYPE wchar_t

#define UTF16_STRING_TYPE std::wstring

#endif
// Whichever type was selected must be exactly two bytes wide.
static_assert(sizeof(UTF16_CHAR_TYPE) == 2);

NAMESPACE_SOUP
{
	struct unicode
	{
		// Code point U+FFFD. utf16_to_utf32(const Str&) in this struct appends it
		// whenever decoding a single character yields 0.
		static constexpr uint32_t REPLACEMENT_CHAR = 0xFFFD;

		// NOTE(review): the functions below are only declared here; their definitions
		// are not visible in this header. The remarks are inferred from the signatures
		// and names and should be confirmed against the implementation.

		// Takes `it` by reference and `end` by value; presumably decodes one UTF-8
		// character starting at `it` and advances `it` past it (TODO confirm).
		[[nodiscard]] static char32_t utf8_to_utf32_char(std::string::const_iterator& it, const std::string::const_iterator end) noexcept;
#if SOUP_CPP20
		// char8_t overload, only declared when SOUP_CPP20 is set.
		[[nodiscard]] static std::u32string utf8_to_utf32(const char8_t* utf8) noexcept;
#endif
		// Presumably converts a whole UTF-8 string to UTF-32 (TODO confirm).
		[[nodiscard]] static std::u32string utf8_to_utf32(const std::string& utf8) noexcept;
#if SOUP_CPP20
		// char8_t overload, only declared when SOUP_CPP20 is set.
		[[nodiscard]] static UTF16_STRING_TYPE utf8_to_utf16(const char8_t* utf8) noexcept;
#endif
		// Presumably converts a whole UTF-8 string to the platform UTF-16 string type (TODO confirm).
		[[nodiscard]] static UTF16_STRING_TYPE utf8_to_utf16(const std::string& utf8) noexcept;
#if SOUP_WINDOWS
		// Windows-only. NOTE(review): "acp" presumably means the active Windows code page; confirm.
		[[nodiscard]] static UTF16_STRING_TYPE acp_to_utf16(const std::string& acp) noexcept;
#endif
		// Presumably converts a whole UTF-32 string to the platform UTF-16 string type (TODO confirm).
		[[nodiscard]] static UTF16_STRING_TYPE utf32_to_utf16(const std::u32string& utf32) noexcept;
		// Returns nothing and takes `utf16` by non-const reference; presumably appends
		// the UTF-16 encoding of `c` to it (TODO confirm).
		static void utf32_to_utf16_char(UTF16_STRING_TYPE& utf16, char32_t c) noexcept;
		// Single-code-point and whole-string overloads; presumably produce UTF-8 (TODO confirm).
		[[nodiscard]] static std::string utf32_to_utf8(char32_t utf32) noexcept;
		[[nodiscard]] static std::string utf32_to_utf8(const std::u32string& utf32) noexcept;

		template <typename Str = std::u16string>
		[[nodiscard]] static char32_t utf16_to_utf32(typename Str::const_iterator& it, const typename Str::const_iterator end) noexcept
		{
			char32_t w1 = *it++;
			if (UTF16_IS_HIGH_SURROGATE(w1))
			{
				SOUP_IF_UNLIKELY (it == end)
				{
					return 0;
				}
				char32_t w2 = *it++;
				return utf16_to_utf32(w1, w2);
			}
			return w1;
		}

		// Combines the two halves of a UTF-16 surrogate pair into one code point.
		// Only the low 10 bits of each argument are used: `hi` contributes bits
		// 10-19 and `lo` bits 0-9 of the offset that is added to 0x10000.
		[[nodiscard]] static char32_t utf16_to_utf32(char32_t hi, char32_t lo) noexcept
		{
			const char32_t upper_bits = (hi & 0x3FF) << 10;
			const char32_t lower_bits = lo & 0x3FF;
			// The two fields do not overlap, so OR-ing them equals adding them.
			return 0x10000 + (upper_bits | lower_bits);
		}

		// Converts a whole UTF-16 string to UTF-32.
		//
		// Each character is decoded with the iterator overload of utf16_to_utf32.
		// That overload reports a decoding failure by returning 0, which is replaced
		// with REPLACEMENT_CHAR here. A 0 that comes from a genuine U+0000 code unit
		// in the input is not a failure and is copied through unchanged.
		template <typename Str>
		[[nodiscard]] static std::u32string utf16_to_utf32(const Str& utf16)
		{
			std::u32string utf32{};
			// Every code point needs at least one code unit, so this is an upper bound.
			utf32.reserve(utf16.size());
			auto it = utf16.cbegin();
			const auto end = utf16.cend();
			while (it != end)
			{
				// Remember whether the unit about to be decoded is a real NUL, since the
				// decoder's error value is also 0.
				const bool is_nul = (*it == 0);
				const char32_t uni = utf16_to_utf32<Str>(it, end);
				if (uni == 0 && !is_nul)
				{
					utf32.push_back(REPLACEMENT_CHAR);
				}
				else
				{
					utf32.push_back(uni);
				}
			}
			return utf32;
		}

		// Converts a UTF-16 string to UTF-8.
		// On Windows this calls WideCharToMultiByte twice: once to learn the output
		// size and once to fill the buffer. A reported size of 0 yields an empty
		// string. Elsewhere the conversion goes through UTF-32.
		template <typename Str = UTF16_STRING_TYPE>
		[[nodiscard]] static std::string utf16_to_utf8(const Str& utf16)
		{
#if SOUP_WINDOWS
			const wchar_t* const wide = reinterpret_cast<const wchar_t*>(utf16.data());
			const int wide_len = static_cast<int>(utf16.size());

			// First pass: no output buffer, just ask how many bytes are needed.
			const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, wide, wide_len, nullptr, 0, nullptr, nullptr);
			SOUP_IF_LIKELY (utf8_len != 0)
			{
				// Second pass: write into a zero-filled buffer of exactly that size.
				std::string utf8(utf8_len, 0);
				WideCharToMultiByte(CP_UTF8, 0, wide, wide_len, utf8.data(), utf8_len, nullptr, nullptr);
				return utf8;
			}
			return std::string();
#else
			return utf32_to_utf8(utf16_to_utf32(utf16));
#endif
		}

		// Declared only; the definitions are not visible in this header.
		// NOTE(review): presumably these return the number of characters (code points)
		// rather than the number of code units in `str`; confirm against the implementation.
		[[nodiscard]] static size_t utf8_char_len(const std::string& str) noexcept;
		[[nodiscard]] static size_t utf16_char_len(const UTF16_STRING_TYPE& str) noexcept;

		// Advances `it` forward by one UTF-8 character, never moving past `end`.
		//
		// If the byte at `it` has its top bit set, `it` is moved forward once and then
		// past every continuation byte (10xxxxxx) that follows. Otherwise it is moved
		// forward by exactly one byte. When `it` already equals `end` nothing happens,
		// so `end` is never dereferenced.
		template <typename Iterator>
		static void utf8_add(Iterator& it, Iterator end)
		{
			if (it == end)
			{
				return;
			}
			if (!UTF8_HAS_CONTINUATION(*it))
			{
				// Plain single-byte character.
				++it;
				return;
			}
			do
			{
				++it;
			} while (it != end && UTF8_IS_CONTINUATION(*it));
		}

		// Moves `it` backward by one UTF-8 character, never moving before `begin`.
		//
		// `it` is stepped back once and then further back over continuation bytes
		// (10xxxxxx) until it rests on a non-continuation byte or reaches `begin`.
		// When `it` already equals `begin` nothing happens, so the iterator is never
		// decremented out of the range.
		template <typename Iterator>
		static void utf8_sub(Iterator& it, Iterator begin)
		{
			if (it == begin)
			{
				return;
			}
			do
			{
				--it;
			} while (it != begin && UTF8_IS_CONTINUATION(*it));
		}
	};
}