Skip to content

Commit 2b73f36

Browse files
authored
Merge pull request #913 from UE4SS-RE/utf8-path-utils
Improve path conversions and add new string utilities
2 parents e6a283b + 61ee57e commit 2b73f36

2 files changed

Lines changed: 130 additions & 10 deletions

File tree

assets/Changelog.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@ Added override Lua files for CallFunctionByNameWithArguments [UE4SS #848](https:
4747

4848
Added error messages in places where only error codes were previously logged (e.g. when loading a C++ mod) [UE4SS #902](https://github.com/UE4SS-RE/RE-UE4SS/pull/902)
4949

50+
Added improved string and path conversion utilities with proper UTF-8 support ([UE4SS #913](https://github.com/UE4SS-RE/RE-UE4SS/pull/913))
51+
- Rewrote `to_charT_string_path()` to properly handle UTF-8 and UTF-16 encodings
52+
- Added `ensure_str_as<CharT>()` for explicit target character type conversion
53+
- Added `to_utf8_string()` for convenient UTF-8 string conversion
54+
- Added `normalize_path_for_lua()` to convert paths to UTF-8 with forward slashes for Lua compatibility
55+
- Added `utf8_to_wpath()` to convert UTF-8 paths to Windows wide strings
56+
- **BREAKING:** `to_charT_string_path()` now returns UTF-8 encoded strings for char type instead of locale-dependent encoding
57+
5058
### Live View
5159
Added search filter: `IncludeClassNames`. ([UE4SS #472](https://github.com/UE4SS-RE/RE-UE4SS/pull/472)) - Buckminsterfullerene
5260

deps/first/Helpers/include/Helpers/String.hpp

Lines changed: 122 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -386,21 +386,87 @@ namespace RC
386386
}
387387
}
388388

389-
/**
 * Converts a std::filesystem::path into a std::basic_string of the requested character type.
 *
 * Encoding by target type:
 *  - wchar_t:             whatever path::wstring() yields.
 *  - char:                the UTF-8 bytes of path::u8string().
 *  - uint16_t / char16_t: the UTF-16 code units of path::u16string().
 * Any other CharT is rejected at compile time.
 *
 * @tparam CharT  Character type of the returned string.
 * @tparam T_Path Deduced argument type; must decay to std::filesystem::path.
 * @param arg The path to convert.
 * @return The path text as std::basic_string<CharT>.
 * @throws std::runtime_error if producing the UTF-16 form fails.
 */
template <typename CharT, typename T_Path>
auto inline to_charT_string_path(T_Path&& arg) -> std::basic_string<CharT>
{
    static_assert(std::is_same_v<std::decay_t<T_Path>, std::filesystem::path>, "Input must be std::filesystem::path");

    if constexpr (std::is_same_v<CharT, wchar_t>) // Covers WIDECHAR and Windows TCHAR
    {
        return arg.wstring();
    }
    else if constexpr (std::is_same_v<CharT, char>) // Covers ANSICHAR, for UTF-8 output
    {
        // path::u8string() returns std::u8string (char8_t) since C++20 and std::string before that.
        // Copying element by element works with either return type and keeps every byte value intact.
        const auto u8_s = arg.u8string();
        return std::basic_string<CharT>(u8_s.begin(), u8_s.end());
    }
    else if constexpr (std::is_same_v<CharT, uint16_t> || std::is_same_v<CharT, char16_t>) // Covers CHAR16 and standard char16_t
    {
        try
        {
            // path::u16string() performs the native-encoding -> UTF-16 conversion itself, so no
            // wchar_t size check and no deprecated <codecvt> machinery is needed here.
            const std::u16string u16_s = arg.u16string();

            // Convert code unit by code unit instead of reinterpret_cast-ing the buffer:
            // reading char16_t storage through a uint16_t pointer is an aliasing violation.
            return std::basic_string<CharT>(u16_s.begin(), u16_s.end());
        }
        catch (const std::exception&)
        {
            // Every failure on this path is reported with one exception type and message.
            throw std::runtime_error("Failed to convert path from UTF-8 to UTF-16");
        }
    }
    else
    {
        // This static_assert will provide a compile-time error for unsupported CharT types.
        static_assert(std::is_same_v<CharT, wchar_t> || std::is_same_v<CharT, char> || std::is_same_v<CharT, uint16_t> || std::is_same_v<CharT, char16_t>,
                      "to_charT_string_path: Unsupported target CharT for path conversion");
        return std::basic_string<CharT>(); // Should be unreachable due to static_assert
    }
}
406472

@@ -434,15 +500,61 @@ namespace RC
434500

435501
// Ensure that a string is compatible with UE4SS, converting it if neccessary
436502
template <typename T>
437-
auto inline ensure_str(T&& arg)
503+
auto inline ensure_str(T&& arg) /* -> StringType */
504+
{
505+
return ensure_str_as<CharType>(std::forward<T>(arg)); // CharType is the project's native char type
506+
}
507+
508+
template <typename TargetCharT, typename T>
509+
auto inline ensure_str_as(T&& arg) -> std::basic_string<TargetCharT>
510+
{
511+
return to_charT<TargetCharT>(std::forward<T>(arg));
512+
}
513+
514+
template <typename T>
515+
auto inline to_utf8_string(T&& arg) -> std::string
438516
{
439-
return to_charT<CharType>(std::forward<T>(arg));
517+
return ensure_str_as<char>(std::forward<T>(arg));
440518
}
441519

442520
// You can add more to_* functions if needed
443521

444522
// Auto Type Conversion Done
445523

524+
/**
525+
* Normalizes a path for use in Lua, ensuring:
526+
* 1. UTF-8 encoding for proper Unicode handling
527+
* 2. Forward slashes for consistency across platforms
528+
*
529+
* @param path The path to normalize
530+
* @return A UTF-8 encoded string with forward slashes
531+
* @throws std::runtime_error if conversion fails
532+
*/
533+
auto inline normalize_path_for_lua(const std::filesystem::path& path) -> std::string
534+
{
535+
std::string utf8_path = to_utf8_string(path);
536+
537+
// Replace backslashes with forward slashes for Lua
538+
std::replace(utf8_path.begin(), utf8_path.end(), '\\', '/');
539+
540+
return utf8_path;
541+
}
542+
543+
/**
544+
* Creates a Windows-compatible wide string from a UTF-8 path string
545+
* This is useful when opening files with Windows APIs that expect UTF-16
546+
*
547+
* @param utf8_path UTF-8 encoded path string
548+
* @return Wide string (UTF-16) for Windows APIs
549+
* @throws std::runtime_error if conversion fails
550+
*/
551+
auto inline utf8_to_wpath(const std::string& utf8_path) -> std::wstring
552+
{
553+
// No fallbacks - if this fails, it should throw since it's a critical error
554+
// that indicates invalid UTF-8 input
555+
return to_wstring(utf8_path);
556+
}
557+
446558
auto inline to_generic_string(const auto& input) -> StringType
447559
{
448560
if constexpr (std::is_same_v<std::remove_cvref_t<std::remove_pointer_t<std::remove_cvref_t<decltype(input)>>>, StringViewType>)

0 commit comments

Comments
 (0)