Support listing files for multiple inputs and u8string utf8 utilities. (#1103)

This is a piece of legacy tool functionality, used in our asset
generation script, that was missing.

This PR also adds a u8string version of the utf8 utilities in
platform_utils.h and u8string constructors to OutputStream{,Ex} to make
things work when building with C++20 and newer. Everything is guarded by
feature-test macro checks for u8string support.
This commit is contained in:
Mark Callow
2026-01-03 17:15:49 +09:00
committed by GitHub
parent 0eaebb015c
commit bb7c7d1007
5 changed files with 219 additions and 5 deletions

View File

@@ -10,6 +10,7 @@
#include <vector>
#include <iostream>
#include "utility.h"
#include "platform_utils.h"
#include <cxxopts.hpp>
#include <fmt/ostream.h>
@@ -312,11 +313,82 @@ struct OptionsMultiInSingleOut {
("stdin", "Use stdin as the first input file. (Using a single dash '-' as the first input file has the same effect)")
("stdout", "Use stdout as the output file. (Using a single dash '-' as the output file has the same effect)")
("files", "Input/output files. Last file specified will be used as output."
" Using a single dash '-' as an input or output file will use stdin/stdout.", cxxopts::value<std::vector<std::string>>(), "<filepath>");
" Using a single dash '-' as an input or output file will use stdin/stdout."
" A filepath prefixed with @ is read as a file name listing file. Listing text"
" files specify which actual files to process, one file name per line. Names can"
" be absolute paths or relative to the current directory when the application is"
" run. If the file is prefixed with @b @@ the names must be relative to the listing file.",
cxxopts::value<std::vector<std::string>>(), "<filepath>");
opts.parse_positional("files");
opts.positional_help("<input-file...> <output-file>");
}
// Expand a file name listing file into the list of files to process.
//
// @param f          The command line argument naming the listing file,
//                   including its "@" (or "@@" when relativize is true) prefix.
// @param relativize When true, each name read is prefixed with the directory
//                   part of the listing file's own path.
// @param files      Receives the names read, appended in file order.
// @param report     Used to report a failure to open or read the listing file.
//
// The listing file holds one file name per line. Leading spaces and trailing
// spaces, carriage returns and newlines are removed from each name and lines
// that are empty after trimming are ignored.
void loadFileList(const std::string &f, bool relativize,
                  std::vector<std::string>& files,
                  Reporter& report) {
    // Strip the "@" or "@@" prefix to obtain the listing file's own path.
    std::string listName(f);
    listName.erase(0, relativize ? 2 : 1);

    FILE* lf = fopenUTF8(listName, "r");
    if (!lf) {
        report.fatal(rc::RUNTIME_ERROR, "Opening filename list: \"{}\" failed: {}\n",
                     listName.c_str(), errnoMessage());
        // NOTE(review): fatal() is presumably non-returning; bail out
        // defensively so a null stream is never read if it does return.
        return;
    }

    // Directory part of the listing file, including the trailing separator.
    // Left empty when names are not relativized or the path has no directory
    // component, in which case names are used exactly as read.
    std::string dirname;
    if (relativize) {
#if defined(_WIN32)
        // Both separators are valid in Windows paths.
        const char* const separators = "/\\";
#else
        const char* const separators = "/";
#endif
        const size_t dirnameEnd = listName.find_last_of(separators);
        if (dirnameEnd != std::string::npos)
            dirname = listName.substr(0, dirnameEnd + 1);
    }

    bool endOfInput = false;
    while (!endOfInput) {
        // Assemble one logical line. A line longer than the buffer arrives in
        // several fgets chunks, so keep reading until the newline is seen.
        std::string readFilename;
        for (;;) {
            // Cross platform PATH_MAX def is too much trouble!
            char buf[4096];
            if (!fgets(buf, sizeof(buf), lf)) {
                endOfInput = true;
                break;
            }
            readFilename += buf;
            if (!readFilename.empty() && readFilename.back() == '\n')
                break;
        }

        if (endOfInput && ferror(lf)) {
            // Capture the message before fclose() can disturb errno and close
            // the stream before reporting so the handle is not leaked.
            const auto readError = errnoMessage();
            fclose(lf);
            report.fatal(rc::RUNTIME_ERROR, "Reading filename list: \"{}\" failed: {}\n",
                         listName.c_str(), readError);
            return;
        }

        // Trim trailing spaces and line terminators, then leading spaces.
        const size_t last = readFilename.find_last_not_of(" \r\n");
        if (last == std::string::npos)
            continue; // Blank line, or nothing left at end of file.
        const size_t first = readFilename.find_first_not_of(' ');
        files.push_back(dirname + readFilename.substr(first, last - first + 1));
    }
    fclose(lf);
}
void process(cxxopts::Options&, cxxopts::ParseResult& args, Reporter& report) {
std::vector<std::string> files;
if (args.count("stdin"))
@@ -334,7 +406,14 @@ struct OptionsMultiInSingleOut {
outputFilepath = std::move(files.back());
files.pop_back();
inputFilepaths = std::move(files);
std::vector<std::string>::const_iterator fit;
for (fit = files.begin(); fit < files.end(); fit++) {
if (fit[0][0] == '@') {
loadFileList(*fit, fit[0][1] == '@', inputFilepaths, report);
} else {
inputFilepaths.push_back(std::move(*fit));
}
}
if (std::count(inputFilepaths.begin(), inputFilepaths.end(), "-") > 1)
report.fatal_usage("'-' or --stdin as input file was specified more than once.");
@@ -393,6 +472,14 @@ protected:
public:
OutputStream(const std::string& filepath, Reporter& report);
#if defined(__cpp_lib_char8_t)
    // This is the simplest way to make this work when compiled with >= c++20
    // and the caller of this is passing the output of std::filesystem::path::u8string().
    // The u8string is converted with from_u8string() and construction is
    // delegated to the std::string constructor.
    // At some point we should consider making filepath a u8string.
    //
    // Note: an in-class declaration must not be qualified with the class
    // name; "OutputStream::OutputStream(...)" here is rejected by GCC and Clang.
    OutputStream(const std::u8string& filepath, Reporter& report) :
        OutputStream(from_u8string(filepath), report) { }
#endif
~OutputStream();
const std::string& str() {

View File

@@ -81,6 +81,10 @@ class OutputStreamEx : public OutputStream {
public:
OutputStreamEx(const std::string& filepath, Reporter& report)
: OutputStream(filepath, report) { }
#if defined(__cpp_lib_char8_t)
// Overload taking the file path as a std::u8string. It simply forwards the
// path and reporter to the OutputStream base class constructor. Only
// available when the standard library provides char8_t support.
OutputStreamEx(const std::u8string& filepath, Reporter& report)
: OutputStream(filepath, report) { }
#endif
void writeKTX2(ktxTexture1* texture, Reporter& report) {
const auto ret = ktxTexture1_WriteKTX2ToStdioStream(texture, file);

View File

@@ -848,6 +848,12 @@ Create a KTX2 file from various input files.
If the @e input-file is '-' the file will be read from the stdin.
If the @e output-path is '-' the output file will be written to the stdout.
An @e input-file prefixed with @b \@ will be read as a file name listing file.
Listing text files specify which actual files to process, one file name per line.
Names can be absolute paths or relative to the current directory when the
application is run. If the file is prefixed with @b \@\@ the names must be
relative to the listing file.
Each @e input-file must be a valid EXR (.exr), PNG (.png) or Raw (.raw) file.
PNG files with luminance (L) or luminance + alpha (LA) data will be converted
to RGB as LLL and RGBA as LLLA before processing further.

View File

@@ -21,6 +21,31 @@
#include <shellapi.h>
#endif
/*
* @internal
* @file
* @~English
*
* @brief Cross-platform utilities for handling utf-8 file names.
*
* To display UTF-8 strings streamed to the console correctly on Windows PowerShell
* or Command Prompt, they must be set to display UTF-8 text. For PowerShell run the
* following command before executing the program or add it to your $PROFILE:
* $OutputEncoding = [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new()
* See https://stackoverflow.com/questions/57131654/using-utf-8-encoding-chcp-65001-in-command-prompt-windows-powershell-window
* for more details and how to change the encoding for Command Prompt.
*
* Note that the PS console spawned by Visual Studio when running a console
* application does not load $PROFILE so displayed utf-8 will be mojibake.
*
* Note also that fmt::print works correctly without changing the console
* encoding because it uses the Windows wide char APIs to write to the
* console.
*
* @author Daniel Rákos
* @author Mark Callow
*/
#if defined(_WIN32) && !defined(_UNICODE)
// For Windows, we convert the UTF-8 path to a UTF-16 path to force using
// the APIs that correctly handle unicode characters.
@@ -46,13 +71,22 @@ inline void InitUTF8CLI(int& argc, char* argv[]) {
#if defined(_WIN32)
// Windows does not support UTF-8 argv so we have to manually acquire it
static std::vector<std::unique_ptr<char[]>> utf8Argv(argc);
// argc may be different from wargc if the caller of this is the
// secondary receiver of the command line args, e.g. in a gtest program
// where gtest removes its own args first.
int wargc;
LPWSTR commandLine = GetCommandLineW();
LPWSTR* wideArgv = CommandLineToArgvW(commandLine, &argc);
LPWSTR* wideArgv = CommandLineToArgvW(commandLine, &wargc);
for (int i = 0; i < argc; ++i) {
int byteSize = WideCharToMultiByte(CP_UTF8, 0, wideArgv[i], -1, nullptr, 0, nullptr, nullptr);
utf8Argv[i] = std::make_unique<char[]>(byteSize);
WideCharToMultiByte(CP_UTF8, 0, wideArgv[i], -1, utf8Argv[i].get(), byteSize, nullptr, nullptr);
WideCharToMultiByte(CP_UTF8, 0, wideArgv[i], -1, utf8Argv[i].get(),
byteSize, nullptr, nullptr);
argv[i] = utf8Argv[i].get();
if (i == 0) {
// Skip over the removed args.
i += (wargc - argc);
}
}
#else
// Nothing to do for other platforms
@@ -79,3 +113,86 @@ inline int unlinkUTF8(const std::string& path) {
return unlink(path.c_str());
#endif
}
#if defined(__cpp_lib_char8_t)
// Casting from u8string to string is not allowed in C++20. Neither
// can char8_t or std::u8string be streamed to ostreams. This provides
// an explicit conversion. Note that this does not perform any encoding.
inline std::string from_u8string(const std::u8string& s) {
return std::string(s.begin(), s.end());
}
inline std::string to_u8string(const std::string& s) {
return std::u8string(s.begin(), s.end());
}
#if defined(_WIN32) && !defined(_UNICODE)
// On Windows the UTF-8 path is converted to UTF-16 so that the wide
// character APIs, which handle unicode characters correctly, can be used.
// Returns an empty string when the size query reports nothing to convert.
inline std::wstring DecodeUTF8Path(std::u8string u8path) {
    const std::string narrow = from_u8string(u8path);
    const int narrowLength = static_cast<int>(narrow.length());

    // First call only queries the number of wide characters required.
    const int wideLength =
        MultiByteToWideChar(CP_UTF8, 0, narrow.c_str(), narrowLength, nullptr, 0);
    if (wideLength <= 0) {
        return std::wstring();
    }

    // Second call performs the conversion into the pre-sized buffer.
    std::wstring wide(static_cast<size_t>(wideLength), L'\0');
    MultiByteToWideChar(CP_UTF8, 0, narrow.c_str(), narrowLength,
                        &wide[0], wideLength);
    return wide;
}
#else
// For other platforms convert to a regular string.
// The conversion is delegated to from_u8string(); the path is taken by value
// and the result is returned as a std::string.
inline std::string DecodeUTF8Path(std::u8string path) { return from_u8string(path); }
#endif
// Fill u8argv with the program's command line arguments as UTF-8 strings.
//
// @param argc   Number of arguments the caller received.
// @param argv   The caller's arguments. Used as the source on non-Windows
//               platforms; unused on Windows.
// @param u8argv Resized to argc entries and filled with the arguments.
//
// On Windows the arguments are re-acquired from the process command line
// because argv is not UTF-8 there. If the command line cannot be parsed, or
// an argument cannot be converted, the corresponding entries are left empty.
inline void InitUTF8CLI(int& argc, char* argv[], std::vector<std::u8string>& u8argv) {
    u8argv.resize(static_cast<size_t>(argc));
#if defined(_WIN32)
    // Windows does not support UTF-8 argv so we have to manually acquire it
    (void)argv; // Unused
    // argc may be smaller than wargc if the caller of this is the secondary
    // receiver of the command line args, e.g. in a gtest program where gtest
    // removes its own args first.
    int wargc = 0;
    LPWSTR* wideArgv = CommandLineToArgvW(GetCommandLineW(), &wargc);
    if (wideArgv == nullptr) {
        return;
    }
    // NOTE(review): assumes the removed args immediately follow the program
    // name, as the skipping logic this replaces did - confirm with callers.
    const int removed = (wargc > argc) ? (wargc - argc) : 0;
    for (int i = 0; i < argc; ++i) {
        // Keep the destination index (i) separate from the source index so
        // skipping the removed args does not also skip slots in u8argv.
        const int w = (i == 0) ? 0 : i + removed;
        if (w >= wargc) {
            break;
        }
        // With a source length of -1 the reported size includes the
        // terminating NUL and the conversion writes that NUL too.
        const int byteSize =
            WideCharToMultiByte(CP_UTF8, 0, wideArgv[w], -1, nullptr, 0, nullptr, nullptr);
        if (byteSize <= 0) {
            u8argv[i].clear();
            continue;
        }
        u8argv[i].resize(static_cast<size_t>(byteSize));
        WideCharToMultiByte(CP_UTF8, 0, wideArgv[w], -1,
                            reinterpret_cast<LPSTR>(u8argv[i].data()),
                            byteSize, nullptr, nullptr);
        u8argv[i].pop_back(); // Drop the NUL that was converted with the text.
    }
    // The array returned by CommandLineToArgvW is released with LocalFree.
    LocalFree(wideArgv);
#else
    for (int i = 0; i < argc; ++i) {
        u8argv[i] = std::u8string(reinterpret_cast<const char8_t*>(argv[i]));
    }
#endif
}
// Open the file named by a UTF-8 encoded path.
//
// @param path UTF-8 encoded path of the file to open.
// @param mode Open mode string, as for fopen.
// @return The opened stream. On non-Windows platforms this is fopen's
//         result, so a null pointer indicates failure.
inline FILE* fopenUTF8(const std::u8string& path, const std::string& mode) {
#if defined(_WIN32)
    // Both the path and the mode are decoded for the wide character API.
    const auto decodedPath = DecodeUTF8Path(path);
    const auto decodedMode = DecodeUTF8Path(mode);
    FILE* stream;
    // Returned errno_t value is also set in the global errno.
    (void)_wfopen_s(&stream, decodedPath.c_str(), decodedMode.c_str());
    return stream;
#else
    const std::string nativePath = from_u8string(path);
    return fopen(nativePath.c_str(), mode.c_str());
#endif
}
// Remove the file named by a UTF-8 encoded path.
//
// @param path UTF-8 encoded path of the file to remove.
// @return The value returned by the platform's unlink function.
inline int unlinkUTF8(const std::u8string& path) {
    // Decode once into the form the platform API expects.
    const auto nativePath = DecodeUTF8Path(path);
#if defined(_WIN32)
    return _wunlink(nativePath.c_str());
#else
    return unlink(nativePath.c_str());
#endif
}
#else
// Fallback used when __cpp_lib_char8_t is not defined: both conversions are
// identity macros that expand to their (parenthesized) argument unchanged.
#define from_u8string(s) (s)
#define to_u8string(s) (s)
#endif